Skip to content
Snippets Groups Projects
Commit f3644cbd authored by Manavalan Gajapathy's avatar Manavalan Gajapathy
Browse files

makes dummy ped file creator CLI compatible

parent 0f0ef52e
No related branches found
No related tags found
1 merge request: !1 "QuaC - First major review"
......@@ -2,17 +2,24 @@
Create dummy ped file by project
Usage:
# setup environment
ml reset
ml Anaconda3
conda activate quac_common
python src/create_dummy_ped.py
# Example
python src/create_dummy_ped.py --project_path "/data/project/worthey_lab/projects/CF_CFF_PFarrell/" --outfile test.ped
"""
from pathlib import Path
import pandas as pd
import fire
def read_project_tracker(project_tracker_f):
"""
Reads project tracking excel file. Expects certain columns to be present.
"""
df = pd.read_excel(project_tracker_f, usecols=["CGDS ID", "Sex"])
......@@ -24,70 +31,51 @@ def read_project_tracker(project_tracker_f):
return sample_sex_dict
def main(project_path, outfile, tracking_sheet=False):
    """
    Creates dummy pedigree file for the project requested

    Sample names are the sub-directories of "<project_path>/analysis" whose
    names start with "LW" or "UDN". One row is written per sample, sorted by
    sample name, with family id "unknown" and "-9" for father, mother and
    affected status.

    Args:
        project_path (str): Project path. Script will look for samples under its subdirectory "analysis".
        outfile (str): Output pedigree file path
        tracking_sheet (str, optional): Project tracking sheet in excel format. Uses this for sex info. Defaults to False.

    Returns:
        None. The pedigree is written to ``outfile`` as a tab-separated file.

    Raises:
        FileNotFoundError: If "<project_path>/analysis" does not exist.
    """

    # get sample's sex info from project tracking sheet, if supplied
    sample_sex_dict = read_project_tracker(tracking_sheet) if tracking_sheet else {}

    # get samples from cheaha for the project
    analysis_path = Path(project_path) / "analysis"
    samples = sorted(
        f.name for f in analysis_path.iterdir() if f.is_dir() and f.name.startswith(("LW", "UDN"))
    )

    header = ["#family_id", "sample_id", "paternal_id", "maternal_id", "sex", "phenotype"]
    with open(outfile, "w") as out_handle:
        out_handle.write("\t".join(header) + "\n")

        for sample in samples:
            data = [
                "unknown",
                sample,
                "-9",  # father
                "-9",  # mother
                # sample sex; a sample directory that is missing from the tracking
                # sheet gets the same "-9" placeholder used when no sheet is given.
                # str() keeps "\t".join working if the sheet yields non-string values.
                str(sample_sex_dict.get(sample, "-9")),
                "-9",  # affected
            ]
            out_handle.write("\t".join(data) + "\n")

    return None


if __name__ == "__main__":
    # Set FIRE_MODE to False to run the hard-coded example below instead of the CLI.
    FIRE_MODE = True

    if FIRE_MODE:
        # expose main's arguments as command-line flags (--project_path, --outfile, ...)
        fire.Fire(main)
    else:
        PROJECT_PATH = "/data/project/worthey_lab/projects/CF_CFF_PFarrell/"
        OUTFILE = "out.ped"
        main(PROJECT_PATH, OUTFILE)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment