Newer
Older
Usage:
ml reset
ml Anaconda3
conda activate quac_common
python src/create_dummy_ped.py
def read_project_tracker(project_tracker_f):
df = pd.read_excel(project_tracker_f, usecols=["CGDS ID", "Sex"])
df["sex_ped"] = df["Sex"].str.lower().map({"m": "1", "f": "2"})
df["sex_ped"] = df["sex_ped"].fillna("-9") # unknown sex
sample_sex_dict = df.set_index("CGDS ID")["sex_ped"].to_dict()
return sample_sex_dict
def nbbbb():
project_path = Path("/data/project/worthey_lab/projects") / project_name / "analysis"
samples = (
f.name for f in project_path.iterdir() if f.is_dir() and f.name.startswith(("LW", "UDN"))
)
header = ["#family_id", "sample_id", "paternal_id", "maternal_id", "sex", "phenotype"]
with open(Path(outpath) / f"{project_name}.ped", "w") as out_handle:
out_handle.write("\t".join(header) + "\n")
data = ["unknown", sample, "-9", "-9", "-9", "-9"]
out_handle.write("\t".join(data) + "\n")
return None
def main(outpath):
project_dict = {
"CF_CFF_PFarrell": {
"tracking_sheet": "data/external/project_tracker/PROJECT TRACKING -CF.xlsx",
"affected": "all",
},
"CF_TLOAF_PFarrell": {
"tracking_sheet": "data/external/project_tracker/PROJECT TRACKING -CF.xlsx",
"affected": "all",
},
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# "EDS3_unkn_DGreenspan",
# "MuscDyst_SU_MAlexander",
# "UDN_Phase1_EAWorthey",
}
for project_name in project_dict:
# get sample's sex info from project tracking sheet
sample_sex_dict = read_project_tracker(project_dict[project_name]["tracking_sheet"])
# get samples from cheaha for the project
project_path = Path("/data/project/worthey_lab/projects") / project_name / "analysis"
samples = (
f.name
for f in project_path.iterdir()
if f.is_dir() and f.name.startswith(("LW", "UDN"))
)
header = ["#family_id", "sample_id", "paternal_id", "maternal_id", "sex", "phenotype"]
with open(Path(outpath) / f"{project_name}.ped", "w") as out_handle:
out_handle.write("\t".join(header) + "\n")
for sample in sorted(samples):
data = [
"unknown",
sample,
"-9", # father
"-9", # mother
sample_sex_dict[sample], # sample sex
"1" if project_dict[project_name]["affected"] == "all" else "-9", # affected
]
out_handle.write("\t".join(data) + "\n")
return None
if __name__ == "__main__":
OUT_PATH = "data/raw/ped" # not so raw, is it?