Skip to content
Snippets Groups Projects
Commit 012cbbe7 authored by Manavalan Gajapathy's avatar Manavalan Gajapathy
Browse files

gets somalier working

parent 7ebee4f6
No related merge requests found
......@@ -72,17 +72,11 @@ target/
.ipynb_checkpoints/
# exclude data from source control by default
# data/
variant_annotation/data/
data/
#snakemake
.snakemake/
# exclude test data used for development
to_be_deleted/test_data/data/ref
to_be_deleted/test_data/data/reads
#logs
logs/
......
- somalier:
somalier:
tool: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/somalier"
sites: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/sites/sites.hg38.vcf.gz"
labels_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/ancestry-labels-1kg.tsv"
somalier_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/somalier_1kg/*.somalier"
somalier_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/1kg-somalier/"
ref: "/data/project/worthey_lab/datasets_central/human_reference_genome/processed/GRCh38/no_alt_rel20190408/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
......@@ -7,18 +7,51 @@ module load snakemake/5.9.1-foss-2018b-Python-3.6.6
snakemake -s workflow/Snakefile --use-conda -p
'''
from pathlib import Path
import pandas as pd
WORKFLOW_PATH = Path(workflow.basedir).parent
configfile: WORKFLOW_PATH / "configs/workflow.yaml"
def get_samples(fpath):
samples = ()
with open(fpath, 'r') as f_handle:
for line in f_handle:
sample = line.split(' ')[1]
samples += (sample,)
return samples
EXTERNAL_DIR = Path("data/external")
RAW_DIR = Path("data/raw")
INTERIM_DIR = Path("data/interim")
PROCESSED_DIR = Path("data/processed")
PROJECTS_PATH = Path(f"/data/project/worthey_lab/projects/")
# PROJECTS_PATH = Path(f"/data/project/worthey_lab/projects/{PROJECT}/analysis")
PROJECT_NAME = "MuscDyst_SU_MAlexander"
SAMPLES = get_samples(RAW_DIR / f"ped/{PROJECT_NAME}.ped")
print (SAMPLES)
############## MODULES ##############
include: "rules/somalier.smk"
#########################################
############ CONSTRAINTS ############
wildcard_constraints:
sample = '|'.join(SAMPLES),
project = PROJECT_NAME
#########################################
rule all:
input:
expand(str(PROCESSED_DIR / "somalier/{project}/somalier.html"),
project=[PROJECT_NAME]),
expand(str(PROCESSED_DIR / "somalier/{project}/somalier.somalier-ancestry.html"),
project=[PROJECT_NAME]),
rule extract:
input:
# TODO
vcf = ""
vcf = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "vcf" / "{sample}.vcf.gz",
vcf_index = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "vcf" / "{sample}.vcf.gz.tbi",
somalier_tool = config['somalier']['tool'],
sites = config['somalier']['sites'],
ref_genome = config['somalier']['ref'],
output:
TODO
# message:
INTERIM_DIR / "somalier_extract/{project}/{sample}.somalier"
message:
"Running somalier extract. Project: {wildcards.project}"
params:
outdir = lambda wildcards, output: Path(output[[0]]).parent,
outdir = lambda wildcards, output: Path(output[0]).parent,
shell:
r"""
{input.somalier_tool} extract \
--sites {input.sites} \
--fasta {input.ref} \
--fasta {input.ref_genome} \
--out-dir {params.outdir} \
{input.vcf}
"""
......@@ -22,15 +23,17 @@ rule extract:
rule relate:
input:
# TODO
extracted = "",
# ped = ""
extracted = expand(str(INTERIM_DIR / "somalier_extract/{{project}}/{sample}.somalier"),
sample=SAMPLES),
# ped = RAW_DIR / "ped" / "{project}.ped",
somalier_tool = config['somalier']['tool'],
output:
TODO
# message:
expand(str(PROCESSED_DIR / "somalier/{{project}}/somalier.{ext}"),
ext=['html', 'groups.tsv', 'pairs.tsv', 'samples.tsv']),
message:
"Running somalier relate. Project: {wildcards.project}"
params:
outdir = lambda wildcards, output: Path(output[[0]]).parent,
outdir = lambda wildcards, output: Path(output[0]).parent,
shell:
r"""
{input.somalier_tool} relate \
......@@ -38,25 +41,28 @@ rule relate:
--output-prefix {params.outdir}/somalier \
{input.extracted}
"""
# --ped {input.ped} \
# --ped {input.ped} \
rule ancestry:
input:
# TODO
extracted=""
extracted = expand(str(INTERIM_DIR / "somalier_extract/{{project}}/{sample}.somalier"),
sample=SAMPLES),
somalier_tool = config['somalier']['tool'],
labels_1kg = config['somalier']['labels_1kg'],
somalier_1kg = config['somalier']['somalier_1kg'],
somalier_1kg = directory(config['somalier']['somalier_1kg']),
output:
# message:
expand(str(PROCESSED_DIR / "somalier/{{project}}/somalier.somalier-ancestry.{ext}"),
ext=['html', 'tsv']),
message:
"Running somalier ancestry. Project: {wildcards.project}"
params:
outdir = lambda wildcards, output: Path(output[[0]]).parent,
outdir = lambda wildcards, output: Path(output[0]).parent,
shell:
r"""
{input.somalier_tool} ancestry \
--output-prefix {params.outdir}/somalier \
--labels {input.labels_1kg} \
{input.somalier_1kg} ++ \
{input.somalier_1kg}*.somalier ++ \
{input.extracted}
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment