Skip to content
Snippets Groups Projects
Commit 012cbbe7 authored by Manavalan Gajapathy's avatar Manavalan Gajapathy
Browse files

gets somalier working

parent 7ebee4f6
No related branches found
No related tags found
No related merge requests found
...@@ -72,17 +72,11 @@ target/ ...@@ -72,17 +72,11 @@ target/
.ipynb_checkpoints/ .ipynb_checkpoints/
# exclude data from source control by default # exclude data from source control by default
# data/ data/
variant_annotation/data/
#snakemake #snakemake
.snakemake/ .snakemake/
# exclude test data used for development
to_be_deleted/test_data/data/ref
to_be_deleted/test_data/data/reads
#logs #logs
logs/ logs/
......
- somalier: somalier:
tool: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/somalier" tool: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/somalier"
sites: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/sites/sites.hg38.vcf.gz" sites: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/sites/sites.hg38.vcf.gz"
labels_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/ancestry-labels-1kg.tsv" labels_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/ancestry-labels-1kg.tsv"
somalier_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/somalier_1kg/*.somalier" somalier_1kg: "/data/project/worthey_lab/projects/experimental_pipelines/mana/tools/somalier/0.2.12/ancestry/1kg-somalier/"
ref: "/data/project/worthey_lab/datasets_central/human_reference_genome/processed/GRCh38/no_alt_rel20190408/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" ref: "/data/project/worthey_lab/datasets_central/human_reference_genome/processed/GRCh38/no_alt_rel20190408/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
...@@ -7,18 +7,51 @@ module load snakemake/5.9.1-foss-2018b-Python-3.6.6 ...@@ -7,18 +7,51 @@ module load snakemake/5.9.1-foss-2018b-Python-3.6.6
snakemake -s workflow/Snakefile --use-conda -p snakemake -s workflow/Snakefile --use-conda -p
''' '''
from pathlib import Path from pathlib import Path
import pandas as pd
WORKFLOW_PATH = Path(workflow.basedir).parent WORKFLOW_PATH = Path(workflow.basedir).parent
configfile: WORKFLOW_PATH / "configs/workflow.yaml" configfile: WORKFLOW_PATH / "configs/workflow.yaml"
def get_samples(fpath):
samples = ()
with open(fpath, 'r') as f_handle:
for line in f_handle:
sample = line.split(' ')[1]
samples += (sample,)
return samples
EXTERNAL_DIR = Path("data/external") EXTERNAL_DIR = Path("data/external")
RAW_DIR = Path("data/raw") RAW_DIR = Path("data/raw")
INTERIM_DIR = Path("data/interim") INTERIM_DIR = Path("data/interim")
PROCESSED_DIR = Path("data/processed") PROCESSED_DIR = Path("data/processed")
PROJECTS_PATH = Path(f"/data/project/worthey_lab/projects/")
# PROJECTS_PATH = Path(f"/data/project/worthey_lab/projects/{PROJECT}/analysis")
PROJECT_NAME = "MuscDyst_SU_MAlexander"
SAMPLES = get_samples(RAW_DIR / f"ped/{PROJECT_NAME}.ped")
print (SAMPLES)
############## MODULES ##############
include: "rules/somalier.smk"
#########################################
############ CONSTRAINTS ############
wildcard_constraints:
sample = '|'.join(SAMPLES),
project = PROJECT_NAME
#########################################
rule all: rule all:
input: input:
expand(str(PROCESSED_DIR / "somalier/{project}/somalier.html"),
project=[PROJECT_NAME]),
expand(str(PROCESSED_DIR / "somalier/{project}/somalier.somalier-ancestry.html"),
project=[PROJECT_NAME]),
rule extract: rule extract:
input: input:
# TODO vcf = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "vcf" / "{sample}.vcf.gz",
vcf = "" vcf_index = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "vcf" / "{sample}.vcf.gz.tbi",
somalier_tool = config['somalier']['tool'], somalier_tool = config['somalier']['tool'],
sites = config['somalier']['sites'], sites = config['somalier']['sites'],
ref_genome = config['somalier']['ref'], ref_genome = config['somalier']['ref'],
output: output:
TODO INTERIM_DIR / "somalier_extract/{project}/{sample}.somalier"
# message: message:
"Running somalier extract. Project: {wildcards.project}"
params: params:
outdir = lambda wildcards, output: Path(output[[0]]).parent, outdir = lambda wildcards, output: Path(output[0]).parent,
shell: shell:
r""" r"""
{input.somalier_tool} extract \ {input.somalier_tool} extract \
--sites {input.sites} \ --sites {input.sites} \
--fasta {input.ref} \ --fasta {input.ref_genome} \
--out-dir {params.outdir} \ --out-dir {params.outdir} \
{input.vcf} {input.vcf}
""" """
...@@ -22,15 +23,17 @@ rule extract: ...@@ -22,15 +23,17 @@ rule extract:
rule relate: rule relate:
input: input:
# TODO extracted = expand(str(INTERIM_DIR / "somalier_extract/{{project}}/{sample}.somalier"),
extracted = "", sample=SAMPLES),
# ped = "" # ped = RAW_DIR / "ped" / "{project}.ped",
somalier_tool = config['somalier']['tool'], somalier_tool = config['somalier']['tool'],
output: output:
TODO expand(str(PROCESSED_DIR / "somalier/{{project}}/somalier.{ext}"),
# message: ext=['html', 'groups.tsv', 'pairs.tsv', 'samples.tsv']),
message:
"Running somalier relate. Project: {wildcards.project}"
params: params:
outdir = lambda wildcards, output: Path(output[[0]]).parent, outdir = lambda wildcards, output: Path(output[0]).parent,
shell: shell:
r""" r"""
{input.somalier_tool} relate \ {input.somalier_tool} relate \
...@@ -38,25 +41,28 @@ rule relate: ...@@ -38,25 +41,28 @@ rule relate:
--output-prefix {params.outdir}/somalier \ --output-prefix {params.outdir}/somalier \
{input.extracted} {input.extracted}
""" """
# --ped {input.ped} \ # --ped {input.ped} \
rule ancestry: rule ancestry:
input: input:
# TODO extracted = expand(str(INTERIM_DIR / "somalier_extract/{{project}}/{sample}.somalier"),
extracted="" sample=SAMPLES),
somalier_tool = config['somalier']['tool'], somalier_tool = config['somalier']['tool'],
labels_1kg = config['somalier']['labels_1kg'], labels_1kg = config['somalier']['labels_1kg'],
somalier_1kg = config['somalier']['somalier_1kg'], somalier_1kg = directory(config['somalier']['somalier_1kg']),
output: output:
# message: expand(str(PROCESSED_DIR / "somalier/{{project}}/somalier.somalier-ancestry.{ext}"),
ext=['html', 'tsv']),
message:
"Running somalier ancestry. Project: {wildcards.project}"
params: params:
outdir = lambda wildcards, output: Path(output[[0]]).parent, outdir = lambda wildcards, output: Path(output[0]).parent,
shell: shell:
r""" r"""
{input.somalier_tool} ancestry \ {input.somalier_tool} ancestry \
--output-prefix {params.outdir}/somalier \ --output-prefix {params.outdir}/somalier \
--labels {input.labels_1kg} \ --labels {input.labels_1kg} \
{input.somalier_1kg} ++ \ {input.somalier_1kg}*.somalier ++ \
{input.extracted} {input.extracted}
""" """
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment