QuaC

🦆🦆 Don't duck that QC thingy 🦆🦆

What is QuaC?

QuaC is a snakemake-based pipeline that runs several QC tools on WGS/WES samples processed at CGDS and summarizes the results against internally defined QC thresholds. It is a companion pipeline intended to be run after samples in a project have been processed by CGDS's small variant caller pipeline.

In short, QuaC performs the following:

  • Runs various QC tools using data produced by the small variant caller pipeline
  • Using the QuaC-Watch tool, it performs QC checkups against the expected thresholds and summarizes the results for easy consumption
  • Aggregates QC output produced here, as well as that produced by the small variant caller pipeline, using multiqc, both at the sample level and the project level

Note:

  1. While QuaC does the heavy lifting in performing QC, the small variant caller pipeline also runs a few QC tools (fastqc, fastq-screen, picard's markduplicates). This setup was chosen deliberately for pragmatic reasons.
  2. Use QuaC-Watch results with extreme caution when running in exome mode. Though QuaC can be run in exome mode, the QuaC-Watch thresholds it uses are not yet as reliable as those used for WGS datasets.

QC tools included

QuaC quacks using the tools listed below:

BAM QC

  • Qualimap: QCs alignment data in SAM/BAM files
  • Picard-CollectMultipleMetrics: Summarizes alignment metrics from a SAM/BAM file using several modules
  • Picard-CollectWgsMetrics: Collects metrics about coverage and performance of whole genome sequencing (WGS) experiments
  • mosdepth: Fast BAM/CRAM depth calculation
  • indexcov: Estimates coverage from a whole-genome BAM or CRAM index (not run in exome mode)
  • covviz: Identifies large, coverage-based anomalies (not run in exome mode)

VCF QC

  • bcftools stats: Computes variant statistics from a VCF

Within-species contamination

  • verifybamid: Estimates within-species (i.e. cross-sample) contamination

Sex, ancestry and relatedness estimation

  • somalier: Estimates sex, ancestry and relatedness

In addition, QuaC also utilizes QC results produced by the following tools as part of the small variant caller pipeline:

  • fastqc: Performs QC checks on raw sequence data (fastq)
  • FastQ Screen: Screens fastq files for other-species contamination
  • Picard's MarkDuplicates: Determines the level of read duplication in BAM files

QuaC-Watch

QuaC includes a tool called QuaC-Watch. After all the QC tools have run for the samples, QuaC-Watch checks the results against the expected QC thresholds (defined via the config file wgs_quac_watch_config.yaml; user-configurable) and summarizes them, both at the sample level and the project level. This summary makes it easy to quickly review whether samples have sufficient quality and highlights samples that need further review.
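
To customize the thresholds, one simple approach (illustrative; the project name and pedigree path below are placeholders reused from the examples later in this README) is to copy the default config, edit the copy, and point the wrapper at it via --quac_watch_config:

# start from the default WGS thresholds and edit the copy as needed
cp configs/quac_watch/wgs_quac_watch_config.yaml my_quac_watch_config.yaml

# run QuaC with the customized thresholds
python src/run_quac.py \
      --project_name PROJECT_DUCK \
      --pedigree "path/to/lake/with/pedigree_file.ped" \
      --quac_watch_config my_quac_watch_config.yaml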

Installation

Installation requires

  • fetching the source code
  • creating the conda environment

Requirements

  • Git v2.0+
  • CGDS GitLab access
  • SSH Key for access to Cheaha cluster
  • Anaconda/miniconda
    • Tested with Anaconda3/2020.02

Retrieve pipeline source code

This repository uses git submodules, which need to be pulled when cloning. Go to a directory of your choice and run the command below.

git clone -b master \
    --recurse-submodules \
    git@gitlab.rc.uab.edu:center-for-computational-genomics-and-data-science/sciops/pipelines/quac.git

Note that downloading this repository from GitLab as an archive, instead of cloning it, may not fetch the included submodules.
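
If the repository was cloned without --recurse-submodules, the submodules can still be pulled afterwards from within the clone:

cd /path/to/quac/repo
git submodule update --init --recursive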

Create conda environment

The conda environment provides all dependencies, including snakemake, needed to run the QuaC workflow.

cd /path/to/quac/repo

module reset
module load Anaconda3/2020.02

# create conda environment. Needed only the first time.
conda env create --file configs/env/quac.yaml
# activate conda environment
conda activate quac

# if you need to update the existing environment
conda env update --file configs/env/quac.yaml
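
To quickly confirm the environment is functional, check that the key dependencies resolve to the versions listed under Requirements further below:

conda activate quac
snakemake --version   # tested with v6.0.5
python --version      # tested with v3.6.3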

How to run QuaC

In order to run the QuaC pipeline, the user needs to

  1. Install the pipeline and set up the conda environment (see above)
  2. Set up config files specifying paths required by QC tools used in the pipeline.
  3. Run the QuaC pipeline on the testing dataset just to create the singularity+conda environments (optional)

Requirements

Direct

  • Snakemake
    • Tested with v6.0.5
    • Gets installed as part of conda environment
  • Python
    • Tested with v3.6.3
    • Gets installed as part of conda environment
  • slurmpy
    • Tested with v0.0.8
    • Gets installed as part of conda environment

Indirect

  • Anaconda/miniconda
    • Tested with Anaconda3/2020.02
    • Available as a module on Cheaha
  • Singularity
    • Tested with v3.5.2
    • Will be loaded as a module when running QuaC

The tools below are used in the QuaC pipeline, and snakemake automatically installs them in conda environments inside the singularity container; therefore, they don't need to be installed manually. For the tool versions used, refer to the snakemake rules. A brief illustration of the snakemake mechanism involved appears after the list below.

  • qualimap
  • picard
  • mosdepth
  • indexcov
  • covviz
  • bcftools
  • verifybamid
  • somalier
  • multiqc
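
This is purely illustrative: --use-conda and --use-singularity are the standard snakemake flags behind the conda-inside-singularity setup described above. The actual invocation (container image, core counts, and other options) is assembled by the wrapper src/run_quac.py, so the values below are placeholders and nothing here needs to be run manually.

# illustrative sketch only; the real flags and values are assembled by src/run_quac.py
snakemake \
    --use-conda \
    --use-singularity \
    --cores 4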

Set up workflow config file

QuaC requires a workflow config file in yaml format (configs/workflow.yaml), which provides filepaths to the dependencies required by certain QC tools. Its format should look like:

ref: "path to reference genome"
somalier:
    sites: "path to somalier's site file"
    labels_1kg: "path to somalier's ancestry-labels-1kg file"
    somalier_1kg: "dirpath to somalier's 1kg-somalier files"
verifyBamID:
    svd_dat_wgs: "path to WGS resources .dat files"
    svd_dat_exome: "path to exome resources .dat files"
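
As an optional sanity check, the edited config can be parsed to catch YAML syntax errors early (run from the repo root with the quac environment active; PyYAML is available there as a snakemake dependency):

python -c "import yaml; print(yaml.safe_load(open('configs/workflow.yaml')))"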

Prepare verifybamid datasets for exome analysis

This step is necessary only if QuaC will be run in exome mode (--exome). verifybamid provides auxiliary resource files that are required for the analysis. However, the chromosome contigs in its exome resource files lack the chr prefix, which our analyses at CGDS expect. Follow these steps to set up resource files with the chr prefix in their contig names.

# cd into exome resources dir
cd <path-to>/VerifyBamID-2.0.1/resource/exome/
# prepend 'chr' to each line so the contig names match CGDS conventions
sed -e 's/^/chr/' 1000g.phase3.10k.b38.exome.vcf.gz.dat.bed > 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.bed
sed -e 's/^/chr/' 1000g.phase3.10k.b38.exome.vcf.gz.dat.mu > 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.mu

# the UD and V files need no edits; copy them under the matching _chr names
cp 1000g.phase3.10k.b38.exome.vcf.gz.dat.UD 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.UD
cp 1000g.phase3.10k.b38.exome.vcf.gz.dat.V 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.V
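
Before pointing svd_dat_exome in configs/workflow.yaml at the renamed resources, it may help to confirm that all four _chr files are in place (a convenience check, not part of the original setup):

# confirm the renamed resource set is complete
ls -l 1000g.phase3.10k.b38_chr.exome.vcf.gz.dat.{bed,mu,UD,V}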

Run pipeline

After activating the conda environment, the QuaC pipeline can be run using the wrapper script src/run_quac.py. Here are all the available options:

$ ./src/run_quac.py -h
usage: run_quac.py [-h] [--project_name] [--projects_path] [--pedigree]
                   [--quac_watch_config] [--workflow_config] [--outdir]
                   [--exome] [--cluster_config] [--log_dir] [-e] [-n] [-l]
                   [--rerun_failed] [--slurm_partition]

Wrapper tool for QuaC pipeline.

optional arguments:
  -h, --help            show this help message and exit

QuaC workflow options:
  --project_name        Project name (default: None)
  --projects_path       Path where all projects are hosted. Do not include
                        project name here. (default:
                        /data/project/worthey_lab/projects/)
  --pedigree            Pedigree filepath. Must correspond to the project
                        supplied via --project_name (default: None)
  --quac_watch_config   YAML config path specifying QC thresholds for QuaC-
                        Watch (default:
                        configs/quac_watch/wgs_quac_watch_config.yaml)
  --workflow_config     YAML config path specifying filepath to dependencies
                        of tools used in QuaC (default: configs/workflow.yaml)
  --outdir              Out directory path (default:
                        $USER_SCRATCH/tmp/quac/results/test_project/analysis)
  --exome               Flag to run in exome mode. WARNING: Please provide
                        appropriate configs via --quac_watch_config. (default:
                        False)

QuaC wrapper options:
  --cluster_config      Cluster config json file. Needed for snakemake to run
                        jobs in cluster. (default: configs/cluster_config.json)
  --log_dir             Directory path where logs (both workflow's and
                        wrapper's) will be stored (default:
                        $USER_SCRATCH/tmp/quac/logs)
  -e , --extra_args     Pass additional custom args to snakemake. Equal symbol
                        is needed for assignment as in this example: -e='--
                        forceall' (default: None)
  -n, --dryrun          Flag to dry-run snakemake. Does not execute anything,
                        and just display what would be done. Equivalent to '--
                        extra_args "-n"' (default: False)
  -l, --run_locally     Flag to run the snakemake locally and not as a Slurm
                        job. Useful for testing purposes. (default: False)
  --rerun_failed        Number of times snakemake restarts failed jobs. This
                        may be set to >0 to avoid pipeline failing due to job
                        fails due to random SLURM issues (default: 1)
  --slurm_partition     Request a specific partition for the slurm resource
                        allocation for QuaC workflow. Available partitions in
                        Cheaha are: express(max 2 hrs), short(max 12 hrs),
                        medium(max 50 hrs), long(max 150 hrs) (default: short)

To run the wrapper script, which in turn will execute the QuaC pipeline:

module reset
module load Anaconda3/2020.02
conda activate quac

python src/run_quac.py \
      --project_name PROJECT_DUCK \
      --pedigree "path/to/lake/with/pedigree_file.ped"

NOTE:

Besides the basic features, the wrapper script src/run_quac.py offers the following:

  • Pass custom snakemake args using option --extra_args.
  • Dry-run snakemake using the flag --dryrun. Note that this is the same as --extra_args='-n'; see the example after this list.
  • Override the cluster config file passed to snakemake using --cluster_config.
  • Run the main snakemake process locally, instead of submitting it as a Slurm job, using --run_locally. Note that jobs spawned by snakemake will still be submitted to Slurm.
  • Reruns failed jobs once by default; this may be modified using --rerun_failed.
  • Override slurm partition used via --slurm_partition.
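
For example, a dry run can be requested either with the dedicated flag or through --extra_args (project name and pedigree path are placeholders from the earlier example; note the equal sign required for --extra_args/-e assignment):

# dry-run: show what would be executed without running anything
python src/run_quac.py \
      --project_name PROJECT_DUCK \
      --pedigree "path/to/lake/with/pedigree_file.ped" \
      --dryrun

# equivalent form using extra args passed through to snakemake
python src/run_quac.py \
      --project_name PROJECT_DUCK \
      --pedigree "path/to/lake/with/pedigree_file.ped" \
      -e="-n"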

Create singularity+conda environments for tools used in QuaC pipeline

All jobs initiated by QuaC run inside conda environments, which are themselves created inside a singularity container. It is a good idea to create these environments before running the pipeline on actual samples. While this step is optional, it ensures that there will not be any conflicts when running multiple instances of the pipeline.

Running the commands below submits a slurm job that only creates these singularity+conda environments. Note that this slurm job exits right after creating the environments; it does not run any QC analyses on the input samples provided.

module reset
module load Anaconda3/2020.02
conda activate quac

# WGS mode
python src/run_quac.py \
      --project_name test_project \
      --projects_path ".test/ngs-data/" \
      --pedigree ".test/configs/project.ped" \
      --outdir "$USER_SCRATCH/tmp/quac/results/test_project_wgs/analysis" \
      -e="--conda-create-envs-only"

Input requirements

  • Pedigree file supplied via --pedigree. Only the samples listed in the pedigree file will be processed by QuaC, and all of them must belong to the same project. This repo also includes a handy script src/create_dummy_ped.py that can create a dummy pedigree file, which will lack sex (unless a project tracking sheet is provided), relatedness and affected-status info; see the header of the script for usage instructions. Note that we plan to use phenotips in the future to produce fully capable pedigree files. One could also create them manually, but this would be error-prone. A minimal example appears after this list.
  • Output produced by the small variant caller pipeline. This includes bam, vcf and QC output. Refer to test sample dataset, which is representative of the input required.
  • QuaC config file
  • When run in exome mode, QuaC requires a capture-regions bed file at path path_to_sample/configs/small_variant_caller/<capture_regions>.bed.
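
For reference, a minimal pedigree file uses the standard six-column PED format (family ID, sample ID, paternal ID, maternal ID, sex, affected status; whitespace-separated). The family ID and column values below are made up for illustration, with sample names matching the test samples A and B:

#family_id  sample_id  paternal_id  maternal_id  sex  phenotype
FAM1        A          0            0            1    2
FAM1        B          0            0            2    1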

Example usage

# to quack on a WGS project
python src/run_quac.py \
      --project_name CF_CFF_PFarrell \
      --pedigree "data/raw/ped/CF_CFF_PFarrell.ped"

# to quack on a WGS project and write results to a dir of choice
PROJECT="CF_CFF_PFarrell"
python src/run_quac.py \
      --slurm_partition medium \
      --project_name ${PROJECT} \
      --pedigree "data/raw/ped/${PROJECT}.ped" \
      --outdir "/data/scratch/manag/tmp/quac/results/test_${PROJECT}/analysis"

# to quack on an exome project
python src/run_quac.py \
      --project_name HCC \
      --pedigree "data/raw/ped/HCC.ped" \
      --quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
      --exome

# to quack on an exome project which is not in the default CGDS projects_path
python src/run_quac.py \
      --project_name UnusualCancers_CMGalluzi \
      --projects_path "/data/project/sloss/cgds_path_cmgalluzzi/" \
      --pedigree "data/raw/ped/UnusualCancers_CMGalluzi.ped" \
      --quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
      --exome

Output

QuaC results are stored at the path specified via the --outdir option (default: $USER_SCRATCH/tmp/quac/results/test_project/analysis). Refer to the testing output to learn about the output directory structure. The most important output files are the aggregated QC results produced by multiqc, both at the sample level and the project level. These multiqc reports also include a summary of the QuaC-Watch results.

Note that QuaC's output directory structure follows the output structure of the small variant caller pipeline.
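
Once a run completes, a quick way to locate all the aggregated reports under the default output path:

# list sample-level and project-level multiqc reports
find "$USER_SCRATCH/tmp/quac/results/test_project/analysis" -name "*multiqc*.html"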

Testing pipeline

The system-level testing implemented for this pipeline checks whether the pipeline runs from start to finish without any error. It uses the test datasets present in .test/ngs-data/test_project, which represent a test project containing two samples. See here for more info on how these test datasets were created.

NOTE: This testing does not verify that the pipeline's output is correct. Instead, its purpose is just to ensure that the pipeline runs from beginning to end without any execution error for the given test dataset.

How to run

module reset
module load Anaconda3/2020.02
conda activate quac

# WGS mode
python src/run_quac.py \
      --project_name test_project \
      --projects_path ".test/ngs-data/" \
      --pedigree ".test/configs/project.ped" \
      --outdir "$USER_SCRATCH/tmp/quac/results/test_project_wgs/analysis"

# Exome mode
python src/run_quac.py \
      --project_name test_project \
      --projects_path ".test/ngs-data/" \
      --pedigree ".test/configs/project.ped" \
      --outdir "$USER_SCRATCH/tmp/quac/results/test_project_exome/analysis" \
      --quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
      --exome

Expected output files

$ tree $USER_SCRATCH/tmp/quac/results/test_project/ -d -L 4
/data/scratch/manag/tmp/quac/results/test_project/
└── analysis
    ├── A
    │   └── qc
    │       ├── bcftools-index
    │       │   └── ...
    │       ├── bcftools-stats
    │       │   └── ...
    │       ├── mosdepth
    │       │   └── ...
    │       ├── multiqc_final_pass
    │       │   ├── ...
    │       │   └── A_multiqc.html
    │       ├── multiqc_initial_pass
    │       │   ├── ...
    │       │   └── A_multiqc.html
    │       ├── picard-stats
    │       │   └── ...
    │       ├── quac_watch
    │       │   └── ...
    │       ├── qualimap
    │       │   └── ...
    │       ├── samtools-stats
    │       │   └── ...
    │       └── verifyBamID
    │           └── ...
    ├── B
    │   └── qc
    │       └── same directory structure as that of sample A
    └── project_level_qc
        ├── covviz
        │   └── ...
        ├── indexcov
        │   └── ...
        ├── mosdepth
        │   └── ...
        ├── multiqc
        │   ├── ...
        │   └── multiqc_report.html
        └── somalier
            ├── ancestry
            │   └── ...
            ├── extract
            │   └── ...
            └── relatedness
                └── ...

Certain tools (e.g. indexcov and covviz) are not executed when QuaC is run in exome mode (--exome).

Visualization of workflow

Visualization of the pipeline based on the test datasets is available in the directory pipeline_visualized. Commands used to create this visualization:

# open interactive node
srun --ntasks=1 --cpus-per-task=1 --mem-per-cpu=4096 --partition=express --pty /bin/bash

# setup environment
module reset
module load Anaconda3/2020.02
conda activate quac
DAG_DIR="pipeline_visualized"

###### WGS mode ######
# DAG
python src/run_quac.py \
      --project_name test_project \
      --projects_path .test/ngs-data/ \
      --pedigree .test/configs/project.ped \
      --run_locally --extra_args "--dag -F | dot -Tpng > ${DAG_DIR}/wgs_dag.png"

# Rulegraph - less informative than the DAG at the sample level, but its lower density makes it easier to skim
python src/run_quac.py \
      --project_name test_project \
      --projects_path .test/ngs-data/ \
      --pedigree .test/configs/project.ped \
      --run_locally --extra_args "--rulegraph -F | dot -Tpng > ${DAG_DIR}/wgs_rulegraph.png"

###### Exome mode ######
# DAG
python src/run_quac.py \
      --project_name test_project \
      --projects_path .test/ngs-data/ \
      --pedigree .test/configs/project.ped \
      --exome \
      --quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
      --run_locally --extra_args "--dag -F | dot -Tpng > ${DAG_DIR}/exome_dag.png"

# Rulegraph - less informative than the DAG at the sample level, but its lower density makes it easier to skim
python src/run_quac.py \
      --project_name test_project \
      --projects_path .test/ngs-data/ \
      --pedigree .test/configs/project.ped \
      --exome \
      --quac_watch_config "configs/quac_watch/exome_quac_watch_config.yaml" \
      --run_locally --extra_args "--rulegraph -F | dot -Tpng > ${DAG_DIR}/exome_rulegraph.png"

Contributing

If you would like to make changes to the source code, please see the contribution guidelines.

Changelog

See here.