# Thomas/pandora_lib_promethion
# Pandora configuration template

#######################################
# General filesystem layout / I/O
#######################################

# Directory where POD / run description files are located.
pod_dir = "/data/run_data"

# Root directory where all results will be written.
# The path templates further down use a `{result_dir}` placeholder,
# documented there as the global result directory.
result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"

# Temporary directory.
tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"

# Whether Slurm should be used as the job runner.
slurm_runner = true

# Run cache directory.
run_cache_dir = "/home/t_steimle/data/prom_runs"

# General software thread count.
# NOTE(review): most tools below define their own `*_threads` key;
# confirm which steps actually read this value.
threads = 5

# Singularity invocation. This is a shell command string, not a plain
# path: it loads the `singularity-ce` module and then calls `singularity`.
singularity_bin = "module load singularity-ce && singularity"

# Temporary directory used when unarchiving input data.
unarchive_tmp_dir = "/data/unarchived"

# Maximum memory available for dockerized tools, in GiB.
# NOTE(review): the `_go` suffix presumably stands for "giga-octets";
# confirm the consumer interprets the value as GiB rather than GB.
docker_max_memory_go = 400

# Path to the SQLite database of processed cases.
db_cases_path = "/data/cases.sqlite"

# Path to the conda activation script (`conda.sh`).
conda_sh = "/mnt/beegfs02/software/recherche/miniconda/25.1.1/etc/profile.d/conda.sh"


#######################################
# Reference genome & annotations
#######################################

# Reference FASTA used throughout the pipeline.
reference = "/home/t_steimle/ref/hs1/chm13v2.0.fa"

# Short reference name used in filenames
# (appears as the `{reference_name}` placeholder in templates below).
reference_name = "hs1"

# Pseudoautosomal regions (PARs) BED file.
pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"

# Sequence dictionary (.dict) for the reference.
dict_file = "/data/ref/hs1/chm13v2.0.dict"

# RefSeq GFF3 annotation (sorted/indexed).
refseq_gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"

# Template for mask BED file (low-quality / filtered regions).
# {result_dir} -> global result directory
# {id}         -> case identifier
mask_bed = "{result_dir}/{id}/diag/mask.bed"

# BED file with early-replicating regions.
early_bed = "/data/ref/hs1/replication_early_25_hs1.bed"

# BED file with late-replicating regions.
late_bed = "/data/ref/hs1/replication_late_75_hs1.bed"

# BED file with CpG coordinates.
# NOTE(review): this path contains a repeated `hs1/hs1` segment, unlike
# the sibling files under /data/ref/hs1 — confirm it is not a typo.
cpg_bed = "/data/ref/hs1/hs1/hs1_CpG.bed"

# Panels of interest, as a list of [name, bed_path] pairs.
panels = [
  ["OncoT",         "/data/ref/hs1/V1_V2_V3_V4_V5_intersect_targets_hs1_uniq.bed"],
  ["variable_chips","/data/ref/hs1/top_1500_sd_pos.bed"],
]


#######################################
# Sample naming / BAM handling
#######################################

# Tumor sample label (used in paths & filenames).
# Note: many path templates in this file hard-code `diag` (e.g. mask_bed,
# clairs_output_dir, savana_output_dir) instead of referencing this value;
# keep them in sync if this label is ever changed.
tumoral_name = "diag"

# Normal (constitutional) sample label.
normal_name = "norm"

# BAM tag name used for haplotagged reads
# (appears as `{haplotagged_bam_tag_name}` in the Savana file templates).
haplotagged_bam_tag_name = "HP"

# Minimum MAPQ for reads kept during BAM filtering.
bam_min_mapq = 40

# Threads for BAM-level operations (view/sort/index…).
bam_n_threads = 150

# Number of reads sampled for BAM composition estimation.
bam_composition_sample_size = 20000


#######################################
# Coverage counting / somatic-scan
#######################################

# Name of the directory (under each sample directory) where counts are stored.
count_dir_name = "counts"

# Bin size, in base pairs, for count files.
count_bin_size = 1000

# Number of chunks used to split contigs for counting.
count_n_chunks = 1000

# Force recomputation of counting (somatic-scan) even if outputs exist.
somatic_scan_force = false


#######################################
# Somatic pipeline global settings
#######################################

# Force recomputation of the entire somatic pipeline.
# NOTE(review): this is the only `*_force` flag set to `true` in this
# template; confirm that forcing a full rerun is the intended default.
somatic_pipe_force = true

# Default thread count for heavy tools.
somatic_pipe_threads = 150

# Template for somatic pipeline statistics directory.
# Placeholders: {result_dir}, {id}
somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"


#######################################
# Filtering / QC thresholds
#######################################

# Minimum depth in the constitutional sample for a site to be evaluable.
somatic_min_constit_depth = 5

# Maximum allowed ALT count in the constitutional sample for a somatic call.
somatic_max_alt_constit = 1

# Window size (bp) for sequence entropy around variants.
entropy_seq_len = 10

# Minimum Shannon entropy threshold.
min_shannon_entropy = 1.0

# Max depth considered "low quality".
# NOTE(review): this value (20) is above min_high_quality_depth (14), so
# depths 14–20 satisfy both bounds; confirm how the consumer applies them.
max_depth_low_quality = 20

# Min depth considered "high quality".
min_high_quality_depth = 14

# Minimum number of callers required to keep a variant.
min_n_callers = 1


#######################################
# DeepVariant configuration
#######################################

# DeepVariant output directory template.
# Placeholders: {result_dir}, {id}, {time}
deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"

# Threads for DeepVariant.
deepvariant_threads = 20

# Path to the DeepVariant Singularity image (.sif).
deepvariant_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepvariant_latest.sif"

# DeepVariant model type (e.g. ONT_R104).
deepvariant_model_type = "ONT_R104"

# Force DeepVariant recomputation.
deepvariant_force = false


#######################################
# DeepSomatic configuration
#######################################

# DeepSomatic output directory template.
# Placeholders: {result_dir}, {id}, {time}
deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"

# Threads for DeepSomatic.
deepsomatic_threads = 20

# Path to the DeepSomatic Singularity image (.sif).
deepsomatic_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepsomatic_latest.sif"

# DeepSomatic model type.
deepsomatic_model_type = "ONT"

# Force DeepSomatic recomputation.
deepsomatic_force = false


#######################################
# ClairS configuration
#######################################

# Threads for ClairS.
clairs_threads = 40

# Path to the ClairS Singularity image (.sif).
clairs_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/clairs_latest.sif"

# Force ClairS recomputation.
clairs_force = false

# Platform preset for ClairS.
clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"

# ClairS output directory template.
# Placeholders: {result_dir}, {id}
clairs_output_dir = "{result_dir}/{id}/diag/ClairS"


#######################################
# Savana configuration
#######################################

# Savana binary (name or full path).
savana_bin = "/home/t_steimle/.conda/envs/savana_env/bin/savana"

# Threads for Savana.
savana_threads = 40

# Savana output directory template.
# Placeholders: {result_dir}, {id}
savana_output_dir = "{result_dir}/{id}/diag/savana"

# Savana copy-number output file template.
# Placeholders: {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
# NOTE(review): {output_dir} presumably resolves to savana_output_dir — confirm.
savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"

# Savana raw read counts file template.
# Placeholders: {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"

# Savana PASSED VCF template.
# Placeholders: {output_dir}, {id}
savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"

# Force Savana recomputation.
savana_force = false

# Constitutional phased VCF template (located under the `diag` directory).
# Placeholders: {result_dir}, {id}
germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"


#######################################
# Severus configuration
#######################################

# Path to the Severus script (`severus.py`).
severus_bin = "/home/t_steimle/somatic_pipe_tools/Severus/severus.py"

# Force Severus recomputation.
severus_force = false

# Threads for Severus.
severus_threads = 32

# VNTRs BED for Severus.
vntrs_bed = "/home/t_steimle/ref/hs1/vntrs_chm13.bed"

# Path of the Severus panel of normals.
severus_pon = "/home/t_steimle/ref/hs1/PoN_1000G_chm13.tsv.gz"

# Paired Severus output directory template.
# Placeholders: {result_dir}, {id}
severus_output_dir = "{result_dir}/{id}/diag/severus"

# Solo Severus output directory template.
# Placeholders: {result_dir}, {id}, {time}
severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"

#######################################
# Straglr configuration
#######################################

# Path to Straglr executable.
straglr_bin = "/home/t_steimle/.conda/envs/straglr_env/bin/straglr.py"

# Path to STR loci BED file for Straglr.
#
# Content: RepeatMasker Simple_repeat.
straglr_loci_bed = "/home/t_steimle/ref/hs1/simple_repeat_ucsc_hs1.bed"

# Minimum allele size difference, in bp, to report an allele as changed
# between normal and tumoral.
straglr_min_size_diff = 4

# Minimum read support required for an allele to be considered for
# change between normal and tumoral.
straglr_min_support_diff = 2

# Minimum read support for STR genotyping.
straglr_min_support = 2

# Minimum cluster size for STR detection.
straglr_min_cluster_size = 2

# Whether to genotype in size mode.
straglr_genotype_in_size = true

# Template for paired Straglr output directory.
#
# Placeholders: `{result_dir}`, `{id}`.
straglr_output_dir = "{result_dir}/{id}/diag/straglr"

# Template for solo Straglr output directory.
#
# Placeholders: `{result_dir}`, `{id}`, `{time}`.
straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"

# Force Straglr recomputation.
straglr_force = false

#######################################
# Marlin
#######################################

# BED file used by Marlin.
# NOTE(review): the filename suggests v1 probe coordinates on T2T — confirm.
marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"

#######################################
# Bcftools configuration
#######################################

# Path to bcftools binary.
bcftools_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/bcftools"

# Threads for bcftools.
bcftools_threads = 30

#######################################
# Longphase configuration
#######################################

# Path to longphase binary.
longphase_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/longphase_linux-x64"

# Threads for longphase.
longphase_threads = 20

# Threads for the longphase modcall step.
# Kept lower than longphase_threads to limit memory usage.
longphase_modcall_threads = 6

# Force longphase recomputation (haplotagging/phasing).
longphase_force = false

# Longphase modcall VCF template.
# Placeholders: {result_dir}, {id}, {time}
longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"


#######################################
# Modkit configuration
#######################################

# Path to modkit binary.
modkit_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/modkit_latest/modkit"

# Threads for `modkit summary`.
modkit_summary_threads = 40

# Modkit summary file template.
# Placeholders: {result_dir}, {id}, {time}
modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"


#######################################
# Nanomonsv configuration
#######################################

# Path to nanomonsv binary.
nanomonsv_bin = "/home/t_steimle/.conda/envs/nanomonsv_env/bin/nanomonsv"

# Paired nanomonsv output directory template.
# Placeholders: {result_dir}, {id}, {time}
# NOTE(review): other paired output directories in this file use a fixed
# `diag` component instead of {time}, and nanomonsv_passed_vcf below
# hard-codes `_diag_` — confirm {time} is intended here.
nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"

# Force nanomonsv recomputation.
nanomonsv_force = false

# Threads for nanomonsv.
nanomonsv_threads = 40

# Paired nanomonsv PASSED VCF template.
# Placeholders: {output_dir}, {id}
nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"

# Solo nanomonsv output directory template.
# Placeholders: {result_dir}, {id}, {time}
nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"

# Solo nanomonsv PASSED VCF template.
# Placeholders: {output_dir}, {id}, {time}
nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"

# Path to simple repeat BED file for nanomonsv.
# See https://github.com/friend1ws/nanomonsv
# Warning: a TBI index for this file must exist.
nanomonsv_simple_repeat_bed = "/home/t_steimle/ref/hs1/human_chm13v2.0_simpleRepeat.bed.gz"

#######################################
# PromethION metadata
#######################################

# Directory containing PromethION run metadata.
promethion_runs_metadata_dir = "/data/promethion-runs-metadata"

# JSON file mapping flowcell IDs / runs, used as input by Pandora.
promethion_runs_input = "/data/pandora-flowcell-id.json"

#######################################
# Alignment / basecalling (Dorado)
#######################################

# Every key from this header to the end of the file (or to the next table
# header) belongs to the `align` table; root-level keys must stay above it.
[align]
# Path to Dorado binary.
dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"

# Dorado basecalling arguments (device, model, modifications…).
dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"

# Whether Dorado should re-align after demultiplexing.
dorado_should_realign = false

# Number of threads for the Dorado aligner.
dorado_aligner_threads = 10

# Reference FASTA used for alignment.
# NOTE(review): same filename as the root-level `reference` key but in a
# different directory — confirm both point to the same assembly.
ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"

# Minimap2 index used for alignment.
# NOTE(review): left empty in this template; confirm how the consumer
# treats an empty string (e.g. no prebuilt index).
ref_mmi = ""

# Path to samtools binary.
samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"

# Threads for `samtools view`.
samtools_view_threads = 10

# Threads for `samtools sort`.
samtools_sort_threads = 20

# Threads for `samtools merge`.
samtools_merge_threads = 40

# Threads for `samtools split`.
samtools_split_threads = 20