pandora-config.example.toml 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. # Pandora configuration template
  2. #######################################
  3. # General filesystem layout / I/O
  4. #######################################
  5. # Root directory where all results will be written.
  6. result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"
  7. # Temporary directory.
  8. tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"
  9. # Whether to use Slurm as the job runner.
  10. slurm_runner = true
  11. # Run cache directory.
  12. run_cache_dir = "/home/t_steimle/data/prom_runs"
  13. # Software threads
  14. threads = 5
  15. # Singularity command (may include a `module load` prefix).
  16. singularity_bin = "module load singularity-ce && singularity"
  17. # Path to the conda activation script.
  18. conda_sh = "/mnt/beegfs02/software/recherche/miniconda/25.1.1/etc/profile.d/conda.sh"
  19. #######################################
  20. # Reference genome & annotations
  21. #######################################
  22. # Reference FASTA used throughout the pipeline.
  23. reference = "/home/t_steimle/ref/hs1/chm13v2.0.fa"
  24. # Short reference name used in filenames.
  25. reference_name = "hs1"
  26. # Pseudoautosomal regions (PARs) BED file.
  27. pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"
  28. # Sequence dictionary (.dict) for the reference.
  29. dict_file = "/home/t_steimle/ref/hs1/chm13v2.0.dict"
  30. # RefSeq GFF3 annotation (sorted/indexed).
  31. refseq_gff = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
  32. # dbSNP vcf.gz file (should be indexed)
  33. db_snp = "/home/t_steimle/ref/hs1/chm13v2.0_dbSNPv155.vcf.gz"
  34. # Genes BED file with the gene name in the 4th column; the file should be sorted.
  35. genes_bed = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed"
  36. # Cytobands BED file
  37. cytobands_bed = "/home/t_steimle/ref/hs1/chm13v2.0_cytobands_allchrs.bed"
  38. # Chromosome alias file
  39. # ex: https://hgdownload.soe.ucsc.edu/hubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.chromAlias.txt
  40. chromosomes_alias = "/home/t_steimle/ref/hs1/GCA_009914755.4.chromAlias.txt"
  41. # Template for mask BED file (low-quality / filtered regions).
  42. # {result_dir} -> global result directory
  43. # {id} -> case identifier
  44. mask_bed = "{result_dir}/{id}/diag/mask.bed"
  45. # Panels of interest: [ [name, bed_path], ... ]
  46. panels = [
  47. ["CM", "/home/t_steimle/ref/hs1/panel_cm_hs1.bed"],
  48. ]
  49. repeats_bed = "/home/t_steimle/ref/hs1/all_repeats_chm13_final.bed"
  50. #######################################
  51. # Sample naming / BAM handling
  52. #######################################
  53. # Tumor sample label (used in paths & filenames).
  54. tumoral_name = "diag"
  55. # Normal sample label.
  56. normal_name = "norm"
  57. # BAM tag name used for haplotagged reads.
  58. haplotagged_bam_tag_name = "HP"
  59. # Minimum MAPQ for reads kept during BAM filtering.
  60. bam_min_mapq = 40
  61. # Number of threads for hts BAM reader decompression (should be adapted to I/O speed).
  62. bam_n_threads = 4
  63. # Number of reads sampled for BAM composition estimation.
  64. bam_composition_sample_size = 20000
  65. #######################################
  66. # Coverage counting / somatic-scan
  67. #######################################
  68. # Name of directory (under each sample dir) where counts are stored.
  69. count_dir_name = "counts"
  70. # Bin size (bp) for count files.
  71. count_bin_size = 1000
  72. # Number of chunks used to split contigs for counting.
  73. count_n_chunks = 1000
  74. # Force recomputation of counting even if outputs exist.
  75. somatic_scan_force = false
  76. #######################################
  77. # Somatic pipeline global settings
  78. #######################################
  79. # Force recomputation of the entire somatic pipeline.
  80. somatic_pipe_force = true
  81. # Default thread count for heavy tools.
  82. somatic_pipe_threads = 15
  83. # Template for somatic pipeline statistics directory.
  84. # {result_dir}, {id}
  85. somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"
  86. #######################################
  87. # Filtering / QC thresholds
  88. #######################################
  89. # Minimum depth in constitutional sample to consider site evaluable.
  90. somatic_min_constit_depth = 5
  91. # Maximum allowed ALT count in constitutional sample for a somatic call.
  92. somatic_max_alt_constit = 1
  93. # Window size (bp) for sequence entropy around variants.
  94. entropy_seq_len = 10
  95. # Minimum Shannon entropy threshold.
  96. min_shannon_entropy = 1.0
  97. # Max depth considered "low quality".
  98. max_depth_low_quality = 20
  99. # Min depth considered "high quality".
  100. min_high_quality_depth = 14
  101. # Minimum number of callers required to keep a variant.
  102. min_n_callers = 1
  103. #######################################
  104. # DeepVariant configuration
  105. #######################################
  106. # DeepVariant output directory template.
  107. # {result_dir}, {id}, {time}
  108. deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"
  109. # Threads for DeepVariant.
  110. deepvariant_threads = 20
  111. # DeepVariant singularity image path
  112. deepvariant_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepvariant_latest.sif"
  113. # DeepVariant model type (e.g. ONT_R104).
  114. deepvariant_model_type = "ONT_R104"
  115. # Force DeepVariant recomputation.
  116. deepvariant_force = false
  117. #######################################
  118. # DeepSomatic configuration
  119. #######################################
  120. # DeepSomatic output directory template.
  121. # {result_dir}, {id}, {time}
  122. deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"
  123. # Threads for DeepSomatic.
  124. deepsomatic_threads = 20
  125. # DeepSomatic singularity image path
  126. deepsomatic_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepsomatic_latest.sif"
  127. # DeepSomatic model type.
  128. deepsomatic_model_type = "ONT"
  129. # Force DeepSomatic recomputation.
  130. deepsomatic_force = false
  131. #######################################
  132. # ClairS configuration
  133. #######################################
  134. # Threads for ClairS.
  135. clairs_threads = 20
  136. # ClairS singularity image path.
  137. clairs_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/clairs_latest.sif"
  138. # Force ClairS recomputation.
  139. clairs_force = false
  140. # Keep per-part directories after chunked ClairS merging.
  141. # Set to true to retain intermediate VCFs (raw SNV/indel/germline) for reanalysis.
  142. clairs_keep_parts = false
  143. # Platform preset for ClairS.
  144. clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
  145. # ClairS output directory template.
  146. # {result_dir}, {id}
  147. clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
  148. #######################################
  149. # GATK configuration
  150. #######################################
  151. # Path to the GATK container image (Singularity/Apptainer .sif, or a docker:// URI
  152. # if you pull at runtime).
  153. #
  154. # Examples:
  155. # - "/containers/gatk_4.6.0.0.sif"
  156. gatk_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/gatk_latest.sif"
  157. # Path to a BED file restricting analysis to target regions (0-based, half-open).
  158. # Must match contig naming of the reference/BAMs (e.g. "chr9" vs "9").
  159. #
  160. # Used for targeted calling (e.g. Mutect2 `-L` or region chunking).
  161. gatk_bed_path = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed"
  162. # Local single-run CPU threads (non-Slurm execution).
  163. # Used for full-run Mutect2 or other GATK tools.
  164. # Typically forwarded to:
  165. # - `--native-pair-hmm-threads`
  166. # - `--reader-threads`
  167. # Should match available cores on the node.
  168. gatk_threads = 100
  169. # Local single-run memory limit in GB.
  170. # Used to size Java heap:
  171. # `--java-options "-Xmx{mem}g"`
  172. # Should leave headroom for native memory (PairHMM, buffers).
  173. gatk_mem_gb = 120
  174. # Per-chunk CPU threads when running chunked under Slurm.
  175. # Applies to each parallel job independently.
  176. gatk_slurm_threads = 8
  177. # Per-chunk memory (GB) when running under Slurm.
  178. # Used both for scheduler request and Java heap sizing per chunk.
  179. # Must be sufficient for interval-restricted Mutect2.
  180. gatk_slurm_mem_gb = 32
  181. # If true, force re-run of GATK steps by removing or ignoring existing outputs.
  182. gatk_force = false
  183. # GATK output directory template.
  184. # {result_dir}, {id}, {tumoral_name}
  185. gatk_output_dir = "{result_dir}/{id}/{tumoral_name}/GATK"
  186. # GATK passed VCF template ({output_dir}, {id}, {tumoral_name}, {reference_name}).
  187. gatk_passed_vcf = "{output_dir}/{id}_{tumoral_name}_{reference_name}_GATK_PASSED.vcf.gz"
  188. #######################################
  189. # Savana configuration
  190. #######################################
  191. # Savana binary (name or full path).
  192. savana_bin = "/home/t_steimle/.conda/envs/savana_env/bin/savana"
  193. # Threads for Savana.
  194. savana_threads = 40
  195. # Savana output directory template.
  196. # {result_dir}, {id}
  197. savana_output_dir = "{result_dir}/{id}/diag/savana"
  198. # Savana copy-number output file.
  199. # {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
  200. savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"
  201. # Savana raw read counts file.
  202. savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"
  203. # Savana passed VCF.
  204. savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"
  205. # Force Savana recomputation.
  206. savana_force = false
  207. # Constitutional phased VCF template.
  208. # {result_dir}, {id}
  209. germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"
  210. #######################################
  211. # Severus configuration
  212. #######################################
  213. # Path to Severus script.
  214. severus_bin = "/home/t_steimle/somatic_pipe_tools/Severus/severus.py"
  215. # Force Severus recomputation.
  216. severus_force = false
  217. # Threads for Severus.
  218. severus_threads = 32
  219. # VNTRs BED for Severus.
  220. vntrs_bed = "/home/t_steimle/ref/hs1/vntrs_chm13.bed"
  221. # Path of the Severus panel of normals.
  222. severus_pon = "/home/t_steimle/ref/hs1/PoN_1000G_chm13.tsv.gz"
  223. # Paired Severus output directory.
  224. # {result_dir}, {id}
  225. severus_output_dir = "{result_dir}/{id}/diag/severus"
  226. # Solo Severus output directory.
  227. # {result_dir}, {id}, {time}
  228. severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"
  229. #######################################
  230. # Straglr configuration
  231. #######################################
  232. # Path to Straglr executable.
  233. straglr_bin = "/home/t_steimle/.conda/envs/straglr_env/bin/straglr.py"
  234. # Path to STR loci BED file for Straglr.
  235. #
  236. # RepeatMasker Simple_repeat
  237. straglr_loci_bed = "/home/t_steimle/ref/hs1/simple_repeat_ucsc_hs1.bed"
  238. # Minimum allele size difference in bp to report as changed between normal and tumoral
  239. straglr_min_size_diff = 4
  240. # Minimum read support required for an allele to be considered for
  241. # change between normal and tumoral
  242. straglr_min_support_diff = 2
  243. # Minimum read support for STR genotyping.
  244. straglr_min_support = 2
  245. # Minimum cluster size for STR detection.
  246. straglr_min_cluster_size = 2
  247. # Whether to genotype in size mode.
  248. straglr_genotype_in_size = true
  249. # Template for paired Straglr output directory.
  250. #
  251. # Placeholders: `{result_dir}`, `{id}`.
  252. straglr_output_dir = "{result_dir}/{id}/diag/straglr"
  253. # Template for solo Straglr output directory.
  254. #
  255. # Placeholders: `{result_dir}`, `{id}`, `{time}`.
  256. straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"
  257. # Force Straglr recomputation.
  258. straglr_force = false
  259. #######################################
  260. # Marlin
  261. #######################################
  262. marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"
  263. #######################################
  264. # Echtvar
  265. #######################################
  266. echtvar_bin = "/home/t_steimle/somatic_pipe_tools/echtvar"
  267. echtvar_sources = [
  268. "/home/t_steimle/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
  269. "/home/t_steimle/ref/hs1/CosmicCodingMuts.echtvar.zip"
  270. ]
  271. #######################################
  272. # Bcftools configuration
  273. #######################################
  274. # Path to bcftools binary.
  275. bcftools_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/bcftools"
  276. # Threads for bcftools.
  277. bcftools_threads = 30
  278. #######################################
  279. # Longphase configuration
  280. #######################################
  281. # Path to longphase binary.
  282. longphase_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/longphase_linux-x64"
  283. # Threads for longphase.
  284. longphase_threads = 20
  285. # Threads for longphase modcall step.
  286. # limit memory usage here
  287. longphase_modcall_threads = 4
  288. # Force longphase recomputation (haplotagging/phasing).
  289. longphase_force = false
  290. # Longphase modcall VCF template.
  291. # {result_dir}, {id}, {time}
  292. longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"
  293. #######################################
  294. # Modkit configuration
  295. #######################################
  296. # Path to modkit binary.
  297. modkit_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/modkit_latest/modkit"
  298. # Threads for `modkit summary`.
  299. modkit_summary_threads = 40
  300. # Modkit summary file template.
  301. # {result_dir}, {id}, {time}
  302. modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  303. #######################################
  304. # Nanomonsv configuration
  305. #######################################
  306. # Path to nanomonsv binary.
  307. nanomonsv_bin = "/home/t_steimle/.conda/envs/nanomonsv_env/bin/nanomonsv"
  308. # Paired nanomonsv output directory template.
  309. # {result_dir}, {id}, {time}
  310. nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"
  311. # Force nanomonsv recomputation.
  312. nanomonsv_force = false
  313. # Threads for nanomonsv.
  314. nanomonsv_threads = 40
  315. # Paired nanomonsv PASSED VCF template.
  316. # {output_dir}, {id}
  317. nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"
  318. # Solo nanomonsv output directory template.
  319. # {result_dir}, {id}, {time}
  320. nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"
  321. # Solo nanomonsv PASSED VCF template.
  322. # {output_dir}, {id}, {time}
  323. nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  324. # Path to simple repeat BED file for nanomonsv.
  325. # https://github.com/friend1ws/nanomonsv
  326. # Warning: a TBI index for this file should exist.
  327. nanomonsv_simple_repeat_bed = "/home/t_steimle/ref/hs1/human_chm13v2.0_simpleRepeat.bed.gz"
  328. #######################################
  329. # PromethION metadata
  330. #######################################
  331. # Directory containing PromethION run metadata.
  332. promethion_runs_metadata_dir = "/data/promethion-runs-metadata"
  333. # JSON file mapping flowcell IDs / runs for Pandora.
  334. promethion_runs_input = "/data/pandora-flowcell-id.json"
  335. #######################################
  336. # VEP configuration
  337. #######################################
  338. # Path to VEP singularity image
  339. vep_image = "/home/t_steimle/somatic_pipe_tools/vep_latest.sif"
  340. # Path to the VEP cache directory
  341. vep_cache_dir = "/home/t_steimle/ref/hs1/vepcache"
  342. # Path to VEP sorted GFF
  343. vep_gff = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
  344. #######################################
  345. # Alignment / basecalling (Dorado)
  346. #######################################
  347. [align]
  348. # Path to Dorado binary.
  349. dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"
  350. # Dorado basecalling arguments (device, model, modifications…).
  351. dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"
  352. # Whether Dorado should re-align after demultiplexing.
  353. dorado_should_realign = false
  354. # Number of threads for the Dorado aligner.
  355. dorado_aligner_threads = 10
  356. # Reference FASTA used for alignment.
  357. ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"
  358. # Minimap2 index used for alignment.
  359. ref_mmi = ""
  360. # Samtools bin
  361. samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"
  362. # Threads for `samtools view`.
  363. samtools_view_threads = 10
  364. # Threads for `samtools sort`.
  365. samtools_sort_threads = 20
  366. # Threads for `samtools merge`.
  367. samtools_merge_threads = 40
  368. # Threads for `samtools split`.
  369. samtools_split_threads = 20