pandora-config.example.toml 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. # Pandora configuration template
  2. #######################################
  3. # General filesystem layout / I/O
  4. #######################################
  5. # Directory where POD / run description files are located.
  6. pod_dir = "/data/run_data"
  7. # Root directory where all results will be written.
  8. result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"
  9. # Temporary directory.
  10. tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"
  11. # Whether to use Slurm as the runner.
  12. slurm_runner = true
  13. # Run cache directory.
  14. run_cache_dir = "/home/t_steimle/data/prom_runs"
  15. # Software threads
  16. threads = 5
  17. # Singularity command (shell snippet: loads the module, then invokes singularity).
  18. singularity_bin = "module load singularity-ce && singularity"
  19. # Temporary directory used when unarchiving input data.
  20. unarchive_tmp_dir = "/data/unarchived"
  21. # Maximum memory available for dockerized tools, in GiB.
  22. docker_max_memory_go = 400
  23. # Path to the SQLite database of processed cases.
  24. db_cases_path = "/data/cases.sqlite"
  25. # Path to the conda activation script.
  26. conda_sh = "/mnt/beegfs02/software/recherche/miniconda/25.1.1/etc/profile.d/conda.sh"
  27. #######################################
  28. # Reference genome & annotations
  29. #######################################
  30. # Reference FASTA used throughout the pipeline.
  31. reference = "/home/t_steimle/ref/hs1/chm13v2.0.fa"
  32. # Short reference name used in filenames.
  33. reference_name = "hs1"
  34. # Pseudoautosomal regions (PARs) BED file.
  35. pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"
  36. # Sequence dictionary (.dict) for the reference.
  37. dict_file = "/data/ref/hs1/chm13v2.0.dict"
  38. # RefSeq GFF3 annotation (sorted/indexed).
  39. refseq_gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
  40. # Template for mask BED file (low-quality / filtered regions).
  41. # {result_dir} -> global result directory
  42. # {id} -> case identifier
  43. mask_bed = "{result_dir}/{id}/diag/mask.bed"
  44. # BED file with early-replicating regions.
  45. early_bed = "/data/ref/hs1/replication_early_25_hs1.bed"
  46. # BED file with late-replicating regions.
  47. late_bed = "/data/ref/hs1/replication_late_75_hs1.bed"
  48. # BED file with CpG coordinates.
  49. cpg_bed = "/data/ref/hs1/hs1/hs1_CpG.bed"
  50. # Panels of interest: [ [name, bed_path], ... ]
  51. panels = [
  52. ["OncoT", "/data/ref/hs1/V1_V2_V3_V4_V5_intersect_targets_hs1_uniq.bed"],
  53. ["variable_chips","/data/ref/hs1/top_1500_sd_pos.bed"],
  54. ]
  55. #######################################
  56. # Sample naming / BAM handling
  57. #######################################
  58. # Tumor sample label (used in paths & filenames).
  59. tumoral_name = "diag"
  60. # Normal sample label.
  61. normal_name = "norm"
  62. # BAM tag name used for haplotagged reads.
  63. haplotagged_bam_tag_name = "HP"
  64. # Minimum MAPQ for reads kept during BAM filtering.
  65. bam_min_mapq = 40
  66. # Threads for BAM-level operations (view/sort/index…).
  67. bam_n_threads = 150
  68. # Number of reads sampled for BAM composition estimation.
  69. bam_composition_sample_size = 20000
  70. #######################################
  71. # Coverage counting / somatic-scan
  72. #######################################
  73. # Name of directory (under each sample dir) where counts are stored.
  74. count_dir_name = "counts"
  75. # Bin size (bp) for count files.
  76. count_bin_size = 1000
  77. # Number of chunks used to split contigs for counting.
  78. count_n_chunks = 1000
  79. # Force recomputation of counting even if outputs exist.
  80. somatic_scan_force = false
  81. #######################################
  82. # Somatic pipeline global settings
  83. #######################################
  84. # Force recomputation of the entire somatic pipeline.
  85. somatic_pipe_force = true
  86. # Default thread count for heavy tools.
  87. somatic_pipe_threads = 150
  88. # Template for somatic pipeline statistics directory.
  89. # {result_dir}, {id}
  90. somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"
  91. #######################################
  92. # Filtering / QC thresholds
  93. #######################################
  94. # Minimum depth in constitutional sample to consider site evaluable.
  95. somatic_min_constit_depth = 5
  96. # Maximum allowed ALT count in constitutional sample for a somatic call.
  97. somatic_max_alt_constit = 1
  98. # Window size (bp) for sequence entropy around variants.
  99. entropy_seq_len = 10
  100. # Minimum Shannon entropy threshold.
  101. min_shannon_entropy = 1.0
  102. # Max depth considered "low quality".
  103. max_depth_low_quality = 20
  104. # Min depth considered "high quality".
  105. min_high_quality_depth = 14
  106. # Minimum number of callers required to keep a variant.
  107. min_n_callers = 1
  108. #######################################
  109. # DeepVariant configuration
  110. #######################################
  111. # DeepVariant output directory template.
  112. # {result_dir}, {id}, {time}
  113. deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"
  114. # Threads for DeepVariant.
  115. deepvariant_threads = 20
  116. # DeepVariant singularity image path
  117. deepvariant_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepvariant_latest.sif"
  118. # DeepVariant model type (e.g. ONT_R104).
  119. deepvariant_model_type = "ONT_R104"
  120. # Force DeepVariant recomputation.
  121. deepvariant_force = false
  122. #######################################
  123. # DeepSomatic configuration
  124. #######################################
  125. # DeepSomatic output directory template.
  126. # {result_dir}, {id}, {time}
  127. deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"
  128. # Threads for DeepSomatic.
  129. deepsomatic_threads = 20
  130. # DeepSomatic singularity image path
  131. deepsomatic_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepsomatic_latest.sif"
  132. # DeepSomatic model type.
  133. deepsomatic_model_type = "ONT"
  134. # Force DeepSomatic recomputation.
  135. deepsomatic_force = false
  136. #######################################
  137. # ClairS configuration
  138. #######################################
  139. # Threads for ClairS.
  140. clairs_threads = 40
  141. # ClairS singularity image path.
  142. clairs_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/clairs_latest.sif"
  143. # Force ClairS recomputation.
  144. clairs_force = false
  145. # Platform preset for ClairS.
  146. clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
  147. # ClairS output directory template.
  148. # {result_dir}, {id}
  149. clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
  150. #######################################
  151. # Savana configuration
  152. #######################################
  153. # Savana binary (name or full path).
  154. savana_bin = "/home/t_steimle/.conda/envs/savana_env/bin/savana"
  155. # Threads for Savana.
  156. savana_threads = 40
  157. # Savana output directory template.
  158. # {result_dir}, {id}
  159. savana_output_dir = "{result_dir}/{id}/diag/savana"
  160. # Savana copy-number output file.
  161. # {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
  162. savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"
  163. # Savana raw read counts file.
  164. savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"
  165. # Savana passed VCF.
  166. savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"
  167. # Force Savana recomputation.
  168. savana_force = false
  169. # Constitutional phased VCF template.
  170. # {result_dir}, {id}
  171. germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"
  172. #######################################
  173. # Severus configuration
  174. #######################################
  175. # Path to Severus script (absolute path, no surrounding whitespace).
  176. severus_bin = "/home/t_steimle/somatic_pipe_tools/Severus/severus.py"
  177. # Force Severus recomputation.
  178. severus_force = false
  179. # Threads for Severus.
  180. severus_threads = 32
  181. # VNTRs BED for Severus.
  182. vntrs_bed = "/home/t_steimle/ref/hs1/vntrs_chm13.bed"
  183. # Path of the Severus panel of normals.
  184. severus_pon = "/home/t_steimle/ref/hs1/PoN_1000G_chm13.tsv.gz"
  185. # Paired Severus output directory.
  186. # {result_dir}, {id}
  187. severus_output_dir = "{result_dir}/{id}/diag/severus"
  188. # Solo Severus output directory.
  189. # {result_dir}, {id}, {time}
  190. severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"
  191. #######################################
  192. # Straglr configuration
  193. #######################################
  194. # Path to Straglr executable.
  195. straglr_bin = "/home/t_steimle/.conda/envs/straglr_env/bin/straglr.py"
  196. # Path to STR loci BED file for Straglr.
  197. #
  198. # RepeatMasker Simple_repeat
  199. straglr_loci_bed = "/home/t_steimle/ref/hs1/simple_repeat_ucsc_hs1.bed"
  200. # Minimum allele size difference in bp to report as changed between normal and tumoral
  201. straglr_min_size_diff = 4
  202. # Minimum read support required for an allele to be considered for
  203. # change between normal and tumoral
  204. straglr_min_support_diff = 2
  205. # Minimum read support for STR genotyping.
  206. straglr_min_support = 2
  207. # Minimum cluster size for STR detection.
  208. straglr_min_cluster_size = 2
  209. # Whether to genotype in size mode.
  210. straglr_genotype_in_size = true
  211. # Template for paired Straglr output directory.
  212. #
  213. # Placeholders: `{result_dir}`, `{id}`.
  214. straglr_output_dir = "{result_dir}/{id}/diag/straglr"
  215. # Template for solo Straglr output directory.
  216. #
  217. # Placeholders: `{result_dir}`, `{id}`, `{time}`.
  218. straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"
  219. # Force Straglr recomputation.
  220. straglr_force = false
  221. #######################################
  222. # Marlin
  223. #######################################
  224. marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"
  225. #######################################
  226. # Bcftools configuration
  227. #######################################
  228. # Path to bcftools binary.
  229. bcftools_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/bcftools"
  230. # Threads for bcftools.
  231. bcftools_threads = 30
  232. #######################################
  233. # Longphase configuration
  234. #######################################
  235. # Path to longphase binary.
  236. longphase_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/longphase_linux-x64"
  237. # Threads for longphase.
  238. longphase_threads = 20
  239. # Threads for longphase modcall step.
  240. # Kept low to limit memory usage.
  241. longphase_modcall_threads = 6
  242. # Force longphase recomputation (haplotagging/phasing).
  243. longphase_force = false
  244. # Longphase modcall VCF template.
  245. # {result_dir}, {id}, {time}
  246. longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"
  247. #######################################
  248. # Modkit configuration
  249. #######################################
  250. # Path to modkit binary.
  251. modkit_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/modkit_latest/modkit"
  252. # Threads for `modkit summary`.
  253. modkit_summary_threads = 40
  254. # Modkit summary file template.
  255. # {result_dir}, {id}, {time}
  256. modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  257. #######################################
  258. # Nanomonsv configuration
  259. #######################################
  260. # Path to nanomonsv binary.
  261. nanomonsv_bin = "/home/t_steimle/.conda/envs/nanomonsv_env/bin/nanomonsv"
  262. # Paired nanomonsv output directory template.
  263. # {result_dir}, {id}, {time}
  264. nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"
  265. # Force nanomonsv recomputation.
  266. nanomonsv_force = false
  267. # Threads for nanomonsv.
  268. nanomonsv_threads = 40
  269. # Paired nanomonsv PASSED VCF template.
  270. # {output_dir}, {id}
  271. nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"
  272. # Solo nanomonsv output directory template.
  273. # {result_dir}, {id}, {time}
  274. nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"
  275. # Solo nanomonsv PASSED VCF template.
  276. # {output_dir}, {id}, {time}
  277. nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  278. # Path to simple repeat BED file for nanomonsv.
  279. # https://github.com/friend1ws/nanomonsv
  280. # Warning: a TBI index for this file must exist.
  281. nanomonsv_simple_repeat_bed = "/home/t_steimle/ref/hs1/human_chm13v2.0_simpleRepeat.bed.gz"
  282. #######################################
  283. # PromethION metadata
  284. #######################################
  285. # Directory containing PromethION run metadata.
  286. promethion_runs_metadata_dir = "/data/promethion-runs-metadata"
  287. # JSON file mapping flowcell IDs / runs for Pandora.
  288. promethion_runs_input = "/data/pandora-flowcell-id.json"
  289. #######################################
  290. # Alignment / basecalling (Dorado)
  291. #######################################
  292. [align]
  293. # Path to Dorado binary.
  294. dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"
  295. # Dorado basecalling arguments (device, model, modifications…).
  296. dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"
  297. # Whether Dorado should re-align after demultiplexing.
  298. dorado_should_realign = false
  299. # Number of threads for the Dorado aligner.
  300. dorado_aligner_threads = 10
  301. # Reference FASTA used for alignment.
  302. ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"
  303. # Minimap2 index used for alignment.
  304. ref_mmi = ""
  305. # Path to samtools binary.
  306. samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"
  307. # Threads for `samtools view`.
  308. samtools_view_threads = 10
  309. # Threads for `samtools sort`.
  310. samtools_sort_threads = 20
  311. # Threads for `samtools merge`.
  312. samtools_merge_threads = 40
  313. # Threads for `samtools split`.
  314. samtools_split_threads = 20