pandora-config.example.toml 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. # Pandora configuration template
  2. #######################################
  3. # General filesystem layout / I/O
  4. #######################################
  5. # Root directory where all results will be written.
  6. result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"
  7. # Temporary directory.
  8. tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"
  9. # Should use Slurm as runner
  10. slurm_runner = true
  11. # Slurm max parallel jobs
  12. slurm_max_par = 20
  13. # Run cache directory.
  14. run_cache_dir = "/home/t_steimle/data/prom_runs"
  15. # Software threads
  16. threads = 5
  17. # Singularity bin
  18. singularity_bin = "module load singularity-ce && singularity"
  19. # Path to the conda activation script.
  20. conda_sh = "/mnt/beegfs02/software/recherche/miniconda/25.1.1/etc/profile.d/conda.sh"
  21. #######################################
  22. # Reference genome & annotations
  23. #######################################
  24. # Reference FASTA used throughout the pipeline.
  25. reference = "/home/t_steimle/ref/hs1/chm13v2.0.fa"
  26. # Short reference name used in filenames.
  27. reference_name = "hs1"
  28. # Pseudoautosomal regions (PARs) BED file.
  29. pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"
  30. # Sequence dictionary (.dict) for the reference.
  31. dict_file = "/home/t_steimle/ref/hs1/chm13v2.0.dict"
  32. # RefSeq GFF3 annotation (sorted/bgzipped/indexed).
  33. refseq_gff = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
  34. refseq_gtf = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gtf"
  35. # dbSNP vcf.gz file (should be indexed)
  36. db_snp = "/home/t_steimle/ref/hs1/chm13v2.0_dbSNPv155.vcf.gz"
  37. # BED file with gene names in the 4th column (should be sorted).
  38. genes_bed = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed"
  39. # Cytobands BED file
  40. cytobands_bed = "/home/t_steimle/ref/hs1/chm13v2.0_cytobands_allchrs.bed"
  41. # Chromosome alias file
  42. # ex: https://hgdownload.soe.ucsc.edu/hubs/GCA/009/914/755/GCA_009914755.4/GCA_009914755.4.chromAlias.txt
  43. chromosomes_alias = "/home/t_steimle/ref/hs1/GCA_009914755.4.chromAlias.txt"
  44. # Template for mask BED file (low-quality / filtered regions).
  45. # {result_dir} -> global result directory
  46. # {id} -> case identifier
  47. mask_bed = "{result_dir}/{id}/diag/mask.bed"
  48. # Panels of interest: [ [name, bed_path], ... ]
  49. panels = [
  50. ["CM", "/home/t_steimle/ref/hs1/panel_cm_hs1.bed"],
  51. ]
  52. repeats_bed = "/home/t_steimle/ref/hs1/all_repeats_chm13_final.bed"
  53. #######################################
  54. # Sample naming / BAM handling
  55. #######################################
  56. # Tumor sample label (used in paths & filenames).
  57. tumoral_name = "diag"
  58. # Normal sample label.
  59. normal_name = "norm"
  60. # BAM tag name used for haplotagged reads.
  61. haplotagged_bam_tag_name = "HP"
  62. # Minimum MAPQ for reads kept during BAM filtering.
  63. bam_min_mapq = 40
  64. # Number of threads for hts BAM reader decompression (should be adapted to I/O speed).
  65. bam_n_threads = 4
  66. # Number of reads sampled for BAM composition estimation.
  67. bam_composition_sample_size = 20000
  68. #######################################
  69. # Coverage counting / somatic-scan
  70. #######################################
  71. # Name of directory (under each sample dir) where counts are stored.
  72. count_dir_name = "counts"
  73. # Bin size (bp) for count files.
  74. count_bin_size = 1000
  75. # Number of chunks used to split contigs for counting.
  76. count_n_chunks = 1000
  77. # Force recomputation of counting even if outputs exist.
  78. somatic_scan_force = false
  79. #######################################
  80. # Somatic pipeline global settings
  81. #######################################
  82. # Force recomputation of the entire somatic pipeline. NOTE(review): enabled here while every other *_force flag in this file is false — confirm this is intended for a template.
  83. somatic_pipe_force = true
  84. # Default thread count for heavy tools.
  85. somatic_pipe_threads = 15
  86. # Template for somatic pipeline statistics directory.
  87. # {result_dir}, {id}
  88. somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"
  89. #######################################
  90. # Filtering / QC thresholds
  91. #######################################
  92. # Minimum depth in constitutional sample to consider site evaluable.
  93. somatic_min_constit_depth = 5
  94. # Maximum allowed ALT count in constitutional sample for a somatic call.
  95. somatic_max_alt_constit = 1
  96. # Window size (bp) for sequence entropy around variants.
  97. entropy_seq_len = 10
  98. # Minimum Shannon entropy threshold.
  99. min_shannon_entropy = 1.0
  100. # Max depth considered "low quality".
  101. max_depth_low_quality = 20
  102. # Min depth considered "high quality".
  103. min_high_quality_depth = 14
  104. # Minimum number of callers required to keep a variant.
  105. min_n_callers = 1
  106. #######################################
  107. # DeepVariant configuration
  108. #######################################
  109. # DeepVariant output directory template.
  110. # {result_dir}, {id}, {time}
  111. deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"
  112. # Threads for DeepVariant.
  113. deepvariant_threads = 20
  114. # DeepVariant singularity image path
  115. deepvariant_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepvariant_latest.sif"
  116. # DeepVariant model type (e.g. ONT_R104).
  117. deepvariant_model_type = "ONT_R104"
  118. # Force DeepVariant recomputation.
  119. deepvariant_force = false
  120. #######################################
  121. # DeepSomatic configuration
  122. #######################################
  123. # DeepSomatic output directory template.
  124. # {result_dir}, {id}, {time}
  125. deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"
  126. # Threads for DeepSomatic.
  127. deepsomatic_threads = 20
  128. # DeepSomatic singularity image path
  129. deepsomatic_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepsomatic_latest.sif"
  130. # DeepSomatic model type.
  131. deepsomatic_model_type = "ONT"
  132. # Force DeepSomatic recomputation.
  133. deepsomatic_force = false
  134. #######################################
  135. # ClairS configuration
  136. #######################################
  137. # Threads for ClairS.
  138. clairs_threads = 10
  139. # ClairS singularity image path.
  140. clairs_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/clairs_latest.sif"
  141. # Force ClairS recomputation.
  142. clairs_force = false
  143. # Keep per-part directories after chunked ClairS merging.
  144. # Set to true to retain intermediate VCFs (raw SNV/indel/germline) for reanalysis.
  145. clairs_keep_parts = false
  146. # Platform preset for ClairS.
  147. clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
  148. # ClairS output directory template.
  149. # {result_dir}, {id}
  150. clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
  151. #######################################
  152. # GATK configuration
  153. #######################################
  154. # Path to the GATK container image (Singularity/Apptainer .sif, or a docker:// URI
  155. # if you pull at runtime).
  156. #
  157. # Examples:
  158. # - "/containers/gatk_4.6.0.0.sif"
  159. gatk_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/gatk_latest.sif"
  160. # Path to a BED file restricting analysis to target regions (0-based, half-open).
  161. # Must match contig naming of the reference/BAMs (e.g. "chr9" vs "9").
  162. #
  163. # Used for targeted calling (e.g. Mutect2 `-L` or region chunking).
  164. gatk_bed_path = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed"
  165. # Local single-run CPU threads (non-Slurm execution).
  166. # Used for full-run Mutect2 or other GATK tools.
  167. # Typically forwarded to:
  168. # - `--native-pair-hmm-threads`
  169. # - `--reader-threads`
  170. # Should match available cores on the node.
  171. gatk_threads = 100
  172. # Local single-run memory limit in GB.
  173. # Used to size Java heap:
  174. # `--java-options "-Xmx{mem}g"`
  175. # Should leave headroom for native memory (PairHMM, buffers).
  176. gatk_mem_gb = 120
  177. # Per-chunk CPU threads when running chunked under Slurm.
  178. # Applies to each parallel job independently.
  179. gatk_slurm_threads = 8
  180. # Per-chunk memory (GB) when running under Slurm.
  181. # Used both for scheduler request and Java heap sizing per chunk.
  182. # Must be sufficient for interval-restricted Mutect2.
  183. gatk_slurm_mem_gb = 32
  184. # If true, force re-run of GATK steps by removing or ignoring existing outputs.
  185. gatk_force = false
  186. # GATK output directory template.
  187. # {result_dir}, {id}, {tumoral_name}
  188. gatk_output_dir = "{result_dir}/{id}/{tumoral_name}/GATK"
  189. # GATK passed VCF template ({output_dir}, {id}, {tumoral_name}, {reference_name}).
  190. gatk_passed_vcf = "{output_dir}/{id}_{tumoral_name}_{reference_name}_GATK_PASSED.vcf.gz"
  191. #######################################
  192. # Savana configuration
  193. #######################################
  194. # Savana binary (name or full path).
  195. savana_bin = "/home/t_steimle/.conda/envs/savana_env/bin/savana"
  196. # Threads for Savana.
  197. savana_threads = 40
  198. # RAM capacity used for running Savana with slurm (in GB).
  199. savana_mem = 110
  200. # Savana output directory template.
  201. # {result_dir}, {id}
  202. savana_output_dir = "{result_dir}/{id}/diag/savana"
  203. # Savana copy-number output file.
  204. # {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
  205. savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"
  206. # Savana raw read counts file.
  207. savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"
  208. # Savana passed VCF.
  209. savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"
  210. # Force Savana recomputation.
  211. savana_force = false
  212. # Constitutional phased VCF template.
  213. # {result_dir}, {id}
  214. germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"
  215. #######################################
  216. # Severus configuration
  217. #######################################
  218. # Path to the Severus script (string is used verbatim, so no leading/trailing whitespace).
  219. severus_bin = "/home/t_steimle/somatic_pipe_tools/Severus/severus.py"
  220. # Force Severus recomputation.
  221. severus_force = false
  222. # Threads for Severus.
  223. severus_threads = 32
  224. # VNTRs BED for Severus.
  225. vntrs_bed = "/home/t_steimle/ref/hs1/vntrs_chm13.bed"
  226. # Path of the Severus panel of normals.
  227. severus_pon = "/home/t_steimle/ref/hs1/PoN_1000G_chm13.tsv.gz"
  228. # Paired Severus output directory.
  229. # {result_dir}, {id}
  230. severus_output_dir = "{result_dir}/{id}/diag/severus"
  231. # Solo Severus output directory.
  232. # {result_dir}, {id}, {time}
  233. severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"
  234. #######################################
  235. # Straglr configuration
  236. #######################################
  237. # Path to Straglr executable.
  238. straglr_bin = "/home/t_steimle/.conda/envs/straglr_env/bin/straglr.py"
  239. # Path to STR loci BED file for Straglr.
  240. #
  241. # RepeatMasker Simple_repeat
  242. straglr_loci_bed = "/home/t_steimle/ref/hs1/simple_repeat_ucsc_hs1.bed"
  243. # Minimum allele size difference in bp to report as changed between normal and tumoral
  244. straglr_min_size_diff = 4
  245. # Minimum read support required for an allele to be considered for
  246. # change between normal and tumoral
  247. straglr_min_support_diff = 2
  248. # Minimum read support for STR genotyping.
  249. straglr_min_support = 2
  250. # Minimum cluster size for STR detection.
  251. straglr_min_cluster_size = 2
  252. # Whether to genotype in size mode.
  253. straglr_genotype_in_size = true
  254. # Template for paired Straglr output directory.
  255. #
  256. # Placeholders: `{result_dir}`, `{id}`.
  257. straglr_output_dir = "{result_dir}/{id}/diag/straglr"
  258. # Template for solo Straglr output directory.
  259. #
  260. # Placeholders: `{result_dir}`, `{id}`, `{time}`.
  261. straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"
  262. # Force Straglr recomputation.
  263. straglr_force = false
  264. #######################################
  265. # CoRAL
  266. #######################################
  267. # Number of CPU threads for the CoRAL reconstruction job.
  268. #
  269. # CoRAL is CPU-bound during breakpoint graph construction and quadratic
  270. # programming cycle extraction. 8–16 threads is sufficient for most
  271. # focal amplification cases; increase for highly complex ecDNA with
  272. # many amplicons.
  273. coral_threads = 16
  274. # Path to the cloned CoRAL repository (required).
  275. coral_dir = "/home/t_steimle/somatic_pipe_tools/CoRAL"
  276. # Memory allocation for the CoRAL SLURM job (e.g. `"32G"`).
  277. #
  278. # Memory usage scales with amplicon complexity and BAM depth.
  279. # 32G is sufficient for typical WGS at 30–60×; increase to 64G
  280. # for highly rearranged genomes (chromothripsis, high ecDNA copy number).
  281. coral_slurm_mem = "32G"
  282. # SLURM partition to use for CoRAL jobs.
  283. #
  284. # CoRAL requires only CPU — do not submit to a GPU partition.
  285. coral_slurm_partition = "shortq"
  286. # Minimum copy number gain threshold for a segment to be considered
  287. # a focal amplification seed (CoRAL `--gain`).
  288. #
  289. # CoRAL applies this threshold to the raw absolute CN values from the
  290. # cn_segs BED — do NOT pre-correct for purity or ploidy, as this may
  291. # cause entire chromosome arms to exceed the threshold in aneuploid tumours.
  292. #
  293. # Default in CoRAL is 6.0 (diploid assumption). For hyperdiploid tumours
  294. # (e.g. hyperdiploid ALL, CML blast crisis) consider lowering to 4.0–5.0.
  295. coral_seed_gain = 6.0
  296. # Minimum size in base pairs for a CN segment to qualify as a seed
  297. # (CoRAL `--min-seed-size`).
  298. #
  299. # Segments below this size are discarded even if they exceed `coral_seed_gain`.
  300. # Two merged proximal segments (see `coral_max_seg_gap`) are evaluated
  301. # against this threshold as a single combined interval.
  302. #
  303. # Default in CoRAL is 100000 (100 kb). Reducing this risks including
  304. # artefactual short high-copy segments; increasing it misses small focal
  305. # amplifications (e.g. narrow EGFR or MYC peaks).
  306. coral_min_seed_size = 100000
  307. # Maximum gap in base pairs between two proximal CN segments to allow
  308. # merging into a single seed candidate (CoRAL `--max-seg-gap`).
  309. #
  310. # If two amplified segments are separated by a gap smaller than this value,
  311. # they are merged before the `coral_min_seed_size` filter is applied.
  312. # This handles cases where a single focal amplicon is split by a low-coverage
  313. # or diploid bin.
  314. #
  315. # Default in CoRAL is 300000 (300 kb). For haematological cancers with
  316. # compact focal amplifications (e.g. NUP214::ABL1, ABL1 amplification in
  317. # CML blast crisis) a tighter value such as 100000 reduces spurious merging
  318. # of adjacent independent amplicons.
  319. coral_max_seg_gap = 100000
  320. #######################################
  321. # Flye
  322. #######################################
  323. # Path to the Flye binary. Can be a python-prefixed call if Flye is not
  324. # installed as a standalone executable.
  325. flye_bin = "/usr/bin/python /home/t_steimle/somatic_pipe_tools/Flye/bin/flye"
  326. # Number of threads allocated to Flye. 8–16 is sufficient for local assembly
  327. # of a single locus; diminishing returns above 16.
  328. flye_threads = 12
  329. # Memory allocated to the Flye SLURM job. 16G is comfortable for local
  330. # assembly (<1 Mb target). Increase to 32G+ for larger regions.
  331. flye_slurm_mem = "16G"
  332. #######################################
  333. # Medaka
  334. #######################################
  335. # Name of the conda environment containing medaka.
  336. # Activated via conda_sh before running medaka_consensus.
  337. medaka_env = "medaka_env"
  338. # Path to the medaka_consensus binary within the conda environment.
  339. # Usually just "medaka_consensus" if the env is correctly activated.
  340. medaka_consensus_bin = "medaka_consensus"
  341. # Number of threads for medaka. Used for the minimap2 alignment step;
  342. # the neural network inference step is GPU-bound when a GPU is available.
  343. medaka_threads = 8
  344. # Memory allocated to the Medaka SLURM job. 16G is sufficient for local
  345. # polishing of a small assembly.
  346. medaka_slurm_mem = "16G"
  347. # Medaka model — MUST match the basecalling chemistry and Dorado version exactly.
  348. # Using the wrong model silently degrades polishing quality.
  349. #
  350. # Model naming: {chemistry}_{flowcell}_{speed}bps_{caller}_{version}
  351. # r1041_e82 = R10.4.1 flowcell
  352. # 400bps = 400 bases/s translocation speed (standard; 260bps is legacy)
  353. # sup = Dorado sup basecalling (use hac if basecalled with hac)
  354. #
  355. # Current default (medaka tools list_models): r1041_e82_400bps_sup_v5.2.0
  356. #
  357. # v5.2.0 also has dwell-time variants for improved homopolymer resolution:
  358. # r1041_e82_400bps_sup_v5.2.0_rl_lstm384_dwells — use if Dorado called with dwell times
  359. # r1041_e82_400bps_sup_v5.2.0_rl_lstm384_no_dwells — use if Dorado called without dwell times
  360. #
  361. # For R9.4.1 data use r941_min_sup_g507 (MinION) or r941_prom_sup_g507 (PromethION).
  362. # Run `medaka tools list_models` to list all available models.
  363. medaka_model = "r1041_e82_400bps_sup_v5.2.0"
  364. #######################################
  365. # Minimap2
  366. #######################################
  367. # Path to the minimap2 binary. Use a versioned path to ensure reproducibility
  368. # across pipeline runs — minimap2 output is version-sensitive.
  369. minimap2_bin = "/home/t_steimle/somatic_pipe_tools/minimap2-2.30_x64-linux/minimap2"
  370. # Number of threads for minimap2 alignment. Scales linearly up to ~16;
  371. # 16 is appropriate for read→reference alignment on a full WGS BAM.
  372. # For local assembly realignment (few hundred reads) 8 is sufficient.
  373. minimap2_threads = 16
  374. # Memory allocated to the minimap2 SLURM job.
  375. # 32G is required for read→reference alignment against a human genome
  376. # (minimap2 loads the MMI index into memory: ~14G for hg38 map-ont).
  377. # Can be reduced to 8G for contig→contig or local assembly realignment.
  378. minimap2_slurm_mem = "32G"
  379. #######################################
  380. # wtdbg2
  381. #######################################
  382. # Path to the wtdbg2.pl wrapper script.
  383. # Handles both assembly (wtdbg2) and consensus (wtpoa-cns) in one call.
  384. wtdbg2_bin = "/home/t_steimle/somatic_pipe_tools/wtdbg2/wtdbg2.pl"
  385. # Threads for wtdbg2 + wtpoa-cns. 8 is sufficient for local assembly.
  386. wtdbg2_threads = 8
  387. # Memory for SLURM. wtdbg2 is lightweight — 16G is ample for local assembly.
  388. wtdbg2_slurm_mem = "16G"
  389. #######################################
  390. # longcallD
  391. #######################################
  392. # Template for the longcallD output directory (solo and normal/tumor runs).
  393. #
  394. # Required placeholders: `{result_dir}`, `{id}`, `{time}`.
  395. longcalld_output_dir = "{result_dir}/{id}/{time}/longcallD"
  396. longcalld_bin = "/home/t_steimle/somatic_pipe_tools/longcallD-v0.0.10_x64-linux/longcallD"
  397. longcalld_threads = 10
  398. longcalld_slurm_mem = "40G"
  399. #######################################
  400. # Marlin
  401. #######################################
  402. marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"
  403. #######################################
  404. # Echtvar
  405. #######################################
  406. echtvar_bin = "/home/t_steimle/somatic_pipe_tools/echtvar"
  407. echtvar_sources = [
  408. "/home/t_steimle/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
  409. "/home/t_steimle/ref/hs1/CosmicCodingMuts.echtvar.zip"
  410. ]
  411. #######################################
  412. # Bcftools configuration
  413. #######################################
  414. # Path to bcftools binary.
  415. bcftools_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/bcftools"
  416. # Threads for bcftools.
  417. bcftools_threads = 10
  418. #######################################
  419. # Longphase configuration
  420. #######################################
  421. # Path to longphase binary.
  422. longphase_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/longphase_linux-x64"
  423. # Threads for longphase.
  424. longphase_threads = 20
  425. # Threads for longphase modcall step.
  426. # Kept low to limit memory usage.
  427. longphase_modcall_threads = 4
  428. # Force longphase recomputation (haplotagging/phasing).
  429. longphase_force = false
  430. # Longphase modcall VCF template.
  431. # {result_dir}, {id}, {time}
  432. longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"
  433. #######################################
  434. # Modkit configuration
  435. #######################################
  436. # Path to modkit binary.
  437. modkit_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/modkit_latest/modkit"
  438. # Threads for `modkit summary`.
  439. modkit_summary_threads = 40
  440. # Modkit summary file template.
  441. # {result_dir}, {id}, {time}
  442. modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  443. #######################################
  444. # Nanomonsv configuration
  445. #######################################
  446. # Path to nanomonsv binary.
  447. nanomonsv_bin = "/home/t_steimle/.conda/envs/nanomonsv_env/bin/nanomonsv"
  448. # Paired nanomonsv output directory template.
  449. # {result_dir}, {id}, {time}
  450. nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"
  451. # Force nanomonsv recomputation.
  452. nanomonsv_force = false
  453. # Threads for nanomonsv.
  454. nanomonsv_threads = 40
  455. # Paired nanomonsv PASSED VCF template.
  456. # {output_dir}, {id}
  457. nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"
  458. # Solo nanomonsv output directory template.
  459. # {result_dir}, {id}, {time}
  460. nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"
  461. # Solo nanomonsv PASSED VCF template.
  462. # {output_dir}, {id}, {time}
  463. nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  464. # Path to simple repeat BED file for nanomonsv.
  465. # https://github.com/friend1ws/nanomonsv
  466. # Warning: a TBI index must exist alongside this file.
  467. nanomonsv_simple_repeat_bed = "/home/t_steimle/ref/hs1/human_chm13v2.0_simpleRepeat.bed.gz"
  468. # Path to LINE1.chm13v2.0.bed.gz file for nanomonsv.
  469. # https://github.com/friend1ws/nanomonsv
  470. # Warning: a TBI index must exist alongside this file.
  471. nanomonsv_line1_bed = "/home/t_steimle/ref/hs1/LINE1.chm13v2.0.bed.gz"
  472. #######################################
  473. # PromethION metadata
  474. #######################################
  475. # Directory containing PromethION run metadata.
  476. promethion_runs_metadata_dir = "/data/promethion-runs-metadata"
  477. # JSON file mapping flowcell IDs / runs for Pandora.
  478. promethion_runs_input = "/data/pandora-flowcell-id.json"
  479. #######################################
  480. # VEP configuration
  481. #######################################
  482. # Path to VEP singularity image
  483. vep_image = "/home/t_steimle/somatic_pipe_tools/vep_latest.sif"
  484. # Path to the VEP cache directory
  485. vep_cache_dir = "/home/t_steimle/ref/hs1/vepcache"
  486. # Path to VEP sorted GFF
  487. vep_gff = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
  488. #######################################
  489. # Alignment / basecalling (Dorado)
  490. #######################################
  491. [align]
  492. # Path to Dorado binary.
  493. dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"
  494. # Dorado basecalling arguments (device, model, modifications…).
  495. dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"
  496. # Whether Dorado should re-align after demultiplexing.
  497. dorado_should_realign = false
  498. # Number of threads for the Dorado aligner.
  499. dorado_aligner_threads = 10
  500. # Reference FASTA used for alignment.
  501. ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"
  502. # Minimap2 index used for alignment.
  503. ref_mmi = ""
  504. # Samtools bin
  505. samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"
  506. # Threads for `samtools view`.
  507. samtools_view_threads = 10
  508. # Threads for `samtools sort`.
  509. samtools_sort_threads = 20
  510. # Threads for `samtools merge`.
  511. samtools_merge_threads = 40
  512. # Threads for `samtools split`.
  513. samtools_split_threads = 20