pandora-config.example.toml 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. # Pandora configuration template
  2. #######################################
  3. # General filesystem layout / I/O
  4. #######################################
  5. # Directory where POD / run description files are located.
  6. pod_dir = "/data/run_data"
  7. # Root directory where all results will be written.
  8. result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"
  9. # Temporary directory used when unarchiving input data.
  10. unarchive_tmp_dir = "/data/unarchived"
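  11. # Maximum memory available for dockerized tools, in GiB. NOTE(review): the `_go` key suffix presumably means gigaoctets — confirm whether GB or GiB is intended.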
  12. docker_max_memory_go = 400
  13. # Path to the SQLite database of processed cases.
  14. db_cases_path = "/data/cases.sqlite"
  15. # Path to the conda activation script.
  16. conda_sh = "/data/miniconda3/etc/profile.d/conda.sh"
  17. #######################################
  18. # Reference genome & annotations
  19. #######################################
  20. # Reference FASTA used throughout the pipeline.
  21. reference = "/data/ref/hs1/chm13v2.0.fa"
  22. # Short reference name used in filenames.
  23. reference_name = "hs1"
  24. # Sequence dictionary (.dict) for the reference.
  25. dict_file = "/data/ref/hs1/chm13v2.0.dict"
  26. # RefSeq GFF3 annotation (sorted/indexed).
  27. refseq_gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
  28. # Template for mask BED file (low-quality / filtered regions).
  29. # {result_dir} -> global result directory
  30. # {id} -> case identifier
  31. mask_bed = "{result_dir}/{id}/diag/mask.bed"
  32. # BED file with early-replicating regions.
  33. early_bed = "/data/ref/hs1/replication_early_25_hs1.bed"
  34. # BED file with late-replicating regions.
  35. late_bed = "/data/ref/hs1/replication_late_75_hs1.bed"
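  36. # BED file with CpG coordinates. NOTE(review): the path below contains a doubled `hs1/hs1` segment, unlike the other reference paths — confirm it is intentional.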
  37. cpg_bed = "/data/ref/hs1/hs1/hs1_CpG.bed"
  38. # Panels of interest: [ [name, bed_path], ... ]
  39. panels = [
  40. ["OncoT", "/data/ref/hs1/V1_V2_V3_V4_V5_intersect_targets_hs1_uniq.bed"],
  41. ["variable_chips","/data/ref/hs1/top_1500_sd_pos.bed"],
  42. ]
  43. #######################################
  44. # Sample naming / BAM handling
  45. #######################################
  46. # Tumor sample label (used in paths & filenames).
  47. tumoral_name = "diag"
  48. # Normal sample label.
  49. normal_name = "norm"
  50. # BAM tag name used for haplotagged reads.
  51. haplotagged_bam_tag_name = "HP"
  52. # Minimum MAPQ for reads kept during BAM filtering.
  53. bam_min_mapq = 40
  54. # Threads for BAM-level operations (view/sort/index…).
  55. bam_n_threads = 150
  56. # Number of reads sampled for BAM composition estimation.
  57. bam_composition_sample_size = 20000
  58. #######################################
  59. # Coverage counting / somatic-scan
  60. #######################################
  61. # Name of directory (under each sample dir) where counts are stored.
  62. count_dir_name = "counts"
  63. # Bin size (bp) for count files.
  64. count_bin_size = 1000
  65. # Number of chunks used to split contigs for counting.
  66. count_n_chunks = 1000
  67. # Force recomputation of counting even if outputs exist.
  68. somatic_scan_force = false
  69. #######################################
  70. # Somatic pipeline global settings
  71. #######################################
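  72. # Force recomputation of the entire somatic pipeline. NOTE(review): this template sets it to true while every other *_force flag shown is false — confirm the intended default.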
  73. somatic_pipe_force = true
  74. # Default thread count for heavy tools.
  75. somatic_pipe_threads = 150
  76. # Template for somatic pipeline statistics directory.
  77. # {result_dir}, {id}
  78. somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"
  79. #######################################
  80. # Filtering / QC thresholds
  81. #######################################
  82. # Minimum depth in the constitutional sample for a site to be considered evaluable.
  83. somatic_min_constit_depth = 5
  84. # Maximum allowed ALT count in constitutional sample for a somatic call.
  85. somatic_max_alt_constit = 1
  86. # Window size (bp) for sequence entropy around variants.
  87. entropy_seq_len = 10
  88. # Minimum Shannon entropy threshold.
  89. min_shannon_entropy = 1.0
  90. # Max depth considered "low quality".
  91. max_depth_low_quality = 20
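  92. # Min depth considered "high quality". NOTE(review): the value below (14) is lower than max_depth_low_quality (20), so the two ranges overlap — confirm this is intended.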
  93. min_high_quality_depth = 14
  94. # Minimum number of callers required to keep a variant.
  95. min_n_callers = 1
  96. #######################################
  97. # DeepVariant configuration
  98. #######################################
  99. # DeepVariant output directory template.
  100. # {result_dir}, {id}, {time}
  101. deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"
  102. # Threads for DeepVariant.
  103. deepvariant_threads = 150
  104. # DeepVariant version / image tag.
  105. deepvariant_bin_version = "1.9.0"
  106. # DeepVariant model type (e.g. ONT_R104).
  107. deepvariant_model_type = "ONT_R104"
  108. # Force DeepVariant recomputation.
  109. deepvariant_force = false
  110. #######################################
  111. # DeepSomatic configuration
  112. #######################################
  113. # DeepSomatic output directory template.
  114. # {result_dir}, {id}, {time}
  115. deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"
  116. # Threads for DeepSomatic.
  117. deepsomatic_threads = 150
  118. # DeepSomatic version / image tag.
  119. deepsomatic_bin_version = "1.9.0"
  120. # DeepSomatic model type.
  121. deepsomatic_model_type = "ONT"
  122. # Force DeepSomatic recomputation.
  123. deepsomatic_force = false
  124. #######################################
  125. # ClairS configuration
  126. #######################################
  127. # Threads for ClairS.
  128. clairs_threads = 155
  129. # ClairS docker tag.
  130. clairs_docker_tag = "latest"
  131. # Force ClairS recomputation.
  132. clairs_force = false
  133. # Platform preset for ClairS.
  134. clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
  135. # ClairS output directory template.
  136. # {result_dir}, {id}
  137. clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
  138. #######################################
  139. # Savana configuration
  140. #######################################
  141. # Savana binary (name or full path).
  142. savana_bin = "savana"
  143. # Threads for Savana.
  144. savana_threads = 150
  145. # Savana output directory template.
  146. # {result_dir}, {id}
  147. savana_output_dir = "{result_dir}/{id}/diag/savana"
  148. # Savana copy-number output file.
  149. # {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
  150. savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"
  151. # Savana raw read counts file.
  152. savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"
  153. # Savana passed VCF.
  154. savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"
  155. # Force Savana recomputation.
  156. savana_force = false
  157. # Constitutional phased VCF template.
  158. # {result_dir}, {id}
  159. germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"
  160. #######################################
  161. # Severus configuration
  162. #######################################
  163. # Path to Severus script.
  164. severus_bin = "/data/tools/MySeverus/severus.py"
  165. # Force Severus recomputation.
  166. severus_force = false
  167. # Threads for Severus.
  168. severus_threads = 32
  169. # VNTRs BED for Severus.
  170. vntrs_bed = "/data/ref/hs1/vntrs_chm13.bed"
  171. # Path of the Severus panel of normals.
  172. severus_pon = "/data/ref/hs1/PoN_1000G_chm13.tsv.gz"
  173. # Paired Severus output directory.
  174. # {result_dir}, {id}
  175. severus_output_dir = "{result_dir}/{id}/diag/severus"
  176. # Solo Severus output directory.
  177. # {result_dir}, {id}, {time}
  178. severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"
  179. #######################################
  180. # Longphase configuration
  181. #######################################
  182. # Path to longphase binary.
  183. longphase_bin = "/data/tools/longphase_linux-x64"
  184. # Threads for longphase.
  185. longphase_threads = 150
  186. # Threads for longphase modcall step.
  187. longphase_modcall_threads = 8
  188. # Longphase modcall VCF template.
  189. # {result_dir}, {id}, {time}
  190. longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"
  191. #######################################
  192. # Modkit configuration
  193. #######################################
  194. # Path to modkit binary.
  195. modkit_bin = "modkit"
  196. # Threads for `modkit summary`.
  197. modkit_summary_threads = 50
  198. # Modkit summary file template.
  199. # {result_dir}, {id}, {time}
  200. modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  201. #######################################
  202. # Nanomonsv configuration
  203. #######################################
  204. # Path to nanomonsv binary.
  205. nanomonsv_bin = "/home/prom/.local/bin/nanomonsv"
  206. # Paired nanomonsv output directory template.
  207. # {result_dir}, {id}, {time}
  208. nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"
  209. # Force nanomonsv recomputation.
  210. nanomonsv_force = false
  211. # Threads for nanomonsv.
  212. nanomonsv_threads = 150
  213. # Paired nanomonsv PASSED VCF template.
  214. # {output_dir}, {id}
  215. nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"
  216. # Solo nanomonsv output directory template.
  217. # {result_dir}, {id}, {time}
  218. nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"
  219. # Solo nanomonsv PASSED VCF template.
  220. # {output_dir}, {id}, {time}
  221. nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  222. #######################################
  223. # PromethION metadata
  224. #######################################
  225. # Directory containing PromethION run metadata.
  226. promethion_runs_metadata_dir = "/data/promethion-runs-metadata"
  227. # JSON file mapping flowcell IDs / runs for Pandora.
  228. promethion_runs_input = "/data/pandora-flowcell-id.json"
  229. #######################################
  230. # Alignment / basecalling (Dorado)
  231. #######################################
  232. [align]
  233. # Path to Dorado binary.
  234. dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"
  235. # Dorado basecalling arguments (device, model, modifications…).
  236. dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"
  237. # Whether Dorado should re-align reads after demultiplexing.
  238. dorado_should_realign = false
  239. # Number of threads for the Dorado aligner.
  240. dorado_aligner_threads = 20
  241. # Reference FASTA used for alignment.
  242. ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"
  243. # Minimap2 index used for alignment.
  244. ref_mmi = "/mnt/beegfs02/scratch/t_steimle/ref/chm13v2.0.mmi"
  245. # Path to samtools binary.
  246. samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"
  247. # Threads for `samtools view`.
  248. samtools_view_threads = 20
  249. # Threads for `samtools sort`.
  250. samtools_sort_threads = 48
  251. # Threads for `samtools merge`.
  252. samtools_merge_threads = 48
  253. # Threads for `samtools split`.
  254. samtools_split_threads = 48