config.rs 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009
  1. use log::{info, warn};
  2. use serde::{Deserialize, Serialize};
  3. use std::fs;
  4. use std::path::{Path, PathBuf};
  /// Commented configuration template, embedded at compile time from
  /// `../pandora-config.example.toml` (path relative to this source file).
  ///
  /// It is written verbatim to disk on first run and parsed as the fallback
  /// configuration when the user file cannot be read or parsed.
  const CONFIG_TEMPLATE: &str = include_str!("../pandora-config.example.toml");
  6. #[derive(Debug, Clone, Serialize, Deserialize)]
  7. /// Global configuration for the Pandora somatic pipeline.
  8. ///
  9. /// Loaded from `~/.local/share/pandora/pandora-config.toml` (see [`Config::config_path`]).
  10. /// Most fields are path templates that can contain placeholders such as:
  11. /// `{result_dir}`, `{id}`, `{time}`, `{reference_name}`, `{haplotagged_bam_tag_name}`, `{output_dir}`.
  12. pub struct Config {
  13. // === General filesystem layout / I/O ===
  14. /// Root directory where all results will be written.
  15. pub result_dir: String,
  16. /// Temporary directory.
  17. pub tmp_dir: String,
  18. /// Run cache directory.
  19. pub run_cache_dir: String,
  20. /// Runner can slurm
  21. pub slurm_runner: bool,
  22. /// Software threads
  23. pub threads: u8,
  24. /// Singularity/Apptainer bin
  25. pub singularity_bin: String,
  26. /// Path to the `conda.sh` activation script (used to activate envs before running tools).
  27. pub conda_sh: String,
  28. // === Alignment / BAM handling ===
  29. /// Configuration for Dorado + samtools alignment pipeline.
  30. pub align: AlignConfig,
  31. /// Minimum MAPQ for reads to be kept during BAM filtering.
  32. pub bam_min_mapq: u8,
  33. /// Number of threads for hts BAM reader
  34. pub bam_n_threads: u8,
  35. /// Number of reads sampled when estimating BAM composition (e.g. tumor contamination).
  36. pub bam_composition_sample_size: u32,
  37. // === Reference genome and annotations ===
  38. /// Path to the reference FASTA used throughout the pipeline.
  39. pub reference: String,
  40. /// Short name for the reference (e.g. "hs1"), used in filenames.
  41. pub reference_name: String,
  42. /// Pseudoautosomal regions (PARs) BED file.
  43. pub pseudoautosomal_regions_bed: String,
  44. /// Path to the sequence dictionary (`.dict`) for the reference.
  45. pub dict_file: String,
  46. /// Path to the RefSeq GFF3 annotation, sorted and indexed.
  47. pub refseq_gff: String,
  48. /// dbSNP vcf.gz file (should be indexed)
  49. pub db_snp: String,
  50. /// BED with genes on the 4th column
  51. pub genes_bed: String,
  52. /// Cytobands BED file
  53. pub cytobands_bed: String,
  54. /// Chromosome alias file
  55. pub chromosomes_alias: String,
  56. /// BED template used to mask low-quality or filtered regions.
  57. ///
  58. /// Placeholders:
  59. /// - `{result_dir}`: global result directory
  60. /// - `{id}`: case identifier
  61. pub mask_bed: String,
  62. /// Panels of interest (name, BED path).
  63. pub panels: Vec<(String, String)>,
  64. /// Repeats bed file
  65. pub repeats_bed: String,
  66. // === Sample naming conventions ===
  67. /// Label used for the tumor sample in directory and file names (e.g. "diag").
  68. pub tumoral_name: String,
  69. /// Label used for the normal sample (e.g. "mrd").
  70. pub normal_name: String,
  71. /// BAM tag name used for haplotagged reads (e.g. "HP").
  72. pub haplotagged_bam_tag_name: String,
  73. // === Coverage counting (somatic-scan) ===
  74. /// Name of the subdirectory (under each sample dir) where count files are stored.
  75. pub count_dir_name: String,
  76. /// Bin size (bp) for count files.
  77. pub count_bin_size: u32,
  78. /// Number of chunks used to split chromosomes for counting.
  79. pub count_n_chunks: u32,
  80. /// Whether to force recomputation of coverage / counting even if outputs already exist.
  81. pub somatic_scan_force: bool,
  82. // === Somatic pipeline global options ===
  83. /// Whether to force recomputation of the whole somatic pipeline.
  84. pub somatic_pipe_force: bool,
  85. /// Default number of threads for most heavy tools (DeepVariant, Savana, etc.).
  86. pub somatic_pipe_threads: u8,
  87. /// Path template to the per-case somatic pipeline statistics directory.
  88. ///
  89. /// Placeholders: `{result_dir}`, `{id}`.
  90. pub somatic_pipe_stats: String,
  91. // === Basic somatic filtering / QC thresholds ===
  92. /// Minimum depth in the constitutional sample to consider a site evaluable.
  93. pub somatic_min_constit_depth: u16,
  94. /// Maximum allowed ALT count in the constitutional sample for a somatic call.
  95. pub somatic_max_alt_constit: u16,
  96. /// Window size (bp) used when computing sequence entropy around variants.
  97. pub entropy_seq_len: usize,
  98. /// Minimum Shannon entropy threshold for keeping a variant.
  99. pub min_shannon_entropy: f64,
  100. /// Maximum depth considered "low quality" for certain filters.
  101. pub max_depth_low_quality: u32,
  102. /// Minimum depth considered "high quality" for certain filters.
  103. pub min_high_quality_depth: u32,
  104. /// Minimum number of callers supporting a variant for it to be kept.
  105. pub min_n_callers: u8,
  106. // === DeepVariant configuration ===
  107. /// Template for the DeepVariant output directory (solo and normal/tumor runs).
  108. ///
  109. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  110. pub deepvariant_output_dir: String,
  111. /// Number of threads to use for DeepVariant.
  112. pub deepvariant_threads: u8,
  113. /// DeepVariant singularity image path
  114. pub deepvariant_image: String,
  115. /// DeepVariant model type (e.g. "ONT_R104").
  116. pub deepvariant_model_type: String,
  117. /// Force DeepVariant recomputation even if outputs already exist.
  118. pub deepvariant_force: bool,
  119. // === DeepSomatic configuration ===
  120. /// Template for the DeepSomatic output directory.
  121. ///
  122. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  123. pub deepsomatic_output_dir: String,
  124. /// Number of threads for DeepSomatic.
  125. pub deepsomatic_threads: u8,
  126. /// DeepSomatic singularity image path
  127. pub deepsomatic_image: String,
  128. /// DeepSomatic model type (e.g. "ONT").
  129. pub deepsomatic_model_type: String,
  130. /// Force DeepSomatic recomputation.
  131. pub deepsomatic_force: bool,
  132. // === ClairS configuration ===
  133. /// Number of threads for ClairS.
  134. pub clairs_threads: u8,
  135. /// Path to ClairS singularity image.
  136. pub clairs_image: String,
  137. /// Force ClairS recomputation.
  138. pub clairs_force: bool,
  139. /// Platform preset for ClairS (e.g. "ont_r10_dorado_sup_5khz_ssrs").
  140. pub clairs_platform: String,
  141. /// Template for ClairS output directory (`{result_dir}`, `{id}`).
  142. pub clairs_output_dir: String,
  143. // === Savana configuration ===
  144. /// Savana binary name or full path.
  145. pub savana_bin: String,
  146. /// Number of threads for Savana.
  147. pub savana_threads: u8,
  148. /// Template for Savana output directory (`{result_dir}`, `{id}`).
  149. pub savana_output_dir: String,
  150. /// Template for Savana copy number file.
  151. ///
  152. /// Placeholders: `{output_dir}`, `{id}`, `{reference_name}`, `{haplotagged_bam_tag_name}`.
  153. pub savana_copy_number: String,
  154. /// Template for Savana raw read counts file.
  155. ///
  156. /// Same placeholders as [`Config::savana_copy_number`].
  157. pub savana_read_counts: String,
  158. /// Template for Savana passed VCF output (`{output_dir}`, `{id}`).
  159. pub savana_passed_vcf: String,
  160. /// Force Savana recomputation.
  161. pub savana_force: bool,
  162. /// Template for constitutional phased VCF (`{result_dir}`, `{id}`).
  163. pub germline_phased_vcf: String,
  164. // === Severus configuration ===
  165. /// Path to Severus main script (`severus.py`).
  166. pub severus_bin: String,
  167. /// Force Severus recomputation.
  168. pub severus_force: bool,
  169. /// Number of threads for Severus.
  170. pub severus_threads: u8,
  171. /// VNTRs BED file for Severus.
  172. pub vntrs_bed: String,
  173. /// Path to Severus PoN file (TSV or VCF).
  174. pub severus_pon: String,
  175. /// Template for Severus tumor/normal (paired) output directory.
  176. ///
  177. /// Placeholders: `{result_dir}`, `{id}`.
  178. pub severus_output_dir: String,
  179. /// Template for Severus solo output directory.
  180. ///
  181. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  182. pub severus_solo_output_dir: String,
  183. // === MARLIN ===
  184. pub marlin_bed: String,
  185. // === Echtvar ===
  186. pub echtvar_bin: String,
  187. pub echtvar_sources: Vec<String>,
  188. // === Bcftools configuration ===
  189. /// Path to Bcftools binary.
  190. pub bcftools_bin: String,
  191. /// Number of threads for Bcftools.
  192. pub bcftools_threads: u8,
  193. // === Longphase configuration ===
  194. /// Path to longphase binary.
  195. pub longphase_bin: String,
  196. /// Number of threads for longphase.
  197. pub longphase_threads: u8,
  198. /// Number of threads for longphase modcall step.
  199. pub longphase_modcall_threads: u8,
  200. /// Force longphase recomputation (haplotagging/phasing).
  201. pub longphase_force: bool,
  202. /// Template for longphase modcall VCF.
  203. ///
  204. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  205. pub longphase_modcall_vcf: String,
  206. // === Modkit configuration ===
  207. /// Path to modkit binary.
  208. pub modkit_bin: String,
  209. /// Number of threads for `modkit summary`.
  210. pub modkit_summary_threads: u8,
  211. /// Template for modkit summary output file.
  212. ///
  213. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  214. pub modkit_summary_file: String,
  215. // === Nanomonsv configuration ===
  216. /// Path to nanomonsv binary.
  217. pub nanomonsv_bin: String,
  218. /// Template for paired nanomonsv output directory (`{result_dir}`, `{id}`, `{time}`).
  219. pub nanomonsv_output_dir: String,
  220. /// Force nanomonsv recomputation.
  221. pub nanomonsv_force: bool,
  222. /// Number of threads for nanomonsv.
  223. pub nanomonsv_threads: u8,
  224. /// Template for paired nanomonsv passed VCF (`{output_dir}`, `{id}`).
  225. pub nanomonsv_passed_vcf: String,
  226. /// Template for solo nanomonsv output directory.
  227. ///
  228. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  229. pub nanomonsv_solo_output_dir: String,
  230. /// Template for solo nanomonsv passed VCF (`{output_dir}`, `{id}`, `{time}`).
  231. pub nanomonsv_solo_passed_vcf: String,
  232. pub nanomonsv_simple_repeat_bed: String,
  233. // === Straglr configuration ===
  234. /// Path to Straglr executable.
  235. pub straglr_bin: String,
  236. /// Path to STR loci BED file for Straglr.
  237. pub straglr_loci_bed: String,
  238. /// Size of reported difference between normal and tumoral
  239. pub straglr_min_size_diff: u32,
  240. /// Minimum CN of reported difference between normal and tumoral
  241. pub straglr_min_support_diff: u32,
  242. /// Minimum read support for STR genotyping.
  243. pub straglr_min_support: u32,
  244. /// Minimum cluster size for STR detection.
  245. pub straglr_min_cluster_size: u32,
  246. /// Whether to genotype in size mode.
  247. pub straglr_genotype_in_size: bool,
  248. /// Template for paired Straglr output directory.
  249. ///
  250. /// Placeholders: `{result_dir}`, `{id}`.
  251. pub straglr_output_dir: String,
  252. /// Template for solo Straglr output directory.
  253. ///
  254. /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
  255. pub straglr_solo_output_dir: String,
  256. /// Force Straglr recomputation.
  257. pub straglr_force: bool,
  258. // === PromethION runs / metadata ===
  259. /// Directory containing metadata about PromethION runs.
  260. pub promethion_runs_metadata_dir: String,
  261. /// JSON file describing PromethION runs and flowcell IDs.
  262. pub promethion_runs_input: String,
  263. // === VEP ===
  264. /// Path to VEP singularity image
  265. pub vep_image: String,
  266. /// Path to the VEP cache directory
  267. pub vep_cache_dir: String,
  268. /// Path to VEP sorted GFF
  269. pub vep_gff: String,
  270. }
  271. #[derive(Debug, Clone, Serialize, Deserialize)]
  272. /// Configuration for basecalling and alignment using Dorado and samtools.
  273. pub struct AlignConfig {
  274. /// Path to Dorado binary.
  275. pub dorado_bin: String,
  276. /// Arguments passed to `dorado basecaller` (e.g. devices and model name).
  277. pub dorado_basecall_arg: String,
  278. /// Should dorado re-align after demux ?
  279. pub dorado_should_realign: bool,
  280. /// Dorado aligner threads number
  281. pub dorado_aligner_threads: u8,
  282. /// Reference FASTA used for alignment.
  283. pub ref_fa: String,
  284. /// Minimap2 index (`.mmi`) used by Dorado or downstream tools.
  285. pub ref_mmi: String,
  286. /// Path to Samtools binary.
  287. pub samtools_bin: String,
  288. /// Number of threads given to `samtools view`.
  289. pub samtools_view_threads: u8,
  290. /// Number of threads given to `samtools sort`.
  291. pub samtools_sort_threads: u8,
  292. /// Number of threads given to `samtools merge`.
  293. pub samtools_merge_threads: u8,
  294. /// Number of threads given to `samtools split`.
  295. pub samtools_split_threads: u8,
  296. }
  297. // Here comes names that can't be changed from output of tools
  298. lazy_static! {
  299. /// Template name for DeepVariant VCF outputs.
  300. static ref DEEPVARIANT_OUTPUT_NAME: &'static str = "{id}_{time}_DeepVariant.vcf.gz";
  301. /// ClairS main SNP/indel VCF name.
  302. static ref CLAIRS_OUTPUT_NAME: &'static str = "output.vcf.gz";
  303. /// ClairS indel-only VCF name.
  304. static ref CLAIRS_OUTPUT_INDELS_NAME: &'static str = "indel.vcf.gz";
  305. /// ClairS germline normal VCF name.
  306. static ref CLAIRS_GERMLINE_NORMAL: &'static str = "clair3_normal_germline_output.vcf.gz";
  307. /// ClairS germline tumor VCF name.
  308. static ref CLAIRS_GERMLINE_TUMOR: &'static str = "clair3_tumor_germline_output.vcf.gz";
  309. }
  310. // impl Default for AlignConfig {
  311. // fn default() -> Self {
  312. // Self {
  313. // dorado_bin: "/data/tools/dorado-1.1.1-linux-x64/bin/dorado".to_string(),
  314. // dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(),
  315. // ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  316. // ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
  317. // samtools_view_threads: 20,
  318. // samtools_sort_threads: 50,
  319. // }
  320. // }
  321. // }
  322. //
  323. impl Config {
  324. /// Returns the config file path, e.g.:
  325. /// `~/.local/share/pandora/pandora-config.toml`.
  326. pub fn config_path() -> PathBuf {
  327. let mut path = directories::ProjectDirs::from("", "", "pandora")
  328. .expect("Could not determine project directory")
  329. .config_dir()
  330. .to_path_buf();
  331. path.push("pandora-config.toml");
  332. path
  333. }
  334. /// Install the commented template config on disk **if it does not exist yet**.
  335. ///
  336. /// This writes `CONFIG_TEMPLATE` verbatim so comments are preserved.
  337. fn write_template_if_missing() -> Result<(), Box<dyn std::error::Error>> {
  338. let path = Self::config_path();
  339. if path.exists() {
  340. // Do not touch an existing user config.
  341. return Ok(());
  342. }
  343. if let Some(parent) = path.parent() {
  344. fs::create_dir_all(parent)?;
  345. }
  346. fs::write(&path, CONFIG_TEMPLATE)?;
  347. info!("Config template written to: {}", path.display());
  348. Ok(())
  349. }
  350. /// “Save” configuration.
  351. ///
  352. /// In this model, we do **not** overwrite the user config (to preserve comments).
  353. /// `save()` only ensures the template exists on disk on first run.
  354. pub fn save(&self) -> Result<(), Box<dyn std::error::Error>> {
  355. Self::write_template_if_missing()
  356. }
  357. pub fn from_path(path: impl AsRef<Path>) -> Self {
  358. let path = path.as_ref().to_path_buf();
  359. // First, ensure there is at least a file on disk (template on first run).
  360. if let Err(e) = Self::write_template_if_missing() {
  361. warn!(
  362. "Warning: failed to ensure config template at {}: {}",
  363. path.display(),
  364. e
  365. );
  366. }
  367. // Try to load and parse the user config file.
  368. match fs::read_to_string(&path) {
  369. Ok(content) => match toml::from_str::<Config>(&content) {
  370. Ok(cfg) => cfg,
  371. Err(e) => {
  372. warn!(
  373. "Warning: failed to parse user config {}: {}. Falling back to embedded template.",
  374. path.display(),
  375. e
  376. );
  377. // Fallback: parse the embedded template.
  378. toml::from_str::<Config>(CONFIG_TEMPLATE)
  379. .expect("embedded config template is invalid")
  380. }
  381. },
  382. Err(e) => {
  383. warn!(
  384. "Warning: failed to read user config {}: {}. Falling back to embedded template.",
  385. path.display(),
  386. e
  387. );
  388. toml::from_str::<Config>(CONFIG_TEMPLATE)
  389. .expect("embedded config template is invalid")
  390. }
  391. }
  392. }
  393. /// Returns `<result_dir>/<id>/<tumoral_name>`.
  394. #[inline]
  395. pub fn tumoral_dir(&self, id: &str) -> String {
  396. format!("{}/{}/{}", self.result_dir, id, self.tumoral_name)
  397. }
  398. /// Returns `<result_dir>/<id>/<normal_name>`.
  399. #[inline]
  400. pub fn normal_dir(&self, id: &str) -> String {
  401. format!("{}/{}/{}", self.result_dir, id, self.normal_name)
  402. }
  403. /// Returns the directory for a "solo" run (timepoint or tag), i.e. `<result_dir>/<id>/<time>`.
  404. #[inline]
  405. pub fn solo_dir(&self, id: &str, time: &str) -> String {
  406. format!("{}/{}/{}", self.result_dir, id, time)
  407. }
  408. /// BAM for a solo run: `<solo_dir>/<id>_<time>_<reference_name>.bam`.
  409. pub fn solo_bam(&self, id: &str, time: &str) -> String {
  410. format!(
  411. "{}/{}_{}_{}.bam",
  412. self.solo_dir(id, time),
  413. id,
  414. time,
  415. self.reference_name,
  416. )
  417. }
  418. /// JSON sidecar for the solo BAM.
  419. pub fn solo_bam_info_json(&self, id: &str, time: &str) -> String {
  420. format!(
  421. "{}/{}_{}_{}_info.json",
  422. self.solo_dir(id, time),
  423. id,
  424. time,
  425. self.reference_name,
  426. )
  427. }
  428. /// Tumor BAM path: `<tumoral_dir>/<id>_<tumoral_name>_<reference_name>.bam`.
  429. pub fn tumoral_bam(&self, id: &str) -> String {
  430. format!(
  431. "{}/{}_{}_{}.bam",
  432. self.tumoral_dir(id),
  433. id,
  434. self.tumoral_name,
  435. self.reference_name,
  436. )
  437. }
  438. /// Normal BAM path: `<normal_dir>/<id>_<normal_name>_<reference_name>.bam`.
  439. pub fn normal_bam(&self, id: &str) -> String {
  440. format!(
  441. "{}/{}_{}_{}.bam",
  442. self.normal_dir(id),
  443. id,
  444. self.normal_name,
  445. self.reference_name,
  446. )
  447. }
  448. /// Tumor haplotagged BAM.
  449. pub fn solo_haplotagged_bam(&self, id: &str, time: &str) -> String {
  450. format!(
  451. "{}/{}_{}_{}_{}.bam",
  452. self.solo_dir(id, time),
  453. id,
  454. time,
  455. self.reference_name,
  456. self.haplotagged_bam_tag_name
  457. )
  458. }
  459. /// Tumor haplotagged BAM.
  460. pub fn tumoral_haplotagged_bam(&self, id: &str) -> String {
  461. format!(
  462. "{}/{}_{}_{}_{}.bam",
  463. self.tumoral_dir(id),
  464. id,
  465. self.tumoral_name,
  466. self.reference_name,
  467. self.haplotagged_bam_tag_name
  468. )
  469. }
  470. /// Normal haplotagged BAM.
  471. pub fn normal_haplotagged_bam(&self, id: &str) -> String {
  472. format!(
  473. "{}/{}_{}_{}_{}.bam",
  474. self.normal_dir(id),
  475. id,
  476. self.normal_name,
  477. self.reference_name,
  478. self.haplotagged_bam_tag_name
  479. )
  480. }
  481. /// Normal count directory: `<normal_dir>/counts`.
  482. pub fn normal_dir_count(&self, id: &str) -> String {
  483. format!("{}/{}", self.normal_dir(id), self.count_dir_name)
  484. }
  485. /// Tumor count directory: `<tumoral_dir>/counts`.
  486. pub fn tumoral_dir_count(&self, id: &str) -> String {
  487. format!("{}/{}", self.tumoral_dir(id), self.count_dir_name)
  488. }
  489. /// Mask BED path with `{result_dir}` and `{id}` expanded.
  490. pub fn mask_bed(&self, id: &str) -> String {
  491. self.mask_bed
  492. .replace("{result_dir}", &self.result_dir)
  493. .replace("{id}", id)
  494. }
  495. /// Germline phased VCF with `{result_dir}` and `{id}` expanded.
  496. pub fn germline_phased_vcf(&self, id: &str) -> String {
  497. self.germline_phased_vcf
  498. .replace("{result_dir}", &self.result_dir)
  499. .replace("{id}", id)
  500. }
  501. /// Somatic pipeline stats directory with `{result_dir}` and `{id}` expanded.
  502. pub fn somatic_pipe_stats(&self, id: &str) -> String {
  503. self.somatic_pipe_stats
  504. .replace("{result_dir}", &self.result_dir)
  505. .replace("{id}", id)
  506. }
  507. /// DeepVariant output directory for a given run (`{result_dir}`, `{id}`, `{time}`).
  508. pub fn deepvariant_output_dir(&self, id: &str, time: &str) -> String {
  509. self.deepvariant_output_dir
  510. .replace("{result_dir}", &self.result_dir)
  511. .replace("{id}", id)
  512. .replace("{time}", time)
  513. }
  514. /// DeepVariant solo VCF (raw) for `<id>, <time>`.
  515. pub fn deepvariant_solo_output_vcf(&self, id: &str, time: &str) -> String {
  516. format!(
  517. "{}/{}",
  518. self.deepvariant_output_dir(id, time),
  519. *DEEPVARIANT_OUTPUT_NAME
  520. )
  521. .replace("{id}", id)
  522. .replace("{time}", time)
  523. }
  524. /// DeepVariant output directory for the normal sample.
  525. pub fn deepvariant_normal_output_dir(&self, id: &str) -> String {
  526. self.deepvariant_output_dir(id, &self.normal_name)
  527. }
  528. /// DeepVariant "tumoral output dir" (as in your original code – note: this actually returns the *PASSED VCF* path).
  529. pub fn deepvariant_tumoral_output_dir(&self, id: &str) -> String {
  530. self.deepvariant_solo_passed_vcf(id, &self.tumoral_name)
  531. }
  532. /// DeepVariant solo *PASSED* VCF for `<id>, <time>`.
  533. pub fn deepvariant_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  534. format!(
  535. "{}/{}_{}_DeepVariant_PASSED.vcf.gz",
  536. self.deepvariant_output_dir(id, time),
  537. id,
  538. time
  539. )
  540. }
  541. /// DeepVariant *PASSED* VCF for the normal sample.
  542. pub fn deepvariant_normal_passed_vcf(&self, id: &str) -> String {
  543. self.deepvariant_solo_passed_vcf(id, &self.normal_name)
  544. }
  545. /// DeepVariant *PASSED* VCF for the tumor sample.
  546. pub fn deepvariant_tumoral_passed_vcf(&self, id: &str) -> String {
  547. self.deepvariant_solo_passed_vcf(id, &self.tumoral_name)
  548. }
  549. /// DeepSomatic output directory (uses `{time} = tumoral_name`).
  550. pub fn deepsomatic_output_dir(&self, id: &str) -> String {
  551. self.deepsomatic_output_dir
  552. .replace("{result_dir}", &self.result_dir)
  553. .replace("{id}", id)
  554. .replace("{time}", &self.tumoral_name)
  555. }
  556. /// DeepSomatic raw VCF.
  557. pub fn deepsomatic_output_vcf(&self, id: &str) -> String {
  558. format!(
  559. "{}/{}_{}_DeepSomatic.vcf.gz",
  560. self.deepsomatic_output_dir(id),
  561. id,
  562. self.tumoral_name
  563. )
  564. }
  565. /// DeepSomatic *PASSED* VCF.
  566. pub fn deepsomatic_passed_vcf(&self, id: &str) -> String {
  567. format!(
  568. "{}/{}_{}_DeepSomatic_PASSED.vcf.gz",
  569. self.deepsomatic_output_dir(id),
  570. id,
  571. self.tumoral_name
  572. )
  573. }
  574. /// ClairS output directory (`{result_dir}`, `{id}`).
  575. pub fn clairs_output_dir(&self, id: &str) -> String {
  576. self.clairs_output_dir
  577. .replace("{result_dir}", &self.result_dir)
  578. .replace("{id}", id)
  579. }
  580. /// ClairS main SNP/indel VCFs (standard + indel-only).
  581. pub fn clairs_output_vcfs(&self, id: &str) -> (String, String) {
  582. let dir = self.clairs_output_dir(id);
  583. (
  584. format!("{dir}/{}", *CLAIRS_OUTPUT_NAME),
  585. format!("{dir}/{}", *CLAIRS_OUTPUT_INDELS_NAME),
  586. )
  587. }
  588. /// ClairS somatic *PASSED* VCF.
  589. pub fn clairs_passed_vcf(&self, id: &str) -> String {
  590. format!(
  591. "{}/{}_{}_clairs_PASSED.vcf.gz",
  592. self.clairs_output_dir(id),
  593. id,
  594. self.tumoral_name
  595. )
  596. }
  597. /// ClairS germline normal VCF.
  598. pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
  599. let dir = self.clairs_output_dir(id);
  600. format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)
  601. }
  602. /// ClairS germline tumor VCF.
  603. pub fn clairs_germline_tumor_vcf(&self, id: &str) -> String {
  604. let dir = self.clairs_output_dir(id);
  605. format!("{dir}/{}", *CLAIRS_GERMLINE_TUMOR)
  606. }
  607. /// Consolidated germline *PASSED* VCF from ClairS.
  608. pub fn clairs_germline_passed_vcf(&self, id: &str) -> String {
  609. let dir = self.clairs_output_dir(id);
  610. format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
  611. }
  612. /// Paired nanomonsv output directory.
  613. pub fn nanomonsv_output_dir(&self, id: &str, time: &str) -> String {
  614. self.nanomonsv_output_dir
  615. .replace("{result_dir}", &self.result_dir)
  616. .replace("{id}", id)
  617. .replace("{time}", time)
  618. }
  619. /// Paired nanomonsv *PASSED* VCF.
  620. pub fn nanomonsv_passed_vcf(&self, id: &str) -> String {
  621. self.nanomonsv_passed_vcf
  622. .replace("{output_dir}", &self.nanomonsv_output_dir(id, "diag"))
  623. .replace("{id}", id)
  624. }
  625. /// Solo nanomonsv output directory.
  626. pub fn nanomonsv_solo_output_dir(&self, id: &str, time: &str) -> String {
  627. self.nanomonsv_solo_output_dir
  628. .replace("{result_dir}", &self.result_dir)
  629. .replace("{id}", id)
  630. .replace("{time}", time)
  631. }
  632. /// Solo nanomonsv *PASSED* VCF.
  633. pub fn nanomonsv_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  634. self.nanomonsv_solo_passed_vcf
  635. .replace("{output_dir}", &self.nanomonsv_solo_output_dir(id, time))
  636. .replace("{id}", id)
  637. .replace("{time}", time)
  638. }
  639. /// Savana output directory (`{result_dir}`, `{id}`).
  640. pub fn savana_output_dir(&self, id: &str) -> String {
  641. self.savana_output_dir
  642. .replace("{result_dir}", &self.result_dir)
  643. .replace("{id}", id)
  644. }
  645. /// Savana main somatic VCF (classified).
  646. pub fn savana_output_vcf(&self, id: &str) -> String {
  647. let output_dir = self.savana_output_dir(id);
  648. format!(
  649. "{output_dir}/{id}_{}_{}_{}.classified.somatic.vcf",
  650. self.tumoral_name, self.reference_name, self.haplotagged_bam_tag_name
  651. )
  652. }
  653. /// Savana *PASSED* VCF.
  654. pub fn savana_passed_vcf(&self, id: &str) -> String {
  655. self.savana_passed_vcf
  656. .replace("{output_dir}", &self.savana_output_dir(id))
  657. .replace("{id}", id)
  658. }
  659. /// Savana read counts file.
  660. pub fn savana_read_counts(&self, id: &str) -> String {
  661. self.savana_read_counts
  662. .replace("{output_dir}", &self.savana_output_dir(id))
  663. .replace("{id}", id)
  664. .replace("{reference_name}", &self.reference_name)
  665. .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
  666. }
  667. /// Savana copy-number file.
  668. pub fn savana_copy_number(&self, id: &str) -> String {
  669. self.savana_copy_number
  670. .replace("{output_dir}", &self.savana_output_dir(id))
  671. .replace("{id}", id)
  672. .replace("{reference_name}", &self.reference_name)
  673. .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
  674. }
  675. /// Severus paired output directory.
  676. pub fn severus_output_dir(&self, id: &str) -> String {
  677. self.severus_output_dir
  678. .replace("{result_dir}", &self.result_dir)
  679. .replace("{id}", id)
  680. }
  681. /// Severus somatic SV VCF (paired).
  682. pub fn severus_output_vcf(&self, id: &str) -> String {
  683. let output_dir = self.severus_output_dir(id);
  684. format!("{output_dir}/somatic_SVs/severus_somatic.vcf")
  685. }
  686. /// Severus *PASSED* VCF (paired).
  687. pub fn severus_passed_vcf(&self, id: &str) -> String {
  688. format!(
  689. "{}/{}_diag_severus_PASSED.vcf.gz",
  690. &self.severus_output_dir(id),
  691. id
  692. )
  693. }
  694. /// Severus solo output directory.
  695. pub fn severus_solo_output_dir(&self, id: &str, time: &str) -> String {
  696. self.severus_solo_output_dir
  697. .replace("{result_dir}", &self.result_dir)
  698. .replace("{id}", id)
  699. .replace("{time}", time)
  700. }
  701. /// Severus solo SV VCF.
  702. pub fn severus_solo_output_vcf(&self, id: &str, time: &str) -> String {
  703. let output_dir = self.severus_solo_output_dir(id, time);
  704. format!("{output_dir}/all_SVs/severus_all.vcf")
  705. }
  706. /// Severus solo *PASSED* VCF.
  707. pub fn severus_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  708. format!(
  709. "{}/{}_{}_severus-solo_PASSED.vcf.gz",
  710. &self.severus_solo_output_dir(id, time),
  711. id,
  712. time
  713. )
  714. }
  715. /// Straglr paired output directory.
  716. pub fn straglr_output_dir(&self, id: &str) -> String {
  717. self.straglr_output_dir
  718. .replace("{result_dir}", &self.result_dir)
  719. .replace("{id}", id)
  720. }
  721. /// Straglr normal sample TSV output.
  722. pub fn straglr_normal_tsv(&self, id: &str) -> String {
  723. format!(
  724. "{}/{}_{}_straglr.tsv",
  725. self.straglr_solo_output_dir(id, &self.normal_name),
  726. id,
  727. self.normal_name
  728. )
  729. }
  730. /// Straglr tumor sample TSV output.
  731. pub fn straglr_tumor_tsv(&self, id: &str) -> String {
  732. format!(
  733. "{}/{}_{}_straglr.tsv",
  734. self.straglr_output_dir(id),
  735. id,
  736. self.tumoral_name
  737. )
  738. }
  739. /// Straglr tumor sample TSV output.
  740. pub fn straglr_tumor_normal_diff_tsv(&self, id: &str) -> String {
  741. format!(
  742. "{}/{}_{}_straglr_diff.tsv",
  743. self.straglr_output_dir(id),
  744. id,
  745. self.tumoral_name
  746. )
  747. }
  748. /// Straglr solo output directory.
  749. pub fn straglr_solo_output_dir(&self, id: &str, time: &str) -> String {
  750. self.straglr_solo_output_dir
  751. .replace("{result_dir}", &self.result_dir)
  752. .replace("{id}", id)
  753. .replace("{time}", time)
  754. }
  755. /// Straglr solo TSV output.
  756. pub fn straglr_solo_tsv(&self, id: &str, time: &str) -> String {
  757. format!(
  758. "{}/{}_{}_straglr.tsv",
  759. self.straglr_solo_output_dir(id, time),
  760. id,
  761. time
  762. )
  763. }
  764. /// Alias for the constitutional germline VCF.
  765. pub fn constit_vcf(&self, id: &str) -> String {
  766. self.clairs_germline_passed_vcf(id)
  767. }
  768. /// Somatic-scan output directory for a solo run (counts subdir).
  769. pub fn somatic_scan_solo_output_dir(&self, id: &str, time: &str) -> String {
  770. format!("{}/counts", self.solo_dir(id, time))
  771. }
  772. /// Somatic-scan output dir for the normal sample.
  773. pub fn somatic_scan_normal_output_dir(&self, id: &str) -> String {
  774. self.somatic_scan_solo_output_dir(id, &self.normal_name)
  775. }
  776. /// Somatic-scan output dir for the tumor sample.
  777. pub fn somatic_scan_tumoral_output_dir(&self, id: &str) -> String {
  778. self.somatic_scan_solo_output_dir(id, &self.tumoral_name)
  779. }
  780. /// Somatic-scan count file for a given contig in a solo run.
  781. pub fn somatic_scan_solo_count_file(&self, id: &str, time: &str, contig: &str) -> String {
  782. format!(
  783. "{}/{}_count.tsv.gz",
  784. self.somatic_scan_solo_output_dir(id, time),
  785. contig
  786. )
  787. }
  788. /// Somatic-scan count file (normal) for a given contig.
  789. pub fn somatic_scan_normal_count_file(&self, id: &str, contig: &str) -> String {
  790. self.somatic_scan_solo_count_file(id, &self.normal_name, contig)
  791. }
  792. /// Somatic-scan count file (tumor) for a given contig.
  793. pub fn somatic_scan_tumoral_count_file(&self, id: &str, contig: &str) -> String {
  794. self.somatic_scan_solo_count_file(id, &self.tumoral_name, contig)
  795. }
  796. /// Modkit summary file (`{result_dir}`, `{id}`, `{time}`).
  797. pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
  798. self.modkit_summary_file
  799. .replace("{result_dir}", &self.result_dir)
  800. .replace("{id}", id)
  801. .replace("{time}", time)
  802. }
  803. /// Longphase modcall VCF (`{result_dir}`, `{id}`, `{time}`).
  804. pub fn longphase_modcall_vcf(&self, id: &str, time: &str) -> String {
  805. self.longphase_modcall_vcf
  806. .replace("{result_dir}", &self.result_dir)
  807. .replace("{id}", id)
  808. .replace("{time}", time)
  809. }
  810. }
  811. impl Default for Config {
  812. fn default() -> Self {
  813. let path = Self::config_path();
  814. Self::from_path(path)
  815. }
  816. }