mod.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. //! # Variant Caller Integrations
  2. //!
  3. //! This module provides wrappers for multiple variant callers optimized for long-read
  4. //! sequencing data (ONT and PacBio). All callers are integrated with the shared runner
  5. //! pattern, allowing seamless execution in local or Slurm HPC environments via the `run!` macro.
  6. //!
  7. //! ## Overview
  8. //!
  9. //! The module includes seven production-grade variant callers, each specialized for different
  10. //! variant types and use cases:
  11. //!
  12. //! ### Small Variant Callers
  13. //!
  14. //! - **[ClairS]** - Deep learning-based somatic SNV/indel caller (paired tumor-normal)
  15. //! - Haplotype-aware calling with LongPhase integration
  16. //! - Dual output: somatic + germline variants
  17. //! - Best for: Somatic SNV/indel detection in cancer samples
  18. //! - [GitHub](https://github.com/HKU-BAL/ClairS)
  19. //!
  20. //! - **[DeepVariant]** - Deep learning-based germline variant caller (single-sample)
  21. //! - Karyotype-aware for accurate X/Y chromosome calling
  22. //! - Platform-agnostic models (ONT, PacBio, Illumina)
  23. //! - Best for: Germline SNV/indel detection
  24. //! - [GitHub](https://github.com/google/deepvariant)
  25. //!
  26. //! - **[DeepSomatic]** - Deep learning-based somatic variant caller (paired tumor-normal)
  27. //! - Derived from DeepVariant architecture
  28. //! - Optimized for somatic mutation detection
  29. //! - Best for: Somatic SNV/indel detection
  30. //! - [GitHub](https://github.com/google/deepsomatic)
  31. //!
  32. //! ### Structural Variant Callers
  33. //!
  34. //! - **[NanomonSV]** - Structural variant caller for paired and solo modes
  35. //! - Detects deletions, insertions, duplications, inversions, translocations
  36. //! - Supports tumor-normal paired analysis
  37. //! - Best for: General SV detection in cancer samples
  38. //! - [GitHub](https://github.com/friend1ws/nanomonsv)
  39. //!
  40. //! - **[Savana]** - Haplotype-aware SV and CNV caller (paired tumor-normal)
  41. //! - Integrated copy number variation analysis
  42. //! - Allele-specific CNV detection
  43. //! - Requires phased germline variants and haplotagged BAMs
  44. //! - Best for: Combined SV + CNV analysis with haplotype information
  45. //! - [GitHub](https://github.com/cortes-ciriano-lab/savana)
  46. //!
  47. //! - **[Severus]** - VNTR and structural variant caller (paired and solo modes)
  48. //! - Specialized in VNTR (Variable Number Tandem Repeat) detection
  49. //! - High-precision breakpoint resolution
  50. //! - Resolves complex overlapping SVs
  51. //! - Best for: VNTR analysis and complex SV detection
  52. //! - [GitHub](https://github.com/KolmogorovLab/Severus)
  53. //!
  54. //! ### STR Genotypers
  55. //!
  56. //! - **[Straglr]** - Short Tandem Repeat (STR) genotyper (paired and solo modes)
  57. //! - Detects pathogenic repeat expansions in known disease loci
  58. //! - Supports custom loci via BED file (RepeatMasker Simple_repeat)
  59. //! - Provides allele-level genotyping with read support
  60. //! - Best for: STR expansion detection in neurological and muscular diseases
  61. //! - [GitHub](https://github.com/bcgsc/straglr)
  62. //!
  63. //! ## Execution Modes
  64. //!
  65. //! All callers support:
  66. //! - **Local execution** - Direct command execution for debugging/testing
  67. //! - **Slurm execution** - HPC job submission via `srun` or `sbatch`
  68. //! - **Chunked parallel execution** - Genome splitting for whole-genome analysis
  69. //!
  70. //! Execution mode is automatically selected based on `config.slurm_runner`.
  71. //!
  72. //! ## Concurrency Control
  73. //!
  74. //! All callers use [`SampleLock`] to prevent concurrent execution on the same sample.
  75. //! This is critical for:
  76. //! - Preventing data corruption from parallel writes
  77. //! - Avoiding redundant computation when multiple jobs target the same sample
  78. //! - Ensuring atomicity of multi-step pipelines (e.g., chunked execution + merge)
  79. //!
  //! The locking mechanism uses atomic directory creation, which is reliable on distributed
  //! filesystems (BeeGFS, NFS, Lustre). Stale locks are automatically detected and cleaned up
  //! via SLURM job ID validation or PID checks.
  83. //!
  84. //! ## Typical Workflow
  85. //!
  86. //! 1. **Initialize** - Create caller instance with `Initialize::initialize()` or `InitializeSolo::initialize()`
  87. //! 2. **Check freshness** - Use `ShouldRun::should_run()` to avoid redundant work
  88. //! 3. **Execute** - Run caller with `Run::run()`
  89. //! 4. **Load variants** - Extract results with `Variants::variants()`
  90. //!
  91. //! ## Convenience Function
  92. //!
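  //! The [`run_somatic_callers()`] function executes the complete multi-caller somatic
  //! pipeline: ClairS and phasing run first, then the remaining callers run sequentially
  //! in local mode or in parallel threads when `config.slurm_runner` is set.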
  95. //!
  96. //! ## Usage Examples
  97. //!
  98. //! ### Individual Caller
  99. //!
  100. //! ```ignore
  101. //! use pandora_lib_promethion::callers::clairs::ClairS;
  102. //! use pandora_lib_promethion::config::Config;
  103. //! use pandora_lib_promethion::pipes::Initialize;
  104. //! use pandora_lib_promethion::runners::Run;
  105. //!
  106. //! let config = Config::default();
  107. //! let mut clairs = ClairS::initialize("sample_001", &config)?;
  108. //!
  109. //! if clairs.should_run() {
  110. //! clairs.run()?;
  111. //! }
  112. //!
  113. //! let variants = clairs.variants(&annotations)?;
  114. //! # Ok::<(), anyhow::Error>(())
  115. //! ```
  116. //!
  117. //! ### Complete Multi-Caller Pipeline
  118. //!
  119. //! ```ignore
  120. //! use pandora_lib_promethion::callers::run_somatic_callers;
  121. //! use pandora_lib_promethion::config::Config;
  122. //!
  123. //! let config = Config::default();
  124. //! run_somatic_callers("sample_001", &config)?;
  125. //! # Ok::<(), anyhow::Error>(())
  126. //! ```
  127. //!
  128. //! ## References
  129. //!
  130. //! Each caller module contains detailed documentation including:
  131. //! - Variant types detected
  132. //! - Requirements and dependencies
  133. //! - Output file formats and locations
  134. //! - Usage examples
  135. //! - Scientific publications
  136. use std::{sync::Arc, thread};
  137. use crate::{
  138. callers::{
  139. clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, gatk::Mutect2,
  140. nanomonsv::NanomonSV, savana::Savana, severus::Severus, straglr::Straglr,
  141. },
  142. commands::longphase::run_phasing_somatic,
  143. config::Config,
  144. pipes::{Initialize, InitializeSolo},
  145. runners::Run,
  146. scan::scan::SomaticScan,
  147. };
  148. pub mod clairs;
  149. pub mod deep_somatic;
  150. pub mod deep_variant;
  151. pub mod gatk;
  152. pub mod nanomonsv;
  153. pub mod savana;
  154. pub mod severus;
  155. pub mod straglr;
  156. pub mod coral;
  157. /// Runs all somatic variant callers sequentially for comprehensive multi-caller analysis.
  158. ///
  159. /// Executes the following callers in order:
  160. /// 1. **DeepVariant** (normal sample) - Germline SNV/indels
  161. /// 2. **DeepVariant** (tumor sample) - Germline SNV/indels
  162. /// 3. **ClairS** - Somatic SNV/indels (paired)
  163. /// 4. **Severus** - Somatic SVs and VNTRs (paired)
  164. /// 5. **Savana** - Somatic SVs and CNVs (paired, haplotype-aware)
  165. /// 6. **NanomonSV** - Somatic SVs (paired)
  166. /// 7. **DeepSomatic** - Somatic SNV/indels (paired)
  167. ///
  168. /// Each caller automatically:
  169. /// - Checks if it needs to run based on output freshness
  170. /// - Skips execution if outputs are up-to-date
  171. /// - Handles prerequisite steps (e.g., phasing, haplotagging)
  172. /// - Filters results to PASS-only variants
  173. ///
  174. /// # Arguments
  175. ///
  176. /// * `id` - Sample identifier
  177. /// * `config` - Global pipeline configuration
  178. ///
  179. /// # Returns
  180. ///
  181. /// `Ok(())` if all callers complete successfully, or an error from the first failed caller.
  182. ///
  183. /// # Errors
  184. ///
  185. /// Returns an error if any caller fails. Common failure modes:
  186. /// - Missing or corrupted BAM files
  187. /// - Missing reference genome or annotation files
  188. /// - Insufficient disk space for outputs
  189. /// - Singularity/Docker image not found
  190. /// - Slurm job submission failures (if `config.slurm_runner = true`)
  191. /// - Individual caller-specific errors (see each caller's documentation)
  192. ///
  193. /// # Performance Notes
  194. ///
  195. /// This function runs callers **sequentially**, not in parallel. For parallel execution,
  196. /// invoke callers individually using separate processes or jobs.
  197. ///
  198. /// Typical runtime for whole-genome sequencing (30x coverage):
  199. /// - DeepVariant: 2-4 hours (per sample, chunked)
  200. /// - ClairS: 4-6 hours (chunked)
  201. /// - Severus: 1-2 hours
  202. /// - Savana: 6 hours
  203. /// - NanomonSV: 1-2 hours
  204. /// - DeepSomatic: 3-5 hours (chunked)
  205. ///
  206. /// Total: ~15-25 hours sequential execution
  207. ///
  208. /// # Example
  209. ///
  210. /// ```ignore
  211. /// use pandora_lib_promethion::callers::run_somatic_callers;
  212. /// use pandora_lib_promethion::config::Config;
  213. ///
  214. /// let config = Config::default();
  215. /// run_somatic_callers("sample_001", &config)?;
  216. ///
  217. /// println!("All somatic callers completed successfully!");
  218. /// # Ok::<(), anyhow::Error>(())
  219. /// ```
  220. pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> {
  221. // ClairS - somatic SNV/indels with haplotype awareness
  222. // First gives germlines for phasing/haplotagging
  223. ClairS::initialize(id, config)?.run()?;
  224. run_phasing_somatic(id, config)?;
  225. // if slurm send jobs in parallel else run caller sequentially
  226. if config.slurm_runner {
  227. let config = Arc::new(config.clone());
  228. let id: Arc<str> = Arc::from(id);
  229. let handles = vec![
  230. {
  231. let config = Arc::clone(&config);
  232. let id = Arc::clone(&id);
  233. thread::spawn(move || -> anyhow::Result<()> {
  234. SomaticScan::initialize(&id, &config)?.run()
  235. })
  236. },
  237. {
  238. let config = Arc::clone(&config);
  239. let id = Arc::clone(&id);
  240. thread::spawn(move || -> anyhow::Result<()> {
  241. Severus::initialize(&id, &config)?.run()
  242. })
  243. },
  244. {
  245. let config = Arc::clone(&config);
  246. let id = Arc::clone(&id);
  247. thread::spawn(move || -> anyhow::Result<()> {
  248. Savana::initialize(&id, &config)?.run()
  249. })
  250. },
  251. {
  252. let config = Arc::clone(&config);
  253. let id = Arc::clone(&id);
  254. thread::spawn(move || -> anyhow::Result<()> {
  255. NanomonSV::initialize(&id, &config)?.run()
  256. })
  257. },
  258. {
  259. let config = Arc::clone(&config);
  260. let id = Arc::clone(&id);
  261. thread::spawn(move || -> anyhow::Result<()> { run_chunkeds(&id, &config) })
  262. },
  263. ];
  264. for h in handles {
  265. h.join()
  266. .map_err(|_| anyhow::anyhow!("somatic caller thread panicked"))??;
  267. }
  268. } else {
  269. Severus::initialize(id, config)?.run()?;
  270. Savana::initialize(id, config)?.run()?;
  271. NanomonSV::initialize(id, config)?.run()?;
  272. run_chunkeds(id, config)?;
  273. }
  274. Ok(())
  275. }
  276. pub fn run_chunkeds(id: &str, config: &Config) -> anyhow::Result<()> {
  277. // DeepSomatic - somatic SNV/indels
  278. DeepSomatic::initialize(id, config)?.run()?;
  279. // Mutect2 - somatic SNV/indels caller
  280. // Mutect2::initialize(id, config)?.run()?;
  281. // DeepVariant - germline variants for normal sample
  282. DeepVariant::initialize(id, &config.normal_name, config)?.run()?;
  283. // DeepVariant - germline variants for tumor sample
  284. DeepVariant::initialize(id, &config.tumoral_name, config)?.run()?;
  285. // Straglr - Short Tandem Repeat (STR) genotyper
  286. // Straglr::initialize(id, config)?.run()
  287. Ok(())
  288. }
  #[cfg(test)]
  mod tests {
      use super::*;
      use crate::helpers::test_init;

      /// Runs the whole somatic caller pipeline for the `CHAHA` sample using the
      /// default configuration; any caller error fails the test.
      #[test]
      fn callers_run_all() -> anyhow::Result<()> {
          test_init();

          let default_config = Config::default();
          run_somatic_callers("CHAHA", &default_config)?;
          Ok(())
      }
  }