//! # Variant Caller Integrations
//!
//! This module provides wrappers for multiple variant callers optimized for long-read
//! sequencing data (ONT and PacBio). All callers are integrated with the shared runner
//! pattern, allowing seamless execution in local or Slurm HPC environments via the `run!` macro.
//!
//! ## Overview
//!
//! The module includes seven production-grade variant callers, each specialized for different
//! variant types and use cases:
//!
//! ### Small Variant Callers
//!
//! - **[ClairS]** - Deep learning-based somatic SNV/indel caller (paired tumor-normal)
//!   - Haplotype-aware calling with LongPhase integration
//!   - Dual output: somatic + germline variants
//!   - Best for: Somatic SNV/indel detection in cancer samples
//!   - [GitHub](https://github.com/HKU-BAL/ClairS)
//!
//! - **[DeepVariant]** - Deep learning-based germline variant caller (single-sample)
//!   - Karyotype-aware for accurate X/Y chromosome calling
//!   - Platform-agnostic models (ONT, PacBio, Illumina)
//!   - Best for: Germline SNV/indel detection
//!   - [GitHub](https://github.com/google/deepvariant)
//!
//! - **[DeepSomatic]** - Deep learning-based somatic variant caller (paired tumor-normal)
//!   - Derived from DeepVariant architecture
//!   - Optimized for somatic mutation detection
//!   - Best for: Somatic SNV/indel detection
//!   - [GitHub](https://github.com/google/deepsomatic)
//!
//! ### Structural Variant Callers
//!
//! - **[NanomonSV]** - Structural variant caller for paired and solo modes
//!   - Detects deletions, insertions, duplications, inversions, translocations
//!   - Supports tumor-normal paired analysis
//!   - Best for: General SV detection in cancer samples
//!   - [GitHub](https://github.com/friend1ws/nanomonsv)
//!
//! - **[Savana]** - Haplotype-aware SV and CNV caller (paired tumor-normal)
//!   - Integrated copy number variation analysis
//!   - Allele-specific CNV detection
//!   - Requires phased germline variants and haplotagged BAMs
//!   - Best for: Combined SV + CNV analysis with haplotype information
//!   - [GitHub](https://github.com/cortes-ciriano-lab/savana)
//!
//! - **[Severus]** - VNTR and structural variant caller (paired and solo modes)
//!   - Specialized in VNTR (Variable Number Tandem Repeat) detection
//!   - High-precision breakpoint resolution
//!   - Resolves complex overlapping SVs
//!   - Best for: VNTR analysis and complex SV detection
//!   - [GitHub](https://github.com/KolmogorovLab/Severus)
//!
//! ### STR Genotypers
//!
//! - **[Straglr]** - Short Tandem Repeat (STR) genotyper (paired and solo modes)
//!   - Detects pathogenic repeat expansions in known disease loci
//!   - Supports custom loci via BED file (RepeatMasker Simple_repeat)
//!   - Provides allele-level genotyping with read support
//!   - Best for: STR expansion detection in neurological and muscular diseases
//!   - [GitHub](https://github.com/bcgsc/straglr)
//!
//! ## Execution Modes
//!
//! All callers support:
//! - **Local execution** - Direct command execution for debugging/testing
//! - **Slurm execution** - HPC job submission via `srun` or `sbatch`
//! - **Chunked parallel execution** - Genome splitting for whole-genome analysis
//!
//! Execution mode is automatically selected based on `config.slurm_runner`.
//!
//! ## Concurrency Control
//!
//! All callers use [`SampleLock`] to prevent concurrent execution on the same sample.
//! This is critical for:
//! - Preventing data corruption from parallel writes
//! - Avoiding redundant computation when multiple jobs target the same sample
//! - Ensuring atomicity of multi-step pipelines (e.g., chunked execution + merge)
//!
//! The locking mechanism uses atomic directory creation, which is reliable on distributed
//! filesystems (BeeGFS, NFS, Lustre). Stale locks are automatically detected and cleaned
//! via SLURM job ID validation or PID checks.
//!
//! ## Typical Workflow
//!
//! 1. **Initialize** - Create caller instance with `Initialize::initialize()` or `InitializeSolo::initialize()`
//! 2. **Check freshness** - Use `ShouldRun::should_run()` to avoid redundant work
//! 3. **Execute** - Run caller with `Run::run()`
//! 4. **Load variants** - Extract results with `Variants::variants()`
//!
//! ## Convenience Function
//!
//! The [`run_somatic_callers()`] function drives a complete multi-caller analysis pipeline:
//! it runs ClairS and somatic phasing first, then the remaining callers, concurrently in
//! threads when `config.slurm_runner` is set and sequentially otherwise.
//!
//! ## Usage Examples
//!
//! ### Individual Caller
//!
//! ```ignore
//! use pandora_lib_promethion::callers::clairs::ClairS;
//! use pandora_lib_promethion::config::Config;
//! use pandora_lib_promethion::pipes::Initialize;
//! use pandora_lib_promethion::runners::Run;
//!
//! let config = Config::default();
//! let mut clairs = ClairS::initialize("sample_001", &config)?;
//!
//! if clairs.should_run() {
//!     clairs.run()?;
//! }
//!
//! let variants = clairs.variants(&annotations)?;
//! # Ok::<(), anyhow::Error>(())
//! ```
//!
//! ### Complete Multi-Caller Pipeline
//!
//! ```ignore
//! use pandora_lib_promethion::callers::run_somatic_callers;
//! use pandora_lib_promethion::config::Config;
//!
//! let config = Config::default();
//! run_somatic_callers("sample_001", &config)?;
//! # Ok::<(), anyhow::Error>(())
//! ```
//!
//! ## References
//!
//! Each caller module contains detailed documentation including:
//! - Variant types detected
//! - Requirements and dependencies
//! - Output file formats and locations
//! - Usage examples
- Scientific publications use std::{sync::Arc, thread}; use crate::{ callers::{ clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, gatk::Mutect2, nanomonsv::NanomonSV, savana::Savana, severus::Severus, straglr::Straglr, }, commands::longphase::run_phasing_somatic, config::Config, pipes::{Initialize, InitializeSolo}, runners::Run, scan::scan::SomaticScan, }; pub mod clairs; pub mod deep_somatic; pub mod deep_variant; pub mod gatk; pub mod nanomonsv; pub mod savana; pub mod severus; pub mod straglr; pub mod coral; /// Runs all somatic variant callers sequentially for comprehensive multi-caller analysis. /// /// Executes the following callers in order: /// 1. **DeepVariant** (normal sample) - Germline SNV/indels /// 2. **DeepVariant** (tumor sample) - Germline SNV/indels /// 3. **ClairS** - Somatic SNV/indels (paired) /// 4. **Severus** - Somatic SVs and VNTRs (paired) /// 5. **Savana** - Somatic SVs and CNVs (paired, haplotype-aware) /// 6. **NanomonSV** - Somatic SVs (paired) /// 7. **DeepSomatic** - Somatic SNV/indels (paired) /// /// Each caller automatically: /// - Checks if it needs to run based on output freshness /// - Skips execution if outputs are up-to-date /// - Handles prerequisite steps (e.g., phasing, haplotagging) /// - Filters results to PASS-only variants /// /// # Arguments /// /// * `id` - Sample identifier /// * `config` - Global pipeline configuration /// /// # Returns /// /// `Ok(())` if all callers complete successfully, or an error from the first failed caller. /// /// # Errors /// /// Returns an error if any caller fails. 
Common failure modes: /// - Missing or corrupted BAM files /// - Missing reference genome or annotation files /// - Insufficient disk space for outputs /// - Singularity/Docker image not found /// - Slurm job submission failures (if `config.slurm_runner = true`) /// - Individual caller-specific errors (see each caller's documentation) /// /// # Performance Notes /// /// This function runs callers **sequentially**, not in parallel. For parallel execution, /// invoke callers individually using separate processes or jobs. /// /// Typical runtime for whole-genome sequencing (30x coverage): /// - DeepVariant: 2-4 hours (per sample, chunked) /// - ClairS: 4-6 hours (chunked) /// - Severus: 1-2 hours /// - Savana: 6 hours /// - NanomonSV: 1-2 hours /// - DeepSomatic: 3-5 hours (chunked) /// /// Total: ~15-25 hours sequential execution /// /// # Example /// /// ```ignore /// use pandora_lib_promethion::callers::run_somatic_callers; /// use pandora_lib_promethion::config::Config; /// /// let config = Config::default(); /// run_somatic_callers("sample_001", &config)?; /// /// println!("All somatic callers completed successfully!"); /// # Ok::<(), anyhow::Error>(()) /// ``` pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> { // ClairS - somatic SNV/indels with haplotype awareness // First gives germlines for phasing/haplotagging ClairS::initialize(id, config)?.run()?; run_phasing_somatic(id, config)?; // if slurm send jobs in parallel else run caller sequentially if config.slurm_runner { let config = Arc::new(config.clone()); let id: Arc = Arc::from(id); let handles = vec![ { let config = Arc::clone(&config); let id = Arc::clone(&id); thread::spawn(move || -> anyhow::Result<()> { SomaticScan::initialize(&id, &config)?.run() }) }, { let config = Arc::clone(&config); let id = Arc::clone(&id); thread::spawn(move || -> anyhow::Result<()> { Severus::initialize(&id, &config)?.run() }) }, { let config = Arc::clone(&config); let id = Arc::clone(&id); 
thread::spawn(move || -> anyhow::Result<()> { Savana::initialize(&id, &config)?.run() }) }, { let config = Arc::clone(&config); let id = Arc::clone(&id); thread::spawn(move || -> anyhow::Result<()> { NanomonSV::initialize(&id, &config)?.run() }) }, { let config = Arc::clone(&config); let id = Arc::clone(&id); thread::spawn(move || -> anyhow::Result<()> { run_chunkeds(&id, &config) }) }, ]; for h in handles { h.join() .map_err(|_| anyhow::anyhow!("somatic caller thread panicked"))??; } } else { Severus::initialize(id, config)?.run()?; Savana::initialize(id, config)?.run()?; NanomonSV::initialize(id, config)?.run()?; run_chunkeds(id, config)?; } Ok(()) } pub fn run_chunkeds(id: &str, config: &Config) -> anyhow::Result<()> { // DeepSomatic - somatic SNV/indels DeepSomatic::initialize(id, config)?.run()?; // Mutect2 - somatic SNV/indels caller // Mutect2::initialize(id, config)?.run()?; // DeepVariant - germline variants for normal sample DeepVariant::initialize(id, &config.normal_name, config)?.run()?; // DeepVariant - germline variants for tumor sample DeepVariant::initialize(id, &config.tumoral_name, config)?.run()?; // Straglr - Short Tandem Repeat (STR) genotyper // Straglr::initialize(id, config)?.run() Ok(()) } #[cfg(test)] mod tests { use super::*; use crate::helpers::test_init; #[test] fn callers_run_all() -> anyhow::Result<()> { test_init(); let config = Config::default(); run_somatic_callers("CHAHA", &config) } }