//! # Severus Structural Variant Caller //! //! This module provides wrappers for [Severus](https://github.com/KolmogorovLab/Severus), //! a structural variant caller specialized in VNTR (Variable Number Tandem Repeat) detection //! and complex SV resolution from long-read sequencing data. //! //! ## Overview //! //! Severus detects: //! - Structural variants (deletions, insertions, inversions, translocations) //! - VNTR expansions and contractions //! - Complex nested rearrangements //! - Junction-level SV breakpoints with high precision //! //! ## Key Features //! //! - **VNTR-aware calling** - Uses VNTR annotations from BED file //! - **Phasing integration** - Leverages phased VCFs for haplotype resolution //! - **High precision** - Resolves overlapping SVs and ambiguous junctions //! - **Alignment writing** - Optional detailed alignment output for validation //! //! ## Requirements //! //! - Tumor and normal BAMs (paired mode) or single BAM (solo mode) //! - VNTR annotation BED file (`config.vntrs_bed`) //! - Phased germline VCF (automatically generated via LongPhase if missing) //! - Conda environment with Severus configured (environment name: severus_env) //! //! ## Output Files //! //! Paired mode PASS-filtered VCF: //! ```text //! {result_dir}/{id}/severus/severus_PASSED.vcf.gz //! ``` //! //! Solo mode PASS-filtered VCF: //! ```text //! {result_dir}/{id}/severus_solo/{time_point}/severus_PASSED.vcf.gz //! ``` //! //! ## Usage //! //! ### Paired (Tumor-Normal) Mode //! //! ```ignore //! use pandora_lib_promethion::callers::severus::Severus; //! use pandora_lib_promethion::config::Config; //! use pandora_lib_promethion::pipes::Initialize; //! use pandora_lib_promethion::runners::Run; //! //! let config = Config::default(); //! let mut caller = Severus::initialize("sample_001", &config)?; //! //! if caller.should_run() { //! caller.run()?; // Automatically handles phasing if needed //! } //! //! // Load variants including VNTRs //! let variants = caller.variants(&annotations)?; //! println!("Found {} SVs (including VNTRs)", variants.variants.len()); //! # Ok::<(), anyhow::Error>(()) //! ``` //! //! ### Solo Mode //! //! ```ignore //! use pandora_lib_promethion::callers::severus::SeverusSolo; //! use pandora_lib_promethion::pipes::InitializeSolo; //! //! let config = Config::default(); //! let mut caller = SeverusSolo::initialize("sample_001", "norm", &config)?; //! caller.run()?; //! # Ok::<(), anyhow::Error>(()) //! ``` //! //! ## References //! //! - [Severus GitHub](https://github.com/KolmogorovLab/Severus) //! - [Severus Paper](https://doi.org/10.1038/s41587-024-02340-1) use crate::{ annotation::{Annotation, Annotations, Caller, CallerCat, Sample}, collection::vcf::Vcf, commands::{ bcftools::BcftoolsKeepPassPrecise, longphase::LongphasePhase, Command as JobCommand, LocalRunner, SlurmParams, SlurmRunner, }, config::Config, helpers::{is_file_older, remove_dir_if_exists}, io::vcf::read_vcf, pipes::{Initialize, InitializeSolo, ShouldRun, Version}, run, runners::Run, variant::{ variant::{Label, Variants}, variant_collection::VariantCollection, }, }; use anyhow::Context; use log::{debug, info}; use rayon::prelude::*; use std::{fs, path::Path}; /// Severus paired (tumor-normal) structural variant caller. /// /// Executes Severus for somatic SV detection including VNTR analysis. /// Automatically handles prerequisite phasing via LongPhase if needed. /// /// # Fields /// /// - `id` - Sample identifier (e.g., "34528") /// - `config` - Global pipeline configuration /// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/severus") #[derive(Debug)] pub struct Severus { /// Sample identifier pub id: String, /// Global pipeline configuration pub config: Config, /// Directory for log file storage pub log_dir: String, } impl Initialize for Severus { /// Initializes a new Severus instance for a given sample ID and configuration. /// /// This will create the output log directory path and clean up previous output files /// if the run should be re-executed or `severus_force` is set and an output VCF exists. /// /// # Arguments /// /// * `id` - The sample ID /// * `config` - The execution configuration /// /// # Returns /// /// A `Severus` instance wrapped in `Ok`, or an error if setup fails fn initialize(id: &str, config: &Config) -> anyhow::Result { info!("Initialize Severus for {id}."); let log_dir = format!("{}/{}/log/severus", config.result_dir, id); let severus = Self { id: id.to_string(), config: config.clone(), log_dir, }; if severus.config.severus_force { remove_dir_if_exists(&severus.config.severus_output_dir(id))?; } Ok(severus) } } impl ShouldRun for Severus { /// Determines whether Severus should re-run based on whether the PASS VCF /// is older than either the tumor or normal BAM file. /// /// # Returns /// /// `true` if Severus needs to be re-run, otherwise `false` fn should_run(&self) -> bool { let passed_vcf = &self.config.severus_passed_vcf(&self.id); let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true) .unwrap_or(true) || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true); if result { info!("Severus should run for: {}.", self.id); } result } } impl Run for Severus { /// Runs the Severus structural variant caller if its output VCF does not already exist. /// /// It also conditionally runs the upstream phasing module (`LongphasePhase`) if the required /// phased VCF is missing. After running Severus, it filters PASS variants using `bcftools`. /// /// # Returns /// /// `Ok(())` if everything runs successfully; otherwise, an error with context. fn run(&mut self) -> anyhow::Result<()> { if !self.should_run() { anyhow::bail!("Severus is up-to-date"); } let id = &self.id; let output_vcf = &self.config.severus_output_vcf(id); let passed_vcf = &self.config.severus_passed_vcf(id); if !Path::new(output_vcf).exists() { let constit_phased_vcf = &self.config.constit_phased_vcf(id); // Run Longphase if necessary if !Path::new(constit_phased_vcf).exists() { let mut phase = LongphasePhase::initialize(&self.id, &self.config)?; crate::runners::Run::run(&mut phase)?; // run!(&self.config, &mut phase)?; } fs::create_dir_all(self.config.severus_output_dir(id)) .context("Failed to create Severus output directory")?; let severus_args: Vec = vec![ "--target-bam".into(), self.config.tumoral_bam(id), "--control-bam".into(), self.config.normal_bam(id), "--phasing-vcf".into(), constit_phased_vcf.clone(), "--out-dir".into(), self.config.severus_output_dir(id), "-t".into(), self.config.severus_threads.to_string(), "--write-alignments".into(), "--use-supplementary-tag".into(), "--resolve-overlaps".into(), "--between-junction-ins".into(), "--vntr-bed".into(), self.config.vntrs_bed.clone(), ]; let mut job = SeverusJob { conda_sh: self.config.conda_sh.clone(), severus_bin: self.config.severus_bin.clone(), args: severus_args, }; let output = run!(&self.config, &mut job) .context("Error while running Severus (local/Slurm)")?; let log_file = format!("{}/severus_", self.log_dir); output .save_to_file(&log_file) .context(format!("Error while writing Severus logs into {log_file}"))?; } else { debug!( "Severus output VCF already exists for {}, skipping execution.", self.id ); } // Filter PASS variants if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() { let mut keep = BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf); let report = run!(&self.config, &mut keep).context(format!( "Error while running bcftools keep PASS for {output_vcf}" ))?; let log_file = format!("{}/severus_bcftools_pass", self.log_dir); report .save_to_file(&log_file) .context(format!("Error while writing logs into {log_file}"))?; } else { debug!( "Severus PASSED VCF already exists for {}, skipping execution.", self.id ); } Ok(()) } } #[derive(Debug, Clone)] struct SeverusJob { conda_sh: String, severus_bin: String, args: Vec, } impl JobCommand for SeverusJob { fn cmd(&self) -> String { format!( "source {conda_sh} && conda activate severus_env && {bin} {args}", conda_sh = self.conda_sh, bin = self.severus_bin, args = self.args.join(" ") ) } } impl LocalRunner for SeverusJob {} impl SlurmRunner for SeverusJob { fn slurm_args(&self) -> Vec { SlurmParams { job_name: Some("severus".into()), partition: Some("shortq".into()), cpus_per_task: Some(16), mem: Some("120G".into()), gres: None, } .to_args() } } impl CallerCat for Severus { /// Returns the annotation category for Severus calls. /// /// This indicates that the variants come from the `Severus` caller /// and are somatic in nature. It is used downstream for caller-specific /// annotation tracking and filtering. fn caller_cat(&self) -> Annotation { Annotation::Callers(Caller::Severus, Sample::Somatic) } } impl Variants for Severus { /// Loads the variant calls from the Severus filtered PASS VCF. /// /// This method reads the VCF file output by Severus (after PASS filtering), /// assigns the appropriate annotation tag to each variant using `caller_cat()`, /// and updates the provided `Annotations` map in parallel. /// /// # Arguments /// /// * `annotations` - A reference to the global annotations map used /// for aggregating and tracking variant-level metadata across callers. /// /// # Returns /// /// A `VariantCollection` object containing: /// - the parsed variants, /// - the `Vcf` wrapper of the file path, /// - and the caller identity used for tagging. /// /// # Errors /// /// Returns an error if the VCF file cannot be read or parsed. fn variants(&self, annotations: &Annotations) -> anyhow::Result { let vcf_passed = self.config.severus_passed_vcf(&self.id); let caller = self.caller_cat(); let add = vec![caller.clone()]; info!("Loading variants from {caller}: {vcf_passed}"); let variants = read_vcf(&vcf_passed) .map_err(|e| anyhow::anyhow!("Failed to read Severus VCF {}.\n{e}", vcf_passed))?; variants.par_iter().for_each(|v| { annotations.insert_update(v.hash(), &add); }); info!("{}, {} variants loaded.", caller, variants.len()); Ok(VariantCollection { variants, vcf: Vcf::new(vcf_passed.into())?, caller, }) } } impl Label for Severus { /// Returns the string label for this caller. fn label(&self) -> String { self.caller_cat().to_string() } } impl Version for Severus { /// Retrieves the Severus version by running `severus --version` in its conda environment. /// /// # Errors /// Returns an error if command execution fails or "Version " not found in output. fn version(config: &Config) -> anyhow::Result { let mut job = SeverusJob { conda_sh: config.conda_sh.clone(), severus_bin: config.severus_bin.clone(), args: vec!["--version".to_string()], }; let out = run!(&config, &mut job).context("Error while running `severus --version`")?; let combined = format!("{}{}", out.stdout, out.stderr); // Take first non-empty trimmed line let line = combined .lines() .map(str::trim) .find(|l| !l.is_empty()) .ok_or_else(|| anyhow::anyhow!("No output from `severus --version`"))?; // Accept either "1.6" or "Severus version: 1.6" let v = if let Some((_, value)) = line.split_once(':') { value.trim().to_string() } else { line.to_string() }; Ok(v) } } /// Severus solo (single-sample) structural variant caller. /// /// Detects structural variants including VNTRs from a single BAM file without a matched control. /// Useful for germline SV detection or when no matched normal is available. /// /// # Fields /// /// - `id` - Sample identifier (e.g., "34528") /// - `time` - Time point label: typically `config.normal_name` ("norm") or `config.tumoral_name` ("diag") /// - `config` - Global pipeline configuration /// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/severus_solo") #[derive(Debug)] pub struct SeverusSolo { /// Sample identifier pub id: String, /// Time point identifier (e.g., "norm" or "diag") pub time: String, /// Global pipeline configuration pub config: Config, /// Directory for log file storage pub log_dir: String, } impl InitializeSolo for SeverusSolo { /// Initializes Severus solo analysis for a sample at a specific time point. /// /// Creates necessary log directory. /// /// # Errors /// Returns an error if directory creation fails. fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result { let log_dir = format!("{}/{}/log/severus_solo", config.result_dir, id); if !Path::new(&log_dir).exists() { fs::create_dir_all(&log_dir) .context(format!("Failed to create {log_dir} directory"))?; } Ok(SeverusSolo { id: id.to_string(), time: time.to_string(), config: config.clone(), log_dir, }) } } impl Run for SeverusSolo { /// Runs the Severus pipeline and filters for PASS variants. /// /// Skips steps if their output files already exist. /// /// # Errors /// Returns an error if Severus execution, filtering, or log writing fails. fn run(&mut self) -> anyhow::Result<()> { let id = &self.id; let time = &self.time; let output_vcf = &self.config.severus_solo_output_vcf(id, time); let passed_vcf = &self.config.severus_solo_passed_vcf(id, time); if !Path::new(output_vcf).exists() { // Run command if output VCF doesn't exist let mut job = SeverusJob { conda_sh: self.config.conda_sh.clone(), severus_bin: self.config.severus_bin.clone(), args: vec![ "--target-bam".into(), self.config.solo_bam(id, time), "--out-dir".into(), self.config.severus_solo_output_dir(id, time), "-t".into(), self.config.severus_threads.to_string(), "--write-alignments".into(), "--use-supplementary-tag".into(), "--resolve-overlaps".into(), "--between-junction-ins".into(), "--vntr-bed".into(), self.config.vntrs_bed.clone(), ], }; let report = run!(&self.config, &mut job).context("Error while running severus solo")?; let log_file = format!("{}/severus_", self.log_dir); report .save_to_file(&log_file) .context(format!("Error while writing logs into {log_file}"))?; } else { debug!("Severus output vcf already exists."); } // Keep PASS if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() { let mut keep = BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf); let report = run!(&self.config, &mut keep).context(format!( "Error while running bcftools keep PASS for {output_vcf}" ))?; let log_file = format!("{}/bcftools_pass", self.log_dir); report .save_to_file(&log_file) .context(format!("Error while writing logs into {log_file}"))?; } else { debug!("VCF PASSED already exists for Severus."); } Ok(()) } } #[cfg(test)] mod tests { use super::*; use crate::helpers::test_init; #[test] fn severus_version() -> anyhow::Result<()> { test_init(); let vl = Severus::version(&Config::default())?; info!("Severus version: {vl}"); Ok(()) } #[test] fn severus_run() -> anyhow::Result<()> { test_init(); let config = Config::default(); // let clairs = ClairS::initialize("34528", &config)?; // info!("{clairs}"); let mut caller = Severus::initialize("DUMCO", &config)?; caller.run()?; Ok(()) } }