| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524 |
- //! # Severus Structural Variant Caller
- //!
- //! This module provides wrappers for [Severus](https://github.com/KolmogorovLab/Severus),
- //! a structural variant caller specialized in VNTR (Variable Number Tandem Repeat) detection
- //! and complex SV resolution from long-read sequencing data.
- //!
- //! ## Overview
- //!
- //! Severus detects:
- //! - Structural variants (deletions, insertions, inversions, translocations)
- //! - VNTR expansions and contractions
- //! - Complex nested rearrangements
- //! - Junction-level SV breakpoints with high precision
- //!
- //! ## Key Features
- //!
- //! - **VNTR-aware calling** - Uses VNTR annotations from BED file
- //! - **Phasing integration** - Leverages phased VCFs for haplotype resolution
- //! - **High precision** - Resolves overlapping SVs and ambiguous junctions
- //! - **Alignment writing** - Optional detailed alignment output for validation
- //!
- //! ## Requirements
- //!
- //! - Tumor and normal BAMs (paired mode) or single BAM (solo mode)
- //! - VNTR annotation BED file (`config.vntrs_bed`)
- //! - Phased germline VCF (automatically generated via LongPhase if missing)
- //! - Conda environment with Severus configured (environment name: severus_env)
- //!
- //! ## Output Files
- //!
- //! Paired mode PASS-filtered VCF:
- //! ```text
- //! {result_dir}/{id}/severus/severus_PASSED.vcf.gz
- //! ```
- //!
- //! Solo mode PASS-filtered VCF:
- //! ```text
- //! {result_dir}/{id}/severus_solo/{time_point}/severus_PASSED.vcf.gz
- //! ```
- //!
- //! ## Usage
- //!
- //! ### Paired (Tumor-Normal) Mode
- //!
- //! ```ignore
- //! use pandora_lib_promethion::callers::severus::Severus;
- //! use pandora_lib_promethion::config::Config;
- //! use pandora_lib_promethion::pipes::Initialize;
- //! use pandora_lib_promethion::runners::Run;
- //!
- //! let config = Config::default();
- //! let mut caller = Severus::initialize("sample_001", &config)?;
- //!
- //! if caller.should_run() {
- //! caller.run()?; // Automatically handles phasing if needed
- //! }
- //!
- //! // Load variants including VNTRs
- //! let variants = caller.variants(&annotations)?;
- //! println!("Found {} SVs (including VNTRs)", variants.variants.len());
- //! # Ok::<(), anyhow::Error>(())
- //! ```
- //!
- //! ### Solo Mode
- //!
- //! ```ignore
- //! use pandora_lib_promethion::callers::severus::SeverusSolo;
- //! use pandora_lib_promethion::pipes::InitializeSolo;
- //!
- //! let config = Config::default();
- //! let mut caller = SeverusSolo::initialize("sample_001", "norm", &config)?;
- //! caller.run()?;
- //! # Ok::<(), anyhow::Error>(())
- //! ```
- //!
- //! ## References
- //!
- //! - [Severus GitHub](https://github.com/KolmogorovLab/Severus)
- //! - [Severus Paper](https://doi.org/10.1038/s41587-024-02340-1)
- use crate::{
- annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
- collection::vcf::Vcf,
- commands::{
- bcftools::BcftoolsKeepPassPrecise, longphase::LongphasePhase, Command as JobCommand,
- LocalRunner, SlurmParams, SlurmRunner,
- },
- config::Config,
- helpers::{is_file_older, remove_dir_if_exists},
- io::vcf::read_vcf,
- pipes::{Initialize, InitializeSolo, ShouldRun, Version},
- run,
- runners::Run,
- variant::{
- variant::{Label, Variants},
- variant_collection::VariantCollection,
- },
- };
- use anyhow::Context;
- use log::{debug, info};
- use rayon::prelude::*;
- use std::{fs, path::Path};
- /// Severus paired (tumor-normal) structural variant caller.
- ///
- /// Executes Severus for somatic SV detection including VNTR analysis.
- /// Automatically handles prerequisite phasing via LongPhase if needed.
- ///
- /// # Fields
- ///
- /// - `id` - Sample identifier (e.g., "34528")
- /// - `config` - Global pipeline configuration
- /// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/severus")
- #[derive(Debug)]
- pub struct Severus {
- /// Sample identifier
- pub id: String,
- /// Global pipeline configuration
- pub config: Config,
- /// Directory for log file storage
- pub log_dir: String,
- }
- impl Initialize for Severus {
- /// Initializes a new Severus instance for a given sample ID and configuration.
- ///
- /// This will create the output log directory path and clean up previous output files
- /// if the run should be re-executed or `severus_force` is set and an output VCF exists.
- ///
- /// # Arguments
- ///
- /// * `id` - The sample ID
- /// * `config` - The execution configuration
- ///
- /// # Returns
- ///
- /// A `Severus` instance wrapped in `Ok`, or an error if setup fails
- fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
- info!("Initialize Severus for {id}.");
- let log_dir = format!("{}/{}/log/severus", config.result_dir, id);
- let severus = Self {
- id: id.to_string(),
- config: config.clone(),
- log_dir,
- };
- if severus.config.severus_force {
- remove_dir_if_exists(&severus.config.severus_output_dir(id))?;
- }
- Ok(severus)
- }
- }
- impl ShouldRun for Severus {
- /// Determines whether Severus should re-run based on whether the PASS VCF
- /// is older than either the tumor or normal BAM file.
- ///
- /// # Returns
- ///
- /// `true` if Severus needs to be re-run, otherwise `false`
- fn should_run(&self) -> bool {
- let passed_vcf = &self.config.severus_passed_vcf(&self.id);
- let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true)
- .unwrap_or(true)
- || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
- if result {
- info!("Severus should run for: {}.", self.id);
- }
- result
- }
- }
- impl Run for Severus {
- /// Runs the Severus structural variant caller if its output VCF does not already exist.
- ///
- /// It also conditionally runs the upstream phasing module (`LongphasePhase`) if the required
- /// phased VCF is missing. After running Severus, it filters PASS variants using `bcftools`.
- ///
- /// # Returns
- ///
- /// `Ok(())` if everything runs successfully; otherwise, an error with context.
- fn run(&mut self) -> anyhow::Result<()> {
- if !self.should_run() {
- anyhow::bail!("Severus is up-to-date");
- }
- let id = &self.id;
- let output_vcf = &self.config.severus_output_vcf(id);
- let passed_vcf = &self.config.severus_passed_vcf(id);
- if !Path::new(output_vcf).exists() {
- let constit_phased_vcf = &self.config.constit_phased_vcf(id);
- // Run Longphase if necessary
- if !Path::new(constit_phased_vcf).exists() {
- let mut phase = LongphasePhase::initialize(&self.id, &self.config)?;
- crate::runners::Run::run(&mut phase)?;
- // run!(&self.config, &mut phase)?;
- }
- fs::create_dir_all(self.config.severus_output_dir(id))
- .context("Failed to create Severus output directory")?;
- let severus_args: Vec<String> = vec![
- "--target-bam".into(),
- self.config.tumoral_bam(id),
- "--control-bam".into(),
- self.config.normal_bam(id),
- "--phasing-vcf".into(),
- constit_phased_vcf.clone(),
- "--out-dir".into(),
- self.config.severus_output_dir(id),
- "-t".into(),
- self.config.severus_threads.to_string(),
- "--write-alignments".into(),
- "--use-supplementary-tag".into(),
- "--resolve-overlaps".into(),
- "--between-junction-ins".into(),
- "--vntr-bed".into(),
- self.config.vntrs_bed.clone(),
- ];
- let mut job = SeverusJob {
- conda_sh: self.config.conda_sh.clone(),
- severus_bin: self.config.severus_bin.clone(),
- args: severus_args,
- };
- let output = run!(&self.config, &mut job)
- .context("Error while running Severus (local/Slurm)")?;
- let log_file = format!("{}/severus_", self.log_dir);
- output
- .save_to_file(&log_file)
- .context(format!("Error while writing Severus logs into {log_file}"))?;
- } else {
- debug!(
- "Severus output VCF already exists for {}, skipping execution.",
- self.id
- );
- }
- // Filter PASS variants
- if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
- let mut keep =
- BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf);
- let report = run!(&self.config, &mut keep).context(format!(
- "Error while running bcftools keep PASS for {output_vcf}"
- ))?;
- let log_file = format!("{}/severus_bcftools_pass", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- } else {
- debug!(
- "Severus PASSED VCF already exists for {}, skipping execution.",
- self.id
- );
- }
- Ok(())
- }
- }
- #[derive(Debug, Clone)]
- struct SeverusJob {
- conda_sh: String,
- severus_bin: String,
- args: Vec<String>,
- }
- impl JobCommand for SeverusJob {
- fn cmd(&self) -> String {
- format!(
- "source {conda_sh} && conda activate severus_env && {bin} {args}",
- conda_sh = self.conda_sh,
- bin = self.severus_bin,
- args = self.args.join(" ")
- )
- }
- }
- impl LocalRunner for SeverusJob {}
- impl SlurmRunner for SeverusJob {
- fn slurm_args(&self) -> Vec<String> {
- SlurmParams {
- job_name: Some("severus".into()),
- partition: Some("shortq".into()),
- cpus_per_task: Some(16),
- mem: Some("120G".into()),
- gres: None,
- }
- .to_args()
- }
- }
- impl CallerCat for Severus {
- /// Returns the annotation category for Severus calls.
- ///
- /// This indicates that the variants come from the `Severus` caller
- /// and are somatic in nature. It is used downstream for caller-specific
- /// annotation tracking and filtering.
- fn caller_cat(&self) -> Annotation {
- Annotation::Callers(Caller::Severus, Sample::Somatic)
- }
- }
- impl Variants for Severus {
- /// Loads the variant calls from the Severus filtered PASS VCF.
- ///
- /// This method reads the VCF file output by Severus (after PASS filtering),
- /// assigns the appropriate annotation tag to each variant using `caller_cat()`,
- /// and updates the provided `Annotations` map in parallel.
- ///
- /// # Arguments
- ///
- /// * `annotations` - A reference to the global annotations map used
- /// for aggregating and tracking variant-level metadata across callers.
- ///
- /// # Returns
- ///
- /// A `VariantCollection` object containing:
- /// - the parsed variants,
- /// - the `Vcf` wrapper of the file path,
- /// - and the caller identity used for tagging.
- ///
- /// # Errors
- ///
- /// Returns an error if the VCF file cannot be read or parsed.
- fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
- let vcf_passed = self.config.severus_passed_vcf(&self.id);
- let caller = self.caller_cat();
- let add = vec![caller.clone()];
- info!("Loading variants from {caller}: {vcf_passed}");
- let variants = read_vcf(&vcf_passed)
- .map_err(|e| anyhow::anyhow!("Failed to read Severus VCF {}.\n{e}", vcf_passed))?;
- variants.par_iter().for_each(|v| {
- annotations.insert_update(v.hash(), &add);
- });
- info!("{}, {} variants loaded.", caller, variants.len());
- Ok(VariantCollection {
- variants,
- vcf: Vcf::new(vcf_passed.into())?,
- caller,
- })
- }
- }
- impl Label for Severus {
- /// Returns the string label for this caller.
- fn label(&self) -> String {
- self.caller_cat().to_string()
- }
- }
- impl Version for Severus {
- /// Retrieves the Severus version by running `severus --version` in its conda environment.
- ///
- /// # Errors
- /// Returns an error if command execution fails or "Version " not found in output.
- fn version(config: &Config) -> anyhow::Result<String> {
- let mut job = SeverusJob {
- conda_sh: config.conda_sh.clone(),
- severus_bin: config.severus_bin.clone(),
- args: vec!["--version".to_string()],
- };
- let out = run!(&config, &mut job).context("Error while running `severus --version`")?;
- let combined = format!("{}{}", out.stdout, out.stderr);
- // Take first non-empty trimmed line
- let line = combined
- .lines()
- .map(str::trim)
- .find(|l| !l.is_empty())
- .ok_or_else(|| anyhow::anyhow!("No output from `severus --version`"))?;
- // Accept either "1.6" or "Severus version: 1.6"
- let v = if let Some((_, value)) = line.split_once(':') {
- value.trim().to_string()
- } else {
- line.to_string()
- };
- Ok(v)
- }
- }
- /// Severus solo (single-sample) structural variant caller.
- ///
- /// Detects structural variants including VNTRs from a single BAM file without a matched control.
- /// Useful for germline SV detection or when no matched normal is available.
- ///
- /// # Fields
- ///
- /// - `id` - Sample identifier (e.g., "34528")
- /// - `time` - Time point label: typically `config.normal_name` ("norm") or `config.tumoral_name` ("diag")
- /// - `config` - Global pipeline configuration
- /// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/severus_solo")
- #[derive(Debug)]
- pub struct SeverusSolo {
- /// Sample identifier
- pub id: String,
- /// Time point identifier (e.g., "norm" or "diag")
- pub time: String,
- /// Global pipeline configuration
- pub config: Config,
- /// Directory for log file storage
- pub log_dir: String,
- }
- impl InitializeSolo for SeverusSolo {
- /// Initializes Severus solo analysis for a sample at a specific time point.
- ///
- /// Creates necessary log directory.
- ///
- /// # Errors
- /// Returns an error if directory creation fails.
- fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
- let log_dir = format!("{}/{}/log/severus_solo", config.result_dir, id);
- if !Path::new(&log_dir).exists() {
- fs::create_dir_all(&log_dir)
- .context(format!("Failed to create {log_dir} directory"))?;
- }
- Ok(SeverusSolo {
- id: id.to_string(),
- time: time.to_string(),
- config: config.clone(),
- log_dir,
- })
- }
- }
- impl Run for SeverusSolo {
- /// Runs the Severus pipeline and filters for PASS variants.
- ///
- /// Skips steps if their output files already exist.
- ///
- /// # Errors
- /// Returns an error if Severus execution, filtering, or log writing fails.
- fn run(&mut self) -> anyhow::Result<()> {
- let id = &self.id;
- let time = &self.time;
- let output_vcf = &self.config.severus_solo_output_vcf(id, time);
- let passed_vcf = &self.config.severus_solo_passed_vcf(id, time);
- if !Path::new(output_vcf).exists() {
- // Run command if output VCF doesn't exist
- let mut job = SeverusJob {
- conda_sh: self.config.conda_sh.clone(),
- severus_bin: self.config.severus_bin.clone(),
- args: vec![
- "--target-bam".into(),
- self.config.solo_bam(id, time),
- "--out-dir".into(),
- self.config.severus_solo_output_dir(id, time),
- "-t".into(),
- self.config.severus_threads.to_string(),
- "--write-alignments".into(),
- "--use-supplementary-tag".into(),
- "--resolve-overlaps".into(),
- "--between-junction-ins".into(),
- "--vntr-bed".into(),
- self.config.vntrs_bed.clone(),
- ],
- };
- let report =
- run!(&self.config, &mut job).context("Error while running severus solo")?;
- let log_file = format!("{}/severus_", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- } else {
- debug!("Severus output vcf already exists.");
- }
- // Keep PASS
- if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
- let mut keep =
- BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf);
- let report = run!(&self.config, &mut keep).context(format!(
- "Error while running bcftools keep PASS for {output_vcf}"
- ))?;
- let log_file = format!("{}/bcftools_pass", self.log_dir);
- report
- .save_to_file(&log_file)
- .context(format!("Error while writing logs into {log_file}"))?;
- } else {
- debug!("VCF PASSED already exists for Severus.");
- }
- Ok(())
- }
- }
- #[cfg(test)]
- mod tests {
- use super::*;
- use crate::helpers::test_init;
- #[test]
- fn severus_version() -> anyhow::Result<()> {
- test_init();
- let vl = Severus::version(&Config::default())?;
- info!("Severus version: {vl}");
- Ok(())
- }
- #[test]
- fn severus_run() -> anyhow::Result<()> {
- test_init();
- let config = Config::default();
- // let clairs = ClairS::initialize("34528", &config)?;
- // info!("{clairs}");
- let mut caller = Severus::initialize("DUMCO", &config)?;
- caller.run()?;
- Ok(())
- }
- }
|