severus.rs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. //! # Severus Structural Variant Caller
  2. //!
  3. //! This module provides wrappers for [Severus](https://github.com/KolmogorovLab/Severus),
  4. //! a structural variant caller specialized in VNTR (Variable Number Tandem Repeat) detection
  5. //! and complex SV resolution from long-read sequencing data.
  6. //!
  7. //! ## Overview
  8. //!
  9. //! Severus detects:
  10. //! - Structural variants (deletions, insertions, inversions, translocations)
  11. //! - VNTR expansions and contractions
  12. //! - Complex nested rearrangements
  13. //! - Junction-level SV breakpoints with high precision
  14. //!
  15. //! ## Key Features
  16. //!
  17. //! - **VNTR-aware calling** - Uses VNTR annotations from BED file
  18. //! - **Phasing integration** - Leverages phased VCFs for haplotype resolution
  19. //! - **High precision** - Resolves overlapping SVs and ambiguous junctions
  20. //! - **Alignment writing** - Optional detailed alignment output for validation
  21. //!
  22. //! ## Requirements
  23. //!
  24. //! - Tumor and normal BAMs (paired mode) or single BAM (solo mode)
  25. //! - VNTR annotation BED file (`config.vntrs_bed`)
  26. //! - Phased germline VCF (automatically generated via LongPhase if missing)
  27. //! - Conda environment with Severus configured (environment name: severus_env)
  28. //!
  29. //! ## Output Files
  30. //!
  31. //! Paired mode PASS-filtered VCF:
  32. //! ```text
  33. //! {result_dir}/{id}/severus/severus_PASSED.vcf.gz
  34. //! ```
  35. //!
  36. //! Solo mode PASS-filtered VCF:
  37. //! ```text
  38. //! {result_dir}/{id}/severus_solo/{time_point}/severus_PASSED.vcf.gz
  39. //! ```
  40. //!
  41. //! ## Usage
  42. //!
  43. //! ### Paired (Tumor-Normal) Mode
  44. //!
  45. //! ```ignore
  46. //! use pandora_lib_promethion::callers::severus::Severus;
  47. //! use pandora_lib_promethion::config::Config;
  48. //! use pandora_lib_promethion::pipes::Initialize;
  49. //! use pandora_lib_promethion::runners::Run;
  50. //!
  51. //! let config = Config::default();
  52. //! let mut caller = Severus::initialize("sample_001", &config)?;
  53. //!
  54. //! if caller.should_run() {
  55. //! caller.run()?; // Automatically handles phasing if needed
  56. //! }
  57. //!
  58. //! // Load variants including VNTRs
  59. //! let variants = caller.variants(&annotations)?;
  60. //! println!("Found {} SVs (including VNTRs)", variants.variants.len());
  61. //! # Ok::<(), anyhow::Error>(())
  62. //! ```
  63. //!
  64. //! ### Solo Mode
  65. //!
  66. //! ```ignore
  67. //! use pandora_lib_promethion::callers::severus::SeverusSolo;
  68. //! use pandora_lib_promethion::pipes::InitializeSolo;
  69. //!
  70. //! let config = Config::default();
  71. //! let mut caller = SeverusSolo::initialize("sample_001", "norm", &config)?;
  72. //! caller.run()?;
  73. //! # Ok::<(), anyhow::Error>(())
  74. //! ```
  75. //!
  76. //! ## References
  77. //!
  78. //! - [Severus GitHub](https://github.com/KolmogorovLab/Severus)
  79. //! - [Severus Paper](https://doi.org/10.1038/s41587-024-02340-1)
  80. use crate::{
  81. annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
  82. collection::vcf::Vcf,
  83. commands::{
  84. bcftools::BcftoolsKeepPassPrecise, longphase::LongphasePhase, Command as JobCommand,
  85. LocalRunner, SlurmParams, SlurmRunner,
  86. },
  87. config::Config,
  88. helpers::{is_file_older, remove_dir_if_exists},
  89. io::vcf::read_vcf,
  90. pipes::{Initialize, InitializeSolo, ShouldRun, Version},
  91. run,
  92. runners::Run,
  93. variant::{
  94. variant::{Label, Variants},
  95. variant_collection::VariantCollection,
  96. },
  97. };
  98. use anyhow::Context;
  99. use log::{debug, info};
  100. use rayon::prelude::*;
  101. use std::{fs, path::Path};
  102. /// Severus paired (tumor-normal) structural variant caller.
  103. ///
  104. /// Executes Severus for somatic SV detection including VNTR analysis.
  105. /// Automatically handles prerequisite phasing via LongPhase if needed.
  106. ///
  107. /// # Fields
  108. ///
  109. /// - `id` - Sample identifier (e.g., "34528")
  110. /// - `config` - Global pipeline configuration
  111. /// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/severus")
  112. #[derive(Debug)]
  113. pub struct Severus {
  114. /// Sample identifier
  115. pub id: String,
  116. /// Global pipeline configuration
  117. pub config: Config,
  118. /// Directory for log file storage
  119. pub log_dir: String,
  120. }
  121. impl Initialize for Severus {
  122. /// Initializes a new Severus instance for a given sample ID and configuration.
  123. ///
  124. /// This will create the output log directory path and clean up previous output files
  125. /// if the run should be re-executed or `severus_force` is set and an output VCF exists.
  126. ///
  127. /// # Arguments
  128. ///
  129. /// * `id` - The sample ID
  130. /// * `config` - The execution configuration
  131. ///
  132. /// # Returns
  133. ///
  134. /// A `Severus` instance wrapped in `Ok`, or an error if setup fails
  135. fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
  136. info!("Initialize Severus for {id}.");
  137. let log_dir = format!("{}/{}/log/severus", config.result_dir, id);
  138. let severus = Self {
  139. id: id.to_string(),
  140. config: config.clone(),
  141. log_dir,
  142. };
  143. if severus.config.severus_force {
  144. remove_dir_if_exists(&severus.config.severus_output_dir(id))?;
  145. }
  146. Ok(severus)
  147. }
  148. }
  149. impl ShouldRun for Severus {
  150. /// Determines whether Severus should re-run based on whether the PASS VCF
  151. /// is older than either the tumor or normal BAM file.
  152. ///
  153. /// # Returns
  154. ///
  155. /// `true` if Severus needs to be re-run, otherwise `false`
  156. fn should_run(&self) -> bool {
  157. let passed_vcf = &self.config.severus_passed_vcf(&self.id);
  158. let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true)
  159. .unwrap_or(true)
  160. || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
  161. if result {
  162. info!("Severus should run for: {}.", self.id);
  163. }
  164. result
  165. }
  166. }
  167. impl Run for Severus {
  168. /// Runs the Severus structural variant caller if its output VCF does not already exist.
  169. ///
  170. /// It also conditionally runs the upstream phasing module (`LongphasePhase`) if the required
  171. /// phased VCF is missing. After running Severus, it filters PASS variants using `bcftools`.
  172. ///
  173. /// # Returns
  174. ///
  175. /// `Ok(())` if everything runs successfully; otherwise, an error with context.
  176. fn run(&mut self) -> anyhow::Result<()> {
  177. if !self.should_run() {
  178. anyhow::bail!("Severus is up-to-date");
  179. }
  180. let id = &self.id;
  181. let output_vcf = &self.config.severus_output_vcf(id);
  182. let passed_vcf = &self.config.severus_passed_vcf(id);
  183. if !Path::new(output_vcf).exists() {
  184. let constit_phased_vcf = &self.config.constit_phased_vcf(id);
  185. // Run Longphase if necessary
  186. if !Path::new(constit_phased_vcf).exists() {
  187. let mut phase = LongphasePhase::initialize(&self.id, &self.config)?;
  188. crate::runners::Run::run(&mut phase)?;
  189. // run!(&self.config, &mut phase)?;
  190. }
  191. fs::create_dir_all(self.config.severus_output_dir(id))
  192. .context("Failed to create Severus output directory")?;
  193. let severus_args: Vec<String> = vec![
  194. "--target-bam".into(),
  195. self.config.tumoral_bam(id),
  196. "--control-bam".into(),
  197. self.config.normal_bam(id),
  198. "--phasing-vcf".into(),
  199. constit_phased_vcf.clone(),
  200. "--out-dir".into(),
  201. self.config.severus_output_dir(id),
  202. "-t".into(),
  203. self.config.severus_threads.to_string(),
  204. "--write-alignments".into(),
  205. "--use-supplementary-tag".into(),
  206. "--resolve-overlaps".into(),
  207. "--between-junction-ins".into(),
  208. "--vntr-bed".into(),
  209. self.config.vntrs_bed.clone(),
  210. ];
  211. let mut job = SeverusJob {
  212. conda_sh: self.config.conda_sh.clone(),
  213. severus_bin: self.config.severus_bin.clone(),
  214. args: severus_args,
  215. };
  216. let output = run!(&self.config, &mut job)
  217. .context("Error while running Severus (local/Slurm)")?;
  218. let log_file = format!("{}/severus_", self.log_dir);
  219. output
  220. .save_to_file(&log_file)
  221. .context(format!("Error while writing Severus logs into {log_file}"))?;
  222. } else {
  223. debug!(
  224. "Severus output VCF already exists for {}, skipping execution.",
  225. self.id
  226. );
  227. }
  228. // Filter PASS variants
  229. if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
  230. let mut keep =
  231. BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf);
  232. let report = run!(&self.config, &mut keep).context(format!(
  233. "Error while running bcftools keep PASS for {output_vcf}"
  234. ))?;
  235. let log_file = format!("{}/severus_bcftools_pass", self.log_dir);
  236. report
  237. .save_to_file(&log_file)
  238. .context(format!("Error while writing logs into {log_file}"))?;
  239. } else {
  240. debug!(
  241. "Severus PASSED VCF already exists for {}, skipping execution.",
  242. self.id
  243. );
  244. }
  245. Ok(())
  246. }
  247. }
  248. #[derive(Debug, Clone)]
  249. struct SeverusJob {
  250. conda_sh: String,
  251. severus_bin: String,
  252. args: Vec<String>,
  253. }
  254. impl JobCommand for SeverusJob {
  255. fn cmd(&self) -> String {
  256. format!(
  257. "source {conda_sh} && conda activate severus_env && {bin} {args}",
  258. conda_sh = self.conda_sh,
  259. bin = self.severus_bin,
  260. args = self.args.join(" ")
  261. )
  262. }
  263. }
  264. impl LocalRunner for SeverusJob {}
  265. impl SlurmRunner for SeverusJob {
  266. fn slurm_args(&self) -> Vec<String> {
  267. SlurmParams {
  268. job_name: Some("severus".into()),
  269. partition: Some("shortq".into()),
  270. cpus_per_task: Some(16),
  271. mem: Some("120G".into()),
  272. gres: None,
  273. }
  274. .to_args()
  275. }
  276. }
  277. impl CallerCat for Severus {
  278. /// Returns the annotation category for Severus calls.
  279. ///
  280. /// This indicates that the variants come from the `Severus` caller
  281. /// and are somatic in nature. It is used downstream for caller-specific
  282. /// annotation tracking and filtering.
  283. fn caller_cat(&self) -> Annotation {
  284. Annotation::Callers(Caller::Severus, Sample::Somatic)
  285. }
  286. }
  287. impl Variants for Severus {
  288. /// Loads the variant calls from the Severus filtered PASS VCF.
  289. ///
  290. /// This method reads the VCF file output by Severus (after PASS filtering),
  291. /// assigns the appropriate annotation tag to each variant using `caller_cat()`,
  292. /// and updates the provided `Annotations` map in parallel.
  293. ///
  294. /// # Arguments
  295. ///
  296. /// * `annotations` - A reference to the global annotations map used
  297. /// for aggregating and tracking variant-level metadata across callers.
  298. ///
  299. /// # Returns
  300. ///
  301. /// A `VariantCollection` object containing:
  302. /// - the parsed variants,
  303. /// - the `Vcf` wrapper of the file path,
  304. /// - and the caller identity used for tagging.
  305. ///
  306. /// # Errors
  307. ///
  308. /// Returns an error if the VCF file cannot be read or parsed.
  309. fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
  310. let vcf_passed = self.config.severus_passed_vcf(&self.id);
  311. let caller = self.caller_cat();
  312. let add = vec![caller.clone()];
  313. info!("Loading variants from {caller}: {vcf_passed}");
  314. let variants = read_vcf(&vcf_passed)
  315. .map_err(|e| anyhow::anyhow!("Failed to read Severus VCF {}.\n{e}", vcf_passed))?;
  316. variants.par_iter().for_each(|v| {
  317. annotations.insert_update(v.hash(), &add);
  318. });
  319. info!("{}, {} variants loaded.", caller, variants.len());
  320. Ok(VariantCollection {
  321. variants,
  322. vcf: Vcf::new(vcf_passed.into())?,
  323. caller,
  324. })
  325. }
  326. }
  327. impl Label for Severus {
  328. /// Returns the string label for this caller.
  329. fn label(&self) -> String {
  330. self.caller_cat().to_string()
  331. }
  332. }
  333. impl Version for Severus {
  334. /// Retrieves the Severus version by running `severus --version` in its conda environment.
  335. ///
  336. /// # Errors
  337. /// Returns an error if command execution fails or "Version " not found in output.
  338. fn version(config: &Config) -> anyhow::Result<String> {
  339. let mut job = SeverusJob {
  340. conda_sh: config.conda_sh.clone(),
  341. severus_bin: config.severus_bin.clone(),
  342. args: vec!["--version".to_string()],
  343. };
  344. let out = run!(&config, &mut job).context("Error while running `severus --version`")?;
  345. let combined = format!("{}{}", out.stdout, out.stderr);
  346. // Take first non-empty trimmed line
  347. let line = combined
  348. .lines()
  349. .map(str::trim)
  350. .find(|l| !l.is_empty())
  351. .ok_or_else(|| anyhow::anyhow!("No output from `severus --version`"))?;
  352. // Accept either "1.6" or "Severus version: 1.6"
  353. let v = if let Some((_, value)) = line.split_once(':') {
  354. value.trim().to_string()
  355. } else {
  356. line.to_string()
  357. };
  358. Ok(v)
  359. }
  360. }
  361. /// Severus solo (single-sample) structural variant caller.
  362. ///
  363. /// Detects structural variants including VNTRs from a single BAM file without a matched control.
  364. /// Useful for germline SV detection or when no matched normal is available.
  365. ///
  366. /// # Fields
  367. ///
  368. /// - `id` - Sample identifier (e.g., "34528")
  369. /// - `time` - Time point label: typically `config.normal_name` ("norm") or `config.tumoral_name` ("diag")
  370. /// - `config` - Global pipeline configuration
  371. /// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/severus_solo")
  372. #[derive(Debug)]
  373. pub struct SeverusSolo {
  374. /// Sample identifier
  375. pub id: String,
  376. /// Time point identifier (e.g., "norm" or "diag")
  377. pub time: String,
  378. /// Global pipeline configuration
  379. pub config: Config,
  380. /// Directory for log file storage
  381. pub log_dir: String,
  382. }
  383. impl InitializeSolo for SeverusSolo {
  384. /// Initializes Severus solo analysis for a sample at a specific time point.
  385. ///
  386. /// Creates necessary log directory.
  387. ///
  388. /// # Errors
  389. /// Returns an error if directory creation fails.
  390. fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
  391. let log_dir = format!("{}/{}/log/severus_solo", config.result_dir, id);
  392. if !Path::new(&log_dir).exists() {
  393. fs::create_dir_all(&log_dir)
  394. .context(format!("Failed to create {log_dir} directory"))?;
  395. }
  396. Ok(SeverusSolo {
  397. id: id.to_string(),
  398. time: time.to_string(),
  399. config: config.clone(),
  400. log_dir,
  401. })
  402. }
  403. }
  404. impl Run for SeverusSolo {
  405. /// Runs the Severus pipeline and filters for PASS variants.
  406. ///
  407. /// Skips steps if their output files already exist.
  408. ///
  409. /// # Errors
  410. /// Returns an error if Severus execution, filtering, or log writing fails.
  411. fn run(&mut self) -> anyhow::Result<()> {
  412. let id = &self.id;
  413. let time = &self.time;
  414. let output_vcf = &self.config.severus_solo_output_vcf(id, time);
  415. let passed_vcf = &self.config.severus_solo_passed_vcf(id, time);
  416. if !Path::new(output_vcf).exists() {
  417. // Run command if output VCF doesn't exist
  418. let mut job = SeverusJob {
  419. conda_sh: self.config.conda_sh.clone(),
  420. severus_bin: self.config.severus_bin.clone(),
  421. args: vec![
  422. "--target-bam".into(),
  423. self.config.solo_bam(id, time),
  424. "--out-dir".into(),
  425. self.config.severus_solo_output_dir(id, time),
  426. "-t".into(),
  427. self.config.severus_threads.to_string(),
  428. "--write-alignments".into(),
  429. "--use-supplementary-tag".into(),
  430. "--resolve-overlaps".into(),
  431. "--between-junction-ins".into(),
  432. "--vntr-bed".into(),
  433. self.config.vntrs_bed.clone(),
  434. ],
  435. };
  436. let report =
  437. run!(&self.config, &mut job).context("Error while running severus solo")?;
  438. let log_file = format!("{}/severus_", self.log_dir);
  439. report
  440. .save_to_file(&log_file)
  441. .context(format!("Error while writing logs into {log_file}"))?;
  442. } else {
  443. debug!("Severus output vcf already exists.");
  444. }
  445. // Keep PASS
  446. if !Path::new(passed_vcf).exists() && Path::new(output_vcf).exists() {
  447. let mut keep =
  448. BcftoolsKeepPassPrecise::from_config(&self.config, output_vcf, passed_vcf);
  449. let report = run!(&self.config, &mut keep).context(format!(
  450. "Error while running bcftools keep PASS for {output_vcf}"
  451. ))?;
  452. let log_file = format!("{}/bcftools_pass", self.log_dir);
  453. report
  454. .save_to_file(&log_file)
  455. .context(format!("Error while writing logs into {log_file}"))?;
  456. } else {
  457. debug!("VCF PASSED already exists for Severus.");
  458. }
  459. Ok(())
  460. }
  461. }
  462. #[cfg(test)]
  463. mod tests {
  464. use super::*;
  465. use crate::helpers::test_init;
  466. #[test]
  467. fn severus_version() -> anyhow::Result<()> {
  468. test_init();
  469. let vl = Severus::version(&Config::default())?;
  470. info!("Severus version: {vl}");
  471. Ok(())
  472. }
  473. #[test]
  474. fn severus_run() -> anyhow::Result<()> {
  475. test_init();
  476. let config = Config::default();
  477. // let clairs = ClairS::initialize("34528", &config)?;
  478. // info!("{clairs}");
  479. let mut caller = Severus::initialize("DUMCO", &config)?;
  480. caller.run()?;
  481. Ok(())
  482. }
  483. }