|
|
@@ -1,6 +1,73 @@
|
|
|
-//! NanomonSV structural variant caller orchestration (paired and solo).
|
|
|
+//! # NanomonSV Structural Variant Caller Orchestration
|
|
|
//!
|
|
|
-//! Runs parse/get and PASS filtering through the shared runner interfaces (local/Slurm) using the global `Config`.
|
|
|
+//! This module provides wrappers for [NanomonSV](https://github.com/friend1ws/nanomonsv),
|
|
|
+//! a structural variant (SV) caller optimized for long-read sequencing data.
|
|
|
+//!
|
|
|
+//! ## Overview
|
|
|
+//!
|
|
|
+//! NanomonSV detects structural variants including:
|
|
|
+//! - Deletions, insertions, duplications
|
|
|
+//! - Inversions and translocations
|
|
|
+//! - Complex rearrangements
|
|
|
+//!
|
|
|
+//! ## Execution Modes
|
|
|
+//!
|
|
|
+//! - **Paired (somatic)** - Compares tumor vs normal BAMs to identify somatic SVs
|
|
|
+//! - **Solo** - Single-sample SV calling without matched control
|
|
|
+//!
|
|
|
+//! Both modes support local and Slurm execution via the `Config.slurm_runner` flag.
|
|
|
+//!
|
|
|
+//! ## Output Files
|
|
|
+//!
|
|
|
+//! Paired mode PASS-filtered VCF:
|
|
|
+//! ```text
|
|
|
+//! {result_dir}/{id}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! Solo mode PASS-filtered VCF:
|
|
|
+//! ```text
|
|
|
+//! {result_dir}/{id}/nanomonsv_solo/{id}_{time_point}_nanomonsv_PASSED.vcf.gz
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Usage
|
|
|
+//!
|
|
|
+//! ### Paired (Tumor-Normal) Mode
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::callers::nanomonsv::NanomonSV;
|
|
|
+//! use pandora_lib_promethion::config::Config;
|
|
|
+//! use pandora_lib_promethion::pipes::Initialize;
|
|
|
+//! use pandora_lib_promethion::runners::Run;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let mut caller = NanomonSV::initialize("sample_001", &config)?;
|
|
|
+//!
|
|
|
+//! if caller.should_run() {
|
|
|
+//! caller.run()?;
|
|
|
+//! }
|
|
|
+//!
|
|
|
+//! // Load variants
|
|
|
+//! let variants = caller.variants(&annotations)?;
|
|
|
+//! println!("Found {} somatic SVs", variants.variants.len());
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ### Solo Mode
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::callers::nanomonsv::NanomonSVSolo;
|
|
|
+//! use pandora_lib_promethion::pipes::InitializeSolo;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let mut caller = NanomonSVSolo::initialize("sample_001", "norm", &config)?;
|
|
|
+//! caller.run()?;
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## References
|
|
|
+//!
|
|
|
+//! - [NanomonSV GitHub](https://github.com/friend1ws/nanomonsv)
|
|
|
+//! - [NanomonSV Paper](https://doi.org/10.1186/s13059-020-02175-y)
|
|
|
use rayon::prelude::*;
|
|
|
use std::{
|
|
|
fs::{self},
|
|
|
@@ -30,17 +97,30 @@ use crate::{
|
|
|
},
|
|
|
};
|
|
|
|
|
|
-/// Represents the NanomonSV runner, responsible for structural variant calling
|
|
|
-/// from diagnostic and normal BAMs using the NanomonSV tool. This runner initialize,
|
|
|
-/// run, classify, and extract variants from VCF.
|
|
|
+/// NanomonSV paired (tumor-normal) structural variant caller.
|
|
|
+///
|
|
|
+/// Executes the NanomonSV pipeline for somatic SV detection by comparing
|
|
|
+/// tumor and normal BAM files.
|
|
|
+///
|
|
|
+/// # Fields
|
|
|
+///
|
|
|
+/// - `id` - Sample identifier (e.g., "34528")
|
|
|
+/// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/nanomonsv")
|
|
|
+/// - `config` - Global pipeline configuration
|
|
|
+/// - `job_args` - Internal command-line arguments passed to nanomonsv binary
|
|
|
+/// - `threads` - Number of CPU threads for parallel processing (from `config.nanomonsv_threads`)
|
|
|
#[derive(Debug)]
|
|
|
pub struct NanomonSV {
|
|
|
+ /// Sample identifier
|
|
|
pub id: String,
|
|
|
+ /// Directory for log file storage
|
|
|
pub log_dir: String,
|
|
|
+ /// Global pipeline configuration
|
|
|
pub config: Config,
|
|
|
|
|
|
- // Command args and threads used by the shared runner.
|
|
|
+ /// Command-line arguments for nanomonsv executable
|
|
|
job_args: Vec<String>,
|
|
|
+ /// Number of threads for parallel execution
|
|
|
threads: u8,
|
|
|
}
|
|
|
|
|
|
@@ -293,16 +373,27 @@ impl Label for NanomonSV {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-/// NanomonSV caller in solo (single-sample) mode.
|
|
|
+/// NanomonSV solo (single-sample) structural variant caller.
|
|
|
///
|
|
|
/// Processes a single BAM file to detect structural variants without a matched control.
|
|
|
+/// Useful for germline SV detection or when no matched normal is available.
|
|
|
+///
|
|
|
+/// # Fields
|
|
|
+///
|
|
|
+/// - `id` - Sample identifier (e.g., "34528")
|
|
|
+/// - `bam` - Path to input BAM file (e.g., "{bam_dir}/{id}_{time_point}.bam")
|
|
|
+/// - `time_point` - Time point label: must equal `config.normal_name` (e.g., "norm") or `config.tumoral_name` (e.g., "diag"); validated in `initialize()`
|
|
|
+/// - `out_dir` - Output directory (e.g., "{result_dir}/{id}/nanomonsv_solo/{time_point}")
|
|
|
+/// - `log_dir` - Log directory (e.g., "{result_dir}/{id}/log/nanomonsv_solo")
|
|
|
+/// - `vcf_passed` - PASS-filtered output VCF path
|
|
|
+/// - `config` - Global pipeline configuration
|
|
|
#[derive(Debug)]
|
|
|
pub struct NanomonSVSolo {
|
|
|
/// Sample identifier
|
|
|
pub id: String,
|
|
|
/// Path to input BAM file
|
|
|
pub bam: String,
|
|
|
- /// Time point identifier (e.g., "normal" or "tumor")
|
|
|
+ /// Time point identifier (e.g., "norm" or "diag")
|
|
|
pub time_point: String,
|
|
|
/// Output directory for NanomonSV results
|
|
|
pub out_dir: String,
|
|
|
@@ -323,7 +414,18 @@ impl InitializeSolo for NanomonSVSolo {
|
|
|
/// Returns an error if directory creation fails.
|
|
|
fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
|
|
|
let id = id.to_string();
|
|
|
- info!("Initialize Nanomonsv solo for {id} {time}.");
|
|
|
+ let time_point = time.to_string();
|
|
|
+
|
|
|
+ // Validate time_point matches configured names
|
|
|
+ anyhow::ensure!(
|
|
|
+ time_point == config.normal_name || time_point == config.tumoral_name,
|
|
|
+ "Invalid time_point '{}': must be either '{}' (normal) or '{}' (tumor)",
|
|
|
+ time_point,
|
|
|
+ config.normal_name,
|
|
|
+ config.tumoral_name
|
|
|
+ );
|
|
|
+
|
|
|
+ info!("Initialize Nanomonsv solo for {id} {time_point}.");
|
|
|
let log_dir = format!("{}/{}/log/nanomonsv_solo", config.result_dir, &id);
|
|
|
|
|
|
if !Path::new(&log_dir).exists() {
|
|
|
@@ -331,17 +433,17 @@ impl InitializeSolo for NanomonSVSolo {
|
|
|
.context(format!("Failed to create {log_dir} directory"))?;
|
|
|
}
|
|
|
|
|
|
- let out_dir = config.nanomonsv_solo_output_dir(&id, time);
|
|
|
+ let out_dir = config.nanomonsv_solo_output_dir(&id, &time_point);
|
|
|
fs::create_dir_all(&out_dir)?;
|
|
|
|
|
|
- let bam = config.solo_bam(&id, time);
|
|
|
+ let bam = config.solo_bam(&id, &time_point);
|
|
|
|
|
|
- let vcf_passed = config.nanomonsv_solo_passed_vcf(&id, time);
|
|
|
+ let vcf_passed = config.nanomonsv_solo_passed_vcf(&id, &time_point);
|
|
|
|
|
|
Ok(Self {
|
|
|
id,
|
|
|
bam,
|
|
|
- time_point: time.to_string(),
|
|
|
+ time_point,
|
|
|
out_dir,
|
|
|
log_dir,
|
|
|
vcf_passed,
|
|
|
@@ -407,6 +509,11 @@ impl Run for NanomonSVSolo {
|
|
|
|
|
|
impl CallerCat for NanomonSVSolo {
|
|
|
/// Returns the caller annotation based on whether this is a normal or tumor sample.
|
|
|
+ ///
|
|
|
+ /// # Panics
|
|
|
+ ///
|
|
|
+ /// Panics if `time_point` matches neither `config.normal_name` nor `config.tumoral_name`; `initialize()` validates this, but the field is public and can be changed afterwards.
|
|
|
+ /// If it does, it indicates a serious logic error in the code.
|
|
|
fn caller_cat(&self) -> Annotation {
|
|
|
let Config {
|
|
|
normal_name,
|
|
|
@@ -418,7 +525,12 @@ impl CallerCat for NanomonSVSolo {
|
|
|
} else if *tumoral_name == self.time_point {
|
|
|
Annotation::Callers(Caller::NanomonSVSolo, Sample::SoloTumor)
|
|
|
} else {
|
|
|
- panic!("Error in time_point name: {}", self.time_point);
|
|
|
+ // INVARIANT: time_point is validated in initialize() to be either normal_name or tumoral_name.
|
|
|
+ // If we reach here, it's a logic error in the code, not a user error.
|
|
|
+ unreachable!(
|
|
|
+ "Invalid time_point '{}': expected '{}' or '{}'. This should have been caught during initialization.",
|
|
|
+ self.time_point, normal_name, tumoral_name
|
|
|
+ )
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -657,7 +769,7 @@ pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<(
|
|
|
passed_mrd.push(output);
|
|
|
}
|
|
|
}
|
|
|
- (Some(_), Some(p), None) => warn!("Prossing csi for {}", p.display()),
|
|
|
+ (Some(_), Some(p), None) => warn!("Processing csi for {}", p.display()),
|
|
|
(Some(_), Some(p), Some(_)) => passed_mrd.push(p),
|
|
|
_ => {} // All files found
|
|
|
}
|