|
@@ -10,7 +10,7 @@ use log::{debug, error, info, warn};
|
|
|
|
|
|
|
|
use crate::{
|
|
use crate::{
|
|
|
annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
|
|
annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
|
|
|
- collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun},
|
|
|
|
|
|
|
+ collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun, Version},
|
|
|
commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
|
|
commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
|
|
|
config::Config,
|
|
config::Config,
|
|
|
helpers::{is_file_older, remove_dir_if_exists},
|
|
helpers::{is_file_older, remove_dir_if_exists},
|
|
@@ -78,7 +78,7 @@ impl ShouldRun for NanomonSV {
|
|
|
let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
|
|
let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
|
|
|
|
|
|
|
|
let result = [
|
|
let result = [
|
|
|
- is_file_older(&mrd_info_vcf , &self.config.normal_bam(&self.id), true).unwrap_or(true),
|
|
|
|
|
|
|
+ is_file_older(&mrd_info_vcf, &self.config.normal_bam(&self.id), true).unwrap_or(true),
|
|
|
is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true),
|
|
is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true),
|
|
|
]
|
|
]
|
|
|
.iter()
|
|
.iter()
|
|
@@ -131,7 +131,7 @@ impl Run for NanomonSV {
|
|
|
let mrd_result_vcf = format!("{mrd_out_prefix}.nanomonsv.result.vcf");
|
|
let mrd_result_vcf = format!("{mrd_out_prefix}.nanomonsv.result.vcf");
|
|
|
|
|
|
|
|
if !Path::new(&mrd_result_vcf).exists() {
|
|
if !Path::new(&mrd_result_vcf).exists() {
|
|
|
- info!("Nanomonsv get from normal bam: {}.", mrd_bam);
|
|
|
|
|
|
|
+ info!("Nanomonsv get from normal bam: {mrd_bam}.");
|
|
|
let report = nanomonsv_get(&mrd_bam, &mrd_out_prefix, None, None, &self.config)
|
|
let report = nanomonsv_get(&mrd_bam, &mrd_out_prefix, None, None, &self.config)
|
|
|
.context(format!(
|
|
.context(format!(
|
|
|
"Error while running NanomonSV get for {mrd_result_vcf}"
|
|
"Error while running NanomonSV get for {mrd_result_vcf}"
|
|
@@ -145,7 +145,7 @@ impl Run for NanomonSV {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if !Path::new(&diag_result_vcf).exists() {
|
|
if !Path::new(&diag_result_vcf).exists() {
|
|
|
- info!("NanomonSV get from tumoral bam: {}.", diag_bam);
|
|
|
|
|
|
|
+ info!("NanomonSV get from tumoral bam: {diag_bam}.");
|
|
|
let report = nanomonsv_get(
|
|
let report = nanomonsv_get(
|
|
|
&diag_bam,
|
|
&diag_bam,
|
|
|
&diag_out_prefix,
|
|
&diag_out_prefix,
|
|
@@ -167,7 +167,7 @@ impl Run for NanomonSV {
|
|
|
if !Path::new(&vcf_passed).exists() {
|
|
if !Path::new(&vcf_passed).exists() {
|
|
|
let report =
|
|
let report =
|
|
|
bcftools_keep_pass(&diag_result_vcf, &vcf_passed, BcftoolsConfig::default())
|
|
bcftools_keep_pass(&diag_result_vcf, &vcf_passed, BcftoolsConfig::default())
|
|
|
- .context(format!("Can't index {}", vcf_passed))?;
|
|
|
|
|
|
|
+ .context(format!("Can't index {vcf_passed}"))?;
|
|
|
report
|
|
report
|
|
|
.save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
|
|
.save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
|
|
|
.context("Failed to save report")?;
|
|
.context("Failed to save report")?;
|
|
@@ -190,6 +190,34 @@ impl CallerCat for NanomonSV {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+impl Version for NanomonSV {
|
|
|
|
|
+ /// Retrieves the NanomonSV version by running `nanomonsv --version`.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Errors
|
|
|
|
|
+ /// Returns an error if command execution fails, or if the "stdout: nanomonsv " marker (or the newline terminating that line) is not found in the captured log.
|
|
|
|
|
+ fn version(config: &Config) -> anyhow::Result<String> {
|
|
|
|
|
+ let args = ["--version"];
|
|
|
|
|
+ let mut cmd_run = CommandRun::new(&config.nanomonsv_bin, &args);
|
|
|
|
|
+
|
|
|
|
|
+ let report = run_wait(&mut cmd_run).context(format!(
|
|
|
|
|
+ "Error while running `NanomonSV {}`",
|
|
|
|
|
+ args.join(" ")
|
|
|
|
|
+ ))?;
|
|
|
|
|
+ let log = report.log;
|
|
|
|
|
+ let start = log
|
|
|
|
|
+ .find("stdout: nanomonsv ")
|
|
|
|
|
+ .context("Failed to find 'stdout: nanomonsv ' in the log")?;
|
|
|
|
|
+ let start_index = start + "stdout: nanomonsv ".len();
|
|
|
|
|
+ let end = log[start_index..]
|
|
|
|
|
+ .find('\n')
|
|
|
|
|
+ .context("Failed to find newline after 'stdout: nanomonsv '")?;
|
|
|
|
|
+ Ok(log[start_index..start_index + end]
|
|
|
|
|
+ .to_string()
|
|
|
|
|
+ .trim()
|
|
|
|
|
+ .to_string())
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
impl Variants for NanomonSV {
|
|
impl Variants for NanomonSV {
|
|
|
/// Loads and annotates the variants from the NanomonSV PASS VCF.
|
|
/// Loads and annotates the variants from the NanomonSV PASS VCF.
|
|
|
///
|
|
///
|
|
@@ -201,7 +229,7 @@ impl Variants for NanomonSV {
|
|
|
let add = vec![caller.clone()];
|
|
let add = vec![caller.clone()];
|
|
|
let vcf_passed = self.config.nanomonsv_passed_vcf(&self.id);
|
|
let vcf_passed = self.config.nanomonsv_passed_vcf(&self.id);
|
|
|
|
|
|
|
|
- info!("Loading variants from {}: {}", caller, vcf_passed);
|
|
|
|
|
|
|
+ info!("Loading variants from {caller}: {vcf_passed}");
|
|
|
|
|
|
|
|
let variants = read_vcf(&vcf_passed)
|
|
let variants = read_vcf(&vcf_passed)
|
|
|
.map_err(|e| anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed))?;
|
|
.map_err(|e| anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed))?;
|
|
@@ -221,24 +249,40 @@ impl Variants for NanomonSV {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl Label for NanomonSV {
|
|
impl Label for NanomonSV {
|
|
|
|
|
+ /// Returns the string label for this caller.
|
|
|
fn label(&self) -> String {
|
|
fn label(&self) -> String {
|
|
|
self.caller_cat().to_string()
|
|
self.caller_cat().to_string()
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-/// SOLO
|
|
|
|
|
|
|
+/// NanomonSV caller in solo (single-sample) mode.
|
|
|
|
|
+///
|
|
|
|
|
+/// Processes a single BAM file to detect structural variants without a matched control.
|
|
|
#[derive(Debug)]
|
|
#[derive(Debug)]
|
|
|
pub struct NanomonSVSolo {
|
|
pub struct NanomonSVSolo {
|
|
|
|
|
+ /// Sample identifier
|
|
|
pub id: String,
|
|
pub id: String,
|
|
|
|
|
+ /// Path to input BAM file
|
|
|
pub bam: String,
|
|
pub bam: String,
|
|
|
|
|
+ /// Time point identifier (e.g., "normal" or "tumor")
|
|
|
pub time_point: String,
|
|
pub time_point: String,
|
|
|
|
|
+ /// Output directory for NanomonSV results
|
|
|
pub out_dir: String,
|
|
pub out_dir: String,
|
|
|
|
|
+ /// Directory for log files
|
|
|
pub log_dir: String,
|
|
pub log_dir: String,
|
|
|
|
|
+ /// Path to PASS-filtered VCF output
|
|
|
pub vcf_passed: String,
|
|
pub vcf_passed: String,
|
|
|
|
|
+ /// Pipeline configuration
|
|
|
pub config: Config,
|
|
pub config: Config,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl InitializeSolo for NanomonSVSolo {
|
|
impl InitializeSolo for NanomonSVSolo {
|
|
|
|
|
+ /// Initializes NanomonSV solo analysis for a sample at a specific time point.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// Creates necessary output and log directories.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Errors
|
|
|
|
|
+ /// Returns an error if directory creation fails.
|
|
|
fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
|
|
fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
|
|
|
let id = id.to_string();
|
|
let id = id.to_string();
|
|
|
info!("Initialize Nanomonsv solo for {id} {time}.");
|
|
info!("Initialize Nanomonsv solo for {id} {time}.");
|
|
@@ -269,6 +313,12 @@ impl InitializeSolo for NanomonSVSolo {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl Run for NanomonSVSolo {
|
|
impl Run for NanomonSVSolo {
|
|
|
|
|
+ /// Runs the NanomonSV solo pipeline: parse, get, and filter steps.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// Skips steps if their output files already exist.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Errors
|
|
|
|
|
+ /// Returns an error if any pipeline step fails or log files cannot be written.
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
// Parse
|
|
// Parse
|
|
|
info!("Nanomonsv Parse");
|
|
info!("Nanomonsv Parse");
|
|
@@ -318,6 +368,7 @@ impl Run for NanomonSVSolo {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl CallerCat for NanomonSVSolo {
|
|
impl CallerCat for NanomonSVSolo {
|
|
|
|
|
+ /// Returns the caller annotation based on whether this is a normal or tumor sample.
|
|
|
fn caller_cat(&self) -> Annotation {
|
|
fn caller_cat(&self) -> Annotation {
|
|
|
let Config {
|
|
let Config {
|
|
|
normal_name,
|
|
normal_name,
|
|
@@ -335,12 +386,17 @@ impl CallerCat for NanomonSVSolo {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl Label for NanomonSVSolo {
|
|
impl Label for NanomonSVSolo {
|
|
|
|
|
+ /// Returns the string label for this caller.
|
|
|
fn label(&self) -> String {
|
|
fn label(&self) -> String {
|
|
|
self.caller_cat().to_string()
|
|
self.caller_cat().to_string()
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl Variants for NanomonSVSolo {
|
|
impl Variants for NanomonSVSolo {
|
|
|
|
|
+ /// Loads variants from the PASS-filtered VCF and adds caller annotations.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Errors
|
|
|
|
|
+ /// Returns an error if VCF reading fails.
|
|
|
fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
|
|
fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
|
|
|
let caller = self.caller_cat();
|
|
let caller = self.caller_cat();
|
|
|
let add = vec![caller.clone()];
|
|
let add = vec![caller.clone()];
|
|
@@ -362,7 +418,10 @@ impl Variants for NanomonSVSolo {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// Helper functions
|
|
|
|
|
|
|
+/// Runs NanomonSV parse step to extract SV breakpoint information from a BAM file.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Errors
|
|
|
|
|
+/// Returns an error if command execution fails.
|
|
|
pub fn nanomonsv_parse(bam: &str, out_prefix: &str, config: &Config) -> anyhow::Result<RunReport> {
|
|
pub fn nanomonsv_parse(bam: &str, out_prefix: &str, config: &Config) -> anyhow::Result<RunReport> {
|
|
|
let args = vec![
|
|
let args = vec![
|
|
|
"parse",
|
|
"parse",
|
|
@@ -398,8 +457,8 @@ fn somatic_parse(
|
|
|
config: &Config,
|
|
config: &Config,
|
|
|
log_dir: &str,
|
|
log_dir: &str,
|
|
|
) -> anyhow::Result<()> {
|
|
) -> anyhow::Result<()> {
|
|
|
- let diag_out_prefix = format!("{}/{}_diag", diag_out_dir, id);
|
|
|
|
|
- let mrd_out_prefix = format!("{}/{}_mrd", mrd_out_dir, id);
|
|
|
|
|
|
|
+ let diag_out_prefix = format!("{diag_out_dir}/{id}_diag");
|
|
|
|
|
+ let mrd_out_prefix = format!("{mrd_out_dir}/{id}_mrd");
|
|
|
|
|
|
|
|
let diag_info_vcf = format!("{diag_out_prefix}.bp_info.sorted.bed.gz");
|
|
let diag_info_vcf = format!("{diag_out_prefix}.bp_info.sorted.bed.gz");
|
|
|
let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
|
|
let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
|
|
@@ -419,7 +478,12 @@ fn somatic_parse(
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// Helper function to spawn a thread for parsing
|
|
|
|
|
|
|
+/// Spawns a thread to run NanomonSV parse step.
|
|
|
|
|
+///
|
|
|
|
|
+/// Returns a dummy thread if output already exists.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Errors
|
|
|
|
|
+/// Returns an error if parsing or log writing fails.
|
|
|
fn spawn_parse_thread(
|
|
fn spawn_parse_thread(
|
|
|
bam: &str,
|
|
bam: &str,
|
|
|
out_prefix: &str,
|
|
out_prefix: &str,
|
|
@@ -433,25 +497,31 @@ fn spawn_parse_thread(
|
|
|
let config = config.clone();
|
|
let config = config.clone();
|
|
|
let log_dir = log_dir.to_string();
|
|
let log_dir = log_dir.to_string();
|
|
|
|
|
|
|
|
- info!("Nanomonsv parsing started for BAM: {}", bam);
|
|
|
|
|
|
|
+ info!("Nanomonsv parsing started for BAM: {bam}");
|
|
|
let handle = thread::spawn(move || {
|
|
let handle = thread::spawn(move || {
|
|
|
let report = nanomonsv_parse(&bam, &out_prefix, &config)
|
|
let report = nanomonsv_parse(&bam, &out_prefix, &config)
|
|
|
- .with_context(|| format!("Failed to parse BAM: {}", bam))?;
|
|
|
|
|
|
|
+ .with_context(|| format!("Failed to parse BAM: {bam}"))?;
|
|
|
|
|
|
|
|
report
|
|
report
|
|
|
- .save_to_file(&format!("{log_dir}/nanomonsv_parse_{}_", bam))
|
|
|
|
|
- .with_context(|| format!("Failed to save report for BAM: {}", bam))?;
|
|
|
|
|
|
|
+ .save_to_file(&format!("{log_dir}/nanomonsv_parse_{bam}_"))
|
|
|
|
|
+ .with_context(|| format!("Failed to save report for BAM: {bam}"))?;
|
|
|
|
|
|
|
|
Ok(())
|
|
Ok(())
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
Ok(handle)
|
|
Ok(handle)
|
|
|
} else {
|
|
} else {
|
|
|
- debug!("Nanomonsv parse results already exist: {}", info_vcf);
|
|
|
|
|
|
|
+ debug!("Nanomonsv parse results already exist: {info_vcf}");
|
|
|
Ok(thread::spawn(|| Ok(()))) // Return a dummy thread that does nothing
|
|
Ok(thread::spawn(|| Ok(()))) // Return a dummy thread that does nothing
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/// Runs NanomonSV get step to call structural variants.
|
|
|
|
|
+///
|
|
|
|
|
+/// Optionally uses a control sample for matched analysis.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Errors
|
|
|
|
|
+/// Returns an error if command execution fails.
|
|
|
pub fn nanomonsv_get(
|
|
pub fn nanomonsv_get(
|
|
|
bam: &str,
|
|
bam: &str,
|
|
|
out_prefix: &str,
|
|
out_prefix: &str,
|
|
@@ -483,6 +553,12 @@ pub fn nanomonsv_get(
|
|
|
Ok(res)
|
|
Ok(res)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/// Creates a panel of normals (PON) from MRD NanomonSV results.
|
|
|
|
|
+///
|
|
|
|
|
+/// Searches for MRD directories, filters PASS variants, and concatenates them.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Errors
|
|
|
|
|
+/// Returns an error if directory traversal, filtering, or concatenation fails.
|
|
|
pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<()> {
|
|
pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<()> {
|
|
|
let mut passed_mrd = Vec::new();
|
|
let mut passed_mrd = Vec::new();
|
|
|
for mrd_dir in find_nanomonsv_dirs(&PathBuf::from(&config.result_dir), "mrd", 0, 3) {
|
|
for mrd_dir in find_nanomonsv_dirs(&PathBuf::from(&config.result_dir), "mrd", 0, 3) {
|
|
@@ -546,6 +622,13 @@ pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<(
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/// Recursively finds NanomonSV output directories for a specific time point.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Arguments
|
|
|
|
|
+/// * `root` - Starting directory for search
|
|
|
|
|
+/// * `time_point` - Time point identifier (e.g., "mrd")
|
|
|
|
|
+/// * `depth` - Current recursion depth
|
|
|
|
|
+/// * `max_depth` - Maximum recursion depth
|
|
|
pub fn find_nanomonsv_dirs(
|
|
pub fn find_nanomonsv_dirs(
|
|
|
root: &Path,
|
|
root: &Path,
|
|
|
time_point: &str,
|
|
time_point: &str,
|
|
@@ -568,7 +651,7 @@ pub fn find_nanomonsv_dirs(
|
|
|
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
|
|
if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
|
|
|
&& path
|
|
&& path
|
|
|
.to_string_lossy()
|
|
.to_string_lossy()
|
|
|
- .contains(&format!("{}/nanomonsv", time_point))
|
|
|
|
|
|
|
+ .contains(&format!("{time_point}/nanomonsv"))
|
|
|
{
|
|
{
|
|
|
Some(path)
|
|
Some(path)
|
|
|
} else {
|
|
} else {
|
|
@@ -587,6 +670,14 @@ pub fn find_nanomonsv_dirs(
|
|
|
result
|
|
result
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+/// Replaces a filename suffix in a path.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Example
|
|
|
|
|
+/// ```
|
|
|
|
|
+/// let path = Path::new("/data/sample_mrd.nanomonsv.result.vcf");
|
|
|
|
|
+/// let new_path = replace_filename_suffix(path, "_mrd.nanomonsv.result.vcf", "_mrd_PASSED.vcf.gz");
|
|
|
|
|
+/// // new_path: /data/sample_mrd_PASSED.vcf.gz
|
|
|
|
|
+/// ```
|
|
|
pub fn replace_filename_suffix(path: &Path, from: &str, to: &str) -> PathBuf {
|
|
pub fn replace_filename_suffix(path: &Path, from: &str, to: &str) -> PathBuf {
|
|
|
let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
|
|
let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
|
|
|
|
|
|