Explorar o código

update version checker

Thomas hai 1 mes
pai
achega
37caffae61
Modificáronse 10 ficheiros con 446 adicións e 75 borrados
  1. 5 4
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 46 10
      src/callers/clairs.rs
  4. 49 4
      src/callers/deep_somatic.rs
  5. 43 3
      src/callers/deep_variant.rs
  6. 108 17
      src/callers/nanomonsv.rs
  7. 124 7
      src/callers/savana.rs
  8. 46 24
      src/callers/severus.rs
  9. 2 0
      src/config.rs
  10. 22 6
      src/lib.rs

+ 5 - 4
Cargo.lock

@@ -2984,6 +2984,7 @@ dependencies = [
  "petgraph 0.8.2",
  "rand 0.9.2",
  "rayon",
+ "regex",
  "rusqlite",
  "rust-htslib 0.50.0",
  "semver 1.0.26",
@@ -3539,9 +3540,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.11.1"
+version = "1.12.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -3551,9 +3552,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.9"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
 dependencies = [
  "aho-corasick",
  "memchr",

+ 1 - 0
Cargo.toml

@@ -46,6 +46,7 @@ ordered-float = { version = "5.0.0", features = ["serde"] }
 bitcode = "0.6.5"
 semver = "1.0.26"
 petgraph = "0.8.1"
+regex = "1.12.2"
 
 [profile.dev]
 opt-level = 0

+ 46 - 10
src/callers/clairs.rs

@@ -1,6 +1,6 @@
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, Initialize, ShouldRun},
+    collection::{vcf::Vcf, Initialize, ShouldRun, Version},
     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists, temp_file_path},
@@ -11,10 +11,11 @@ use crate::{
         variant_collection::VariantCollection,
     },
 };
-use anyhow::Ok;
+use anyhow::{Context, Ok};
 use log::{debug, info, warn};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
-use std::{fs, path::Path};
+use regex::Regex;
+use std::{fs, path::Path, process::{Command, Stdio}};
 
 /// A pipeline runner for executing ClairS on paired tumor and normal samples.
 ///
@@ -115,8 +116,8 @@ impl Run for ClairS {
                 "-v",
                 "/data:/data",
                 "-v",
-                &format!("{}:{}", output_dir, output_dir),
-                "hkubal/clairs:latest",
+                &format!("{output_dir}:{output_dir}"),
+                &format!("hkubal/clairs:{}", self.config.clairs_docker_tag),
                 "/opt/bin/run_clairs",
                 "-T",
                 &self.config.tumoral_bam(&self.id),
@@ -257,7 +258,7 @@ impl Variants for ClairS {
         let add = vec![caller.clone()];
         let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
 
-        info!("Loading variants from {}: {}", caller, passed_vcf);
+        info!("Loading variants from {caller}: {passed_vcf}");
         let variants = read_vcf(passed_vcf)
             .map_err(|e| anyhow::anyhow!("Failed to read ClairS VCF {}.\n{e}", passed_vcf))?;
 
@@ -293,10 +294,7 @@ impl ClairS {
         let add = vec![caller.clone()];
         let clair3_germline_passed = &self.config.clairs_germline_passed_vcf(&self.id);
 
-        info!(
-            "Loading variants from {}: {}",
-            caller, clair3_germline_passed
-        );
+        info!("Loading variants from {caller}: {clair3_germline_passed}");
 
         let variants = read_vcf(clair3_germline_passed)?;
         variants.par_iter().for_each(|v| {
@@ -313,7 +311,45 @@ impl ClairS {
 }
 
 impl Label for ClairS {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
+
+impl Version for ClairS {
+    /// Retrieves the ClairS version by running `/opt/bin/run_clairs --version` in its docker environment.
+    ///
+    /// # Errors
+    /// Returns an error if docker cannot be spawned, exits unsuccessfully, or no `run_clairs <version>` token is found in its output.
+    fn version(config: &Config) -> anyhow::Result<String> {
+        let out = Command::new("docker")
+            .args([
+                "run",
+                "--rm",
+                "--entrypoint",
+                "/opt/bin/run_clairs",
+                &format!("hkubal/clairs:{}", config.clairs_docker_tag),
+                "--version",
+            ])
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .output()
+            .context("failed to spawn docker")?;
+
+        if !out.status.success() {
+            let mut log = String::from_utf8_lossy(&out.stdout).to_string();
+            log.push_str(&String::from_utf8_lossy(&out.stderr));
+            anyhow::bail!("docker run failed: {}\n{}", out.status, log);
+        }
+
+        let mut log = String::from_utf8_lossy(&out.stdout).to_string();
+        log.push_str(&String::from_utf8_lossy(&out.stderr));
+
+        let re = Regex::new(r"(?m)run_clairs\s+([^\s]+)")?;
+        let caps = re
+            .captures(&log)
+            .context("could not parse DeepSomatic version from output")?;
+        Ok(caps.get(1).unwrap().as_str().to_string())
+    }
+}

+ 49 - 4
src/callers/deep_somatic.rs

@@ -1,11 +1,17 @@
-use std::{fs, path::Path};
+use std::{
+    fs,
+    path::Path,
+    process::{Command, Stdio},
+};
 
+use anyhow::Context;
 use log::info;
 use rayon::prelude::*;
+use regex::Regex;
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, Initialize, ShouldRun},
+    collection::{vcf::Vcf, Initialize, ShouldRun, Version},
     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists},
@@ -105,7 +111,7 @@ impl Run for DeepSomatic {
                 "-v",
                 "/data:/data",
                 "-v",
-                &format!("{}:/output", output_dir),
+                &format!("{output_dir}:/output"),
                 &format!("google/deepsomatic:{}", self.config.deepsomatic_bin_version),
                 "run_deepsomatic",
                 &format!("--model_type={}", self.config.deepsomatic_model_type),
@@ -187,7 +193,7 @@ impl Variants for DeepSomatic {
         let add = vec![caller.clone()];
         let vcf_passed = self.config.deepsomatic_passed_vcf(&self.id);
 
-        info!("Loading variants from {}: {}", caller, vcf_passed);
+        info!("Loading variants from {caller}: {vcf_passed}");
         let variants = read_vcf(&vcf_passed)
             .map_err(|e| anyhow::anyhow!("Failed to read DeepSomatic VCF {}.\n{e}", vcf_passed))?;
 
@@ -204,7 +210,46 @@ impl Variants for DeepSomatic {
 }
 
 impl Label for DeepSomatic {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
+
+impl Version for DeepSomatic {
+    /// Retrieves the DeepSomatic version by running `run_deepsomatic --version` in its docker environment.
+    ///
+    /// # Errors
+    /// Returns an error if docker cannot be spawned, exits unsuccessfully, or no `DeepVariant version <version>` text is found in its output.
+    fn version(config: &Config) -> anyhow::Result<String> {
+        let out = Command::new("docker")
+            .args([
+                "run",
+                "--rm",
+                "--entrypoint",
+                "/opt/deepvariant/bin/deepsomatic/run_deepsomatic",
+                &format!("google/deepsomatic:{}", config.deepsomatic_bin_version),
+                "--version",
+            ])
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .output()
+            .context("failed to spawn docker")?;
+
+        if !out.status.success() {
+            let mut log = String::from_utf8_lossy(&out.stdout).to_string();
+            log.push_str(&String::from_utf8_lossy(&out.stderr));
+            anyhow::bail!("docker run failed: {}\n{}", out.status, log);
+        }
+
+        let mut log = String::from_utf8_lossy(&out.stdout).to_string();
+        log.push_str(&String::from_utf8_lossy(&out.stderr));
+
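+        // NOTE(review): the pattern below matches "DeepVariant version <x>", not "DeepSomatic version <x>" — confirm which banner the image prints.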
+        let re = Regex::new(r"(?m)DeepVariant version\s+([^\s]+)")?;
+        let caps = re
+            .captures(&log)
+            .context("could not parse DeepSomatic version from output")?;
+        Ok(caps.get(1).unwrap().as_str().to_string())
+    }
+}

+ 43 - 3
src/callers/deep_variant.rs

@@ -1,11 +1,12 @@
 use anyhow::Context;
 use log::{debug, info};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
-use std::{fs, path::Path};
+use regex::Regex;
+use std::{fs, path::Path, process::{Command, Stdio}};
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, InitializeSolo, ShouldRun},
+    collection::{vcf::Vcf, InitializeSolo, ShouldRun, Version},
     commands::bcftools::{bcftools_keep_pass, BcftoolsConfig},
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists},
@@ -225,7 +226,7 @@ impl Variants for DeepVariant {
         let vcf_passed = self
             .config
             .deepvariant_solo_passed_vcf(&self.id, &self.time_point);
-        info!("Loading variants from {}: {}", caller, vcf_passed);
+        info!("Loading variants from {caller}: {vcf_passed}");
         let variants = read_vcf(&vcf_passed)
             .map_err(|e| anyhow::anyhow!("Failed to read DeepVariant VCF {}.\n{e}", vcf_passed))?;
         variants.par_iter().for_each(|v| {
@@ -246,7 +247,46 @@ impl Variants for DeepVariant {
 }
 
 impl Label for DeepVariant {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
+
+impl Version for DeepVariant {
+    /// Retrieves the DeepVariant version by running `/opt/deepvariant/bin/run_deepvariant --version` in its docker environment.
+    ///
+    /// # Errors
+    /// Returns an error if docker cannot be spawned, exits unsuccessfully, or no `DeepVariant version <version>` text is found in its output.
+    fn version(config: &Config) -> anyhow::Result<String> {
+        let out = Command::new("docker")
+            .args([
+                "run",
+                "--rm",
+                "--entrypoint",
+                "/opt/deepvariant/bin/run_deepvariant",
+                &format!("google/deepvariant:{}", config.deepvariant_bin_version),
+                "--version",
+            ])
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .output()
+            .context("failed to spawn docker")?;
+
+        if !out.status.success() {
+            let mut log = String::from_utf8_lossy(&out.stdout).to_string();
+            log.push_str(&String::from_utf8_lossy(&out.stderr));
+            anyhow::bail!("docker run failed: {}\n{}", out.status, log);
+        }
+
+        let mut log = String::from_utf8_lossy(&out.stdout).to_string();
+        log.push_str(&String::from_utf8_lossy(&out.stderr));
+
+        // e.g. “DeepVariant version 1.9.0”
+        let re = Regex::new(r"(?m)DeepVariant version\s+([^\s]+)")?;
+        let caps = re
+            .captures(&log)
+            .context("could not parse DeepVariant version from output")?;
+        Ok(caps.get(1).unwrap().as_str().to_string())
+    }
+}

+ 108 - 17
src/callers/nanomonsv.rs

@@ -10,7 +10,7 @@ use log::{debug, error, info, warn};
 
 use crate::{
     annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
-    collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun},
+    collection::{vcf::Vcf, Initialize, InitializeSolo, ShouldRun, Version},
     commands::bcftools::{bcftools_concat, bcftools_keep_pass, BcftoolsConfig},
     config::Config,
     helpers::{is_file_older, remove_dir_if_exists},
@@ -78,7 +78,7 @@ impl ShouldRun for NanomonSV {
         let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
 
         let result = [
-            is_file_older(&mrd_info_vcf , &self.config.normal_bam(&self.id), true).unwrap_or(true),
+            is_file_older(&mrd_info_vcf, &self.config.normal_bam(&self.id), true).unwrap_or(true),
             is_file_older(&passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true),
         ]
         .iter()
@@ -131,7 +131,7 @@ impl Run for NanomonSV {
         let mrd_result_vcf = format!("{mrd_out_prefix}.nanomonsv.result.vcf");
 
         if !Path::new(&mrd_result_vcf).exists() {
-            info!("Nanomonsv get from normal bam: {}.", mrd_bam);
+            info!("Nanomonsv get from normal bam: {mrd_bam}.");
             let report = nanomonsv_get(&mrd_bam, &mrd_out_prefix, None, None, &self.config)
                 .context(format!(
                     "Error while running NanomonSV get for {mrd_result_vcf}"
@@ -145,7 +145,7 @@ impl Run for NanomonSV {
         }
 
         if !Path::new(&diag_result_vcf).exists() {
-            info!("NanomonSV get from tumoral bam: {}.", diag_bam);
+            info!("NanomonSV get from tumoral bam: {diag_bam}.");
             let report = nanomonsv_get(
                 &diag_bam,
                 &diag_out_prefix,
@@ -167,7 +167,7 @@ impl Run for NanomonSV {
         if !Path::new(&vcf_passed).exists() {
             let report =
                 bcftools_keep_pass(&diag_result_vcf, &vcf_passed, BcftoolsConfig::default())
-                    .context(format!("Can't index {}", vcf_passed))?;
+                    .context(format!("Can't index {vcf_passed}"))?;
             report
                 .save_to_file(&format!("{}/bcftools_pass_", self.log_dir))
                 .context("Failed to save report")?;
@@ -190,6 +190,34 @@ impl CallerCat for NanomonSV {
     }
 }
 
+impl Version for NanomonSV {
+    /// Retrieves the NanomonSV version by running `nanomonsv --version`.
+    ///
+    /// # Errors
+    /// Returns an error if command execution fails or the `stdout: nanomonsv ` marker (followed by a newline) is not found in the log.
+    fn version(config: &Config) -> anyhow::Result<String> {
+        let args = ["--version"];
+        let mut cmd_run = CommandRun::new(&config.nanomonsv_bin, &args);
+
+        let report = run_wait(&mut cmd_run).context(format!(
+            "Error while running `NanomonSV {}`",
+            args.join(" ")
+        ))?;
+        let log = report.log;
+        let start = log
+            .find("stdout: nanomonsv ")
+            .context("Failed to find 'stdout: nanomonsv ' in the log")?;
+        let start_index = start + "stdout: nanomonsv ".len();
+        let end = log[start_index..]
+            .find('\n')
+            .context("Failed to find newline after 'stdout: nanomonsv '")?;
+        Ok(log[start_index..start_index + end]
+            .to_string()
+            .trim()
+            .to_string())
+    }
+}
+
 impl Variants for NanomonSV {
     /// Loads and annotates the variants from the NanomonSV PASS VCF.
     ///
@@ -201,7 +229,7 @@ impl Variants for NanomonSV {
         let add = vec![caller.clone()];
         let vcf_passed = self.config.nanomonsv_passed_vcf(&self.id);
 
-        info!("Loading variants from {}: {}", caller, vcf_passed);
+        info!("Loading variants from {caller}: {vcf_passed}");
 
         let variants = read_vcf(&vcf_passed)
             .map_err(|e| anyhow::anyhow!("Failed to read NanomonSV VCF {}.\n{e}", vcf_passed))?;
@@ -221,24 +249,40 @@ impl Variants for NanomonSV {
 }
 
 impl Label for NanomonSV {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
 
-/// SOLO
+/// NanomonSV caller in solo (single-sample) mode.
+///
+/// Processes a single BAM file to detect structural variants without a matched control.
 #[derive(Debug)]
 pub struct NanomonSVSolo {
+    /// Sample identifier
     pub id: String,
+    /// Path to input BAM file
     pub bam: String,
+    /// Time point identifier (e.g., "normal" or "tumor")
     pub time_point: String,
+    /// Output directory for NanomonSV results
     pub out_dir: String,
+    /// Directory for log files
     pub log_dir: String,
+    /// Path to PASS-filtered VCF output
     pub vcf_passed: String,
+    /// Pipeline configuration
     pub config: Config,
 }
 
 impl InitializeSolo for NanomonSVSolo {
+    /// Initializes NanomonSV solo analysis for a sample at a specific time point.
+    ///
+    /// Creates necessary output and log directories.
+    ///
+    /// # Errors
+    /// Returns an error if directory creation fails.
     fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initialize Nanomonsv solo for {id} {time}.");
@@ -269,6 +313,12 @@ impl InitializeSolo for NanomonSVSolo {
 }
 
 impl Run for NanomonSVSolo {
+    /// Runs the NanomonSV solo pipeline: parse, get, and filter steps.
+    ///
+    /// Skips steps if their output files already exist.
+    ///
+    /// # Errors
+    /// Returns an error if any pipeline step fails or log files cannot be written.
     fn run(&mut self) -> anyhow::Result<()> {
         // Parse
         info!("Nanomonsv Parse");
@@ -318,6 +368,7 @@ impl Run for NanomonSVSolo {
 }
 
 impl CallerCat for NanomonSVSolo {
+    /// Returns the caller annotation based on whether this is a normal or tumor sample.
     fn caller_cat(&self) -> Annotation {
         let Config {
             normal_name,
@@ -335,12 +386,17 @@ impl CallerCat for NanomonSVSolo {
 }
 
 impl Label for NanomonSVSolo {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
 
 impl Variants for NanomonSVSolo {
+    /// Loads variants from the PASS-filtered VCF and adds caller annotations.
+    ///
+    /// # Errors
+    /// Returns an error if VCF reading fails.
     fn variants(&self, annotations: &Annotations) -> anyhow::Result<VariantCollection> {
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
@@ -362,7 +418,10 @@ impl Variants for NanomonSVSolo {
     }
 }
 
-// Helper functions
+/// Runs NanomonSV parse step to extract SV breakpoint information from a BAM file.
+///
+/// # Errors
+/// Returns an error if command execution fails.
 pub fn nanomonsv_parse(bam: &str, out_prefix: &str, config: &Config) -> anyhow::Result<RunReport> {
     let args = vec![
         "parse",
@@ -398,8 +457,8 @@ fn somatic_parse(
     config: &Config,
     log_dir: &str,
 ) -> anyhow::Result<()> {
-    let diag_out_prefix = format!("{}/{}_diag", diag_out_dir, id);
-    let mrd_out_prefix = format!("{}/{}_mrd", mrd_out_dir, id);
+    let diag_out_prefix = format!("{diag_out_dir}/{id}_diag");
+    let mrd_out_prefix = format!("{mrd_out_dir}/{id}_mrd");
 
     let diag_info_vcf = format!("{diag_out_prefix}.bp_info.sorted.bed.gz");
     let mrd_info_vcf = format!("{mrd_out_prefix}.bp_info.sorted.bed.gz");
@@ -419,7 +478,12 @@ fn somatic_parse(
     Ok(())
 }
 
-// Helper function to spawn a thread for parsing
+/// Spawns a thread to run NanomonSV parse step.
+///
+/// Returns a dummy thread if output already exists.
+///
+/// # Errors
+/// Returns an error if parsing or log writing fails.
 fn spawn_parse_thread(
     bam: &str,
     out_prefix: &str,
@@ -433,25 +497,31 @@ fn spawn_parse_thread(
         let config = config.clone();
         let log_dir = log_dir.to_string();
 
-        info!("Nanomonsv parsing started for BAM: {}", bam);
+        info!("Nanomonsv parsing started for BAM: {bam}");
         let handle = thread::spawn(move || {
             let report = nanomonsv_parse(&bam, &out_prefix, &config)
-                .with_context(|| format!("Failed to parse BAM: {}", bam))?;
+                .with_context(|| format!("Failed to parse BAM: {bam}"))?;
 
             report
-                .save_to_file(&format!("{log_dir}/nanomonsv_parse_{}_", bam))
-                .with_context(|| format!("Failed to save report for BAM: {}", bam))?;
+                .save_to_file(&format!("{log_dir}/nanomonsv_parse_{bam}_"))
+                .with_context(|| format!("Failed to save report for BAM: {bam}"))?;
 
             Ok(())
         });
 
         Ok(handle)
     } else {
-        debug!("Nanomonsv parse results already exist: {}", info_vcf);
+        debug!("Nanomonsv parse results already exist: {info_vcf}");
         Ok(thread::spawn(|| Ok(()))) // Return a dummy thread that does nothing
     }
 }
 
+/// Runs NanomonSV get step to call structural variants.
+///
+/// Optionally uses a control sample for matched analysis.
+///
+/// # Errors
+/// Returns an error if command execution fails.
 pub fn nanomonsv_get(
     bam: &str,
     out_prefix: &str,
@@ -483,6 +553,12 @@ pub fn nanomonsv_get(
     Ok(res)
 }
 
+/// Creates a panel of normals (PON) from MRD NanomonSV results.
+///
+/// Searches for MRD directories, filters PASS variants, and concatenates them.
+///
+/// # Errors
+/// Returns an error if directory traversal, filtering, or concatenation fails.
 pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<()> {
     let mut passed_mrd = Vec::new();
     for mrd_dir in find_nanomonsv_dirs(&PathBuf::from(&config.result_dir), "mrd", 0, 3) {
@@ -546,6 +622,13 @@ pub fn nanomonsv_create_pon(config: &Config, pon_path: &str) -> anyhow::Result<(
     Ok(())
 }
 
+/// Recursively finds NanomonSV output directories for a specific time point.
+///
+/// # Arguments
+/// * `root` - Starting directory for search
+/// * `time_point` - Time point identifier (e.g., "mrd")
+/// * `depth` - Current recursion depth
+/// * `max_depth` - Maximum recursion depth
 pub fn find_nanomonsv_dirs(
     root: &Path,
     time_point: &str,
@@ -568,7 +651,7 @@ pub fn find_nanomonsv_dirs(
             if entry.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
                 && path
                     .to_string_lossy()
-                    .contains(&format!("{}/nanomonsv", time_point))
+                    .contains(&format!("{time_point}/nanomonsv"))
             {
                 Some(path)
             } else {
@@ -587,6 +670,14 @@ pub fn find_nanomonsv_dirs(
     result
 }
 
+/// Replaces a filename suffix in a path.
+///
+/// # Example
+/// ```
+/// let path = Path::new("/data/sample_mrd.nanomonsv.result.vcf");
+/// let new_path = replace_filename_suffix(path, "_mrd.nanomonsv.result.vcf", "_mrd_PASSED.vcf.gz");
+/// // new_path: /data/sample_mrd_PASSED.vcf.gz
+/// ```
 pub fn replace_filename_suffix(path: &Path, from: &str, to: &str) -> PathBuf {
     let file_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
 

+ 124 - 7
src/callers/savana.rs

@@ -82,7 +82,8 @@ impl ShouldRun for Savana {
     /// `true` if an update is needed, or if timestamps can't be checked (file doesn't exist)
     fn should_run(&self) -> bool {
         let passed_vcf = &self.config.savana_passed_vcf(&self.id);
-        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true).unwrap_or(true)
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true)
+            .unwrap_or(true)
             || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
         if result {
             warn!("Savana should run for id: {}.", self.id);
@@ -202,6 +203,10 @@ impl Run for Savana {
 }
 
 impl Version for Savana {
+    /// Retrieves the Savana version by running `savana --version` in its conda environment.
+    ///
+    /// # Errors
+    /// Returns an error if command execution fails or "Version " not found in output.
     fn version(config: &Config) -> anyhow::Result<String> {
         let savana_args = ["--version"];
         let args = [
@@ -232,6 +237,8 @@ impl Version for Savana {
 }
 
 impl CallerCat for Savana {
+    /// Tags the caller identity for downstream variant classification and traceability.
+    /// Identifies this tool as the Savana caller producing somatic variants.
     fn caller_cat(&self) -> Annotation {
         Annotation::Callers(Caller::Savana, Sample::Somatic)
     }
@@ -270,26 +277,49 @@ impl Variants for Savana {
 }
 
 impl Label for Savana {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
 
+/// A row from Savana copy number segmentation output.
+///
+/// Represents a genomic segment with associated copy number information,
+/// including optional allele-specific data.
 pub struct SavanaCNRow {
+    /// The genomic range (chromosome, start, end) of this segment
     pub range: GenomeRange,
+    /// Unique identifier for this segment
     pub segment_id: String,
+    /// Number of bins included in this segment
     pub bin_count: u32,
+    /// Sum of the lengths of all bins in this segment
     pub sum_of_bin_lengths: u32,
+    /// Weight assigned to this segment in the analysis
     pub weight: f32,
+    /// Estimated copy number for this segment
     pub copy_number: f32,
+    /// Minor allele copy number, if available (requires heterozygous SNPs)
     pub minor_allele_copy_number: Option<f64>,
+    /// Mean B-allele frequency for heterozygous SNPs in this segment
     pub mean_baf: Option<f64>,
+    /// Number of heterozygous SNPs in this segment
     pub n_het_snps: u32,
 }
 
 impl FromStr for SavanaCNRow {
     type Err = anyhow::Error;
 
+    /// Parses a tab-separated line from Savana copy number output.
+    ///
+    /// # Format
+    /// Expected columns: chromosome, start, end, segment_id, bin_count,
+    /// sum_of_bin_lengths, weight, copy_number, minor_allele_copy_number,
+    /// mean_baf, n_het_snps
+    ///
+    /// # Errors
+    /// Returns an error if any field is missing or cannot be parsed to the expected type.
     fn from_str(s: &str) -> anyhow::Result<Self> {
         let cells: Vec<&str> = s.split("\t").collect();
         let range = GenomeRange::from_1_inclusive_str(
@@ -352,11 +382,23 @@ impl FromStr for SavanaCNRow {
     }
 }
 
+/// Container for Savana copy number segmentation data.
+///
+/// Holds all copy number segments for a sample, loaded from Savana output files.
 pub struct SavanaCopyNumber {
+    /// Vector of copy number segments
     pub data: Vec<SavanaCNRow>,
 }
 
 impl SavanaCopyNumber {
+    /// Loads copy number data for a given sample ID.
+    ///
+    /// # Arguments
+    /// * `id` - Sample identifier
+    /// * `config` - Configuration containing path information
+    ///
+    /// # Errors
+    /// Returns an error if the file cannot be read or parsed.
     pub fn load_id(id: &str, config: Config) -> anyhow::Result<Self> {
         let path = config.savana_copy_number(id);
         let reader = get_gz_reader(&path)?;
@@ -373,18 +415,33 @@ impl SavanaCopyNumber {
     }
 }
 
-// bin chromosome  start   end perc_known_bases    use_bin tumour_read_count   normal_read_count
+/// A row from Savana read count output.
+///
+/// Contains read depth information for tumor and normal samples in genomic bins.
 pub struct SavanaRCRow {
+    /// The genomic range (chromosome, start, end) of this bin
     pub range: GenomeRange,
+    /// Percentage of bases in the bin with known sequence (not N)
     pub perc_known_bases: f32,
+    /// Whether this bin should be used in analysis
     pub use_bin: bool,
+    /// Number of reads from the tumor sample in this bin
     pub tumour_read_count: u32,
+    /// Number of reads from the normal sample in this bin
     pub normal_read_count: u32,
 }
 
 impl FromStr for SavanaRCRow {
     type Err = anyhow::Error;
 
+    /// Parses a tab-separated line from Savana read count output.
+    ///
+    /// # Format
+    /// Expected columns: bin (format: chr:start_end), chromosome, start, end,
+    /// perc_known_bases, use_bin, tumour_read_count, normal_read_count
+    ///
+    /// # Errors
+    /// Returns an error if any field is missing or cannot be parsed.
     fn from_str(s: &str) -> anyhow::Result<Self> {
         let cells: Vec<&str> = s.split("\t").collect();
         let bin = cells
@@ -396,10 +453,10 @@ impl FromStr for SavanaRCRow {
             .and_then(|(a, b)| {
                 b.split_once('_').map(|(b, c)| {
                     GenomeRange::from_1_inclusive_str(a, b, c)
-                        .context(format!("Error while parsing range {}", bin))
+                        .context(format!("Error while parsing range {bin}"))
                 })
             })
-            .context(format!("Invalid range format: {}", bin))??;
+            .context(format!("Invalid range format: {bin}"))??;
 
         Ok(Self {
             range,
@@ -428,11 +485,23 @@ impl FromStr for SavanaRCRow {
     }
 }
 
+/// Container for Savana read count data.
+///
+/// Holds read depth information across genomic bins for tumor-normal pairs.
 pub struct SavanaReadCounts {
+    /// Vector of read count bins
     pub data: Vec<SavanaRCRow>,
 }
 
 impl SavanaReadCounts {
+    /// Loads read count data for a given sample ID.
+    ///
+    /// # Arguments
+    /// * `id` - Sample identifier
+    /// * `config` - Configuration containing path information
+    ///
+    /// # Errors
+    /// Returns an error if the file cannot be read or parsed.
     pub fn load_id(id: &str, config: Config) -> anyhow::Result<Self> {
         let path = config.savana_read_counts(id);
         let reader = get_gz_reader(&path)?;
@@ -448,6 +517,10 @@ impl SavanaReadCounts {
         Ok(Self { data })
     }
 
+    /// Calculates the total number of tumor reads across all bins.
+    ///
+    /// # Returns
+    /// Sum of tumor read counts from all bins.
     pub fn n_tumoral_reads(&self) -> usize {
         self.data
             .par_iter()
@@ -455,6 +528,10 @@ impl SavanaReadCounts {
             .sum::<usize>()
     }
 
+    /// Calculates the total number of normal reads across all bins.
+    ///
+    /// # Returns
+    /// Sum of normal read counts from all bins.
     pub fn n_normal_reads(&self) -> usize {
         self.data
             .par_iter()
@@ -462,6 +539,13 @@ impl SavanaReadCounts {
             .sum::<usize>()
     }
 
+    /// Calculates normalized read counts per chromosome.
+    ///
+    /// Normalizes each chromosome's read count by the expected number of reads
+    /// based on the number of bins in that chromosome relative to total bins.
+    ///
+    /// # Returns
+    /// Vector of tuples containing (chromosome_name, normalized_count) pairs.
     pub fn norm_chr_counts(&self) -> Vec<(String, f64)> {
         let n_tum = self.n_tumoral_reads();
         let n_bins = self.data.len();
@@ -485,32 +569,65 @@ impl SavanaReadCounts {
     }
 }
 
+/// Savana copy number data with serialization support.
+///
+/// Alternative representation of copy number segments optimized for
+/// serialization and deserialization.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct SavanaCN {
+    /// Vector of copy number segments
     pub segments: Vec<CNSegment>,
 }
 
+/// A copy number segment with full metadata.
+///
+/// Contains all information about a genomic segment including copy number,
+/// allelic information, and quality metrics.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct CNSegment {
+    /// Chromosome name
     pub chromosome: String,
+    /// Start position (1-based, inclusive)
     pub start: u64,
+    /// End position (1-based, inclusive)
     pub end: u64,
+    /// Unique segment identifier
     pub segment_id: String,
+    /// Number of bins merged into this segment
     pub bin_count: u32,
+    /// Total length of all bins in base pairs
     pub sum_of_bin_lengths: u64,
+    /// Statistical weight of this segment
     pub weight: f64,
+    /// Estimated total copy number
     pub copy_number: f64,
+    /// Minor allele copy number (requires heterozygous SNPs)
     pub minor_allele_copy_number: Option<f64>,
+    /// Mean B-allele frequency across heterozygous SNPs
     pub mean_baf: Option<f64>,
+    /// Number of heterozygous SNPs in this segment
     pub no_het_snps: u32,
 }
 
 impl SavanaCN {
+    /// Parses a Savana copy number file into structured segments.
+    ///
+    /// # Arguments
+    /// * `id` - Sample identifier
+    /// * `config` - Configuration containing path information
+    ///
+    /// # Returns
+    /// Parsed copy number data with all segments.
+    ///
+    /// # Errors
+    /// Returns an error if:
+    /// * File cannot be opened or read
+    /// * Line has incorrect number of columns (expected 11)
+    /// * Any field cannot be parsed to the expected type
     pub fn parse_file(id: &str, config: &Config) -> anyhow::Result<Self> {
         let path = config.savana_copy_number(id);
-        let reader = get_gz_reader(&path)?;
-        // let file = File::open(&path).context("Failed to open the file")?;
-        // let reader = io::BufReader::new(file);
+        let reader =
+            get_gz_reader(&path).context(anyhow::anyhow!("Error while reading: {path}"))?;
 
         let mut segments = Vec::new();
 

+ 46 - 24
src/callers/severus.rs

@@ -70,7 +70,8 @@ impl ShouldRun for Severus {
     /// `true` if Severus needs to be re-run, otherwise `false`
     fn should_run(&self) -> bool {
         let passed_vcf = &self.config.severus_passed_vcf(&self.id);
-        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true).unwrap_or(true)
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true)
+            .unwrap_or(true)
             || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
         if result {
             info!("Severus should run for: {}.", self.id);
@@ -167,32 +168,12 @@ impl Run for Severus {
                 "Severus PASSED VCF already exists for {}, skipping execution.",
                 self.id
             );
-
         }
 
         Ok(())
     }
 }
 
-impl Version for Severus {
-    fn version(config: &Config) -> anyhow::Result<String> {
-        let args = [
-            "-c",
-            &format!(
-                "source {} && conda activate severus_env && {} {}",
-                config.conda_sh, config.severus_bin, "--version"
-            ),
-        ];
-        let mut cmd_run = CommandRun::new("bash", &args);
-        let report = run_wait(&mut cmd_run).context("Error while running `severus --version`")?;
-        let v = match report.log.split_once(':') {
-            Some((_, value)) => value.trim(),
-            None => anyhow::bail!("Error while parsing `severus --version`"),
-        };
-
-        Ok(v.to_string())
-    }
-}
 
 impl CallerCat for Severus {
     /// Returns the annotation category for Severus calls.
@@ -231,7 +212,7 @@ impl Variants for Severus {
         let vcf_passed = self.config.severus_passed_vcf(&self.id);
         let caller = self.caller_cat();
         let add = vec![caller.clone()];
-        info!("Loading variants from {}: {}", caller, vcf_passed);
+        info!("Loading variants from {caller}: {vcf_passed}");
 
         let variants = read_vcf(&vcf_passed)
             .map_err(|e| anyhow::anyhow!("Failed to read Severus VCF {}.\n{e}", vcf_passed))?;
@@ -250,22 +231,57 @@ impl Variants for Severus {
 }
 
 impl Label for Severus {
+    /// Returns the string label for this caller.
     fn label(&self) -> String {
         self.caller_cat().to_string()
     }
 }
 
-/// ========================================================================
+impl Version for Severus {
+    /// Retrieves the Severus version by running `severus --version` in its conda environment.
+    ///
+    /// # Errors
+    /// Returns an error if command execution fails or the output contains no `:` to split the version on.
+    fn version(config: &Config) -> anyhow::Result<String> {
+        let args = [
+            "-c",
+            &format!(
+                "source {} && conda activate severus_env && {} {}",
+                config.conda_sh, config.severus_bin, "--version"
+            ),
+        ];
+        let mut cmd_run = CommandRun::new("bash", &args);
+        let report = run_wait(&mut cmd_run).context("Error while running `severus --version`")?;
+        let v = match report.log.split_once(':') {
+            Some((_, value)) => value.trim(),
+            None => anyhow::bail!("Error while parsing `severus --version`"),
+        };
+
+        Ok(v.to_string())
+    }
+}
 
+/// Severus SV caller in solo (single-sample) mode.
+///
+/// Detects structural variants from long-read sequencing data without a matched control.
 #[derive(Debug)]
 pub struct SeverusSolo {
+    /// Sample identifier
     pub id: String,
+    /// Time point identifier (e.g., "diag")
     pub time: String,
+    /// Pipeline configuration
     pub config: Config,
+    /// Directory for log files
     pub log_dir: String,
 }
-
 impl InitializeSolo for SeverusSolo {
+    /// Initializes Severus solo analysis for a sample at a specific time point.
+    ///
+    /// Creates necessary log directory.
+    ///
+    /// # Errors
+    /// Returns an error if directory creation fails.
     fn initialize(id: &str, time: &str, config: Config) -> anyhow::Result<Self> {
         let log_dir = format!("{}/{}/log/severus_solo", config.result_dir, id);
         if !Path::new(&log_dir).exists() {
@@ -283,6 +299,12 @@ impl InitializeSolo for SeverusSolo {
 }
 
 impl Run for SeverusSolo {
+    /// Runs the Severus pipeline and filters for PASS variants.
+    ///
+    /// Skips steps if their output files already exist.
+    ///
+    /// # Errors
+    /// Returns an error if Severus execution, filtering, or log writing fails.
     fn run(&mut self) -> anyhow::Result<()> {
         let id = &self.id;
         let time = &self.time;

+ 2 - 0
src/config.rs

@@ -54,6 +54,7 @@ pub struct Config {
     pub somatic_pipe_stats: String,
 
     pub clairs_threads: u8,
+    pub clairs_docker_tag: String,
     pub clairs_force: bool,
     pub clairs_platform: String,
     pub clairs_output_dir: String,
@@ -154,6 +155,7 @@ impl Default for Config {
 
             // ClairS
             clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
+            clairs_docker_tag: "latest".to_string(),
             clairs_threads: 155,
             clairs_platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
             clairs_force: false,

+ 22 - 6
src/lib.rs

@@ -285,10 +285,26 @@ mod tests {
         NanomonSV::initialize(id, Config::default())?.run()
     }
 
+     #[test]
+    fn nanomonsv_version() -> anyhow::Result<()> {
+        init();
+        let v = NanomonSV::version(&Config::default())?;
+        println!("NanomonSV version: {v}");
+        let v = DeepVariant::version(&Config::default())?;
+        println!("DeepVariant version: {v}");
+        let v = Savana::version(&Config::default())?;
+        println!("Savana version: {v}");
+        let v = DeepSomatic::version(&Config::default())?;
+        println!("DeepSomatic version: {v}");
+        let v = ClairS::version(&Config::default())?;
+        println!("ClairS version: {v}");
+        Ok(())
+    }
+
     #[test]
     fn nanomonsv_solo() -> anyhow::Result<()> {
         init();
-        NanomonSVSolo::initialize("GRAND", "diag", Config::default())?.run()
+        NanomonSVSolo::initialize("LAKHDHAR", "diag", Config::default())?.run()
     }
 
     // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
@@ -469,11 +485,11 @@ mod tests {
         Severus::initialize("BANGA", Config::default())?.run()
     }
 
-    // #[test]
-    // fn run_severus_solo() -> anyhow::Result<()> {
-    //     init();
-    //     SeverusSolo::initialize("CAMEL","diag", Config::default())?.run()
-    // }
+    #[test]
+    fn run_severus_solo() -> anyhow::Result<()> {
+        init();
+        SeverusSolo::initialize("LAKHDHAR","diag", Config::default())?.run()
+    }
 
     #[test]
     fn run_savana() -> anyhow::Result<()> {