Thomas преди 1 месец
родител
ревизия
7abe30225b

+ 0 - 1
src/aligner/mod.rs

@@ -1,2 +1 @@
 pub mod minimap;
-

+ 0 - 2
src/annotation/alpha_genome.rs

@@ -1,3 +1 @@
 
-
-

+ 1 - 3
src/annotation/echtvar.rs

@@ -145,7 +145,7 @@ mod tests {
 
         let caller = Annotation::Callers(Caller::ClairS, Sample::Somatic);
 
-         variants.iter().for_each(|v| {
+        variants.iter().for_each(|v| {
             annotations.insert_update(v.hash(), std::slice::from_ref(&caller));
         });
 
@@ -156,6 +156,4 @@ mod tests {
 
         Ok(())
     }
-
-    
 }

+ 4 - 1
src/annotation/gnomad.rs

@@ -49,7 +49,10 @@ impl FromStr for GnomAD {
     fn from_str(s: &str) -> anyhow::Result<Self> {
         let vs: Vec<_> = s.split(";").collect();
         if vs.len() < 13 {
-            return Err(anyhow::anyhow!("Error not the right number of parts for {:?}", s));
+            return Err(anyhow::anyhow!(
+                "Error not the right number of parts for {:?}",
+                s
+            ));
         }
         if vs[0].contains("-1") {
             return Err(anyhow::anyhow!(

+ 14 - 14
src/annotation/mod.rs

@@ -1,9 +1,9 @@
+pub mod alpha_genome;
 pub mod cosmic;
 pub mod echtvar;
 pub mod gnomad;
 pub mod ncbi;
 pub mod vep;
-pub mod alpha_genome;
 
 use std::{
     collections::{HashMap, HashSet},
@@ -590,19 +590,19 @@ impl Annotations {
             //     .iter()
             //     .any(|a| matches!(a, Annotation::Callers(_, Sample::Somatic)));
             // if has_tumor && !has_somatic {
-                // push at most once
-                if anns
-                    .iter()
-                    .any(|a| matches!(a, Annotation::ConstitDepth(d) if *d < min_constit_depth))
-                {
-                    anns.push(Annotation::LowConstitDepth);
-                }
-                if anns
-                    .iter()
-                    .any(|a| matches!(a, Annotation::ConstitAlt(a0)    if *a0 > max_alt_constit))
-                {
-                    anns.push(Annotation::HighConstitAlt);
-                }
+            // push at most once
+            if anns
+                .iter()
+                .any(|a| matches!(a, Annotation::ConstitDepth(d) if *d < min_constit_depth))
+            {
+                anns.push(Annotation::LowConstitDepth);
+            }
+            if anns
+                .iter()
+                .any(|a| matches!(a, Annotation::ConstitAlt(a0)    if *a0 > max_alt_constit))
+            {
+                anns.push(Annotation::HighConstitAlt);
+            }
             // }
         }
     }

+ 1 - 1
src/annotation/vep.rs

@@ -2,7 +2,7 @@ use anyhow::anyhow;
 use bitcode::{Decode, Encode};
 use hashbrown::HashMap;
 use itertools::Itertools;
-use log::{ warn};
+use log::warn;
 use serde::{Deserialize, Serialize};
 use std::{
     cmp::{Ordering, Reverse},

+ 1 - 1
src/callers/mod.rs

@@ -149,6 +149,7 @@ use crate::{
 };
 
 pub mod clairs;
+pub mod coral;
 pub mod deep_somatic;
 pub mod deep_variant;
 pub mod gatk;
@@ -156,7 +157,6 @@ pub mod nanomonsv;
 pub mod savana;
 pub mod severus;
 pub mod straglr;
-pub mod coral;
 
 /// Runs all somatic variant callers sequentially for comprehensive multi-caller analysis.
 ///

+ 17 - 7
src/callers/severus.rs

@@ -78,11 +78,23 @@
 //! - [Severus GitHub](https://github.com/KolmogorovLab/Severus)
 //! - [Severus Paper](https://doi.org/10.1038/s41587-024-02340-1)
 use crate::{
-    annotation::{Annotation, Annotations, Caller, CallerCat, Sample}, collection::vcf::Vcf, commands::{
-        Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner, bcftools::BcftoolsKeepPassPrecise, longphase::LongphasePhase
-    }, config::Config, helpers::{is_file_older, remove_dir_if_exists}, io::vcf::read_vcf, locker::SampleLock, pipes::{Initialize, InitializeSolo, ShouldRun, Version}, run, runners::Run, variant::{
-        variant_collection::VariantCollection, vcf_variant::{Label, Variants}
-    }
+    annotation::{Annotation, Annotations, Caller, CallerCat, Sample},
+    collection::vcf::Vcf,
+    commands::{
+        bcftools::BcftoolsKeepPassPrecise, longphase::LongphasePhase, Command as JobCommand,
+        LocalRunner, SbatchRunner, SlurmParams, SlurmRunner,
+    },
+    config::Config,
+    helpers::{is_file_older, remove_dir_if_exists},
+    io::vcf::read_vcf,
+    locker::SampleLock,
+    pipes::{Initialize, InitializeSolo, ShouldRun, Version},
+    run,
+    runners::Run,
+    variant::{
+        variant_collection::VariantCollection,
+        vcf_variant::{Label, Variants},
+    },
 };
 use anyhow::Context;
 use log::{debug, info};
@@ -318,7 +330,6 @@ impl SbatchRunner for SeverusJob {
     }
 }
 
-
 impl CallerCat for Severus {
     /// Returns the annotation category for Severus calls.
     ///
@@ -547,7 +558,6 @@ mod tests {
         let mut caller = Severus::initialize("DUMCO", &config)?;
         caller.run()?;
 
-
         Ok(())
     }
 }

+ 4 - 4
src/callers/straglr.rs

@@ -292,7 +292,7 @@ impl Run for Straglr {
                     min_support: self.config.straglr_min_support,
                     min_cluster_size: self.config.straglr_min_cluster_size,
                     genotype_in_size: self.config.straglr_genotype_in_size,
-                    job_name: Some(format!("straglr_{id}_{}", self.config.normal_name))
+                    job_name: Some(format!("straglr_{id}_{}", self.config.normal_name)),
                 };
 
                 let output = run!(&self.config, &mut job)
@@ -323,7 +323,7 @@ impl Run for Straglr {
                     min_support: self.config.straglr_min_support,
                     min_cluster_size: self.config.straglr_min_cluster_size,
                     genotype_in_size: self.config.straglr_genotype_in_size,
-                    job_name: Some(format!("straglr_{id}_{}", self.config.tumoral_name))
+                    job_name: Some(format!("straglr_{id}_{}", self.config.tumoral_name)),
                 };
 
                 let output = run!(&self.config, &mut job)
@@ -920,7 +920,7 @@ impl Run for StraglrSolo {
                 min_support: self.config.straglr_min_support,
                 min_cluster_size: self.config.straglr_min_cluster_size,
                 genotype_in_size: self.config.straglr_genotype_in_size,
-                job_name: Some(format!("straglr_solo_{id}_{time}"))
+                job_name: Some(format!("straglr_solo_{id}_{time}")),
             };
 
             let report =
@@ -1131,7 +1131,7 @@ pub fn run_straglr_chunked(
             min_support: config.straglr_min_support,
             min_cluster_size: config.straglr_min_cluster_size,
             genotype_in_size: config.straglr_genotype_in_size,
-            job_name: Some(format!("straglr_{id}_{time_point}_{part_num}"))
+            job_name: Some(format!("straglr_{id}_{time_point}_{part_num}")),
         };
 
         jobs.push(job);

+ 0 - 1
src/collection/bam.rs

@@ -1009,7 +1009,6 @@ fn decode_str(n: u8) -> u8 {
     }
 }
 
-
 // pub fn base_at_new(
 //     record: &rust_htslib::bam::Record,
 //     at_pos: i64,

+ 2 - 2
src/collection/minknow.rs

@@ -106,8 +106,8 @@ impl MinKnowSampleSheet {
         use std::fs::File;
         use std::io::{self, BufRead};
 
-        let file =
-            File::open(&path).map_err(|e| anyhow::anyhow!("Can't open file: {}\n\t{e}", path.display()))?;
+        let file = File::open(&path)
+            .map_err(|e| anyhow::anyhow!("Can't open file: {}\n\t{e}", path.display()))?;
         let reader = io::BufReader::new(file);
 
         let mut lines = reader.lines();

+ 2 - 2
src/collection/mod.rs

@@ -1,9 +1,9 @@
 pub mod bam;
+pub mod bam_stats;
 pub mod flowcells;
 pub mod minknow;
 pub mod modbases;
 pub mod pod5;
+pub mod prom_run;
 pub mod run;
 pub mod vcf;
-pub mod bam_stats;
-pub mod prom_run;

+ 10 - 2
src/collection/pod5.rs

@@ -44,7 +44,11 @@ impl fmt::Display for Pod5 {
         writeln!(f, "  size      : {} bytes", self.file_size)?;
         writeln!(f, "  path      : {}", self.path.display())?;
         writeln!(f, "  experiment: {}", self.experiment_name)?;
-        writeln!(f, "  flow cell : {} ({})", self.flow_cell_id, self.flow_cell_product_code)?;
+        writeln!(
+            f,
+            "  flow cell : {} ({})",
+            self.flow_cell_id, self.flow_cell_product_code
+        )?;
         writeln!(f, "🧪 Sample")?;
         writeln!(f, "  id        : {}", self.sample_id)?;
         writeln!(f, "  kit       : {}", self.sequencing_kit)?;
@@ -139,7 +143,11 @@ impl fmt::Display for Pod5sRun {
         for c in &self.cases {
             writeln!(f, "    • {}", c)?;
         }
-        writeln!(f, "  Pod5 files    : {} (showing 0 details)", self.pod5s.len())?;
+        writeln!(
+            f,
+            "  Pod5 files    : {} (showing 0 details)",
+            self.pod5s.len()
+        )?;
         if let Some(ref bam) = self.bams_pass {
             writeln!(f, "  BAM pass      : {}", bam.display())?;
         }

+ 16 - 4
src/collection/prom_run.rs

@@ -72,13 +72,20 @@ use crate::{
     collection::{
         bam_stats::WGSBamStats,
         flowcells::IdInput,
-        minknow::{MinKnowSampleSheet, PoreStateEntry, parse_pore_activity_from_reader},
+        minknow::{parse_pore_activity_from_reader, MinKnowSampleSheet, PoreStateEntry},
         pod5::Pod5,
-    }, commands::{
+    },
+    commands::{
         dorado::DoradoAlign,
         modkit::ModkitSummary,
         samtools::{SamtoolsIndex, SamtoolsMergeMany, SamtoolsSort},
-    }, config::Config, helpers::{TempFileGuard, get_genome_sizes, list_files_recursive, remove_bam_with_index}, io::bam::read_sm_tag_or_inject, locker::SampleLock, pipes::InitializeSolo, run, run_many
+    },
+    config::Config,
+    helpers::{get_genome_sizes, list_files_recursive, remove_bam_with_index, TempFileGuard},
+    io::bam::read_sm_tag_or_inject,
+    locker::SampleLock,
+    pipes::InitializeSolo,
+    run, run_many,
 };
 
 /// Represent a complete ONT PromethION sequencing run with all associated files.
@@ -1409,7 +1416,12 @@ fn merge_into_existing_final(
 ///
 /// Source is already sorted (from sort_and_index_chunks), so only
 /// move and index are needed.
-fn create_new_final(source: &Path, destination: &Path, case: &IdInput, config: &Config) -> anyhow::Result<()> {
+fn create_new_final(
+    source: &Path,
+    destination: &Path,
+    case: &IdInput,
+    config: &Config,
+) -> anyhow::Result<()> {
     info!("  Creating new final BAM: {}", destination.display());
 
     // Source already sorted — just move and index

+ 1 - 2
src/collection/vcf.rs

@@ -128,8 +128,7 @@ impl VcfCollection {
 
 pub fn n_variants(path: &str) -> anyhow::Result<u64> {
     let csi_src = format!("{path}.csi");
-    let index = csi::fs::read(&csi_src)
-        .with_context(|| format!("can't read index of {path}"))?;
+    let index = csi::fs::read(&csi_src).with_context(|| format!("can't read index of {path}"))?;
 
     let n = index
         .reference_sequences()

+ 2 - 6
src/commands/bcftools.rs

@@ -227,7 +227,7 @@ impl super::SlurmRunner for BcftoolsKeepPassPrecise {
 
 impl super::SbatchRunner for BcftoolsKeepPassPrecise {
     /// Slurm resource request for the `bcftools keep pass precise` job.
-    fn slurm_params(&self) -> SlurmParams{
+    fn slurm_params(&self) -> SlurmParams {
         SlurmParams {
             job_name: Some("bcftools_keep_pass_precise".into()),
             cpus_per_task: Some(self.threads as u32),
@@ -386,11 +386,7 @@ impl super::Command for BcftoolsIndex {
 
     /// Returns the shell command that runs `bcftools index`.
     fn cmd(&self) -> String {
-        let format = if self.tbi {
-            "-t "
-        } else {
-            ""
-        };
+        let format = if self.tbi { "-t " } else { "" };
         format!(
             "{bin} index {format}--threads {threads} {vcf}",
             bin = self.bin,

+ 1 - 1
src/commands/longphase.rs

@@ -333,7 +333,7 @@ impl Run for LongphaseHap {
 impl ShouldRun for LongphaseHap {
     fn should_run(&self) -> bool {
         is_file_older(
-             self.bam_hp.to_string_lossy().as_ref(),
+            self.bam_hp.to_string_lossy().as_ref(),
             self.bam.to_string_lossy().as_ref(),
             false, // will remove the whole tumoral folder
         )

+ 20 - 6
src/commands/modkit.rs

@@ -496,20 +496,34 @@ pub fn read_dmr_tsv(path: &str, value_col_1based: usize) -> anyhow::Result<Vec<D
     let mut intervals = Vec::new();
     let mut i = 0usize;
 
-    while line.read(&mut reader).with_context(|| format!("I/O error in {path} around line {i}"))? {
-        if line.as_str().starts_with('#') || line.as_str().is_empty() { continue; }
+    while line
+        .read(&mut reader)
+        .with_context(|| format!("I/O error in {path} around line {i}"))?
+    {
+        if line.as_str().starts_with('#') || line.as_str().is_empty() {
+            continue;
+        }
         i += 1;
         let f = line.split_fields();
         let get = |idx: usize, name: &str| -> anyhow::Result<&str> {
-            f.get(idx).copied().ok_or_else(|| anyhow::anyhow!("Missing {name} at line {i}"))
+            f.get(idx)
+                .copied()
+                .ok_or_else(|| anyhow::anyhow!("Missing {name} at line {i}"))
         };
 
         let chrom = get(0, "chrom")?.to_string();
-        let start: u64 = get(1, "start")?.parse().with_context(|| format!("Invalid start at line {i}"))?;
-        let end: u64 = get(2, "end")?.parse().with_context(|| format!("Invalid end at line {i}"))?;
+        let start: u64 = get(1, "start")?
+            .parse()
+            .with_context(|| format!("Invalid start at line {i}"))?;
+        let end: u64 = get(2, "end")?
+            .parse()
+            .with_context(|| format!("Invalid end at line {i}"))?;
         let name = f.get(3).copied().unwrap_or(".").to_string();
         let value: f64 = get(value_idx, &format!("value col {value_col_1based}"))?
-            .parse().with_context(|| format!("Invalid float in value col {value_col_1based} at line {i}"))?;
+            .parse()
+            .with_context(|| {
+                format!("Invalid float in value col {value_col_1based} at line {i}")
+            })?;
 
         intervals.push(DmrInterval {
             chrom,

+ 5 - 1
src/de_novo/de_novo_pipe.rs

@@ -284,7 +284,11 @@ pub fn run_local_assembly_iterative(
                     .map_err(|e| log::warn!("primary_record failed: {e}"))
                     .ok()?;
                 let qname = resolved.qname().to_vec();
-                if !seen.contains(&qname) { Some(resolved) } else { None }
+                if !seen.contains(&qname) {
+                    Some(resolved)
+                } else {
+                    None
+                }
             })
             .collect();
 

+ 1 - 1
src/de_novo/mod.rs

@@ -1,5 +1,5 @@
-pub mod flye;
 pub mod de_novo_pipe;
+pub mod flye;
 pub mod medaka;
 
 use crate::commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmRunner};

+ 1 - 5
src/functions/assembler.rs

@@ -29,11 +29,7 @@ pub struct Assembler {
 }
 
 impl Assembler {
-    pub fn new(
-        id: String,
-        time_point: String,
-        config: AssemblerConfig,
-    ) -> Self {
+    pub fn new(id: String, time_point: String, config: AssemblerConfig) -> Self {
         Assembler {
             id,
             time_point,

+ 14 - 12
src/functions/fb_inv_stats.rs

@@ -2,7 +2,6 @@ use std::collections::HashMap;
 
 use crate::io::bam::{FbInv, SegmentOrder};
 
-
 /// Distribution statistics for a numeric field.
 #[derive(Debug, Clone, Default)]
 pub struct DistributionStats {
@@ -38,7 +37,11 @@ impl DistributionStats {
         };
 
         // Standard deviation
-        let variance: f64 = values.iter().map(|&v| (v as f64 - mean).powi(2)).sum::<f64>() / count as f64;
+        let variance: f64 = values
+            .iter()
+            .map(|&v| (v as f64 - mean).powi(2))
+            .sum::<f64>()
+            / count as f64;
         let std_dev = variance.sqrt();
 
         Self {
@@ -105,10 +108,10 @@ pub struct FbInvStats {
 /// Overlap metrics between alignment A and B.
 #[derive(Debug, Clone, Default)]
 pub struct OverlapStats {
-    pub overlap_bp_stats: DistributionStats,      // overlap in base pairs
-    pub overlap_fraction_a: DistributionStats,    // overlap / aln_a_len (stored as i64 * 1000 for precision)
-    pub overlap_fraction_b: DistributionStats,    // overlap / aln_b_len
-    pub jaccard_stats: DistributionStats,         // overlap / union (stored as i64 * 1000)
+    pub overlap_bp_stats: DistributionStats, // overlap in base pairs
+    pub overlap_fraction_a: DistributionStats, // overlap / aln_a_len (stored as i64 * 1000 for precision)
+    pub overlap_fraction_b: DistributionStats, // overlap / aln_b_len
+    pub jaccard_stats: DistributionStats,      // overlap / union (stored as i64 * 1000)
 }
 
 impl FbInvStats {
@@ -186,10 +189,7 @@ impl FbInvStats {
             positions.sort_unstable(); // ensure sorted
 
             let mean_distance = if count > 1 {
-                let total_distance: i64 = positions
-                    .windows(2)
-                    .map(|w| (w[1] - w[0]).abs())
-                    .sum();
+                let total_distance: i64 = positions.windows(2).map(|w| (w[1] - w[0]).abs()).sum();
                 total_distance as f64 / (count - 1) as f64
             } else {
                 0.0
@@ -287,7 +287,10 @@ impl FbInvStats {
 
         // Overlap metrics
         s.push_str("Overlap metrics:\n");
-        s.push_str(&format_dist("  overlap_bp", &self.overlap_stats.overlap_bp_stats));
+        s.push_str(&format_dist(
+            "  overlap_bp",
+            &self.overlap_stats.overlap_bp_stats,
+        ));
         s.push_str(&format!(
             "  overlap_frac_a: mean={:.1}%, median={:.1}%\n",
             self.overlap_stats.overlap_fraction_a.mean / 10.0,
@@ -328,4 +331,3 @@ fn format_dist(name: &str, d: &DistributionStats) -> String {
         name, d.min, d.max, d.mean, d.median, d.std_dev
     )
 }
-

+ 27 - 18
src/io/pod5_infos.rs

@@ -63,7 +63,8 @@ impl Pod5Info {
         let mut file = File::open(file_path)
             .map_err(|e| anyhow::anyhow!("Failed to open POD5 file '{}': {}", file_path, e))?;
 
-        let end = file.seek(SeekFrom::End(0))
+        let end = file
+            .seek(SeekFrom::End(0))
             .map_err(|e| anyhow::anyhow!("Failed to seek in '{}': {}", file_path, e))?;
 
         if end < 32 {
@@ -75,8 +76,9 @@ impl Pod5Info {
             .map_err(|e| anyhow::anyhow!("Seek failed in '{}': {}", file_path, e))?;
 
         let mut buffer = [0u8; 8];
-        file.read_exact(&mut buffer)
-            .map_err(|e| anyhow::anyhow!("Failed to read footer length from '{}': {}", file_path, e))?;
+        file.read_exact(&mut buffer).map_err(|e| {
+            anyhow::anyhow!("Failed to read footer length from '{}': {}", file_path, e)
+        })?;
 
         let footer_len = i64::from_le_bytes(buffer);
 
@@ -123,7 +125,10 @@ impl Pod5Info {
                         &mut file,
                         content.offset() as u64,
                         content.length() as u64,
-                    ).map_err(|e| anyhow::anyhow!("Failed to read RunInfoTable from '{}': {}", file_path, e))?;
+                    )
+                    .map_err(|e| {
+                        anyhow::anyhow!("Failed to read RunInfoTable from '{}': {}", file_path, e)
+                    })?;
 
                     let batch = match batches.first() {
                         Some(b) => b,
@@ -172,21 +177,25 @@ fn extract_column(col: &str, array: &ArrayRef, info: &mut Pod5Info) {
         }
         DataType::Utf8 => {
             if let Some(a) = array.as_any().downcast_ref::<StringArray>() {
-                let value: String = a.iter().filter_map(|v| v.map(|s| s.to_string())).collect::<Vec<_>>().join(" ");
+                let value: String = a
+                    .iter()
+                    .filter_map(|v| v.map(|s| s.to_string()))
+                    .collect::<Vec<_>>()
+                    .join(" ");
                 match col {
-                    "acquisition_id"          => info.acquisition_id = value,
-                    "experiment_name"         => info.experiment_name = value,
-                    "flow_cell_id"            => info.flow_cell_id = value,
-                    "flow_cell_product_code"  => info.flow_cell_product_code = value,
-                    "protocol_name"           => info.protocol_name = value,
-                    "protocol_run_id"         => info.protocol_run_id = value,
-                    "sample_id"               => info.sample_id = value,
-                    "sequencing_kit"          => info.sequencing_kit = value,
-                    "sequencer_position"      => info.sequencer_position = value,
+                    "acquisition_id" => info.acquisition_id = value,
+                    "experiment_name" => info.experiment_name = value,
+                    "flow_cell_id" => info.flow_cell_id = value,
+                    "flow_cell_product_code" => info.flow_cell_product_code = value,
+                    "protocol_name" => info.protocol_name = value,
+                    "protocol_run_id" => info.protocol_run_id = value,
+                    "sample_id" => info.sample_id = value,
+                    "sequencing_kit" => info.sequencing_kit = value,
+                    "sequencer_position" => info.sequencer_position = value,
                     "sequencer_position_type" => info.sequencer_position_type = value,
-                    "software"                => info.software = value,
-                    "system_name"             => info.system_name = value,
-                    "system_type"             => info.system_type = value,
+                    "software" => info.software = value,
+                    "system_name" => info.system_name = value,
+                    "system_type" => info.system_type = value,
                     _ => debug!("pod5: unrecognised Utf8 column '{col}'"),
                 }
             }
@@ -199,7 +208,7 @@ fn extract_column(col: &str, array: &ArrayRef, info: &mut Pod5Info) {
                     if let Some(dt) = Utc.timestamp_millis_opt(a.value(i)).single() {
                         match col {
                             "acquisition_start_time" => info.acquisition_start_time = dt,
-                            "protocol_start_time"    => info.protocol_start_time = dt,
+                            "protocol_start_time" => info.protocol_start_time = dt,
                             _ => debug!("pod5: unrecognised Timestamp column '{col}'"),
                         }
                     }

+ 11 - 10
src/io/straglr.rs

@@ -12,8 +12,8 @@ use std::{
     str::FromStr,
 };
 
-use crate::positions::GenomeRange;
 use super::readers::get_reader;
+use crate::positions::GenomeRange;
 
 /// Represents a single STR locus genotyped by Straglr.
 ///
@@ -106,16 +106,12 @@ impl StraglrRow {
 
     /// Returns the maximum allele size in bp, or `None` if no alleles are present.
     pub fn max_allele_size(&self) -> Option<f64> {
-        self.allele_sizes()
-            .into_iter()
-            .max_by(f64::total_cmp)
+        self.allele_sizes().into_iter().max_by(f64::total_cmp)
     }
 
     /// Returns the maximum copy number across all alleles, or `None` if no alleles are present.
     pub fn max_copy_number(&self) -> Option<f64> {
-        self.copy_numbers()
-            .into_iter()
-            .max_by(f64::total_cmp)
+        self.copy_numbers().into_iter().max_by(f64::total_cmp)
     }
 
     /// Returns `true` if any allele has a copy number ≥ `threshold_cn`.
@@ -226,7 +222,12 @@ pub fn read_straglr_tsv(path: &str) -> anyhow::Result<Vec<StraglrRow>> {
                 // All reads at same locus share genotype; store first occurrence
                 locus_map.entry(key).or_insert(row.genotype);
             }
-            Err(e) => warn!("Failed to parse line {}: {} (error: {})", line_num + 1, line, e),
+            Err(e) => warn!(
+                "Failed to parse line {}: {} (error: {})",
+                line_num + 1,
+                line,
+                e
+            ),
         }
     }
 
@@ -316,11 +317,11 @@ mod tests {
         assert_eq!(row.locus_length(), 100);
         assert_eq!(row.motif_length(), 3);
         assert_eq!(row.allele_sizes(), vec![60.0, 90.0]);
-        
+
         let cns = row.copy_numbers();
         assert!((cns[0] - 20.0).abs() < 0.01);
         assert!((cns[1] - 30.0).abs() < 0.01);
-        
+
         assert!((row.max_allele_size().unwrap() - 90.0).abs() < 0.01);
         assert!((row.max_copy_number().unwrap() - 30.0).abs() < 0.01);
         assert!(row.is_expanded(25.0));

+ 9 - 3
src/io/tsv.rs

@@ -20,8 +20,8 @@
 //! }
 //! ```
 
-use std::io::{self, BufRead};
 use anyhow::Context;
+use std::io::{self, BufRead};
 
 /// A reusable delimited line buffer.
 ///
@@ -50,12 +50,18 @@ impl Default for TsvLine {
 impl TsvLine {
     /// Create a new `TsvLine` with the default `\t` delimiter.
     pub fn new() -> Self {
-        Self { buf: String::new(), delimiter: '\t' }
+        Self {
+            buf: String::new(),
+            delimiter: '\t',
+        }
     }
 
     /// Create a new `TsvLine` with a custom delimiter (e.g. `','` for CSV).
     pub fn with_delimiter(delimiter: char) -> Self {
-        Self { buf: String::new(), delimiter }
+        Self {
+            buf: String::new(),
+            delimiter,
+        }
     }
 
     /// Read the next line from `reader` into the internal buffer.

+ 8 - 3
src/lib.rs

@@ -132,11 +132,13 @@
 
 use std::sync::{Arc, Mutex};
 
+pub mod aligner;
 pub mod annotation;
 pub mod callers;
 pub mod collection;
 pub mod commands;
 pub mod config;
+pub mod de_novo;
 pub mod functions;
 pub mod helpers;
 pub mod io;
@@ -148,8 +150,6 @@ pub mod runners;
 pub mod scan;
 pub mod slurm_helpers;
 pub mod variant;
-pub mod de_novo;
-pub mod aligner;
 
 #[macro_use]
 extern crate lazy_static;
@@ -1531,7 +1531,12 @@ mod tests {
 
         let mut variants = variant_collection::Variants::load_from_file(&path)?;
         info!("All: {}", variants.len());
-        variants.retain(|v| *v.alteration_category().first().unwrap_or(&AlterationCategory::Other) == AlterationCategory::SNV);
+        variants.retain(|v| {
+            *v.alteration_category()
+                .first()
+                .unwrap_or(&AlterationCategory::Other)
+                == AlterationCategory::SNV
+        });
         info!("SNV: {}", variants.len());
         variants.in_place_merge();
         info!("SNV: {}", variants.len());

+ 0 - 1
src/math.rs

@@ -1,6 +1,5 @@
 use rayon::prelude::*;
 
-
 /// Filters outliers from a dataset using the Modified Z-Score method and returns the indices of the outliers.
 ///
 /// The Modified Z-Score is a robust statistical method for detecting outliers. It is calculated as:

+ 2 - 3
src/pipes/mod.rs

@@ -1,11 +1,10 @@
 use std::path::Path;
 
-use crate::{config::Config};
+use crate::config::Config;
 
 // pub mod somatic;
-pub mod somatic_slurm;
 pub mod somatic;
-
+pub mod somatic_slurm;
 
 pub trait Initialize: Sized {
     fn initialize(id: &str, config: &Config) -> anyhow::Result<Self>;

+ 11 - 10
src/positions.rs

@@ -87,9 +87,10 @@ impl GenomePosition {
 
         // UCSC / omics interval: chr:+:start-end (interbase)
         let interval_str = format!("{chrom}:+:{start}-{end}");
-        let interval: omics::coordinate::Interval<omics::coordinate::system::Interbase> = interval_str
-            .parse()
-            .with_context(|| format!("Invalid interval: {interval_str}"))?;
+        let interval: omics::coordinate::Interval<omics::coordinate::system::Interbase> =
+            interval_str
+                .parse()
+                .with_context(|| format!("Invalid interval: {interval_str}"))?;
 
         let lifted = match machine.liftover(interval) {
             Some(v) if !v.is_empty() => v[0].clone(),
@@ -781,18 +782,17 @@ pub fn overlaps_par(positions: &[&GenomePosition], ranges: &[&GenomeRange]) -> V
 /// assert_eq!(out, vec![GenomeRange { contig: 1, range: 150..200 }]);
 /// ```
 pub fn range_intersection_par(a: &[&GenomeRange], b: &[&GenomeRange]) -> Vec<GenomeRange> {
-    let (a_contigs, b_contigs) = rayon::join(
-        || extract_contig_indices(a),
-        || extract_contig_indices(b),
-    );
+    let (a_contigs, b_contigs) =
+        rayon::join(|| extract_contig_indices(a), || extract_contig_indices(b));
 
-    a_contigs.into_par_iter()
+    a_contigs
+        .into_par_iter()
         .filter_map(|(contig, a_start, a_end)| {
             let (b_start, b_end) = find_contig_indices(&b_contigs, contig)?;
 
             let a_ranges = &a[a_start..a_end];
             let b_ranges = &b[b_start..b_end];
-            
+
             let mut intersections = Vec::new();
             let (mut i, mut j) = (0, 0);
 
@@ -853,7 +853,8 @@ pub fn extract_contig_indices(ranges: &[&GenomeRange]) -> Vec<(u8, usize, usize)
 
 // Binary search to find contig indices in precomputed list
 pub fn find_contig_indices(contigs: &[(u8, usize, usize)], target: u8) -> Option<(usize, usize)> {
-    contigs.binary_search_by(|(c, _, _)| c.cmp(&target))
+    contigs
+        .binary_search_by(|(c, _, _)| c.cmp(&target))
         .ok()
         .map(|idx| (contigs[idx].1, contigs[idx].2))
 }

+ 32 - 10
src/scan/bin.rs

@@ -2,9 +2,14 @@ use std::collections::HashMap;
 
 use anyhow::Context;
 use log::{error, warn};
-use rust_htslib::bam::{HeaderView, IndexedReader, Read, Record, ext::BamRecordExtensions, record::Aux};
+use rust_htslib::bam::{
+    ext::BamRecordExtensions, record::Aux, HeaderView, IndexedReader, Read, Record,
+};
 
-use crate::io::{bam::{fb_inv_from_record, primary_record, primary_records}, tsv::{parse_csv_u32_into, parse_u32}};
+use crate::io::{
+    bam::{fb_inv_from_record, primary_record, primary_records},
+    tsv::{parse_csv_u32_into, parse_u32},
+};
 
 /// A genomic bin containing reads from a specific region.
 ///
@@ -415,25 +420,40 @@ pub fn parse_bin_record_into<'a>(
     contig_expected: &str,
 ) -> anyhow::Result<(u32, &'a [u32], &'a [u32])> {
     let get = |i: usize, name: &str| -> anyhow::Result<&str> {
-        fields.get(i).copied().ok_or_else(|| anyhow::anyhow!("missing field {name} (col {i})"))
+        fields
+            .get(i)
+            .copied()
+            .ok_or_else(|| anyhow::anyhow!("missing field {name} (col {i})"))
     };
 
     let contig = get(0, "contig")?;
-    anyhow::ensure!(contig == contig_expected, "unexpected contig {contig:?}, expected {contig_expected:?}");
+    anyhow::ensure!(
+        contig == contig_expected,
+        "unexpected contig {contig:?}, expected {contig_expected:?}"
+    );
 
     let start = parse_u32(get(1, "start")?).context("bad start")?;
-    let end   = parse_u32(get(2, "end")?).context("bad end")?;
-    anyhow::ensure!(end >= start, "invalid bin coordinates: end < start ({start} > {end})");
+    let end = parse_u32(get(2, "end")?).context("bad end")?;
+    anyhow::ensure!(
+        end >= start,
+        "invalid bin coordinates: end < start ({start} > {end})"
+    );
 
     parse_csv_u32_into(&mut buf.depths, get(9, "depths")?).context("parse depths")?;
-    parse_csv_u32_into(&mut buf.lowq,   get(10, "lowq")?).context("parse lowq")?;
+    parse_csv_u32_into(&mut buf.lowq, get(10, "lowq")?).context("parse lowq")?;
 
     anyhow::ensure!(
         (end - start + 1) as usize == buf.depths.len(),
         "bin width mismatch: {}..{} → width {}, depths len {}",
-        start, end, end - start + 1, buf.depths.len()
+        start,
+        end,
+        end - start + 1,
+        buf.depths.len()
+    );
+    anyhow::ensure!(
+        buf.depths.len() == buf.lowq.len(),
+        "depth/lowq len mismatch"
     );
-    anyhow::ensure!(buf.depths.len() == buf.lowq.len(), "depth/lowq len mismatch");
 
     Ok((start, &buf.depths, &buf.lowq))
 }
@@ -458,7 +478,9 @@ pub struct BinStats {
 impl BinStats {
     #[inline]
     pub fn mean_coverage_from_depths(&self) -> f64 {
-        if self.depths.is_empty() { return 0.0; }
+        if self.depths.is_empty() {
+            return 0.0;
+        }
         self.depths.iter().sum::<u32>() as f64 / self.depths.len() as f64
     }
 }

+ 4 - 3
src/scan/scan.rs

@@ -843,9 +843,10 @@ fn validate_count_file(
     let mut line = TsvLine::new();
     let mut n = 0usize;
 
-    while line.read(&mut rdr).with_context(|| {
-        format!("failed reading {path} around line {}", n + 1)
-    })? {
+    while line
+        .read(&mut rdr)
+        .with_context(|| format!("failed reading {path} around line {}", n + 1))?
+    {
         n += 1;
         let fields = line.split_fields();
 

+ 3 - 1
src/slurm_helpers.rs

@@ -217,7 +217,9 @@ pub fn max_cpus_per_task_shortq() -> anyhow::Result<u32> {
         let mut parts = line.split_whitespace();
         let cpus = parts.next().unwrap_or("");
         let state = parts.next().unwrap_or("").to_lowercase();
-        if state.contains("drain") { continue; }
+        if state.contains("drain") {
+            continue;
+        }
 
         // CPUS(A/I/O/T) → take idle `I` at index 1
         if let Some(i) = cpus.split('/').nth(1).and_then(|v| v.parse::<u32>().ok()) {

+ 1 - 2
src/variant/mod.rs

@@ -39,7 +39,6 @@
 //! # Ok::<(), anyhow::Error>(())
 //! ```
 
-pub mod vcf_variant;
 pub mod variant_collection;
 pub mod variants_stats;
-
+pub mod vcf_variant;