Pārlūkot izejas kodu

gz_writer + variants_stats + high depth ranges + BinCount depths + Bin depths

Thomas 11 mēneši atpakaļ
vecāks
revīzija
9d84177b64

+ 3 - 0
src/config.rs

@@ -67,6 +67,7 @@ pub struct Config {
     pub nanomonsv_solo_output_dir: String,
     pub nanomonsv_solo_passed_vcf: String,
     pub somatic_pipe_force: bool,
+    pub min_high_quality_depth: u32,
 }
 
 // Here comes names that can't be changed from output of tools
@@ -187,6 +188,8 @@ impl Default for Config {
             somatic_min_constit_depth: 5,
             somatic_max_alt_constit: 1,
             min_shannon_entropy: 1.0,
+
+            min_high_quality_depth: 14,
         }
     }
 }

+ 30 - 29
src/helpers.rs

@@ -319,35 +319,36 @@ where
     bins
 }
 
-fn aggregate_data(data: &[(u128, u32)], num_bins: usize) -> Vec<(u32, u32)> {
-    if data.is_empty() || num_bins == 0 {
-        return vec![];
-    }
-
-    let bin_size = ((data.len() as f64) / (num_bins as f64)).ceil() as usize;
-
-    (0..num_bins)
-        .map(|i| {
-            let start = i * bin_size;
-            let end = (start + bin_size).min(data.len()); // Ensure `end` does not exceed `data.len()`
-
-            // If `start` is out of bounds, return (0, 0)
-            if start >= data.len() {
-                return (0, 0);
-            }
-
-            let bin = &data[start..end];
-
-            let sum_x: u128 = bin.iter().map(|&(x, _)| x).sum();
-            let count = bin.len() as u128;
-            let mean_x = (sum_x / count) as u32; // Rounded down to nearest u32
-
-            let sum_n: u32 = bin.iter().map(|&(_, n)| n).sum();
-
-            (mean_x, sum_n)
-        })
-        .collect()
-}
+// fn aggregate_data(data: &[(u128, u32)], num_bins: usize) -> Vec<(u32, u32)> {
+//     if data.is_empty() || num_bins == 0 {
+//         return vec![];
+//     }
+//
+//     let bin_size = ((data.len() as f64) / (num_bins as f64)).ceil() as usize;
+//
+//     (0..num_bins)
+//         .map(|i| {
+//             let start = i * bin_size;
+//             let end = (start + bin_size).min(data.len()); // Ensure `end` does not exceed `data.len()`
+//
+//             // If `start` is out of bounds, return (0, 0)
+//             if start >= data.len() {
+//                 return (0, 0);
+//             }
+//
+//             let bin = &data[start..end];
+//
+//             let sum_x: u128 = bin.iter().map(|&(x, _)| x).sum();
+//             let count = bin.len() as u128;
+//             let mean_x = (sum_x / count) as u32; // Rounded down to nearest u32
+//
+//             let sum_n: u32 = bin.iter().map(|&(_, n)| n).sum();
+//
+//             (mean_x, sum_n)
+//         })
+//         .collect()
+// }
+//
 
 pub fn app_storage_dir() -> anyhow::Result<PathBuf> {
     let app_name = env!("CARGO_PKG_NAME");

+ 13 - 1
src/io/gff.rs

@@ -5,6 +5,15 @@ use crate::{config::Config, positions::GenomeRange};
 
 use super::readers::get_gz_reader;
 
+/// Collects the genome ranges of all GFF records matching a feature type.
+///
+/// # Arguments
+/// * `feature_type` - GFF feature type to filter (e.g., "gene", "exon", "CDS", "transcript")
+/// * `config` - Application configuration containing refseq_gff path
+///
+/// # Returns
+/// Vec of genomic ranges matching specified feature type
+///
+/// # Errors
+/// Returns anyhow::Error for I/O failures and parsing errors
 pub fn features_ranges(feature_type: &str, config: &Config) -> anyhow::Result<Vec<GenomeRange>> {
     let path = &config.refseq_gff;
     let mut reader = get_gz_reader(path)
@@ -13,7 +22,10 @@ pub fn features_ranges(feature_type: &str, config: &Config) -> anyhow::Result<Ve
 
     let mut res = Vec::new();
     for result in reader.record_bufs() {
-        let record = result?;
+        let record = result.with_context(|| {
+            format!("Error reading GFF file {path}")
+        })?;
+
         let ty = record.ty();
         if ty != feature_type {
             continue;

+ 1 - 0
src/io/mod.rs

@@ -7,3 +7,4 @@ pub mod fasta;
 pub mod pod5_footer_generated;
 pub mod gff;
 pub mod bam;
+pub mod writers;

+ 26 - 0
src/io/writers.rs

@@ -0,0 +1,26 @@
+use std::{
+    fs::{self, File, OpenOptions},
+    path::Path,
+};
+
+use anyhow::Context;
+use bgzip::{BGZFWriter, Compression};
+
+pub fn get_gz_writer(path: &str, force: bool) -> anyhow::Result<BGZFWriter<File>> {
+    if !path.ends_with(".gz") {
+        anyhow::bail!("The file should end with gz");
+    }
+
+    if force && Path::new(path).exists() {
+        fs::remove_file(path).with_context(|| anyhow::anyhow!("Failed to remove file: {path}"))?;
+    }
+
+    let file = OpenOptions::new()
+        .write(true) // Open the file for writing
+        .create_new(true)
+        .truncate(true)
+        .open(path)
+        .with_context(|| anyhow::anyhow!("failed to open the file: {path}"))?;
+
+    Ok(BGZFWriter::new(file, Compression::default()))
+}

+ 17 - 7
src/lib.rs

@@ -27,7 +27,7 @@ lazy_static! {
 
 #[cfg(test)]
 mod tests {
-    use std::{collections::HashMap, fs, ops::Index, path::Path};
+    use std::{collections::HashMap, fs, path::Path};
 
     use annotation::{vep::{VepLine, VEP}, Annotations};
     use callers::{nanomonsv::nanomonsv_create_pon, savana::{Savana, SavanaReadCounts}, severus::{Severus, SeverusSolo}};
@@ -45,7 +45,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{annotation::{vep::VepImpact, Annotation}, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::{CNSegment, SavanaCN}, Callers}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, par_overlaps}, scan::scan::{par_whole_scan, somatic_scan}, variant::{variant::AlterationCategory, variant_collection::VariantsStats}};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::{CNSegment, SavanaCN}, Callers}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, par_overlaps}, scan::scan::{par_whole_scan, somatic_scan}, variant::{variant::AlterationCategory, variants_stats::{self, VariantsStats}}};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -822,17 +822,17 @@ Ok(())
         let merged = merge_overlapping_genome_ranges(&r);
         
         let variants = variant_collection::Variants::load_from_file(&path)?;
-        let full = variant_collection::somatic_rates(&variants.data, &merged, &config);
+        let full = variants_stats::somatic_rates(&variants.data, &merged, &config);
         info!("{full:#?}");
 
         let restrained: Vec<variant_collection::Variant> = variants.data.iter().filter(|v| v.vcf_variants.len() >= 2)
             .cloned().collect();
-        let min_2 = variant_collection::somatic_rates(&restrained, &merged, &config);
+        let min_2 = variants_stats::somatic_rates(&restrained, &merged, &config);
         info!("{min_2:#?}");
 
         let restrained: Vec<variant_collection::Variant> = restrained.iter().filter(|v| v.vcf_variants.len() >= 3)
             .cloned().collect();
-        let min_3 = variant_collection::somatic_rates(&restrained, &merged, &config);
+        let min_3 = variants_stats::somatic_rates(&restrained, &merged, &config);
         info!("{min_3:#?}");
 
 
@@ -869,6 +869,16 @@ Ok(())
         Ok(())
     }
 
-
-
+    #[test]
+    fn quality_ranges() -> anyhow::Result<()> {
+        init();
+        let r = variants_stats::high_depth_somatic("ADJAGBA", &Config::default())?;
+        r.iter().for_each(|e| {
+            let contig = e.key();
+            let ranges = e.value();
+            let su = ranges.iter().map(|range| range.range.len()).sum::<usize>();
+            info!("{contig}: {su} high depth bases");
+        });
+        Ok(())
+    }
 }

+ 1 - 0
src/math.rs

@@ -63,6 +63,7 @@ pub fn filter_outliers_modified_z_score_with_indices(
     indices
         .into_iter()
         .zip(data.iter())
+        .par_bridge()
         .filter_map(|(index, &x)| {
             // Calculate the Modified Z-Score
             let modified_z_score = 0.6745 * (x - median).abs() / mad;

+ 2 - 1
src/pipes/somatic.rs

@@ -20,7 +20,8 @@ use crate::{
     runners::Run,
     variant::{
         variant::{load_variants, run_variants, CallerBox},
-        variant_collection::{ExternalAnnotation, VariantCollection, Variants, VariantsStats},
+        variant_collection::{ExternalAnnotation, VariantCollection, Variants},
+        variants_stats::VariantsStats,
     },
 };
 

+ 76 - 40
src/scan/bin.rs

@@ -1,6 +1,7 @@
 use std::collections::HashMap;
 
 use anyhow::Context;
+use log::debug;
 use rust_htslib::bam::{ext::BamRecordExtensions, record::Aux, IndexedReader, Read, Record};
 
 use crate::io::bam::{primary_record, primary_records};
@@ -21,11 +22,13 @@ pub struct Bin {
     /// Map of read query names to their BAM records
     pub reads_store: HashMap<Vec<u8>, Record>,
     // Indexed BAM reader for fetching additional records (field currently commented out)
-    pub bam_reader: IndexedReader,
+    // pub bam_reader: IndexedReader,
     // Average read length within this bin (field currently commented out)
-    pub reads_mean_len: u32,
+    // pub reads_mean_len: u32,
     /// Number of reads filtered due to low mapping quality
     pub n_low_mapq: u32,
+    /// Per-position read depths recorded for this bin (one entry per pileup position)
+    pub depths: Vec<u32>,
 }
 
 impl Bin {
@@ -49,10 +52,15 @@ impl Bin {
     /// let bin = Bin::new("sample.bam", "chr1", 1000000, 1000)?;
     /// println!("Loaded {} reads", bin.n_reads());
     /// ```
-    pub fn new(bam_path: &str, contig: &str, start: u32, length: u32) -> anyhow::Result<Self> {
-        const DEFAULT_MAPQ: u8 = 50;
-        let mut bam_reader = IndexedReader::from_path(bam_path)
-            .with_context(|| format!("Can't open BAM file: {}", bam_path))?;
+    pub fn new(
+        bam_reader: &mut IndexedReader,
+        contig: &str,
+        start: u32,
+        length: u32,
+        min_mapq: u8,
+    ) -> anyhow::Result<Self> {
+        // let mut bam_reader = IndexedReader::from_path(bam_path)
+        //     .with_context(|| format!("Can't open BAM file: {}", bam_path))?;
 
         // Calculate inclusive end position
         let end = start + length - 1;
@@ -66,44 +74,69 @@ impl Bin {
 
         let mut reads_store: HashMap<Vec<u8>, Record> = HashMap::new();
         let mut n_low_mapq = 0;
-        let mut lengths = Vec::new();
+        // let mut lengths = Vec::new();
 
         // Process all records in the region
-        for read_result in bam_reader.records() {
-            let record = read_result.context("Error while parsing BAM record")?;
-
-            // Skip reads with low mapping quality
-            if record.mapq() < DEFAULT_MAPQ {
-                n_low_mapq += 1;
+        // for read_result in bam_reader.records() {
+        //     let record = read_result.context("Error while parsing BAM record")?;
+        //
+        //     // Skip reads with low mapping quality
+        //     if record.mapq() < min_mapq {
+        //         n_low_mapq += 1;
+        //         continue;
+        //     }
+        //
+        //     // Store the read length
+        //     lengths.push(
+        //         record
+        //             .reference_end()
+        //             .saturating_sub(record.reference_start()),
+        //     );
+        //
+        //     // Store the record by query name
+        //     reads_store.insert(record.qname().to_vec(), record);
+        // }
+        // Calculate mean read length, handling the empty case
+        // let reads_mean_len = if lengths.is_empty() {
+        //     0
+        // } else {
+        //     (lengths.iter().sum::<i64>() as f64 / lengths.len() as f64) as u32
+        // };
+
+        let mut depths = Vec::new();
+        for pileup in bam_reader.pileup() {
+            let pileup = pileup?;
+            let pos = pileup.pos();
+
+            if pos < start {
                 continue;
+            } else if pos > end {
+                break;
             }
 
-            // Store the read length
-            lengths.push(
-                record
-                    .reference_end()
-                    .saturating_sub(record.reference_start()),
-            );
-
-            // Store the record by query name
-            reads_store.insert(record.qname().to_vec(), record);
+            let depth = pileup
+                .alignments()
+                .filter(|a| {
+                    let record = a.record();
+                    if record.mapq() < min_mapq {
+                        n_low_mapq += 1;
+                        return false;
+                    }
+                    reads_store.insert(record.qname().to_vec(), record);
+
+                    true
+                })
+                .count();
+            depths.push(depth as u32);
         }
 
-        // Calculate mean read length, handling the empty case
-        let reads_mean_len = if lengths.is_empty() {
-            0
-        } else {
-            (lengths.iter().sum::<i64>() as f64 / lengths.len() as f64) as u32
-        };
-
         Ok(Bin {
             contig: contig.to_string(),
             start,
             end,
             reads_store,
-            bam_reader,
             n_low_mapq,
-            reads_mean_len,
+            depths,
         })
     }
 
@@ -125,11 +158,11 @@ impl Bin {
     /// # Returns
     ///
     /// A vector of primary alignment records for reads with supplementary alignments.
-    pub fn sa_primary(&mut self) -> Vec<Record> {
+    pub fn sa_primary(&mut self, bam_reader: &mut IndexedReader) -> Vec<Record> {
         self.reads_store
             .values()
             .filter(|record| matches!(record.aux(b"SA"), Ok(Aux::String(_))))
-            .map(|record| primary_record(&mut self.bam_reader, record.clone()))
+            .map(|record| primary_record(bam_reader, record.clone()))
             .collect()
     }
 
@@ -255,7 +288,7 @@ impl Bin {
     /// # Returns
     ///
     /// A vector of primary alignment records.
-    pub fn se_primary(&mut self, pos: u32) -> Vec<Record> {
+    pub fn se_primary(&mut self, pos: u32, bam_reader: &mut IndexedReader) -> Vec<Record> {
         let records: Vec<Record> = self
             .reads_store
             .values()
@@ -265,7 +298,7 @@ impl Bin {
             .cloned()
             .collect();
 
-        primary_records(&mut self.bam_reader, records)
+        primary_records(bam_reader, records)
     }
 
     /// Gets primary alignments for reads that start at a specific position.
@@ -277,11 +310,11 @@ impl Bin {
     /// # Returns
     ///
     /// A vector of primary alignment records.
-    pub fn s_primary(&mut self, pos: u32) -> Vec<Record> {
+    pub fn s_primary(&mut self, pos: u32, bam_reader: &mut IndexedReader) -> Vec<Record> {
         self.reads_store
             .values()
             .filter(|record| record.reference_start() as u32 == pos)
-            .map(|record| primary_record(&mut self.bam_reader, record.clone()))
+            .map(|record| primary_record(bam_reader, record.clone()))
             .collect()
     }
 
@@ -294,11 +327,11 @@ impl Bin {
     /// # Returns
     ///
     /// A vector of primary alignment records.
-    pub fn e_primary(&mut self, pos: u32) -> Vec<Record> {
+    pub fn e_primary(&mut self, pos: u32, bam_reader: &mut IndexedReader) -> Vec<Record> {
         self.reads_store
             .values()
             .filter(|record| record.reference_end() as u32 == pos)
-            .map(|record| primary_record(&mut self.bam_reader, record.clone()))
+            .map(|record| primary_record(bam_reader, record.clone()))
             .collect()
     }
 
@@ -339,5 +372,8 @@ impl Bin {
         // Divide by the bin length to get average coverage
         total_bases_covered / bin_length
     }
-}
 
+    pub fn mean_coverage_from_depths(&self) -> f64 {
+        self.depths.iter().sum::<u32>() as f64 / self.depths.len() as f64
+    }
+}

+ 0 - 2
src/scan/mod.rs

@@ -1,4 +1,2 @@
-
 pub mod bin;
 pub mod scan;
-

+ 69 - 56
src/scan/scan.rs

@@ -1,14 +1,16 @@
-use std::{fmt, fs, fs::File, io::Write, sync::Mutex};
+use std::{fmt, fs, io::Write, sync::Mutex};
 
+use anyhow::Context;
 use log::{debug, error, info};
 use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator};
 use rayon::{
     iter::{IntoParallelIterator, ParallelIterator},
     slice::ParallelSliceMut,
 };
+use rust_htslib::bam::IndexedReader;
 
+use crate::io::writers::get_gz_writer;
 use crate::math::filter_outliers_modified_z_score_with_indices;
-use crate::positions::{contig_to_num, GenomeRange, GetGenomeRange};
 use crate::{config::Config, io::dict::read_dict, scan::bin::Bin};
 
 /// Represents a count of reads in a genomic bin, including various metrics and outlier information.
@@ -32,6 +34,7 @@ pub struct BinCount {
     pub ratio_end: f64,
     /// Optional vector of outlier types for this bin.
     pub outlier: Option<Vec<BinOutlier>>,
+    pub depths: Vec<u32>,
 }
 
 impl From<&Bin> for BinCount {
@@ -52,11 +55,12 @@ impl From<&Bin> for BinCount {
             start: bin.start,
             end: bin.end,
             n_reads: n_reads as u32,
-            coverage: bin.mean_coverage(),
+            coverage: bin.mean_coverage_from_depths(),
             ratio_sa: n_sa as f64 / n_reads_float,
             ratio_start: n_start as f64 / n_reads_float,
             ratio_end: n_end as f64 / n_reads_float,
             outlier: None,
+            depths: bin.depths.clone(),
         }
     }
 }
@@ -68,7 +72,7 @@ impl BinCount {
     /// A `String` representing the `BinCount` as a TSV row.
     pub fn to_tsv_row(&self) -> String {
         format!(
-            "{}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.6}\t{:.6}\t{}",
+            "{}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.6}\t{:.6}\t{}\t{}",
             self.contig,
             self.start,
             self.end,
@@ -84,7 +88,12 @@ impl BinCount {
                     .map(|e| e.to_string())
                     .collect::<Vec<String>>()
                     .join(", "))
-                .unwrap_or_default()
+                .unwrap_or_default(),
+            self.depths
+                .iter()
+                .map(|e| e.to_string())
+                .collect::<Vec<String>>()
+                .join(",")
         )
     }
 
@@ -104,7 +113,7 @@ impl BinCount {
     /// - The outlier field is not in the expected format.
     pub fn from_tsv_row(row: &str) -> anyhow::Result<Self> {
         let fields: Vec<&str> = row.split('\t').collect();
-        if fields.len() != 9 {
+        if fields.len() != 10 {
             anyhow::bail!("Invalid number of fields in TSV row");
         }
 
@@ -124,6 +133,15 @@ impl BinCount {
             None
         };
 
+        let depths = fields[9]
+            .split(',')
+            .map(|s| {
+                s.trim() // Remove any whitespace around numbers
+                    .parse::<u32>()
+                    .with_context(|| format!("Failed to parse '{}' as u32", s))
+            })
+            .collect::<anyhow::Result<Vec<u32>>>()?;
+
         Ok(BinCount {
             contig: fields[0].to_string(),
             start: fields[1].parse()?,
@@ -134,6 +152,7 @@ impl BinCount {
             ratio_start: fields[6].parse()?,
             ratio_end: fields[7].parse()?,
             outlier,
+            depths,
         })
     }
 }
@@ -144,7 +163,6 @@ impl BinCount {
 //     }
 // }
 
-
 /// Represents types of outliers that can be detected in a genomic bin.
 #[derive(Debug, Clone)]
 pub enum BinOutlier {
@@ -231,13 +249,9 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
     let bin_size = config.count_bin_size;
     let chunk_n_bin = config.count_n_chunks;
     info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
-    fs::create_dir(out_dir)?;
+    fs::create_dir_all(out_dir)?;
 
     for (contig, length) in read_dict(&config.dict_file)? {
-        if contig.as_str() == "chrM" {
-            continue;
-        }
-
         let n_bin = length / bin_size;
         // Calculate number of chunks using ceiling division
         let n_chunks = n_bin.div_ceil(chunk_n_bin);
@@ -259,6 +273,9 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
                 // Calculate number of bins in this chunk with ceiling division
                 let n_bins_in_chunk = chunk_length.div_ceil(bin_size);
 
+                let mut bam_reader = IndexedReader::from_path(bam_path)
+                    .with_context(|| format!("Can't open BAM file: {}", bam_path)).unwrap();
+
                 // Process each bin in the chunk
                 (0..n_bins_in_chunk)
                     // .into_iter()
@@ -267,7 +284,13 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
                         // Ensure we don't exceed remaining length
                         let bin_length = std::cmp::min(bin_size, chunk_length - j * bin_size);
                         // debug!("chunk start:{chunk_start}, length: {chunk_length}, n_bins: {n_bins_in_chunk}, first bin start: {bin_start} bin length: {bin_length}");
-                        match Bin::new(bam_path, &contig, bin_start, bin_length) {
+                        match Bin::new(
+                            &mut bam_reader,
+                            &contig,
+                            bin_start,
+                            bin_length,
+                            config.bam_min_mapq,
+                        ) {
                             Ok(bin) => Some(BinCount::from(&bin)),
                             Err(e) => {
                                 error!("Failed to get Bin: {e}");
@@ -285,9 +308,11 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
         debug!("Scan {contig}, computing outliers");
         fill_outliers(&mut bins);
 
-        let out_file = format!("{out_dir}/{contig}_count.tsv");
+        let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
         debug!("Scan {contig}, writing file");
-        let mut file = File::create(out_file)?;
+
+        let mut file = get_gz_writer(&out_file, true)
+            .with_context(|| anyhow::anyhow!("failed to open the file: {out_file}"))?;
         for bin in bins {
             writeln!(file, "{}", bin.to_tsv_row())?;
         }
@@ -342,51 +367,38 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
 /// // After this call, some BinCount objects may have their outlier field populated
 /// ```
 pub fn fill_outliers(bin_counts: &mut [BinCount]) {
-    let bin_counts = Mutex::new(bin_counts);
-
-    fn get_ratio_sa(c: &BinCount) -> f64 {
-        c.ratio_sa
-    }
-    fn get_ratio_start(c: &BinCount) -> f64 {
-        c.ratio_start
-    }
-    fn get_ratio_end(c: &BinCount) -> f64 {
-        c.ratio_end
-    }
-    type OutlierTypeInfo = (fn(&BinCount) -> f64, BinOutlier);
+    let intermediate: Vec<_> = bin_counts
+        .par_iter()
+        .enumerate()
+        .map(|(i, c)| (i, [c.ratio_sa, c.ratio_start, c.ratio_end]))
+        .collect();
 
-    let outlier_types: [OutlierTypeInfo; 3] = [
-        (get_ratio_sa, BinOutlier::SA),
-        (get_ratio_start, BinOutlier::Start),
-        (get_ratio_end, BinOutlier::End),
-    ];
+    let outlier_types = [BinOutlier::SA, BinOutlier::Start, BinOutlier::End];
 
-    for (get_ratio, outlier_type) in outlier_types.iter() {
-        let (indices, ratios): (Vec<usize>, Vec<f64>) = bin_counts
-            .lock()
-            .unwrap()
-            .par_iter()
-            .enumerate()
-            .filter_map(|(i, c)| {
-                let ratio = get_ratio(c);
-                if !ratio.is_nan() {
-                    Some((i, ratio))
-                } else {
-                    None
-                }
-            })
-            .unzip();
+    let outliers: Vec<(usize, BinOutlier)> = outlier_types
+        .iter()
+        .enumerate()
+        .flat_map(|(ratio_idx, outlier_type)| {
+            let (indices, ratios): (Vec<_>, Vec<_>) = intermediate
+                .par_iter()
+                .filter_map(|(i, ratio_array)| {
+                    let ratio = ratio_array[ratio_idx];
+                    (!ratio.is_nan()).then_some((*i, ratio))
+                })
+                .unzip();
 
-        let outlier_indices = filter_outliers_modified_z_score_with_indices(&ratios, indices);
+            filter_outliers_modified_z_score_with_indices(&ratios, indices)
+                .into_iter()
+                .map(move |i| (i, outlier_type.clone()))
+        })
+        .collect();
 
-        outlier_indices.par_iter().for_each(|&i| {
-            let mut bin_counts = bin_counts.lock().unwrap();
-            bin_counts[i]
-                .outlier
-                .get_or_insert_with(Vec::new)
-                .push(outlier_type.clone());
-        });
-    }
+    outliers.iter().for_each(|(i, outlier_type)| {
+        bin_counts[*i]
+            .outlier
+            .get_or_insert_with(Vec::new)
+            .push(outlier_type.clone());
+    });
 }
 
 /// Performs a somatic scan for a given sample ID by analyzing both normal and tumoral BAM files.
@@ -427,3 +439,4 @@ pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
         config,
     )
 }
+

+ 1 - 0
src/variant/mod.rs

@@ -1,3 +1,4 @@
 pub mod variant;
 pub mod variant_collection;
+pub mod variants_stats;
 

+ 0 - 208
src/variant/variant_collection.rs

@@ -794,214 +794,6 @@ impl Variants {
     }
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct VariantsStats {
-    pub n: u32,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub alteration_categories: DashMap<String, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub cosmic: DashMap<u64, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub n_alts: DashMap<u32, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub depths: DashMap<u32, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub vafs: DashMap<OrderedFloat<f32>, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub vep_impact: DashMap<String, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub consequences: DashMap<String, u32>,
-    #[serde(serialize_with = "serialize_dashmap_sort")]
-    pub genes: DashMap<String, u32>,
-    pub n_gnomad: usize,
-    pub gnomad: Vec<(String, Vec<(f64, usize)>)>,
-}
-
-use serde::Serializer;
-
-pub fn serialize_dashmap_sort<S, T>(
-    data: &DashMap<T, u32>,
-    serializer: S,
-) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-    T: Serialize + Ord + std::hash::Hash + Clone,
-{
-    let ordered: BTreeMap<_, _> = data
-        .iter()
-        .map(|entry| (entry.key().clone(), *entry.value()))
-        .collect();
-
-    ordered.serialize(serializer)
-}
-
-impl VariantsStats {
-    pub fn new(variants: &Variants) -> Self {
-        let n = variants.data.len() as u32;
-        let alteration_categories: DashMap<String, u32> = DashMap::new();
-        let vep_impact: DashMap<String, u32> = DashMap::new();
-        let genes: DashMap<String, u32> = DashMap::new();
-        let consequences: DashMap<String, u32> = DashMap::new();
-        let n_alts: DashMap<u32, u32> = DashMap::new();
-        let depths: DashMap<u32, u32> = DashMap::new();
-        let vafs: DashMap<OrderedFloat<f32>, u32> = DashMap::new();
-        let cosmic: DashMap<u64, u32> = DashMap::new();
-        let gnomads: DashMap<String, Vec<f64>> = DashMap::new();
-
-        variants.data.par_iter().for_each(|v| {
-            if let Ok(best_vep) = v.best_vep() {
-                if let Some(impact) = best_vep.extra.impact {
-                    *vep_impact.entry(impact.to_string()).or_default() += 1;
-                }
-                if let Some(gene) = best_vep.extra.symbol {
-                    *genes.entry(gene).or_default() += 1;
-                }
-                if let Some(csqs) = best_vep.consequence {
-                    let mut csq: Vec<String> = csqs.into_iter().map(String::from).collect();
-                    csq.sort();
-                    csq.dedup();
-                    *consequences.entry(csq.join(", ")).or_default() += 1;
-                }
-            }
-
-            let (n_alt, depth) = v.n_alt_depth();
-            *n_alts.entry(n_alt as u32).or_default() += 1;
-            *depths.entry(depth as u32).or_default() += 1;
-
-            let vaf = OrderedFloat::from(((n_alt * 1_000.0 / depth).round() / 10.0) as f32);
-            if !vaf.is_nan() {
-                *vafs.entry(vaf).or_default() += 1;
-            }
-
-            v.annotations.iter().for_each(|annotation| {
-                match annotation {
-                    Annotation::Cosmic(v) => *cosmic.entry(v.cosmic_cnt).or_default() += 1,
-                    Annotation::GnomAD(v) => {
-                        v.to_vec()
-                            .iter()
-                            .map(|e| e.to_string_value_pair())
-                            .for_each(|(key, value)| {
-                                gnomads.entry(key).or_default().push(value);
-                            });
-                    }
-                    _ => (),
-                };
-            });
-            let mut alteration_category_str = v
-                .alteration_category()
-                .iter()
-                .map(|c| c.to_string())
-                .collect::<Vec<String>>();
-            alteration_category_str.sort();
-            alteration_category_str.dedup();
-
-            *alteration_categories
-                .entry(alteration_category_str.join(", "))
-                .or_default() += 1;
-        });
-
-        let mut n_gnomad = 0;
-        let gnomad = gnomads
-            .iter()
-            .map(|e| {
-                let data = e.value().to_vec();
-                n_gnomad = data.len();
-                (e.key().to_string(), bin_data(data, 0.0001))
-            })
-            .collect();
-
-        Self {
-            n,
-            alteration_categories,
-            cosmic,
-            gnomad,
-            vafs,
-            n_alts,
-            depths,
-            vep_impact,
-            consequences,
-            genes,
-            n_gnomad,
-        }
-    }
-
-    pub fn save_to_json(&self, filename: &str) -> anyhow::Result<()> {
-        let file = File::create(filename)
-            .map_err(|e| anyhow::anyhow!("Failed to create file: {}\n{e}", filename))?;
-        let mut writer = BGZFWriter::new(file, bgzip::Compression::default());
-
-        serde_json::to_writer(&mut writer, self)
-            .map_err(|e| anyhow::anyhow!("Failed to serialize JSON to file: {}\n{e}", filename))?;
-
-        writer.close().map_err(|e| {
-            anyhow::anyhow!("Failed to close BGZF writer for file: {}\n{e}", filename)
-        })?;
-
-        debug!("Successfully saved variants to {}", filename);
-        Ok(())
-    }
-    pub fn load_from_json(filename: &str) -> anyhow::Result<Self> {
-        let file =
-            File::open(filename).with_context(|| format!("Failed to open file: {}", filename))?;
-        let mut reader = BGZFReader::new(file)
-            .with_context(|| format!("Failed to create BGZF reader for file: {}", filename))?;
-
-        let variants: Self = serde_json::from_reader(&mut reader)
-            .with_context(|| format!("Failed to deserialize JSON from file: {}", filename))?;
-
-        debug!("Successfully loaded variants from {}", filename);
-        Ok(variants)
-    }
-}
-
-#[derive(Debug)]
-pub struct SomaticVariantRates {
-    pub wgs_length: u32,
-    pub total_variants: usize,
-    pub somatic_mutation_rate_wgs: f64,
-    pub exon_count: usize,
-    pub variants_in_coding: usize,
-    pub coding_variants: usize,
-    pub total_exon_bases: u32,
-    pub somatic_mutation_rate_coding: f64,
-    pub somatic_nonsynonymous_rate_coding: f64,
-}
-
-pub fn somatic_rates(
-    variants: &[Variant],
-    feature_ranges: &Vec<GenomeRange>,
-    config: &Config,
-) -> anyhow::Result<SomaticVariantRates> {
-    let ol = par_overlaps(variants, feature_ranges);
-
-    let n_coding = ol
-        .iter()
-        .filter_map(|i| variants[*i].best_vep().ok())
-        .filter_map(|bv| bv.impact())
-        .filter(|impact| *impact <= VepImpact::MODERATE)
-        .count();
-
-    let n_bases_m: u32 = feature_ranges.par_iter().map(|gr| gr.length()).sum();
-    let mega_base_m = n_bases_m as f64 / 10.0e6;
-
-    let wgs_len: u32 = read_dict(&config.dict_file)?.iter().map(|(_, l)| *l).sum();
-    let rate_wgs = variants.len() as f64 / (wgs_len as f64 / 10.0e6);
-
-    let n_exons_mb = ol.len() as f64 / mega_base_m;
-    let n_coding_mb = n_coding as f64 / mega_base_m;
-
-    Ok(SomaticVariantRates {
-        total_variants: variants.len(),
-        exon_count: feature_ranges.len(),
-        variants_in_coding: ol.len(),
-        coding_variants: n_coding,
-        total_exon_bases: n_bases_m,
-        wgs_length: wgs_len,
-        somatic_mutation_rate_wgs: rate_wgs,
-        somatic_mutation_rate_coding: n_exons_mb,
-        somatic_nonsynonymous_rate_coding: n_coding_mb,
-    })
-}
 
 /// Creates a new Variant instance from a collection of VcfVariants and annotations.
 ///

+ 306 - 0
src/variant/variants_stats.rs

@@ -0,0 +1,306 @@
+use std::{collections::{BTreeMap, HashMap}, io::BufRead};
+
+use anyhow::Context;
+use dashmap::DashMap;
+use log::debug;
+use ordered_float::OrderedFloat;
+use rayon::prelude::*;
+use serde::{Deserialize, Serialize, Serializer};
+
+use crate::{
+    annotation::{vep::VepImpact, Annotation},
+    config::Config,
+    helpers::bin_data,
+    io::{dict::read_dict, readers::get_gz_reader, writers::get_gz_writer},
+    positions::{contig_to_num, par_overlaps, GenomeRange},
+    scan::scan::BinCount,
+};
+
+use super::variant_collection::{Variant, Variants};
+
/// Aggregated, serializable summary statistics over a `Variants` collection.
///
/// Every `DashMap` field is a histogram (key -> occurrence count) filled
/// concurrently by `VariantsStats::new`; the `serialize_with` hooks emit the
/// entries in ascending key order for stable JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VariantsStats {
    /// Total number of variants summarized.
    pub n: u32,
    /// Count per alteration-category label (sorted, comma-joined set).
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub alteration_categories: DashMap<String, u32>,
    /// Count per COSMIC occurrence count (`cosmic_cnt`).
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub cosmic: DashMap<u64, u32>,
    /// Count per number of alt-supporting reads.
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub n_alts: DashMap<u32, u32>,
    /// Count per total read depth.
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub depths: DashMap<u32, u32>,
    /// Count per VAF bucket (percent rounded to one decimal, see `new`).
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub vafs: DashMap<OrderedFloat<f32>, u32>,
    /// Count per VEP impact label (HIGH, MODERATE, ...).
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub vep_impact: DashMap<String, u32>,
    /// Count per deduplicated, sorted consequence-set string.
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub consequences: DashMap<String, u32>,
    /// Count per gene symbol (best VEP annotation).
    #[serde(serialize_with = "serialize_dashmap_sort")]
    pub genes: DashMap<String, u32>,
    // Number of gnomAD values for one of the keys — NOTE(review): populated
    // nondeterministically in `new` (last-iterated key wins); confirm intent.
    pub n_gnomad: usize,
    /// Per gnomAD field name, binned (value, count) pairs from `bin_data`.
    pub gnomad: Vec<(String, Vec<(f64, usize)>)>,
}
+pub fn serialize_dashmap_sort<S, T>(
+    data: &DashMap<T, u32>,
+    serializer: S,
+) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+    T: Serialize + Ord + std::hash::Hash + Clone,
+{
+    let ordered: BTreeMap<_, _> = data
+        .iter()
+        .map(|entry| (entry.key().clone(), *entry.value()))
+        .collect();
+
+    ordered.serialize(serializer)
+}
+
impl VariantsStats {
    /// Builds summary statistics over every variant in `variants`.
    ///
    /// Iterates the collection with rayon and tallies counts into
    /// concurrent `DashMap` histograms: VEP impact, gene symbol,
    /// consequence sets, alt-read counts, depths, VAF buckets, COSMIC
    /// counts, alteration categories, and raw gnomAD values.
    pub fn new(variants: &Variants) -> Self {
        let n = variants.data.len() as u32;
        // One histogram per statistic; DashMap allows lock-free-ish
        // concurrent updates from the parallel iterator below.
        let alteration_categories: DashMap<String, u32> = DashMap::new();
        let vep_impact: DashMap<String, u32> = DashMap::new();
        let genes: DashMap<String, u32> = DashMap::new();
        let consequences: DashMap<String, u32> = DashMap::new();
        let n_alts: DashMap<u32, u32> = DashMap::new();
        let depths: DashMap<u32, u32> = DashMap::new();
        let vafs: DashMap<OrderedFloat<f32>, u32> = DashMap::new();
        let cosmic: DashMap<u64, u32> = DashMap::new();
        // gnomAD field name -> all observed values across variants.
        let gnomads: DashMap<String, Vec<f64>> = DashMap::new();

        variants.data.par_iter().for_each(|v| {
            // Tally VEP-derived stats from the best transcript annotation;
            // variants without a usable VEP record are simply skipped here.
            if let Ok(best_vep) = v.best_vep() {
                if let Some(impact) = best_vep.extra.impact {
                    *vep_impact.entry(impact.to_string()).or_default() += 1;
                }
                if let Some(gene) = best_vep.extra.symbol {
                    *genes.entry(gene).or_default() += 1;
                }
                if let Some(csqs) = best_vep.consequence {
                    // Sort + dedup so equivalent consequence sets map to
                    // the same histogram key.
                    let mut csq: Vec<String> = csqs.into_iter().map(String::from).collect();
                    csq.sort();
                    csq.dedup();
                    *consequences.entry(csq.join(", ")).or_default() += 1;
                }
            }

            let (n_alt, depth) = v.n_alt_depth();
            *n_alts.entry(n_alt as u32).or_default() += 1;
            *depths.entry(depth as u32).or_default() += 1;

            // VAF as a percentage rounded to one decimal place
            // (e.g. n_alt/depth = 0.123 -> 12.3). A zero depth yields
            // NaN and is filtered out below.
            let vaf = OrderedFloat::from(((n_alt * 1_000.0 / depth).round() / 10.0) as f32);
            if !vaf.is_nan() {
                *vafs.entry(vaf).or_default() += 1;
            }

            // Collect COSMIC counts and raw gnomAD values from the
            // variant's annotations; other annotation kinds are ignored.
            v.annotations.iter().for_each(|annotation| {
                match annotation {
                    Annotation::Cosmic(v) => *cosmic.entry(v.cosmic_cnt).or_default() += 1,
                    Annotation::GnomAD(v) => {
                        v.to_vec()
                            .iter()
                            .map(|e| e.to_string_value_pair())
                            .for_each(|(key, value)| {
                                gnomads.entry(key).or_default().push(value);
                            });
                    }
                    _ => (),
                };
            });
            // Sorted, deduplicated category set as one histogram key.
            let mut alteration_category_str = v
                .alteration_category()
                .iter()
                .map(|c| c.to_string())
                .collect::<Vec<String>>();
            alteration_category_str.sort();
            alteration_category_str.dedup();

            *alteration_categories
                .entry(alteration_category_str.join(", "))
                .or_default() += 1;
        });

        // NOTE(review): `n_gnomad` is overwritten on every iteration, so it
        // ends up as the value count of whichever gnomAD key is iterated
        // last (DashMap order is unspecified) — confirm this is intended.
        let mut n_gnomad = 0;
        let gnomad = gnomads
            .iter()
            .map(|e| {
                let data = e.value().to_vec();
                n_gnomad = data.len();
                // Aggregate the raw values into (bin, count) pairs.
                (e.key().to_string(), bin_data(data, 0.0001))
            })
            .collect();

        Self {
            n,
            alteration_categories,
            cosmic,
            gnomad,
            vafs,
            n_alts,
            depths,
            vep_impact,
            consequences,
            genes,
            n_gnomad,
        }
    }

    /// Serializes `self` as JSON into a gzip/BGZF-compressed file.
    ///
    /// # Errors
    /// Fails if the file cannot be created, serialization fails, or the
    /// compressed writer cannot be closed.
    pub fn save_to_json(&self, filename: &str) -> anyhow::Result<()> {
        let mut writer = get_gz_writer(filename, true)
            .with_context(|| anyhow::anyhow!("Failed to create file: {}", filename))?;

        serde_json::to_writer(&mut writer, self)
            .with_context(|| anyhow::anyhow!("Failed to serialize JSON to file: {}", filename))?;

        // Explicit close so compression trailer errors surface here
        // rather than being swallowed on drop.
        writer.close().with_context(|| {
            anyhow::anyhow!("Failed to close BGZF writer for file: {}", filename)
        })?;

        debug!("Successfully saved variants to {}", filename);
        Ok(())
    }
    /// Deserializes a `VariantsStats` from a compressed JSON file written
    /// by [`Self::save_to_json`].
    ///
    /// # Errors
    /// Fails if the file cannot be opened or does not contain valid JSON.
    pub fn load_from_json(filename: &str) -> anyhow::Result<Self> {
        let mut reader = get_gz_reader(filename)
            .with_context(|| format!("Failed to create BGZF reader for file: {}", filename))?;

        let variants: Self = serde_json::from_reader(&mut reader)
            .with_context(|| format!("Failed to deserialize JSON from file: {}", filename))?;

        debug!("Successfully loaded variants from {}", filename);
        Ok(variants)
    }
}
+
/// Somatic mutation rates over the whole genome and over coding regions,
/// as computed by `somatic_rates`.
#[derive(Debug)]
pub struct SomaticVariantRates {
    /// Total genome length in bases (sum of contig lengths from the dict).
    pub wgs_length: u32,
    /// Total number of somatic variants considered.
    pub total_variants: usize,
    /// Variants per megabase over the whole genome.
    pub somatic_mutation_rate_wgs: f64,
    /// Number of feature (exon) ranges used.
    pub exon_count: usize,
    /// Number of variants overlapping any feature range.
    pub variants_in_coding: usize,
    /// Overlapping variants with VEP impact MODERATE or stronger.
    pub coding_variants: usize,
    /// Total bases covered by the feature ranges.
    pub total_exon_bases: u32,
    /// Variants per megabase of feature (coding) sequence.
    pub somatic_mutation_rate_coding: f64,
    /// Impactful (non-synonymous) variants per megabase of coding sequence.
    pub somatic_nonsynonymous_rate_coding: f64,
}
+
+pub fn somatic_rates(
+    variants: &[Variant],
+    feature_ranges: &Vec<GenomeRange>,
+    config: &Config,
+) -> anyhow::Result<SomaticVariantRates> {
+    let ol = par_overlaps(variants, feature_ranges);
+
+    let n_coding = ol
+        .iter()
+        .filter_map(|i| variants[*i].best_vep().ok())
+        .filter_map(|bv| bv.impact())
+        .filter(|impact| *impact <= VepImpact::MODERATE)
+        .count();
+
+    let n_bases_m: u32 = feature_ranges.par_iter().map(|gr| gr.length()).sum();
+    let mega_base_m = n_bases_m as f64 / 10.0e6;
+
+    let wgs_len: u32 = read_dict(&config.dict_file)?.iter().map(|(_, l)| *l).sum();
+    let rate_wgs = variants.len() as f64 / (wgs_len as f64 / 10.0e6);
+
+    let n_exons_mb = ol.len() as f64 / mega_base_m;
+    let n_coding_mb = n_coding as f64 / mega_base_m;
+
+    Ok(SomaticVariantRates {
+        total_variants: variants.len(),
+        exon_count: feature_ranges.len(),
+        variants_in_coding: ol.len(),
+        coding_variants: n_coding,
+        total_exon_bases: n_bases_m,
+        wgs_length: wgs_len,
+        somatic_mutation_rate_wgs: rate_wgs,
+        somatic_mutation_rate_coding: n_exons_mb,
+        somatic_nonsynonymous_rate_coding: n_coding_mb,
+    })
+}
+
+pub fn high_depth_somatic(id: &str, config: &Config) -> anyhow::Result<DashMap<String, Vec<GenomeRange>>> {
+    let mut contigs: Vec<String> = (1..22).map(|i| format!("chr{i}")).collect();
+    contigs.extend(["chrX", "chrY", "chrM"].into_iter().map(String::from));
+
+    let results: DashMap<String, Vec<GenomeRange>> = DashMap::new();
+    contigs.into_par_iter().for_each(|contig| {
+        let mrd_path = format!("{}/{contig}_count.tsv.gz", config.normal_dir_count(id));
+        let mrd_reader =
+            get_gz_reader(&mrd_path).with_context(|| format!("Failed to open: {mrd_path}"));
+        let diag_path = format!("{}/{contig}_count.tsv.gz", config.tumoral_dir_count(id));
+        let diag_reader =
+            get_gz_reader(&diag_path).with_context(|| format!("Failed to open: {diag_path}"));
+
+        if let (Ok(mrd_reader), Ok(diag_reader)) = (mrd_reader, diag_reader) {
+            let ranges: Vec<GenomeRange> = mrd_reader
+                .lines()
+                .zip(diag_reader.lines())
+                .filter_map(|(mrd, diag)| {
+                    if let (Ok(mrd), Ok(diag)) = (mrd, diag) {
+                        if let (Ok(mrd), Ok(diag)) =
+                        (BinCount::from_tsv_row(&mrd), BinCount::from_tsv_row(&diag))
+                        {
+                            assert_eq!(mrd.contig, diag.contig);
+                            assert_eq!(mrd.start, diag.start);
+                            let bools: Vec<bool> = mrd
+                                .depths
+                                .into_iter()
+                                .zip(diag.depths)
+                                .map(|(depth_mrd, depth_diag)| {
+                                    depth_mrd >= config.min_high_quality_depth
+                                    && depth_diag >= config.min_high_quality_depth
+                                })
+                                .collect();
+                            let ranges = ranges_from_consecutive_true(&bools, mrd.start, &mrd.contig);
+                            return Some(ranges);
+                        }
+                    }
+                    None
+                })
+                .flatten()
+                .collect();
+            results.insert(contig, ranges);
+        }
+    });
+
+    Ok(results)
+}
+
+fn ranges_from_consecutive_true(vec: &[bool], start: u32, contig: &str) -> Vec<GenomeRange> {
+    let contig = contig_to_num(contig);
+    let mut ranges = Vec::new();
+    let mut current_start: Option<u32> = None;
+
+    // Iterate through elements starting from specified position
+    for (i, &value) in vec.iter().enumerate() {
+        let i = i as u32 + start;
+        match (value, current_start) {
+            // Begin new range
+            (true, None) => current_start = Some(i),
+            // Finalize current range
+            (false, Some(start_idx)) => {
+                // ranges.push(start_idx..i);
+                ranges.push(GenomeRange {
+                    contig,
+                    range: start_idx..i,
+                });
+                current_start = None;
+            }
+            _ => {}
+        }
+    }
+
+    // Add any remaining active range
+    if let Some(start_idx) = current_start {
+        // ranges.push(start_idx..vec.len() as u32);
+        ranges.push(GenomeRange {
+            contig,
+            range: start_idx..vec.len() as u32 + start,
+        });
+    }
+
+    ranges
+}