8 月之前 · 980c173f72
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2939,6 +2939,7 @@ dependencies = [
 
				  "rayon",
			
 
				  "rusqlite",
			
 
				  "rust-htslib 0.49.0",
			
 
				+ "semver 1.0.26",
			
 
				  "serde",
			
 
				  "serde_json",
			
 
				  "tar",
			
@@ -3699,7 +3700,7 @@ version = "0.4.1"
 
				 source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				 checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
			
 
				 dependencies = [
			
 
				- "semver 1.0.25",
			
 
				+ "semver 1.0.26",
			
 
				 ]
			
 
				 
			
 
				 [[package]]
			
@@ -3846,9 +3847,9 @@ dependencies = [
 
				 
			
 
				 [[package]]
			
 
				 name = "semver"
			
 
				-version = "1.0.25"
			
 
				+version = "1.0.26"
			
 
				 source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				-checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
			
 
				+checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
			
 
				 
			
 
				 [[package]]
			
 
				 name = "semver-parser"
			
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -44,6 +44,7 @@ tar = "0.4.43"
 
				 flatbuffers = "25.2.10"
			
 
				 ordered-float = { version = "5.0.0", features = ["serde"] }
			
 
				 bitcode = "0.6.5"
			
 
				+semver = "1.0.26"
			
 
				 
			
 
				 [profile.dev]
			
 
				 opt-level = 0
			
--- a/src/annotation/ncbi.rs
+++ b/src/annotation/ncbi.rs
@@ -1,4 +1,5 @@
 
				 use anyhow::{Context, Ok, Result};
			
 
				+use semver::Version;
			
 
				 use serde::{Deserialize, Serialize};
			
 
				 use std::str::FromStr;
			
 
				 
			
@@ -72,79 +73,106 @@ pub struct NCBIAcc {
 
				     /// The numeric part of the accession
			
 
				     pub number: u64,
			
 
				     /// The version number of the accession
			
 
				-    pub version: f32,
			
 
				+    pub version: Version,
			
 
				 }
			
 
				 
			
 
				 impl FromStr for NCBIAcc {
			
 
				     type Err = anyhow::Error;
			
 
				-    /// Parses a string into an NCBIAcc struct.
			
 
				+    /// Parses a string into an `NCBIAcc` struct.
			
 
				     ///
			
 
				     /// This method handles various formats of NCBI accessions, including:
			
 
				-    /// - Standard format: "PREFIX_NUMBER.VERSION" (e.g., "NM_001234.5")
			
 
				-    /// - Unassigned transcripts: "unassigned_transcript_NUMBER_VERSION"
			
 
				-    /// - Accessions without versions
			
 
				-    /// - Accessions without numbers (treated as having max u64 number and version 0.0)
			
 
				+    ///
			
 
				+    /// - **Standard format**: `"PREFIX_NUMBER.VERSION"` (e.g., `"NM_001234.5"` or `"NM_001234.1_2_3"`).
			
 
				+    ///   Trailing underscores after the version are converted to dots and parsed as a semantic version (e.g., `"1_2_3"` → `"1.2.3"`).
			
 
				+    ///
			
 
				+    /// - **Unassigned transcripts**: `"unassigned_transcript_NUMBER_VERSION"` (e.g., `"unassigned_transcript_56789_1"`).
			
 
				+    ///
			
 
				+    /// - **Accessions without versions**: Parsed with default version `"0.0.0"`.
			
 
				+    ///
			
 
				+    /// - **Accessions without numbers**: Treated as having `u64::MAX` as the number and version `"0.0.0"`.
			
 
				+    ///
			
 
				+    /// Internally uses the [`semver::Version`](https://docs.rs/semver/latest/semver/struct.Version.html) type for rich multi-part versioning support.
			
 
				     ///
			
 
				     /// # Arguments
			
 
				-    /// * `s` - A string slice representing the NCBI accession
			
 
				+    /// * `s` - A string slice representing the NCBI accession.
			
 
				     ///
			
 
				     /// # Returns
			
 
				-    /// * `Ok(NCBIAcc)` if parsing is successful
			
 
				-    /// * `Err(anyhow::Error)` if parsing fails
			
 
				+    /// * `Ok(NCBIAcc)` if parsing is successful.
			
 
				+    /// * `Err(anyhow::Error)` if parsing fails.
			
 
				     ///
			
 
				     /// # Examples
			
 
				     /// ```
			
 
				+    /// use std::str::FromStr;
			
 
				+    /// use your_crate::NCBIAcc;
			
 
				+    ///
			
 
				     /// let acc1 = NCBIAcc::from_str("NM_001234.5").unwrap();
			
 
				     /// assert_eq!(acc1.prefix, "NM");
			
 
				     /// assert_eq!(acc1.number, 1234);
			
 
				-    /// assert_eq!(acc1.version, 5.0);
			
 
				+    /// assert_eq!(acc1.version.to_string(), "5.0.0");
			
 
				     ///
			
 
				     /// let acc2 = NCBIAcc::from_str("unassigned_transcript_56789_1").unwrap();
			
 
				     /// assert_eq!(acc2.prefix, "unassigned_transcript");
			
 
				     /// assert_eq!(acc2.number, 56789);
			
 
				-    /// assert_eq!(acc2.version, 1.0);
			
 
				+    /// assert_eq!(acc2.version.to_string(), "1.0.0");
			
 
				     ///
			
 
				     /// let acc3 = NCBIAcc::from_str("XR_123456").unwrap();
			
 
				     /// assert_eq!(acc3.prefix, "XR_123456");
			
 
				     /// assert_eq!(acc3.number, u64::MAX);
			
 
				-    /// assert_eq!(acc3.version, 0.0);
			
 
				+    /// assert_eq!(acc3.version.to_string(), "0.0.0");
			
 
				+    ///
			
 
				+    /// let acc4 = NCBIAcc::from_str("NM_001234.1_2_3").unwrap();
			
 
				+    /// assert_eq!(acc4.version.to_string(), "1.2.3");
			
 
				     /// ```
			
 
				     fn from_str(s: &str) -> Result<Self> {
			
 
				-        if s.contains("unassigned_transcript_") {
			
 
				-            let s = s.replace("unassigned_transcript_", "");
			
 
				-            let (num, v) = if s.contains("_") {
			
 
				-                s.split_once("_").context("first split error")?
			
 
				-            } else {
			
 
				-                (s.as_str(), "0")
			
 
				-            };
			
 
				-            Ok(NCBIAcc {
			
 
				+        if s.starts_with("unassigned_transcript_") {
			
 
				+            let s = s.trim_start_matches("unassigned_transcript_");
			
 
				+
			
 
				+            let (num_str, version_str_raw) = s.split_once('_').unwrap_or((s, "0")); // fallback: version = 0
			
 
				+
			
 
				+            let number = num_str
			
 
				+                .parse::<u64>()
			
 
				+                .context("Error parsing number in unassigned_transcript")?;
			
 
				+
			
 
				+            let version = Version::parse(&normalize_to_semver(version_str_raw))
			
 
				+                .context("Error parsing version in unassigned_transcript")?;
			
 
				+
			
 
				+            return Ok(NCBIAcc {
			
 
				                 prefix: "unassigned_transcript".to_string(),
			
 
				-                number: num.parse().context("Error parsing NCBI accession number")?,
			
 
				-                version: v.parse().context("Error parsing NCBI accession version")?,
			
 
				-            })
			
 
				-        } else if s.contains("_") {
			
 
				-            if s.contains(".") {
			
 
				-                let (rest, v) = s.split_once(".").context("first split error")?;
			
 
				-                let (pref, num) = rest.split_once("_").context("second split error")?;
			
 
				-                let v = v.replace("_", ".");
			
 
				-                Ok(NCBIAcc {
			
 
				-                    prefix: pref.to_string(),
			
 
				-                    number: num.parse().context("Error parsing NCBI accession number")?,
			
 
				-                    version: v.parse().context("Error parsing NCBI accession version")?,
			
 
				-                })
			
 
				-            } else {
			
 
				-                Ok(NCBIAcc {
			
 
				-                    prefix: s.to_string(),
			
 
				-                    number: u64::MAX,
			
 
				-                    version: 0.0,
			
 
				-                })
			
 
				-            }
			
 
				-        } else {
			
 
				-            Ok(NCBIAcc {
			
 
				-                prefix: s.to_string(),
			
 
				-                number: u64::MAX,
			
 
				-                version: 0.0,
			
 
				-            })
			
 
				+                number,
			
 
				+                version,
			
 
				+            });
			
 
				+        }
			
 
				+
			
 
				+        if s.contains('.') {
			
 
				+            let (rest, raw_ver) = s.split_once('.').context("dot split failed")?;
			
 
				+            let (prefix, num) = rest.split_once('_').context("underscore split failed")?;
			
 
				+
			
 
				+            let version_str = normalize_to_semver(&raw_ver.replace('.', "_"));
			
 
				+            let version = Version::parse(&version_str)
			
 
				+                .with_context(|| format!("error parsing version: '{}'", version_str))?;
			
 
				+
			
 
				+            return Ok(NCBIAcc {
			
 
				+                prefix: prefix.to_string(),
			
 
				+                number: num.parse().context("error parsing number")?,
			
 
				+                version,
			
 
				+            });
			
 
				         }
			
 
				+
			
 
				+        Ok(NCBIAcc {
			
 
				+            prefix: s.to_string(),
			
 
				+            number: u64::MAX,
			
 
				+            version: Version::parse("0.0.0").unwrap(),
			
 
				+        })
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+fn normalize_to_semver(v: &str) -> String {
			
 
				+    let parts: Vec<&str> = v.split('_').collect();
			
 
				+
			
 
				+    match parts.len() {
			
 
				+        1 => format!("{}.0.0", parts[0]),            // "1" -> "1.0.0"
			
 
				+        2 => format!("{}.{}.0", parts[0], parts[1]), // "1_2" -> "1.2.0"
			
 
				+        3 => format!("{}.{}.{}", parts[0], parts[1], parts[2]), // "1_2_3"
			
 
				+        _ => parts.join("."),                        // fallback
			
 
				     }
			
 
				 }
			
--- a/src/annotation/vep.rs
+++ b/src/annotation/vep.rs
@@ -730,7 +730,10 @@ pub fn run_vep(in_path: &str, out_path: &str) -> anyhow::Result<()> {
 
				 /// # Note
			
 
				 /// This function assumes that the VEP annotations are well-formed and contain
			
 
				 /// the necessary fields for comparison (consequence, feature). It will log warnings
			
 
				-/// for any parsing errors encountered during the selection process.
			
 
				+/// for any parsing errors encountered during the selection process.        
			
 
				+
			
 
				+
			
 
				+
			
 
				 pub fn get_best_vep(d: &[VEP]) -> anyhow::Result<VEP> {
			
 
				     let best_impact_veps = d
			
 
				         .iter()
			
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -45,7 +45,7 @@ mod tests {
 
				 
			
 
				     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
			
 
				     use super::*;
			
 
				-    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::{CNSegment, SavanaCN}, Callers}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, par_overlaps}, scan::scan::{par_whole_scan, somatic_scan}, variant::{variant::AlterationCategory, variants_stats::{self, VariantsStats}}};
			
 
				+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::{CNSegment, SavanaCN}, Callers}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, par_overlaps, range_intersection_par, sort_ranges}, scan::scan::{par_whole_scan, somatic_scan}, variant::{variant::AlterationCategory, variants_stats::{self, VariantsStats}}};
			
 
				 
			
 
				     // export RUST_LOG="debug"
			
 
				     fn init() {
			
@@ -765,7 +765,7 @@ mod tests {
 
				         let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
			
 
				         let variants = variant_collection::Variants::load_from_json(&path)?;
			
 
				 
			
 
				-        VariantsStats::new(&variants).save_to_json(&format!("{}/{id}/diag/somatic_variants_stats.json.gz", config.result_dir));
			
 
				+        VariantsStats::new(&variants, id, &config)?.save_to_json(&format!("{}/{id}/diag/somatic_variants_stats.json.gz", config.result_dir));
			
 
				 Ok(())
			
 
				     }
			
 
				 
			
@@ -818,22 +818,34 @@ Ok(())
 
				         let id = "ADJAGBA";
			
 
				         let config = Config::default();
			
 
				         let path = format!("{}/{id}/diag/somatic_variants.bit", config.result_dir);
			
 
				-        let r = features_ranges("exon", &config)?;
			
 
				-        let merged = merge_overlapping_genome_ranges(&r);
			
 
				+        let exon_ranges = features_ranges("exon", &config)?;
			
 
				+        let exon_ranges = merge_overlapping_genome_ranges(&exon_ranges);
			
 
				         
			
 
				         let variants = variant_collection::Variants::load_from_file(&path)?;
			
 
				-        let full = variants_stats::somatic_rates(&variants.data, &merged, &config);
			
 
				+        let full = variants_stats::somatic_rates(&variants.data, &exon_ranges, &config);
			
 
				+        info!("{full:#?}");
			
 
				+
			
 
				+        // let restrained: Vec<variant_collection::Variant> = variants.data.iter().filter(|v| v.vcf_variants.len() >= 2)
			
 
				+        //     .cloned().collect();
			
 
				+        // let min_2 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
			
 
				+        // info!("{min_2:#?}");
			
 
				+        //
			
 
				+        // let restrained: Vec<variant_collection::Variant> = restrained.iter().filter(|v| v.vcf_variants.len() >= 3)
			
 
				+        //     .cloned().collect();
			
 
				+        // let min_3 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
			
 
				+        // info!("{min_3:#?}");
			
 
				+
			
 
				+        let mut high_depth_ranges = variants_stats::high_depth_somatic(id, &config)?;
			
 
				+        high_depth_ranges.par_sort_by_key(|r| ( r.contig, r.range.start ));
			
 
				+
			
 
				+        let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
			
 
				+        let exons_high_depth = range_intersection_par(&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(), &exon_ranges_ref);
			
 
				+
			
 
				+        let full = variants_stats::somatic_rates(&variants.data, &exons_high_depth, &config);
			
 
				         info!("{full:#?}");
			
 
				 
			
 
				-        let restrained: Vec<variant_collection::Variant> = variants.data.iter().filter(|v| v.vcf_variants.len() >= 2)
			
 
				-            .cloned().collect();
			
 
				-        let min_2 = variants_stats::somatic_rates(&restrained, &merged, &config);
			
 
				-        info!("{min_2:#?}");
			
 
				 
			
 
				-        let restrained: Vec<variant_collection::Variant> = restrained.iter().filter(|v| v.vcf_variants.len() >= 3)
			
 
				-            .cloned().collect();
			
 
				-        let min_3 = variants_stats::somatic_rates(&restrained, &merged, &config);
			
 
				-        info!("{min_3:#?}");
			
 
				+
			
 
				 
			
 
				 
			
 
				         // info!("n variants loaded: {}", variants.data.len());
			
@@ -872,13 +884,145 @@ Ok(())
 
				     #[test]
			
 
				     fn quality_ranges() -> anyhow::Result<()> {
			
 
				         init();
			
 
				-        let r = variants_stats::high_depth_somatic("ADJAGBA", &Config::default())?;
			
 
				-        r.iter().for_each(|e| {
			
 
				-            let contig = e.key();
			
 
				-            let ranges = e.value();
			
 
				-            let su = ranges.iter().map(|range| range.range.len()).sum::<usize>();
			
 
				-            info!("{contig}: {su} high depth bases");
			
 
				-        });
			
 
				+        let r = variants_stats::high_depth_somatic("ADJAGBA", &Config::default())?.iter().map(|range| range.length()).sum::<u32>();
			
 
				+        info!("{r} high depth bases");
			
 
				         Ok(())
			
 
				     }
			
 
				+
			
 
				+    fn gr(contig: u8, start: u32, end: u32) -> GenomeRange {
			
 
				+        GenomeRange {
			
 
				+            contig,
			
 
				+            range: start..end,
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_both_empty() {
			
 
				+        let a: Vec<&GenomeRange> = vec![];
			
 
				+        let b: Vec<&GenomeRange> = vec![];
			
 
				+        let result = range_intersection_par(&a, &b);
			
 
				+        assert!(result.is_empty());
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_one_empty() {
			
 
				+        let a = [gr(1, 0, 100)];
			
 
				+        let b: Vec<&GenomeRange> = vec![];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b);
			
 
				+        sort_ranges(&mut result);
			
 
				+        assert!(result.is_empty());
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_single_range_no_overlap() {
			
 
				+        let a = [gr(1, 0, 100)];
			
 
				+        let b = [gr(1, 100, 200)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        sort_ranges(&mut result);
			
 
				+        assert!(result.is_empty());
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_single_range_full_overlap() {
			
 
				+        let a = [gr(1, 0, 100)];
			
 
				+        let b = [gr(1, 0, 100)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        let mut expected = [gr(1, 0, 100)];
			
 
				+        sort_ranges(&mut result);
			
 
				+        sort_ranges(&mut expected);
			
 
				+        assert_eq!(result, expected);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_different_contigs() {
			
 
				+        let a = [gr(1, 0, 100)];
			
 
				+        let b = [gr(2, 0, 100)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        sort_ranges(&mut result);
			
 
				+        assert!(result.is_empty());
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_touching_ranges() {
			
 
				+        let a = [gr(1, 0, 100)];
			
 
				+        let b = [gr(1, 100, 200)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        sort_ranges(&mut result);
			
 
				+        assert!(result.is_empty());
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_complete_subrange() {
			
 
				+        let a = [gr(1, 0, 200)];
			
 
				+        let b = [gr(1, 50, 150)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        let mut expected = [gr(1, 50, 150)];
			
 
				+        sort_ranges(&mut result);
			
 
				+        sort_ranges(&mut expected);
			
 
				+        assert_eq!(result, expected);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_multiple_overlaps_same_contig() {
			
 
				+        let a = [gr(1, 0, 50), gr(1, 75, 125)];
			
 
				+        let b = [gr(1, 25, 100), gr(1, 150, 200)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        let mut expected = [gr(1, 25, 50), gr(1, 75, 100)];
			
 
				+        sort_ranges(&mut result);
			
 
				+        sort_ranges(&mut expected);
			
 
				+        assert_eq!(result, expected);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_multiple_contigs() {
			
 
				+        let a = [gr(1, 0, 100), gr(2, 50, 150), gr(3, 200, 300)];
			
 
				+        let b = [gr(1, 50, 150), gr(2, 0, 100), gr(4, 0, 100)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        let mut expected = [gr(1, 50, 100), gr(2, 50, 100)];
			
 
				+        sort_ranges(&mut result);
			
 
				+        sort_ranges(&mut expected);
			
 
				+        assert_eq!(result, expected);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_adjacent_ranges() {
			
 
				+        let a = [gr(1, 0, 50), gr(1, 50, 100)];
			
 
				+        let b = [gr(1, 25, 75)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        let mut expected = [gr(1, 25, 50), gr(1, 50, 75)];
			
 
				+        sort_ranges(&mut result);
			
 
				+        sort_ranges(&mut expected);
			
 
				+        assert_eq!(result, expected);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_minimal_overlap() {
			
 
				+        let a = [gr(1, 0, 100)];
			
 
				+        let b = [gr(1, 99, 200)];
			
 
				+        let a_refs: Vec<&GenomeRange> = a.iter().collect();
			
 
				+        let b_refs: Vec<&GenomeRange> = b.iter().collect();
			
 
				+        let mut result = range_intersection_par(&a_refs, &b_refs);
			
 
				+        let mut expected = [gr(1, 99, 100)];
			
 
				+        sort_ranges(&mut result);
			
 
				+        sort_ranges(&mut expected);
			
 
				+        assert_eq!(result, expected);
			
 
				+    }
			
 
				+
			
 
				 }
			
--- a/src/pipes/somatic.rs
+++ b/src/pipes/somatic.rs
@@ -509,7 +509,7 @@ impl Run for Somatic {
 
				         let vep_stats = annotations.vep_stats()?;
			
 
				         vep_stats.save_to_json(&format!("{stats_dir}/{id}_annotations_10_vep.json"))?;
			
 
				 
			
 
				-        VariantsStats::new(&variants)
			
 
				+        VariantsStats::new(&variants, &id, &config)?
			
 
				             .save_to_json(&format!("{stats_dir}/{id}_variants_stats_final.json"))?;
			
 
				 
			
 
				         info!("Final unique variants: {}", variants.data.len());
			
@@ -523,7 +523,7 @@ impl Run for Somatic {
 
				 pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
			
 
				     info!("Loading Germline");
			
 
				     let annotations = Annotations::default();
			
 
				-    let clairs_germline = ClairS::initialize(&id, config)?.germline(&annotations)?;
			
 
				+    let clairs_germline = ClairS::initialize(&id, config.clone())?.germline(&annotations)?;
			
 
				 
			
 
				     info!("Annotation with Cosmic and GnomAD.");
			
 
				     let ext_annot = ExternalAnnotation::init()?;
			
@@ -533,7 +533,7 @@ pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
 
				     variants.merge(clairs_germline, &annotations);
			
 
				     info!("-- {}", variants.data.len());
			
 
				 
			
 
				-    let u = VariantsStats::new(&variants);
			
 
				+    let u = VariantsStats::new(&variants, &id, &config)?;
			
 
				     info!("++ {}", u.n);
			
 
				 
			
 
				     Ok(())
			
--- a/src/positions.rs
+++ b/src/positions.rs
@@ -450,6 +450,12 @@ impl TryFrom<(&str, &str, &str)> for GenomeRange {
 
				 }
			
 
				 
			
 
				 impl GenomeRange {
			
 
				+    pub fn new(contig: &str, start: u32, end: u32) -> Self {
			
 
				+        Self {
			
 
				+            contig: contig_to_num(contig),
			
 
				+            range: start..end,
			
 
				+        }
			
 
				+    }
			
 
				     /// Creates a `GenomeRange` from 1-based inclusive start and end positions.
			
 
				     ///
			
 
				     /// This method is useful when working with data sources that use 1-based coordinates
			
@@ -522,7 +528,7 @@ pub fn merge_overlapping_genome_ranges(ranges: &[GenomeRange]) -> Vec<GenomeRang
 
				         return vec![];
			
 
				     }
			
 
				 
			
 
				-    // ranges.sort_by_key(|r| (r.contig, r.range.start));
			
 
				+    // ranges.par_sort_by_key(|r| (r.contig, r.range.start));
			
 
				 
			
 
				     let chunk_size = (ranges.len() / rayon::current_num_threads()).max(1);
			
 
				 
			
@@ -620,6 +626,156 @@ pub fn overlaps_par(positions: &[&GenomePosition], ranges: &[&GenomeRange]) -> V
 
				         })
			
 
				         .collect()
			
 
				 }
			
 
				+//
			
 
				+// pub fn range_intersection_par(a: &[GenomeRange], b: &[GenomeRange]) -> Vec<GenomeRange> {
			
 
				+//     // Group ranges by contig for both inputs
			
 
				+//     let mut a_map = std::collections::HashMap::new();
			
 
				+//     for range in a {
			
 
				+//         a_map.entry(&range.contig).or_insert(Vec::new()).push(range);
			
 
				+//     }
			
 
				+//
			
 
				+//     let mut b_map = std::collections::HashMap::new();
			
 
				+//     for range in b {
			
 
				+//         b_map.entry(&range.contig).or_insert(Vec::new()).push(range);
			
 
				+//     }
			
 
				+//
			
 
				+//     // Process common contigs in parallel
			
 
				+//     a_map
			
 
				+//         .into_par_iter()
			
 
				+//         .filter_map(|(contig, mut a_ranges)| {
			
 
				+//             // Sort ranges by start position (end is only used for tie-breaking)
			
 
				+//             a_ranges.sort_by_key(|r| (r.range.start, r.range.end));
			
 
				+//
			
 
				+//             // Get corresponding ranges from b
			
 
				+//             let mut b_ranges = match b_map.get(contig) {
			
 
				+//                 Some(ranges) => ranges.clone(),
			
 
				+//                 None => return None,
			
 
				+//             };
			
 
				+//             b_ranges.sort_by_key(|r| (r.range.start, r.range.end));
			
 
				+//
			
 
				+//             let mut intersections = Vec::new();
			
 
				+//             let mut i = 0;
			
 
				+//             let mut j = 0;
			
 
				+//
			
 
				+//             // Two-pointer intersection algorithm for 0-based, end-exclusive ranges
			
 
				+//             while i < a_ranges.len() && j < b_ranges.len() {
			
 
				+//                 let a_range = &a_ranges[i];
			
 
				+//                 let b_range = &b_ranges[j];
			
 
				+//
			
 
				+//                 // Check if ranges are completely non-overlapping
			
 
				+//                 if a_range.range.end <= b_range.range.start {
			
 
				+//                     // |a_range| ends before |b_range| starts
			
 
				+//                     i += 1;
			
 
				+//                 } else if b_range.range.end <= a_range.range.start {
			
 
				+//                     // |b_range| ends before |a_range| starts
			
 
				+//                     j += 1;
			
 
				+//                 } else {
			
 
				+//                     // Calculate intersection of overlapping ranges
			
 
				+//                     let start = a_range.range.start.max(b_range.range.start);
			
 
				+//                     let end = a_range.range.end.min(b_range.range.end);
			
 
				+//
			
 
				+//                     // Only create a range if start < end (non-zero length)
			
 
				+//                     if start < end {
			
 
				+//                         intersections.push(GenomeRange {
			
 
				+//                             contig: *contig,
			
 
				+//                             range: start..end,
			
 
				+//                         });
			
 
				+//                     }
			
 
				+//
			
 
				+//                     // Move the pointer with the earlier end coordinate
			
 
				+//                     if a_range.range.end < b_range.range.end {
			
 
				+//                         i += 1;
			
 
				+//                     } else {
			
 
				+//                         j += 1;
			
 
				+//                     }
			
 
				+//                 }
			
 
				+//             }
			
 
				+//
			
 
				+//             Some(intersections)
			
 
				+//         })
			
 
				+//         .flatten()
			
 
				+//         .collect()
			
 
				+// }
			
 
				+
			
 
				+pub fn range_intersection_par(a: &[&GenomeRange], b: &[&GenomeRange]) -> Vec<GenomeRange> {
			
 
				+    let (a_contigs, b_contigs) = rayon::join(
			
 
				+        || extract_contig_indices(a),
			
 
				+        || extract_contig_indices(b),
			
 
				+    );
			
 
				+
			
 
				+    a_contigs.into_par_iter()
			
 
				+        .filter_map(|(contig, a_start, a_end)| {
			
 
				+            let Some((b_start, b_end)) = find_contig_indices(&b_contigs, contig) else {
			
 
				+                return None;
			
 
				+            };
			
 
				+
			
 
				+            let a_ranges = &a[a_start..a_end];
			
 
				+            let b_ranges = &b[b_start..b_end];
			
 
				+            
			
 
				+            let mut intersections = Vec::new();
			
 
				+            let (mut i, mut j) = (0, 0);
			
 
				+
			
 
				+            while i < a_ranges.len() && j < b_ranges.len() {
			
 
				+                let a_range = &a_ranges[i].range;
			
 
				+                let b_range = &b_ranges[j].range;
			
 
				+
			
 
				+                match (a_range.end <= b_range.start, b_range.end <= a_range.start) {
			
 
				+                    (true, _) => i += 1,
			
 
				+                    (_, true) => j += 1,
			
 
				+                    _ => {
			
 
				+                        let start = a_range.start.max(b_range.start);
			
 
				+                        let end = a_range.end.min(b_range.end);
			
 
				+                        if start < end {
			
 
				+                            intersections.push(GenomeRange {
			
 
				+                                contig,
			
 
				+                                range: start..end,
			
 
				+                            });
			
 
				+                        }
			
 
				+
			
 
				+                        if a_range.end < b_range.end {
			
 
				+                            i += 1;
			
 
				+                        } else {
			
 
				+                            j += 1;
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            Some(intersections)
			
 
				+        })
			
 
				+        .flatten()
			
 
				+        .collect()
			
 
				+}
			
 
				+
			
 
				+// Helper to create (contig, start index, end index) tuples
			
 
				+fn extract_contig_indices(ranges: &[&GenomeRange]) -> Vec<(u8, usize, usize)> {
			
 
				+    let mut indices = Vec::new();
			
 
				+    let mut current_contig = None;
			
 
				+    let mut start_idx = 0;
			
 
				+
			
 
				+    for (i, range) in ranges.iter().enumerate() {
			
 
				+        if current_contig != Some(range.contig) {
			
 
				+            if let Some(contig) = current_contig {
			
 
				+                indices.push((contig, start_idx, i));
			
 
				+            }
			
 
				+            current_contig = Some(range.contig);
			
 
				+            start_idx = i;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    if let Some(contig) = current_contig {
			
 
				+        indices.push((contig, start_idx, ranges.len()));
			
 
				+    }
			
 
				+
			
 
				+    indices
			
 
				+}
			
 
				+
			
 
				+// Binary search to find contig indices in precomputed list
			
 
				+fn find_contig_indices(contigs: &[(u8, usize, usize)], target: u8) -> Option<(usize, usize)> {
			
 
				+    contigs.binary_search_by(|(c, _, _)| c.cmp(&target))
			
 
				+        .ok()
			
 
				+        .map(|idx| (contigs[idx].1, contigs[idx].2))
			
 
				+}
			
 
				 
			
 
				 /// A parallel version of the overlap finding function that works with any types
			
 
				 /// implementing `GetGenomePosition` and `GetGenomeRange` traits.
			
@@ -677,3 +833,7 @@ pub trait GetGenomeRange {
 
				     /// Returns a reference to the `GenomeRange` for this item.
			
 
				     fn range(&self) -> &GenomeRange;
			
 
				 }
			
 
				+
			
 
				+pub fn sort_ranges(ranges: &mut [GenomeRange]) {
			
 
				+    ranges.par_sort_by_key(|r| (r.contig, r.range.start, r.range.end));
			
 
				+}
			
--- a/src/scan/scan.rs
+++ b/src/scan/scan.rs
@@ -1,4 +1,4 @@
 
				-use std::{fmt, fs, io::Write, sync::Mutex};
			
 
				+use std::{fmt, fs, io::Write};
			
 
				 
			
 
				 use anyhow::Context;
			
 
				 use log::{debug, error, info};
			
@@ -11,6 +11,7 @@ use rust_htslib::bam::IndexedReader;
 
				 
			
 
				 use crate::io::writers::get_gz_writer;
			
 
				 use crate::math::filter_outliers_modified_z_score_with_indices;
			
 
				+
			
 
				 use crate::{config::Config, io::dict::read_dict, scan::bin::Bin};
			
 
				 
			
 
				 /// Represents a count of reads in a genomic bin, including various metrics and outlier information.
			
@@ -133,30 +134,44 @@ impl BinCount {
 
				             None
			
 
				         };
			
 
				 
			
 
				-        let depths = fields[9]
			
 
				-            .split(',')
			
 
				-            .map(|s| {
			
 
				-                s.trim() // Remove any whitespace around numbers
			
 
				-                    .parse::<u32>()
			
 
				-                    .with_context(|| format!("Failed to parse '{}' as u32", s))
			
 
				-            })
			
 
				-            .collect::<anyhow::Result<Vec<u32>>>()?;
			
 
				+        let depths = if fields[9].trim().is_empty() {
			
 
				+            Vec::new()
			
 
				+        } else {
			
 
				+            fields[9]
			
 
				+                .split(',')
			
 
				+                .map(|s| {
			
 
				+                    s.trim()
			
 
				+                        .parse::<u32>()
			
 
				+                        .with_context(|| format!("Failed to parse '{}' as u32", s))
			
 
				+                })
			
 
				+                .collect::<anyhow::Result<Vec<u32>>>()?
			
 
				+        };
			
 
				 
			
 
				         Ok(BinCount {
			
 
				             contig: fields[0].to_string(),
			
 
				             start: fields[1].parse()?,
			
 
				             end: fields[2].parse()?,
			
 
				             n_reads: fields[3].parse()?,
			
 
				-            coverage: fields[4].parse()?,
			
 
				-            ratio_sa: fields[5].parse()?,
			
 
				-            ratio_start: fields[6].parse()?,
			
 
				-            ratio_end: fields[7].parse()?,
			
 
				+            coverage: parse_f64_allow_nan(fields[4])?,
			
 
				+            ratio_sa: parse_f64_allow_nan(fields[5])?,
			
 
				+            ratio_start: parse_f64_allow_nan(fields[6])?,
			
 
				+            ratio_end: parse_f64_allow_nan(fields[7])?,
			
 
				             outlier,
			
 
				             depths,
			
 
				         })
			
 
				     }
			
 
				 }
			
 
				 
			
 
				+fn parse_f64_allow_nan(s: &str) -> anyhow::Result<f64> {
			
 
				+    let s = s.trim(); // Remove whitespace
			
 
				+    if s.eq_ignore_ascii_case("NaN") {
			
 
				+        Ok(f64::NAN)
			
 
				+    } else {
			
 
				+        s.parse::<f64>()
			
 
				+            .with_context(|| format!("Failed to parse '{}' as f64", s))
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 // impl GetGenomeRange for BinCount {
			
 
				 //     fn range(&self) -> GenomeRange {
			
 
				 //         GenomeRange { contig: contig_to_num(&self.contig), range: self.start..(self.end + 1) }
			
@@ -274,7 +289,8 @@ pub fn par_whole_scan(out_dir: &str, bam_path: &str, config: &Config) -> anyhow:
 
				                 let n_bins_in_chunk = chunk_length.div_ceil(bin_size);
			
 
				 
			
 
				                 let mut bam_reader = IndexedReader::from_path(bam_path)
			
 
				-                    .with_context(|| format!("Can't open BAM file: {}", bam_path)).unwrap();
			
 
				+                    .with_context(|| format!("Failed to open BAM file: {}", bam_path))
			
 
				+                    .unwrap();
			
 
				 
			
 
				                 // Process each bin in the chunk
			
 
				                 (0..n_bins_in_chunk)
			
@@ -439,4 +455,3 @@ pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
 
				         config,
			
 
				     )
			
 
				 }
			
 
				-
			
--- a/src/variant/variants_stats.rs
+++ b/src/variant/variants_stats.rs
@@ -1,4 +1,4 @@
 
				-use std::{collections::{BTreeMap, HashMap}, io::BufRead};
			
 
				+use std::{collections::BTreeMap, io::BufRead, sync::Arc};
			
 
				 
			
 
				 use anyhow::Context;
			
 
				 use dashmap::DashMap;
			
@@ -11,8 +11,8 @@ use crate::{
 
				     annotation::{vep::VepImpact, Annotation},
			
 
				     config::Config,
			
 
				     helpers::bin_data,
			
 
				-    io::{dict::read_dict, readers::get_gz_reader, writers::get_gz_writer},
			
 
				-    positions::{contig_to_num, par_overlaps, GenomeRange},
			
 
				+    io::{dict::read_dict, gff::features_ranges, readers::get_gz_reader, writers::get_gz_writer},
			
 
				+    positions::{contig_to_num, merge_overlapping_genome_ranges, par_overlaps, range_intersection_par, GenomeRange},
			
 
				     scan::scan::BinCount,
			
 
				 };
			
 
				 
			
@@ -37,9 +37,16 @@ pub struct VariantsStats {
 
				     pub consequences: DashMap<String, u32>,
			
 
				     #[serde(serialize_with = "serialize_dashmap_sort")]
			
 
				     pub genes: DashMap<String, u32>,
			
 
				+    #[serde(serialize_with = "serialize_dashmap_sort")]
			
 
				+    pub genes_nonsynonymus: DashMap<String, u32>,
			
 
				+
			
 
				     pub n_gnomad: usize,
			
 
				     pub gnomad: Vec<(String, Vec<(f64, usize)>)>,
			
 
				+
			
 
				+    pub somatic_rates: SomaticVariantRates,
			
 
				+    pub high_depth_somatic_rates: SomaticVariantRates,
			
 
				 }
			
 
				+
			
 
				 pub fn serialize_dashmap_sort<S, T>(
			
 
				     data: &DashMap<T, u32>,
			
 
				     serializer: S,
			
@@ -57,11 +64,12 @@ where
 
				 }
			
 
				 
			
 
				 impl VariantsStats {
			
 
				-    pub fn new(variants: &Variants) -> Self {
			
 
				+    pub fn new(variants: &Variants, id:&str, config: &Config) -> anyhow::Result<Self> {
			
 
				         let n = variants.data.len() as u32;
			
 
				         let alteration_categories: DashMap<String, u32> = DashMap::new();
			
 
				         let vep_impact: DashMap<String, u32> = DashMap::new();
			
 
				         let genes: DashMap<String, u32> = DashMap::new();
			
 
				+        let genes_nonsynonymus: DashMap<String, u32> = DashMap::new();
			
 
				         let consequences: DashMap<String, u32> = DashMap::new();
			
 
				         let n_alts: DashMap<u32, u32> = DashMap::new();
			
 
				         let depths: DashMap<u32, u32> = DashMap::new();
			
@@ -71,12 +79,18 @@ impl VariantsStats {
 
				 
			
 
				         variants.data.par_iter().for_each(|v| {
			
 
				             if let Ok(best_vep) = v.best_vep() {
			
 
				-                if let Some(impact) = best_vep.extra.impact {
			
 
				+                if let Some(ref impact) = best_vep.extra.impact {
			
 
				                     *vep_impact.entry(impact.to_string()).or_default() += 1;
			
 
				                 }
			
 
				-                if let Some(gene) = best_vep.extra.symbol {
			
 
				-                    *genes.entry(gene).or_default() += 1;
			
 
				+                if let Some(ref gene) = best_vep.extra.symbol {
			
 
				+                    *genes.entry(gene.to_string()).or_default() += 1;
			
 
				+                }
			
 
				+                if let (Some(impact), Some(gene)) = (best_vep.extra.impact, best_vep.extra.symbol) {
			
 
				+                    if impact <= VepImpact::MODERATE {
			
 
				+                        *genes_nonsynonymus.entry(gene).or_default() += 1;
			
 
				+                    }
			
 
				                 }
			
 
				+
			
 
				                 if let Some(csqs) = best_vep.consequence {
			
 
				                     let mut csq: Vec<String> = csqs.into_iter().map(String::from).collect();
			
 
				                     csq.sort();
			
@@ -131,7 +145,21 @@ impl VariantsStats {
 
				             })
			
 
				             .collect();
			
 
				 
			
 
				-        Self {
			
 
				+        let exon_ranges = features_ranges("exon", &config)?;
			
 
				+        let exon_ranges = merge_overlapping_genome_ranges(&exon_ranges);
			
 
				+
			
 
				+        let all_somatic_rates = somatic_rates(&variants.data, &exon_ranges, &config)?;
			
 
				+
			
 
				+        let mut high_depth_ranges = high_depth_somatic(id, &config)?;
			
 
				+        high_depth_ranges.par_sort_by_key(|r| ( r.contig, r.range.start ));
			
 
				+
			
 
				+        let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
			
 
				+        let exons_high_depth = range_intersection_par(&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(), &exon_ranges_ref);
			
 
				+
			
 
				+        let high_depth = somatic_rates(&variants.data, &exons_high_depth, &config)?;
			
 
				+
			
 
				+
			
 
				+        Ok(Self {
			
 
				             n,
			
 
				             alteration_categories,
			
 
				             cosmic,
			
@@ -142,8 +170,11 @@ impl VariantsStats {
 
				             vep_impact,
			
 
				             consequences,
			
 
				             genes,
			
 
				+            genes_nonsynonymus,
			
 
				             n_gnomad,
			
 
				-        }
			
 
				+            somatic_rates: all_somatic_rates,
			
 
				+            high_depth_somatic_rates: high_depth
			
 
				+        })
			
 
				     }
			
 
				 
			
 
				     pub fn save_to_json(&self, filename: &str) -> anyhow::Result<()> {
			
@@ -160,6 +191,7 @@ impl VariantsStats {
 
				         debug!("Successfully saved variants to {}", filename);
			
 
				         Ok(())
			
 
				     }
			
 
				+
			
 
				     pub fn load_from_json(filename: &str) -> anyhow::Result<Self> {
			
 
				         let mut reader = get_gz_reader(filename)
			
 
				             .with_context(|| format!("Failed to create BGZF reader for file: {}", filename))?;
			
@@ -172,25 +204,25 @@ impl VariantsStats {
 
				     }
			
 
				 }
			
 
				 
			
 
				-#[derive(Debug)]
			
 
				+#[derive(Debug, Serialize, Deserialize, Clone)]
			
 
				 pub struct SomaticVariantRates {
			
 
				     pub wgs_length: u32,
			
 
				     pub total_variants: usize,
			
 
				     pub somatic_mutation_rate_wgs: f64,
			
 
				     pub exon_count: usize,
			
 
				-    pub variants_in_coding: usize,
			
 
				-    pub coding_variants: usize,
			
 
				     pub total_exon_bases: u32,
			
 
				+    pub variants_in_coding: usize,
			
 
				     pub somatic_mutation_rate_coding: f64,
			
 
				+    pub nonsynonymous_variants: usize,
			
 
				     pub somatic_nonsynonymous_rate_coding: f64,
			
 
				 }
			
 
				 
			
 
				 pub fn somatic_rates(
			
 
				     variants: &[Variant],
			
 
				-    feature_ranges: &Vec<GenomeRange>,
			
 
				+    exon_ranges: &Vec<GenomeRange>,
			
 
				     config: &Config,
			
 
				 ) -> anyhow::Result<SomaticVariantRates> {
			
 
				-    let ol = par_overlaps(variants, feature_ranges);
			
 
				+    let ol = par_overlaps(variants, exon_ranges);
			
 
				 
			
 
				     let n_coding = ol
			
 
				         .iter()
			
@@ -199,7 +231,7 @@ pub fn somatic_rates(
 
				         .filter(|impact| *impact <= VepImpact::MODERATE)
			
 
				         .count();
			
 
				 
			
 
				-    let n_bases_m: u32 = feature_ranges.par_iter().map(|gr| gr.length()).sum();
			
 
				+    let n_bases_m: u32 = exon_ranges.par_iter().map(|gr| gr.length()).sum();
			
 
				     let mega_base_m = n_bases_m as f64 / 10.0e6;
			
 
				 
			
 
				     let wgs_len: u32 = read_dict(&config.dict_file)?.iter().map(|(_, l)| *l).sum();
			
@@ -210,9 +242,9 @@ pub fn somatic_rates(
 
				 
			
 
				     Ok(SomaticVariantRates {
			
 
				         total_variants: variants.len(),
			
 
				-        exon_count: feature_ranges.len(),
			
 
				+        exon_count: exon_ranges.len(),
			
 
				         variants_in_coding: ol.len(),
			
 
				-        coding_variants: n_coding,
			
 
				+        nonsynonymous_variants: n_coding,
			
 
				         total_exon_bases: n_bases_m,
			
 
				         wgs_length: wgs_len,
			
 
				         somatic_mutation_rate_wgs: rate_wgs,
			
@@ -221,52 +253,136 @@ pub fn somatic_rates(
 
				     })
			
 
				 }
			
 
				 
			
 
				-pub fn high_depth_somatic(id: &str, config: &Config) -> anyhow::Result<DashMap<String, Vec<GenomeRange>>> {
			
 
				-    let mut contigs: Vec<String> = (1..22).map(|i| format!("chr{i}")).collect();
			
 
				-    contigs.extend(["chrX", "chrY", "chrM"].into_iter().map(String::from));
			
 
				-
			
 
				-    let results: DashMap<String, Vec<GenomeRange>> = DashMap::new();
			
 
				-    contigs.into_par_iter().for_each(|contig| {
			
 
				-        let mrd_path = format!("{}/{contig}_count.tsv.gz", config.normal_dir_count(id));
			
 
				-        let mrd_reader =
			
 
				-            get_gz_reader(&mrd_path).with_context(|| format!("Failed to open: {mrd_path}"));
			
 
				-        let diag_path = format!("{}/{contig}_count.tsv.gz", config.tumoral_dir_count(id));
			
 
				-        let diag_reader =
			
 
				-            get_gz_reader(&diag_path).with_context(|| format!("Failed to open: {diag_path}"));
			
 
				-
			
 
				-        if let (Ok(mrd_reader), Ok(diag_reader)) = (mrd_reader, diag_reader) {
			
 
				-            let ranges: Vec<GenomeRange> = mrd_reader
			
 
				+/// Computes high-depth somatic regions across all chromosomes for a given sample.
			
 
				+///
			
 
				+/// This function reads count files (compressed TSVs) for both normal and tumoral samples
			
 
				+/// for each contig (`chr1` to `chr22`, `chrX`, `chrY`, and `chrM`). It identifies genomic regions
			
 
				+/// where both normal and tumoral depths exceed a configured quality threshold, and extracts
			
 
				+/// consecutive high-quality bins as genomic ranges.
			
 
				+///
			
 
				+/// The function performs the computation in parallel across contigs for better performance,
			
 
				+/// and includes contextual error handling to help trace issues related to I/O or data parsing.
			
 
				+///
			
 
				+/// # Arguments
			
 
				+///
			
 
				+/// * `id` - The identifier of the sample.
			
 
				+/// * `config` - A reference to a `Config` struct containing paths and thresholds.
			
 
				+///
			
 
				+/// # Returns
			
 
				+///
			
 
				+/// A `Result` containing a vector of `GenomeRange` objects representing high-depth somatic regions,
			
 
				+/// or an error if any file reading, parsing, or logical check fails.
			
 
				+///
			
 
				+/// # Errors
			
 
				+///
			
 
				+/// Returns an error if:
			
 
				+/// - Any of the input files (normal or tumoral) can't be opened
			
 
				+/// - Any line in the files fails to read or parse
			
 
				+/// - The `BinCount` objects from corresponding lines don't match in position
			
 
				+///
			
 
				+/// # Parallelism
			
 
				+///
			
 
				+/// This function leverages `rayon` to parallelize processing of contigs.
			
 
				+///
			
 
				+/// # Example
			
 
				+///
			
 
				+/// ```rust
			
 
				+/// let config = Config::load_from("config_path.toml")?;
			
 
				+/// let regions = high_depth_somatic("sample_001", &config)?;
			
 
				+/// for region in regions {
			
 
				+///     println!("{:?}", region);
			
 
				+/// }
			
 
				+/// ```
			
 
				+///
			
 
				+/// # Requirements
			
 
				+///
			
 
				+/// - The file names must follow the pattern `{contig}_count.tsv.gz`
			
 
				+/// - The structure of lines must match what `BinCount::from_tsv_row` expects
			
 
				+pub fn high_depth_somatic(id: &str, config: &Config) -> anyhow::Result<Vec<GenomeRange>> {
			
 
				+    // Generate contigs from chr1 to chr22, then chrX, Y, M
			
 
				+    let contigs = (1..=22)
			
 
				+        .map(|i| format!("chr{i}"))
			
 
				+        .chain(["chrX", "chrY", "chrM"].iter().map(|s| s.to_string()))
			
 
				+        .collect::<Vec<_>>();
			
 
				+
			
 
				+    let config = Arc::new(config); // Wrap the config in an Arc for shared ownership
			
 
				+
			
 
				+    // Process contigs in parallel with proper error propagation
			
 
				+    let results: Vec<Vec<GenomeRange>> = contigs
			
 
				+        .into_par_iter()
			
 
				+        .map(|contig| {
			
 
				+            let config = Arc::clone(&config);
			
 
				+            // Build file paths
			
 
				+            let mrd_path = format!("{}/{contig}_count.tsv.gz", config.normal_dir_count(id));
			
 
				+            let diag_path = format!("{}/{contig}_count.tsv.gz", config.tumoral_dir_count(id));
			
 
				+
			
 
				+            // Open readers with proper error context
			
 
				+            let mrd_reader = get_gz_reader(&mrd_path)
			
 
				+                .with_context(|| format!("Failed to open MRD file: {mrd_path}"))?;
			
 
				+            let diag_reader = get_gz_reader(&diag_path)
			
 
				+                .with_context(|| format!("Failed to open Diag file: {diag_path}"))?;
			
 
				+
			
 
				+            // Process lines in pairs
			
 
				+            let ranges = mrd_reader
			
 
				                 .lines()
			
 
				                 .zip(diag_reader.lines())
			
 
				-                .filter_map(|(mrd, diag)| {
			
 
				-                    if let (Ok(mrd), Ok(diag)) = (mrd, diag) {
			
 
				-                        if let (Ok(mrd), Ok(diag)) =
			
 
				-                        (BinCount::from_tsv_row(&mrd), BinCount::from_tsv_row(&diag))
			
 
				-                        {
			
 
				-                            assert_eq!(mrd.contig, diag.contig);
			
 
				-                            assert_eq!(mrd.start, diag.start);
			
 
				-                            let bools: Vec<bool> = mrd
			
 
				-                                .depths
			
 
				-                                .into_iter()
			
 
				-                                .zip(diag.depths)
			
 
				-                                .map(|(depth_mrd, depth_diag)| {
			
 
				-                                    depth_mrd >= config.min_high_quality_depth
			
 
				-                                    && depth_diag >= config.min_high_quality_depth
			
 
				-                                })
			
 
				-                                .collect();
			
 
				-                            let ranges = ranges_from_consecutive_true(&bools, mrd.start, &mrd.contig);
			
 
				-                            return Some(ranges);
			
 
				-                        }
			
 
				+                .enumerate()
			
 
				+                .map(|(line_num, (mrd_line, diag_line))| {
			
 
				+                    let line_num = line_num + 1; // Convert to 1-based indexing
			
 
				+
			
 
				+                    // Read lines with context
			
 
				+                    let mrd_line = mrd_line.with_context(|| {
			
 
				+                        format!("MRD file {mrd_path} line {line_num} read error")
			
 
				+                    })?;
			
 
				+                    let diag_line = diag_line.with_context(|| {
			
 
				+                        format!("Diag file {diag_path} line {line_num} read error")
			
 
				+                    })?;
			
 
				+
			
 
				+                    // Parse both lines
			
 
				+                    let mrd = BinCount::from_tsv_row(&mrd_line).with_context(|| {
			
 
				+                        format!("Failed to parse MRD line {line_num}: {mrd_line}")
			
 
				+                    })?;
			
 
				+                    let diag = BinCount::from_tsv_row(&diag_line).with_context(|| {
			
 
				+                        format!("Failed to parse Diag line {line_num}: {diag_line}")
			
 
				+                    })?;
			
 
				+
			
 
				+                    // Validate matching positions
			
 
				+                    if mrd.contig != diag.contig {
			
 
				+                        anyhow::bail!(
			
 
				+                            "Contig mismatch at line {line_num}: {} vs {}",
			
 
				+                            mrd.contig,
			
 
				+                            diag.contig
			
 
				+                        );
			
 
				+                    }
			
 
				+                    if mrd.start != diag.start {
			
 
				+                        anyhow::bail!(
			
 
				+                            "Start position mismatch at line {line_num}: {} vs {}",
			
 
				+                            mrd.start,
			
 
				+                            diag.start
			
 
				+                        );
			
 
				                     }
			
 
				-                    None
			
 
				+
			
 
				+                    // Calculate high-depth regions
			
 
				+                    let bools: Vec<bool> = mrd
			
 
				+                        .depths
			
 
				+                        .iter()
			
 
				+                        .zip(diag.depths.iter())
			
 
				+                        .map(|(&m, &d)| {
			
 
				+                            m >= config.min_high_quality_depth && d >= config.min_high_quality_depth
			
 
				+                        })
			
 
				+                        .collect();
			
 
				+
			
 
				+                    Ok(ranges_from_consecutive_true(&bools, mrd.start, &mrd.contig))
			
 
				                 })
			
 
				-                .flatten()
			
 
				-                .collect();
			
 
				-            results.insert(contig, ranges);
			
 
				-        }
			
 
				-    });
			
 
				+                .collect::<anyhow::Result<Vec<_>>>()?;
			
 
				+
			
 
				+            // Flatten nested ranges and return contig's ranges
			
 
				+            Ok(ranges.into_iter().flatten().collect::<Vec<GenomeRange>>())
			
 
				+        })
			
 
				+        .collect::<anyhow::Result<Vec<_>>>()?;
			
 
				 
			
 
				-    Ok(results)
			
 
				+    // Flatten the results from all contigs into a single vector
			
 
				+    Ok(results.into_iter().flatten().collect())
			
 
				 }
			
 
				 
			
 
				 fn ranges_from_consecutive_true(vec: &[bool], start: u32, contig: &str) -> Vec<GenomeRange> {