Thomas 8 mēneši atpakaļ
vecāks
revīzija
c69dcf818c

+ 10 - 6
src/annotation/mod.rs

@@ -79,6 +79,8 @@ pub enum Annotation {
     Panel(String),
 
     CpG,
+
+    LowMAPQ,
 }
 
 /// Denotes the biological sample type associated with a variant call.
@@ -203,15 +205,16 @@ impl fmt::Display for Annotation {
             Annotation::VEP(_) => "VEP".into(),
             Annotation::CpG => "CpG".into(),
             Annotation::TriNucleotides(bases) => format!(
-                "Trinucleotides({})",
-                bases.iter().map(|b| b.to_string()).collect::<String>(),
-            ),
+                        "Trinucleotides({})",
+                        bases.iter().map(|b| b.to_string()).collect::<String>(),
+                    ),
             Annotation::ReplicationTiming(rt) => match rt {
-                ReplicationClass::Early => "ReplicationEarly".into(),
-                ReplicationClass::Late => "ReplicationLate".into(),
-            },
+                        ReplicationClass::Early => "ReplicationEarly".into(),
+                        ReplicationClass::Late => "ReplicationLate".into(),
+                    },
             Annotation::HighDepth => "HighDepth".into(),
             Annotation::Panel(name) => format!("Panel_{name}"),
+            Annotation::LowMAPQ => "LowMAPQ".to_string(),
         };
         write!(f, "{}", s)
     }
@@ -356,6 +359,7 @@ impl Annotations {
                     | Annotation::HighDepth
                     | Annotation::CpG
                     | Annotation::Panel(_)
+                    | Annotation::LowMAPQ
                     | Annotation::HighConstitAlt => categorical.push(ann.to_string()),
                     Annotation::Callers(caller, sample) => {
                         categorical.push(format!("{caller} {sample}"))

+ 2 - 0
src/config.rs

@@ -77,6 +77,7 @@ pub struct Config {
     pub late_bed: String,
     pub panels: Vec<(String, String)>,
     pub cpg_bed: String,
+    pub max_depth_low_quality: u32,
 }
 
 // Here comes names that can't be changed from output of tools
@@ -205,6 +206,7 @@ impl Default for Config {
             entropy_seq_len: 10,
             min_shannon_entropy: 1.0,
 
+            max_depth_low_quality: 20,
             min_high_quality_depth: 14, 
             min_n_callers: 1,
             early_bed: "/data/ref/hs1/replication_early_25_hs1.bed".to_string(),

+ 2 - 0
src/io/writers.rs

@@ -5,6 +5,7 @@ use std::{
 
 use anyhow::Context;
 use bgzip::{BGZFWriter, Compression};
+use log::info;
 
 pub fn get_gz_writer(path: &str, force: bool) -> anyhow::Result<BGZFWriter<File>> {
     if !path.ends_with(".gz") {
@@ -22,5 +23,6 @@ pub fn get_gz_writer(path: &str, force: bool) -> anyhow::Result<BGZFWriter<File>
         .open(path)
         .with_context(|| anyhow::anyhow!("failed to open the file: {path}"))?;
 
+    info!("Writing into {path}");
     Ok(BGZFWriter::new(file, Compression::default()))
 }

+ 22 - 22
src/lib.rs

@@ -177,7 +177,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, ToBNDGraph}, variants_stats::{self, VariantsStats}}};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, ToBNDGraph}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -869,7 +869,7 @@ mod tests {
     #[test]
     fn somatic_cases() -> anyhow::Result<()> {
         init();
-        let id = "ADJAGBA";
+        let id = "CHENU";
         let config = Config { somatic_pipe_force: true, ..Default::default() };
         match SomaticPipe::initialize(id, config)?.run() {
             Ok(_) => (),
@@ -934,7 +934,11 @@ mod tests {
             let path = format!("{}/{id}/diag/{id}_somatic_variants.bit", config.result_dir);
             match variant_collection::Variants::load_from_file(&path) {
                 Ok(mut variants) => {
-                    let res = VariantsStats::new(&mut variants, id, &config)?.save_to_json(&format!(
+                    let (mut high_depth_ranges, _) =
+                    somatic_depth_quality_ranges(&id, &config)?;
+                    high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
+
+                    let res = VariantsStats::new(&mut variants, id, &config, &high_depth_ranges)?.save_to_json(&format!(
                         "{}/{id}/diag/{id}_somatic_variants_stats.json.gz", config.result_dir));
                     if res.is_err() {
                         info!("{:#?}", res);
@@ -984,8 +988,12 @@ mod tests {
     #[test]
     fn whole_scan() -> anyhow::Result<()> {
         init();
-        let id = "ADJAGBA";
-        let config = Config::default();
+        let id = "CHENU";
+        let mut config = Config::default();
+        let u = config.solo_bam(id, "mrd");
+        println!("{u}");
+
+        config.somatic_scan_force = true;
         somatic_scan(id, &config)?;
         Ok(())
     }
@@ -1013,15 +1021,15 @@ mod tests {
         // let min_3 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
         // info!("{min_3:#?}");
 
-        let mut high_depth_ranges = variants_stats::high_depth_somatic(id, &config)?;
-        high_depth_ranges.par_sort_by_key(|r| ( r.contig, r.range.start ));
-
-        let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
-        let exons_high_depth = range_intersection_par(&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(), &exon_ranges_ref);
-
-        let full = variants_stats::somatic_rates(&variants.data, &exons_high_depth, &config);
-        info!("{full:#?}");
-
+        // let mut high_depth_ranges = variants_stats::high_depth_somatic(id, &config)?;
+        // high_depth_ranges.par_sort_by_key(|r| ( r.contig, r.range.start ));
+        //
+        // let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
+        // let exons_high_depth = range_intersection_par(&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(), &exon_ranges_ref);
+        //
+        // let full = variants_stats::somatic_rates(&variants.data, &exons_high_depth, &config);
+        // info!("{full:#?}");
+        //
 
 
 
@@ -1059,14 +1067,6 @@ mod tests {
         Ok(())
     }
 
-    #[test]
-    fn quality_ranges() -> anyhow::Result<()> {
-        init();
-        let r = variants_stats::high_depth_somatic("ADJAGBA", &Config::default())?.iter().map(|range| range.length()).sum::<u32>();
-        info!("{r} high depth bases");
-        Ok(())
-    }
-
     fn gr(contig: u8, start: u32, end: u32) -> GenomeRange {
         GenomeRange {
             contig,

+ 28 - 7
src/pipes/somatic.rs

@@ -3,11 +3,15 @@ use crate::{
     collection::ShouldRun,
     create_should_run_normal_tumoral, init_solo_callers_normal_tumoral,
     scan::scan::SomaticScan,
-    variant::variant::{run_if_required, ShouldRunBox},
+    variant::{
+        variant::{run_if_required, ShouldRunBox},
+        variants_stats::somatic_depth_quality_ranges,
+    },
 };
 use anyhow::Context;
 use itertools::Itertools;
 use log::info;
+use rayon::slice::ParallelSliceMut;
 use std::{
     collections::HashMap,
     fs::{self, File},
@@ -217,8 +221,9 @@ impl Run for SomaticPipe {
         to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant));
 
         info!("Running prerequired pipe components.");
-        run_if_required(&mut to_run_if_req)
-            .with_context(|| format!("Failed to run a prerequired component of somatic pipe for {id}."))?;
+        run_if_required(&mut to_run_if_req).with_context(|| {
+            format!("Failed to run a prerequired component of somatic pipe for {id}.")
+        })?;
 
         // Initialize variant callers
         let mut callers = init_somatic_callers!(
@@ -285,6 +290,23 @@ impl Run for SomaticPipe {
                 "{stats_dir}/{id}_annotations_02_post_germline.json"
             ))?;
 
+        // MASK mapq
+        let (mut high_depth_ranges, mut low_quality_ranges) =
+            somatic_depth_quality_ranges(&id, &config)?;
+        high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
+        low_quality_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
+
+        info!("ranges: {}", low_quality_ranges.len());
+
+        variants_collections.iter_mut().for_each(|e | {
+            let _ = e.annotate_with_ranges(&low_quality_ranges, &annotations, Annotation::LowMAPQ);
+        });
+
+        let n_masked_lowqual = annotations.retain_variants(&mut variants_collections, |anns| {
+            !anns.contains(&Annotation::LowMAPQ)
+        });
+        info!("N low mapq filtered: {}", n_masked_lowqual);
+
         // Annotate with depth and number of alternate reads from constitutional BAM
         info!("Reading Constit BAM file for depth and pileup annotation.");
         variants_collections.iter().try_for_each(|c| {
@@ -453,7 +475,7 @@ impl Run for SomaticPipe {
         let vep_stats = annotations.vep_stats()?;
         vep_stats.save_to_json(&format!("{stats_dir}/{id}_annotations_10_vep.json"))?;
 
-        VariantsStats::new(&mut variants, &id, &config)?
+        VariantsStats::new(&mut variants, &id, &config, &high_depth_ranges)?
             .save_to_json(&format!("{stats_dir}/{id}_variants_stats_final.json.gz"))?;
 
         info!("Final unique variants: {}", variants.data.len());
@@ -461,7 +483,6 @@ impl Run for SomaticPipe {
         variants.save_to_file(&result_bit)?;
         variants.write_vcf(&result_vcf, &config.dict_file, config.somatic_pipe_force)?;
 
-
         Ok(())
     }
 }
@@ -479,8 +500,8 @@ pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
     variants.merge(clairs_germline, &annotations);
     info!("-- {}", variants.data.len());
 
-    let u = VariantsStats::new(&mut variants, &id, &config)?;
-    info!("++ {}", u.n);
+    // let u = VariantsStats::new(&mut variants, &id, &config)?;
+    // info!("++ {}", u.n);
 
     Ok(())
 }

+ 6 - 8
src/scan/bin.rs

@@ -26,7 +26,7 @@ pub struct Bin {
     /// Average read length within this bin
     // pub reads_mean_len: u32,
     /// Number of reads filtered due to low mapping quality
-    pub n_low_mapq: u32,
+    pub low_qualities: Vec<u32>,
     /// Vec of depths
     pub depths: Vec<u32>,
 }
@@ -66,8 +66,8 @@ impl Bin {
             .with_context(|| format!("Failed to fetch region {}:{}-{}", contig, start, end))?;
 
         let mut reads_store: HashMap<Vec<u8>, Record> = HashMap::new();
-        let mut n_low_mapq = 0;
         let mut depths = vec![0u32; length as usize]; // Optional pseudo-depth
+        let mut low_qualities = vec![0u32; length as usize]; // Optional pseudo-depth
 
         for record_result in bam_reader.rc_records() {
             let rc_record = match record_result {
@@ -83,11 +83,6 @@ impl Bin {
 
             let record = rc_record.as_ref();
 
-            if record.mapq() < min_mapq {
-                n_low_mapq += 1;
-                continue;
-            }
-
             if let Some((read_start, read_end)) = read_range(record) {
                 if read_end < start || read_start > end {
                     continue; // Not overlapping this bin
@@ -103,6 +98,9 @@ impl Bin {
                     let i = (pos - start) as usize;
                     if i < depths.len() {
                         depths[i] += 1;
+                        if record.mapq() < min_mapq {
+                            low_qualities[i] += 1;
+                        }
                     }
                 }
             }
@@ -113,7 +111,7 @@ impl Bin {
             start,
             end,
             reads_store,
-            n_low_mapq,
+            low_qualities,
             depths,
         })
     }

+ 25 - 8
src/scan/scan.rs

@@ -41,6 +41,7 @@ pub struct BinCount {
     /// Optional vector of outlier types for this bin.
     pub outlier: Option<Vec<BinOutlier>>,
     pub depths: Vec<u32>,
+    pub low_qualities: Vec<u32>,
 }
 
 impl From<&Bin> for BinCount {
@@ -67,6 +68,7 @@ impl From<&Bin> for BinCount {
             ratio_end: n_end as f64 / n_reads_float,
             outlier: None,
             depths: bin.depths.clone(),
+            low_qualities: bin.low_qualities.clone(),
         }
     }
 }
@@ -78,7 +80,7 @@ impl BinCount {
     /// A `String` representing the `BinCount` as a TSV row.
     pub fn to_tsv_row(&self) -> String {
         format!(
-            "{}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.6}\t{:.6}\t{}\t{}",
+            "{}\t{}\t{}\t{}\t{:.6}\t{:.6}\t{:.6}\t{:.6}\t{}\t{}\t{}",
             self.contig,
             self.start,
             self.end,
@@ -96,6 +98,11 @@ impl BinCount {
                     .join(", "))
                 .unwrap_or_default(),
             self.depths
+                .iter()
+                .map(|e| e.to_string())
+                .collect::<Vec<String>>()
+                .join(","),
+            self.low_qualities
                 .iter()
                 .map(|e| e.to_string())
                 .collect::<Vec<String>>()
@@ -119,7 +126,7 @@ impl BinCount {
     /// - The outlier field is not in the expected format.
     pub fn from_tsv_row(row: &str) -> anyhow::Result<Self> {
         let fields: Vec<&str> = row.split('\t').collect();
-        if fields.len() != 10 {
+        if fields.len() != 11 {
             anyhow::bail!("Invalid number of fields in TSV row");
         }
 
@@ -152,6 +159,19 @@ impl BinCount {
                 .collect::<anyhow::Result<Vec<u32>>>()?
         };
 
+        let low_qualities = if fields[10].trim().is_empty() {
+            Vec::new()
+        } else {
+            fields[10]
+                .split(',')
+                .map(|s| {
+                    s.trim()
+                        .parse::<u32>()
+                        .with_context(|| format!("Failed to parse '{}' as u32", s))
+                })
+                .collect::<anyhow::Result<Vec<u32>>>()?
+        };
+
         Ok(BinCount {
             contig: fields[0].to_string(),
             start: fields[1].parse()?,
@@ -163,6 +183,7 @@ impl BinCount {
             ratio_end: parse_f64_allow_nan(fields[7])?,
             outlier,
             depths,
+            low_qualities,
         })
     }
 }
@@ -469,13 +490,9 @@ pub fn fill_outliers(bin_counts: &mut [BinCount]) {
 /// ```
 pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
     info!("Starting scan for {id} normal.");
-    par_whole_scan(&config.normal_dir_count(id), &config.normal_bam(id), config)?;
+    par_whole_scan(id, &config.normal_name, config)?;
     info!("Starting scan for {id} tumoral.");
-    par_whole_scan(
-        &config.tumoral_dir_count(id),
-        &config.tumoral_bam(id),
-        config,
-    )
+    par_whole_scan(id, &config.tumoral_name, config)
 }
 
 /// A pipeline runner for executing SomaticScan on matched tumor and normal samples.

+ 58 - 31
src/variant/variant_collection.rs

@@ -207,6 +207,29 @@ impl VariantCollection {
         )
     }
 
+    pub fn annotate_with_ranges(
+        &mut self,
+        ranges: &[GenomeRange],
+        annotations: &Annotations,
+        annotation: Annotation,
+    ) -> (u32, usize) {
+        let ranges_ref: Vec<&GenomeRange> = ranges.iter().collect();
+        let positions: Vec<&GenomePosition> = self.variants.iter().map(|v| &v.position).collect();
+
+        let overlaps = overlaps_par(&positions, &ranges_ref);
+
+        for &idx in &overlaps {
+            let variant = &mut self.variants[idx];
+
+            let key = variant.hash();
+            let mut anns = annotations.store.entry(key).or_default();
+            anns.push(annotation.clone());
+        }
+
+        let total_bp: u32 = ranges.iter().map(|r| r.length()).sum();
+        (total_bp, overlaps.len())
+    }
+
     fn chunk_size(&self, max_threads: u8) -> usize {
         let total_items = self.variants.len();
         let min_chunk_size = 1000;
@@ -362,7 +385,7 @@ impl VariantCollection {
                             if seq == alt_seq {
                                 n_alt = n;
                             } else if is_ins && seq.len() > 1 {
-                                // insertion sequence DO NOT have to match 
+                                // insertion sequence DO NOT have to match
                                 // rule assumed to reduce FP from Solo callers
                                 // with this rule solo callers won't be able to
                                 // call the variant of an insertion.
@@ -598,44 +621,44 @@ impl Variant {
 
     /// Merge all `Infos` from the list of `VcfVariant`s.
     pub fn merge_infos(&self) -> Infos {
-    use std::collections::HashSet;
-    use log::warn;
+        use log::warn;
+        use std::collections::HashSet;
 
-    let mut seen_keys = HashSet::new();
-    let mut merged = Vec::new();
-    let mut end_info: Option<(u32, &VcfVariant)> = None;
+        let mut seen_keys = HashSet::new();
+        let mut merged = Vec::new();
+        let mut end_info: Option<(u32, &VcfVariant)> = None;
 
-    for vcf in self.vcf_variants.iter() {
-        for info in &vcf.infos.0 {
-            let key = info.key().to_string();
+        for vcf in self.vcf_variants.iter() {
+            for info in &vcf.infos.0 {
+                let key = info.key().to_string();
 
-            if key == "END" {
-                if let Info::END(e) = info {
-                    end_info = Some((*e, vcf));
+                if key == "END" {
+                    if let Info::END(e) = info {
+                        end_info = Some((*e, vcf));
+                    }
+                } else if seen_keys.insert(key) {
+                    merged.push(info.clone());
                 }
-            } else if seen_keys.insert(key) {
-                merged.push(info.clone());
             }
         }
-    }
 
-    if let Some((end_pos, vcf)) = end_info {
-        let pos = vcf.position.position + 1;
-        if end_pos >= pos {
-            merged.push(Info::END(end_pos));
-        } else {
-            warn!(
-                "Invalid INFO/END={} < POS={} at {}:{} – skipping END field.",
-                end_pos,
-                pos,
-                vcf.position.contig(),
-                pos
-            );
+        if let Some((end_pos, vcf)) = end_info {
+            let pos = vcf.position.position + 1;
+            if end_pos >= pos {
+                merged.push(Info::END(end_pos));
+            } else {
+                warn!(
+                    "Invalid INFO/END={} < POS={} at {}:{} – skipping END field.",
+                    end_pos,
+                    pos,
+                    vcf.position.contig(),
+                    pos
+                );
+            }
         }
-    }
 
-    Infos(merged)
-}
+        Infos(merged)
+    }
 
     pub fn merge_formats(&self) -> Formats {
         let mut seen_keys = HashSet::new();
@@ -667,7 +690,7 @@ impl Variant {
         let merged_formats = self.merge_formats();
 
         let (format_keys, format_values): (String, String) = merged_formats.into();
-        
+
         writeln!(
             writer,
             "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
@@ -947,6 +970,10 @@ impl Variants {
         (total_bp, n_restricted_call)
     }
 
+    pub fn remove_with_annotation(&mut self, annotation: &Annotation) {
+        self.data.retain(|v| !v.annotations.contains(annotation));
+    }
+
     /// Saves the Variants collection to a BGZF-compressed JSON file.
     ///
     /// This method serializes the Variants struct to JSON format and writes it to a file

+ 93 - 113
src/variant/variants_stats.rs

@@ -62,7 +62,6 @@ pub struct VariantsStats {
 
     #[serde(serialize_with = "serialize_dashmap_sort")]
     pub deletions_len: DashMap<u32, u32>,
-
 }
 
 pub fn serialize_dashmap_sort<S, T>(
@@ -82,7 +81,7 @@ where
 }
 
 impl VariantsStats {
-    pub fn new(variants: &mut Variants, id: &str, config: &Config) -> anyhow::Result<Self> {
+    pub fn new(variants: &mut Variants, id: &str, config: &Config, high_depth_ranges: &[GenomeRange]) -> anyhow::Result<Self> {
         let n = variants.data.len() as u32;
         let alteration_categories: DashMap<String, u32> = DashMap::new();
         let vep_impact: DashMap<String, u32> = DashMap::new();
@@ -173,7 +172,6 @@ impl VariantsStats {
             if let Some(len) = v.deletion_length() {
                 *deletions_len.entry(len).or_default() += 1;
             }
-
         });
 
         let mut n_gnomad = 0;
@@ -191,8 +189,8 @@ impl VariantsStats {
 
         let all_somatic_rates = somatic_rates(&variants.data, &exon_ranges, config)?;
 
-        let mut high_depth_ranges = high_depth_somatic(id, config)?;
-        high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
+        
+
         let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
         let exons_high_depth = range_intersection_par(
             &high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(),
@@ -452,136 +450,118 @@ pub fn somatic_rates(
     })
 }
 
-/// Computes high-depth somatic regions across all chromosomes for a given sample.
+/// Computes high‑depth and low‑quality genomic regions for a given sample.
 ///
-/// This function reads count files (compressed TSVs) for both normal and tumoral samples
-/// for each contig (`chr1` to `chr22`, `chrX`, `chrY`, and `chrM`). It identifies genomic regions
-/// where both normal and tumoral depths exceed a configured quality threshold, and extracts
-/// consecutive high-quality bins as genomic ranges.
-///
-/// The function performs the computation in parallel across contigs for better performance,
-/// and includes contextual error handling to help trace issues related to I/O or data parsing.
+/// For each chromosome (`chr1`…`chr22`, `chrX`, `chrY`, `chrM`), this function
+/// reads paired “normal” (MRD) and “tumoral” (Diag) count files, then:
+/// 1. Marks positions where both depths ≥ `config.min_high_quality_depth` as high‑depth.
+/// 2. Marks positions where both depths < `config.max_depth_low_quality` as low‑quality.
+/// Consecutive runs of true values are merged into `GenomeRange`s.
 ///
 /// # Arguments
 ///
-/// * `id` - The identifier of the sample.
-/// * `config` - A reference to a `Config` struct containing paths and thresholds.
+/// * `id` — Sample identifier (used to locate per‑contig files).
+/// * `config` — Analysis settings.
 ///
 /// # Returns
 ///
-/// A `Result` containing a vector of `GenomeRange` objects representing high-depth somatic regions,
-/// or an error if any file reading, parsing, or logical check fails.
+/// On success, returns `Ok((high_depth_ranges, low_quality_ranges))`, where each is a
+/// flattened `Vec<GenomeRange>` across all contigs. Returns an error if any I/O,
+/// parsing, or contig/position mismatch occurs.
 ///
 /// # Errors
 ///
-/// Returns an error if:
-/// - Any of the input files (normal or tumoral) can't be opened
-/// - Any line in the files fails to read or parse
-/// - The `BinCount` objects from corresponding lines don't match in position
-///
-/// # Parallelism
-///
-/// This function leverages `rayon` to parallelize processing of contigs.
-///
-/// # Example
-///
-/// ```rust
-/// let config = Config::load_from("config_path.toml")?;
-/// let regions = high_depth_somatic("sample_001", &config)?;
-/// for region in regions {
-///     println!("{:?}", region);
-/// }
-/// ```
-///
-/// # Requirements
-///
-/// - The file names must follow the pattern `{contig}_count.tsv.gz`
-/// - The structure of lines must match what `BinCount::from_tsv_row` expects
-pub fn high_depth_somatic(id: &str, config: &Config) -> anyhow::Result<Vec<GenomeRange>> {
-    // Generate contigs from chr1 to chr22, then chrX, Y, M
+/// * File open/read failures (with path & line number in context).
+/// * TSV parsing errors (with line content in context).
+/// * Contig or start‑position mismatches between paired files.
+pub fn somatic_depth_quality_ranges(
+    id: &str,
+    config: &Config,
+) -> anyhow::Result<(Vec<GenomeRange>, Vec<GenomeRange>)> {
+    // List of contigs: chr1..chr22, then X, Y, M
     let contigs = (1..=22)
         .map(|i| format!("chr{i}"))
-        .chain(["chrX", "chrY", "chrM"].iter().map(|s| s.to_string()))
+        .chain(["chrX", "chrY", "chrM"].iter().map(ToString::to_string))
         .collect::<Vec<_>>();
 
-    let config = Arc::new(config); // Wrap the config in an Arc for shared ownership
+    let cfg = Arc::new(config);
 
-    // Process contigs in parallel with proper error propagation
-    let results: Vec<Vec<GenomeRange>> = contigs
+    // For each contig, produce (high_ranges, lowq_ranges)
+    let per_contig = contigs
         .into_par_iter()
         .map(|contig| {
-            let config = Arc::clone(&config);
-            // Build file paths
-            let mrd_path = format!("{}/{contig}_count.tsv.gz", config.normal_dir_count(id));
-            let diag_path = format!("{}/{contig}_count.tsv.gz", config.tumoral_dir_count(id));
-
-            // Open readers with proper error context
-            let mrd_reader = get_gz_reader(&mrd_path)
-                .with_context(|| format!("Failed to open MRD file: {mrd_path}"))?;
-            let diag_reader = get_gz_reader(&diag_path)
-                .with_context(|| format!("Failed to open Diag file: {diag_path}"))?;
-
-            // Process lines in pairs
-            let ranges = mrd_reader
-                .lines()
-                .zip(diag_reader.lines())
-                .enumerate()
-                .map(|(line_num, (mrd_line, diag_line))| {
-                    let line_num = line_num + 1; // Convert to 1-based indexing
-
-                    // Read lines with context
-                    let mrd_line = mrd_line.with_context(|| {
-                        format!("MRD file {mrd_path} line {line_num} read error")
-                    })?;
-                    let diag_line = diag_line.with_context(|| {
-                        format!("Diag file {diag_path} line {line_num} read error")
-                    })?;
-
-                    // Parse both lines
-                    let mrd = BinCount::from_tsv_row(&mrd_line).with_context(|| {
-                        format!("Failed to parse MRD line {line_num}: {mrd_line}")
-                    })?;
-                    let diag = BinCount::from_tsv_row(&diag_line).with_context(|| {
-                        format!("Failed to parse Diag line {line_num}: {diag_line}")
-                    })?;
-
-                    // Validate matching positions
-                    if mrd.contig != diag.contig {
-                        anyhow::bail!(
-                            "Contig mismatch at line {line_num}: {} vs {}",
-                            mrd.contig,
-                            diag.contig
-                        );
-                    }
-                    if mrd.start != diag.start {
-                        anyhow::bail!(
-                            "Start position mismatch at line {line_num}: {} vs {}",
-                            mrd.start,
-                            diag.start
-                        );
-                    }
-
-                    // Calculate high-depth regions
-                    let bools: Vec<bool> = mrd
-                        .depths
-                        .iter()
-                        .zip(diag.depths.iter())
-                        .map(|(&m, &d)| {
-                            m >= config.min_high_quality_depth && d >= config.min_high_quality_depth
-                        })
-                        .collect();
+            let cfg = Arc::clone(&cfg);
+            let normal_path = format!("{}/{}_count.tsv.gz", cfg.normal_dir_count(id), contig);
+            let tumor_path = format!("{}/{}_count.tsv.gz", cfg.tumoral_dir_count(id), contig);
+
+            let normal_rdr = get_gz_reader(&normal_path)
+                .with_context(|| format!("Failed to open normal file: {}", normal_path))?;
+            let tumor_rdr = get_gz_reader(&tumor_path)
+                .with_context(|| format!("Failed to open tumor file: {}", tumor_path))?;
+
+            // Collect per-line high & low masks
+            let mut high_runs = Vec::new();
+            let mut low_runs = Vec::new();
+
+            for (idx, (n_line, t_line)) in normal_rdr.lines().zip(tumor_rdr.lines()).enumerate() {
+                let line_no = idx + 1;
+                let n_line = n_line.with_context(|| format!("{} line {}", normal_path, line_no))?;
+                let t_line = t_line.with_context(|| format!("{} line {}", tumor_path, line_no))?;
+
+                let n = BinCount::from_tsv_row(&n_line)
+                    .with_context(|| format!("Parse error at {}: {}", normal_path, line_no))?;
+                let t = BinCount::from_tsv_row(&t_line)
+                    .with_context(|| format!("Parse error at {}: {}", tumor_path, line_no))?;
+
+                if n.contig != t.contig {
+                    anyhow::bail!(
+                        "Contig mismatch at line {}: {} vs {}",
+                        line_no,
+                        n.contig,
+                        t.contig
+                    );
+                }
+                if n.start != t.start {
+                    anyhow::bail!(
+                        "Position mismatch at line {}: {} vs {}",
+                        line_no,
+                        n.start,
+                        t.start
+                    );
+                }
 
-                    Ok(ranges_from_consecutive_true(&bools, mrd.start, &mrd.contig))
-                })
-                .collect::<anyhow::Result<Vec<_>>>()?;
+                let high_mask: Vec<bool> = n
+                    .depths
+                    .iter()
+                    .zip(&t.depths)
+                    .map(|(&nd, &td)| {
+                        nd >= cfg.min_high_quality_depth && td >= cfg.min_high_quality_depth
+                    })
+                    .collect();
+
+                let lowq_mask: Vec<bool> = n
+                    .low_qualities
+                    .iter()
+                    .zip(&t.low_qualities)
+                    .map(|(&nq, &tq)| {
+                        nq < cfg.max_depth_low_quality && tq < cfg.max_depth_low_quality
+                    })
+                    .collect();
+
+                high_runs.extend(ranges_from_consecutive_true(&high_mask, n.start, &n.contig));
+                low_runs.extend(ranges_from_consecutive_true(&lowq_mask, n.start, &n.contig));
+            }
 
-            // Flatten nested ranges and return contig's ranges
-            Ok(ranges.into_iter().flatten().collect::<Vec<GenomeRange>>())
+            Ok((high_runs, low_runs))
         })
         .collect::<anyhow::Result<Vec<_>>>()?;
 
-    // Flatten the results from all contigs into a single vector
-    Ok(results.into_iter().flatten().collect())
+    // Flatten across all contigs
+    let (high_all, low_all): (Vec<_>, Vec<_>) = per_contig.into_iter().unzip();
+    Ok((
+        high_all.into_iter().flatten().collect(),
+        low_all.into_iter().flatten().collect(),
+    ))
 }
 
 /// Converts a slice of booleans into a list of `GenomeRange`s representing