Ver código fonte

INSERTION depth wrong

Thomas 6 meses atrás
pai
commit
708d1fefb3
6 arquivos alterados com 484 adições e 344 exclusões
  1. 188 195
      Cargo.lock
  2. 31 0
      src/annotation/mod.rs
  3. 61 13
      src/lib.rs
  4. 16 2
      src/pipes/somatic.rs
  5. 54 74
      src/variant/variant_collection.rs
  6. 134 60
      src/variant/variants_stats.rs

Diferenças do arquivo suprimidas por serem muito extensas
+ 188 - 195
Cargo.lock


+ 31 - 0
src/annotation/mod.rs

@@ -493,6 +493,37 @@ impl Annotations {
             .collect()
     }
 
+    /// Retain only the variants that pass a given filter across both the
+    /// internal store and the provided collections.
+    ///
+    /// # Arguments
+    ///
+    /// * `variants` – Mutable reference to a list of `VariantCollection`s,
+    ///   each of which contains a set of called variants.
+    /// * `filter` – Predicate applied to each entry of `self.store`
+    ///   (value = `Vec<Annotation>`). If it returns `true`, the entry and
+    ///   all variants with the same key are kept; otherwise they are removed.
+    ///
+    /// # Behavior
+    ///
+    /// 1. `self.store` is filtered in-place by applying `filter`.
+    /// 2. The keys of surviving entries are collected into a `HashSet`.
+    /// 3. Each `VariantCollection` in `variants` is pruned in parallel so
+    ///    that only variants whose `hash()` is in the key set remain.
+    /// 4. Empty `VariantCollection`s are discarded.
+    /// 5. Progress information (number of variants removed per caller) is
+    ///    logged via `info!`.
+    ///
+    /// # Returns
+    ///
+    /// The total number of variants removed across all collections.
+    ///
+    /// # Notes
+    ///
+    /// * Keys must be `Copy` or `Clone` to be inserted into the temporary
+    ///   `HashSet`.
+    /// * Logging inside the parallel loop may interleave between threads,
+    ///   but totals remain correct.
     pub fn retain_variants(
         &mut self,
         variants: &mut Vec<VariantCollection>,

+ 61 - 13
src/lib.rs

@@ -701,7 +701,7 @@ let variant: VcfVariant = row.parse()?;
         let variant: VcfVariant = row.parse()?;
         let var_string = variant.into_vcf_row();
         assert_eq!(row, &var_string);
-        variant_col.variants.push(row.parse()?);
+        // variant_col.variants.push(row.parse()?);
 
         assert_eq!(AlterationCategory::INS, variant.alteration_category());
 
@@ -712,7 +712,7 @@ let variant: VcfVariant = row.parse()?;
         assert_eq!(row, &var_string);
         assert_eq!(AlterationCategory::INS, variant.alteration_category());
 
-        variant_col.variants.push(row.parse()?);
+        // variant_col.variants.push(row.parse()?);
 
         // Nanomonsv dont parse last format remove: \t22:0 and nt putted in uppercase
         let row = "chr8\t87940084\td_333\tT\t<INS>\t.\tPASS\tEND=87940201;SVTYPE=INS;SVINSLEN=172;SVINSSEQ=TA\tTR:VR\t9:5";
@@ -721,14 +721,24 @@ let variant: VcfVariant = row.parse()?;
         assert_eq!(row, &var_string);
         assert_eq!(AlterationCategory::INS, variant.alteration_category());
 
-        variant_col.variants.push(row.parse()?);
+        // variant_col.variants.push(row.parse()?);
 
-        
+        //
+        let row = "chr5\t36122736\t.\tT\tTGCTCCG\t3.9\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:4:28:11,16:0.57143:1,0,37";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        assert_eq!(AlterationCategory::INS, variant.alteration_category());
+
+        variant_col.variants = Vec::new();
+        variant_col.variants.push(row.parse()?);
         variant_col.annotate_with_constit_bam(
             &annotations, 
-            "/data/longreads_basic_pipe/ACHITE/mrd/ACHITE_mrd_hs1.bam", 
+            "/data/longreads_basic_pipe/PASSARD/mrd/PASSARD_mrd_hs1.bam", 
             1
         )?;
+        println!("{variant_col:?}");
+        println!("{annotations:?}");
 
         Ok(())
     }
@@ -1030,13 +1040,15 @@ let variant: VcfVariant = row.parse()?;
     #[test]
     fn ins_at() -> anyhow::Result<()> {
         init();
-        let id = "ADJAGBA";
+        let id = "PASSARD";
         let c = Config::default();
-        let chr = "chr1";
-        let position = 52232; // 1-based like in vcf
+        let chr = "chr5";
+        let position = 36122736; // 1-based like in vcf
         let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
         // let p = ins_pileup(&mut bam, chr, position - 1, true)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
         let counts = counts_ins_at(&mut bam, chr, position -1)?;
+        
+        println!("{counts:?}");
         for (key, value) in &counts {
             println!("{}: {}", key, value);
         }
@@ -1044,6 +1056,42 @@ let variant: VcfVariant = row.parse()?;
         Ok(())
     }
 
+    // #[test]
+    // fn del_at() -> anyhow::Result<()> {
+    //     let id = "PASSARD";
+    //             let c = Config::default();
+    //
+    //     let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
+    //
+    //     let pileup_start = crate::collection::bam::nt_pileup(
+    //                                     &mut bam,
+    //                                     "chr5",
+    //                                     36122735,
+    //                                     false,
+    //                                 )?;
+    //
+    //     let uu = crate::collection::bam::nt_pileup_new(
+    //                                     &mut bam,
+    //                                     "chr5",
+    //                                     36122735,
+    //                                     false,
+    //                                 )?;
+    //
+    //                                 let pileup_end = crate::collection::bam::nt_pileup_new(
+    //                                     &mut bam,
+    //                                     &var.position.contig(),
+    //                                     del_repr.end.saturating_sub(1),
+    //                                     false,
+    //                                 )?;
+    //
+    //     println!("{pileup_start:?}");
+    //     println!("{uu:?}");
+    //     let depth = uu.len().max(pileup_end.len());
+    //
+    //     Ok(())
+    //
+    // }
+
     #[test]
     fn vep_line() -> anyhow::Result<()> {
         init();
@@ -1097,14 +1145,14 @@ let variant: VcfVariant = row.parse()?;
         let bams = collections.bam.by_id_completed(15.0, 10.0);
         let n = bams.len();
         let mut config = Config::default();
-        // config.somatic_scan_force = true;
+        config.somatic_scan_force = true;
         warn!("{n} cases");
         for (i, bam) in bams.iter().enumerate() {
             let id = &bam.id;
             warn!("{i}/{n} {id}");
-            if id == "BANGA" {
-                continue;
-            }
+            // if id == "BANGA" {
+            //     continue;
+            // }
             if id == "ARM" {
                 continue;
             }
@@ -1121,7 +1169,7 @@ let variant: VcfVariant = row.parse()?;
     #[test]
     fn somatic_cases() -> anyhow::Result<()> {
         init();
-        let id = "SABER";
+        let id = "PASSARD";
         let config = Config { somatic_pipe_force: true, ..Default::default() };
         match SomaticPipe::initialize(id, config)?.run() {
             Ok(_) => (),

+ 16 - 2
src/pipes/somatic.rs

@@ -351,6 +351,7 @@ impl Run for SomaticPipe {
                 "{stats_dir}/{id}_annotations_02_post_germline.json"
             ))?;
 
+
         // Remove deletions stretch
         // info!("Removing deletions stretchs:");
         // variants_collections.iter_mut().for_each(|coll| {
@@ -369,9 +370,8 @@ impl Run for SomaticPipe {
         variants_collections.iter_mut().for_each(|e| {
             let _ = e.annotate_with_ranges(&low_quality_ranges, &annotations, Annotation::LowMAPQ);
         });
-
         let n_masked_lowqual = annotations.retain_variants(&mut variants_collections, |anns| {
-            anns.contains(&Annotation::LowMAPQ)
+            !anns.contains(&Annotation::LowMAPQ)
         });
         info!("N low mapq filtered: {}", n_masked_lowqual);
 
@@ -397,6 +397,19 @@ impl Run for SomaticPipe {
             })))
             .save_to_json(&format!("{stats_dir}/{id}_annotations_03_bam.json"))?;
 
+        // variants_collections.iter().for_each(|col| {
+        //     col.variants
+        //         .iter()
+        //         .filter(|v| v.position.position == 36122735)
+        //         .for_each(|v| {
+        //             if let Some(ann) = annotations.store.get(&v.hash()) {
+        //                 println!("before const DEPTH v: {:?}\n\n{:?}", v, ann.value());
+        //             } else {
+        //                 println!("no ann but present");
+        //             }
+        //         });
+        // });
+
         // Filter based on low constitutional depth
         info!(
             "Removing variants when depth in constit bam < {}.",
@@ -479,6 +492,7 @@ impl Run for SomaticPipe {
             "{} variants filtered, with constit alt <= max contig alt ({}) and in GnomAD.",
             somatic_stats.n_high_alt_constit_gnomad, self.config.somatic_max_alt_constit
         );
+
         // TODO: These stats don't capture filter metrics !!!
         annotations
             .callers_stat(Some(Box::new(|v| {

+ 54 - 74
src/variant/variant_collection.rs

@@ -33,7 +33,8 @@ use crate::{
         vcf::Vcf,
     },
     helpers::{
-        app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path, Hash128, Repeat,
+        app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path,
+        Hash128, Repeat,
     },
     io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
     positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
@@ -320,55 +321,51 @@ impl VariantCollection {
     }
 
     pub fn remove_strech(&mut self) -> usize {
-    // 1) Concurrently collect indices of RepOne/RepTwo deletions keyed by "contig:pos"
-    let deletions_to_rm: DashMap<String, Vec<usize>> = DashMap::new();
-    self.variants
-        .par_iter()
-        .enumerate()
-        .for_each(|(i, v)| {
+        // 1) Concurrently collect indices of RepOne/RepTwo deletions keyed by "contig:pos"
+        let deletions_to_rm: DashMap<String, Vec<usize>> = DashMap::new();
+        self.variants.par_iter().enumerate().for_each(|(i, v)| {
             if let Some(del_seq) = v.deletion_seq() {
-                if matches!(detect_repetition(&del_seq), Repeat::RepOne(_, _) | Repeat::RepTwo(_, _)) {
+                if matches!(
+                    detect_repetition(&del_seq),
+                    Repeat::RepOne(_, _) | Repeat::RepTwo(_, _)
+                ) {
                     let key = format!("{}:{}", v.position.contig, v.position.position);
-                    deletions_to_rm
-                        .entry(key)
-                        .or_default()
-                        .value_mut()
-                        .push(i);
+                    deletions_to_rm.entry(key).or_default().value_mut().push(i);
                 }
             }
         });
 
-    // 2) Build a HashSet of all indices where Vec.len() > 1
-    let to_remove: HashSet<usize> = deletions_to_rm
-        .iter()
-        .filter_map(|entry| {
-            let idxs = entry.value();
-            if idxs.len() > 1 {
-                // clone here is fine since each Vec is small
-                Some(idxs.clone())
-            } else {
-                None
-            }
-        })
-        .flatten()
-        .collect();
-
-    // 3) Drain & rebuild, dropping any variant whose index is in `to_remove`
-    self.variants = self
-        .variants
-        .drain(..)
-        .enumerate()
-        .filter_map(|(i, v)| {
-            if to_remove.contains(&i) {
-                None
-            } else {
-                Some(v)
-            }
-        })
-        .collect();
+        // 2) Build a HashSet of all indices where Vec.len() > 1
+        let to_remove: HashSet<usize> = deletions_to_rm
+            .iter()
+            .filter_map(|entry| {
+                let idxs = entry.value();
+                if idxs.len() > 1 {
+                    // clone here is fine since each Vec is small
+                    Some(idxs.clone())
+                } else {
+                    None
+                }
+            })
+            .flatten()
+            .collect();
+
+        // 3) Drain & rebuild, dropping any variant whose index is in `to_remove`
+        self.variants = self
+            .variants
+            .drain(..)
+            .enumerate()
+            .filter_map(|(i, v)| {
+                if to_remove.contains(&i) {
+                    None
+                } else {
+                    Some(v)
+                }
+            })
+            .collect();
 
         to_remove.len()
-}
+    }
     /// Annotates variants with information from a constitutional BAM file.
     ///
     /// This function processes variants in parallel chunks and adds annotations
@@ -414,12 +411,7 @@ impl VariantCollection {
             }
         }
 
-        fn match_repeats(
-            v: &[(String, i32)],
-            nt: char,
-            n: usize,
-            e: usize,
-        ) -> Vec<&(String, i32)> {
+        fn match_repeats(v: &[(String, i32)], nt: char, n: usize, e: usize) -> Vec<&(String, i32)> {
             v.iter()
                 .filter(|(s, _)| {
                     let len = s.len();
@@ -510,28 +502,6 @@ impl VariantCollection {
                                     } else {
                                         0
                                     };
-                                    // println!("TOL {tol}");
-
-                                    // let end_qnames: Vec<Vec<u8>> = pileup_end
-                                    //     .iter()
-                                        // .inspect(|e| {
-                                        //     if let crate::collection::bam::PileBase::Del((_, l)) =
-                                        //         e
-                                        //     {
-                                        //         println!("{l}");
-                                        //     }
-                                        // })
-                                        // .filter_map(|e| match e {
-                                        //     crate::collection::bam::PileBase::Del((qn, l))
-                                        //         if *l >= len.saturating_sub(tol).max(1)
-                                        //             && *l <= len + tol =>
-                                        //     {
-                                        //         Some(qn.to_vec())
-                                        //     }
-                                        //     _ => None,
-                                        // })
-                                        // .collect();
-                                    // println!("ends {}", end_qnames.len());
 
                                     let alt: u32 = pileup_start
                                         .iter()
@@ -547,12 +517,15 @@ impl VariantCollection {
                                         })
                                         .sum();
 
-                                    let depth = pileup_start.len().min(pileup_end.len());
+                                    let depth = pileup_start.len().max(pileup_end.len());
 
                                     // debug!("{} {alt} / {depth} {len}", var.variant_id());
 
                                     anns.push(Annotation::ConstitAlt(alt as u16));
                                     anns.push(Annotation::ConstitDepth(depth as u16));
+                                } else {
+                                    anns.push(Annotation::ConstitAlt(0_u16));
+                                    anns.push(Annotation::ConstitDepth(111_u16));
                                 }
                             }
                             AlterationCategory::INS => {
@@ -561,19 +534,26 @@ impl VariantCollection {
                                     &var.position.contig(),
                                     var.position.position,
                                 )?;
+                                let depth = crate::collection::bam::nt_pileup_new(
+                                    &mut bam,
+                                    &var.position.contig(),
+                                    var.position.position,
+                                    false,
+                                )?
+                                .len();
 
                                 let alt_seq = var.inserted_seq().unwrap_or_default();
 
-                                let (depth, alt) = match single_char_repeat(&alt_seq) {
+                                let (_, alt) = match single_char_repeat(&alt_seq) {
                                     Some((repeated, n)) if alt_seq.len() > 1 => {
                                         // If stretch of same nt consider eq +/- 3 nt
                                         let pv = pileup.clone().into_iter().collect::<Vec<_>>();
                                         let res = match_repeats(&pv, repeated, n, 3);
-                                        let depth = pileup.values().map(|e| *e as u32).sum();
+                                        let depth = pileup.len() as u32;
                                         let alt = res.iter().map(|(_, n)| *n as u32).sum();
                                         (depth, alt)
                                     }
-                                    _ => pileup.into_iter().fold((0, 0), folder(&alt_seq)),
+                                    _ => pileup.clone().into_iter().fold((0, 0), folder(&alt_seq)),
                                 };
 
                                 // debug!("{} {alt} / {depth} ", var.variant_id());

+ 134 - 60
src/variant/variants_stats.rs

@@ -478,85 +478,98 @@ pub fn somatic_depth_quality_ranges(
     id: &str,
     config: &Config,
 ) -> anyhow::Result<(Vec<GenomeRange>, Vec<GenomeRange>)> {
-    // List of contigs: chr1..chr22, then X, Y, M
-    let contigs = (1..=22)
+    // chr1..chr22 + X,Y,M
+    let contigs: Vec<String> = (1..=22)
         .map(|i| format!("chr{i}"))
-        .chain(["chrX", "chrY", "chrM"].iter().map(ToString::to_string))
-        .collect::<Vec<_>>();
+        .chain(["chrX", "chrY", "chrM"].into_iter().map(String::from))
+        .collect();
 
-    let cfg = Arc::new(config);
+    let cfg = config; // no Arc<&Config>
 
-    // For each contig, produce (high_ranges, lowq_ranges)
     let per_contig = contigs
         .into_par_iter()
         .map(|contig| {
-            let cfg = Arc::clone(&cfg);
             let normal_path = format!("{}/{}_count.tsv.gz", cfg.normal_dir_count(id), contig);
-            let tumor_path = format!("{}/{}_count.tsv.gz", cfg.tumoral_dir_count(id), contig);
+            let tumor_path  = format!("{}/{}_count.tsv.gz", cfg.tumoral_dir_count(id), contig);
 
             let normal_rdr = get_gz_reader(&normal_path)
                 .with_context(|| format!("Failed to open normal file: {}", normal_path))?;
             let tumor_rdr = get_gz_reader(&tumor_path)
                 .with_context(|| format!("Failed to open tumor file: {}", tumor_path))?;
 
-            // Collect per-line high & low masks
-            let mut high_runs = Vec::new();
-            let mut low_runs = Vec::new();
-
-            for (idx, (n_line, t_line)) in normal_rdr.lines().zip(tumor_rdr.lines()).enumerate() {
-                let line_no = idx + 1;
-                let n_line = n_line.with_context(|| format!("{} line {}", normal_path, line_no))?;
-                let t_line = t_line.with_context(|| format!("{} line {}", tumor_path, line_no))?;
-
-                let n = BinCount::from_tsv_row(&n_line)
-                    .with_context(|| format!("Parse error at {}: {}", normal_path, line_no))?;
-                let t = BinCount::from_tsv_row(&t_line)
-                    .with_context(|| format!("Parse error at {}: {}", tumor_path, line_no))?;
-
-                if n.contig != t.contig {
-                    anyhow::bail!(
-                        "Contig mismatch at line {}: {} vs {}",
-                        line_no,
-                        n.contig,
-                        t.contig
-                    );
-                }
-                if n.start != t.start {
-                    anyhow::bail!(
-                        "Position mismatch at line {}: {} vs {}",
-                        line_no,
-                        n.start,
-                        t.start
-                    );
-                }
+            let mut high_runs: Vec<GenomeRange> = Vec::new();
+            let mut lowq_runs: Vec<GenomeRange> = Vec::new();
+
+            let mut nl = normal_rdr.lines();
+            let mut tl = tumor_rdr.lines();
+            let mut line_no = 0usize;
+
+            loop {
+                let n_next = nl.next();
+                let t_next = tl.next();
+                match (n_next, t_next) {
+                    (None, None) => break,
+                    (Some(Err(e)), _) => return Err(anyhow::anyhow!("{} line {}: {}", normal_path, line_no + 1, e)),
+                    (_, Some(Err(e))) => return Err(anyhow::anyhow!("{} line {}: {}", tumor_path,  line_no + 1, e)),
+                    (Some(Ok(n_line)), Some(Ok(t_line))) => {
+                        line_no += 1;
+
+                        let n = BinCount::from_tsv_row(&n_line)
+                            .with_context(|| format!("Parse error at {}: {}", normal_path, line_no))?;
+                        let t = BinCount::from_tsv_row(&t_line)
+                            .with_context(|| format!("Parse error at {}: {}", tumor_path, line_no))?;
+
+                        if n.contig != t.contig {
+                            anyhow::bail!("Contig mismatch at line {}: {} vs {}", line_no, n.contig, t.contig);
+                        }
+                        if n.start != t.start {
+                            anyhow::bail!("Position mismatch at line {}: {} vs {}", line_no, n.start, t.start);
+                        }
+                        // Ensure equal bin widths
+                        if n.depths.len() != t.depths.len() {
+                            anyhow::bail!(
+                                "Depth vector length mismatch at line {}: {} vs {}",
+                                line_no, n.depths.len(), t.depths.len()
+                            );
+                        }
+                        if n.low_qualities.len() != t.low_qualities.len() {
+                            anyhow::bail!(
+                                "LowQ vector length mismatch at line {}: {} vs {}",
+                                line_no, n.low_qualities.len(), t.low_qualities.len()
+                            );
+                        }
+
+                        // High-quality depth in BOTH samples
+                        let high_mask_iter = n.depths.iter().zip(&t.depths).map(|(&nd, &td)| {
+                            nd >= cfg.min_high_quality_depth && td >= cfg.min_high_quality_depth
+                        });
 
-                let high_mask: Vec<bool> = n
-                    .depths
-                    .iter()
-                    .zip(&t.depths)
-                    .map(|(&nd, &td)| {
-                        nd >= cfg.min_high_quality_depth && td >= cfg.min_high_quality_depth
-                    })
-                    .collect();
-
-                let lowq_mask: Vec<bool> = n
-                    .low_qualities
-                    .iter()
-                    .zip(&t.low_qualities)
-                    .map(|(&nq, &tq)| {
-                        nq < cfg.max_depth_low_quality && tq < cfg.max_depth_low_quality
-                    })
-                    .collect();
-
-                high_runs.extend(ranges_from_consecutive_true(&high_mask, n.start, &n.contig));
-                low_runs.extend(ranges_from_consecutive_true(&lowq_mask, n.start, &n.contig));
+                        // NOTE: if you intended "low-quality regions" (bad), invert predicate.
+                        let lowq_mask_iter = n.low_qualities.iter().zip(&t.low_qualities).map(|(&nq, &tq)| {
+                            nq > cfg.max_depth_low_quality && tq > cfg.max_depth_low_quality
+                        });
+
+                        high_runs.extend(ranges_from_consecutive_true_iter(high_mask_iter, n.start, &n.contig));
+                        lowq_runs.extend(ranges_from_consecutive_true_iter(lowq_mask_iter, n.start, &n.contig));
+                    }
+                    (Some(_), None) => {
+                        anyhow::bail!("Line count mismatch: {} has extra lines after {}", normal_path, line_no);
+                    }
+                    (None, Some(_)) => {
+                        anyhow::bail!("Line count mismatch: {} has extra lines after {}", tumor_path, line_no);
+                    }
+                }
             }
 
-            Ok((high_runs, low_runs))
+            // Merge adjacent/overlapping ranges within this contig
+            high_runs = merge_adjacent_ranges(high_runs);
+            lowq_runs = merge_adjacent_ranges(lowq_runs);
+
+            Ok((high_runs, lowq_runs))
         })
         .collect::<anyhow::Result<Vec<_>>>()?;
 
-    // Flatten across all contigs
+    // Flatten
     let (high_all, low_all): (Vec<_>, Vec<_>) = per_contig.into_iter().unzip();
     Ok((
         high_all.into_iter().flatten().collect(),
@@ -564,6 +577,67 @@ pub fn somatic_depth_quality_ranges(
     ))
 }
 
+
+/// Builds end-exclusive `GenomeRange`s for every run of consecutive `true`
+/// values yielded by `mask`, without materialising a temporary `Vec<bool>`.
+///
+/// # Arguments
+///
+/// * `mask` – Any iterable of booleans; element `k` maps to position `start0 + k`.
+/// * `start0` – Position assigned to the first element of `mask`.
+/// * `contig` – Contig name, converted once with `contig_to_num`.
+///
+/// # Returns
+///
+/// One range per run of `true` values, in the order the runs occur. A run
+/// that is still open when `mask` ends is closed at `start0 + <mask length>`.
+pub fn ranges_from_consecutive_true_iter<I>(
+    mask: I,
+    start0: u32,
+    contig: &str,
+) -> Vec<GenomeRange>
+where
+    I: IntoIterator<Item = bool>,
+{
+    let contig = contig_to_num(contig);
+    let mut out = Vec::new();
+    // Start position of the run currently being extended, if any.
+    let mut run_start: Option<u32> = None;
+    // Number of mask elements consumed so far.
+    let mut offset: u32 = 0;
+
+    for flag in mask {
+        match (flag, run_start) {
+            // A new run begins at this element.
+            (true, None) => run_start = Some(start0 + offset),
+            // The open run ends just before this element (end-exclusive).
+            (false, Some(begin)) => {
+                out.push(GenomeRange { contig, range: begin..(start0 + offset) });
+                run_start = None;
+            }
+            // Either still inside a run or still outside one: nothing to do.
+            _ => {}
+        }
+        offset += 1;
+    }
+
+    // Close a run that reaches the end of the mask.
+    if let Some(begin) = run_start {
+        out.push(GenomeRange { contig, range: begin..(start0 + offset) });
+    }
+
+    out
+}
+
+/// Merges overlapping or touching ranges (end-exclusive) that share a contig.
+///
+/// The input is sorted by `(contig, range.start)`. A range is folded into the
+/// current one when it lies on the same contig and starts at or before the
+/// current end; the merged range keeps the larger of the two ends.
+///
+/// # Returns
+///
+/// The merged ranges in sorted order. An empty input yields an empty vector.
+pub fn merge_adjacent_ranges(mut ranges: Vec<GenomeRange>) -> Vec<GenomeRange> {
+    ranges.sort_by(|a, b| {
+        (a.contig, a.range.start).cmp(&(b.contig, b.range.start))
+    });
+
+    let mut merged = Vec::with_capacity(ranges.len());
+    let mut rest = ranges.into_iter();
+
+    // Take ownership of the first range instead of cloning it; an empty input
+    // returns the (empty) `merged` vector right away.
+    let mut cur = match rest.next() {
+        Some(first) => first,
+        None => return merged,
+    };
+
+    for r in rest {
+        if r.contig == cur.contig && r.range.start <= cur.range.end {
+            // Overlapping or touching: extend the current range if `r` reaches further.
+            cur.range.end = cur.range.end.max(r.range.end);
+        } else {
+            // Gap or contig change: emit the finished range and start a new one.
+            merged.push(cur);
+            cur = r;
+        }
+    }
+    merged.push(cur);
+    merged
+}
+
 /// Converts a slice of booleans into a list of `GenomeRange`s representing
 /// consecutive `true` values, offset by a `start` position and tagged with a contig ID.
 ///

Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff