Parcourir la source

annotate_with_constit_bam (HighConstitAlt + LowConstitDepth) // counts_ins_at // alteration cat

Thomas il y a 1 an
Parent
commit
80ba19f211
8 fichiers modifiés avec 416 ajouts et 111 suppressions
  1. 0 0
      gg.txt
  2. 87 28
      src/annotation/mod.rs
  3. 113 26
      src/collection/bam.rs
  4. 6 1
      src/config.rs
  5. 27 1
      src/lib.rs
  6. 29 13
      src/pipes/somatic.rs
  7. 60 8
      src/variant/variant.rs
  8. 94 34
      src/variant/variant_collection.rs

+ 0 - 0
gg.txt


+ 87 - 28
src/annotation/mod.rs

@@ -1,10 +1,12 @@
-use std::{collections::HashSet, fmt};
+use std::{
+    collections::{HashMap, HashSet},
+    fmt,
+};
 
+use crate::helpers::mean;
 use dashmap::DashMap;
 use rayon::prelude::*;
 
-use crate::helpers::mean;
-
 #[derive(Debug, Clone, PartialEq)]
 pub enum Annotation {
     SoloDiag,
@@ -13,6 +15,10 @@ pub enum Annotation {
     Germline,
     Somatic,
     ShannonEntropy(f64),
+    ConstitDepth(u16),
+    ConstitAlt(u16),
+    LowConstitDepth,
+    HighConstitAlt,
 }
 
 impl fmt::Display for Annotation {
@@ -24,6 +30,10 @@ impl fmt::Display for Annotation {
             Annotation::Germline => "Germline",
             Annotation::Somatic => "Somatic",
             Annotation::ShannonEntropy(_) => "ShannonEntropy",
+            Annotation::ConstitDepth(_) => "ConstitDepth",
+            Annotation::ConstitAlt(_) => "ConstitAlt",
+            Annotation::LowConstitDepth => "LowConstitDepth",
+            Annotation::HighConstitAlt => "HighConstitAlt",
         };
         write!(f, "{}", str)
     }
@@ -59,43 +69,63 @@ impl Annotations {
 
     pub fn callers_stat(&self) {
         let map: DashMap<String, u64> = DashMap::new();
-        let map_shannon: DashMap<String, Vec<f64>> = DashMap::new();
+        let num_maps: DashMap<String, HashMap<String, Vec<f64>>> = DashMap::new();
 
         self.store.par_iter().for_each(|e| {
-            let mut s: Vec<String> = e
-                .value()
-                .iter()
-                .filter(|e| !matches!(e, Annotation::ShannonEntropy(_)))
-                .map(|v| v.to_string())
-                .collect();
-            s.sort();
-            s.dedup();
+            let anns = e.value();
+
+            let mut categorical = Vec::new();
+            let mut numerical = Vec::new();
+            for ann in anns {
+                match ann {
+                    Annotation::SoloDiag
+                    | Annotation::SoloConstit
+                    | Annotation::Germline
+                    | Annotation::Somatic
+                    | Annotation::LowConstitDepth
+                    | Annotation::HighConstitAlt => categorical.push(ann.to_string()),
+                    Annotation::Callers(caller) => categorical.push(caller.to_string()),
+                    Annotation::ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),
+                    Annotation::ConstitDepth(v) | Annotation::ConstitAlt(v) => {
+                        numerical.push((ann.to_string(), *v as f64));
+                    }
+                }
+            }
 
-            let k = s.join(" + ");
+            categorical.sort();
+            categorical.dedup();
+            let k = categorical.join(" + ");
 
             *map.entry(k.clone()).or_default() += 1;
 
-            if let Some(Annotation::ShannonEntropy(v)) = e
-                .value()
-                .iter()
-                .find(|&a| matches!(a, Annotation::ShannonEntropy(_)))
-            {
-                map_shannon.entry(k.clone()).or_default().push(*v);
+            for (k_num, v_num) in numerical {
+                num_maps
+                    .entry(k.clone())
+                    .or_default()
+                    .entry(k_num)
+                    .or_default()
+                    .push(v_num);
             }
         });
-        println!("Callers stats:");
-        println!("\ttotal: {}", map.len());
+        println!("\nCallers stats:");
+        println!("\tcategories: {}", map.len());
+        let mut n = 0;
         map.iter().for_each(|e| {
-            println!("\t{}:\t{}", e.key(), e.value());
-        });
-
-        println!("Callers stats mean shannon ent:");
-        map_shannon.iter().for_each(|e| {
+            let k = e.key();
             let v = e.value();
-            if !v.is_empty() {
-                println!("\t{}:\t{:.2}", e.key(), mean(v));
+            n += v;
+            let mut num_str = Vec::new();
+            if let Some(nums) = num_maps.get(k) {
+                num_str.extend(
+                    nums.iter()
+                        .map(|(k_n, v_n)| format!("{k_n} {:.2}", mean(v_n))),
+                )
             }
+            num_str.sort();
+            println!("\t{k}\t{v}\t{}", num_str.join("\t"));
         });
+
+        println!("Total\t{n}");
     }
 
     pub fn get_keys_filter(
@@ -112,4 +142,33 @@ impl Annotations {
     pub fn retain_keys(&mut self, keys_to_keep: &HashSet<u128>) {
         self.store.retain(|key, _| keys_to_keep.contains(key));
     }
+
+    pub fn solo_constit_boundaries(&self, max_alt_constit: u16, min_constit_depth: u16) {
+        self.store
+            .iter_mut()
+            .filter(|anns| {
+                let contains = anns.iter().any(|item| matches!(item, Annotation::SoloDiag));
+                let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic));
+
+                contains && contains_not
+            })
+            .for_each(|mut e| {
+                let v = e.value_mut();
+                let mut to_add = Vec::new();
+                v.iter().for_each(|ann| match ann {
+                    Annotation::ConstitDepth(v) => {
+                        if *v < min_constit_depth {
+                            to_add.push(Annotation::LowConstitDepth);
+                        }
+                    }
+                    Annotation::ConstitAlt(v) => {
+                        if *v > max_alt_constit {
+                            to_add.push(Annotation::HighConstitAlt);
+                        }
+                    },
+                    _ => (),
+                });
+                v.extend(to_add);
+            });
+    }
 }

+ 113 - 26
src/collection/bam.rs

@@ -1,4 +1,5 @@
 use std::{
+    collections::HashMap,
     fs::{self, File},
     path::PathBuf,
     str::FromStr,
@@ -7,14 +8,13 @@ use std::{
 use anyhow::{anyhow, Context};
 use chrono::{DateTime, Utc};
 use glob::glob;
-use hashbrown::HashMap;
-use log::warn;
+use log::{debug, warn};
 use pandora_lib_bindings::{
     progs::cramino::{Cramino, CraminoRes},
     utils::RunBin,
 };
 use rayon::prelude::*;
-use rust_htslib::bam::Read;
+use rust_htslib::bam::{record::Cigar, Read};
 use serde::{Deserialize, Serialize};
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -108,10 +108,10 @@ impl Bam {
             None
         };
 
-        let composition =
-            bam_compo(path.to_string_lossy().as_ref(), 20000).context(
-                format!("Error while reading BAM composition for {}", path.display()),
-            )?;
+        let composition = bam_compo(path.to_string_lossy().as_ref(), 20000).context(format!(
+            "Error while reading BAM composition for {}",
+            path.display()
+        ))?;
 
         let s = Self {
             path,
@@ -239,19 +239,19 @@ pub fn bam_compo(file_path: &str, sample_size: usize) -> anyhow::Result<Vec<(Str
 pub fn nt_pileup(
     bam: &mut rust_htslib::bam::IndexedReader,
     chr: &str,
-    position: i32,
+    position: u32,
     with_next_ins: bool,
 ) -> anyhow::Result<Vec<u8>> {
     use rust_htslib::{bam, bam::Read};
     let mut bases = Vec::new();
-    bam.fetch((chr, position,  position + 1))?;
+    bam.fetch((chr, position, position + 1))?;
     let mut bam_pileup = Vec::new();
     for p in bam.pileup() {
         let pileup = p.context(format!(
             "Can't pileup bam at position {}:{} (0-based)",
             chr, position
         ))?;
-        let cur_position = pileup.pos() as i32;
+        let cur_position = pileup.pos();
         if cur_position == position {
             for alignment in pileup.alignments() {
                 match alignment.indel() {
@@ -260,7 +260,7 @@ pub fn nt_pileup(
                     _ => {
                         let record = alignment.record();
                         if record.seq_len() > 0 {
-                            if let Some(b) = base_at(&record, position as u32, with_next_ins)? {
+                            if let Some(b) = base_at(&record, position, with_next_ins)? {
                                 bases.push(b);
                             }
                         } else if alignment.is_del() {
@@ -279,8 +279,6 @@ pub fn base_at(
     at_pos: u32,
     with_next_ins: bool,
 ) -> anyhow::Result<Option<u8>> {
-    use rust_htslib::bam::record::Cigar;
-
     let cigar = record.cigar();
     let seq = record.seq();
     let pos = cigar.pos() as u32;
@@ -327,23 +325,112 @@ pub fn base_at(
     Ok(None)
 }
 
-// thanks to chatGPT (the best)
-pub fn estimate_shannon_entropy(dna_sequence: &str) -> f64 {
-    let m = dna_sequence.len() as f64;
+pub fn counts_at(
+    bam: &mut rust_htslib::bam::IndexedReader,
+    chr: &str,
+    position: u32,
+) -> anyhow::Result<HashMap<String, i32>> {
+    let p = nt_pileup(bam, chr, position, false)?
+        .iter()
+        .map(|e| String::from_utf8(vec![*e]).unwrap())
+        .collect::<Vec<_>>();
+    let mut counts = HashMap::new();
+    for item in p.iter() {
+        *counts.entry(item.to_string()).or_insert(0) += 1;
+    }
+    Ok(counts)
+}
 
-    // Count occurrences of each base
-    let mut bases = HashMap::<char, usize>::new();
-    for base in dna_sequence.chars() {
-        *bases.entry(base).or_insert(0) += 1;
+pub fn ins_pileup(
+    bam: &mut rust_htslib::bam::IndexedReader,
+    chr: &str,
+    position: u32,
+) -> anyhow::Result<Vec<String>> {
+    let mut bases = Vec::new();
+    bam.fetch((chr, position, position + 10))?;
+    for p in bam.pileup() {
+        let pileup = p.context(format!(
+            "Can't pileup bam at position {}:{} (0-based)",
+            chr, position
+        ))?;
+        let cur_position = pileup.pos();
+        // Ins in the next position
+        if cur_position == position + 1 {
+            // debug!("{cur_position}");
+            for alignment in pileup.alignments() {
+                let record = alignment.record();
+                if record.seq_len() > 0 {
+                    if let Some(b) = ins_at(&record, position)? {
+                        bases.push(b);
+                    }
+                }
+            }
+        }
     }
+    Ok(bases)
+}
+
+pub fn ins_at(
+    record: &rust_htslib::bam::record::Record,
+    at_pos: u32,
+) -> anyhow::Result<Option<String>> {
+    use rust_htslib::bam::record::Cigar;
+
+    let cigar = record.cigar();
+    let seq = record.seq();
+    let pos = cigar.pos() as u32;
 
-    // Calculate Shannon entropy
-    let mut shannon_entropy_value = 0.0;
-    for &n_i in bases.values() {
-        let p_i = n_i as f64 / m;
-        shannon_entropy_value -= p_i * p_i.log2();
+    let mut read_i = 0u32;
+    let mut ref_pos = pos;
+    if ref_pos > at_pos {
+        return Ok(None);
     }
+    // debug!(
+    //     "read: {}",
+    //     String::from_utf8(record.qname().to_vec()).unwrap()
+    // );
+
+    for (id, op) in cigar.iter().enumerate() {
+        let (add_read, add_ref) = match *op {
+            Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
+            Cigar::Ins(len) => (len, 0),
+            Cigar::Del(len) => (0, len),
+            Cigar::RefSkip(len) => (0, len),
+            Cigar::SoftClip(len) => (len, 0),
+            Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
+        };
 
-    shannon_entropy_value
+        if ref_pos + add_read > at_pos && ref_pos + add_read < at_pos + 10 {
+            if let Cigar::Ins(v) = *op {
+                // debug!(
+                //     "ins size {v} @ {} (corrected {})",
+                //     ref_pos + add_read,
+                //     (ref_pos + add_read) - v - 1
+                // );
+
+                if (ref_pos + add_read) - v - 1 == at_pos {
+                    let inserted_seq =
+                        seq.as_bytes()[read_i as usize..(read_i + v) as usize].to_vec();
+                    return Ok(Some(String::from_utf8(inserted_seq)?));
+                }
+            }
+        }
+
+        read_i += add_read;
+        ref_pos += add_ref;
+    }
+    Ok(None)
 }
 
+pub fn counts_ins_at(
+    bam: &mut rust_htslib::bam::IndexedReader,
+    chr: &str,
+    position: u32,
+) -> anyhow::Result<HashMap<String, i32>> {
+    let p = ins_pileup(bam, chr, position)?;
+    let mut counts = HashMap::new();
+    for item in p.iter() {
+        *counts.entry(item.to_string()).or_insert(0) += 1;
+    }
+    Ok(counts)
+}

+ 6 - 1
src/config.rs

@@ -39,6 +39,8 @@ pub struct Config {
     pub clairs_platform: String,
     pub clairs_output_dir: String,
     pub mask_bed: String,
+    pub solo_min_constit_depth: u16,
+    pub solo_max_alt_constit: u16,
 }
 
 // Here comes names that can't be changed from output of tools
@@ -60,7 +62,6 @@ impl Default for Config {
             reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
             reference_name: "hs1".to_string(),
 
-
             // File structure
             result_dir: "/data/longreads_basic_pipe".to_string(),
             tumoral_name: "diag".to_string(),
@@ -116,6 +117,10 @@ impl Default for Config {
             modkit_summary_threads: 50,
             modkit_summary_file: "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
                 .to_string(),
+
+            // Pipe
+            solo_min_constit_depth: 5,
+            solo_max_alt_constit: 1,
         }
     }
 }

+ 27 - 1
src/lib.rs

@@ -29,7 +29,7 @@ mod tests {
 
     use annotation::Annotations;
     use callers::{nanomonsv::nanomonsv_create_pon, savana::Savana, severus::{Severus, SeverusSolo}};
-    use collection::{bam::nt_pileup, Initialize, InitializeSolo, Version};
+    use collection::{bam::{counts_ins_at, ins_pileup, nt_pileup}, Initialize, InitializeSolo, Version};
     use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
     use functions::assembler::{Assembler, AssemblerConfig};
     use helpers::estimate_shannon_entropy;
@@ -427,6 +427,15 @@ mod tests {
         let var_string = variant.into_vcf_row();
         assert_eq!(row, &var_string);
 
+        let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/.:1:24:3,5:0.208333";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+
+         let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/1:1:24:3,5:0.208333";
+        let variant_b: VcfVariant = row.parse()?;
+        assert_eq!(variant, variant_b);
+
         Ok(())
     }
 
@@ -537,4 +546,21 @@ mod tests {
         println!("{r} ({} {:.2})", r.len(), estimate_shannon_entropy(r.as_str()));
         Ok(())
     }
+
+    #[test]
+    fn ins_at() -> anyhow::Result<()> {
+        init();
+        let id = "ADJAGBA";
+        let c = Config::default();
+        let chr = "chr1";
+        let position = 52232; // 1-based like in vcf
+        let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "diag"))?;
+        // let p = ins_pileup(&mut bam, chr, position - 1, true)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
+        let counts = counts_ins_at(&mut bam, chr, position -1)?;
+        for (key, value) in &counts {
+            println!("{}: {}", key, value);
+        }
+        
+        Ok(())
+    }
 }

+ 29 - 13
src/pipes/somatic.rs

@@ -90,32 +90,48 @@ impl Run for Somatic {
 
         variants_collection.retain(|e| !e.variants.is_empty());
 
+        info!("Constit Bam annotation...");
+        variants_collection.iter().try_for_each(|c| {
+            c.annotate_with_constit_bam(&annotations, &self.config.normal_bam(&id), 150)
+        })?;
+        annotations.solo_constit_boundaries(
+            self.config.solo_max_alt_constit,
+            self.config.solo_min_constit_depth,
+        );
+
         info!("Entropy annotation...");
         variants_collection.iter().for_each(|c| {
-            c.annotate_with_sequence_entropy(&annotations, &self.config.reference);
+            c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
         });
         annotations.callers_stat();
 
         let prob_keys: HashSet<u128> = annotations
             .get_keys_filter(|anns| {
-                anns.contains(&Annotation::Somatic) && anns.contains(&Annotation::Germline)
+                // let contains = anns.iter().any(|item| matches!(item, Annotation::SoloDiag));
+                // let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic));
+                //
+                // contains && contains_not
+                anns.contains(&Annotation::SoloDiag)
+                    && !anns.contains(&Annotation::Somatic)
+                    && !anns.contains(&Annotation::LowConstitDepth)
+                    && !anns.contains(&Annotation::HighConstitAlt)
             })
             .into_iter()
             .collect();
 
         info!("Problematic variants {}", prob_keys.len());
 
-        // let mut problematic_variants = variants_collection.clone();
-        //
-        // let problematic_variants: Vec<VcfVariant> = problematic_variants
-        //     .iter_mut()
-        //     .flat_map(|e| {
-        //         e.retain_keys(&prob_keys);
-        //         e.variants.clone()
-        //     })
-        //     .collect();
-
-        // write_vcf(&problematic_variants, "prob.vcf.gz")?;
+        let mut problematic_variants = variants_collection.clone();
+
+        let problematic_variants: Vec<VcfVariant> = problematic_variants
+            .iter_mut()
+            .flat_map(|e| {
+                e.retain_keys(&prob_keys);
+                e.variants.clone()
+            })
+            .collect();
+
+        write_vcf(&problematic_variants, "prob.vcf.gz")?;
 
         Ok(())
     }

+ 60 - 8
src/variant/variant.rs

@@ -7,9 +7,7 @@ use crate::{
 use anyhow::{anyhow, Context, Ok};
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
-use std::{
-    cmp::Ordering, collections::HashSet, fmt, hash::{Hash, Hasher}, str::FromStr
-};
+use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct VcfVariant {
@@ -147,7 +145,7 @@ impl VcfVariant {
         // Update the hasher with the fields of the variant
         hasher.update(&self.position.contig.to_ne_bytes()); // Convert position to bytes
         hasher.update(&self.position.position.to_ne_bytes()); // Convert position to bytes
-        hasher.update(self.reference.to_string().as_bytes());   // Reference string as bytes
+        hasher.update(self.reference.to_string().as_bytes()); // Reference string as bytes
         hasher.update(self.alternative.to_string().as_bytes()); // Alternative string as bytes
 
         // Finalize the hash and get the output
@@ -160,6 +158,57 @@ impl VcfVariant {
         // Convert to u128
         u128::from_ne_bytes(array)
     }
+
+    pub fn alteration_category(&self) -> AlterationCategory {
+        match (&self.reference, &self.alternative) {
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
+                AlterationCategory::Snv
+            }
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
+                AlterationCategory::Ins
+            }
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
+                AlterationCategory::Del
+            }
+            (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
+                if a.len() < b.len() =>
+            {
+                AlterationCategory::Ins
+            }
+            (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b))
+                if a.len() > b.len() =>
+            {
+                AlterationCategory::Del
+            }
+            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
+                AlterationCategory::Rep
+            }
+            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
+                AlterationCategory::Other
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+pub enum AlterationCategory {
+    Snv,
+    Ins,
+    Del,
+    Rep,
+    Other,
 }
 
 impl VariantId for VcfVariant {
@@ -620,22 +669,25 @@ pub fn load_variants(
 
 pub fn parallel_intersection<T: Hash + Eq + Clone + Send + Sync>(
     vec1: &[T],
-    vec2: &[T]
+    vec2: &[T],
 ) -> (Vec<T>, Vec<T>, Vec<T>) {
     let set1: HashSet<_> = vec1.par_iter().cloned().collect();
     let set2: HashSet<_> = vec2.par_iter().cloned().collect();
 
-    let common: Vec<T> = set1.par_iter()
+    let common: Vec<T> = set1
+        .par_iter()
         .filter(|item| set2.contains(item))
         .cloned()
         .collect();
 
-    let only_in_first: Vec<T> = set1.par_iter()
+    let only_in_first: Vec<T> = set1
+        .par_iter()
         .filter(|item| !set2.contains(item))
         .cloned()
         .collect();
 
-    let only_in_second: Vec<T> = set2.par_iter()
+    let only_in_second: Vec<T> = set2
+        .par_iter()
         .filter(|item| !set1.contains(item))
         .cloned()
         .collect();

+ 94 - 34
src/variant/variant_collection.rs

@@ -1,11 +1,11 @@
 use std::collections::HashSet;
 
-use rayon::{iter::ParallelIterator, slice::ParallelSlice};
+use rayon::prelude::*;
 
-use super::variant::VcfVariant;
+use super::variant::{AlterationCategory, VcfVariant};
 use crate::{
     annotation::{Annotation, Annotations},
-    collection::vcf::Vcf,
+    collection::{bam::{counts_at, counts_ins_at}, vcf::Vcf},
     helpers::estimate_shannon_entropy,
     pipes::somatic::sequence_at,
 };
@@ -26,40 +26,100 @@ impl VariantCollection {
             .retain(|v| keys_to_keep.contains(&v.hash_variant()));
     }
 
-    pub fn annotate_with_sequence_entropy(&self, annotations: &Annotations, reference: &str) {
+    pub fn chunk_size(&self, max_threads: u8) -> usize {
         let total_items = self.variants.len();
         let min_chunk_size = 1000;
-        let max_chunks = 150;
-        let seq_len = 10;
-
-        // Calculate the optimal chunk size
-        let optimal_chunk_size = total_items.div_ceil(max_chunks); // Ceiling division
-        let chunk_size = optimal_chunk_size.max(min_chunk_size);
-
-        self.variants.par_chunks(chunk_size).for_each(|chunk| {
-            let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default()
-                .build_from_path(reference)
-                .unwrap();
-
-            for c in chunk {
-                let key = c.hash_variant();
-                let mut anns = annotations.store.entry(key).or_default();
-
-                if !anns
-                    .iter()
-                    .any(|e| matches!(e, Annotation::ShannonEntropy(_)))
-                {
-                    if let Ok(r) = sequence_at(
-                        &mut fasta_reader,
-                        &c.position.contig(),
-                        c.position.position as usize,
-                        seq_len,
-                    ) {
-                        let ent = estimate_shannon_entropy(r.as_str());
-                        anns.push(Annotation::ShannonEntropy(ent));
+        let max_chunks = max_threads;
+
+        let optimal_chunk_size = total_items.div_ceil(max_chunks as usize);
+        optimal_chunk_size.max(min_chunk_size)
+    }
+
+    pub fn annotate_with_sequence_entropy(
+        &self,
+        annotations: &Annotations,
+        reference: &str,
+        seq_len: usize,
+        max_threads: u8,
+    ) {
+        self.variants
+            .par_chunks(self.chunk_size(max_threads))
+            .for_each(|chunk| {
+                let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default()
+                    .build_from_path(reference)
+                    .unwrap();
+
+                for c in chunk {
+                    let key = c.hash_variant();
+                    let mut anns = annotations.store.entry(key).or_default();
+
+                    if !anns
+                        .iter()
+                        .any(|e| matches!(e, Annotation::ShannonEntropy(_)))
+                    {
+                        if let Ok(r) = sequence_at(
+                            &mut fasta_reader,
+                            &c.position.contig(),
+                            c.position.position as usize,
+                            seq_len,
+                        ) {
+                            let ent = estimate_shannon_entropy(r.as_str());
+                            anns.push(Annotation::ShannonEntropy(ent));
+                        }
+                    }
+                }
+            });
+    }
+
+    pub fn annotate_with_constit_bam(
+        &self,
+        annotations: &Annotations,
+        constit_bam_path: &str,
+        max_threads: u8,
+    ) -> anyhow::Result<()> {
+        self.variants
+            .par_chunks(self.chunk_size(max_threads))
+            .try_for_each(|chunk| {
+                let mut bam = rust_htslib::bam::IndexedReader::from_path(constit_bam_path)
+                    .map_err(|e| anyhow::anyhow!("Failed to open BAM file: {e}"))?;
+
+                for var in chunk {
+                    let key = var.hash_variant();
+                    let mut anns = annotations.store.entry(key).or_default();
+
+                    if anns
+                        .iter()
+                        .filter(|e| {
+                            matches!(e, Annotation::ConstitAlt(_) | Annotation::ConstitDepth(_))
+                        })
+                        .count()
+                        != 2
+                    {
+                        let mut n_alt = 0;
+                        let mut depth = 0;
+                        let mut alt_seq = var.alternative.to_string();
+
+                        let c = if var.alteration_category() == AlterationCategory::Ins {
+                            alt_seq = alt_seq.split_off(1);
+                            counts_ins_at(&mut bam, &var.position.contig(), var.position.position)?
+                        } else {
+                            counts_at(&mut bam, &var.position.contig(), var.position.position)?
+                        };
+
+                        for (seq, n) in c {
+                            if seq == alt_seq {
+                                n_alt = n;
+                            }
+                            depth += n;
+                        }
+
+
+                        anns.push(Annotation::ConstitAlt(n_alt as u16));
+                        anns.push(Annotation::ConstitDepth(depth as u16));
                     }
                 }
-            }
-        });
+                anyhow::Ok(())
+            })?;
+        Ok(())
     }
 }