Bladeren bron

annot repeatmasker

Thomas 7 maanden geleden
bovenliggende
commit
e8b86f0d83
4 gewijzigde bestanden met toevoegingen van 37 en 71 verwijderingen
  1. 29 30
      src/annotation/mod.rs
  2. 1 1
      src/lib.rs
  3. 4 1
      src/pipes/somatic.rs
  4. 3 39
      src/variant/variant_collection.rs

+ 29 - 30
src/annotation/mod.rs

@@ -88,6 +88,9 @@ pub enum Annotation {
 
     /// Variant in a variable number of tandem repeat locus
     VNTR,
+
+    /// RepeatMasker
+    RepeatMasker,
 }
 
 /// Denotes the biological sample type associated with a variant call.
@@ -214,6 +217,7 @@ impl fmt::Display for Annotation {
             VEP(_) => "VEP".into(),
             CpG => "CpG".into(),
             VNTR => "VNTR".into(),
+            RepeatMasker => "VNTR".into(),
             TriNucleotides(bases) => format!(
                 "Trinucleotides({})",
                 bases.iter().map(|b| b.to_string()).collect::<String>(),
@@ -364,8 +368,8 @@ impl Annotations {
             for ann in anns.iter() {
                 match ann {
                     LowConstitDepth | LowEntropy | GnomAD(_) | VEP(_) | TriNucleotides(_)
-                    | ReplicationTiming(_) | HighDepth | CpG | VNTR | Panel(_) | LowMAPQ
-                    | HighConstitAlt => categorical.push(ann.to_string()),
+                    | ReplicationTiming(_) | HighDepth | CpG | VNTR | RepeatMasker | Panel(_)
+                    | LowMAPQ | HighConstitAlt => categorical.push(ann.to_string()),
                     Callers(caller, sample) => categorical.push(format!("{caller} {sample}")),
                     ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),
                     ConstitDepth(v) | Annotation::ConstitAlt(v) => {
@@ -529,36 +533,31 @@ impl Annotations {
     }
 
     pub fn somatic_constit_boundaries(&self, max_alt_constit: u16, min_constit_depth: u16) {
-        self.store
-            .iter_mut()
-            .filter(|anns| {
-                let contains = anns
+        for mut entry in self.store.iter_mut() {
+            let anns = entry.value_mut();
+            // tumor but no somatic
+            let has_tumor = anns
+                .iter()
+                .any(|a| matches!(a, Annotation::Callers(_, Sample::SoloTumor)));
+            let has_somatic = anns
+                .iter()
+                .any(|a| matches!(a, Annotation::Callers(_, Sample::Somatic)));
+            if has_tumor && !has_somatic {
+                // push at most once
+                if anns
                     .iter()
-                    .any(|item| matches!(item, Annotation::Callers(_, Sample::SoloTumor)));
-                let contains_not = anns
+                    .any(|a| matches!(a, Annotation::ConstitDepth(d) if *d < min_constit_depth))
+                {
+                    anns.push(Annotation::LowConstitDepth);
+                }
+                if anns
                     .iter()
-                    .all(|item| !matches!(item, Annotation::Callers(_, Sample::Somatic)));
-
-                contains && contains_not
-            })
-            .for_each(|mut e| {
-                let v = e.value_mut();
-                let mut to_add = Vec::new();
-                v.iter().for_each(|ann| match ann {
-                    Annotation::ConstitDepth(v) => {
-                        if *v < min_constit_depth {
-                            to_add.push(Annotation::LowConstitDepth);
-                        }
-                    }
-                    Annotation::ConstitAlt(v) => {
-                        if *v > max_alt_constit {
-                            to_add.push(Annotation::HighConstitAlt);
-                        }
-                    }
-                    _ => (),
-                });
-                v.extend(to_add);
-            });
+                    .any(|a| matches!(a, Annotation::ConstitAlt(a0)    if *a0 > max_alt_constit))
+                {
+                    anns.push(Annotation::HighConstitAlt);
+                }
+            }
+        }
     }
 
     pub fn count_annotations(&self, annotation_types: Vec<Annotation>) -> Vec<usize> {

+ 1 - 1
src/lib.rs

@@ -1045,7 +1045,7 @@ mod tests {
     #[test]
     fn somatic_cases() -> anyhow::Result<()> {
         init();
-        let id = "ACHITE";
+        let id = "BACOU";
         let config = Config { somatic_pipe_force: true, ..Default::default() };
         match SomaticPipe::initialize(id, config)?.run() {
             Ok(_) => (),

+ 4 - 1
src/pipes/somatic.rs

@@ -475,12 +475,15 @@ impl Run for SomaticPipe {
 
         info!("Final unique variants: {}", variants.data.len());
 
-        // Ensure sorted (should already be sorted) and save
+        // Ensure sorted (should already be sorted)
         variants.sort();
 
         let vntrs: Vec<GenomeRange> = read_bed("/data/ref/hs1/vntrs_chm13.bed")?.iter().map(|r| r.range.clone()).collect();
         variants.annotate_with_ranges(&vntrs, Some(Annotation::VNTR), 0, Vec::new());
 
+        let repeat_masker: Vec<GenomeRange> = read_bed("/data/ref/hs1/chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed")?.iter().map(|r| r.range.clone()).collect();
+        variants.annotate_with_ranges(&repeat_masker, Some(Annotation::RepeatMasker), 0, Vec::new());
+
         variants.save_to_json(&result_json)?;
         variants.save_to_file(&result_bit)?;
         variants.write_vcf(&result_vcf, &config.dict_file, config.somatic_pipe_force)?;

+ 3 - 39
src/variant/variant_collection.rs

@@ -402,13 +402,7 @@ impl VariantCollection {
                         .count()
                         != 2
                     {
-                        // let mut n_alt = 0;
-                        // let mut depth = 0;
-                        // let mut is_ins = false;
-                        let alteration_cat = var.alteration_category();
-
-                        // debug!("{alteration_cat:?}");
-                        match alteration_cat {
+                        match var.alteration_category() {
                             AlterationCategory::SNV => {
                                 let pileup = counts_at(
                                     &mut bam,
@@ -432,6 +426,7 @@ impl VariantCollection {
                                     )?;
                                     let (start_depth, start_alt) =
                                         pileup_start.into_iter().fold((0, 0), folder("D"));
+
                                     let pileup_end = counts_at(
                                         &mut bam,
                                         &var.position.contig(),
@@ -483,43 +478,12 @@ impl VariantCollection {
                                     _ => pileup.into_iter().fold((0, 0), folder(&alt_seq)),
                                 };
 
-                                debug!("{} {alt} / {depth} ", var.variant_id());
+                                // debug!("{} {alt} / {depth} ", var.variant_id());
                                 anns.push(Annotation::ConstitAlt(alt as u16));
                                 anns.push(Annotation::ConstitDepth(depth as u16));
                             }
                             _ => (),
                         }
-                        // let c = if var.alteration_category() == AlterationCategory::INS {
-                        //     // TODO: Add stretch comparison
-                        //     alt_seq = alt_seq.split_off(1);
-                        //     is_ins = true;
-                        //     counts_ins_at(&mut bam, &var.position.contig(), var.position.position)?
-                        // } else if var.alteration_category() == AlterationCategory::DEL {
-                        //     alt_seq = "D".to_string();
-                        //     counts_at(&mut bam, &var.position.contig(), var.position.position + 1)?
-                        // } else {
-                        //     counts_at(&mut bam, &var.position.contig(), var.position.position)?
-                        // };
-                        //
-                        // for (seq, n) in c {
-                        //     if seq == alt_seq {
-                        //         n_alt = n;
-                        //     } else if is_ins && seq.len() > 1 {
-                        //         // insertion sequence DO NOT have to match
-                        //         // rule assumed to reduce FP from Solo callers
-                        //         // with this rule solo callers won't be able to
-                        //         // call the variant of an insertion.
-                        //         n_alt = n;
-                        //     }
-                        //     if seq == *"D" && alt_seq != *"D" {
-                        //         continue;
-                        //     }
-                        //     depth += n;
-                        // }
-                        // debug!("{n_alt}");
-
-                        // anns.push(Annotation::ConstitAlt(n_alt as u16));
-                        // anns.push(Annotation::ConstitDepth(depth as u16));
                     }
                 }
                 anyhow::Ok(())