Browse Source

one source of hash

Thomas 1 week ago
parent
commit
c2301c5f98
2 changed files with 21 additions and 23 deletions
  1. 1 1
      src/annotation/mod.rs
  2. 20 22
      src/variant/variant_collection.rs

+ 1 - 1
src/annotation/mod.rs

@@ -544,7 +544,7 @@ impl Annotations {
             .par_iter_mut()
             .map(|c| {
                 let before = c.variants.len();
-                c.variants.retain(|a| keys.contains(&a.hash()));
+                c.variants.retain(|a| keys.contains(&a.hash));
                 let after = c.variants.len();
                 info!("\t- {}\t{}/{}", c.caller, before - after, before);
                 before - after

+ 20 - 22
src/variant/variant_collection.rs

@@ -93,7 +93,7 @@ impl VariantCollection {
     /// }
     /// ```
     pub fn keys(&self) -> Vec<Hash128> {
-        self.variants.iter().map(|v| v.hash()).collect()
+        self.variants.iter().map(|v| v.hash).collect()
     }
 
     /// Retains only the variants whose hash keys are present in the provided set.
@@ -128,7 +128,7 @@ impl VariantCollection {
     /// variant_collection.retain_keys(&keys_to_keep);
     /// ```
     pub fn retain_keys(&mut self, keys_to_keep: &HashSet<Hash128>) {
-        self.variants.retain(|v| keys_to_keep.contains(&v.hash()));
+        self.variants.retain(|v| keys_to_keep.contains(&v.hash));
     }
 
     /// Removes variants whose hash keys are present in the provided set.
@@ -163,8 +163,7 @@ impl VariantCollection {
     /// variant_collection.remove_keys(&keys_to_remove);
     /// ```
     pub fn remove_keys(&mut self, keys_to_remove: &HashSet<Hash128>) {
-        self.variants
-            .retain(|v| !keys_to_remove.contains(&v.hash()));
+        self.variants.retain(|v| !keys_to_remove.contains(&v.hash));
     }
 
     /// Partitions the VcfVariants into two sets based on a given predicate.
@@ -231,7 +230,7 @@ impl VariantCollection {
         for &idx in &overlaps {
             let variant = &mut self.variants[idx];
 
-            let key = variant.hash();
+            let key = variant.hash;
             let mut anns = annotations.store.entry(key).or_default();
             anns.push(annotation.clone());
         }
@@ -319,15 +318,21 @@ impl VariantCollection {
             || noodles_fasta::io::indexed_reader::Builder::default().build_from_path(reference),
             |reader_res, chunk| {
                 let Ok(ref mut fasta_reader) = reader_res else {
+                    error!("Failed to load reference for chunk: {chunk:?}");
                     return;
                 };
 
                 for c in chunk {
-                    let key = c.hash();
+                    let key = c.hash;
                     let pos0 = c.position.position as usize; // 0-based
 
                     let Ok(seq) = sequence_at(fasta_reader, &c.position.contig(), pos0, seq_len)
                     else {
+                        warn!(
+                            "Failed to get sequence at: {}:{}",
+                            &c.position.contig(),
+                            pos0
+                        );
                         continue;
                     };
 
@@ -422,6 +427,7 @@ impl VariantCollection {
 
         to_remove.len()
     }
+
     /// Annotates variants with information from a constitutional BAM file.
     ///
     /// This function processes variants in parallel chunks and adds annotations
@@ -467,15 +473,6 @@ impl VariantCollection {
             }
         }
 
-        fn match_repeats(v: &[(String, i32)], nt: char, n: usize, e: usize) -> Vec<&(String, i32)> {
-            v.iter()
-                .filter(|(s, _)| {
-                    let len = s.len();
-                    (n.saturating_sub(e)..=n + e).contains(&len) && s.chars().all(|c| c == nt)
-                })
-                .collect()
-        }
-
         self.variants
             .par_chunks(self.chunk_size(max_threads))
             .try_for_each(|chunk| {
@@ -488,7 +485,7 @@ impl VariantCollection {
                     .build_from_path(c.reference)?;
 
                 for var in chunk {
-                    let key = var.hash();
+                    let key = var.hash;
                     let mut anns = annotations.store.entry(key).or_default();
 
                     if anns
@@ -1515,7 +1512,7 @@ impl ExternalAnnotation {
         let mut unfound = Vec::new();
 
         for variant in variants {
-            let hash = variant.hash();
+            let hash = variant.hash;
             let mut has_pushed = false;
 
             // Check COSMIC
@@ -1644,7 +1641,7 @@ impl ExternalAnnotation {
 
                     let (cosmic, gnomad) = parse_echtvar_val(&row.info)?;
 
-                    let hash = chunk[i].hash();
+                    let hash = chunk[i].hash;
 
                     chunk_results.push((hash, cosmic, gnomad));
                 }
@@ -1706,7 +1703,7 @@ impl ExternalAnnotation {
         let mut unfound = Vec::new();
 
         for variant in variants {
-            let hash = variant.hash();
+            let hash = variant.hash;
 
             // Check VEP
             match self.get_annotation(hash, "VEP")? {
@@ -1837,7 +1834,7 @@ impl ExternalAnnotation {
 
                             if let Some(vep_lines) = lines.get(&k) {
                                 if let Ok(veps) = vep_lines.iter().map(VEP::try_from).collect() {
-                                    chunk_results.push((entry.hash(), veps));
+                                    chunk_results.push((entry.hash, veps));
                                 }
                             } else {
                                 warn!(
@@ -1957,7 +1954,7 @@ fn process_vep_chunk(
 
         if let Some(vep_lines) = lines.get(&k) {
             if let Ok(veps) = vep_lines.iter().map(VEP::try_from).collect() {
-                chunk_results.push((entry.hash(), veps));
+                chunk_results.push((entry.hash, veps));
             }
         } else {
             warn!(
@@ -1994,9 +1991,10 @@ mod tests {
         let config = Config::default();
         let ins: VcfVariant = "chr1\t286\t.\tC\tCT\t27.4\tPASS\t.\tGT:GQ:DP:AD:VAF:MID:PL\t1/1:25:24:0,22:0.916667:deepvariant:27,28,0".parse()?;
         let ins_2: VcfVariant = "chr1\t1000188\t.\tT\tTGGTGCAGGCAGAGAACAGACGTCGCGATGGGCCCGACGGTGCTGGCTCCATGGGAACCGAGACCCAACACCCAAAGGAGTCCCACAGGCTCAGGGG\t8.9\tPASS\t.\tGT:GQ:DP:AD:VAF:MID:PL\t0/1:8:48:31,16:0.333333:deepvariant:8,0,13".parse()?;
+        let ins3: VcfVariant = "chr1\t1710\t.\tT\tTA\t34.1\tPASS\t.\tGT:GQ:DP:AD:VAF:MID:PL\t1/1:9:39:9,22:0.564103:deepvariant:33,71,0".parse()?;
         let vcf_path = "/mnt/beegfs02/scratch/t_steimle/data/wgs/CHAHA/norm/DeepVariant/CHAHA_norm_DeepVariant_PASSED.vcf.gz";
         let coll = VariantCollection {
-            variants: vec![ins, ins_2],
+            variants: vec![ins, ins_2, ins3],
             vcf: Vcf::new(vcf_path.into())?,
             caller: Annotation::Callers(Caller::DeepVariant, crate::annotation::Sample::Somatic),
         };