|
|
@@ -93,7 +93,7 @@ impl VariantCollection {
|
|
|
/// }
|
|
|
/// ```
|
|
|
pub fn keys(&self) -> Vec<Hash128> {
|
|
|
- self.variants.iter().map(|v| v.hash()).collect()
|
|
|
+ self.variants.iter().map(|v| v.hash).collect()
|
|
|
}
|
|
|
|
|
|
/// Retains only the variants whose hash keys are present in the provided set.
|
|
|
@@ -128,7 +128,7 @@ impl VariantCollection {
|
|
|
/// variant_collection.retain_keys(&keys_to_keep);
|
|
|
/// ```
|
|
|
pub fn retain_keys(&mut self, keys_to_keep: &HashSet<Hash128>) {
|
|
|
- self.variants.retain(|v| keys_to_keep.contains(&v.hash()));
|
|
|
+ self.variants.retain(|v| keys_to_keep.contains(&v.hash));
|
|
|
}
|
|
|
|
|
|
/// Removes variants whose hash keys are present in the provided set.
|
|
|
@@ -163,8 +163,7 @@ impl VariantCollection {
|
|
|
/// variant_collection.remove_keys(&keys_to_remove);
|
|
|
/// ```
|
|
|
pub fn remove_keys(&mut self, keys_to_remove: &HashSet<Hash128>) {
|
|
|
- self.variants
|
|
|
- .retain(|v| !keys_to_remove.contains(&v.hash()));
|
|
|
+ self.variants.retain(|v| !keys_to_remove.contains(&v.hash));
|
|
|
}
|
|
|
|
|
|
/// Partitions the VcfVariants into two sets based on a given predicate.
|
|
|
@@ -231,7 +230,7 @@ impl VariantCollection {
|
|
|
for &idx in &overlaps {
|
|
|
let variant = &mut self.variants[idx];
|
|
|
|
|
|
- let key = variant.hash();
|
|
|
+ let key = variant.hash;
|
|
|
let mut anns = annotations.store.entry(key).or_default();
|
|
|
anns.push(annotation.clone());
|
|
|
}
|
|
|
@@ -319,15 +318,21 @@ impl VariantCollection {
|
|
|
|| noodles_fasta::io::indexed_reader::Builder::default().build_from_path(reference),
|
|
|
|reader_res, chunk| {
|
|
|
let Ok(ref mut fasta_reader) = reader_res else {
|
|
|
+ error!("Failed to load reference for chunk: {chunk:?}");
|
|
|
return;
|
|
|
};
|
|
|
|
|
|
for c in chunk {
|
|
|
- let key = c.hash();
|
|
|
+ let key = c.hash;
|
|
|
let pos0 = c.position.position as usize; // 0-based
|
|
|
|
|
|
let Ok(seq) = sequence_at(fasta_reader, &c.position.contig(), pos0, seq_len)
|
|
|
else {
|
|
|
+ warn!(
|
|
|
+ "Failed to get sequence at: {}:{}",
|
|
|
+ &c.position.contig(),
|
|
|
+ pos0
|
|
|
+ );
|
|
|
continue;
|
|
|
};
|
|
|
|
|
|
@@ -422,6 +427,7 @@ impl VariantCollection {
|
|
|
|
|
|
to_remove.len()
|
|
|
}
|
|
|
+
|
|
|
/// Annotates variants with information from a constitutional BAM file.
|
|
|
///
|
|
|
/// This function processes variants in parallel chunks and adds annotations
|
|
|
@@ -467,15 +473,6 @@ impl VariantCollection {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- fn match_repeats(v: &[(String, i32)], nt: char, n: usize, e: usize) -> Vec<&(String, i32)> {
|
|
|
- v.iter()
|
|
|
- .filter(|(s, _)| {
|
|
|
- let len = s.len();
|
|
|
- (n.saturating_sub(e)..=n + e).contains(&len) && s.chars().all(|c| c == nt)
|
|
|
- })
|
|
|
- .collect()
|
|
|
- }
|
|
|
-
|
|
|
self.variants
|
|
|
.par_chunks(self.chunk_size(max_threads))
|
|
|
.try_for_each(|chunk| {
|
|
|
@@ -488,7 +485,7 @@ impl VariantCollection {
|
|
|
.build_from_path(c.reference)?;
|
|
|
|
|
|
for var in chunk {
|
|
|
- let key = var.hash();
|
|
|
+ let key = var.hash;
|
|
|
let mut anns = annotations.store.entry(key).or_default();
|
|
|
|
|
|
if anns
|
|
|
@@ -1515,7 +1512,7 @@ impl ExternalAnnotation {
|
|
|
let mut unfound = Vec::new();
|
|
|
|
|
|
for variant in variants {
|
|
|
- let hash = variant.hash();
|
|
|
+ let hash = variant.hash;
|
|
|
let mut has_pushed = false;
|
|
|
|
|
|
// Check COSMIC
|
|
|
@@ -1644,7 +1641,7 @@ impl ExternalAnnotation {
|
|
|
|
|
|
let (cosmic, gnomad) = parse_echtvar_val(&row.info)?;
|
|
|
|
|
|
- let hash = chunk[i].hash();
|
|
|
+ let hash = chunk[i].hash;
|
|
|
|
|
|
chunk_results.push((hash, cosmic, gnomad));
|
|
|
}
|
|
|
@@ -1706,7 +1703,7 @@ impl ExternalAnnotation {
|
|
|
let mut unfound = Vec::new();
|
|
|
|
|
|
for variant in variants {
|
|
|
- let hash = variant.hash();
|
|
|
+ let hash = variant.hash;
|
|
|
|
|
|
// Check VEP
|
|
|
match self.get_annotation(hash, "VEP")? {
|
|
|
@@ -1837,7 +1834,7 @@ impl ExternalAnnotation {
|
|
|
|
|
|
if let Some(vep_lines) = lines.get(&k) {
|
|
|
if let Ok(veps) = vep_lines.iter().map(VEP::try_from).collect() {
|
|
|
- chunk_results.push((entry.hash(), veps));
|
|
|
+ chunk_results.push((entry.hash, veps));
|
|
|
}
|
|
|
} else {
|
|
|
warn!(
|
|
|
@@ -1957,7 +1954,7 @@ fn process_vep_chunk(
|
|
|
|
|
|
if let Some(vep_lines) = lines.get(&k) {
|
|
|
if let Ok(veps) = vep_lines.iter().map(VEP::try_from).collect() {
|
|
|
- chunk_results.push((entry.hash(), veps));
|
|
|
+ chunk_results.push((entry.hash, veps));
|
|
|
}
|
|
|
} else {
|
|
|
warn!(
|
|
|
@@ -1994,9 +1991,10 @@ mod tests {
|
|
|
let config = Config::default();
|
|
|
let ins: VcfVariant = "chr1\t286\t.\tC\tCT\t27.4\tPASS\t.\tGT:GQ:DP:AD:VAF:MID:PL\t1/1:25:24:0,22:0.916667:deepvariant:27,28,0".parse()?;
|
|
|
let ins_2: VcfVariant = "chr1\t1000188\t.\tT\tTGGTGCAGGCAGAGAACAGACGTCGCGATGGGCCCGACGGTGCTGGCTCCATGGGAACCGAGACCCAACACCCAAAGGAGTCCCACAGGCTCAGGGG\t8.9\tPASS\t.\tGT:GQ:DP:AD:VAF:MID:PL\t0/1:8:48:31,16:0.333333:deepvariant:8,0,13".parse()?;
|
|
|
+ let ins3: VcfVariant = "chr1\t1710\t.\tT\tTA\t34.1\tPASS\t.\tGT:GQ:DP:AD:VAF:MID:PL\t1/1:9:39:9,22:0.564103:deepvariant:33,71,0".parse()?;
|
|
|
let vcf_path = "/mnt/beegfs02/scratch/t_steimle/data/wgs/CHAHA/norm/DeepVariant/CHAHA_norm_DeepVariant_PASSED.vcf.gz";
|
|
|
let coll = VariantCollection {
|
|
|
- variants: vec![ins, ins_2],
|
|
|
+ variants: vec![ins, ins_2, ins3],
|
|
|
vcf: Vcf::new(vcf_path.into())?,
|
|
|
caller: Annotation::Callers(Caller::DeepVariant, crate::annotation::Sample::Somatic),
|
|
|
};
|