|
|
@@ -1,6 +1,5 @@
|
|
|
use log::info;
|
|
|
-use rayon::prelude::*;
|
|
|
-use std::{collections::HashSet, fs::File, sync::Arc};
|
|
|
+use std::{fs::File, sync::Arc};
|
|
|
|
|
|
use crate::{
|
|
|
annotation::{Annotation, Annotations, Caller},
|
|
|
@@ -10,7 +9,7 @@ use crate::{
|
|
|
init_solo_callers, init_somatic_callers,
|
|
|
runners::Run,
|
|
|
variant::{
|
|
|
- variant::{load_variants, parallel_intersection, CallerBox},
|
|
|
+ variant::{load_variants, CallerBox},
|
|
|
variant_collection::{ExternalAnnotation, VariantCollection},
|
|
|
},
|
|
|
};
|
|
|
@@ -83,30 +82,6 @@ impl SomaticStats {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-// pub fn to_callers_cat(s: &str) -> (Annotation, Annotation) {
|
|
|
-// let splits: Vec<&str> = s.split("_").collect();
|
|
|
-//
|
|
|
-// if splits.contains(&"clairs") {
|
|
|
-// (Annotation::Callers(Caller::ClairS), Annotation::Somatic)
|
|
|
-// } else if splits.contains(&"clair3-germline") {
|
|
|
-// (Annotation::Callers(Caller::ClairS), Annotation::Germline)
|
|
|
-// } else if splits.contains(&"DeepVariant") && splits.contains(&"mrd") {
|
|
|
-// (
|
|
|
-// Annotation::Callers(Caller::DeepVariant),
|
|
|
-// Annotation::SoloConstit,
|
|
|
-// )
|
|
|
-// } else if splits.contains(&"DeepVariant") && splits.contains(&"diag") {
|
|
|
-// (
|
|
|
-// Annotation::Callers(Caller::DeepVariant),
|
|
|
-// Annotation::SoloTumor,
|
|
|
-// )
|
|
|
-// } else if splits.contains(&"nanomonsv") {
|
|
|
-// (Annotation::Callers(Caller::NanomonSV), Annotation::Somatic)
|
|
|
-// } else {
|
|
|
-// panic!("unknown caller: {s}");
|
|
|
-// }
|
|
|
-// }
|
|
|
-
|
|
|
impl Run for Somatic {
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
let id = self.id.clone();
|
|
|
@@ -140,30 +115,12 @@ impl Run for Somatic {
|
|
|
info!(
|
|
|
"Variants collections from {} vcf ({} variants)",
|
|
|
variants_collections.len(),
|
|
|
- variants_collections.iter().map(|v| v.variants.len()).sum::<usize>()
|
|
|
+ variants_collections
|
|
|
+ .iter()
|
|
|
+ .map(|v| v.variants.len())
|
|
|
+ .sum::<usize>()
|
|
|
);
|
|
|
|
|
|
- // Annotations Stats
|
|
|
- // let mut annotations = Arc::unwrap_or_clone(annotations);
|
|
|
- // somatic_stats.push_annotations_stats();
|
|
|
-
|
|
|
- // TODO: look at variants: ClairS + DeepVariant + SoloConstit + SoloDiag + Somatic (error
|
|
|
- // in ClairS somatic)
|
|
|
-
|
|
|
- // Filtering Somatic variants
|
|
|
- // let mut variants_collections: Vec<VariantCollection> = variants_collections
|
|
|
- // .into_iter()
|
|
|
- // .map(|var| {
|
|
|
- // let total = var.variants.len();
|
|
|
- // let (left, _) = annotations.filter_variants(var, |anns| {
|
|
|
- // !anns.contains(&Annotation::Germline)
|
|
|
- // && !anns.contains(&Annotation::SoloConstit)
|
|
|
- // });
|
|
|
- // info!("{} {}\t{}/{}", left.caller, left.category, left.variants.len(), total);
|
|
|
- // left
|
|
|
- // })
|
|
|
- // .collect();
|
|
|
-
|
|
|
let mut annotations = Arc::try_unwrap(annotations)
|
|
|
.map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
|
|
|
annotations.callers_stat();
|
|
|
@@ -176,51 +133,6 @@ impl Run for Somatic {
|
|
|
});
|
|
|
annotations.callers_stat();
|
|
|
|
|
|
- // return Ok(());
|
|
|
- // let tumor_or_somatic_keys = annotations.get_keys_filter(|anns| {
|
|
|
- // !anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
|
|
|
- // });
|
|
|
- //
|
|
|
- // let (somatic_keys, germline_keys, remains) = parallel_intersection(
|
|
|
- // &variants_collections
|
|
|
- // .iter()
|
|
|
- // .flat_map(|e| e.keys())
|
|
|
- // .collect::<Vec<u128>>(),
|
|
|
- // &tumor_or_somatic_keys,
|
|
|
- // );
|
|
|
- // assert_eq!(0, remains.len());
|
|
|
- //
|
|
|
- // info!(
|
|
|
- // "Germline variants positions removed {}.",
|
|
|
- // germline_keys.len()
|
|
|
- // );
|
|
|
- // germline_keys.len();
|
|
|
- //
|
|
|
- // let somatic_keys: HashSet<u128> = somatic_keys.into_iter().collect();
|
|
|
- // // let mut annotations = Arc::try_unwrap(annotations)
|
|
|
- // // .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
|
|
|
- // annotations.retain_keys(&somatic_keys);
|
|
|
- // annotations.callers_stat();
|
|
|
- //
|
|
|
- // somatic_stats.n_constit_germline = variants_collections
|
|
|
- // .par_iter_mut()
|
|
|
- // .map(|c| {
|
|
|
- // let before = c.variants.len();
|
|
|
- // c.retain_keys(&somatic_keys);
|
|
|
- // let after = c.variants.len();
|
|
|
- // info!(
|
|
|
- // "Variants removed from {}: {}",
|
|
|
- // c.vcf.path.display(),
|
|
|
- // before - after
|
|
|
- // );
|
|
|
- // before - after
|
|
|
- // })
|
|
|
- // .sum();
|
|
|
- //
|
|
|
- // variants_collections.retain(|e| !e.variants.is_empty());
|
|
|
- //
|
|
|
- // variants_collections.iter().for_each(|c| c.stats());
|
|
|
-
|
|
|
// Annotation: BAM depth, n_alt
|
|
|
info!("Reading Constit Bam file for depth and pileup annotation.");
|
|
|
variants_collections.iter().try_for_each(|c| {
|
|
|
@@ -234,7 +146,7 @@ impl Run for Somatic {
|
|
|
|
|
|
// Filter: Remove LowConstitDepth from annotations and variants collections
|
|
|
info!(
|
|
|
- "Removing low constit depth (depth < {})",
|
|
|
+ "Removing variants when depth in constit bam < {}.",
|
|
|
self.config.solo_min_constit_depth
|
|
|
);
|
|
|
somatic_stats.n_low_constit = annotations
|
|
|
@@ -242,66 +154,31 @@ impl Run for Somatic {
|
|
|
!anns.contains(&Annotation::LowConstitDepth)
|
|
|
});
|
|
|
annotations.callers_stat();
|
|
|
-
|
|
|
- // let low_constit_keys: HashSet<u128> = annotations
|
|
|
- // .get_keys_filter(|anns| anns.contains(&Annotation::LowConstitDepth))
|
|
|
- // .into_iter()
|
|
|
- // .collect();
|
|
|
- //
|
|
|
- // annotations.remove_keys(&low_constit_keys);
|
|
|
- //
|
|
|
- // somatic_stats.n_low_constit = variants_collections
|
|
|
- // .par_iter_mut()
|
|
|
- // .map(|c| {
|
|
|
- // let before = c.variants.len();
|
|
|
- // c.remove_keys(&low_constit_keys);
|
|
|
- // let after = c.variants.len();
|
|
|
- // let rm = before - after;
|
|
|
- // info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
|
|
|
- // rm
|
|
|
- // })
|
|
|
- // .sum();
|
|
|
- //
|
|
|
- // variants_collections.retain(|e| !e.variants.is_empty());
|
|
|
- //
|
|
|
- // variants_collections.iter().for_each(|c| c.stats());
|
|
|
+ info!(
|
|
|
+ "{} variants removed when depth in constit bam < {}.",
|
|
|
+ somatic_stats.n_low_constit, self.config.solo_min_constit_depth
|
|
|
+ );
|
|
|
|
|
|
// Filter: remove HighConstitAlt
|
|
|
info!(
|
|
|
- "Removing high constit alternative allele (alt constit > {})",
|
|
|
+ "Removing variants with SNP/indel inside the constit alignements > {}",
|
|
|
self.config.solo_max_alt_constit
|
|
|
);
|
|
|
-
|
|
|
somatic_stats.n_high_alt_constit = annotations
|
|
|
.retain_variants(&mut variants_collections, |anns| {
|
|
|
!anns.contains(&Annotation::HighConstitAlt)
|
|
|
});
|
|
|
annotations.callers_stat();
|
|
|
-
|
|
|
- // let high_alt_keys: HashSet<u128> = annotations
|
|
|
- // .get_keys_filter(|anns| anns.contains(&Annotation::HighConstitAlt))
|
|
|
- // .into_iter()
|
|
|
- // .collect();
|
|
|
- //
|
|
|
- // annotations.remove_keys(&high_alt_keys);
|
|
|
- // somatic_stats.n_high_alt_constit = variants_collections
|
|
|
- // .par_iter_mut()
|
|
|
- // .map(|c| {
|
|
|
- // let before = c.variants.len();
|
|
|
- // c.remove_keys(&high_alt_keys);
|
|
|
- // let after = c.variants.len();
|
|
|
- // let rm = before - after;
|
|
|
- // info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
|
|
|
- // rm
|
|
|
- // })
|
|
|
- // .sum();
|
|
|
- // variants_collections.retain(|e| !e.variants.is_empty());
|
|
|
- //
|
|
|
- // variants_collections.iter().for_each(|c| c.stats());
|
|
|
-
|
|
|
+ info!(
|
|
|
+ "{} variants removed with SNP/indel inside the constit alignements > {}",
|
|
|
+ somatic_stats.n_high_alt_constit, self.config.solo_max_alt_constit
|
|
|
+ );
|
|
|
|
|
|
// Annotation: Entropy
|
|
|
- info!("Entropy annotation from {} sequences.", self.config.reference);
|
|
|
+ info!(
|
|
|
+ "Entropy annotation from {} sequences.",
|
|
|
+ self.config.reference
|
|
|
+ );
|
|
|
variants_collections.iter().for_each(|c| {
|
|
|
c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
|
|
|
});
|
|
|
@@ -350,98 +227,6 @@ impl Run for Somatic {
|
|
|
);
|
|
|
annotations.callers_stat();
|
|
|
|
|
|
- // let high_alt_gnomad_keys: HashSet<u128> = annotations
|
|
|
- // .get_keys_filter(|anns| {
|
|
|
- // anns.iter()
|
|
|
- // .find_map(|a| {
|
|
|
- // if let Annotation::GnomAD(gnomad) = a {
|
|
|
- // Some(gnomad.gnomad_af > 0.0)
|
|
|
- // } else {
|
|
|
- // None
|
|
|
- // }
|
|
|
- // })
|
|
|
- // .and_then(|gnomad_condition| {
|
|
|
- // anns.iter()
|
|
|
- // .find_map(|a| {
|
|
|
- // if let Annotation::ConstitAlt(n_alt) = a {
|
|
|
- // Some(*n_alt > 0)
|
|
|
- // } else {
|
|
|
- // None
|
|
|
- // }
|
|
|
- // })
|
|
|
- // .map(|constit_alt_condition| gnomad_condition && constit_alt_condition)
|
|
|
- // })
|
|
|
- // .unwrap_or(false)
|
|
|
- // })
|
|
|
- // .into_iter()
|
|
|
- // .collect();
|
|
|
- //
|
|
|
- // info!(
|
|
|
- // "Filtering out {} positions, with constit alt <= max contig alt ({}) and in GnomAD.",
|
|
|
- // high_alt_gnomad_keys.len(),
|
|
|
- // self.config.solo_max_alt_constit
|
|
|
- // );
|
|
|
- //
|
|
|
- // annotations.remove_keys(&high_alt_gnomad_keys);
|
|
|
- // somatic_stats.n_high_alt_constit_gnomad = variants_collections
|
|
|
- // .par_iter_mut()
|
|
|
- // .map(|c| {
|
|
|
- // let before = c.variants.len();
|
|
|
- // c.remove_keys(&high_alt_gnomad_keys);
|
|
|
- // let after = c.variants.len();
|
|
|
- // let rm = before - after;
|
|
|
- // info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
|
|
|
- // rm
|
|
|
- // })
|
|
|
- // .sum();
|
|
|
- // variants_collections.retain(|e| !e.variants.is_empty());
|
|
|
-
|
|
|
-
|
|
|
- // let prob_keys: HashSet<u128> = annotations
|
|
|
- // .get_keys_filter(|anns| {
|
|
|
- // anns.contains(&Annotation::SoloTumor)
|
|
|
- // && !anns.contains(&Annotation::Callers(Caller::ClairS))
|
|
|
- // })
|
|
|
- // .into_iter()
|
|
|
- // .collect();
|
|
|
- //
|
|
|
- // info!("Problematic variants {}", prob_keys.len());
|
|
|
- //
|
|
|
- // let mut problematic_variants = variants_collection.clone();
|
|
|
- //
|
|
|
- // let problematic_variants: Vec<VcfVariant> = problematic_variants
|
|
|
- // .iter_mut()
|
|
|
- // .flat_map(|e| {
|
|
|
- // info!("{} {}:\t{}", e.vcf.caller, e.vcf.time, e.variants.len());
|
|
|
- // e.retain_keys(&prob_keys);
|
|
|
- // e.variants.clone()
|
|
|
- // })
|
|
|
- // .collect();
|
|
|
- // write_vcf(&problematic_variants, "prob.vcf.gz")?;
|
|
|
- //
|
|
|
- // // Entropies
|
|
|
- // let entropies: Vec<f64> = problematic_variants
|
|
|
- // .iter()
|
|
|
- // .flat_map(|v| {
|
|
|
- // annotations
|
|
|
- // .store
|
|
|
- // .get(&v.hash_variant())
|
|
|
- // .unwrap()
|
|
|
- // .iter()
|
|
|
- // .flat_map(|a| {
|
|
|
- // if let Annotation::ShannonEntropy(ent) = a {
|
|
|
- // vec![*ent]
|
|
|
- // } else {
|
|
|
- // Vec::new()
|
|
|
- // }
|
|
|
- // })
|
|
|
- // .collect::<Vec<_>>()
|
|
|
- // })
|
|
|
- // .collect();
|
|
|
- // let json_string = serde_json::to_string(&entropies)?;
|
|
|
- // let mut file = File::create("/data/entropies.json")?;
|
|
|
- // file.write_all(json_string.as_bytes())?;
|
|
|
-
|
|
|
// Annotation low entropy
|
|
|
annotations.low_shannon_entropy(self.config.min_shannon_entropy);
|
|
|
annotations.callers_stat();
|
|
|
@@ -454,28 +239,6 @@ impl Run for Somatic {
|
|
|
});
|
|
|
annotations.callers_stat();
|
|
|
|
|
|
- // let low_ent_keys: HashSet<u128> = annotations
|
|
|
- // .get_keys_filter(|anns| anns.contains(&Annotation::LowEntropy))
|
|
|
- // .into_iter()
|
|
|
- // .collect();
|
|
|
- //
|
|
|
- // annotations.remove_keys(&low_ent_keys);
|
|
|
- // annotations.callers_stat();
|
|
|
- // somatic_stats.n_low_entropies = variants_collections
|
|
|
- // .par_iter_mut()
|
|
|
- // .map(|c| {
|
|
|
- // let before = c.variants.len();
|
|
|
- // c.remove_keys(&low_ent_keys);
|
|
|
- // let after = c.variants.len();
|
|
|
- // let rm = before - after;
|
|
|
- // info!("Variants removed from {}: {rm}", c.vcf.path.display());
|
|
|
- // rm
|
|
|
- // })
|
|
|
- // .sum();
|
|
|
- // variants_collections.retain(|e| !e.variants.is_empty());
|
|
|
- // let tw: Vec<VcfVariant> = variants_collection.iter().flat_map(|c| c.variants.clone()).collect();
|
|
|
- // write_vcf(&tw, "low_ent.vcf.gz")?;
|
|
|
-
|
|
|
// VEP
|
|
|
info!("VEP annotation.");
|
|
|
variants_collections
|