|
|
@@ -1,8 +1,8 @@
|
|
|
-use log::info;
|
|
|
-use std::{fs::File, sync::Arc};
|
|
|
+use log::{info, kv::Source};
|
|
|
+use std::{collections::HashMap, fs::File, sync::Arc};
|
|
|
|
|
|
use crate::{
|
|
|
- annotation::{Annotation, Annotations, Caller},
|
|
|
+ annotation::{Annotation, Annotations, AnnotationsStats, Caller},
|
|
|
callers::{
|
|
|
clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV,
|
|
|
savana::Savana, severus::Severus,
|
|
|
@@ -83,6 +83,125 @@ impl SomaticStats {
|
|
|
..Default::default()
|
|
|
}
|
|
|
}
|
|
|
+ pub fn annot_init(&self, stats: AnnotationsStats) {
|
|
|
+ let stats: Vec<(Vec<Annotation>, u64)> = stats
|
|
|
+ .categorical
|
|
|
+ .iter()
|
|
|
+ .map(|e| {
|
|
|
+ let anns = e
|
|
|
+ .key()
|
|
|
+ .split(" + ")
|
|
|
+ .map(|k| k.parse())
|
|
|
+ .collect::<anyhow::Result<Vec<Annotation>>>()
|
|
|
+ .unwrap();
|
|
|
+ (anns, *e.value())
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ let callers_somatic_solo_tumor = [
|
|
|
+ self.input
|
|
|
+ .somatic
|
|
|
+ .iter()
|
|
|
+ .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::Somatic])
|
|
|
+ .collect::<Vec<Vec<Annotation>>>(),
|
|
|
+ self.input
|
|
|
+ .solo_tumor
|
|
|
+ .iter()
|
|
|
+ .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::SoloTumor])
|
|
|
+ .collect(),
|
|
|
+ ]
|
|
|
+ .concat();
|
|
|
+
|
|
|
+ let callers_germline_solo_constit = [
|
|
|
+ self.input
|
|
|
+ .germline
|
|
|
+ .iter()
|
|
|
+ .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::Germline])
|
|
|
+ .collect::<Vec<Vec<Annotation>>>(),
|
|
|
+ self.input
|
|
|
+ .solo_constit
|
|
|
+ .iter()
|
|
|
+ .map(|(caller, _)| {
|
|
|
+ vec![Annotation::Callers(caller.clone()), Annotation::SoloConstit]
|
|
|
+ })
|
|
|
+ .collect(),
|
|
|
+ ]
|
|
|
+ .concat();
|
|
|
+
|
|
|
+ let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
|
|
|
+ stats.iter().for_each(|(anns, v)| {
|
|
|
+ if anns
|
|
|
+ .iter()
|
|
|
+ .any(|a| matches!(a, Annotation::SoloConstit | Annotation::Germline))
|
|
|
+ {
|
|
|
+ let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
|
|
|
+ .iter()
|
|
|
+ .flat_map(|tumor| {
|
|
|
+ if tumor.iter().all(|a| anns.contains(a)) {
|
|
|
+ let tum_call =
|
|
|
+ format!("{} {}", tumor.first().unwrap(), tumor.get(1).unwrap());
|
|
|
+ vec![(tum_call, *v)]
|
|
|
+ } else {
|
|
|
+ vec![]
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ let mut germline_caller: Vec<String> = callers_germline_solo_constit
|
|
|
+ .iter()
|
|
|
+ .flat_map(|germ| {
|
|
|
+ if germ.iter().all(|a| anns.contains(a)) {
|
|
|
+ let germ_call =
|
|
|
+ format!("{} {}", germ.first().unwrap(), germ.get(1).unwrap());
|
|
|
+ vec![germ_call]
|
|
|
+ } else {
|
|
|
+ vec![]
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ germline_caller.sort();
|
|
|
+ let germline_caller = germline_caller.join(" + ");
|
|
|
+
|
|
|
+
|
|
|
+ n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
|
|
|
+ if let Some(row) = with_germline.get_mut(tumoral_caller) {
|
|
|
+ // germline_caller.iter().for_each(|germline_caller| {
|
|
|
+ if tumoral_caller == "ClairS Somatic" {
|
|
|
+ println!("{tumoral_caller} {germline_caller} {n}");
|
|
|
+ }
|
|
|
+ if let Some(col) = row.get_mut(&germline_caller) {
|
|
|
+ *col += *n;
|
|
|
+ } else {
|
|
|
+ row.insert(germline_caller.to_string(), *n);
|
|
|
+ }
|
|
|
+ // });
|
|
|
+ } else {
|
|
|
+ let mut row = HashMap::new();
|
|
|
+ // germline_caller.iter().for_each(|germline_caller| {
|
|
|
+ row.insert(germline_caller.to_string(), *n);
|
|
|
+ // });
|
|
|
+ with_germline.insert(tumoral_caller.to_string(), row);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+ });
|
|
|
+
|
|
|
+ let mut germlines_callers: Vec<String> = with_germline.iter().flat_map(|(_, r)| {
|
|
|
+ r.iter().map(|(k,_)| k.to_string()).collect::<Vec<String>>()
|
|
|
+ }).collect();
|
|
|
+ germlines_callers.sort();
|
|
|
+ germlines_callers.dedup();
|
|
|
+
|
|
|
+ with_germline.iter().for_each(|(tumor, row)| {
|
|
|
+ print!("{tumor}\t");
|
|
|
+ germlines_callers.iter().for_each(|g| {
|
|
|
+ let v = row.get(g).unwrap_or(&0);
|
|
|
+ print!("{g}:{v}\t");
|
|
|
+ });
|
|
|
+ println!();
|
|
|
+ });
|
|
|
+ println!();
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
impl Run for Somatic {
|
|
|
@@ -137,7 +256,18 @@ impl Run for Somatic {
|
|
|
|
|
|
let mut annotations = Arc::try_unwrap(annotations)
|
|
|
.map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
|
|
|
- annotations.callers_stat();
|
|
|
+ let caller_cat_anns = |v: &Annotation| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ )
|
|
|
+ };
|
|
|
+ let annot_init = annotations.callers_stat(Some(Box::new(caller_cat_anns)));
|
|
|
+ somatic_stats.annot_init(annot_init);
|
|
|
|
|
|
// Filter: Variants neither Germline nor SoloConstit
|
|
|
info!("Keeping somatic variants (variants neither in solo nor in germline).");
|
|
|
@@ -145,7 +275,7 @@ impl Run for Somatic {
|
|
|
annotations.retain_variants(&mut variants_collections, |anns| {
|
|
|
!anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
|
|
|
});
|
|
|
- annotations.callers_stat();
|
|
|
+ annotations.callers_stat(Some(Box::new(caller_cat_anns)));
|
|
|
|
|
|
// Annotation: BAM depth, n_alt
|
|
|
info!("Reading Constit BAM file for depth and pileup annotation.");
|
|
|
@@ -156,7 +286,20 @@ impl Run for Somatic {
|
|
|
self.config.solo_max_alt_constit,
|
|
|
self.config.solo_min_constit_depth,
|
|
|
);
|
|
|
- annotations.callers_stat();
|
|
|
+ annotations.callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ | Annotation::ConstitAlt(_)
|
|
|
+ | Annotation::ConstitDepth(_)
|
|
|
+ | Annotation::HighConstitAlt
|
|
|
+ | Annotation::LowConstitDepth
|
|
|
+ )
|
|
|
+ })));
|
|
|
|
|
|
// Filter: Remove LowConstitDepth from annotations and variants collections
|
|
|
info!(
|
|
|
@@ -167,7 +310,6 @@ impl Run for Somatic {
|
|
|
.retain_variants(&mut variants_collections, |anns| {
|
|
|
!anns.contains(&Annotation::LowConstitDepth)
|
|
|
});
|
|
|
- annotations.callers_stat();
|
|
|
info!(
|
|
|
"{} variants removed when depth in constit bam < {}.",
|
|
|
somatic_stats.n_low_constit, self.config.solo_min_constit_depth
|
|
|
@@ -182,12 +324,24 @@ impl Run for Somatic {
|
|
|
.retain_variants(&mut variants_collections, |anns| {
|
|
|
!anns.contains(&Annotation::HighConstitAlt)
|
|
|
});
|
|
|
- annotations.callers_stat();
|
|
|
info!(
|
|
|
"{} variants removed with SNP/indel inside the constit alignements > {}",
|
|
|
somatic_stats.n_high_alt_constit, self.config.solo_max_alt_constit
|
|
|
);
|
|
|
|
|
|
+ annotations.callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ | Annotation::ConstitAlt(_)
|
|
|
+ | Annotation::ConstitDepth(_)
|
|
|
+ )
|
|
|
+ })));
|
|
|
+
|
|
|
// Annotation: Entropy
|
|
|
info!(
|
|
|
"Entropy annotation from {} sequences.",
|
|
|
@@ -196,7 +350,6 @@ impl Run for Somatic {
|
|
|
variants_collections.iter().for_each(|c| {
|
|
|
c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
|
|
|
});
|
|
|
- annotations.callers_stat();
|
|
|
|
|
|
// Annotation: Cosmic and GnomAD
|
|
|
info!("Annotation with Cosmic and GnomAD.");
|
|
|
@@ -207,7 +360,18 @@ impl Run for Somatic {
|
|
|
ext_annot.annotate(&c.variants, &annotations)?;
|
|
|
Ok(())
|
|
|
})?;
|
|
|
- annotations.callers_stat();
|
|
|
+ annotations.callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ | Annotation::ConstitAlt(_)
|
|
|
+ | Annotation::GnomAD(_)
|
|
|
+ )
|
|
|
+ })));
|
|
|
|
|
|
// Filter: Remove variants in Gnomad and in constit bam
|
|
|
info!("Filtering out variants in GnomAD and in constit bam at low AF.");
|
|
|
@@ -235,23 +399,57 @@ impl Run for Somatic {
|
|
|
})
|
|
|
.unwrap_or(false)
|
|
|
});
|
|
|
+
|
|
|
info!(
|
|
|
"{} variants filtered, with constit alt <= max contig alt ({}) and in GnomAD.",
|
|
|
somatic_stats.n_high_alt_constit_gnomad, self.config.solo_max_alt_constit
|
|
|
);
|
|
|
- annotations.callers_stat();
|
|
|
+ annotations.callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ | Annotation::ConstitAlt(_)
|
|
|
+ | Annotation::GnomAD(_)
|
|
|
+ )
|
|
|
+ })));
|
|
|
|
|
|
// Annotation low entropy
|
|
|
annotations.low_shannon_entropy(self.config.min_shannon_entropy);
|
|
|
- annotations.callers_stat();
|
|
|
+ // annotations.callers_stat();
|
|
|
|
|
|
// Filtering low entropy for solo variants.
|
|
|
info!("Filtering low entropies");
|
|
|
+ annotations.callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ | Annotation::LowEntropy
|
|
|
+ )
|
|
|
+ })));
|
|
|
+
|
|
|
somatic_stats.n_low_entropies = annotations
|
|
|
.retain_variants(&mut variants_collections, |anns| {
|
|
|
!anns.contains(&Annotation::LowEntropy)
|
|
|
});
|
|
|
- annotations.callers_stat();
|
|
|
+ annotations.callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_)
|
|
|
+ | Annotation::Germline
|
|
|
+ | Annotation::Somatic
|
|
|
+ | Annotation::SoloConstit
|
|
|
+ | Annotation::SoloTumor
|
|
|
+ | Annotation::LowEntropy
|
|
|
+ )
|
|
|
+ })));
|
|
|
|
|
|
// VEP
|
|
|
info!("VEP annotation.");
|
|
|
@@ -262,7 +460,9 @@ impl Run for Somatic {
|
|
|
ext_annot.annotate_vep(&c.variants, &annotations)?;
|
|
|
Ok(())
|
|
|
})?;
|
|
|
- annotations.callers_stat();
|
|
|
+ annotations.callers_stat(Some(Box::new(caller_cat_anns)));
|
|
|
+
|
|
|
+ annotations.vep_stats()?;
|
|
|
|
|
|
Ok(())
|
|
|
}
|