|
|
@@ -39,128 +39,48 @@ impl Initialize for Somatic {
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Default, Clone)]
|
|
|
-struct SomaticStats {
|
|
|
- initial: HashMap<String, usize>,
|
|
|
- annotations_stats: Vec<AnnotationsStats>,
|
|
|
+pub struct SomaticStats {
|
|
|
+ pub input: InputStats,
|
|
|
+ pub n_constit_germline: usize,
|
|
|
+ pub n_low_constit: usize,
|
|
|
+ pub n_high_alt_constit: usize,
|
|
|
}
|
|
|
|
|
|
-impl SomaticStats {
|
|
|
- pub fn init(collections: &[VariantCollection]) -> Self {
|
|
|
- let mut initial = HashMap::new();
|
|
|
+#[derive(Debug, Default, Clone)]
|
|
|
+pub struct InputStats {
|
|
|
+ pub solo_tumor: Vec<(Annotation, usize)>,
|
|
|
+ pub solo_constit: Vec<(Annotation, usize)>,
|
|
|
+ pub germline: Vec<(Annotation, usize)>,
|
|
|
+ pub somatic: Vec<(Annotation, usize)>,
|
|
|
+}
|
|
|
|
|
|
+impl InputStats {
|
|
|
+ pub fn from_collections(collections: &[VariantCollection]) -> Self {
|
|
|
+ let mut stats = Self::default();
|
|
|
for collection in collections.iter() {
|
|
|
let name = format!("{}_{}", collection.vcf.caller, collection.vcf.time);
|
|
|
- initial.insert(name, collection.variants.len());
|
|
|
+ let (caller, cat) = to_callers_cat(&name);
|
|
|
+ match cat {
|
|
|
+ Annotation::SoloTumor => stats.solo_tumor.push((caller, collection.variants.len())),
|
|
|
+ Annotation::SoloConstit => {
|
|
|
+ stats.solo_constit.push((caller, collection.variants.len()))
|
|
|
+ }
|
|
|
+ Annotation::Germline => stats.germline.push((caller, collection.variants.len())),
|
|
|
+ Annotation::Somatic => stats.somatic.push((caller, collection.variants.len())),
|
|
|
+ _ => (),
|
|
|
+ };
|
|
|
}
|
|
|
+ stats
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
+impl SomaticStats {
|
|
|
+ pub fn init(collections: &[VariantCollection]) -> Self {
|
|
|
Self {
|
|
|
- initial,
|
|
|
- annotations_stats: Default::default(),
|
|
|
+ input: InputStats::from_collections(collections),
|
|
|
+ ..Default::default()
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- pub fn push_annotations_stats(&mut self, annotations_stats: AnnotationsStats) {
|
|
|
- self.annotations_stats.push(annotations_stats);
|
|
|
- }
|
|
|
-
|
|
|
- pub fn aggregation(&mut self) -> anyhow::Result<()> {
|
|
|
- let annotations_stats = self
|
|
|
- .annotations_stats
|
|
|
- .get(0)
|
|
|
- .ok_or(anyhow::anyhow!("Can't find stats"))?;
|
|
|
- let step_cat = vec![
|
|
|
- (Annotation::Germline, 0),
|
|
|
- (Annotation::Somatic, 0),
|
|
|
- (Annotation::SoloDiag, 0),
|
|
|
- (Annotation::SoloConstit, 0),
|
|
|
- ];
|
|
|
-
|
|
|
- let mut callers_cats: Vec<((Annotation, Annotation), Vec<(Annotation, u64)>)> = self
|
|
|
- .initial
|
|
|
- .keys()
|
|
|
- .map(|s| (to_callers_cat(s), step_cat.clone()))
|
|
|
- .collect();
|
|
|
-
|
|
|
- let mut node_names = Vec::new();
|
|
|
-
|
|
|
- node_names.extend(step_cat.clone().into_iter().map(|(cat, _)| cat.to_string()));
|
|
|
-
|
|
|
- let stats: anyhow::Result<Vec<()>> = annotations_stats
|
|
|
- .categorical
|
|
|
- .iter()
|
|
|
- .map(|e| {
|
|
|
- let v = e.value();
|
|
|
- let keys: Vec<&str> = e.key().split(" + ").collect();
|
|
|
- let k_a: Vec<Annotation> = keys
|
|
|
- .into_iter()
|
|
|
- .map(|e| e.parse())
|
|
|
- .collect::<anyhow::Result<_>>()?;
|
|
|
-
|
|
|
- for ((caller, cat), counts) in callers_cats.iter_mut() {
|
|
|
- node_names.push(format!("{} {}", caller, cat));
|
|
|
- if k_a.contains(caller) && k_a.contains(cat) {
|
|
|
- for (c_annot, value) in counts.iter_mut() {
|
|
|
- if k_a.contains(c_annot) {
|
|
|
- *value += v;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- Ok(())
|
|
|
- })
|
|
|
- .collect();
|
|
|
- stats?;
|
|
|
-
|
|
|
- println!("{callers_cats:#?}");
|
|
|
- let mut links: Vec<(String, String, f64)> = callers_cats
|
|
|
- .iter()
|
|
|
- .flat_map(|((caller, cat), counts)| {
|
|
|
- let from = format!("{} {}", caller, cat);
|
|
|
- counts
|
|
|
- .iter()
|
|
|
- .map(move |(annot, count)| (from.clone(), annot.to_string(), *count as f64))
|
|
|
- })
|
|
|
- .collect();
|
|
|
- links.sort_by(|a, b| {
|
|
|
- a.2.partial_cmp(&b.2)
|
|
|
- .unwrap()
|
|
|
- .then(a.0.cmp(&b.0))
|
|
|
- .then(a.1.cmp(&b.1))
|
|
|
- });
|
|
|
-
|
|
|
- links.dedup();
|
|
|
-
|
|
|
- node_names.sort();
|
|
|
- node_names.dedup();
|
|
|
-
|
|
|
- println!("{links:?}");
|
|
|
- println!("{node_names:?}");
|
|
|
-
|
|
|
- let chart = Chart::new().series(
|
|
|
- Sankey::new()
|
|
|
- .emphasis(Emphasis::new().focus(EmphasisFocus::Adjacency))
|
|
|
- // .data(vec!["a", "b", "a1", "a2", "b1", "c"])
|
|
|
- .data(node_names)
|
|
|
- .links(links), // .links(vec![
|
|
|
- // ("a", "a1", 5),
|
|
|
- // ("a", "a2", 3),
|
|
|
- // ("b", "b1", 8),
|
|
|
- // ("a", "b1", 3),
|
|
|
- // ("b1", "a1", 1),
|
|
|
- // ("b1", "c", 2),
|
|
|
- // ]),
|
|
|
- // .data(node_names)
|
|
|
- // .links(vec![("ClairS Somatic", "Germline", 10)])
|
|
|
- // ,
|
|
|
- // ),
|
|
|
- );
|
|
|
-
|
|
|
- let mut renderer = charming::ImageRenderer::new(1000, 800);
|
|
|
- renderer.save(&chart, "/data/sankey.svg")?;
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
pub fn to_callers_cat(s: &str) -> (Annotation, Annotation) {
|
|
|
@@ -178,7 +98,7 @@ pub fn to_callers_cat(s: &str) -> (Annotation, Annotation) {
|
|
|
} else if splits.contains(&"DeepVariant") && splits.contains(&"diag") {
|
|
|
(
|
|
|
Annotation::Callers(Caller::DeepVariant),
|
|
|
- Annotation::SoloDiag,
|
|
|
+ Annotation::SoloTumor,
|
|
|
)
|
|
|
} else {
|
|
|
panic!("unknown caller: {s}");
|
|
|
@@ -209,16 +129,16 @@ impl Run for Somatic {
|
|
|
|
|
|
// Annotations Stats
|
|
|
// let mut annotations = Arc::unwrap_or_clone(annotations);
|
|
|
- somatic_stats.push_annotations_stats(annotations.callers_stat());
|
|
|
- somatic_stats.aggregation()?;
|
|
|
+ annotations.callers_stat();
|
|
|
+ // somatic_stats.push_annotations_stats();
|
|
|
|
|
|
- return Ok(());
|
|
|
// TODO: look at variants: ClairS + DeepVariant + SoloConstit + SoloDiag + Somatic (error
|
|
|
// in ClairS somatic)
|
|
|
|
|
|
// Filtering Somatic variants
|
|
|
info!("Filtering somatic variants (variants not in salo callers on constit sample or germline).");
|
|
|
- let germline_or_somatic_keys = annotations.get_keys_filter(|anns| {
|
|
|
+
|
|
|
+ let tumor_or_somatic_keys = annotations.get_keys_filter(|anns| {
|
|
|
!anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
|
|
|
});
|
|
|
|
|
|
@@ -227,30 +147,41 @@ impl Run for Somatic {
|
|
|
.iter()
|
|
|
.flat_map(|e| e.keys())
|
|
|
.collect::<Vec<u128>>(),
|
|
|
- &germline_or_somatic_keys,
|
|
|
+ &tumor_or_somatic_keys,
|
|
|
);
|
|
|
assert_eq!(0, remains.len());
|
|
|
|
|
|
- info!("Somatic variants positions {}.", somatic_keys.len());
|
|
|
- info!("Germline variants positions {}.", germline_keys.len());
|
|
|
+ info!(
|
|
|
+ "Germline variants positions removed {}.",
|
|
|
+ germline_keys.len()
|
|
|
+ );
|
|
|
+ germline_keys.len();
|
|
|
|
|
|
let somatic_keys: HashSet<u128> = somatic_keys.into_iter().collect();
|
|
|
+ let mut annotations = Arc::try_unwrap(annotations)
|
|
|
+ .map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
|
|
|
annotations.retain_keys(&somatic_keys);
|
|
|
annotations.callers_stat();
|
|
|
|
|
|
- variants_collection.par_iter_mut().for_each(|c| {
|
|
|
- let before = c.variants.len();
|
|
|
- c.retain_keys(&somatic_keys);
|
|
|
- let after = c.variants.len();
|
|
|
- info!(
|
|
|
- "Variants removed from {}: {}",
|
|
|
- c.vcf.path.display(),
|
|
|
+ somatic_stats.n_constit_germline = variants_collection
|
|
|
+ .par_iter_mut()
|
|
|
+ .map(|c| {
|
|
|
+ let before = c.variants.len();
|
|
|
+ c.retain_keys(&somatic_keys);
|
|
|
+ let after = c.variants.len();
|
|
|
+ info!(
|
|
|
+ "Variants removed from {}: {}",
|
|
|
+ c.vcf.path.display(),
|
|
|
+ before - after
|
|
|
+ );
|
|
|
before - after
|
|
|
- );
|
|
|
- });
|
|
|
+ })
|
|
|
+ .sum();
|
|
|
|
|
|
variants_collection.retain(|e| !e.variants.is_empty());
|
|
|
|
|
|
+ variants_collection.iter().for_each(|c| c.stats());
|
|
|
+
|
|
|
info!("Constit Bam annotation...");
|
|
|
variants_collection.iter().try_for_each(|c| {
|
|
|
c.annotate_with_constit_bam(&annotations, &self.config.normal_bam(&id), 150)
|
|
|
@@ -260,22 +191,81 @@ impl Run for Somatic {
|
|
|
self.config.solo_min_constit_depth,
|
|
|
);
|
|
|
|
|
|
+ annotations.callers_stat();
|
|
|
+
|
|
|
+ //
|
|
|
+ // Remove LowConstitDepth
|
|
|
+ info!(
|
|
|
+ "Removing low constit depth (depth < {})",
|
|
|
+ self.config.solo_min_constit_depth
|
|
|
+ );
|
|
|
+
|
|
|
+ let low_constit_keys: HashSet<u128> = annotations
|
|
|
+ .get_keys_filter(|anns| anns.contains(&Annotation::LowConstitDepth))
|
|
|
+ .into_iter()
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ annotations.remove_keys(&low_constit_keys);
|
|
|
+
|
|
|
+ somatic_stats.n_low_constit = variants_collection
|
|
|
+ .par_iter_mut()
|
|
|
+ .map(|c| {
|
|
|
+ let before = c.variants.len();
|
|
|
+ c.remove_keys(&low_constit_keys);
|
|
|
+ let after = c.variants.len();
|
|
|
+ let rm = before - after;
|
|
|
+ info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
|
|
|
+ rm
|
|
|
+ })
|
|
|
+ .sum();
|
|
|
+
|
|
|
+ variants_collection.retain(|e| !e.variants.is_empty());
|
|
|
+
|
|
|
+ variants_collection.iter().for_each(|c| c.stats());
|
|
|
+
|
|
|
+ // Remove High Constit Alt
|
|
|
+ info!(
|
|
|
+ "Removing high constit alternative allele (alt constit > {})",
|
|
|
+ self.config.solo_max_alt_constit
|
|
|
+ );
|
|
|
+
|
|
|
+ let high_alt_keys: HashSet<u128> = annotations
|
|
|
+ .get_keys_filter(|anns| anns.contains(&Annotation::HighConstitAlt))
|
|
|
+ .into_iter()
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ annotations.remove_keys(&high_alt_keys);
|
|
|
+
|
|
|
+ somatic_stats.n_high_alt_constit = variants_collection
|
|
|
+ .par_iter_mut()
|
|
|
+ .map(|c| {
|
|
|
+ let before = c.variants.len();
|
|
|
+ c.remove_keys(&high_alt_keys);
|
|
|
+ let after = c.variants.len();
|
|
|
+ let rm = before - after;
|
|
|
+ info!("Variants removed from {}: {rm}", c.vcf.path.display(),);
|
|
|
+ rm
|
|
|
+ })
|
|
|
+ .sum();
|
|
|
+
|
|
|
+ variants_collection.retain(|e| !e.variants.is_empty());
|
|
|
+
|
|
|
+ variants_collection.iter().for_each(|c| c.stats());
|
|
|
+
|
|
|
+ annotations.callers_stat();
|
|
|
+
|
|
|
+ // Entropy
|
|
|
info!("Entropy annotation...");
|
|
|
variants_collection.iter().for_each(|c| {
|
|
|
c.annotate_with_sequence_entropy(&annotations, &self.config.reference, 10, 150);
|
|
|
});
|
|
|
annotations.callers_stat();
|
|
|
|
|
|
+
|
|
|
let prob_keys: HashSet<u128> = annotations
|
|
|
.get_keys_filter(|anns| {
|
|
|
- // let contains = anns.iter().any(|item| matches!(item, Annotation::SoloDiag));
|
|
|
- // let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic));
|
|
|
- //
|
|
|
- // contains && contains_not
|
|
|
- anns.contains(&Annotation::SoloDiag)
|
|
|
- && !anns.contains(&Annotation::Somatic)
|
|
|
- && !anns.contains(&Annotation::LowConstitDepth)
|
|
|
- && !anns.contains(&Annotation::HighConstitAlt)
|
|
|
+ anns.contains(&Annotation::SoloTumor)
|
|
|
+ && !anns.contains(&Annotation::Callers(Caller::ClairS))
|
|
|
})
|
|
|
.into_iter()
|
|
|
.collect();
|
|
|
@@ -287,6 +277,7 @@ impl Run for Somatic {
|
|
|
let problematic_variants: Vec<VcfVariant> = problematic_variants
|
|
|
.iter_mut()
|
|
|
.flat_map(|e| {
|
|
|
+ info!("{} {}:\t{}", e.vcf.caller, e.vcf.time, e.variants.len());
|
|
|
e.retain_keys(&prob_keys);
|
|
|
e.variants.clone()
|
|
|
})
|