|
|
@@ -1,15 +1,24 @@
|
|
|
+use charming::{
|
|
|
+ element::{Emphasis, EmphasisFocus},
|
|
|
+ series::{Sankey, SankeyLink, SankeyNode},
|
|
|
+ Chart,
|
|
|
+};
|
|
|
+use hashbrown::HashMap;
|
|
|
use log::info;
|
|
|
use rayon::prelude::*;
|
|
|
use std::{collections::HashSet, fs::File, sync::Arc};
|
|
|
|
|
|
use crate::{
|
|
|
- annotation::{Annotation, Annotations},
|
|
|
+ annotation::{Annotation, Annotations, AnnotationsStats, Caller},
|
|
|
callers::{clairs::ClairS, deep_variant::DeepVariant},
|
|
|
collection::{Initialize, InitializeSolo},
|
|
|
config::Config,
|
|
|
io::vcf::write_vcf,
|
|
|
runners::Run,
|
|
|
- variant::variant::{load_variants, parallel_intersection, RunnerVariants, VcfVariant},
|
|
|
+ variant::{
|
|
|
+ variant::{load_variants, parallel_intersection, RunnerVariants, VcfVariant},
|
|
|
+ variant_collection::VariantCollection,
|
|
|
+ },
|
|
|
};
|
|
|
|
|
|
pub struct Somatic {
|
|
|
@@ -29,10 +38,157 @@ impl Initialize for Somatic {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+#[derive(Debug, Default, Clone)]
|
|
|
+struct SomaticStats {
|
|
|
+ initial: HashMap<String, usize>,
|
|
|
+ annotations_stats: Vec<AnnotationsStats>,
|
|
|
+}
|
|
|
+
|
|
|
+impl SomaticStats {
|
|
|
+ pub fn init(collections: &[VariantCollection]) -> Self {
|
|
|
+ let mut initial = HashMap::new();
|
|
|
+
|
|
|
+ for collection in collections.iter() {
|
|
|
+ let name = format!("{}_{}", collection.vcf.caller, collection.vcf.time);
|
|
|
+ initial.insert(name, collection.variants.len());
|
|
|
+ }
|
|
|
+
|
|
|
+ Self {
|
|
|
+ initial,
|
|
|
+ annotations_stats: Default::default(),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn push_annotations_stats(&mut self, annotations_stats: AnnotationsStats) {
|
|
|
+ self.annotations_stats.push(annotations_stats);
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn aggregation(&mut self) -> anyhow::Result<()> {
|
|
|
+ let annotations_stats = self
|
|
|
+ .annotations_stats
|
|
|
+ .get(0)
|
|
|
+ .ok_or(anyhow::anyhow!("Can't find stats"))?;
|
|
|
+ let step_cat = vec![
|
|
|
+ (Annotation::Germline, 0),
|
|
|
+ (Annotation::Somatic, 0),
|
|
|
+ (Annotation::SoloDiag, 0),
|
|
|
+ (Annotation::SoloConstit, 0),
|
|
|
+ ];
|
|
|
+
|
|
|
+ let mut callers_cats: Vec<((Annotation, Annotation), Vec<(Annotation, u64)>)> = self
|
|
|
+ .initial
|
|
|
+ .keys()
|
|
|
+ .map(|s| (to_callers_cat(s), step_cat.clone()))
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ let mut node_names = Vec::new();
|
|
|
+
|
|
|
+ node_names.extend(step_cat.clone().into_iter().map(|(cat, _)| cat.to_string()));
|
|
|
+
|
|
|
+ let stats: anyhow::Result<Vec<()>> = annotations_stats
|
|
|
+ .categorical
|
|
|
+ .iter()
|
|
|
+ .map(|e| {
|
|
|
+ let v = e.value();
|
|
|
+ let keys: Vec<&str> = e.key().split(" + ").collect();
|
|
|
+ let k_a: Vec<Annotation> = keys
|
|
|
+ .into_iter()
|
|
|
+ .map(|e| e.parse())
|
|
|
+ .collect::<anyhow::Result<_>>()?;
|
|
|
+
|
|
|
+ for ((caller, cat), counts) in callers_cats.iter_mut() {
|
|
|
+ node_names.push(format!("{} {}", caller, cat));
|
|
|
+ if k_a.contains(caller) && k_a.contains(cat) {
|
|
|
+ for (c_annot, value) in counts.iter_mut() {
|
|
|
+ if k_a.contains(c_annot) {
|
|
|
+ *value += v;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ stats?;
|
|
|
+
|
|
|
+ println!("{callers_cats:#?}");
|
|
|
+ let mut links: Vec<(String, String, f64)> = callers_cats
|
|
|
+ .iter()
|
|
|
+ .flat_map(|((caller, cat), counts)| {
|
|
|
+ let from = format!("{} {}", caller, cat);
|
|
|
+ counts
|
|
|
+ .iter()
|
|
|
+ .map(move |(annot, count)| (from.clone(), annot.to_string(), *count as f64))
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ links.sort_by(|a, b| {
|
|
|
+ a.2.partial_cmp(&b.2)
|
|
|
+ .unwrap()
|
|
|
+ .then(a.0.cmp(&b.0))
|
|
|
+ .then(a.1.cmp(&b.1))
|
|
|
+ });
|
|
|
+
|
|
|
+ links.dedup();
|
|
|
+
|
|
|
+ node_names.sort();
|
|
|
+ node_names.dedup();
|
|
|
+
|
|
|
+ println!("{links:?}");
|
|
|
+ println!("{node_names:?}");
|
|
|
+
|
|
|
+ let chart = Chart::new().series(
|
|
|
+ Sankey::new()
|
|
|
+ .emphasis(Emphasis::new().focus(EmphasisFocus::Adjacency))
|
|
|
+ // .data(vec!["a", "b", "a1", "a2", "b1", "c"])
|
|
|
+ .data(node_names)
|
|
|
+ .links(links), // .links(vec![
|
|
|
+ // ("a", "a1", 5),
|
|
|
+ // ("a", "a2", 3),
|
|
|
+ // ("b", "b1", 8),
|
|
|
+ // ("a", "b1", 3),
|
|
|
+ // ("b1", "a1", 1),
|
|
|
+ // ("b1", "c", 2),
|
|
|
+ // ]),
|
|
|
+ // .data(node_names)
|
|
|
+ // .links(vec![("ClairS Somatic", "Germline", 10)])
|
|
|
+ // ,
|
|
|
+ // ),
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut renderer = charming::ImageRenderer::new(1000, 800);
|
|
|
+ renderer.save(&chart, "/data/sankey.svg")?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+pub fn to_callers_cat(s: &str) -> (Annotation, Annotation) {
|
|
|
+ let splits: Vec<&str> = s.split("_").collect();
|
|
|
+
|
|
|
+ if splits.contains(&"clairs") {
|
|
|
+ (Annotation::Callers(Caller::ClairS), Annotation::Somatic)
|
|
|
+ } else if splits.contains(&"clair3-germline") {
|
|
|
+ (Annotation::Callers(Caller::ClairS), Annotation::Germline)
|
|
|
+ } else if splits.contains(&"DeepVariant") && splits.contains(&"mrd") {
|
|
|
+ (
|
|
|
+ Annotation::Callers(Caller::DeepVariant),
|
|
|
+ Annotation::SoloConstit,
|
|
|
+ )
|
|
|
+ } else if splits.contains(&"DeepVariant") && splits.contains(&"diag") {
|
|
|
+ (
|
|
|
+ Annotation::Callers(Caller::DeepVariant),
|
|
|
+ Annotation::SoloDiag,
|
|
|
+ )
|
|
|
+ } else {
|
|
|
+ panic!("unknown caller: {s}");
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
impl Run for Somatic {
|
|
|
fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- info!("Running somatic pipe for {}.", self.id);
|
|
|
let id = self.id.clone();
|
|
|
+ info!("Running somatic pipe for {id}.");
|
|
|
|
|
|
info!("Initialization...");
|
|
|
let mut v: Vec<Box<dyn RunnerVariants + Send + Sync>> = vec![
|
|
|
@@ -49,9 +205,14 @@ impl Run for Somatic {
|
|
|
variants_collection.push(clairs_germline);
|
|
|
info!("Variants sources loaded: {}", variants_collection.len());
|
|
|
|
|
|
+ let mut somatic_stats = SomaticStats::init(&variants_collection);
|
|
|
+
|
|
|
// Annotations Stats
|
|
|
- let mut annotations = Arc::unwrap_or_clone(annotations);
|
|
|
- annotations.callers_stat();
|
|
|
+ // let mut annotations = Arc::unwrap_or_clone(annotations);
|
|
|
+ somatic_stats.push_annotations_stats(annotations.callers_stat());
|
|
|
+ somatic_stats.aggregation()?;
|
|
|
+
|
|
|
+ return Ok(());
|
|
|
// TODO: look at variants: ClairS + DeepVariant + SoloConstit + SoloDiag + Somatic (error
|
|
|
// in ClairS somatic)
|
|
|
|