pub mod cosmic; pub mod echtvar; pub mod gnomad; pub mod ncbi; pub mod vep; use std::{ collections::{HashMap, HashSet}, fmt, str::FromStr, sync::Arc, }; use crate::{helpers::mean, variant::variant_collection::VariantCollection}; use cosmic::Cosmic; use dashmap::DashMap; use gnomad::GnomAD; use log::info; use rayon::prelude::*; use vep::VEP; #[derive(Debug, Clone, PartialEq)] pub enum Annotation { SoloTumor, SoloConstit, Callers(Caller), Germline, Somatic, ShannonEntropy(f64), ConstitDepth(u16), ConstitAlt(u16), LowConstitDepth, HighConstitAlt, Cosmic(Cosmic), GnomAD(GnomAD), LowEntropy, VEP(Vec), } impl fmt::Display for Annotation { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let str = match self { Annotation::SoloTumor => "SoloTumor", Annotation::SoloConstit => "SoloConstit", Annotation::Callers(caller) => &caller.to_string(), Annotation::Germline => "Germline", Annotation::Somatic => "Somatic", Annotation::ShannonEntropy(_) => "ShannonEntropy", Annotation::ConstitDepth(_) => "ConstitDepth", Annotation::ConstitAlt(_) => "ConstitAlt", Annotation::LowConstitDepth => "LowConstitDepth", Annotation::HighConstitAlt => "HighConstitAlt", Annotation::Cosmic(_) => "Cosmic", Annotation::GnomAD(_) => "GnomAD", Annotation::LowEntropy => "LowEntropy", Annotation::VEP(_) => "VEP", }; write!(f, "{}", str) } } impl FromStr for Annotation { type Err = anyhow::Error; fn from_str(s: &str) -> anyhow::Result { match s { "SoloTumor" => Ok(Annotation::SoloTumor), "SoloConstit" => Ok(Annotation::SoloConstit), "DeepVariant" => Ok(Annotation::Callers(Caller::DeepVariant)), "ClairS" => Ok(Annotation::Callers(Caller::ClairS)), "Germline" => Ok(Annotation::Germline), "Somatic" => Ok(Annotation::Somatic), s if s.starts_with("ShannonEntropy") => Ok(Annotation::ShannonEntropy(0.0)), s if s.starts_with("ConstitDepth") => Ok(Annotation::ConstitDepth(0)), s if s.starts_with("ConstitAlt") => Ok(Annotation::ConstitAlt(0)), "LowConstitDepth" => Ok(Annotation::LowConstitDepth), "HighConstitAlt" => Ok(Annotation::HighConstitAlt), _ => Err(anyhow::anyhow!("Unknown Annotation: {}", s)), } } } #[derive(Debug, Clone, PartialEq, Eq)] pub enum Caller { DeepVariant, ClairS, NanomonSV, NanomonSVSolo, Savana, Severus, } impl fmt::Display for Caller { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Caller::DeepVariant => write!(f, "DeepVariant"), Caller::ClairS => write!(f, "ClairS"), Caller::NanomonSV => write!(f, "NanomonSV"), Caller::NanomonSVSolo => write!(f, "NanomonSV-solo"), Caller::Savana => write!(f, "Savana"), Caller::Severus => write!(f, "Severus"), } } } #[derive(Debug, Default, Clone)] pub struct Annotations { pub store: DashMap>, } #[derive(Debug, Default, Clone)] pub struct AnnotationsStats { pub categorical: DashMap, pub numeric: DashMap>>, } impl Annotations { pub fn insert_update(&self, key: u128, add: &[Annotation]) { self.store .entry(key) .or_default() .extend(add.iter().cloned()) } pub fn callers_stat(&self) -> AnnotationsStats { let map: DashMap = DashMap::new(); let num_maps: DashMap>> = DashMap::new(); self.store.par_iter().for_each(|e| { let anns = e.value(); let mut categorical = Vec::new(); let mut numerical = Vec::new(); for ann in anns { match ann { Annotation::SoloTumor | Annotation::SoloConstit | Annotation::Germline | Annotation::Somatic | Annotation::LowConstitDepth | Annotation::LowEntropy | Annotation::GnomAD(_) | Annotation::VEP(_) | Annotation::HighConstitAlt => categorical.push(ann.to_string()), Annotation::Callers(caller) => categorical.push(caller.to_string()), Annotation::ShannonEntropy(v) => numerical.push((ann.to_string(), *v)), Annotation::ConstitDepth(v) | Annotation::ConstitAlt(v) => { numerical.push((ann.to_string(), *v as f64)); } Annotation::Cosmic(c) => numerical.push((ann.to_string(), c.cosmic_cnt as f64)), } } categorical.sort(); categorical.dedup(); let k = categorical.join(" + "); *map.entry(k.clone()).or_default() += 1; for (k_num, v_num) in numerical { num_maps .entry(k.clone()) .or_default() .entry(k_num) .or_default() .push(v_num); } }); println!("\nCallers stats:"); println!("\tcategories: {}", map.len()); let mut n = 0; map.iter().for_each(|e| { let k = e.key(); let v = e.value(); n += v; let mut num_str = Vec::new(); if let Some(nums) = num_maps.get(k) { num_str.extend( nums.iter() .map(|(k_n, v_n)| format!("{k_n} {:.2}", mean(v_n))), ) } num_str.sort(); println!("\t{k}\t{v}\t{}", num_str.join("\t")); }); println!("Total\t{n}"); AnnotationsStats { categorical: map, numeric: num_maps, } } pub fn get_keys_filter( &self, filter: impl Fn(&Vec) -> bool + Send + Sync, ) -> Vec { self.store .par_iter() .filter(|entry| filter(entry.value())) .map(|entry| *entry.key()) .collect() } pub fn retain_variants( &mut self, variants: &mut Vec, filter: impl Fn(&Vec) -> bool + Send + Sync, ) -> usize { info!("Variant Keys lookup"); let mut keys = HashSet::new(); self.store.retain(|key, value| { if filter(value) { keys.insert(*key); true } else { false } }); // let keys: Vec = self // .store // .par_iter() // .filter(|entry| filter(entry.value())) // .map(|entry| *entry.key()) // .collect(); info!("{} unique Variants to keep", keys.len()); // info!("Removing annotations"); // self.store.retain(|key, _| keys.contains(key)); info!("Removing variants from collections"); let n_removed: usize = variants .par_iter_mut() .map(|c| { let before = c.variants.len(); c.variants = c .variants .par_iter() .filter(|a| keys.contains(&a.hash_variant())) // .filter(|a| keys.par_iter().any(|k| k == &a.hash_variant())) .cloned() .collect(); // c.variants // .retain(|a| keys.par_iter().any(|k| k == &a.hash_variant())); let after = c.variants.len(); info!("{} {}\t{}/{}", c.caller, c.category, before - after, before); before - after }) .sum(); variants.retain(|e| !e.variants.is_empty()); info!("{n_removed} variants removed from collections."); n_removed } pub fn retain_keys(&mut self, keys_to_keep: &HashSet) { self.store.retain(|key, _| keys_to_keep.contains(key)); } pub fn remove_keys(&mut self, keys_to_remove: &HashSet) { self.store.retain(|key, _| !keys_to_remove.contains(key)); } pub fn solo_constit_boundaries(&self, max_alt_constit: u16, min_constit_depth: u16) { self.store .iter_mut() .filter(|anns| { let contains = anns .iter() .any(|item| matches!(item, Annotation::SoloTumor)); let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic)); contains && contains_not }) .for_each(|mut e| { let v = e.value_mut(); let mut to_add = Vec::new(); v.iter().for_each(|ann| match ann { Annotation::ConstitDepth(v) => { if *v < min_constit_depth { to_add.push(Annotation::LowConstitDepth); } } Annotation::ConstitAlt(v) => { if *v > max_alt_constit { to_add.push(Annotation::HighConstitAlt); } } _ => (), }); v.extend(to_add); }); } pub fn count_annotations(&self, annotation_types: Vec) -> Vec { let annotation_types = Arc::new(annotation_types); self.store .par_iter() .fold( || vec![0; annotation_types.len()], |mut counts, r| { let annotations = r.value(); for (index, annotation_type) in annotation_types.iter().enumerate() { counts[index] += annotations.iter().filter(|a| *a == annotation_type).count(); } counts }, ) .reduce( || vec![0; annotation_types.len()], |mut a, b| { for i in 0..a.len() { a[i] += b[i]; } a }, ) } pub fn low_shannon_entropy(&mut self, min_shannon_entropy: f64) { self.store.iter_mut().for_each(|mut e| { let anns = e.value_mut(); let mut is_low = false; anns.iter().for_each(|ann| { if let Annotation::ShannonEntropy(ent) = ann { if *ent < min_shannon_entropy && !anns.contains(&Annotation::Somatic) { is_low = true } } }); if is_low { anns.push(Annotation::LowEntropy); } }); } } pub trait CallerCat { fn caller_cat(&self) -> (Caller, Annotation); }