mod.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. pub mod cosmic;
  2. pub mod echtvar;
  3. pub mod gnomad;
  4. pub mod ncbi;
  5. pub mod vep;
  6. use std::{
  7. collections::{HashMap, HashSet},
  8. fmt,
  9. str::FromStr,
  10. sync::Arc,
  11. };
  12. use crate::{helpers::mean, variant::variant_collection::VariantCollection};
  13. use cosmic::Cosmic;
  14. use dashmap::DashMap;
  15. use gnomad::GnomAD;
  16. use log::info;
  17. use rayon::prelude::*;
  18. use vep::VEP;
  19. #[derive(Debug, Clone, PartialEq)]
  20. pub enum Annotation {
  21. SoloTumor,
  22. SoloConstit,
  23. Callers(Caller),
  24. Germline,
  25. Somatic,
  26. ShannonEntropy(f64),
  27. ConstitDepth(u16),
  28. ConstitAlt(u16),
  29. LowConstitDepth,
  30. HighConstitAlt,
  31. Cosmic(Cosmic),
  32. GnomAD(GnomAD),
  33. LowEntropy,
  34. VEP(Vec<VEP>),
  35. }
  36. impl fmt::Display for Annotation {
  37. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  38. let str = match self {
  39. Annotation::SoloTumor => "SoloTumor",
  40. Annotation::SoloConstit => "SoloConstit",
  41. Annotation::Callers(caller) => &caller.to_string(),
  42. Annotation::Germline => "Germline",
  43. Annotation::Somatic => "Somatic",
  44. Annotation::ShannonEntropy(_) => "ShannonEntropy",
  45. Annotation::ConstitDepth(_) => "ConstitDepth",
  46. Annotation::ConstitAlt(_) => "ConstitAlt",
  47. Annotation::LowConstitDepth => "LowConstitDepth",
  48. Annotation::HighConstitAlt => "HighConstitAlt",
  49. Annotation::Cosmic(_) => "Cosmic",
  50. Annotation::GnomAD(_) => "GnomAD",
  51. Annotation::LowEntropy => "LowEntropy",
  52. Annotation::VEP(_) => "VEP",
  53. };
  54. write!(f, "{}", str)
  55. }
  56. }
  57. impl FromStr for Annotation {
  58. type Err = anyhow::Error;
  59. fn from_str(s: &str) -> anyhow::Result<Self> {
  60. match s {
  61. "SoloTumor" => Ok(Annotation::SoloTumor),
  62. "SoloConstit" => Ok(Annotation::SoloConstit),
  63. "DeepVariant" => Ok(Annotation::Callers(Caller::DeepVariant)),
  64. "ClairS" => Ok(Annotation::Callers(Caller::ClairS)),
  65. "Germline" => Ok(Annotation::Germline),
  66. "Somatic" => Ok(Annotation::Somatic),
  67. s if s.starts_with("ShannonEntropy") => Ok(Annotation::ShannonEntropy(0.0)),
  68. s if s.starts_with("ConstitDepth") => Ok(Annotation::ConstitDepth(0)),
  69. s if s.starts_with("ConstitAlt") => Ok(Annotation::ConstitAlt(0)),
  70. "LowConstitDepth" => Ok(Annotation::LowConstitDepth),
  71. "HighConstitAlt" => Ok(Annotation::HighConstitAlt),
  72. _ => Err(anyhow::anyhow!("Unknown Annotation: {}", s)),
  73. }
  74. }
  75. }
  76. #[derive(Debug, Clone, PartialEq, Eq)]
  77. pub enum Caller {
  78. DeepVariant,
  79. ClairS,
  80. NanomonSV,
  81. NanomonSVSolo,
  82. Savana,
  83. Severus,
  84. }
  85. impl fmt::Display for Caller {
  86. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  87. match self {
  88. Caller::DeepVariant => write!(f, "DeepVariant"),
  89. Caller::ClairS => write!(f, "ClairS"),
  90. Caller::NanomonSV => write!(f, "NanomonSV"),
  91. Caller::NanomonSVSolo => write!(f, "NanomonSV-solo"),
  92. Caller::Savana => write!(f, "Savana"),
  93. Caller::Severus => write!(f, "Severus"),
  94. }
  95. }
  96. }
  97. #[derive(Debug, Default, Clone)]
  98. pub struct Annotations {
  99. pub store: DashMap<u128, Vec<Annotation>>,
  100. }
  101. #[derive(Debug, Default, Clone)]
  102. pub struct AnnotationsStats {
  103. pub categorical: DashMap<String, u64>,
  104. pub numeric: DashMap<String, HashMap<String, Vec<f64>>>,
  105. }
  106. impl Annotations {
  107. pub fn insert_update(&self, key: u128, add: &[Annotation]) {
  108. self.store
  109. .entry(key)
  110. .or_default()
  111. .extend(add.iter().cloned())
  112. }
  113. pub fn callers_stat(&self) -> AnnotationsStats {
  114. let map: DashMap<String, u64> = DashMap::new();
  115. let num_maps: DashMap<String, HashMap<String, Vec<f64>>> = DashMap::new();
  116. self.store.par_iter().for_each(|e| {
  117. let anns = e.value();
  118. let mut categorical = Vec::new();
  119. let mut numerical = Vec::new();
  120. for ann in anns {
  121. match ann {
  122. Annotation::SoloTumor
  123. | Annotation::SoloConstit
  124. | Annotation::Germline
  125. | Annotation::Somatic
  126. | Annotation::LowConstitDepth
  127. | Annotation::LowEntropy
  128. | Annotation::GnomAD(_)
  129. | Annotation::VEP(_)
  130. | Annotation::HighConstitAlt => categorical.push(ann.to_string()),
  131. Annotation::Callers(caller) => categorical.push(caller.to_string()),
  132. Annotation::ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),
  133. Annotation::ConstitDepth(v) | Annotation::ConstitAlt(v) => {
  134. numerical.push((ann.to_string(), *v as f64));
  135. }
  136. Annotation::Cosmic(c) => numerical.push((ann.to_string(), c.cosmic_cnt as f64)),
  137. }
  138. }
  139. categorical.sort();
  140. categorical.dedup();
  141. let k = categorical.join(" + ");
  142. *map.entry(k.clone()).or_default() += 1;
  143. for (k_num, v_num) in numerical {
  144. num_maps
  145. .entry(k.clone())
  146. .or_default()
  147. .entry(k_num)
  148. .or_default()
  149. .push(v_num);
  150. }
  151. });
  152. println!("\nCallers stats:");
  153. println!("\tcategories: {}", map.len());
  154. let mut n = 0;
  155. map.iter().for_each(|e| {
  156. let k = e.key();
  157. let v = e.value();
  158. n += v;
  159. let mut num_str = Vec::new();
  160. if let Some(nums) = num_maps.get(k) {
  161. num_str.extend(
  162. nums.iter()
  163. .map(|(k_n, v_n)| format!("{k_n} {:.2}", mean(v_n))),
  164. )
  165. }
  166. num_str.sort();
  167. println!("\t{k}\t{v}\t{}", num_str.join("\t"));
  168. });
  169. println!("Total\t{n}");
  170. AnnotationsStats {
  171. categorical: map,
  172. numeric: num_maps,
  173. }
  174. }
  175. pub fn get_keys_filter(
  176. &self,
  177. filter: impl Fn(&Vec<Annotation>) -> bool + Send + Sync,
  178. ) -> Vec<u128> {
  179. self.store
  180. .par_iter()
  181. .filter(|entry| filter(entry.value()))
  182. .map(|entry| *entry.key())
  183. .collect()
  184. }
  185. pub fn retain_variants(
  186. &mut self,
  187. variants: &mut Vec<VariantCollection>,
  188. filter: impl Fn(&Vec<Annotation>) -> bool + Send + Sync,
  189. ) -> usize {
  190. info!("Variant Keys lookup");
  191. let mut keys = HashSet::new();
  192. self.store.retain(|key, value| {
  193. if filter(value) {
  194. keys.insert(*key);
  195. true
  196. } else {
  197. false
  198. }
  199. });
  200. // let keys: Vec<u128> = self
  201. // .store
  202. // .par_iter()
  203. // .filter(|entry| filter(entry.value()))
  204. // .map(|entry| *entry.key())
  205. // .collect();
  206. info!("{} unique Variants to keep", keys.len());
  207. // info!("Removing annotations");
  208. // self.store.retain(|key, _| keys.contains(key));
  209. info!("Removing variants from collections");
  210. let n_removed: usize = variants
  211. .par_iter_mut()
  212. .map(|c| {
  213. let before = c.variants.len();
  214. c.variants = c
  215. .variants
  216. .par_iter()
  217. .filter(|a| keys.contains(&a.hash_variant()))
  218. // .filter(|a| keys.par_iter().any(|k| k == &a.hash_variant()))
  219. .cloned()
  220. .collect();
  221. // c.variants
  222. // .retain(|a| keys.par_iter().any(|k| k == &a.hash_variant()));
  223. let after = c.variants.len();
  224. info!("{} {}\t{}/{}", c.caller, c.category, before - after, before);
  225. before - after
  226. })
  227. .sum();
  228. variants.retain(|e| !e.variants.is_empty());
  229. info!("{n_removed} variants removed from collections.");
  230. n_removed
  231. }
  232. pub fn retain_keys(&mut self, keys_to_keep: &HashSet<u128>) {
  233. self.store.retain(|key, _| keys_to_keep.contains(key));
  234. }
  235. pub fn remove_keys(&mut self, keys_to_remove: &HashSet<u128>) {
  236. self.store.retain(|key, _| !keys_to_remove.contains(key));
  237. }
  238. pub fn solo_constit_boundaries(&self, max_alt_constit: u16, min_constit_depth: u16) {
  239. self.store
  240. .iter_mut()
  241. .filter(|anns| {
  242. let contains = anns
  243. .iter()
  244. .any(|item| matches!(item, Annotation::SoloTumor));
  245. let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic));
  246. contains && contains_not
  247. })
  248. .for_each(|mut e| {
  249. let v = e.value_mut();
  250. let mut to_add = Vec::new();
  251. v.iter().for_each(|ann| match ann {
  252. Annotation::ConstitDepth(v) => {
  253. if *v < min_constit_depth {
  254. to_add.push(Annotation::LowConstitDepth);
  255. }
  256. }
  257. Annotation::ConstitAlt(v) => {
  258. if *v > max_alt_constit {
  259. to_add.push(Annotation::HighConstitAlt);
  260. }
  261. }
  262. _ => (),
  263. });
  264. v.extend(to_add);
  265. });
  266. }
  267. pub fn count_annotations(&self, annotation_types: Vec<Annotation>) -> Vec<usize> {
  268. let annotation_types = Arc::new(annotation_types);
  269. self.store
  270. .par_iter()
  271. .fold(
  272. || vec![0; annotation_types.len()],
  273. |mut counts, r| {
  274. let annotations = r.value();
  275. for (index, annotation_type) in annotation_types.iter().enumerate() {
  276. counts[index] +=
  277. annotations.iter().filter(|a| *a == annotation_type).count();
  278. }
  279. counts
  280. },
  281. )
  282. .reduce(
  283. || vec![0; annotation_types.len()],
  284. |mut a, b| {
  285. for i in 0..a.len() {
  286. a[i] += b[i];
  287. }
  288. a
  289. },
  290. )
  291. }
  292. pub fn low_shannon_entropy(&mut self, min_shannon_entropy: f64) {
  293. self.store.iter_mut().for_each(|mut e| {
  294. let anns = e.value_mut();
  295. let mut is_low = false;
  296. anns.iter().for_each(|ann| {
  297. if let Annotation::ShannonEntropy(ent) = ann {
  298. if *ent < min_shannon_entropy && !anns.contains(&Annotation::Somatic) {
  299. is_low = true
  300. }
  301. }
  302. });
  303. if is_low {
  304. anns.push(Annotation::LowEntropy);
  305. }
  306. });
  307. }
  308. }
  309. pub trait CallerCat {
  310. fn caller_cat(&self) -> (Caller, Annotation);
  311. }