// mod.rs: annotation labels (`Annotation`, `Caller`) and `Annotations`, a
// `DashMap`-backed store mapping `u128` keys to lists of annotations.
  1. use std::{
  2. collections::{HashMap, HashSet},
  3. fmt,
  4. };
  5. use crate::helpers::mean;
  6. use dashmap::DashMap;
  7. use rayon::prelude::*;
  8. #[derive(Debug, Clone, PartialEq)]
  9. pub enum Annotation {
  10. SoloDiag,
  11. SoloConstit,
  12. Callers(Caller),
  13. Germline,
  14. Somatic,
  15. ShannonEntropy(f64),
  16. ConstitDepth(u16),
  17. ConstitAlt(u16),
  18. LowConstitDepth,
  19. HighConstitAlt,
  20. }
  21. impl fmt::Display for Annotation {
  22. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  23. let str = match self {
  24. Annotation::SoloDiag => "SoloDiag",
  25. Annotation::SoloConstit => "SoloConstit",
  26. Annotation::Callers(caller) => &caller.to_string(),
  27. Annotation::Germline => "Germline",
  28. Annotation::Somatic => "Somatic",
  29. Annotation::ShannonEntropy(_) => "ShannonEntropy",
  30. Annotation::ConstitDepth(_) => "ConstitDepth",
  31. Annotation::ConstitAlt(_) => "ConstitAlt",
  32. Annotation::LowConstitDepth => "LowConstitDepth",
  33. Annotation::HighConstitAlt => "HighConstitAlt",
  34. };
  35. write!(f, "{}", str)
  36. }
  37. }
  /// Identifies the caller named by an `Annotation::Callers` annotation.
  #[derive(Clone, Debug, PartialEq, Eq)]
  pub enum Caller {
      /// Displayed as `"DeepVariant"`.
      DeepVariant,
      /// Displayed as `"ClairS"`.
      ClairS,
  }
  43. impl fmt::Display for Caller {
  44. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  45. match self {
  46. Caller::DeepVariant => write!(f, "DeepVariant"),
  47. Caller::ClairS => write!(f, "ClairS"),
  48. }
  49. }
  50. }
  51. #[derive(Debug, Default, Clone)]
  52. pub struct Annotations {
  53. pub store: DashMap<u128, Vec<Annotation>>,
  54. }
  55. impl Annotations {
  56. pub fn insert_update(&self, key: u128, add: &[Annotation]) {
  57. self.store
  58. .entry(key)
  59. .or_default()
  60. .extend(add.iter().cloned())
  61. }
  62. pub fn callers_stat(&self) {
  63. let map: DashMap<String, u64> = DashMap::new();
  64. let num_maps: DashMap<String, HashMap<String, Vec<f64>>> = DashMap::new();
  65. self.store.par_iter().for_each(|e| {
  66. let anns = e.value();
  67. let mut categorical = Vec::new();
  68. let mut numerical = Vec::new();
  69. for ann in anns {
  70. match ann {
  71. Annotation::SoloDiag
  72. | Annotation::SoloConstit
  73. | Annotation::Germline
  74. | Annotation::Somatic
  75. | Annotation::LowConstitDepth
  76. | Annotation::HighConstitAlt => categorical.push(ann.to_string()),
  77. Annotation::Callers(caller) => categorical.push(caller.to_string()),
  78. Annotation::ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),
  79. Annotation::ConstitDepth(v) | Annotation::ConstitAlt(v) => {
  80. numerical.push((ann.to_string(), *v as f64));
  81. }
  82. }
  83. }
  84. categorical.sort();
  85. categorical.dedup();
  86. let k = categorical.join(" + ");
  87. *map.entry(k.clone()).or_default() += 1;
  88. for (k_num, v_num) in numerical {
  89. num_maps
  90. .entry(k.clone())
  91. .or_default()
  92. .entry(k_num)
  93. .or_default()
  94. .push(v_num);
  95. }
  96. });
  97. println!("\nCallers stats:");
  98. println!("\tcategories: {}", map.len());
  99. let mut n = 0;
  100. map.iter().for_each(|e| {
  101. let k = e.key();
  102. let v = e.value();
  103. n += v;
  104. let mut num_str = Vec::new();
  105. if let Some(nums) = num_maps.get(k) {
  106. num_str.extend(
  107. nums.iter()
  108. .map(|(k_n, v_n)| format!("{k_n} {:.2}", mean(v_n))),
  109. )
  110. }
  111. num_str.sort();
  112. println!("\t{k}\t{v}\t{}", num_str.join("\t"));
  113. });
  114. println!("Total\t{n}");
  115. }
  116. pub fn get_keys_filter(
  117. &self,
  118. filter: impl Fn(&Vec<Annotation>) -> bool + Send + Sync,
  119. ) -> Vec<u128> {
  120. self.store
  121. .par_iter()
  122. .filter(|entry| filter(entry.value()))
  123. .map(|entry| *entry.key())
  124. .collect()
  125. }
  126. pub fn retain_keys(&mut self, keys_to_keep: &HashSet<u128>) {
  127. self.store.retain(|key, _| keys_to_keep.contains(key));
  128. }
  129. pub fn solo_constit_boundaries(&self, max_alt_constit: u16, min_constit_depth: u16) {
  130. self.store
  131. .iter_mut()
  132. .filter(|anns| {
  133. let contains = anns.iter().any(|item| matches!(item, Annotation::SoloDiag));
  134. let contains_not = anns.iter().all(|item| !matches!(item, Annotation::Somatic));
  135. contains && contains_not
  136. })
  137. .for_each(|mut e| {
  138. let v = e.value_mut();
  139. let mut to_add = Vec::new();
  140. v.iter().for_each(|ann| match ann {
  141. Annotation::ConstitDepth(v) => {
  142. if *v < min_constit_depth {
  143. to_add.push(Annotation::LowConstitDepth);
  144. }
  145. }
  146. Annotation::ConstitAlt(v) => {
  147. if *v > max_alt_constit {
  148. to_add.push(Annotation::HighConstitAlt);
  149. }
  150. },
  151. _ => (),
  152. });
  153. v.extend(to_add);
  154. });
  155. }
  156. }