|
@@ -5,7 +5,15 @@ pub mod ncbi;
|
|
|
pub mod vep;
|
|
pub mod vep;
|
|
|
|
|
|
|
|
use std::{
|
|
use std::{
|
|
|
- collections::{HashMap, HashSet}, fmt, fs::File, io::{Read, Write}, str::FromStr, sync::Arc
|
|
|
|
|
|
|
+ collections::{HashMap, HashSet},
|
|
|
|
|
+ fmt,
|
|
|
|
|
+ fs::File,
|
|
|
|
|
+ io::{Read, Write},
|
|
|
|
|
+ str::FromStr,
|
|
|
|
|
+ sync::{
|
|
|
|
|
+ atomic::{AtomicU32, Ordering},
|
|
|
|
|
+ Arc,
|
|
|
|
|
+ },
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
use crate::{
|
|
use crate::{
|
|
@@ -253,7 +261,7 @@ impl Annotations {
|
|
|
.push(v_num);
|
|
.push(v_num);
|
|
|
}
|
|
}
|
|
|
});
|
|
});
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
println!("\nCallers stats:");
|
|
println!("\nCallers stats:");
|
|
|
println!("\tn categories: {}", map.len());
|
|
println!("\tn categories: {}", map.len());
|
|
|
let mut n = 0;
|
|
let mut n = 0;
|
|
@@ -284,45 +292,60 @@ impl Annotations {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- pub fn vep_stats(&self) -> anyhow::Result<()> {
|
|
|
|
|
- let (n_coding, n_syn, total) = self
|
|
|
|
|
- .store
|
|
|
|
|
- .par_iter()
|
|
|
|
|
- .map(|v| {
|
|
|
|
|
- v.value()
|
|
|
|
|
- .iter()
|
|
|
|
|
- .find_map(|ann| {
|
|
|
|
|
- if let Annotation::VEP(veps) = ann {
|
|
|
|
|
- get_best_vep(veps).ok()
|
|
|
|
|
- } else {
|
|
|
|
|
- None
|
|
|
|
|
- }
|
|
|
|
|
- })
|
|
|
|
|
- .and_then(|vep| vep.consequence.clone())
|
|
|
|
|
- .map_or((0, 0, 0), |consequences| {
|
|
|
|
|
- let impact = consequences
|
|
|
|
|
- .iter()
|
|
|
|
|
- .map(VepImpact::from)
|
|
|
|
|
- .min()
|
|
|
|
|
- .unwrap_or(VepImpact::MODIFIER);
|
|
|
|
|
- match impact {
|
|
|
|
|
- VepImpact::HIGH | VepImpact::MODERATE => (1, 0, 1),
|
|
|
|
|
- _ => {
|
|
|
|
|
- if consequences.contains(&VepConsequence::SynonymousVariant) {
|
|
|
|
|
- (0, 1, 1)
|
|
|
|
|
- } else {
|
|
|
|
|
- (0, 0, 1)
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- })
|
|
|
|
|
- })
|
|
|
|
|
- .reduce(|| (0, 0, 0), |a, b| (a.0 + b.0, a.1 + b.1, a.2 + b.2));
|
|
|
|
|
|
|
+ pub fn vep_stats(&self) -> anyhow::Result<VepStats> {
|
|
|
|
|
+ let genes: DashMap<String, u32> = DashMap::new();
|
|
|
|
|
+ let genes_distant: DashMap<String, u32> = DashMap::new();
|
|
|
|
|
+ let features: DashMap<String, u32> = DashMap::new();
|
|
|
|
|
+ let consequences: DashMap<String, u32> = DashMap::new();
|
|
|
|
|
+ let impact: DashMap<String, u32> = DashMap::new();
|
|
|
|
|
+ let n_all = Arc::new(AtomicU32::new(0));
|
|
|
|
|
+ let n_vep = Arc::new(AtomicU32::new(0));
|
|
|
|
|
|
|
|
- println!(
|
|
|
|
|
- "VEP annotations\n\t- Coding: {n_coding}\n\t- Synonymous: {n_syn}\nTotal: {total}"
|
|
|
|
|
- );
|
|
|
|
|
- Ok(())
|
|
|
|
|
|
|
+ self.store.par_iter().for_each(|e| {
|
|
|
|
|
+ n_all.fetch_add(1, Ordering::SeqCst);
|
|
|
|
|
+
|
|
|
|
|
+ let best_vep_opt = e.value().iter().find_map(|a| {
|
|
|
|
|
+ if let Annotation::VEP(veps) = a {
|
|
|
|
|
+ get_best_vep(veps).ok()
|
|
|
|
|
+ } else {
|
|
|
|
|
+ None
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ if let Some(best_vep) = best_vep_opt {
|
|
|
|
|
+ n_vep.fetch_add(1, Ordering::SeqCst);
|
|
|
|
|
+ if let Some(gene) = best_vep.gene {
|
|
|
|
|
+ if best_vep.extra.distance.is_some() {
|
|
|
|
|
+ *genes_distant.entry(gene).or_default() += 1;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ *genes.entry(gene).or_default() += 1;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if let Some(feature) = best_vep.feature {
|
|
|
|
|
+ *features.entry(feature).or_default() += 1;
|
|
|
|
|
+ }
|
|
|
|
|
+ if let Some(cs) = best_vep.consequence {
|
|
|
|
|
+ let mut cs = cs.into_iter().map(String::from).collect::<Vec<String>>();
|
|
|
|
|
+ cs.sort();
|
|
|
|
|
+ *consequences.entry(cs.join(" ")).or_default() += 1;
|
|
|
|
|
+ }
|
|
|
|
|
+ if let Some(imp) = best_vep.extra.impact {
|
|
|
|
|
+ *impact.entry(String::from(imp)).or_default() += 1;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ let vep_stats = VepStats {
|
|
|
|
|
+ genes: genes.into_iter().collect(),
|
|
|
|
|
+ genes_distant: genes_distant.into_iter().collect(),
|
|
|
|
|
+ features: features.into_iter().collect(),
|
|
|
|
|
+ consequences: consequences.into_iter().collect(),
|
|
|
|
|
+ impact: impact.into_iter().collect(),
|
|
|
|
|
+ n_all: n_all.load(Ordering::SeqCst),
|
|
|
|
|
+ n_vep: n_vep.load(Ordering::SeqCst),
|
|
|
|
|
+ };
|
|
|
|
|
+
|
|
|
|
|
+ Ok(vep_stats)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub fn get_keys_filter(
|
|
pub fn get_keys_filter(
|
|
@@ -358,12 +381,7 @@ impl Annotations {
|
|
|
let before = c.variants.len();
|
|
let before = c.variants.len();
|
|
|
c.variants.retain(|a| keys.contains(&a.hash()));
|
|
c.variants.retain(|a| keys.contains(&a.hash()));
|
|
|
let after = c.variants.len();
|
|
let after = c.variants.len();
|
|
|
- info!(
|
|
|
|
|
- "\t- {}\t{}/{}",
|
|
|
|
|
- c.caller,
|
|
|
|
|
- before - after,
|
|
|
|
|
- before
|
|
|
|
|
- );
|
|
|
|
|
|
|
+ info!("\t- {}\t{}/{}", c.caller, before - after, before);
|
|
|
before - after
|
|
before - after
|
|
|
})
|
|
})
|
|
|
.sum();
|
|
.sum();
|
|
@@ -468,13 +486,42 @@ impl Annotations {
|
|
|
&& !anns
|
|
&& !anns
|
|
|
.iter()
|
|
.iter()
|
|
|
.any(|a| matches!(a, Annotation::Callers(_, Sample::Somatic)))
|
|
.any(|a| matches!(a, Annotation::Callers(_, Sample::Somatic)))
|
|
|
- }) && !anns.contains(&Annotation::LowEntropy) {
|
|
|
|
|
|
|
+ }) && !anns.contains(&Annotation::LowEntropy)
|
|
|
|
|
+ {
|
|
|
anns.push(Annotation::LowEntropy);
|
|
anns.push(Annotation::LowEntropy);
|
|
|
}
|
|
}
|
|
|
});
|
|
});
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+#[derive(Debug, Serialize, Deserialize)]
|
|
|
|
|
+pub struct VepStats {
|
|
|
|
|
+ pub genes: Vec<(String, u32)>,
|
|
|
|
|
+ pub genes_distant: Vec<(String, u32)>,
|
|
|
|
|
+ pub features: Vec<(String, u32)>,
|
|
|
|
|
+ pub consequences: Vec<(String, u32)>,
|
|
|
|
|
+ pub impact: Vec<(String, u32)>,
|
|
|
|
|
+ pub n_all: u32,
|
|
|
|
|
+ pub n_vep: u32,
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl VepStats {
|
|
|
|
|
+ pub fn save_to_json(&self, file_path: &str) -> anyhow::Result<()> {
|
|
|
|
|
+ let json = serde_json::to_string_pretty(self)?;
|
|
|
|
|
+ let mut file = File::create(file_path)?;
|
|
|
|
|
+ file.write_all(json.as_bytes())?;
|
|
|
|
|
+ Ok(())
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn load_from_json(file_path: &str) -> anyhow::Result<Self> {
|
|
|
|
|
+ let mut file = File::open(file_path)?;
|
|
|
|
|
+ let mut contents = String::new();
|
|
|
|
|
+ file.read_to_string(&mut contents)?;
|
|
|
|
|
+ let stats: Self = serde_json::from_str(&contents)?;
|
|
|
|
|
+ Ok(stats)
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
pub trait CallerCat {
|
|
pub trait CallerCat {
|
|
|
fn caller_cat(&self) -> Annotation;
|
|
fn caller_cat(&self) -> Annotation;
|
|
|
}
|
|
}
|