|
|
@@ -1,8 +1,15 @@
|
|
|
-use log::{info, kv::Source};
|
|
|
-use std::{collections::HashMap, fs::File, sync::Arc};
|
|
|
+use itertools::Itertools;
|
|
|
+use log::info;
|
|
|
+use std::{
|
|
|
+ collections::HashMap,
|
|
|
+ fs::{self, File},
|
|
|
+ io::Write,
|
|
|
+ path::Path,
|
|
|
+ sync::Arc,
|
|
|
+};
|
|
|
|
|
|
use crate::{
|
|
|
- annotation::{Annotation, Annotations, AnnotationsStats, Caller},
|
|
|
+ annotation::{Annotation, Annotations, AnnotationsStats, Sample},
|
|
|
callers::{
|
|
|
clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV,
|
|
|
savana::Savana, severus::Severus,
|
|
|
@@ -46,27 +53,27 @@ pub struct SomaticStats {
|
|
|
|
|
|
#[derive(Debug, Default, Clone)]
|
|
|
pub struct InputStats {
|
|
|
- pub solo_tumor: Vec<(Caller, usize)>,
|
|
|
- pub solo_constit: Vec<(Caller, usize)>,
|
|
|
- pub germline: Vec<(Caller, usize)>,
|
|
|
- pub somatic: Vec<(Caller, usize)>,
|
|
|
+ pub solo_tumor: Vec<(Annotation, usize)>,
|
|
|
+ pub solo_constit: Vec<(Annotation, usize)>,
|
|
|
+ pub germline: Vec<(Annotation, usize)>,
|
|
|
+ pub somatic: Vec<(Annotation, usize)>,
|
|
|
}
|
|
|
|
|
|
impl InputStats {
|
|
|
pub fn from_collections(collections: &[VariantCollection]) -> Self {
|
|
|
let mut stats = Self::default();
|
|
|
for collection in collections.iter() {
|
|
|
- match collection.category {
|
|
|
- Annotation::SoloTumor => stats
|
|
|
+ match collection.caller {
|
|
|
+ Annotation::Callers(_, Sample::SoloTumor) => stats
|
|
|
.solo_tumor
|
|
|
.push((collection.caller.clone(), collection.variants.len())),
|
|
|
- Annotation::SoloConstit => stats
|
|
|
+ Annotation::Callers(_, Sample::SoloConstit) => stats
|
|
|
.solo_constit
|
|
|
.push((collection.caller.clone(), collection.variants.len())),
|
|
|
- Annotation::Germline => stats
|
|
|
+ Annotation::Callers(_, Sample::Germline) => stats
|
|
|
.germline
|
|
|
.push((collection.caller.clone(), collection.variants.len())),
|
|
|
- Annotation::Somatic => stats
|
|
|
+ Annotation::Callers(_, Sample::Somatic) => stats
|
|
|
.somatic
|
|
|
.push((collection.caller.clone(), collection.variants.len())),
|
|
|
_ => (),
|
|
|
@@ -83,7 +90,8 @@ impl SomaticStats {
|
|
|
..Default::default()
|
|
|
}
|
|
|
}
|
|
|
- pub fn annot_init(&self, stats: AnnotationsStats) {
|
|
|
+
|
|
|
+ pub fn annot_init(&self, stats: &AnnotationsStats, json_path: &str) -> anyhow::Result<()> {
|
|
|
let stats: Vec<(Vec<Annotation>, u64)> = stats
|
|
|
.categorical
|
|
|
.iter()
|
|
|
@@ -102,12 +110,12 @@ impl SomaticStats {
|
|
|
self.input
|
|
|
.somatic
|
|
|
.iter()
|
|
|
- .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::Somatic])
|
|
|
- .collect::<Vec<Vec<Annotation>>>(),
|
|
|
+ .map(|(caller, _)| caller.clone())
|
|
|
+ .collect::<Vec<Annotation>>(),
|
|
|
self.input
|
|
|
.solo_tumor
|
|
|
.iter()
|
|
|
- .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::SoloTumor])
|
|
|
+ .map(|(caller, _)| caller.clone())
|
|
|
.collect(),
|
|
|
]
|
|
|
.concat();
|
|
|
@@ -116,31 +124,30 @@ impl SomaticStats {
|
|
|
self.input
|
|
|
.germline
|
|
|
.iter()
|
|
|
- .map(|(caller, _)| vec![Annotation::Callers(caller.clone()), Annotation::Germline])
|
|
|
- .collect::<Vec<Vec<Annotation>>>(),
|
|
|
+ .map(|(caller, _)| caller.clone())
|
|
|
+ .collect::<Vec<Annotation>>(),
|
|
|
self.input
|
|
|
.solo_constit
|
|
|
.iter()
|
|
|
- .map(|(caller, _)| {
|
|
|
- vec![Annotation::Callers(caller.clone()), Annotation::SoloConstit]
|
|
|
- })
|
|
|
+ .map(|(caller, _)| caller.clone())
|
|
|
.collect(),
|
|
|
]
|
|
|
.concat();
|
|
|
|
|
|
let mut with_germline: HashMap<String, HashMap<String, u64>> = HashMap::new();
|
|
|
stats.iter().for_each(|(anns, v)| {
|
|
|
- if anns
|
|
|
- .iter()
|
|
|
- .any(|a| matches!(a, Annotation::SoloConstit | Annotation::Germline))
|
|
|
- {
|
|
|
+ if anns.iter().any(|a| {
|
|
|
+ matches!(
|
|
|
+ a,
|
|
|
+ Annotation::Callers(_, Sample::SoloConstit)
|
|
|
+ | Annotation::Callers(_, Sample::Germline)
|
|
|
+ )
|
|
|
+ }) {
|
|
|
let n_by_tumor: Vec<(String, u64)> = callers_somatic_solo_tumor
|
|
|
.iter()
|
|
|
.flat_map(|tumor| {
|
|
|
- if tumor.iter().all(|a| anns.contains(a)) {
|
|
|
- let tum_call =
|
|
|
- format!("{} {}", tumor.first().unwrap(), tumor.get(1).unwrap());
|
|
|
- vec![(tum_call, *v)]
|
|
|
+ if anns.contains(tumor) {
|
|
|
+ vec![(tumor.to_string(), *v)]
|
|
|
} else {
|
|
|
vec![]
|
|
|
}
|
|
|
@@ -150,10 +157,8 @@ impl SomaticStats {
|
|
|
let mut germline_caller: Vec<String> = callers_germline_solo_constit
|
|
|
.iter()
|
|
|
.flat_map(|germ| {
|
|
|
- if germ.iter().all(|a| anns.contains(a)) {
|
|
|
- let germ_call =
|
|
|
- format!("{} {}", germ.first().unwrap(), germ.get(1).unwrap());
|
|
|
- vec![germ_call]
|
|
|
+ if anns.contains(germ) {
|
|
|
+ vec![germ.to_string()]
|
|
|
} else {
|
|
|
vec![]
|
|
|
}
|
|
|
@@ -162,45 +167,67 @@ impl SomaticStats {
|
|
|
germline_caller.sort();
|
|
|
let germline_caller = germline_caller.join(" + ");
|
|
|
|
|
|
-
|
|
|
n_by_tumor.iter().for_each(|(tumoral_caller, n)| {
|
|
|
if let Some(row) = with_germline.get_mut(tumoral_caller) {
|
|
|
- // germline_caller.iter().for_each(|germline_caller| {
|
|
|
- if tumoral_caller == "ClairS Somatic" {
|
|
|
- println!("{tumoral_caller} {germline_caller} {n}");
|
|
|
- }
|
|
|
- if let Some(col) = row.get_mut(&germline_caller) {
|
|
|
- *col += *n;
|
|
|
- } else {
|
|
|
- row.insert(germline_caller.to_string(), *n);
|
|
|
- }
|
|
|
- // });
|
|
|
+ if let Some(col) = row.get_mut(&germline_caller) {
|
|
|
+ *col += *n;
|
|
|
+ } else {
|
|
|
+ row.insert(germline_caller.to_string(), *n);
|
|
|
+ }
|
|
|
} else {
|
|
|
let mut row = HashMap::new();
|
|
|
- // germline_caller.iter().for_each(|germline_caller| {
|
|
|
- row.insert(germline_caller.to_string(), *n);
|
|
|
- // });
|
|
|
+ row.insert(germline_caller.to_string(), *n);
|
|
|
with_germline.insert(tumoral_caller.to_string(), row);
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
});
|
|
|
|
|
|
- let mut germlines_callers: Vec<String> = with_germline.iter().flat_map(|(_, r)| {
|
|
|
- r.iter().map(|(k,_)| k.to_string()).collect::<Vec<String>>()
|
|
|
- }).collect();
|
|
|
+ let mut germlines_callers: Vec<String> = with_germline
|
|
|
+ .iter()
|
|
|
+ .flat_map(|(_, r)| {
|
|
|
+ r.iter()
|
|
|
+ .map(|(k, _)| k.to_string())
|
|
|
+ .collect::<Vec<String>>()
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
germlines_callers.sort();
|
|
|
germlines_callers.dedup();
|
|
|
|
|
|
- with_germline.iter().for_each(|(tumor, row)| {
|
|
|
- print!("{tumor}\t");
|
|
|
- germlines_callers.iter().for_each(|g| {
|
|
|
- let v = row.get(g).unwrap_or(&0);
|
|
|
- print!("{g}:{v}\t");
|
|
|
- });
|
|
|
- println!();
|
|
|
- });
|
|
|
- println!();
|
|
|
+ let mut json = Vec::new();
|
|
|
+ let mut lines: Vec<String> = with_germline
|
|
|
+ .iter()
|
|
|
+ .map(|(tumor, row)| {
|
|
|
+ json.push(format!(
|
|
|
+ "{{caller_name:\"{tumor}\", germline: [{}] }}",
|
|
|
+ germlines_callers
|
|
|
+ .iter()
|
|
|
+ .map(|g| {
|
|
|
+ let v = row.get(g).unwrap_or(&0);
|
|
|
+ format!("{{{g}: {v}}}")
|
|
|
+ })
|
|
|
+ .join(", ")
|
|
|
+ ));
|
|
|
+ format!(
|
|
|
+ "{tumor}\t{}",
|
|
|
+ germlines_callers
|
|
|
+ .iter()
|
|
|
+ .map(|g| {
|
|
|
+ let v = row.get(g).unwrap_or(&0);
|
|
|
+ format!("{g}: {v}")
|
|
|
+ })
|
|
|
+ .join("\t")
|
|
|
+ )
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ lines.sort();
|
|
|
+ println!("{}", lines.join("\n"));
|
|
|
+
|
|
|
+ let json = format!("[{}]", json.join(", "));
|
|
|
+ let mut file = File::create(json_path)?;
|
|
|
+ file.write_all(json.as_bytes())?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -211,6 +238,11 @@ impl Run for Somatic {
|
|
|
let config = self.config.clone();
|
|
|
let annotations = Arc::new(self.annotations.clone());
|
|
|
|
|
|
+ // Stats dir
|
|
|
+ let stats_dir = config.somatic_pipe_stats(&id);
|
|
|
+ if !Path::new(&stats_dir).exists() {
|
|
|
+ fs::create_dir(&stats_dir)?;
|
|
|
+ }
|
|
|
// TODO: GZ !!!
|
|
|
// LongphasePhase::initialize(&id, self.config.clone())?.run()?;
|
|
|
|
|
|
@@ -222,7 +254,7 @@ impl Run for Somatic {
|
|
|
&config,
|
|
|
ClairS,
|
|
|
NanomonSV,
|
|
|
- Severus,
|
|
|
+ // Severus,
|
|
|
Savana,
|
|
|
DeepSomatic
|
|
|
);
|
|
|
@@ -256,26 +288,31 @@ impl Run for Somatic {
|
|
|
|
|
|
let mut annotations = Arc::try_unwrap(annotations)
|
|
|
.map_err(|e| anyhow::anyhow!("Failed to unwrap Arc: {:?}", e))?;
|
|
|
- let caller_cat_anns = |v: &Annotation| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- )
|
|
|
- };
|
|
|
+ let caller_cat_anns = |v: &Annotation| matches!(v, Annotation::Callers(_, _));
|
|
|
let annot_init = annotations.callers_stat(Some(Box::new(caller_cat_anns)));
|
|
|
- somatic_stats.annot_init(annot_init);
|
|
|
+ somatic_stats.annot_init(
|
|
|
+ &annot_init,
|
|
|
+ &format!("{stats_dir}/{id}_germline_filter.json"),
|
|
|
+ )?;
|
|
|
+ annot_init.save_to_json(&format!("{stats_dir}/{id}_annotations_01.json"))?;
|
|
|
|
|
|
// Filter: Variants neither Germline nor SoloConstit
|
|
|
info!("Keeping somatic variants (variants neither in solo nor in germline).");
|
|
|
somatic_stats.n_constit_germline =
|
|
|
annotations.retain_variants(&mut variants_collections, |anns| {
|
|
|
- !anns.contains(&Annotation::Germline) && !anns.contains(&Annotation::SoloConstit)
|
|
|
+ !anns.iter().any(|ann| {
|
|
|
+ matches!(
|
|
|
+ ann,
|
|
|
+ Annotation::Callers(_, Sample::Germline)
|
|
|
+ | Annotation::Callers(_, Sample::SoloConstit)
|
|
|
+ )
|
|
|
+ })
|
|
|
});
|
|
|
- annotations.callers_stat(Some(Box::new(caller_cat_anns)));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(caller_cat_anns)))
|
|
|
+ .save_to_json(&format!(
|
|
|
+ "{stats_dir}/{id}_annotations_02_post_germline.json"
|
|
|
+ ))?;
|
|
|
|
|
|
// Annotation: BAM depth, n_alt
|
|
|
info!("Reading Constit BAM file for depth and pileup annotation.");
|
|
|
@@ -286,20 +323,18 @@ impl Run for Somatic {
|
|
|
self.config.solo_max_alt_constit,
|
|
|
self.config.solo_min_constit_depth,
|
|
|
);
|
|
|
- annotations.callers_stat(Some(Box::new(|v| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- | Annotation::ConstitAlt(_)
|
|
|
- | Annotation::ConstitDepth(_)
|
|
|
- | Annotation::HighConstitAlt
|
|
|
- | Annotation::LowConstitDepth
|
|
|
- )
|
|
|
- })));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_, _)
|
|
|
+ | Annotation::ConstitAlt(_)
|
|
|
+ | Annotation::ConstitDepth(_)
|
|
|
+ | Annotation::HighConstitAlt
|
|
|
+ | Annotation::LowConstitDepth
|
|
|
+ )
|
|
|
+ })))
|
|
|
+ .save_to_json(&format!("{stats_dir}/{id}_annotations_03_bam.json"))?;
|
|
|
|
|
|
// Filter: Remove LowConstitDepth from annotations and variants collections
|
|
|
info!(
|
|
|
@@ -329,18 +364,16 @@ impl Run for Somatic {
|
|
|
somatic_stats.n_high_alt_constit, self.config.solo_max_alt_constit
|
|
|
);
|
|
|
|
|
|
- annotations.callers_stat(Some(Box::new(|v| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- | Annotation::ConstitAlt(_)
|
|
|
- | Annotation::ConstitDepth(_)
|
|
|
- )
|
|
|
- })));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_, _)
|
|
|
+ | Annotation::ConstitAlt(_)
|
|
|
+ | Annotation::ConstitDepth(_)
|
|
|
+ )
|
|
|
+ })))
|
|
|
+ .save_to_json(&format!("{stats_dir}/{id}_annotations_04_bam_filter.json"))?;
|
|
|
|
|
|
// Annotation: Entropy
|
|
|
info!(
|
|
|
@@ -360,18 +393,14 @@ impl Run for Somatic {
|
|
|
ext_annot.annotate(&c.variants, &annotations)?;
|
|
|
Ok(())
|
|
|
})?;
|
|
|
- annotations.callers_stat(Some(Box::new(|v| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- | Annotation::ConstitAlt(_)
|
|
|
- | Annotation::GnomAD(_)
|
|
|
- )
|
|
|
- })));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_, _) | Annotation::ConstitAlt(_) | Annotation::GnomAD(_)
|
|
|
+ )
|
|
|
+ })))
|
|
|
+ .save_to_json(&format!("{stats_dir}/{id}_annotations_05_gnomad.json"))?;
|
|
|
|
|
|
// Filter: Remove variants in Gnomad and in constit bam
|
|
|
info!("Filtering out variants in GnomAD and in constit bam at low AF.");
|
|
|
@@ -404,18 +433,16 @@ impl Run for Somatic {
|
|
|
"{} variants filtered, with constit alt <= max contig alt ({}) and in GnomAD.",
|
|
|
somatic_stats.n_high_alt_constit_gnomad, self.config.solo_max_alt_constit
|
|
|
);
|
|
|
- annotations.callers_stat(Some(Box::new(|v| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- | Annotation::ConstitAlt(_)
|
|
|
- | Annotation::GnomAD(_)
|
|
|
- )
|
|
|
- })));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(
|
|
|
+ v,
|
|
|
+ Annotation::Callers(_, _) | Annotation::ConstitAlt(_) | Annotation::GnomAD(_)
|
|
|
+ )
|
|
|
+ })))
|
|
|
+ .save_to_json(&format!(
|
|
|
+ "{stats_dir}/{id}_annotations_06_gnomad_filter.json"
|
|
|
+ ))?;
|
|
|
|
|
|
// Annotation low entropy
|
|
|
annotations.low_shannon_entropy(self.config.min_shannon_entropy);
|
|
|
@@ -423,33 +450,21 @@ impl Run for Somatic {
|
|
|
|
|
|
// Filtering low entropy for solo variants.
|
|
|
info!("Filtering low entropies");
|
|
|
- annotations.callers_stat(Some(Box::new(|v| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- | Annotation::LowEntropy
|
|
|
- )
|
|
|
- })));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(|v| {
|
|
|
+ matches!(v, Annotation::Callers(_, _) | Annotation::LowEntropy)
|
|
|
+ })))
|
|
|
+ .save_to_json(&format!("{stats_dir}/{id}_annotations_07_entropy.json"))?;
|
|
|
|
|
|
somatic_stats.n_low_entropies = annotations
|
|
|
.retain_variants(&mut variants_collections, |anns| {
|
|
|
!anns.contains(&Annotation::LowEntropy)
|
|
|
});
|
|
|
- annotations.callers_stat(Some(Box::new(|v| {
|
|
|
- matches!(
|
|
|
- v,
|
|
|
- Annotation::Callers(_)
|
|
|
- | Annotation::Germline
|
|
|
- | Annotation::Somatic
|
|
|
- | Annotation::SoloConstit
|
|
|
- | Annotation::SoloTumor
|
|
|
- | Annotation::LowEntropy
|
|
|
- )
|
|
|
- })));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(|v| matches!(v, Annotation::Callers(_, _)))))
|
|
|
+ .save_to_json(&format!(
|
|
|
+ "{stats_dir}/{id}_annotations_08_entropy_filter.json"
|
|
|
+ ))?;
|
|
|
|
|
|
// VEP
|
|
|
info!("VEP annotation.");
|
|
|
@@ -460,7 +475,9 @@ impl Run for Somatic {
|
|
|
ext_annot.annotate_vep(&c.variants, &annotations)?;
|
|
|
Ok(())
|
|
|
})?;
|
|
|
- annotations.callers_stat(Some(Box::new(caller_cat_anns)));
|
|
|
+ annotations
|
|
|
+ .callers_stat(Some(Box::new(caller_cat_anns)))
|
|
|
+ .save_to_json(&format!("{stats_dir}/{id}_annotations_09_vep.json"))?;
|
|
|
|
|
|
annotations.vep_stats()?;
|
|
|
|