|
|
@@ -37,7 +37,7 @@ use noodles_fasta::indexed_reader::Builder as FastaBuilder;
|
|
|
use noodles_gff as gff;
|
|
|
|
|
|
use rayon::prelude::*;
|
|
|
-use serde::{Deserialize, Serialize, Serializer, ser::SerializeStruct};
|
|
|
+use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
|
|
|
use std::io::Write;
|
|
|
use std::{
|
|
|
env::temp_dir,
|
|
|
@@ -78,7 +78,6 @@ impl Serialize for Variants {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
|
|
pub struct StatsVCF {
|
|
|
n_tumoral_init: usize,
|
|
|
@@ -724,10 +723,13 @@ impl Variants {
|
|
|
.collect()
|
|
|
}
|
|
|
|
|
|
- pub fn stats(&self) -> Result<()> {
|
|
|
+ pub fn stats(&self) -> Result<String> {
|
|
|
let mut callers_cat = HashMap::new();
|
|
|
let mut n_caller_data = 0;
|
|
|
|
|
|
+ let mut variants_cat = HashMap::new();
|
|
|
+ let mut n_variants_wcat = 0;
|
|
|
+
|
|
|
let mut ncbi_feature = HashMap::new();
|
|
|
let mut n_ncbi_feature = 0;
|
|
|
|
|
|
@@ -769,9 +771,12 @@ impl Variants {
|
|
|
*v += 1;
|
|
|
}
|
|
|
|
|
|
+ // Var cat
|
|
|
+
|
|
|
// Annotations
|
|
|
for annot in ele.annotations.iter() {
|
|
|
let mut features = Vec::new();
|
|
|
+ let mut variant_cat = Vec::new();
|
|
|
let mut cosmic_m1 = false;
|
|
|
|
|
|
match annot {
|
|
|
@@ -783,14 +788,24 @@ impl Variants {
|
|
|
cosmic_m1 = true;
|
|
|
}
|
|
|
}
|
|
|
+ AnnotationType::VariantCategory(vc) => {
|
|
|
+ let s = serde_json::to_string(vc)?;
|
|
|
+ variant_cat.push(s);
|
|
|
+ }
|
|
|
_ => (),
|
|
|
};
|
|
|
+
|
|
|
if features.len() > 0 {
|
|
|
features.sort();
|
|
|
add_hm(&mut ncbi_feature, &features.join(","));
|
|
|
n_ncbi_feature += 1;
|
|
|
}
|
|
|
|
|
|
+ if variant_cat.len() > 0 {
|
|
|
+ add_hm(&mut variants_cat, &variant_cat.join(","));
|
|
|
+ n_variants_wcat += 1;
|
|
|
+ }
|
|
|
+
|
|
|
if cosmic_m1 {
|
|
|
add_hm(&mut cosmic_sup_1, "Cosmic > 1");
|
|
|
n_cosmic_sup_1 += 1;
|
|
|
@@ -829,25 +844,31 @@ impl Variants {
|
|
|
|
|
|
// let file = File::create(path)?;
|
|
|
// let mut writer = BufWriter::new(file);
|
|
|
- // let tow = Stats::new(
|
|
|
- // (n_csq, cons_cat),
|
|
|
- // (n_ncbi_feature, ncbi_feature),
|
|
|
- // (n_caller_data, callers_cat),
|
|
|
- // n_cosmic_sup_1,
|
|
|
- // n_total,
|
|
|
- // n_constit,
|
|
|
- // n_tumoral,
|
|
|
- // n_constit_first,
|
|
|
- // n_loh_first,
|
|
|
- // n_low_mrd_depth_first,
|
|
|
- // n_constit_sec,
|
|
|
- // n_low_diversity_sec,
|
|
|
- // n_low_mrd_depth_sec,
|
|
|
- // n_somatic_sec,
|
|
|
- // );
|
|
|
- // serde_json::to_writer(&mut writer, &tow)?;
|
|
|
-
|
|
|
- Ok(())
|
|
|
+ let mut results = Vec::new();
|
|
|
+ results.push(Stat::new(
|
|
|
+ "consequences".to_string(),
|
|
|
+ cons_cat,
|
|
|
+ n_csq as u32,
|
|
|
+ ));
|
|
|
+ results.push(Stat::new(
|
|
|
+ "variants_cat".to_string(),
|
|
|
+ variants_cat,
|
|
|
+ n_variants_wcat as u32,
|
|
|
+ ));
|
|
|
+ results.push(Stat::new(
|
|
|
+ "ncbi_feature".to_string(),
|
|
|
+ ncbi_feature,
|
|
|
+ n_ncbi_feature as u32,
|
|
|
+ ));
|
|
|
+ results.push(Stat::new(
|
|
|
+ "callers_cat".to_string(),
|
|
|
+ callers_cat,
|
|
|
+ n_caller_data as u32,
|
|
|
+ ));
|
|
|
+
|
|
|
+ let res = serde_json::to_string(&results)?;
|
|
|
+
|
|
|
+ Ok(res)
|
|
|
}
|
|
|
|
|
|
pub fn save_sql(&self, path: &str) -> Result<()> {
|
|
|
@@ -892,6 +913,23 @@ impl Variants {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+#[derive(Debug, Serialize)]
|
|
|
+struct Stat { // One named statistic: a label, a per-key count map, and a total; serializable through the derived `Serialize`.
|
|
|
+    name: String, // Label of the statistic (callers in this patch pass e.g. "consequences", "variants_cat").
|
|
|
+    counts: HashMap<String, u32>, // Count per key; presumably occurrences per category string — TODO confirm against `add_hm`.
|
|
|
+    n_with_annotation: u32, // Total supplied by the caller; presumably how many items carried this kind of annotation — TODO confirm.
|
|
|
+}
|
|
|
+
|
|
|
+impl Stat { // Constructor only: `new` stores its three arguments unchanged.
|
|
|
+    pub fn new(name: String, counts: HashMap<String, u32>, n_with_annotation: u32) -> Self {
|
|
|
+        Stat { // Field-init shorthand; the order here differs from the declaration, which has no effect.
|
|
|
+            counts,
|
|
|
+            n_with_annotation,
|
|
|
+            name,
|
|
|
+        }
|
|
|
+    }
|
|
|
+}
|
|
|
+
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
|
pub struct Variant {
|
|
|
pub contig: String,
|
|
|
@@ -1458,7 +1496,9 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
|
|
|
|
|
|
// TODO check if SNP are matching
|
|
|
if variants.len() > 100_000 {
|
|
|
- return Err(anyhow!("Too many variants, verify if somatic and tumoral samples match."));
|
|
|
+ return Err(anyhow!(
|
|
|
+ "Too many variants, verify if somatic and tumoral samples match."
|
|
|
+ ));
|
|
|
}
|
|
|
|
|
|
variants.merge();
|
|
|
@@ -1478,8 +1518,8 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
|
|
|
// crate::sql::variants_sql::remove_variants_names(&db_path, &name)?;
|
|
|
// }
|
|
|
//
|
|
|
- // variants.save_sql(&db_path)?;
|
|
|
- // variants.stats_sql(&db_path)?;
|
|
|
+ variants.save_sql(&db_path)?;
|
|
|
+ variants.stats_sql(&db_path)?;
|
|
|
info!("Variants : {}", variants.len());
|
|
|
|
|
|
Ok(())
|