Thomas 1 жил өмнө
parent
commit
00f3f9c2fa
2 өөрчлөгдсөн 67 нэмэгдсэн , 27 устгасан
  1. 2 2
      src/lib.rs
  2. 65 25
      src/variants.rs

+ 2 - 2
src/lib.rs

@@ -27,7 +27,7 @@ mod tests {
 
     #[test]
     fn load_from_vcf() -> Result<()> {
-        let name = "GALLET";
+        let name = "ROBIN";
 
         let logger =
             env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
@@ -42,7 +42,7 @@ mod tests {
 
     #[test]
     fn load_from_db() -> Result<()> {
-        let name = "CAMARA";
+        let name = "ROBIN";
         let logger =
             env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
                 .build();

+ 65 - 25
src/variants.rs

@@ -37,7 +37,7 @@ use noodles_fasta::indexed_reader::Builder as FastaBuilder;
 use noodles_gff as gff;
 
 use rayon::prelude::*;
-use serde::{Deserialize, Serialize, Serializer, ser::SerializeStruct};
+use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer};
 use std::io::Write;
 use std::{
     env::temp_dir,
@@ -78,7 +78,6 @@ impl Serialize for Variants {
     }
 }
 
-
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct StatsVCF {
     n_tumoral_init: usize,
@@ -724,10 +723,13 @@ impl Variants {
             .collect()
     }
 
-    pub fn stats(&self) -> Result<()> {
+    pub fn stats(&self) -> Result<String> {
         let mut callers_cat = HashMap::new();
         let mut n_caller_data = 0;
 
+        let mut variants_cat = HashMap::new();
+        let mut n_variants_wcat = 0;
+
         let mut ncbi_feature = HashMap::new();
         let mut n_ncbi_feature = 0;
 
@@ -769,9 +771,12 @@ impl Variants {
                 *v += 1;
             }
 
+            // Variant categories: tallied per annotation in the loop below
+
             // Annotations
             for annot in ele.annotations.iter() {
                 let mut features = Vec::new();
+                let mut variant_cat = Vec::new();
                 let mut cosmic_m1 = false;
 
                 match annot {
@@ -783,14 +788,24 @@ impl Variants {
                             cosmic_m1 = true;
                         }
                     }
+                    AnnotationType::VariantCategory(vc) => {
+                        let s = serde_json::to_string(vc)?;
+                        variant_cat.push(s);
+                    }
                     _ => (),
                 };
+
                 if features.len() > 0 {
                     features.sort();
                     add_hm(&mut ncbi_feature, &features.join(","));
                     n_ncbi_feature += 1;
                 }
 
+                if variant_cat.len() > 0 {
+                    add_hm(&mut variants_cat, &variant_cat.join(","));
+                    n_variants_wcat += 1;
+                }
+
                 if cosmic_m1 {
                     add_hm(&mut cosmic_sup_1, "Cosmic > 1");
                     n_cosmic_sup_1 += 1;
@@ -829,25 +844,31 @@ impl Variants {
 
         // let file = File::create(path)?;
         // let mut writer = BufWriter::new(file);
-        // let tow = Stats::new(
-        //     (n_csq, cons_cat),
-        //     (n_ncbi_feature, ncbi_feature),
-        //     (n_caller_data, callers_cat),
-        //     n_cosmic_sup_1,
-        //     n_total,
-        //     n_constit,
-        //     n_tumoral,
-        //     n_constit_first,
-        //     n_loh_first,
-        //     n_low_mrd_depth_first,
-        //     n_constit_sec,
-        //     n_low_diversity_sec,
-        //     n_low_mrd_depth_sec,
-        //     n_somatic_sec,
-        // );
-        // serde_json::to_writer(&mut writer, &tow)?;
-
-        Ok(())
+        let mut results = Vec::new();
+        results.push(Stat::new(
+            "consequences".to_string(),
+            cons_cat,
+            n_csq as u32,
+        ));
+        results.push(Stat::new(
+            "variants_cat".to_string(),
+            variants_cat,
+            n_variants_wcat as u32,
+        ));
+        results.push(Stat::new(
+            "ncbi_feature".to_string(),
+            ncbi_feature,
+            n_ncbi_feature as u32,
+        ));
+        results.push(Stat::new(
+            "callers_cat".to_string(),
+            callers_cat,
+            n_caller_data as u32,
+        ));
+        
+        let res = serde_json::to_string(&results)?;
+
+        Ok(res)
     }
 
     pub fn save_sql(&self, path: &str) -> Result<()> {
@@ -892,6 +913,23 @@ impl Variants {
     }
 }
 
+#[derive(Debug, Serialize)]
+struct Stat { // one named tally; `Variants::stats` serializes a Vec of these to a JSON string
+    name: String, // label of the tally, e.g. "consequences" or "callers_cat"
+    counts: HashMap<String, u32>, // per-key counts; presumably category -> occurrences — confirm
+    n_with_annotation: u32, // companion counter supplied by the caller (e.g. n_csq, n_ncbi_feature)
+}
+
+impl Stat { // constructor only
+    pub fn new(name: String, counts: HashMap<String, u32>, n_with_annotation: u32) -> Self {
+        Stat { // the three arguments are moved into the same-named fields unchanged
+            counts,
+            n_with_annotation,
+            name,
+        }
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct Variant {
     pub contig: String,
@@ -1458,7 +1496,9 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
 
     // TODO check if SNP are matching
     if variants.len() > 100_000 {
-        return Err(anyhow!("Too many variants, verify if somatic and tumoral samples match."));
+        return Err(anyhow!(
+            "Too many variants, verify if somatic and tumoral samples match."
+        ));
     }
 
     variants.merge();
@@ -1478,8 +1518,8 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
     //     crate::sql::variants_sql::remove_variants_names(&db_path, &name)?;
     // }
     //
-    // variants.save_sql(&db_path)?;
-    // variants.stats_sql(&db_path)?;
+    variants.save_sql(&db_path)?;
+    variants.stats_sql(&db_path)?;
     info!("Variants : {}", variants.len());
 
     Ok(())