浏览代码

stats charts

Your Name 1 年之前
父节点
当前提交
0e5ea5505a
共有 4 个文件被更改,包括 918 次插入35 次删除
  1. 705 8
      Cargo.lock
  2. 2 2
      Cargo.toml
  3. 9 1
      src/lib.rs
  4. 202 24
      src/variants.rs

文件差异内容过多而无法显示
+ 705 - 8
Cargo.lock


+ 2 - 2
Cargo.toml

@@ -10,7 +10,7 @@ include = ["src/**/*"]
 log = "^0.4.22"
 env_logger = "^0.11.5"
 clap = { version = "^4.5.13", features = ["derive"] }
-anyhow = "^1.0.86"
+anyhow = "^1.0.89"
 indicatif = {version = "0.17.8", features = ["rayon"]}
 indicatif-log-bridge = "0.2.2"
 num-integer = "0.1.46"
@@ -47,4 +47,4 @@ crossbeam-channel = "0.5.13"
 flate2 = "1.0.31"
 num-format = "0.4.4"
 postcard = { version = "1.0.8", features = ["alloc"] }
-
+charming = { version = "0.3.1", features = ["ssr"] }

+ 9 - 1
src/lib.rs

@@ -13,6 +13,7 @@ mod tests {
     use indicatif_log_bridge::LogWrapper;
     use log::info;
     use noodles_core::{Position, Region};
+    use variants::AllStats;
     use crate::{
         annotations::phase, config::Config, sql::variants_sql::load_variants_name, utils::count_repetitions, variants::{AnnotationType, Category, Variants}
     };
@@ -28,7 +29,7 @@ mod tests {
 
     #[test]
     fn load_from_vcf() -> Result<()> {
-        let name = "CONSIGNY";
+        let name = "RICCO";
 
         let logger =
             env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
@@ -41,6 +42,13 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn graph() -> anyhow::Result<()> {
+        let stats = AllStats::load_json("/data/longreads_basic_pipe/RICCO/diag/report/data/RICCO_variants_stats.json")?;
+        stats.generate_graph("/data/stats")?;
+        Ok(())
+    }
+
     #[test]
     fn load_from_db() -> Result<()> {
         let name = "FAVOT";

+ 202 - 24
src/variants.rs

@@ -28,7 +28,13 @@ use crate::{
         get_hts_nt_pileup, new_pg, new_pg_speed, print_stat_cat,
     },
 };
-use anyhow::{anyhow, Context, Ok, Result};
+use anyhow::{anyhow, bail, Context, Ok, Result};
+use charming::{
+    component::{Axis, Grid},
+    element::{AxisLabel, AxisTick, AxisType, BoundaryGap, ItemStyle, Label, LabelPosition},
+    series::Bar,
+    Chart,
+};
 use csv::ReaderBuilder;
 use dashmap::DashMap;
 use hashbrown::HashMap;
@@ -45,6 +51,7 @@ use std::{
     env::temp_dir,
     fmt,
     fs::File,
+    io::Read,
     str::FromStr,
     sync::{
         atomic::{AtomicI32, Ordering},
@@ -550,10 +557,12 @@ impl Variants {
 
         let mut w = VariantWritter::new(path, &self.cfg.dict_file)?;
         for row in to_write.iter_mut() {
-            w.write_variant(row).context(format!("Error writing VCF row {:#?}", row))?;
+            w.write_variant(row)
+                .context(format!("Error writing VCF row {:#?}", row))?;
             pg.inc(1);
         }
-        w.write_index_finish().context(format!("Can't write index for {}", path))?;
+        w.write_index_finish()
+            .context(format!("Can't write index for {}", path))?;
         Ok(())
     }
 
@@ -1116,7 +1125,7 @@ impl Variants {
     }
 }
 
-#[derive(Debug, Clone, Serialize, ToSchema)]
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 pub struct Stat {
     name: String,
     counts: HashMap<String, u32>,
@@ -1133,7 +1142,7 @@ impl Stat {
     }
 }
 
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct AllStats {
     pub variants_stats: Vec<Stat>,
     pub vcf_stats: StatsVCF,
@@ -1141,6 +1150,183 @@ pub struct AllStats {
     pub n_variants: usize,
 }
 
+impl AllStats {
+    pub fn load_json(path: &str) -> anyhow::Result<AllStats> {
+        File::open(path)
+            .context(format!("Failed to open file: {path}"))
+            .and_then(|mut file| {
+                let mut contents = String::new();
+                file.read_to_string(&mut contents)
+                    .context(format!("Failed to read file: {path}"))
+                    .map(|_| contents)
+            })
+            .and_then(|json_str| {
+                serde_json::from_str::<AllStats>(&json_str).with_context(|| "Failed to parse JSON")
+            })
+    }
+
+    pub fn generate_graph(&self, prefix: &str) -> anyhow::Result<()> {
+        let bar_interval = 20 as f64;
+        // 1. Consequences
+        let consequences_data: Vec<&Stat> = self
+            .variants_stats
+            .iter()
+            .filter(|v| v.name == "consequences")
+            .collect();
+        if consequences_data.is_empty() {
+            bail!("No consequences data");
+        }
+
+        let mut consequences_data: Vec<(String, i32)> = consequences_data
+            .first()
+            .unwrap()
+            .counts
+            .iter()
+            .map(|(k, v)| {
+                let k = k.replace("_variant", "");
+                let k = k.replace("_", " ");
+
+                let k = if k == "intron,non coding transcript" {
+                    "non coding transcript intron".to_string()
+                } else {
+                    k
+                };
+                let k = if let Some((before_comma, _)) = k.split_once(',') {
+                    before_comma.to_string()
+                } else {
+                    k
+                };
+
+                (k, *v as i32)
+            })
+            .collect();
+
+        consequences_data.sort_by(|a, b| a.1.cmp(&b.1));
+        let n = consequences_data.len() as f64;
+        let height = (n * bar_interval) + (0.25 * n * bar_interval);
+        let (y_data, x_data): (Vec<String>, Vec<i32>) = consequences_data.into_iter().unzip();
+
+        let chart = Chart::new()
+            .grid(Grid::new().left("18%").right("5%").top("0%").bottom("25%"))
+            .x_axis(Axis::new().type_(AxisType::Log))
+            .y_axis(
+                Axis::new()
+                    .type_(AxisType::Category)
+                    .data(y_data)
+                    .axis_label(AxisLabel::new().font_size(12)),
+            )
+            .series(
+                Bar::new()
+                    .show_background(true)
+                    .data(x_data)
+                    .label(Label::new().show(true).position(LabelPosition::Right)),
+            );
+
+        let mut renderer = charming::ImageRenderer::new(1000, height as u32);
+        renderer
+            .save(&chart, format!("{prefix}_consequences.svg"))
+            .unwrap();
+
+        // 2. NCBI features
+        let ncbi_data: Vec<&Stat> = self
+            .variants_stats
+            .iter()
+            .filter(|v| v.name == "ncbi_feature")
+            .collect();
+        if ncbi_data.is_empty() {
+            bail!("No NCBI features data");
+        }
+
+        let mut ncbi_data: Vec<(String, i32)> = ncbi_data
+            .first()
+            .unwrap()
+            .counts
+            .iter()
+            .map(|(k, v)| {
+                let k = if k == "non_allelic_homologous_recombination_region" {
+                    "non_allelic_homologous\n_recombination_region".to_string()
+                } else {
+                    k.to_string()
+                };
+                (k.replace("_", " "), *v as i32)
+            })
+            .collect();
+
+        ncbi_data.sort_by(|a, b| a.1.cmp(&b.1));
+        let n = ncbi_data.len() as f64;
+        let height = (n * bar_interval) + (0.25 * n * bar_interval);
+
+        let (y_data, x_data): (Vec<String>, Vec<i32>) = ncbi_data.into_iter().unzip();
+
+        let chart = Chart::new()
+            .grid(Grid::new().left("18%").right("5%").top("0%").bottom("25%"))
+            .x_axis(Axis::new().type_(AxisType::Log))
+            .y_axis(
+                Axis::new()
+                    .type_(AxisType::Category)
+                    .data(y_data)
+                    .axis_label(AxisLabel::new().font_size(12)),
+            )
+            .series(
+                Bar::new()
+                    .show_background(true)
+                    .data(x_data)
+                    .bar_width(10)
+                    .label(Label::new().show(true).position(LabelPosition::Right)),
+            );
+
+        let mut renderer = charming::ImageRenderer::new(1000, height as u32);
+        renderer.save(&chart, format!("{prefix}_ncbi.svg")).unwrap();
+
+        // 3.  Callers
+        let callers_data: Vec<&Stat> = self
+            .variants_stats
+            .iter()
+            .filter(|v| v.name == "callers_cat")
+            .collect();
+        if callers_data.is_empty() {
+            bail!("No callers data");
+        }
+
+        let mut callers_data: Vec<(String, i32)> = callers_data
+            .first()
+            .unwrap()
+            .counts
+            .iter()
+            .map(|(k, v)| (k.replace(",", " & "), *v as i32))
+            .collect();
+        let n = callers_data.len() as f64;
+        let height = (n * bar_interval) + (0.25 * n * bar_interval);
+
+        callers_data.sort_by(|a, b| a.1.cmp(&b.1));
+        let (y_data, x_data): (Vec<String>, Vec<i32>) = callers_data.into_iter().unzip();
+
+        let chart = Chart::new()
+            .grid(Grid::new().left("18%").right("5%").top("0%").bottom("25%"))
+            .x_axis(Axis::new().type_(AxisType::Log))
+            .y_axis(
+                Axis::new()
+                    .type_(AxisType::Category)
+                    .data(y_data)
+                    .axis_label(AxisLabel::new().font_size(12)),
+            )
+            .series(
+                Bar::new()
+                    .show_background(true)
+                    .data(x_data)
+                    .bar_width(10)
+                    .label(Label::new().show(true).position(LabelPosition::Right)),
+            );
+
+        let mut renderer = charming::ImageRenderer::new(1000, height as u32);
+        renderer
+            .save(&chart, format!("{prefix}_callers.svg"))
+            .unwrap();
+
+        Ok(())
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
 pub struct Variant {
     pub contig: String,
@@ -1777,28 +1963,16 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
         cfg.longreads_results_dir
     );
 
-    let report_data_dir = format!(
-        "{}/{name}/diag/report/data",
-        cfg.longreads_results_dir
-    );
+    let report_data_dir = format!("{}/{name}/diag/report/data", cfg.longreads_results_dir);
     if !std::path::Path::new(&report_data_dir).exists() {
         fs::create_dir_all(&report_data_dir)?;
     }
 
-    let stats_path = format!(
-        "{}/{name}_variants_stats.json",
-        report_data_dir
-    );
+    let stats_path = format!("{}/{name}_variants_stats.json", report_data_dir);
 
-    let af_init_path = format!(
-        "{}/{name}_variants_af_init.csv",
-        report_data_dir
-    );
+    let af_init_path = format!("{}/{name}_variants_af_init.csv", report_data_dir);
 
-    let af_final_path = format!(
-        "{}/{name}_variants_af_final.csv",
-        report_data_dir
-    );
+    let af_final_path = format!("{}/{name}_variants_af_final.csv", report_data_dir);
 
     let sources = vec![
         (
@@ -1835,9 +2009,11 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
     let mut variants = Variants::from_vcfs(name.to_string(), sources, &cfg, multi.clone())?;
 
     write_af_data(&variants, &af_init_path).context("Can't write initial AF data")?;
-    
+
     variants.vcf_filters();
-    variants.write_vcf_cat(&loh_path, &VariantCategory::LOH).context("Can't write LOH")?;
+    variants
+        .write_vcf_cat(&loh_path, &VariantCategory::LOH)
+        .context("Can't write LOH")?;
     variants.bam_filters(&mrd_bam);
 
     let constits = variants.get_cat(&VariantCategory::Constit);
@@ -1865,7 +2041,9 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
     variants.filter_snp()?;
 
     variants.save_bytes(&bytes_path)?;
-    variants.stats_json(&stats_path).context("Can't write stats")?;
+    variants
+        .stats_json(&stats_path)
+        .context("Can't write stats")?;
 
     write_af_data(&variants, &af_final_path).context("Can't write final AF data")?;
 

部分文件因为文件数量过多而无法显示