|
|
@@ -28,7 +28,13 @@ use crate::{
|
|
|
get_hts_nt_pileup, new_pg, new_pg_speed, print_stat_cat,
|
|
|
},
|
|
|
};
|
|
|
-use anyhow::{anyhow, Context, Ok, Result};
|
|
|
+use anyhow::{anyhow, bail, Context, Ok, Result};
|
|
|
+use charming::{
|
|
|
+ component::{Axis, Grid},
|
|
|
+ element::{AxisLabel, AxisTick, AxisType, BoundaryGap, ItemStyle, Label, LabelPosition},
|
|
|
+ series::Bar,
|
|
|
+ Chart,
|
|
|
+};
|
|
|
use csv::ReaderBuilder;
|
|
|
use dashmap::DashMap;
|
|
|
use hashbrown::HashMap;
|
|
|
@@ -45,6 +51,7 @@ use std::{
|
|
|
env::temp_dir,
|
|
|
fmt,
|
|
|
fs::File,
|
|
|
+ io::Read,
|
|
|
str::FromStr,
|
|
|
sync::{
|
|
|
atomic::{AtomicI32, Ordering},
|
|
|
@@ -550,10 +557,12 @@ impl Variants {
|
|
|
|
|
|
let mut w = VariantWritter::new(path, &self.cfg.dict_file)?;
|
|
|
for row in to_write.iter_mut() {
|
|
|
- w.write_variant(row).context(format!("Error writing VCF row {:#?}", row))?;
|
|
|
+ w.write_variant(row)
|
|
|
+ .context(format!("Error writing VCF row {:#?}", row))?;
|
|
|
pg.inc(1);
|
|
|
}
|
|
|
- w.write_index_finish().context(format!("Can't write index for {}", path))?;
|
|
|
+ w.write_index_finish()
|
|
|
+ .context(format!("Can't write index for {}", path))?;
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
@@ -1116,7 +1125,7 @@ impl Variants {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone, Serialize, ToSchema)]
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
|
|
|
pub struct Stat {
|
|
|
name: String,
|
|
|
counts: HashMap<String, u32>,
|
|
|
@@ -1133,7 +1142,7 @@ impl Stat {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone, Serialize)]
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
pub struct AllStats {
|
|
|
pub variants_stats: Vec<Stat>,
|
|
|
pub vcf_stats: StatsVCF,
|
|
|
@@ -1141,6 +1150,183 @@ pub struct AllStats {
|
|
|
pub n_variants: usize,
|
|
|
}
|
|
|
|
|
|
+impl AllStats {
|
|
|
+ pub fn load_json(path: &str) -> anyhow::Result<AllStats> {
|
|
|
+ File::open(path)
|
|
|
+ .context(format!("Failed to open file: {path}"))
|
|
|
+ .and_then(|mut file| {
|
|
|
+ let mut contents = String::new();
|
|
|
+ file.read_to_string(&mut contents)
|
|
|
+ .context(format!("Failed to read file: {path}"))
|
|
|
+ .map(|_| contents)
|
|
|
+ })
|
|
|
+ .and_then(|json_str| {
|
|
|
+ serde_json::from_str::<AllStats>(&json_str).with_context(|| "Failed to parse JSON")
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn generate_graph(&self, prefix: &str) -> anyhow::Result<()> {
|
|
|
+ let bar_interval = 20 as f64;
|
|
|
+ // 1. Consequences
|
|
|
+ let consequences_data: Vec<&Stat> = self
|
|
|
+ .variants_stats
|
|
|
+ .iter()
|
|
|
+ .filter(|v| v.name == "consequences")
|
|
|
+ .collect();
|
|
|
+ if consequences_data.is_empty() {
|
|
|
+ bail!("No consequences data");
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut consequences_data: Vec<(String, i32)> = consequences_data
|
|
|
+ .first()
|
|
|
+ .unwrap()
|
|
|
+ .counts
|
|
|
+ .iter()
|
|
|
+ .map(|(k, v)| {
|
|
|
+ let k = k.replace("_variant", "");
|
|
|
+ let k = k.replace("_", " ");
|
|
|
+
|
|
|
+ let k = if k == "intron,non coding transcript" {
|
|
|
+ "non coding transcript intron".to_string()
|
|
|
+ } else {
|
|
|
+ k
|
|
|
+ };
|
|
|
+ let k = if let Some((before_comma, _)) = k.split_once(',') {
|
|
|
+ before_comma.to_string()
|
|
|
+ } else {
|
|
|
+ k
|
|
|
+ };
|
|
|
+
|
|
|
+ (k, *v as i32)
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ consequences_data.sort_by(|a, b| a.1.cmp(&b.1));
|
|
|
+ let n = consequences_data.len() as f64;
|
|
|
+ let height = (n * bar_interval) + (0.25 * n * bar_interval);
|
|
|
+ let (y_data, x_data): (Vec<String>, Vec<i32>) = consequences_data.into_iter().unzip();
|
|
|
+
|
|
|
+ let chart = Chart::new()
|
|
|
+ .grid(Grid::new().left("18%").right("5%").top("0%").bottom("25%"))
|
|
|
+ .x_axis(Axis::new().type_(AxisType::Log))
|
|
|
+ .y_axis(
|
|
|
+ Axis::new()
|
|
|
+ .type_(AxisType::Category)
|
|
|
+ .data(y_data)
|
|
|
+ .axis_label(AxisLabel::new().font_size(12)),
|
|
|
+ )
|
|
|
+ .series(
|
|
|
+ Bar::new()
|
|
|
+ .show_background(true)
|
|
|
+ .data(x_data)
|
|
|
+ .label(Label::new().show(true).position(LabelPosition::Right)),
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut renderer = charming::ImageRenderer::new(1000, height as u32);
|
|
|
+ renderer
|
|
|
+ .save(&chart, format!("{prefix}_consequences.svg"))
|
|
|
+ .unwrap();
|
|
|
+
|
|
|
+ // 2. NCBI features
|
|
|
+ let ncbi_data: Vec<&Stat> = self
|
|
|
+ .variants_stats
|
|
|
+ .iter()
|
|
|
+ .filter(|v| v.name == "ncbi_feature")
|
|
|
+ .collect();
|
|
|
+ if ncbi_data.is_empty() {
|
|
|
+ bail!("No NCBI features data");
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut ncbi_data: Vec<(String, i32)> = ncbi_data
|
|
|
+ .first()
|
|
|
+ .unwrap()
|
|
|
+ .counts
|
|
|
+ .iter()
|
|
|
+ .map(|(k, v)| {
|
|
|
+ let k = if k == "non_allelic_homologous_recombination_region" {
|
|
|
+ "non_allelic_homologous\n_recombination_region".to_string()
|
|
|
+ } else {
|
|
|
+ k.to_string()
|
|
|
+ };
|
|
|
+ (k.replace("_", " "), *v as i32)
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ ncbi_data.sort_by(|a, b| a.1.cmp(&b.1));
|
|
|
+ let n = ncbi_data.len() as f64;
|
|
|
+ let height = (n * bar_interval) + (0.25 * n * bar_interval);
|
|
|
+
|
|
|
+ let (y_data, x_data): (Vec<String>, Vec<i32>) = ncbi_data.into_iter().unzip();
|
|
|
+
|
|
|
+ let chart = Chart::new()
|
|
|
+ .grid(Grid::new().left("18%").right("5%").top("0%").bottom("25%"))
|
|
|
+ .x_axis(Axis::new().type_(AxisType::Log))
|
|
|
+ .y_axis(
|
|
|
+ Axis::new()
|
|
|
+ .type_(AxisType::Category)
|
|
|
+ .data(y_data)
|
|
|
+ .axis_label(AxisLabel::new().font_size(12)),
|
|
|
+ )
|
|
|
+ .series(
|
|
|
+ Bar::new()
|
|
|
+ .show_background(true)
|
|
|
+ .data(x_data)
|
|
|
+ .bar_width(10)
|
|
|
+ .label(Label::new().show(true).position(LabelPosition::Right)),
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut renderer = charming::ImageRenderer::new(1000, height as u32);
|
|
|
+ renderer.save(&chart, format!("{prefix}_ncbi.svg")).unwrap();
|
|
|
+
|
|
|
+ // 3. Callers
|
|
|
+ let callers_data: Vec<&Stat> = self
|
|
|
+ .variants_stats
|
|
|
+ .iter()
|
|
|
+ .filter(|v| v.name == "callers_cat")
|
|
|
+ .collect();
|
|
|
+ if callers_data.is_empty() {
|
|
|
+ bail!("No callers data");
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut callers_data: Vec<(String, i32)> = callers_data
|
|
|
+ .first()
|
|
|
+ .unwrap()
|
|
|
+ .counts
|
|
|
+ .iter()
|
|
|
+ .map(|(k, v)| (k.replace(",", " & "), *v as i32))
|
|
|
+ .collect();
|
|
|
+ let n = callers_data.len() as f64;
|
|
|
+ let height = (n * bar_interval) + (0.25 * n * bar_interval);
|
|
|
+
|
|
|
+ callers_data.sort_by(|a, b| a.1.cmp(&b.1));
|
|
|
+ let (y_data, x_data): (Vec<String>, Vec<i32>) = callers_data.into_iter().unzip();
|
|
|
+
|
|
|
+ let chart = Chart::new()
|
|
|
+ .grid(Grid::new().left("18%").right("5%").top("0%").bottom("25%"))
|
|
|
+ .x_axis(Axis::new().type_(AxisType::Log))
|
|
|
+ .y_axis(
|
|
|
+ Axis::new()
|
|
|
+ .type_(AxisType::Category)
|
|
|
+ .data(y_data)
|
|
|
+ .axis_label(AxisLabel::new().font_size(12)),
|
|
|
+ )
|
|
|
+ .series(
|
|
|
+ Bar::new()
|
|
|
+ .show_background(true)
|
|
|
+ .data(x_data)
|
|
|
+ .bar_width(10)
|
|
|
+ .label(Label::new().show(true).position(LabelPosition::Right)),
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut renderer = charming::ImageRenderer::new(1000, height as u32);
|
|
|
+ renderer
|
|
|
+ .save(&chart, format!("{prefix}_callers.svg"))
|
|
|
+ .unwrap();
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, ToSchema)]
|
|
|
pub struct Variant {
|
|
|
pub contig: String,
|
|
|
@@ -1777,28 +1963,16 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
|
|
|
cfg.longreads_results_dir
|
|
|
);
|
|
|
|
|
|
- let report_data_dir = format!(
|
|
|
- "{}/{name}/diag/report/data",
|
|
|
- cfg.longreads_results_dir
|
|
|
- );
|
|
|
+ let report_data_dir = format!("{}/{name}/diag/report/data", cfg.longreads_results_dir);
|
|
|
if !std::path::Path::new(&report_data_dir).exists() {
|
|
|
fs::create_dir_all(&report_data_dir)?;
|
|
|
}
|
|
|
|
|
|
- let stats_path = format!(
|
|
|
- "{}/{name}_variants_stats.json",
|
|
|
- report_data_dir
|
|
|
- );
|
|
|
+ let stats_path = format!("{}/{name}_variants_stats.json", report_data_dir);
|
|
|
|
|
|
- let af_init_path = format!(
|
|
|
- "{}/{name}_variants_af_init.csv",
|
|
|
- report_data_dir
|
|
|
- );
|
|
|
+ let af_init_path = format!("{}/{name}_variants_af_init.csv", report_data_dir);
|
|
|
|
|
|
- let af_final_path = format!(
|
|
|
- "{}/{name}_variants_af_final.csv",
|
|
|
- report_data_dir
|
|
|
- );
|
|
|
+ let af_final_path = format!("{}/{name}_variants_af_final.csv", report_data_dir);
|
|
|
|
|
|
let sources = vec![
|
|
|
(
|
|
|
@@ -1835,9 +2009,11 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
|
|
|
let mut variants = Variants::from_vcfs(name.to_string(), sources, &cfg, multi.clone())?;
|
|
|
|
|
|
write_af_data(&variants, &af_init_path).context("Can't write initial AF data")?;
|
|
|
-
|
|
|
+
|
|
|
variants.vcf_filters();
|
|
|
- variants.write_vcf_cat(&loh_path, &VariantCategory::LOH).context("Can't write LOH")?;
|
|
|
+ variants
|
|
|
+ .write_vcf_cat(&loh_path, &VariantCategory::LOH)
|
|
|
+ .context("Can't write LOH")?;
|
|
|
variants.bam_filters(&mrd_bam);
|
|
|
|
|
|
let constits = variants.get_cat(&VariantCategory::Constit);
|
|
|
@@ -1865,7 +2041,9 @@ pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
|
|
|
variants.filter_snp()?;
|
|
|
|
|
|
variants.save_bytes(&bytes_path)?;
|
|
|
- variants.stats_json(&stats_path).context("Can't write stats")?;
|
|
|
+ variants
|
|
|
+ .stats_json(&stats_path)
|
|
|
+ .context("Can't write stats")?;
|
|
|
|
|
|
write_af_data(&variants, &af_final_path).context("Can't write final AF data")?;
|
|
|
|