|
|
@@ -8,6 +8,7 @@ use std::{
|
|
|
use anyhow::Context;
|
|
|
use bgzip::{BGZFReader, BGZFWriter};
|
|
|
use csv::ReaderBuilder;
|
|
|
+use dashmap::DashMap;
|
|
|
use log::{debug, error, info, warn};
|
|
|
use rayon::prelude::*;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
@@ -18,7 +19,7 @@ use crate::{
|
|
|
annotation::{
|
|
|
cosmic::Cosmic,
|
|
|
echtvar::{parse_echtvar_val, run_echtvar},
|
|
|
- gnomad::GnomAD,
|
|
|
+ gnomad::{GnomAD, GnomADValue},
|
|
|
vep::{run_vep, VepLine, VEP},
|
|
|
Annotation, Annotations,
|
|
|
},
|
|
|
@@ -26,7 +27,7 @@ use crate::{
|
|
|
bam::{counts_at, counts_ins_at},
|
|
|
vcf::Vcf,
|
|
|
},
|
|
|
- helpers::{app_storage_dir, estimate_shannon_entropy, temp_file_path, Hash128},
|
|
|
+ helpers::{app_storage_dir, estimate_shannon_entropy, mean, temp_file_path, Hash128},
|
|
|
io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header},
|
|
|
positions::GenomePosition,
|
|
|
};
|
|
|
@@ -724,6 +725,88 @@ impl Variants {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
+pub struct VariantsStats {
|
|
|
+ pub n: u32,
|
|
|
+ pub alteration_categories: DashMap<String, u32>,
|
|
|
+ pub cosmic: DashMap<u64, u32>,
|
|
|
+ pub gnomad: DashMap<String, Vec<f64>>,
|
|
|
+}
|
|
|
+
|
|
|
+impl VariantsStats {
|
|
|
+ pub fn new(variants: &Variants) -> Self {
|
|
|
+ let n = variants.data.len() as u32;
|
|
|
+ let alteration_categories: DashMap<String, u32> = DashMap::new();
|
|
|
+ let cosmic: DashMap<u64, u32> = DashMap::new();
|
|
|
+ let gnomad: DashMap<String, Vec<f64>> = DashMap::new();
|
|
|
+
|
|
|
+ variants.data.par_iter().for_each(|v| {
|
|
|
+ v.annotations.iter().for_each(|annotation| {
|
|
|
+ match annotation {
|
|
|
+ Annotation::Cosmic(v) => *cosmic.entry(v.cosmic_cnt).or_default() += 1,
|
|
|
+ Annotation::GnomAD(v) => {
|
|
|
+ v.to_vec()
|
|
|
+ .iter()
|
|
|
+ .map(|e| e.to_string_value_pair())
|
|
|
+ .for_each(|(key, value)| {
|
|
|
+ gnomad.entry(key).or_default().push(value);
|
|
|
+ });
|
|
|
+ }
|
|
|
+ _ => (),
|
|
|
+ };
|
|
|
+ });
|
|
|
+ let mut alteration_category_str = v
|
|
|
+ .alteration_category()
|
|
|
+ .iter()
|
|
|
+ .map(|c| c.to_string())
|
|
|
+ .collect::<Vec<String>>();
|
|
|
+ alteration_category_str.sort();
|
|
|
+ alteration_category_str.dedup();
|
|
|
+
|
|
|
+ *alteration_categories.entry(alteration_category_str.join(", ")).or_default() += 1;
|
|
|
+ });
|
|
|
+
|
|
|
+ gnomad.iter().for_each(|e| {
|
|
|
+ println!("{}\t{}", e.key(), mean(e.value()));
|
|
|
+ });
|
|
|
+
|
|
|
+ Self {
|
|
|
+ n,
|
|
|
+ alteration_categories,
|
|
|
+ cosmic,
|
|
|
+ gnomad,
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn save_to_json(&self, filename: &str) -> anyhow::Result<()> {
|
|
|
+ let file = File::create(filename)
|
|
|
+ .with_context(|| format!("Failed to create file: {}", filename))?;
|
|
|
+ let mut writer = BGZFWriter::new(file, bgzip::Compression::default());
|
|
|
+
|
|
|
+ serde_json::to_writer(&mut writer, self)
|
|
|
+ .with_context(|| format!("Failed to serialize JSON to file: {}", filename))?;
|
|
|
+
|
|
|
+ writer
|
|
|
+ .close()
|
|
|
+ .with_context(|| format!("Failed to close BGZF writer for file: {}", filename))?;
|
|
|
+
|
|
|
+ debug!("Successfully saved variants to {}", filename);
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+ pub fn load_from_json(filename: &str) -> anyhow::Result<Self> {
|
|
|
+ let file =
|
|
|
+ File::open(filename).with_context(|| format!("Failed to open file: {}", filename))?;
|
|
|
+ let mut reader = BGZFReader::new(file)
|
|
|
+ .with_context(|| format!("Failed to create BGZF reader for file: {}", filename))?;
|
|
|
+
|
|
|
+ let variants: Self = serde_json::from_reader(&mut reader)
|
|
|
+ .with_context(|| format!("Failed to deserialize JSON from file: {}", filename))?;
|
|
|
+
|
|
|
+ debug!("Successfully loaded variants from {}", filename);
|
|
|
+ Ok(variants)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Creates a new Variant instance from a collection of VcfVariants and annotations.
|
|
|
///
|
|
|
/// This function consolidates information from one or more VcfVariants into a single Variant,
|