Selaa lähdekoodia

save bytes.gz

Thomas 1 vuosi sitten
vanhempi
commit
fff2723031
6 muutettua tiedostoa jossa 98 lisäystä ja 21 poistoa
  1. 28 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 0 3
      src/annotations/gnomad.rs
  4. 20 2
      src/lib.rs
  5. 5 1
      src/sql/variants_sql.rs
  6. 44 15
      src/variants.rs

+ 28 - 0
Cargo.lock

@@ -315,6 +315,12 @@ version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
 
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
 [[package]]
 name = "csv"
 version = "1.3.0"
@@ -518,6 +524,16 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
+[[package]]
+name = "half"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.3"
@@ -760,6 +776,7 @@ dependencies = [
  "noodles-tabix",
  "noodles-vcf",
  "num-integer",
+ "pot",
  "prettytable-rs",
  "rayon",
  "rust-htslib",
@@ -1158,6 +1175,17 @@ version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
 
+[[package]]
+name = "pot"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df842bdb3b0553a411589e64aaa1a7d0c0259f72fabcedfaa841683ae3019d80"
+dependencies = [
+ "byteorder",
+ "half",
+ "serde",
+]
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.17"

+ 1 - 0
Cargo.toml

@@ -40,3 +40,4 @@ serde_rusqlite = "0.34.0"
 dashmap = { version = "5.5.3", features = ["rayon", "serde"] }
 crossbeam-deque = "0.8.5"
 trc = "1.2.4"
+pot = "=3.0.0"

+ 0 - 3
src/annotations/gnomad.rs

@@ -1,10 +1,7 @@
 use std::str::FromStr;
-
 use serde::{Serialize, Deserialize};
 use anyhow::{anyhow, Ok, Result};
 
-
-
 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
 pub struct GnomAD {
     pub gnomad_ac: u64,

+ 20 - 2
src/lib.rs

@@ -13,7 +13,11 @@ mod tests {
     use indicatif_log_bridge::LogWrapper;
     use log::info;
 
-    use crate::{config::Config, sql::variants_sql::{load_variants_name, remove_variants_names}, variants::{VCFSource, VariantType, Variants, Variant}};
+    use crate::{
+        config::Config,
+        sql::variants_sql::{load_variants_name, remove_variants_names},
+        variants::{VCFSource, Variant, VariantType, Variants},
+    };
 
     use super::*;
     #[test]
@@ -93,6 +97,14 @@ mod tests {
             "{}/{name}/diag/{name}_loh.vcf.gz",
             cfg.longreads_results_dir
         );
+        let db_constit_path = format!(
+            "{}/{name}/diag/{name}_constit.sqlite",
+            cfg.longreads_results_dir
+        );
+        let bytes_constit_path = format!(
+            "{}/{name}/diag/{name}_constit.bytes.gz",
+            cfg.longreads_results_dir
+        );
 
         let sources = vec![
             (
@@ -131,6 +143,12 @@ mod tests {
         variants.vcf_filters();
         variants.write_vcf_cat(&loh_path, &variants::VariantCategory::LOH)?;
         variants.bam_filters(&mrd_bam);
+
+        let constits = variants.get_cat(&variants::VariantCategory::Constit);
+        let constits = variants::Variants::from_vec(name.to_string(), &multi, constits);
+        // constits.save_sql(&db_constit_path)?;
+        constits.save_bytes(&bytes_constit_path)?;
+
         variants.keep_somatics_un();
         info!("Variants retained: {}", variants.len());
 
@@ -171,7 +189,7 @@ mod tests {
         let v = variants.get_cat(&variants::VariantCategory::Constit);
         // let v = v.iter().filter(|v| 0.25 < v.vaf.unwrap() && v.vaf.unwrap() < 0.75 )
         //     .map(|v| {
-        //     
+        //
         // }).collect::<Vec<Variant>>();
         //
 

+ 5 - 1
src/sql/variants_sql.rs

@@ -7,7 +7,7 @@ use serde_rusqlite::*;
 
 use crate::{variants::{
     AnnotationType, Format, ReferenceAlternative, VCFSource, Variant, Variants
-}, config::Config};
+}, config::Config, utils::new_pg_speed};
 
 #[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
 pub struct VariantSQL {
@@ -201,11 +201,15 @@ pub fn init_variants_table(connection: &rusqlite::Connection) -> Result<usize> {
 /// Inserts every variant held in `variants.data` into the SQLite database at `path`.
 ///
 /// Opens a connection to `path`, calls `init_variants_table` on it, then
 /// converts each `Variant` into a `VariantSQL` and inserts it together with a
 /// clone of `variants.name`. A progress bar built by `new_pg_speed` is
 /// registered on `variants.mp` and advanced by one per inserted variant.
 ///
 /// # Errors
 /// Returns on the first failure of opening the connection, initialising the
 /// table, converting a variant, or inserting a row. On such an early return
 /// `pg.finish()` is not reached.
 ///
 /// NOTE(review): no explicit transaction is opened in this function; whether
 /// `insert_into_variants` batches or commits per row is not visible here —
 /// confirm if insertion speed matters.
 pub fn insert_variants(variants: &Variants, path: &str) -> Result<()> {
     let connection = rusqlite::Connection::open(&path)?;
     init_variants_table(&connection)?;
+    let pg = variants.mp.add(new_pg_speed(variants.len() as u64));
+    pg.set_message(format!("Inserting data into DB: {path}"));
 
     for v in variants.data.iter() {
         let mut var_sql: VariantSQL = v.try_into()?;
         var_sql.insert_into_variants(&connection, variants.name.clone())?;
+        pg.inc(1);
     }
+    pg.finish();
     Ok(())
 }
 

+ 44 - 15
src/variants.rs

@@ -14,6 +14,7 @@ use crate::{
     },
     config::Config,
     in_out::{
+        self,
         dict_reader::read_dict,
         get_reader,
         vcf_reader::{read_vcf, read_vcf_progress, VCFRow},
@@ -55,11 +56,11 @@ use std::{
 pub struct Variants {
     pub name: String,
     pub data: Vec<Variant>,
-    constit: DashMap<String, Variant>,
-    stats_vcf: StatsVCF,
-    stats_bam: StatsBAM,
-    cfg: Config,
-    mp: MultiProgress,
+    pub constit: DashMap<String, Variant>,
+    pub stats_vcf: StatsVCF,
+    pub stats_bam: StatsBAM,
+    pub cfg: Config,
+    pub mp: MultiProgress,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -119,6 +120,7 @@ impl Variants {
             mp: mp.clone(),
         }
     }
+
     pub fn from_vcfs(
         name: String,
         v: Vec<(&str, &VCFSource, &VariantType)>,
@@ -426,6 +428,7 @@ impl Variants {
         pg.finish();
         info!("{}. Executed in {}s", self.stats_vcf, elapsed.as_secs());
     }
+
     pub fn get_cat(&mut self, cat: &VariantCategory) -> Vec<Variant> {
         let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
         pg.set_message(format!("Get cat {:?}", cat));
@@ -465,6 +468,7 @@ impl Variants {
         w.write_index_finish()?;
         Ok(())
     }
+
     pub fn keep_somatics_un(&mut self) {
         let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
         pg.set_message("Filtering Variants");
@@ -640,19 +644,22 @@ impl Variants {
     }
 
     /// Returns references to the variants in `self.data` that carry at least
     /// one `AnnotationType::VariantCategory` annotation equal to `category`.
     ///
     /// Each variant's annotations are scanned in order and the scan stops at
     /// the first matching category; annotations of any other kind are ignored.
     /// The filtering is driven through `par_iter()` and the matches are
     /// collected into a `Vec<&Variant>`.
     pub fn category_iter(&self, category: &VariantCategory) -> Vec<&Variant> {
-        self.data.par_iter().filter(|v| {
-            for annotation in v.annotations.iter() {
-                match annotation {
-                    AnnotationType::VariantCategory(cat) => {
-                        if cat == category {
-                            return true;
+        self.data
+            .par_iter()
+            .filter(|v| {
+                for annotation in v.annotations.iter() {
+                    match annotation {
+                        AnnotationType::VariantCategory(cat) => {
+                            if cat == category {
+                                return true;
+                            }
                         }
+                        _ => (),
                     }
-                    _ => (),
                 }
-            }
-            return false;
-        }).collect::<Vec<&Variant>>()
+                return false;
+            })
+            .collect::<Vec<&Variant>>()
     }
 
     pub fn filter_snp(&mut self) -> Result<i32> {
@@ -840,6 +847,28 @@ impl Variants {
         )?;
         Ok(())
     }
+
+    /// Serializes `self.data` with `pot` and writes it, BGZF-compressed, to `path`.
+    ///
+    /// Only the variant list is stored; the other fields of `Variants`
+    /// (`name`, `constit`, `stats_vcf`, `stats_bam`, `cfg`, `mp`) are not written.
+    /// The whole serialized payload is built in memory before the file is created.
+    ///
+    /// # Errors
+    /// Fails if serialization fails, if the file cannot be created, or if
+    /// writing or finalizing the compressed stream fails.
+    pub fn save_bytes(&self, path: &str) -> Result<()> {
+        let serialized = pot::to_vec(&self.data)?;
+        let mut w =
+            noodles_bgzf::writer::Builder::default().build_with_writer(File::create(path)?);
+        w.write_all(&serialized)?;
+        // Finish explicitly: if this is left to the writer's drop, an I/O error
+        // while flushing the last block and the BGZF EOF marker is discarded and
+        // a truncated file would still be reported as success.
+        w.finish()?;
+        Ok(())
+    }
+
+    /// Builds a `Variants` named `name` from a `pot`-encoded variant list read
+    /// from `path`.
+    ///
+    /// The reader comes from `in_out::get_reader` (presumably it handles the
+    /// compression used by `save_bytes` — verify against that helper). Only
+    /// `data` is restored from the file: `constit` starts empty, both stats
+    /// fields take their `Default` value, `cfg` is loaded through
+    /// `Config::get()`, and `mp` is the progress handle passed by the caller.
+    ///
+    /// # Errors
+    /// Fails if the reader cannot be opened, if decoding fails, or if the
+    /// configuration cannot be loaded.
+    pub fn new_from_bytes(name: &str, path: &str, mp: MultiProgress) -> Result<Self> {
+        let reader = in_out::get_reader(path)?;
+        let variants: Vec<Variant> = pot::from_reader(reader)?;
+        let cfg = Config::get()?;
+
+        Ok(Self {
+            name: name.to_owned(),
+            data: variants,
+            constit: DashMap::new(),
+            stats_vcf: StatsVCF::default(),
+            stats_bam: StatsBAM::default(),
+            cfg,
+            mp,
+        })
+    }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]