Thomas 1 жил өмнө
parent
commit
dce22c84cb
4 өөрчлөгдсөн 44 нэмэгдсэн, 8 устгасан
  1. 1 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 16 2
      src/lib.rs
  4. 26 6
      src/phase.rs

+ 1 - 0
Cargo.lock

@@ -1328,6 +1328,7 @@ dependencies = [
  "anyhow",
  "crossbeam-channel",
  "env_logger",
+ "flate2",
  "indicatif",
  "indicatif-log-bridge",
  "log",

+ 1 - 0
Cargo.toml

@@ -18,4 +18,5 @@ pandora_lib_pileup = { git = "https://git.t0m4.fr/Thomas/pandora_lib_pileup.git"
 indicatif-log-bridge = "0.2.2"
 serde = { version = "1.0.*", default-features = false }
 postcard = { version = "1.0.8", features = ["alloc"] }
+flate2 = "1.0.31"
 

+ 16 - 2
src/lib.rs

@@ -284,7 +284,7 @@ mod tests {
     use num_format::{CustomFormat, Grouping, ToFormattedString};
     use pandora_lib_variants::{in_out::dict_reader::read_dict, variants::Variant};
 
-    use crate::phase::{save_phases, variants_phasing};
+    use crate::phase::{load_phases, save_phases, variants_phasing};
 
     use super::*;
 
@@ -373,7 +373,9 @@ mod tests {
             if !variants.is_empty() {
                 info!("{contig}: {} variants to phase", v.len());
                 let phases = variants_phasing(v, bam_path, min_records, &multi);
-                save_phases(&phases, &format!("{phases_dir}/{id}_{contig}_phases.postcard"))?;
+                if !phases.is_empty() {
+                    save_phases(&phases, &format!("{phases_dir}/{id}_{contig}_phases.postcard.gz"))?;
+                }
                 // info!("{} phases", phases.len());
                 // let f = phases.first().unwrap();
                 // println!("{f:#?}");
@@ -386,4 +388,16 @@ mod tests {
         // TODO: assign somatic to constit phase
         Ok(())
     }
+
+    #[test]
+    fn load_phase() -> anyhow::Result<()> {
+        init();
+        let id = "SALICETTO";
+        let contig = "chr7";
+        let phases_dir = format!("/data/longreads_basic_pipe/{id}/diag/phases");
+        let phase_path = format!("{phases_dir}/{id}_{contig}_phases.postcard.gz");
+        let p = load_phases(&phase_path)?;
+        info!("{} phases", p.len());
+        Ok(())
+    }
 }

+ 26 - 6
src/phase.rs

@@ -1,5 +1,6 @@
 use anyhow::{anyhow, Ok, Result};
 use crossbeam_channel::{unbounded, Receiver, Sender};
+use flate2::{read::GzDecoder, write::GzEncoder, Compression};
 use indicatif::MultiProgress;
 use log::{info, warn};
 use num_format::{CustomFormat, Grouping, ToFormattedString};
@@ -444,17 +445,36 @@ pub fn spawn_phase(
     (s, rr)
 }
 
+// pub fn save_phases(phases: &Vec<Phase>, filename: &str) -> anyhow::Result<()> {
+//     let bytes = postcard::to_allocvec(phases)?;
+//     let mut file = File::create(filename)?;
+//     file.write_all(&bytes)?;
+//     Ok(())
+// }
+//
+// pub fn load_phases(filename: &str) -> anyhow::Result<Vec<Phase>> {
+//     let mut file = File::open(filename)?;
+//     let mut bytes = Vec::new();
+//     file.read_to_end(&mut bytes)?;
+//     let phases: Vec<Phase> = postcard::from_bytes(&bytes)?;
+//     Ok(phases)
+// }
+
 pub fn save_phases(phases: &Vec<Phase>, filename: &str) -> anyhow::Result<()> {
-    let bytes = postcard::to_allocvec(phases)?;
-    let mut file = File::create(filename)?;
-    file.write_all(&bytes)?;
+    let bytes = postcard::to_allocvec(phases).expect("Serialization failed");
+    let file = File::create(filename)?;
+    let mut encoder = GzEncoder::new(file, Compression::default());
+    encoder.write_all(&bytes)?;
+    encoder.finish()?;
     Ok(())
 }
 
 pub fn load_phases(filename: &str) -> anyhow::Result<Vec<Phase>> {
-    let mut file = File::open(filename)?;
+    let file = File::open(filename)?;
+    let mut decoder = GzDecoder::new(file);
     let mut bytes = Vec::new();
-    file.read_to_end(&mut bytes)?;
-    let phases: Vec<Phase> = postcard::from_bytes(&bytes)?;
+    decoder.read_to_end(&mut bytes)?;
+    let phases: Vec<Phase> = postcard::from_bytes(&bytes).expect("Deserialization failed");
     Ok(phases)
 }
+