Thomas 1 year ago
parent
commit
fe80f53381
7 changed files with 52 additions and 16 deletions
  1. 1 1
      src/callers/nanomonsv.rs
  2. 2 2
      src/collection/pod5.rs
  3. 26 0
      src/helpers.rs
  4. 2 2
      src/io/readers.rs
  5. 1 1
      src/lib.rs
  6. 8 6
      src/pipes/somatic.rs
  7. 12 4
      src/variant/variant.rs

+ 1 - 1
src/callers/nanomonsv.rs

@@ -67,7 +67,7 @@ impl Initialize for NanomonSV {
 
 impl Run for NanomonSV {
     fn run(&mut self) -> anyhow::Result<()> {
-        somatic_parse(&self)?;
+        somatic_parse(self)?;
 
         info!("Nanomonsv Get");
         let diag_out_prefix = format!("{}/{}_diag", self.diag_out_dir, self.id);

+ 2 - 2
src/collection/pod5.rs

@@ -464,11 +464,11 @@ impl Pod5Collection {
                 r.flowcells
                     .iter()
                     .flat_map(|f| {
-                        return f
+                        f
                             .cases
                             .iter()
                             .map(|c| c.id.clone())
-                            .collect::<Vec<String>>();
+                            .collect::<Vec<String>>()
                     })
                     .collect::<Vec<String>>()
             })

+ 26 - 0
src/helpers.rs

@@ -302,4 +302,30 @@ pub fn app_storage_dir() -> anyhow::Result<PathBuf> {
     
     Ok(app_dir)
 }
+use blake3::Hasher as Blake3Hasher;
+use std::hash::{BuildHasher, Hasher};
+
+pub struct Blake3Hash(Blake3Hasher); // Wraps a blake3 hasher so it can be driven through std's `Hasher` trait
+
+impl Hasher for Blake3Hash { // Adapts the wrapped blake3 state to the std `Hasher` interface
+    fn finish(&self) -> u64 { // Reduces the bytes written so far to a 64-bit value
+        let hash = self.0.finalize(); // `finalize` takes `&self`, so further `write` calls remain possible
+        u64::from_le_bytes(hash.as_bytes()[..8].try_into().unwrap()) // First 8 digest bytes, little-endian; the slice is exactly 8 bytes so the conversion cannot fail
+    }
+
+    fn write(&mut self, bytes: &[u8]) { // Feeds `bytes` into the wrapped blake3 state
+        self.0.update(bytes);
+    }
+}
+
+#[derive(Default, Clone)]
+pub struct Blake3BuildHasher; // Field-less `BuildHasher` that produces `Blake3Hash` instances
+
+impl BuildHasher for Blake3BuildHasher { // Lets hashed collections construct `Blake3Hash` hashers on demand
+    type Hasher = Blake3Hash;
+
+    fn build_hasher(&self) -> Self::Hasher { // Each call wraps a newly constructed blake3 hasher; no state is shared between calls
+        Blake3Hash(Blake3Hasher::new())
+    }
+}
 

+ 2 - 2
src/io/readers.rs

@@ -2,10 +2,10 @@ use std::{fs::File, io::BufReader};
 
 use anyhow::Context;
 use bgzip::BGZFReader;
-use log::info;
+use log::debug;
 
 pub fn get_reader(path: &str) -> anyhow::Result<Box<dyn std::io::Read>> {
-    info!("Reading: {path}");
+    debug!("Reading: {path}");
     let file_type = *path
         .split(".")
         .collect::<Vec<&str>>()

+ 1 - 1
src/lib.rs

@@ -29,7 +29,7 @@ mod tests {
 
     use annotation::{vep::{VepLine, VEP}, Annotations};
     use callers::{nanomonsv::nanomonsv_create_pon, savana::Savana, severus::{Severus, SeverusSolo}};
-    use collection::{bam::{counts_at, counts_ins_at, ins_pileup, nt_pileup}, Initialize, InitializeSolo, Version};
+    use collection::{bam::{counts_at, counts_ins_at, nt_pileup}, Initialize, InitializeSolo, Version};
     use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
     use functions::assembler::{Assembler, AssemblerConfig};
     use helpers::estimate_shannon_entropy;

+ 8 - 6
src/pipes/somatic.rs

@@ -85,17 +85,16 @@ impl SomaticStats {
 impl Run for Somatic {
     fn run(&mut self) -> anyhow::Result<()> {
         let id = self.id.clone();
+        info!("Running somatic pipe for {id}.");
+        let config = self.config.clone();
+        let annotations = Arc::new(self.annotations.clone());
 
         // TODO: GZ !!!
         // LongphasePhase::initialize(&id, self.config.clone())?.run()?;
 
-        info!("Running somatic pipe for {id}.");
-
-        info!("Initialization...");
-        let config = self.config.clone();
-        let annotations = Arc::new(self.annotations.clone());
+        // Initialize variants collections
+        info!("Initialization of callers...");
 
-        // Loading variants collections
         let mut callers = init_somatic_callers!(&id, &config, ClairS, NanomonSV);
         callers.extend(init_solo_callers!(
             &id,
@@ -105,6 +104,9 @@ impl Run for Somatic {
             DeepVariant,
             "mrd"
         ));
+
+        // Loading
+        info!("Loading variants.");
         let mut variants_collections = load_variants(&mut callers, &annotations)?;
 
         let clairs_germline =

+ 12 - 4
src/variant/variant.rs

@@ -1,7 +1,11 @@
 use crate::{
-    annotation::Annotations, positions::{GenomePosition, GetGenomePosition, VcfPosition}, runners::Run, variant::variant_collection::VariantCollection
+    annotation::Annotations,
+    positions::{GenomePosition, GetGenomePosition, VcfPosition},
+    runners::Run,
+    variant::variant_collection::VariantCollection,
 };
 use anyhow::{anyhow, Context, Ok};
+use blake3::Hasher;
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::{cmp::Ordering, collections::HashSet, fmt, hash::Hash, str::FromStr};
@@ -29,9 +33,13 @@ impl PartialEq for VcfVariant {
 
 impl Hash for VcfVariant {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.position.hash(state);
-        self.reference.hash(state);
-        self.alternative.hash(state);
+        let mut hasher = Hasher::new(); // Local blake3 hasher; the digest is forwarded to `state` at the end
+        hasher.update(&self.position.contig.to_ne_bytes()); // Contig as native-endian bytes. NOTE(review): presumably an integer id, so the digest would differ across endianness; confirm this is acceptable
+        hasher.update(&self.position.position.to_ne_bytes()); // Position as native-endian bytes
+        hasher.update(self.reference.to_string().as_bytes()); // Reference allele, via its string form, as bytes
+        hasher.update(self.alternative.to_string().as_bytes()); // Alternative allele, via its string form, as bytes. NOTE(review): no separator between reference and alternative; confirm the boundary ambiguity is acceptable
+        let hash = hasher.finalize();
+        state.write(&hash.as_bytes()[..16]); // Only the first 16 digest bytes are passed to the caller's hasher
     }
 }