Explorar o código

todo_variants_agg

Thomas hai 1 ano
pai
achega
f56aa5599c
Modificáronse 5 ficheiros con 137 adicións e 29 borrados
  1. 68 19
      Cargo.lock
  2. 2 2
      Cargo.toml
  3. 66 4
      src/collection/mod.rs
  4. 0 4
      src/commands/dorado.rs
  5. 1 0
      src/functions/assembler.rs

+ 68 - 19
Cargo.lock

@@ -714,6 +714,12 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
 
+[[package]]
+name = "bit-vec"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
+
 [[package]]
 name = "bit_field"
 version = "0.10.2"
@@ -1592,6 +1598,12 @@ dependencies = [
  "spin",
 ]
 
+[[package]]
+name = "foldhash"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"
+
 [[package]]
 name = "fontconfig-parser"
 version = "0.5.7"
@@ -1846,6 +1858,18 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "hashbrown"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash",
+ "rayon",
+]
+
 [[package]]
 name = "hashlink"
 version = "0.9.1"
@@ -2480,9 +2504,9 @@ dependencies = [
  "byteorder",
  "bytes",
  "indexmap",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.37.0",
  "noodles-sam",
 ]
 
@@ -2498,6 +2522,18 @@ dependencies = [
  "flate2",
 ]
 
+[[package]]
+name = "noodles-bgzf"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b50aaa8f0a3c8a0b738b641a6d1a78d9fd30a899ab2d398779ee3c4eb80f1c1"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "crossbeam-channel",
+ "flate2",
+]
+
 [[package]]
 name = "noodles-core"
 version = "0.15.0"
@@ -2513,10 +2549,23 @@ version = "0.37.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e4bc8001c54f1d8e47e1ac6041a5f27edc99b68bacea3fade9c89059de285aea"
 dependencies = [
- "bit-vec",
+ "bit-vec 0.7.0",
  "byteorder",
  "indexmap",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
+ "noodles-core",
+]
+
+[[package]]
+name = "noodles-csi"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0f41004636fb4232155421cbf4706565073623838a8252875085fa670b8185c"
+dependencies = [
+ "bit-vec 0.8.0",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf 0.33.0",
  "noodles-core",
 ]
 
@@ -2529,7 +2578,7 @@ dependencies = [
  "bstr",
  "bytes",
  "memchr",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
 ]
 
@@ -2542,7 +2591,7 @@ dependencies = [
  "bstr",
  "bytes",
  "memchr",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
 ]
 
@@ -2553,9 +2602,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "adef59012090b5694b58cad0e4426cd18af404803f942d02e664af607d89ee28"
 dependencies = [
  "indexmap",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.37.0",
  "percent-encoding",
 ]
 
@@ -2570,9 +2619,9 @@ dependencies = [
  "indexmap",
  "lexical-core",
  "memchr",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.37.0",
 ]
 
 [[package]]
@@ -2581,12 +2630,12 @@ version = "0.43.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "545e16e229b7f8734b0a2a36bd4c98a5b70128663b16b5201ddadc0d09c28d4a"
 dependencies = [
- "bit-vec",
+ "bit-vec 0.7.0",
  "byteorder",
  "indexmap",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.37.0",
 ]
 
 [[package]]
@@ -2597,9 +2646,9 @@ checksum = "c6f64c43315f757fe42ae014cf83996698cc9e47388080db165d0eb7b5f74092"
 dependencies = [
  "indexmap",
  "memchr",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.37.0",
  "noodles-tabix",
  "percent-encoding",
 ]
@@ -2914,14 +2963,14 @@ dependencies = [
  "env_logger 0.11.5",
  "expectrl",
  "glob",
- "hashbrown 0.14.5",
+ "hashbrown 0.15.0",
  "indicatif",
  "lazy_static",
  "locale_config",
  "log",
  "logtest",
  "nix 0.29.0",
- "noodles-csi",
+ "noodles-csi 0.39.0",
  "num-format",
  "pandora_lib_assembler",
  "pandora_lib_bindings",
@@ -2983,9 +3032,9 @@ dependencies = [
  "indicatif-log-bridge",
  "log",
  "noodles-bam",
- "noodles-bgzf",
+ "noodles-bgzf 0.32.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.37.0",
  "noodles-fasta 0.42.0",
  "noodles-gff",
  "noodles-sam",

+ 2 - 2
Cargo.toml

@@ -23,7 +23,7 @@ tracing-test = "0.2.5"
 tracing = "0.1.40"
 logtest = "2.0.0"
 test-log = "0.2.16"
-noodles-csi = "0.37.0"
+noodles-csi = "0.39.0"
 num-format = "0.4.4"
 locale_config = "0.3.0"
 byte-unit = "5.1.4"
@@ -34,7 +34,7 @@ ptyprocess = "0.4.1"
 duct = "0.13.7"
 uuid = { version = "1.10.0", features = ["v4"] }
 rayon = "1.10.0"
-hashbrown = { version = "0.14.5", features = ["rayon"] }
+hashbrown = { version = "0.15.0", features = ["rayon"] }
 ctrlc = "3.4.4"
 lazy_static = "1.5.0"
 indicatif = "0.17.8"

+ 66 - 4
src/collection/mod.rs

@@ -2,6 +2,7 @@ use std::{
     collections::HashMap,
     fmt,
     fs::{self, metadata},
+    os::unix::fs::MetadataExt,
     path::{Path, PathBuf},
     time::SystemTime,
 };
@@ -268,12 +269,16 @@ impl Collections {
         // de novo
         tasks.extend(self.todo_assembler()?);
 
-        // Tasks sorting and dedup
+        // Tasks sorting
+        tasks.sort_by_key(|task| task.get_order());
+
+        // Tasks dedup
         let mut hs = HashMap::new();
         tasks.into_iter().for_each(|t| {
             hs.insert(t.to_string(), t);
         });
         self.tasks = hs.into_values().collect();
+
         Ok(())
     }
 
@@ -285,7 +290,8 @@ impl Collections {
         self.tasks = hs.into_values().collect();
     }
 
-    pub fn todo_assembler(&mut self) -> anyhow::Result<Vec<CollectionsTasks>> {
+    // No pair needed
+    pub fn todo_assembler(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
         let mut tasks = Vec::new();
         let config = AssemblerConfig::default();
         for b in &self.bam.bams {
@@ -333,6 +339,64 @@ impl Collections {
         Ok(tasks)
     }
 
+    /// Builds the list of BAM pairs, one per distinct BAM id.
+    ///
+    /// The ids of `self.bam.bams` are sorted and deduplicated; for each id the
+    /// first entry returned by `self.bam.get(id, "diag")` and the first entry
+    /// returned by `self.bam.get(id, "mrd")` are cloned into a tuple
+    /// `(diag, mrd)`. Ids for which either lookup is empty are left out.
+    pub fn bam_pairs(&self) -> Vec<(bam::Bam, bam::Bam)> {
+        let mut case_ids: Vec<String> = self.bam.bams.iter().map(|b| b.id.clone()).collect();
+        case_ids.sort();
+        case_ids.dedup();
+
+        let mut pairs = Vec::new();
+        for case_id in &case_ids {
+            // Copy the inner reference out so it no longer borrows the temporary lookup result.
+            let diag = self.bam.get(case_id, "diag").first().copied();
+            let mrd = self.bam.get(case_id, "mrd").first().copied();
+            if let (Some(diag), Some(mrd)) = (diag, mrd) {
+                pairs.push((diag.clone(), mrd.clone()));
+            }
+        }
+        pairs
+    }
+
+    /// Lists the `Variants` tasks that still have to be run.
+    ///
+    /// For every BAM pair returned by `bam_pairs`, the expected output file is
+    /// `{result_dir}/{id}/diag/{id}_constit.bytes.gz` (id taken from the first
+    /// element of the pair). A task is queued when:
+    /// - that file does not exist, or
+    /// - it exists and at least one VCF grouped under the same id has a
+    ///   modification time strictly newer than the file's.
+    ///
+    /// When the file exists but no VCF group matches the id, nothing is queued.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the metadata of an existing output file cannot be read.
+    ///
+    /// NOTE: UNTESTED.
+    pub fn todo_variants_agg(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
+        let mut tasks = Vec::new();
+        let config = VariantsConfig::default();
+        let vcfs_ids = self.vcf.group_by_id();
+        for (diag, _mrd) in &self.bam_pairs() {
+            let const_path = format!(
+                "{}/{}/diag/{}_constit.bytes.gz",
+                &config.result_dir, &diag.id, &diag.id
+            );
+            let constit = Path::new(&const_path);
+
+            let needs_run = if constit.exists() {
+                match vcfs_ids.iter().find(|(id, _)| id == &diag.id) {
+                    Some((_, vcfs)) => {
+                        // Build the error message lazily: only on failure.
+                        let mtime = constit
+                            .metadata()
+                            .with_context(|| format!("Can't access file metadata {const_path}."))?
+                            .mtime();
+                        // Stop at the first VCF newer than the existing output.
+                        vcfs.iter().any(|vcf| mtime < vcf.file_metadata.mtime())
+                    }
+                    None => false,
+                }
+            } else {
+                // No output yet: always schedule.
+                true
+            };
+
+            if needs_run {
+                tasks.push(CollectionsTasks::Variants {
+                    id: diag.id.clone(),
+                    config: config.clone(),
+                });
+            }
+        }
+        Ok(tasks)
+    }
+
     pub fn run(&mut self) -> anyhow::Result<()> {
         // self.tasks.reverse();
         if self.tasks.is_empty() {
@@ -344,8 +408,6 @@ impl Collections {
             }
         } else {
             let n_tasks = self.tasks.len();
-            let mut tasks = self.tasks.clone();
-            tasks.sort_by_key(|task| task.get_order());
             warn!("{n_tasks} tasks to run");
             let mut i = 1;
             while let Some(task) = self.tasks.pop() {

+ 0 - 4
src/commands/dorado.rs

@@ -143,7 +143,6 @@ impl Dorado {
             "{}/{}_{}_hs1_cramino.txt",
             self.time_dir, self.case.id, self.case.time_point
         );
-        // if !Path::new(&cramino_out).exists() {
         info!("Quality control of BAM: {}", self.bam);
         let output = duct::cmd!(
             "cramino",
@@ -157,7 +156,6 @@ impl Dorado {
         .run()?;
 
         fs::write(cramino_out, output.stdout)?;
-        // }
         Ok(())
     }
 
@@ -166,7 +164,6 @@ impl Dorado {
             "{}/{}_{}_5mC_5hmC_summary.txt",
             self.time_dir, self.case.id, self.case.time_point
         );
-        // if !Path::new(&mod_summary).exists() {
         info!("Generating base modification summary for BAM: {}", self.bam);
         let output = cmd!("modkit", "summary", "-t", "50", &self.bam)
             .stdout_capture()
@@ -174,7 +171,6 @@ impl Dorado {
             .run()?;
 
         fs::write(mod_summary, output.stdout)?;
-        // }
         Ok(())
     }
 

+ 1 - 0
src/functions/assembler.rs

@@ -40,6 +40,7 @@ impl Assembler {
             config,
         }
     }
+
     pub fn run(&self) -> anyhow::Result<()> {
         let case_dir = format!("{}/{}/{}", self.config.result_dir, self.id, self.time_point);
         let scan_reads_dir = format!("{case_dir}/{}/reads", self.config.scan_dir_name);