Bladeren bron

acquisition ID from pileup

Thomas 1 jaar geleden
bovenliggende
commit
370a7c9ec0
4 gewijzigde bestanden met toevoegingen van 242 en 247 verwijderingen
  1. 196 236
      Cargo.lock
  2. 1 1
      src/bam.rs
  3. 32 8
      src/commands/dorado.rs
  4. 13 2
      src/lib.rs

File diff suppressed because it is too large
+ 196 - 236
Cargo.lock


+ 1 - 1
src/bam.rs

@@ -89,7 +89,7 @@ impl Bam {
             None
         };
 
-        let composition = pandora_lib_pileup::bam_compo(path.to_string_lossy().as_ref(), 10000)?;
+        let composition = pandora_lib_pileup::bam_compo(path.to_string_lossy().as_ref(), 20000)?;
 
         Ok(Self {
             path,

+ 32 - 8
src/commands/dorado.rs

@@ -74,9 +74,11 @@ impl Dorado {
 
     fn create_directories(&self) -> anyhow::Result<()> {
         if !std::path::Path::new(&self.case_dir).exists() {
+            info!("Creating directory {}", self.case_dir);
             fs::create_dir(&self.case_dir)?;
         }
         if !std::path::Path::new(&self.time_dir).exists() {
+            info!("Creating directory {}", self.time_dir);
             fs::create_dir(&self.time_dir)?;
         }
         Ok(())
@@ -197,6 +199,23 @@ impl Dorado {
     }
 
     fn merge_bam(&self, bam: &Path) -> anyhow::Result<()> {
+        let composition_a: Vec<String> =
+            pandora_lib_pileup::bam_compo(bam.to_string_lossy().as_ref(), 20000)?
+                .iter()
+                .map(|(i, _)| i.clone())
+                .collect();
+        let composition_b: Vec<String> = pandora_lib_pileup::bam_compo(&self.bam, 20000)?
+            .iter()
+            .map(|(i, _)| i.clone())
+            .collect();
+        let n_id = composition_a
+            .iter()
+            .filter(|id| composition_b.contains(id))
+            .count();
+        if n_id > 0 {
+            return Err(anyhow::anyhow!("Already merged"));
+        }
+
         let into = PathBuf::from(&self.bam);
         let dir = into.parent().unwrap();
 
@@ -241,6 +260,7 @@ impl Dorado {
         .run()?;
         fs::remove_file(tmp_original)?;
         fs::remove_file(tmp_original_i)?;
+        fs::remove_file(bam)?;
         self.index()?;
         Ok(())
     }
@@ -304,15 +324,13 @@ impl Dorado {
         info!("Demux ✅");
 
         for case in cases.iter() {
-            let bam = format!(
-                "{tmp_demux_dir}/{sequencing_kit}_barcode{}.bam",
-                case.barcode
-            );
+            let barcode = case.barcode.replace("NB", "");
+            let bam = format!("{tmp_demux_dir}/{sequencing_kit}_barcode{}.bam", barcode);
 
             // Trim
             let trimmed_bam = format!(
                 "{tmp_demux_dir}/{sequencing_kit}_barcode{}_trimmed.bam",
-                case.barcode
+                barcode
             );
             let pipe = format!(
                 "{} trim {bam} | samtools view -h -@ {} -b /dev/stdin -o {trimmed_bam}",
@@ -323,7 +341,7 @@ impl Dorado {
             // Align
             let aligned_bam = format!(
                 "{tmp_demux_dir}/{sequencing_kit}_barcode{}_aligned.bam",
-                case.barcode
+                barcode
             );
             let dorado = format!(
                 "{} aligner --threads 160 {} {trimmed_bam}",
@@ -357,7 +375,7 @@ impl Dorado {
             d.run_cramino()?;
             d.run_modkit()?;
         }
-        fs::remove_dir(tmp_dir)?;
+        fs::remove_dir_all(tmp_dir)?;
 
         Ok(())
     }
@@ -380,21 +398,27 @@ impl Dorado {
         let bam_path = std::path::Path::new(&self.bam);
 
         if !bam_path.exists() {
+            info!("Creating new bam file");
             self.basecall_align(dorado_bin)?;
         } else {
+            // check if merge before call
             let new_bam_path = bam_path
                 .parent()
                 .unwrap()
                 .join(format!("{}.bam", Uuid::new_v4()));
             warn!("Creating new bam {}", new_bam_path.display());
 
+            let bam = self.bam.clone();
+            self.bam = new_bam_path.clone().to_string_lossy().to_string();
             self.basecall_align(dorado_bin)?;
+            self.bam.clone_from(&bam);
             self.merge_bam(&new_bam_path)?;
         }
 
+        self.index()?;
         self.run_cramino()?;
         self.run_modkit()?;
-        self.create_fastq()?;
+        // self.create_fastq()?;
 
         let end_time = std::time::SystemTime::now();
         self.end_time = end_time;

+ 13 - 2
src/lib.rs

@@ -3,7 +3,7 @@ use std::{collections::HashMap, path::PathBuf};
 use bam::BamCollection;
 use commands::dorado::{Dorado, Run};
 use config::Config;
-use log::info;
+use log::{info, warn};
 use pod5::{FlowCellCase, Pod5Collection, Pod5Type};
 use vcf::VcfCollection;
 
@@ -82,12 +82,23 @@ impl Collections {
     }
 
     pub fn run(&mut self) -> anyhow::Result<()> {
+        self.tasks.reverse();
         if self.tasks.is_empty() {
             self.todo();
-            self.run()?;
+            if self.tasks.is_empty() {
+                return Ok(());
+            } else {
+                self.run()?;
+            }
         } else {
+            let n_tasks = self.tasks.len();
+            warn!("{n_tasks} tasks to run");
+            let mut i = 1;
             while let Some(task) = self.tasks.pop() {
+                warn!("Running {i}/{n_tasks}");
+                info!("{task:#?}");
                 task.run()?;
+                i += 1;
             }
         }
         Ok(())

Some files were not shown because too many files changed in this diff