Thomas 6 сар өмнө
parent
commit
f4fddded08

+ 1 - 1
src/collection/flowcells.rs

@@ -320,7 +320,7 @@ impl FlowCells {
                         sheet,
                         pore,
                         throughput,
-                        FlowCellLocation::Archived(archive_dir.to_string()),
+                        FlowCellLocation::Archived(path.to_string()),
                         files,
                     ) {
                         Ok(fc) => {

+ 71 - 6
src/commands/dorado.rs

@@ -1,5 +1,5 @@
 use std::{
-    fs,
+    fs::{self, File},
     io::{Read, Write},
     path::{Path, PathBuf},
     time::SystemTime,
@@ -10,7 +10,11 @@ use log::{debug, info, warn};
 use uuid::Uuid;
 
 use crate::{
-    collection::{bam::bam_compo, pod5::FlowCellCase},
+    collection::{
+        bam::bam_compo,
+        flowcells::{FlowCell, IdInput},
+        pod5::FlowCellCase,
+    },
     config::Config,
     helpers::find_unique_file,
     io::pod5_infos::Pod5Info,
@@ -89,15 +93,29 @@ impl Dorado {
     }
 
     fn basecall_align(&mut self, dorado_bin: &str) -> anyhow::Result<()> {
-        let pod_dir = &self.case.pod_dir.display();
+        let pod_dir = &self.case.pod_dir;
         let ref_mmi = &self.config.align.ref_mmi;
         let bam = &self.bam;
         let samtools_view_threads = self.config.align.samtools_view_threads;
         let samtools_sort_threads = self.config.align.samtools_sort_threads;
         let dorado_arg = self.config.align.dorado_basecall_arg.clone();
 
+        let pod_path = fs::read_dir(pod_dir)
+            .map_err(|e| anyhow::anyhow!("Failed to read pod5 dir: {}.\n\t{e}", pod_dir.display()))?
+            .filter_map(|p| p.ok())
+            .map(|p| p.path())
+            .filter(|p| p.extension().unwrap() == "pod5")
+            .take(1)
+            .collect::<Vec<PathBuf>>()
+            .pop()
+            .unwrap();
+        let sequencing_kit = Pod5Info::from_pod5(pod_path.to_str().unwrap())
+            .sequencing_kit
+            .to_uppercase();
+
         let dorado = format!(
-            "{dorado_bin} basecaller {dorado_arg} {pod_dir} --trim all --reference {ref_mmi} "
+            "{dorado_bin} basecaller --kit-name {sequencing_kit} {dorado_arg} {} --trim all --emit-moves --reference {ref_mmi} ",
+            pod_dir.display()
         );
         info!("running Dorado: {dorado}");
         let samtools_view = format!("samtools view -h -@ {samtools_view_threads} -b /dev/stdin");
@@ -280,7 +298,12 @@ impl Dorado {
         // Get the sequencing kit from the first pod5 file
         let muxed_pod_dir = &cases.first().unwrap().pod_dir;
         let pod_path = fs::read_dir(muxed_pod_dir)
-            .map_err(|e| anyhow::anyhow!("Failed to read pod5 dir: {}.\n\t{e}", muxed_pod_dir.display()))?
+            .map_err(|e| {
+                anyhow::anyhow!(
+                    "Failed to read pod5 dir: {}.\n\t{e}",
+                    muxed_pod_dir.display()
+                )
+            })?
             .filter_map(|p| p.ok())
             .map(|p| p.path())
             .filter(|p| p.extension().unwrap() == "pod5")
@@ -300,7 +323,9 @@ impl Dorado {
         let pipe = format!("{dorado} | {samtools_view}");
         info!("Running: {pipe}");
         let pipe_cmd = cmd!("bash", "-c", &pipe);
-        pipe_cmd.run().map_err(|e| anyhow::anyhow!("Failed to run pipe: {pipe}.\n\t{}", e.to_string()))?;
+        pipe_cmd
+            .run()
+            .map_err(|e| anyhow::anyhow!("Failed to run pipe: {pipe}.\n\t{}", e.to_string()))?;
 
         info!("Basecalling ✅");
 
@@ -420,4 +445,44 @@ impl Dorado {
 
         Ok(())
     }
+
+    pub fn from_flowcell(flowcell: &FlowCell, config: &Config) -> anyhow::Result<()> {
+        let pod_dir = match &flowcell.location {
+            crate::collection::flowcells::FlowCellLocation::Local(pod_dir) => pod_dir,
+            crate::collection::flowcells::FlowCellLocation::Archived(pod_tar) => {
+                let file = File::open(pod_tar)
+                    .map_err(|e| anyhow::anyhow!("Failed to open tar file: {pod_tar}\n\t{e}"))?;
+                let mut archive = tar::Archive::new(file);
+                archive
+                    .unpack(&config.unarchive_tmp_dir)
+                    .map_err(|e| anyhow::anyhow!("Failed to un-tar: {pod_tar}\n\t{e}"))?;
+                // find
+                ""
+            }
+        };
+
+        // detect demuxed
+        let mut demuxed_pods = Vec::new();
+        for entry in fs::read_dir(pod_dir)? {
+            let entry = entry?;
+            let ft = entry.file_type()?;
+            if ft.is_dir() {
+                let fname = entry.file_name().into_string().unwrap();
+                if fname.contains("barcode") {
+                    if let Some(case) = flowcell.cases.iter().find_map(|c| {
+                        if c.barcode.replace("NB", "") == fname.replace("barcode", "") {
+                            Some(c.clone())
+                        } else {
+                            None
+                        }
+                    }) {
+                        demuxed_pods.push((entry.path(), case));
+                    }
+                }
+            }
+            todo!();
+        }
+
+        Ok(())
+    }
 }

+ 2 - 0
src/config.rs

@@ -2,6 +2,7 @@
 pub struct Config {
     pub pod_dir: String,
     pub result_dir: String,
+    pub unarchive_tmp_dir: String,
     pub align: AlignConfig,
     pub reference: String,
     pub reference_name: String,
@@ -105,6 +106,7 @@ impl Default for Config {
 
             // File structure
             result_dir: "/data/longreads_basic_pipe".to_string(),
+            unarchive_tmp_dir: "/data/unarchived".to_string(),
 
             tumoral_name: "diag".to_string(),
             normal_name: "mrd".to_string(),