Ver Fonte

dorado: added --kit-name to the basecaller command and use the sequencing kit read from the pod5 files instead of the config

Thomas há 1 ano atrás
pai
commit
b5468f9176
2 ficheiros alterados com 29 adições e 23 exclusões
  1. 27 21
      src/commands/dorado.rs
  2. 2 2
      src/config.rs

+ 27 - 21
src/commands/dorado.rs

@@ -9,7 +9,12 @@ use duct::cmd;
 use log::{debug, info, warn};
 use uuid::Uuid;
 
-use crate::{collection::{bam::bam_compo, pod5::FlowCellCase}, config::Config, helpers::find_unique_file, io::pod5_infos::Pod5Info};
+use crate::{
+    collection::{bam::bam_compo, pod5::FlowCellCase},
+    config::Config,
+    helpers::find_unique_file,
+    io::pod5_infos::Pod5Info,
+};
 
 #[derive(Debug, Clone)]
 pub struct DoradoParams {
@@ -187,11 +192,10 @@ impl Dorado {
     }
 
     pub fn merge_bam(&self, bam: &Path) -> anyhow::Result<()> {
-        let composition_a: Vec<String> =
-            bam_compo(bam.to_string_lossy().as_ref(), 20000)?
-                .iter()
-                .map(|(i, _)| i.clone())
-                .collect();
+        let composition_a: Vec<String> = bam_compo(bam.to_string_lossy().as_ref(), 20000)?
+            .iter()
+            .map(|(i, _)| i.clone())
+            .collect();
         let composition_b: Vec<String> = bam_compo(&self.bam, 20000)?
             .iter()
             .map(|(i, _)| i.clone())
@@ -271,19 +275,6 @@ impl Dorado {
         let ref_mmi = &config.align.ref_mmi;
         let samtools_view_threads = config.align.samtools_view_threads;
 
-        let dorado = format!(
-            "{dorado_bin} basecaller {dorado_arg} {pod_dir} --trim all --reference {ref_mmi}"
-        );
-        let samtools_view =
-            format!("samtools view -h -@ {samtools_view_threads} -b -o {muxed_bam}");
-        let pipe = format!("{dorado} | {samtools_view}");
-        info!("Running: {pipe}");
-        let pipe_cmd = cmd!("bash", "-c", pipe);
-        pipe_cmd.run()?;
-
-        info!("Basecalling ✅");
-
-        // Demux the temporary bam file
         // Get the sequencing kit from the first pod5 file
         let muxed_pod_dir = &cases.first().unwrap().pod_dir;
         let pod_path = fs::read_dir(muxed_pod_dir)?
@@ -298,6 +289,21 @@ impl Dorado {
             .sequencing_kit
             .to_uppercase();
 
+
+        let dorado = format!(
+            "{dorado_bin} basecaller --kit-name {sequencing_kit} {dorado_arg} {pod_dir} --trim all --reference {ref_mmi}"
+        );
+        let samtools_view =
+            format!("samtools view -h -@ {samtools_view_threads} -b -o {muxed_bam}");
+        let pipe = format!("{dorado} | {samtools_view}");
+        info!("Running: {pipe}");
+        let pipe_cmd = cmd!("bash", "-c", pipe);
+        pipe_cmd.run()?;
+
+        info!("Basecalling ✅");
+
+        // Demux the temporary bam file
+                
         let tmp_demux_dir = format!("{tmp_dir}/demuxed");
         fs::create_dir(&tmp_demux_dir)?;
 
@@ -328,8 +334,8 @@ impl Dorado {
                 barcode
             );
             let pipe = format!(
-                "{} trim --sequencing-kit {} {bam} | samtools view -h -@ {} -b /dev/stdin -o {trimmed_bam}",
-                config.align.dorado_bin, config.align.dorado_sequencing_kit, &config.align.samtools_view_threads
+                "{} trim --sequencing-kit {sequencing_kit} {bam} | samtools view -h -@ {} -b /dev/stdin -o {trimmed_bam}",
+                config.align.dorado_bin, &config.align.samtools_view_threads
             );
 
             info!("Running: {pipe}");

+ 2 - 2
src/config.rs

@@ -124,7 +124,7 @@ impl Default for Config {
 pub struct AlignConfig {
     pub dorado_bin: String,
     pub dorado_basecall_arg: String,
-    pub dorado_sequencing_kit: String,
+    // pub dorado_sequencing_kit: String,
     pub ref_fa: String,
     pub ref_mmi: String,
     pub samtools_view_threads: u16,
@@ -136,7 +136,7 @@ impl Default for AlignConfig {
         Self {
             dorado_bin: "/data/tools/dorado-0.9.0-linux-x64/bin/dorado".to_string(),
             dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
-            dorado_sequencing_kit: "SQK-LSK114".to_string(),
+            // dorado_sequencing_kit: "SQK-NBD114-24".to_string(),
             // to specify cuda devices (exclude the T1000)
             ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
             ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),