|
|
@@ -4,17 +4,233 @@ use std::{
|
|
|
time::SystemTime,
|
|
|
};
|
|
|
|
|
|
+use anyhow::Context;
|
|
|
use duct::cmd;
|
|
|
use log::{debug, info, warn};
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
use crate::{
|
|
|
collection::{bam::bam_composition, flowcells::FlowCell, pod5::FlowCellCase},
|
|
|
+ commands::Command,
|
|
|
config::Config,
|
|
|
helpers::find_unique_file,
|
|
|
io::pod5_infos::Pod5Info,
|
|
|
};
|
|
|
|
|
|
/// Description of one `dorado basecaller` invocation over a pod5 directory.
///
/// Instances are created with `DoradoBasecall::from_config`; `sequencing_kit`
/// starts out empty and is filled in by `Command::init`.
#[derive(Debug)]
pub struct DoradoBasecall {
    /// Path of the dorado executable.
    dorado: PathBuf,
    /// File the basecaller output is redirected to.
    output_bam: PathBuf,
    /// Directory holding the `.pod5` input files.
    pod5_dir: PathBuf,
    /// Upper-cased kit name passed to `--kit-name`.
    sequencing_kit: String,
    /// Extra arguments inserted verbatim into the basecaller command line.
    dorado_basecall_arg: String,
}
|
|
|
+
|
|
|
+impl DoradoBasecall {
|
|
|
+ pub fn from_config(config: &Config, pod5_dir: PathBuf, output_bam: PathBuf) -> Self {
|
|
|
+ Self {
|
|
|
+ dorado: (&config.align.dorado_bin).into(),
|
|
|
+ output_bam,
|
|
|
+ pod5_dir,
|
|
|
+ sequencing_kit: String::new(),
|
|
|
+ dorado_basecall_arg: config.align.dorado_basecall_arg.clone(),
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Command for DoradoBasecall {
|
|
|
+ fn init(&mut self) -> anyhow::Result<()> {
|
|
|
+ if !self.pod5_dir.exists() || !self.pod5_dir.is_dir() {
|
|
|
+ anyhow::bail!(
|
|
|
+ "The pod5 dir is not accessible.\n{}",
|
|
|
+ self.pod5_dir.display()
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ if self.output_bam.exists() {
|
|
|
+ anyhow::bail!("The output BAM file already exists.");
|
|
|
+ }
|
|
|
+
|
|
|
+ let pod_path = fs::read_dir(&self.pod5_dir)
|
|
|
+ .context(format!(
|
|
|
+ "Failed to read pod5 dir: {}",
|
|
|
+ self.pod5_dir.display()
|
|
|
+ ))?
|
|
|
+ .filter_map(Result::ok) // keep only Ok entries
|
|
|
+ .map(|e| e.path())
|
|
|
+ .find(|p| p.extension().and_then(|e| e.to_str()) == Some("pod5"))
|
|
|
+ .context("No .pod5 file found")?;
|
|
|
+
|
|
|
+ self.sequencing_kit = Pod5Info::from_pod5(&pod_path.to_string_lossy())
|
|
|
+ .sequencing_kit
|
|
|
+ .to_uppercase();
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ let dorado_bin = &self.dorado;
|
|
|
+ let pod_dir = &self.pod5_dir;
|
|
|
+ let bam = &self.output_bam;
|
|
|
+ let dorado_arg = &self.dorado_basecall_arg;
|
|
|
+ let sequencing_kit = &self.sequencing_kit;
|
|
|
+
|
|
|
+ format!(
|
|
|
+ "{} basecaller --kit-name {sequencing_kit} {dorado_arg} {} --trim all --emit-moves > {}",
|
|
|
+ dorado_bin.display(),
|
|
|
+ pod_dir.display(),
|
|
|
+ bam.display()
|
|
|
+ )
|
|
|
+
|
|
|
+ // let samtools_view = format!(
|
|
|
+ // "{} view -h -@ {samtools_view_threads} -b /dev/stdin",
|
|
|
+ // samtools.display()
|
|
|
+ // );
|
|
|
+ // let samtools_sort = format!(
|
|
|
+ // "{} sort -@ {samtools_sort_threads} /dev/stdin -o {}",
|
|
|
+ // samtools.display(),
|
|
|
+ // bam.display()
|
|
|
+ // );
|
|
|
+ //
|
|
|
+ // // format!("{dorado} | {samtools_view} | {samtools_sort}")
|
|
|
+ // format!("{dorado} | {samtools_view} > {}", bam.display())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Description of one `dorado aligner` invocation.
#[derive(Debug)]
pub struct DoradoAlign {
    /// Path of the dorado executable.
    pub dorado: PathBuf,
    /// Reference passed to the aligner (taken from `config.align.ref_fa`).
    pub reference: PathBuf,
    /// BAM file to align.
    pub input: PathBuf,
    /// File the aligner output is redirected to.
    pub output: PathBuf,
    /// Value of `--threads`; also used as the Slurm CPU request.
    pub threads: u8,
}
|
|
|
+
|
|
|
+impl DoradoAlign {
|
|
|
+ pub fn from_config(config: &Config, input: PathBuf, output: PathBuf) -> Self {
|
|
|
+ Self {
|
|
|
+ dorado: (&config.align.dorado_bin).into(),
|
|
|
+ reference: (&config.align.ref_fa).into(),
|
|
|
+ input,
|
|
|
+ output,
|
|
|
+ threads: config.align.dorado_aligner_threads,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl super::Command for DoradoAlign {
|
|
|
+ fn init(&mut self) -> anyhow::Result<()> {
|
|
|
+ if !self.input.exists() {
|
|
|
+ anyhow::bail!(
|
|
|
+ "The input BAM file is not accessible.\n{}",
|
|
|
+ self.input.display()
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ if self.output.exists() {
|
|
|
+ anyhow::bail!(
|
|
|
+ "The output BAM file already exists.\n{}",
|
|
|
+ self.output.display()
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ format!(
|
|
|
+ "{} aligner --threads {} --allow-sec-supp --mm2-opts '--secondary yes' {} {} > {}",
|
|
|
+ self.dorado.display(),
|
|
|
+ self.threads,
|
|
|
+ self.reference.display(),
|
|
|
+ self.input.display(),
|
|
|
+ self.output.display()
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl super::SlurmRunner for DoradoAlign {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ super::SlurmParams {
|
|
|
+ job_name: Some("dorado_align".into()),
|
|
|
+ cpus_per_task: Some(self.threads.into()),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("gpgpuq".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// Running with Slurm: srun --job-name=dorado_basecall --cpus-per-task=X --mem=60G --partition=gpgpuq --gres=gpu:h100:4 bash -c /mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado basecaller --kit-name SQK-NBD114-24 -x 'cuda:all' sup,5mC_5hmC /mnt/beegfs02/scratch/t_steimle/test_data/inputs/pod5/A --trim all --emit-moves > /mnt/beegfs02/scratch/t_steimle/test_data/outputs/aligned_5.bam
|
|
|
+// 04 cpu Basecalled @ Samples/s: 5.359770e+07
|
|
|
+// 05 cpu Basecalled @ Samples/s: 6.155305e+07
|
|
|
+// 06 cpu Basecalled @ Samples/s: 6.870292e+07
|
|
|
+// 07 cpu Basecalled @ Samples/s: 7.230430e+07
|
|
|
+// 08 cpu Basecalled @ Samples/s: 7.669054e+07
|
|
|
+// 10 cpu Basecalled @ Samples/s: 8.398348e+07
|
|
|
+// 15 cpu Basecalled @ Samples/s: 8.776285e+07
|
|
|
+impl super::SlurmRunner for DoradoBasecall {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ super::SlurmParams {
|
|
|
+ job_name: Some("dorado_basecall".into()),
|
|
|
+ cpus_per_task: Some(10),
|
|
|
+ mem: Some("60G".into()),
|
|
|
+ partition: Some("gpgpuq".into()),
|
|
|
+ gres: Some("gpu:h100:4".into()),
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use log::info;
    use crate::TEST_DIR;

    use crate::{commands::SlurmRunner, config::Config, helpers::test_init};

    /// Basecalls the pod5 files under `TEST_DIR/inputs/pod5/A` through `SlurmRunner::run`.
    #[test]
    fn dorado_basecall() -> anyhow::Result<()> {
        test_init();

        let config = Config::default();

        let mut basecall = DoradoBasecall::from_config(
            &config,
            format!("{}/inputs/pod5/A", TEST_DIR.as_str()).into(),
            format!("{}/outputs/unaligned_10.bam", TEST_DIR.as_str()).into(),
        );

        info!("Basecalling");
        let out = SlurmRunner::run(&mut basecall)?;

        println!("{out:#?}");

        Ok(())
    }

    /// Aligns `TEST_DIR/outputs/unaligned_10.bam`, the file written by
    /// `dorado_basecall`, so that output must already exist.
    #[test]
    fn dorado_align() -> anyhow::Result<()> {
        test_init();

        let config = Config::default();

        let mut align = DoradoAlign::from_config(
            &config,
            format!("{}/outputs/unaligned_10.bam", TEST_DIR.as_str()).into(),
            format!("{}/outputs/10_hs1_sorted.bam", TEST_DIR.as_str()).into(),
        );

        info!("Aligning");
        let _out = SlurmRunner::run(&mut align)?;

        Ok(())
    }
}
|
|
|
+
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct DoradoParams {
|
|
|
pub ref_fa: String,
|
|
|
@@ -58,37 +274,39 @@ impl Dorado {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
- fn create_reference_mmi(&self) -> anyhow::Result<()> {
|
|
|
- if !std::path::Path::new(&self.config.align.ref_mmi).exists() {
|
|
|
- cmd!(
|
|
|
- "minimap2",
|
|
|
- "-x",
|
|
|
- "map-ont",
|
|
|
- "-d",
|
|
|
- &self.config.align.ref_mmi,
|
|
|
- &self.config.align.ref_fa
|
|
|
- )
|
|
|
- .run()?;
|
|
|
- }
|
|
|
+ // ------------------------------------------------------------------
|
|
|
+ // Small helper to actually execute a shell command
|
|
|
+ // ------------------------------------------------------------------
|
|
|
+ fn run_shell(cmdline: &str) -> anyhow::Result<()> {
|
|
|
+ info!("Running: {cmdline}");
|
|
|
+ cmd!("bash", "-c", cmdline)
|
|
|
+ .run()
|
|
|
+ .map_err(|e| anyhow::anyhow!("Failed to run: {cmdline}\n\t{}", e.to_string()))?;
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- fn create_directories(&self) -> anyhow::Result<()> {
|
|
|
- if !std::path::Path::new(&self.case_dir).exists() {
|
|
|
- info!("Creating directory {}", self.case_dir);
|
|
|
- fs::create_dir(&self.case_dir)?;
|
|
|
- }
|
|
|
- if !std::path::Path::new(&self.time_dir).exists() {
|
|
|
- info!("Creating directory {}", self.time_dir);
|
|
|
- fs::create_dir(&self.time_dir)?;
|
|
|
+ // ------------------------------------------------------------------
|
|
|
+ // Command builders (return strings)
|
|
|
+ // ------------------------------------------------------------------
|
|
|
+
|
|
|
+ /// minimap2 index creation (returns None if index already exists)
|
|
|
+ fn create_reference_mmi_cmd(&self) -> Option<String> {
|
|
|
+ if std::path::Path::new(&self.config.align.ref_mmi).exists() {
|
|
|
+ None
|
|
|
+ } else {
|
|
|
+ Some(format!(
|
|
|
+ "minimap2 -x map-ont -d {} {}",
|
|
|
+ self.config.align.ref_mmi, self.config.align.ref_fa
|
|
|
+ ))
|
|
|
}
|
|
|
- Ok(())
|
|
|
}
|
|
|
|
|
|
- fn basecall_align(&mut self, dorado_bin: &str) -> anyhow::Result<()> {
|
|
|
+ /// Dorado + samtools pipeline for basecalling + alignment
|
|
|
+ fn basecall_align_cmd(&self, dorado_bin: &str) -> anyhow::Result<String> {
|
|
|
let pod_dir = &self.case.pod_dir;
|
|
|
- let ref_mmi = &self.config.align.ref_mmi;
|
|
|
+ let ref_fa = &self.config.align.ref_fa;
|
|
|
let bam = &self.bam;
|
|
|
+ let samtools = &self.config.align.samtools_bin;
|
|
|
let samtools_view_threads = self.config.align.samtools_view_threads;
|
|
|
let samtools_sort_threads = self.config.align.samtools_sort_threads;
|
|
|
let dorado_arg = self.config.align.dorado_basecall_arg.clone();
|
|
|
@@ -102,72 +320,164 @@ impl Dorado {
|
|
|
.collect::<Vec<PathBuf>>()
|
|
|
.pop()
|
|
|
.unwrap();
|
|
|
+
|
|
|
let sequencing_kit = Pod5Info::from_pod5(pod_path.to_str().unwrap())
|
|
|
.sequencing_kit
|
|
|
.to_uppercase();
|
|
|
|
|
|
let dorado = format!(
|
|
|
- "{dorado_bin} basecaller --kit-name {sequencing_kit} {dorado_arg} {} --trim all --emit-moves --reference {ref_mmi} ",
|
|
|
+ "{dorado_bin} basecaller --kit-name {sequencing_kit} {dorado_arg} {} --trim all --emit-moves --reference {ref_fa}",
|
|
|
pod_dir.display()
|
|
|
);
|
|
|
- info!("running Dorado: {dorado}");
|
|
|
- let samtools_view = format!("samtools view -h -@ {samtools_view_threads} -b /dev/stdin");
|
|
|
- let samtools_sort = format!("samtools sort -@ {samtools_sort_threads} /dev/stdin -o {bam}");
|
|
|
- let pipe = format!("{dorado} | {samtools_view} | {samtools_sort}");
|
|
|
- info!("Running: {pipe}");
|
|
|
-
|
|
|
- let pipe_cmd = cmd!("bash", "-c", &pipe);
|
|
|
- pipe_cmd
|
|
|
- .run()
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to run pipe: {pipe}.\n\t{}", e.to_string()))?;
|
|
|
+ info!("Dorado command: {dorado}");
|
|
|
|
|
|
- // let pipe_cmd = cmd!("bash", "-c", pipe);
|
|
|
- // let mut reader = pipe_cmd.stderr_capture().reader()?;
|
|
|
- //
|
|
|
- // let mut buffer = [0; 1];
|
|
|
- // let mut line = String::new();
|
|
|
- //
|
|
|
- // loop {
|
|
|
- // match reader.read(&mut buffer) {
|
|
|
- // Ok(0) => break, // End of output
|
|
|
- // Ok(_) => {
|
|
|
- // let char = buffer[0] as char;
|
|
|
- // eprint!("-{}", char);
|
|
|
- // std::io::stderr().flush()?;
|
|
|
- //
|
|
|
- // if char == '\n' {
|
|
|
- // // Send the complete line
|
|
|
- // self.log.push(line.clone());
|
|
|
- // line.clear();
|
|
|
- // } else {
|
|
|
- // line.push(char);
|
|
|
- // }
|
|
|
- // }
|
|
|
- // Err(err) => {
|
|
|
- // warn!("Error reading from stderr: {}", err);
|
|
|
- // break;
|
|
|
- // }
|
|
|
- // }
|
|
|
- // }
|
|
|
+ let samtools_view = format!("{samtools} view -h -@ {samtools_view_threads} -b /dev/stdin");
|
|
|
+ let samtools_sort =
|
|
|
+ format!("{samtools} sort -@ {samtools_sort_threads} /dev/stdin -o {bam}");
|
|
|
|
|
|
- Ok(())
|
|
|
+ Ok(format!("{dorado} | {samtools_view} | {samtools_sort}"))
|
|
|
}
|
|
|
|
|
|
- pub fn index(&self) -> anyhow::Result<()> {
|
|
|
+ /// samtools index command
|
|
|
+ fn index_cmd(&self) -> String {
|
|
|
let t = self.config.align.samtools_view_threads.to_string();
|
|
|
- let cmd = format!("index -@ {t} {}", &self.bam);
|
|
|
- info!("Running samtools {cmd}");
|
|
|
- cmd!("samtools", "index", "-@", &t, &self.bam).run()?;
|
|
|
+ format!(
|
|
|
+ "{} index -@ {t} {}",
|
|
|
+ self.config.align.samtools_bin, self.bam
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ /// cramino QC command
|
|
|
+ fn cramino_cmd(&self) -> String {
|
|
|
+ format!("cramino -t 150 --karyotype {}", self.bam)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// modkit summary command
|
|
|
+ fn modkit_cmd(&self) -> String {
|
|
|
+ format!("modkit summary -t 50 {}", self.bam)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// fastq export pipeline from BAM
|
|
|
+ fn create_fastq_cmd(&self) -> String {
|
|
|
+ let bam = &self.bam;
|
|
|
+ let fastq = format!(
|
|
|
+ "{}/{}/{}/{}_{}.fastq.gz",
|
|
|
+ self.case_dir, self.case.id, self.case.time_point, self.case.id, self.case.time_point
|
|
|
+ );
|
|
|
+ let samtools = format!("samtools fastq -@ 150 {bam}");
|
|
|
+ let crabz = format!("crabz -f bgzf - -o {fastq}");
|
|
|
+ format!("{samtools} | {crabz}")
|
|
|
+ }
|
|
|
+
|
|
|
+ /// samtools merge command used in `merge_bam`
|
|
|
+ fn merge_bam_cmd(&self, bam: &Path, into: &Path) -> String {
|
|
|
+ format!(
|
|
|
+ "{} merge -@ 160 -h {} {} {} {}",
|
|
|
+ self.config.align.samtools_bin,
|
|
|
+ bam.display(),
|
|
|
+ into.display(),
|
|
|
+ bam.display(),
|
|
|
+ into.display() // placeholder, real tmp path is managed outside
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ // mux basecall + samtools view into muxed.bam
|
|
|
+ fn from_mux_basecall_cmd(
|
|
|
+ config: &Config,
|
|
|
+ sequencing_kit: &str,
|
|
|
+ pod_dir: &str,
|
|
|
+ muxed_bam: &str,
|
|
|
+ ) -> String {
|
|
|
+ let dorado_bin = &config.align.dorado_bin;
|
|
|
+ let dorado_arg = &config.align.dorado_basecall_arg;
|
|
|
+ let ref_mmi = &config.align.ref_mmi;
|
|
|
+ let samtools_bin = &config.align.samtools_bin;
|
|
|
+ let samtools_view_threads = config.align.samtools_view_threads;
|
|
|
+
|
|
|
+ let dorado = format!(
|
|
|
+ "{dorado_bin} basecaller --kit-name {sequencing_kit} {dorado_arg} {pod_dir} --emit-moves --trim all --reference {ref_mmi}"
|
|
|
+ );
|
|
|
+ let samtools_view =
|
|
|
+ format!("{samtools_bin} view -h -@ {samtools_view_threads} -b -o {muxed_bam}");
|
|
|
+ format!("{dorado} | {samtools_view}")
|
|
|
+ }
|
|
|
+
|
|
|
+ /// samtools split command for demux
|
|
|
+ fn demux_cmd(config: &Config, muxed_bam: &str, tmp_demux_dir: &str) -> String {
|
|
|
+ format!(
|
|
|
+ "{} split -@ {} -f '{}/%*_%!.bam' {}",
|
|
|
+ config.align.samtools_bin, config.align.samtools_view_threads, tmp_demux_dir, muxed_bam
|
|
|
+ )
|
|
|
+ }
|
|
|
+
|
|
|
+ /// dorado aligner + samtools for realignment in from_mux
|
|
|
+ fn realign_cmd(
|
|
|
+ config: &Config,
|
|
|
+ sequencing_kit: &str,
|
|
|
+ barcode: &str,
|
|
|
+ bam: &str,
|
|
|
+ aligned_bam: &str,
|
|
|
+ ) -> String {
|
|
|
+ let dorado = format!(
|
|
|
+ "{} aligner --threads {} {} {}",
|
|
|
+ config.align.dorado_bin, config.align.dorado_aligner_threads, config.align.ref_fa, bam,
|
|
|
+ );
|
|
|
+ let samtools_view = format!(
|
|
|
+ "{} view -h -@ {} -b /dev/stdin",
|
|
|
+ config.align.samtools_bin, config.align.samtools_view_threads
|
|
|
+ );
|
|
|
+ let samtools_sort = format!(
|
|
|
+ "{} sort -@ {} /dev/stdin -o {}",
|
|
|
+ config.align.samtools_bin, config.align.samtools_sort_threads, aligned_bam
|
|
|
+ );
|
|
|
+ let _ = sequencing_kit; // not used here but kept for symmetry
|
|
|
+ format!("{dorado} | {samtools_view} | {samtools_sort}")
|
|
|
+ }
|
|
|
+
|
|
|
+ // ------------------------------------------------------------------
|
|
|
+ // Workflow methods that now *run* the commands
|
|
|
+ // ------------------------------------------------------------------
|
|
|
+
|
|
|
+ fn create_reference_mmi(&self) -> anyhow::Result<()> {
|
|
|
+ if let Some(cmdline) = self.create_reference_mmi_cmd() {
|
|
|
+ Self::run_shell(&cmdline)?;
|
|
|
+ }
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+ fn create_directories(&self) -> anyhow::Result<()> {
|
|
|
+ if !std::path::Path::new(&self.case_dir).exists() {
|
|
|
+ info!("Creating directory {}", self.case_dir);
|
|
|
+ fs::create_dir(&self.case_dir)?;
|
|
|
+ }
|
|
|
+ if !std::path::Path::new(&self.time_dir).exists() {
|
|
|
+ info!("Creating directory {}", self.time_dir);
|
|
|
+ fs::create_dir(&self.time_dir)?;
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ fn basecall_align(&mut self, dorado_bin: &str) -> anyhow::Result<()> {
|
|
|
+ let pipe = self.basecall_align_cmd(dorado_bin)?;
|
|
|
+ Self::run_shell(&pipe)
|
|
|
+ .map_err(|e| anyhow::anyhow!("Failed to run pipe: {pipe}.\n\t{}", e.to_string()))
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn index(&self) -> anyhow::Result<()> {
|
|
|
+ let cmdline = self.index_cmd();
|
|
|
+ info!("Running samtools index for {}", self.bam);
|
|
|
+ Self::run_shell(&cmdline)
|
|
|
+ }
|
|
|
+
|
|
|
pub fn run_cramino(&self) -> anyhow::Result<()> {
|
|
|
let cramino_out = format!(
|
|
|
"{}/{}_{}_hs1_cramino.txt",
|
|
|
self.time_dir, self.case.id, self.case.time_point
|
|
|
);
|
|
|
info!("Quality control with cramino for BAM: {}", self.bam);
|
|
|
- let output = duct::cmd!("cramino", "-t", "150", "--karyotype", &self.bam)
|
|
|
+ let cmdline = self.cramino_cmd();
|
|
|
+
|
|
|
+ let output = cmd!("bash", "-c", &cmdline)
|
|
|
.stdout_capture()
|
|
|
.unchecked()
|
|
|
.run()?;
|
|
|
@@ -182,7 +492,9 @@ impl Dorado {
|
|
|
self.time_dir, self.case.id, self.case.time_point
|
|
|
);
|
|
|
info!("Generating base modification summary for BAM: {}", self.bam);
|
|
|
- let output = cmd!("modkit", "summary", "-t", "50", &self.bam)
|
|
|
+ let cmdline = self.modkit_cmd();
|
|
|
+
|
|
|
+ let output = cmd!("bash", "-c", &cmdline)
|
|
|
.stdout_capture()
|
|
|
.unchecked()
|
|
|
.run()?;
|
|
|
@@ -192,18 +504,13 @@ impl Dorado {
|
|
|
}
|
|
|
|
|
|
pub fn create_fastq(&self) -> anyhow::Result<()> {
|
|
|
- let bam = &self.bam;
|
|
|
let fastq = format!(
|
|
|
"{}/{}/{}/{}_{}.fastq.gz",
|
|
|
self.case_dir, self.case.id, self.case.time_point, self.case.id, self.case.time_point
|
|
|
);
|
|
|
if !std::path::Path::new(&fastq).exists() {
|
|
|
- let samtools = format!("samtools fastq -@ 150 {bam}");
|
|
|
- let crabz = format!("crabz -f bgzf - -o {fastq}");
|
|
|
- let pipe = format!("{samtools} | {crabz}");
|
|
|
- info!("Running: {pipe}");
|
|
|
- let pipe_cmd = duct::cmd!("bash", "-c", pipe);
|
|
|
- pipe_cmd.run()?;
|
|
|
+ let pipe = self.create_fastq_cmd();
|
|
|
+ Self::run_shell(&pipe)?;
|
|
|
}
|
|
|
Ok(())
|
|
|
}
|
|
|
@@ -251,26 +558,18 @@ impl Dorado {
|
|
|
);
|
|
|
fs::rename(original_i, tmp_original_i.clone())?;
|
|
|
|
|
|
- let cmd = format!(
|
|
|
- "samtools merge -@ 160 -h {} {} {} {}",
|
|
|
+ // real merge command with the correct tmp path
|
|
|
+ let merge_cmdline = format!(
|
|
|
+ "{} merge -@ 160 -h {} {} {} {}",
|
|
|
+ self.config.align.samtools_bin,
|
|
|
bam.display(),
|
|
|
into.display(),
|
|
|
bam.display(),
|
|
|
tmp_original.display()
|
|
|
);
|
|
|
- info!("Running {cmd}");
|
|
|
- cmd!(
|
|
|
- "samtools",
|
|
|
- "merge",
|
|
|
- "-@",
|
|
|
- "160",
|
|
|
- "-h",
|
|
|
- bam,
|
|
|
- into,
|
|
|
- bam,
|
|
|
- tmp_original.clone()
|
|
|
- )
|
|
|
- .run()?;
|
|
|
+ info!("Running {merge_cmdline}");
|
|
|
+ Self::run_shell(&merge_cmdline)?;
|
|
|
+
|
|
|
fs::remove_file(tmp_original)?;
|
|
|
fs::remove_file(tmp_original_i)?;
|
|
|
fs::remove_file(bam)?;
|
|
|
@@ -280,20 +579,15 @@ impl Dorado {
|
|
|
}
|
|
|
|
|
|
pub fn from_mux(cases: Vec<FlowCellCase>, config: Config) -> anyhow::Result<()> {
|
|
|
- // Creating a temporary directory
|
|
|
+ // tmp dir
|
|
|
let tmp_dir = format!("{}/.{}", config.result_dir, Uuid::new_v4());
|
|
|
info!("Creating tmp dir {tmp_dir}");
|
|
|
fs::create_dir(&tmp_dir)?;
|
|
|
|
|
|
- // Dorado base calling and align into a temporary bam file
|
|
|
+ // basecalling into muxed.bam
|
|
|
let muxed_bam = format!("{tmp_dir}/muxed.bam");
|
|
|
- let dorado_bin = &config.align.dorado_bin;
|
|
|
- let dorado_arg = &config.align.dorado_basecall_arg;
|
|
|
- let pod_dir = cases[0].pod_dir.display();
|
|
|
- let ref_mmi = &config.align.ref_mmi;
|
|
|
- let samtools_view_threads = config.align.samtools_view_threads;
|
|
|
+ let pod_dir = cases[0].pod_dir.display().to_string();
|
|
|
|
|
|
- // Get the sequencing kit from the first pod5 file
|
|
|
let muxed_pod_dir = &cases.first().unwrap().pod_dir;
|
|
|
let pod_path = fs::read_dir(muxed_pod_dir)
|
|
|
.map_err(|e| {
|
|
|
@@ -313,34 +607,21 @@ impl Dorado {
|
|
|
.sequencing_kit
|
|
|
.to_uppercase();
|
|
|
|
|
|
- let dorado = format!(
|
|
|
- "{dorado_bin} basecaller --kit-name {sequencing_kit} {dorado_arg} {pod_dir} --emit-moves --trim all --reference {ref_mmi}"
|
|
|
- );
|
|
|
- let samtools_view =
|
|
|
- format!("samtools view -h -@ {samtools_view_threads} -b -o {muxed_bam}");
|
|
|
- let pipe = format!("{dorado} | {samtools_view}");
|
|
|
- info!("Running: {pipe}");
|
|
|
- let pipe_cmd = cmd!("bash", "-c", &pipe);
|
|
|
- pipe_cmd
|
|
|
- .run()
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to run pipe: {pipe}.\n\t{}", e.to_string()))?;
|
|
|
-
|
|
|
+ let basecall_pipe =
|
|
|
+ Self::from_mux_basecall_cmd(&config, &sequencing_kit, &pod_dir, &muxed_bam);
|
|
|
+ info!("Running: {basecall_pipe}");
|
|
|
+ Self::run_shell(&basecall_pipe)?;
|
|
|
info!("Basecalling ✅");
|
|
|
|
|
|
- // Demux the temporary bam file
|
|
|
+ // demux
|
|
|
let tmp_demux_dir = format!("{tmp_dir}/demuxed");
|
|
|
fs::create_dir(&tmp_demux_dir)?;
|
|
|
-
|
|
|
- let pipe = format!(
|
|
|
- "samtools split -@ {} -f '{tmp_demux_dir}/%*_%!.bam' {muxed_bam}",
|
|
|
- config.align.samtools_view_threads
|
|
|
- );
|
|
|
- info!("Demux from {sequencing_kit} into {tmp_demux_dir}",);
|
|
|
- info!("Running: {pipe}");
|
|
|
- let pipe_cmd = cmd!("bash", "-c", pipe);
|
|
|
- pipe_cmd.run()?;
|
|
|
-
|
|
|
+ let demux_cmdline = Self::demux_cmd(&config, &muxed_bam, &tmp_demux_dir);
|
|
|
+ info!("Demux from {sequencing_kit} into {tmp_demux_dir}");
|
|
|
+ info!("Running: {demux_cmdline}");
|
|
|
+ Self::run_shell(&demux_cmdline)?;
|
|
|
info!("Demux ✅");
|
|
|
+
|
|
|
for case in cases.iter() {
|
|
|
let barcode = case.barcode.replace("NB", "");
|
|
|
let bam = find_unique_file(
|
|
|
@@ -348,29 +629,20 @@ impl Dorado {
|
|
|
&format!("{sequencing_kit}_barcode{}.bam", barcode),
|
|
|
)?;
|
|
|
|
|
|
- // Align
|
|
|
- let aligned_bam = format!(
|
|
|
- "{tmp_demux_dir}/{sequencing_kit}_barcode{}_aligned.bam",
|
|
|
- barcode
|
|
|
- );
|
|
|
- let dorado = format!(
|
|
|
- // "{} aligner --threads 160 {} {trimmed_bam}",
|
|
|
- "{} aligner --threads 160 {} {bam}",
|
|
|
- config.align.dorado_bin, config.align.ref_fa,
|
|
|
- );
|
|
|
- let samtools_view = format!(
|
|
|
- "samtools view -h -@ {} -b /dev/stdin",
|
|
|
- &config.align.samtools_view_threads
|
|
|
- );
|
|
|
- let samtools_sort = format!(
|
|
|
- "samtools sort -@ {} /dev/stdin -o {aligned_bam}",
|
|
|
- &config.align.samtools_sort_threads
|
|
|
- );
|
|
|
- let pipe = format!("{dorado} | {samtools_view} | {samtools_sort}");
|
|
|
-
|
|
|
- info!("Running {pipe}");
|
|
|
- cmd!("bash", "-c", pipe).run()?;
|
|
|
- info!("Alignement ✅");
|
|
|
+ let aligned_bam = if !config.align.dorado_should_realign {
|
|
|
+ bam.clone()
|
|
|
+ } else {
|
|
|
+ let aligned_bam = format!(
|
|
|
+ "{tmp_demux_dir}/{sequencing_kit}_barcode{}_aligned.bam",
|
|
|
+ barcode
|
|
|
+ );
|
|
|
+ let pipe =
|
|
|
+ Self::realign_cmd(&config, &sequencing_kit, &barcode, &bam, &aligned_bam);
|
|
|
+ info!("Running {pipe}");
|
|
|
+ Self::run_shell(&pipe)?;
|
|
|
+ info!("Alignement ✅");
|
|
|
+ aligned_bam.into()
|
|
|
+ };
|
|
|
|
|
|
let d = Dorado::init(case.clone(), config.clone())?;
|
|
|
d.create_directories()?;
|
|
|
@@ -397,7 +669,6 @@ impl Dorado {
|
|
|
self.start_time = start_time;
|
|
|
|
|
|
debug!("Running Dorado with config: {:#?}", self.config);
|
|
|
-
|
|
|
let dorado_bin = self.config.align.dorado_bin.clone();
|
|
|
|
|
|
self.create_reference_mmi()?;
|
|
|
@@ -414,7 +685,6 @@ impl Dorado {
|
|
|
self.basecall_align(&dorado_bin)?;
|
|
|
self.index()?;
|
|
|
} else {
|
|
|
- // check if merge before call
|
|
|
let new_bam_path = bam_path
|
|
|
.parent()
|
|
|
.unwrap()
|
|
|
@@ -444,6 +714,7 @@ impl Dorado {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+ // from_flowcell stays mostly as-is; it just calls run_pipe/from_mux
|
|
|
pub fn from_flowcell(flowcell: &FlowCell, config: &Config) -> anyhow::Result<()> {
|
|
|
let tp_conv = |time_point: &str| -> String {
|
|
|
match time_point {
|
|
|
@@ -516,7 +787,6 @@ impl Dorado {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
Ok(())
|
|
|
}
|
|
|
}
|