|
|
@@ -1,12 +1,13 @@
|
|
|
use std::{
|
|
|
fs,
|
|
|
io::{Read, Write},
|
|
|
- path::Path,
|
|
|
+ path::{Path, PathBuf},
|
|
|
time::SystemTime,
|
|
|
};
|
|
|
|
|
|
use duct::cmd;
|
|
|
use log::{info, warn};
|
|
|
+use uuid::Uuid;
|
|
|
|
|
|
pub trait Run {
|
|
|
fn run(self) -> anyhow::Result<()>;
|
|
|
@@ -26,6 +27,9 @@ pub struct DoradoConfig {
|
|
|
|
|
|
pub struct Dorado {
|
|
|
config: DoradoConfig,
|
|
|
+ case_dir: String,
|
|
|
+ time_dir: String,
|
|
|
+ bam: String,
|
|
|
start_time: SystemTime,
|
|
|
end_time: SystemTime,
|
|
|
is_done: bool,
|
|
|
@@ -34,40 +38,46 @@ pub struct Dorado {
|
|
|
|
|
|
impl Dorado {
|
|
|
pub fn init(config: DoradoConfig) -> anyhow::Result<Self> {
|
|
|
+ let data_dir = "/data/longreads_basic_pipe";
|
|
|
+ let case_dir = format!("{}/{}", data_dir, config.name);
|
|
|
+ let time_dir = format!("{}/{}", case_dir, config.time);
|
|
|
+ let bam = format!("{}/{}_{}_hs1.bam", time_dir, config.name, config.time);
|
|
|
+
|
|
|
Ok(Self {
|
|
|
config,
|
|
|
start_time: SystemTime::now(),
|
|
|
end_time: SystemTime::now(),
|
|
|
is_done: false,
|
|
|
log: Vec::new(),
|
|
|
+ case_dir,
|
|
|
+ time_dir,
|
|
|
+ bam,
|
|
|
})
|
|
|
}
|
|
|
- fn create_reference_mmi(&self, ref_mmi: &str, ref_fa: &str) -> anyhow::Result<()> {
|
|
|
- if !std::path::Path::new(ref_mmi).exists() {
|
|
|
- cmd!("minimap2", "-x", "map-ont", "-d", ref_mmi, ref_fa).run()?;
|
|
|
+ fn create_reference_mmi(&self) -> anyhow::Result<()> {
|
|
|
+ if !std::path::Path::new(&self.config.ref_mmi).exists() {
|
|
|
+ cmd!("minimap2", "-x", "map-ont", "-d", &self.config.ref_mmi, &self.config.ref_fa).run()?;
|
|
|
}
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- fn create_directories(&self, case_dir: &str, time_dir: &str) -> anyhow::Result<()> {
|
|
|
- if !std::path::Path::new(case_dir).exists() {
|
|
|
- fs::create_dir(case_dir)?;
|
|
|
+ fn create_directories(&self) -> anyhow::Result<()> {
|
|
|
+ if !std::path::Path::new(&self.case_dir).exists() {
|
|
|
+ fs::create_dir(&self.case_dir)?;
|
|
|
}
|
|
|
- if !std::path::Path::new(time_dir).exists() {
|
|
|
- fs::create_dir(time_dir)?;
|
|
|
+ if !std::path::Path::new(&self.time_dir).exists() {
|
|
|
+ fs::create_dir(&self.time_dir)?;
|
|
|
}
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- fn run_dorado_and_samtools(
|
|
|
- &mut self,
|
|
|
- dorado_bin: &str,
|
|
|
- pod_dir: &str,
|
|
|
- ref_mmi: &str,
|
|
|
- bam: &str,
|
|
|
- samtools_view_threads: u16,
|
|
|
- samtools_sort_threads: u16,
|
|
|
- ) -> anyhow::Result<()> {
|
|
|
+ fn basecall_align(&mut self, dorado_bin: &str) -> anyhow::Result<()> {
|
|
|
+ let pod_dir = &self.config.pod_dir;
|
|
|
+ let ref_mmi = &self.config.ref_mmi;
|
|
|
+ let bam = &self.bam;
|
|
|
+ let samtools_view_threads = self.config.samtools_view_threads;
|
|
|
+ let samtools_sort_threads = self.config.samtools_sort_threads;
|
|
|
+
|
|
|
let dorado = format!(
|
|
|
"{dorado_bin} basecaller sup,5mC_5hmC {pod_dir} --trim all --reference {ref_mmi}"
|
|
|
);
|
|
|
@@ -107,10 +117,13 @@ impl Dorado {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- fn run_cramino(&self, bam: &str, time_dir: &str, name: &str, time: &str) -> anyhow::Result<()> {
|
|
|
- let cramino_out = format!("{}/{}_{}_hs1_cramino.txt", time_dir, name, time);
|
|
|
+ fn run_cramino(&self) -> anyhow::Result<()> {
|
|
|
+ let cramino_out = format!(
|
|
|
+ "{}/{}_{}_hs1_cramino.txt",
|
|
|
+ self.time_dir, self.config.name, self.config.time
|
|
|
+ );
|
|
|
if !Path::new(&cramino_out).exists() {
|
|
|
- info!("Quality control of BAM: {}", bam);
|
|
|
+ info!("Quality control of BAM: {}", self.bam);
|
|
|
let output = duct::cmd!(
|
|
|
"cramino",
|
|
|
"-t",
|
|
|
@@ -118,7 +131,7 @@ impl Dorado {
|
|
|
"--hist",
|
|
|
"--checksum",
|
|
|
"--karyotype",
|
|
|
- bam
|
|
|
+ &self.bam
|
|
|
)
|
|
|
.stdout_capture()
|
|
|
.unchecked()
|
|
|
@@ -129,11 +142,14 @@ impl Dorado {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- fn run_modkit(&self, bam: &str, time_dir: &str, name: &str, time: &str) -> anyhow::Result<()> {
|
|
|
- let mod_summary = format!("{}/{}_{}_5mC_5hmC_summary.txt", time_dir, name, time);
|
|
|
+ fn run_modkit(&self) -> anyhow::Result<()> {
|
|
|
+ let mod_summary = format!(
|
|
|
+ "{}/{}_{}_5mC_5hmC_summary.txt",
|
|
|
+ self.time_dir, self.config.name, self.config.time
|
|
|
+ );
|
|
|
if !Path::new(&mod_summary).exists() {
|
|
|
- info!("Generating base modification summary for BAM: {}", bam);
|
|
|
- let output = cmd!("modkit", "summary", "-t", "50", bam)
|
|
|
+ info!("Generating base modification summary for BAM: {}", self.bam);
|
|
|
+ let output = cmd!("modkit", "summary", "-t", "50", &self.bam)
|
|
|
.stdout_capture()
|
|
|
.unchecked()
|
|
|
.run()?;
|
|
|
@@ -143,14 +159,12 @@ impl Dorado {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- fn create_fastq(
|
|
|
- &self,
|
|
|
- bam: &str,
|
|
|
- case_dir: &str,
|
|
|
- name: &str,
|
|
|
- time: &str,
|
|
|
- ) -> anyhow::Result<()> {
|
|
|
- let fastq = format!("{}/{}/{}/{}_{}.fastq.gz", case_dir, name, time, name, time);
|
|
|
+ fn create_fastq(&self) -> anyhow::Result<()> {
|
|
|
+ let bam = &self.bam;
|
|
|
+ let fastq = format!(
|
|
|
+ "{}/{}/{}/{}_{}.fastq.gz",
|
|
|
+ self.case_dir, self.config.name, self.config.time, self.config.name, self.config.time
|
|
|
+ );
|
|
|
if !std::path::Path::new(&fastq).exists() {
|
|
|
let samtools = format!("samtools fastq -@ 150 {bam}");
|
|
|
let crabz = format!("crabz -f bgzf - -o {fastq}");
|
|
|
@@ -161,6 +175,21 @@ impl Dorado {
|
|
|
}
|
|
|
Ok(())
|
|
|
}
|
|
|
+
|
|
|
+ fn merge_bam(&self, bam: &Path) -> anyhow::Result<()> {
|
|
|
+ let into = PathBuf::from(&self.bam);
|
|
|
+ let dir = into.parent().unwrap();
|
|
|
+ let original_file = into.file_name().unwrap().to_string_lossy().to_string();
|
|
|
+ let original_i = dir.join(format!("{original_file}.bai"));
|
|
|
+ let tmp_original_file = format!("{}.bam", Uuid::new_v4());
|
|
|
+ let tmp_original = dir.join(tmp_original_file.clone());
|
|
|
+ let tmp_original_i = dir.join(format!("{tmp_original_file}.bai"));
|
|
|
+ fs::rename(bam, tmp_original)?;
|
|
|
+ fs::rename(original_i, tmp_original_i)?;
|
|
|
+
|
|
|
+ cmd!("samtools merge -@ 160 -h {bam} {into} {bam} {tmp_original}").run()?;
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
impl Run for Dorado {
|
|
|
@@ -170,42 +199,30 @@ impl Run for Dorado {
|
|
|
|
|
|
info!("Running Dorado with params: {:#?}", self.config);
|
|
|
|
|
|
- let DoradoConfig {
|
|
|
- name,
|
|
|
- time,
|
|
|
- pod_dir,
|
|
|
- ref_fa,
|
|
|
- ref_mmi,
|
|
|
- dorado_threads: _,
|
|
|
- samtools_view_threads,
|
|
|
- samtools_sort_threads,
|
|
|
- } = self.config.clone();
|
|
|
-
|
|
|
- let data_dir = "/data/longreads_basic_pipe";
|
|
|
let dorado_bin = "/data/tools/dorado-0.7.2-linux-x64/bin/dorado";
|
|
|
- let case_dir = format!("{}/{}", data_dir, name);
|
|
|
- let time_dir = format!("{}/{}", case_dir, time);
|
|
|
- let bam = format!("{}/{}_{}_hs1.bam", time_dir, name, time);
|
|
|
-
|
|
|
- self.create_reference_mmi(&ref_mmi, &ref_fa)?;
|
|
|
- self.create_directories(&case_dir, &time_dir)?;
|
|
|
-
|
|
|
- info!("Reading {} pod5 from: {}", time, pod_dir);
|
|
|
-
|
|
|
- if !std::path::Path::new(&bam).exists() {
|
|
|
- self.run_dorado_and_samtools(
|
|
|
- dorado_bin,
|
|
|
- &pod_dir,
|
|
|
- &ref_mmi,
|
|
|
- &bam,
|
|
|
- samtools_view_threads,
|
|
|
- samtools_sort_threads,
|
|
|
- )?;
|
|
|
+
|
|
|
+ self.create_reference_mmi()?;
|
|
|
+ self.create_directories()?;
|
|
|
+
|
|
|
+ info!("Reading {} pod5 from: {}", self.config.time, self.config.pod_dir);
|
|
|
+ let bam_path = std::path::Path::new(&self.bam);
|
|
|
+
|
|
|
+ if !bam_path.exists() {
|
|
|
+ self.basecall_align(dorado_bin)?;
|
|
|
+ } else {
|
|
|
+ let new_bam_path = bam_path
|
|
|
+ .parent()
|
|
|
+ .unwrap()
|
|
|
+ .join(format!("{}.bam", Uuid::new_v4()));
|
|
|
+ warn!("Creating new bam {}", new_bam_path.display());
|
|
|
+
|
|
|
+ self.basecall_align(dorado_bin)?;
|
|
|
+ self.merge_bam(&new_bam_path)?;
|
|
|
}
|
|
|
|
|
|
- self.run_cramino(&bam, &time_dir, &name, &time)?;
|
|
|
- self.run_modkit(&bam, &time_dir, &name, &time)?;
|
|
|
- self.create_fastq(&bam, &case_dir, &name, &time)?;
|
|
|
+ self.run_cramino()?;
|
|
|
+ self.run_modkit()?;
|
|
|
+ self.create_fastq()?;
|
|
|
|
|
|
let end_time = std::time::SystemTime::now();
|
|
|
self.end_time = end_time;
|