|
|
@@ -0,0 +1,130 @@
|
|
|
+use std::path::{Path, PathBuf};
|
|
|
+
|
|
|
+use anyhow::Context;
|
|
|
+
|
|
|
+use crate::{
|
|
|
+ commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner},
|
|
|
+ config::Config,
|
|
|
+ de_novo::{
|
|
|
+ de_novo_pipe::{BoxedAssembler, BoxedPolisher, LocalAssemblyBuilder},
|
|
|
+ medaka::MedakaPolisher,
|
|
|
+ Assembler,
|
|
|
+ },
|
|
|
+};
|
|
|
+
|
|
|
/// Parameters of a single wtdbg2 assembly run (assembly + `wtpoa-cns` consensus).
///
/// Instances are normally created with `Wtdbg2Assembler::from_config`, which
/// also creates the output directory and derives `prefix` from it.
#[derive(Debug, Clone)]
pub struct Wtdbg2Assembler {
    /// Reads file passed to `wtdbg2` as its positional input.
    pub reads: PathBuf,
    /// Output directory of the run, as reported by `output_dir()`.
    pub out_dir: PathBuf,
    /// Genome size handed to `wtdbg2 -g`, e.g. "500k".
    pub genome_size: String,
    /// Thread count used for `-t` of both tools and for the Slurm CPU request.
    pub threads: u8,
    /// Path of the `wtdbg2` executable; its parent directory is used to find `wtpoa-cns`.
    pub wtdbg2_bin: String,
    /// Memory request forwarded to Slurm.
    pub slurm_mem: String,
    /// Output prefix handed to `wtdbg2 -o`; `from_config` sets it to `out_dir/asm`.
    pub prefix: PathBuf,
}
|
|
|
+
|
|
|
+impl Wtdbg2Assembler {
|
|
|
+ pub fn from_config(
|
|
|
+ config: &Config,
|
|
|
+ reads: PathBuf,
|
|
|
+ out_dir: PathBuf,
|
|
|
+ genome_size: String,
|
|
|
+ ) -> anyhow::Result<Self> {
|
|
|
+ std::fs::create_dir_all(&out_dir)
|
|
|
+ .with_context(|| format!("Cannot create wtdbg2 output dir: {}", out_dir.display()))?;
|
|
|
+ let prefix = out_dir.join("asm");
|
|
|
+ Ok(Self {
|
|
|
+ reads,
|
|
|
+ out_dir,
|
|
|
+ genome_size,
|
|
|
+ threads: config.wtdbg2_threads,
|
|
|
+ wtdbg2_bin: config.wtdbg2_bin.clone(),
|
|
|
+ slurm_mem: config.wtdbg2_slurm_mem.clone(),
|
|
|
+ prefix,
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl JobCommand for Wtdbg2Assembler {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ let wtdbg2_dir = PathBuf::from(&self.wtdbg2_bin)
|
|
|
+ .parent()
|
|
|
+ .map(|p| p.display().to_string())
|
|
|
+ .unwrap_or_default();
|
|
|
+
|
|
|
+ format!(
|
|
|
+ r#"set -euo pipefail
|
|
|
+PATH={wtdbg2_dir}:${{PATH}} && {wtdbg2} -P -x ont -g {genome_size} -t {threads} -o {prefix} {reads}
|
|
|
+{wtdbg2_dir}/wtpoa-cns -t {threads} -i {prefix}.ctg.lay.gz -fo {fasta}
|
|
|
+"#,
|
|
|
+ wtdbg2_dir = wtdbg2_dir,
|
|
|
+ wtdbg2 = self.wtdbg2_bin,
|
|
|
+ genome_size = self.genome_size,
|
|
|
+ threads = self.threads,
|
|
|
+ prefix = self.prefix.display(),
|
|
|
+ reads = self.reads.display(),
|
|
|
+ fasta = self.assembly_fasta().display(),
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Assembler for Wtdbg2Assembler {
|
|
|
+ fn reads_input(&self) -> &Path {
|
|
|
+ &self.reads
|
|
|
+ }
|
|
|
+ fn output_dir(&self) -> &Path {
|
|
|
+ &self.out_dir
|
|
|
+ }
|
|
|
+ fn assembly_fasta(&self) -> PathBuf {
|
|
|
+ self.prefix.with_extension("asm.cns.fa")
|
|
|
+ }
|
|
|
+ fn assembly_graph(&self) -> Option<PathBuf> {
|
|
|
+ // wtdbg2 produces a .dot graph
|
|
|
+ Some(self.prefix.with_extension("dot"))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl SbatchRunner for Wtdbg2Assembler {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some("wtdbg2".to_string()),
|
|
|
+ cpus_per_task: Some(self.threads.into()),
|
|
|
+ mem: Some(self.slurm_mem.clone()),
|
|
|
+ partition: Some("shortq".to_string()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl SlurmRunner for Wtdbg2Assembler {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ self.slurm_params().to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl LocalRunner for Wtdbg2Assembler {}
|
|
|
+
|
|
|
+pub struct Wtdbg2MedakaBuilder {
|
|
|
+ pub config: Config,
|
|
|
+}
|
|
|
+
|
|
|
+impl LocalAssemblyBuilder for Wtdbg2MedakaBuilder {
|
|
|
+ fn build(
|
|
|
+ &self,
|
|
|
+ reads_path: PathBuf,
|
|
|
+ round_dir: &Path,
|
|
|
+ ) -> anyhow::Result<(BoxedAssembler, BoxedPolisher)> {
|
|
|
+ let assembler = Wtdbg2Assembler::from_config(
|
|
|
+ &self.config,
|
|
|
+ reads_path.clone(),
|
|
|
+ round_dir.join("wtdbg2"),
|
|
|
+ "10k".to_string(),
|
|
|
+ )?;
|
|
|
+ let polisher = MedakaPolisher::from_config(
|
|
|
+ &self.config,
|
|
|
+ reads_path,
|
|
|
+ assembler.assembly_fasta(),
|
|
|
+ round_dir.join("medaka"),
|
|
|
+ );
|
|
|
+ Ok((Box::new(assembler), Box::new(polisher)))
|
|
|
+ }
|
|
|
+}
|