| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- use std::path::{Path, PathBuf};
- use anyhow::Context;
- use crate::{
- commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner},
- config::Config,
- de_novo::{
- de_novo_pipe::{BoxedAssembler, BoxedPolisher, LocalAssemblyBuilder},
- medaka::MedakaPolisher,
- Assembler,
- },
- };
/// Settings for a single wtdbg2 assembly run followed by `wtpoa-cns` consensus.
///
/// All fields are public; `from_config` fills them from a `Config` plus
/// per-run arguments.
pub struct Wtdbg2Assembler {
    /// Reads file handed to wtdbg2 as its input.
    pub reads: PathBuf,
    /// Directory holding the outputs of this run.
    pub out_dir: PathBuf,
    /// Value passed to wtdbg2's `-g` option, e.g. `"500k"`.
    pub genome_size: String,
    /// Thread count passed to `-t` and requested as CPUs per Slurm task.
    pub threads: u8,
    /// Path (or name) of the wtdbg2 executable.
    pub wtdbg2_bin: String,
    /// Memory request forwarded to Slurm.
    pub slurm_mem: String,
    /// Output prefix given to wtdbg2's `-o`; `from_config` sets it to `out_dir/asm`.
    pub prefix: PathBuf,
}
- impl Wtdbg2Assembler {
- pub fn from_config(
- config: &Config,
- reads: PathBuf,
- out_dir: PathBuf,
- genome_size: String,
- ) -> anyhow::Result<Self> {
- std::fs::create_dir_all(&out_dir)
- .with_context(|| format!("Cannot create wtdbg2 output dir: {}", out_dir.display()))?;
- let prefix = out_dir.join("asm");
- Ok(Self {
- reads,
- out_dir,
- genome_size,
- threads: config.wtdbg2_threads,
- wtdbg2_bin: config.wtdbg2_bin.clone(),
- slurm_mem: config.wtdbg2_slurm_mem.clone(),
- prefix,
- })
- }
- }
- impl JobCommand for Wtdbg2Assembler {
- fn cmd(&self) -> String {
- let wtdbg2_dir = PathBuf::from(&self.wtdbg2_bin)
- .parent()
- .map(|p| p.display().to_string())
- .unwrap_or_default();
- format!(
- r#"set -euo pipefail
- PATH={wtdbg2_dir}:${{PATH}} && {wtdbg2} -P -x ont -g {genome_size} -t {threads} -o {prefix} {reads}
- {wtdbg2_dir}/wtpoa-cns -t {threads} -i {prefix}.ctg.lay.gz -fo {fasta}
- "#,
- wtdbg2_dir = wtdbg2_dir,
- wtdbg2 = self.wtdbg2_bin,
- genome_size = self.genome_size,
- threads = self.threads,
- prefix = self.prefix.display(),
- reads = self.reads.display(),
- fasta = self.assembly_fasta().display(),
- )
- }
- }
- impl Assembler for Wtdbg2Assembler {
- fn reads_input(&self) -> &Path {
- &self.reads
- }
- fn output_dir(&self) -> &Path {
- &self.out_dir
- }
- fn assembly_fasta(&self) -> PathBuf {
- self.prefix.with_extension("asm.cns.fa")
- }
- fn assembly_graph(&self) -> Option<PathBuf> {
- // wtdbg2 produces a .dot graph
- Some(self.prefix.with_extension("dot"))
- }
- }
- impl SbatchRunner for Wtdbg2Assembler {
- fn slurm_params(&self) -> SlurmParams {
- SlurmParams {
- job_name: Some("wtdbg2".to_string()),
- cpus_per_task: Some(self.threads.into()),
- mem: Some(self.slurm_mem.clone()),
- partition: Some("shortq".to_string()),
- gres: None,
- }
- }
- }
- impl SlurmRunner for Wtdbg2Assembler {
- fn slurm_args(&self) -> Vec<String> {
- self.slurm_params().to_args()
- }
- }
- impl LocalRunner for Wtdbg2Assembler {}
- pub struct Wtdbg2MedakaBuilder {
- pub config: Config,
- }
- impl LocalAssemblyBuilder for Wtdbg2MedakaBuilder {
- fn build(
- &self,
- reads_path: PathBuf,
- round_dir: &Path,
- ) -> anyhow::Result<(BoxedAssembler, BoxedPolisher)> {
- let assembler = Wtdbg2Assembler::from_config(
- &self.config,
- reads_path.clone(),
- round_dir.join("wtdbg2"),
- "10k".to_string(),
- )?;
- let polisher = MedakaPolisher::from_config(
- &self.config,
- reads_path,
- assembler.assembly_fasta(),
- round_dir.join("medaka"),
- );
- Ok((Box::new(assembler), Box::new(polisher)))
- }
- }
|