wtdbg2.rs 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. use std::path::{Path, PathBuf};
  2. use anyhow::Context;
  3. use crate::{
  4. commands::{Command as JobCommand, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner},
  5. config::Config,
  6. de_novo::{
  7. de_novo_pipe::{BoxedAssembler, BoxedPolisher, LocalAssemblyBuilder},
  8. medaka::MedakaPolisher,
  9. Assembler,
  10. },
  11. };
  12. pub struct Wtdbg2Assembler {
  13. pub reads: PathBuf,
  14. pub out_dir: PathBuf,
  15. pub genome_size: String, // e.g. "500k"
  16. pub threads: u8,
  17. pub wtdbg2_bin: String,
  18. pub slurm_mem: String,
  19. pub prefix: PathBuf, // out_dir/asm
  20. }
  21. impl Wtdbg2Assembler {
  22. pub fn from_config(
  23. config: &Config,
  24. reads: PathBuf,
  25. out_dir: PathBuf,
  26. genome_size: String,
  27. ) -> anyhow::Result<Self> {
  28. std::fs::create_dir_all(&out_dir)
  29. .with_context(|| format!("Cannot create wtdbg2 output dir: {}", out_dir.display()))?;
  30. let prefix = out_dir.join("asm");
  31. Ok(Self {
  32. reads,
  33. out_dir,
  34. genome_size,
  35. threads: config.wtdbg2_threads,
  36. wtdbg2_bin: config.wtdbg2_bin.clone(),
  37. slurm_mem: config.wtdbg2_slurm_mem.clone(),
  38. prefix,
  39. })
  40. }
  41. }
  42. impl JobCommand for Wtdbg2Assembler {
  43. fn cmd(&self) -> String {
  44. let wtdbg2_dir = PathBuf::from(&self.wtdbg2_bin)
  45. .parent()
  46. .map(|p| p.display().to_string())
  47. .unwrap_or_default();
  48. format!(
  49. r#"set -euo pipefail
  50. PATH={wtdbg2_dir}:${{PATH}} && {wtdbg2} -P -x ont -g {genome_size} -t {threads} -o {prefix} {reads}
  51. {wtdbg2_dir}/wtpoa-cns -t {threads} -i {prefix}.ctg.lay.gz -fo {fasta}
  52. "#,
  53. wtdbg2_dir = wtdbg2_dir,
  54. wtdbg2 = self.wtdbg2_bin,
  55. genome_size = self.genome_size,
  56. threads = self.threads,
  57. prefix = self.prefix.display(),
  58. reads = self.reads.display(),
  59. fasta = self.assembly_fasta().display(),
  60. )
  61. }
  62. }
  63. impl Assembler for Wtdbg2Assembler {
  64. fn reads_input(&self) -> &Path {
  65. &self.reads
  66. }
  67. fn output_dir(&self) -> &Path {
  68. &self.out_dir
  69. }
  70. fn assembly_fasta(&self) -> PathBuf {
  71. self.prefix.with_extension("asm.cns.fa")
  72. }
  73. fn assembly_graph(&self) -> Option<PathBuf> {
  74. // wtdbg2 produces a .dot graph
  75. Some(self.prefix.with_extension("dot"))
  76. }
  77. }
  78. impl SbatchRunner for Wtdbg2Assembler {
  79. fn slurm_params(&self) -> SlurmParams {
  80. SlurmParams {
  81. job_name: Some("wtdbg2".to_string()),
  82. cpus_per_task: Some(self.threads.into()),
  83. mem: Some(self.slurm_mem.clone()),
  84. partition: Some("shortq".to_string()),
  85. gres: None,
  86. }
  87. }
  88. }
  89. impl SlurmRunner for Wtdbg2Assembler {
  90. fn slurm_args(&self) -> Vec<String> {
  91. self.slurm_params().to_args()
  92. }
  93. }
  94. impl LocalRunner for Wtdbg2Assembler {}
  95. pub struct Wtdbg2MedakaBuilder {
  96. pub config: Config,
  97. }
  98. impl LocalAssemblyBuilder for Wtdbg2MedakaBuilder {
  99. fn build(
  100. &self,
  101. reads_path: PathBuf,
  102. round_dir: &Path,
  103. ) -> anyhow::Result<(BoxedAssembler, BoxedPolisher)> {
  104. let assembler = Wtdbg2Assembler::from_config(
  105. &self.config,
  106. reads_path.clone(),
  107. round_dir.join("wtdbg2"),
  108. "10k".to_string(),
  109. )?;
  110. let polisher = MedakaPolisher::from_config(
  111. &self.config,
  112. reads_path,
  113. assembler.assembly_fasta(),
  114. round_dir.join("medaka"),
  115. );
  116. Ok((Box::new(assembler), Box::new(polisher)))
  117. }
  118. }