use log::{info, warn}; use std::{ fs::{self, File}, io::{BufRead, BufReader}, path::Path, process::{Command, Stdio} }; pub fn run_flye(fasta_path: &str, tmp_dir: &str, _size: &str) { info!("Running Flye for {fasta_path}"); let flye_bin = "/data/tools/Flye/bin/flye"; let mut cmd = Command::new(flye_bin) .arg("--threads") .arg("12") // .arg("--keep-haplotypes") // .arg("--meta") .arg("--min-overlap") .arg("1000") .arg("--out-dir") .arg(tmp_dir) .arg("--nano-hq") .arg(fasta_path) .stderr(Stdio::piped()) .spawn() .expect("Flye failed to start"); let stderr = cmd.stderr.take().unwrap(); let reader = BufReader::new(stderr); reader .lines() .map_while(Result::ok) .filter(|line| line.contains("ERROR")) .for_each(|line| warn!("[FLYE] {line}")); cmd.wait().unwrap(); cmd.kill().unwrap(); } pub fn read_flye_coverage(path: &str, min_cov: i32, contig_name: &str) -> (usize, usize) { use std::io::Read; let mut reader = File::open(path).map(flate2::read::GzDecoder::new).unwrap(); let mut buf = Vec::new(); reader.read_to_end(&mut buf).unwrap(); let mut line_acc = Vec::new(); let mut start = None; let mut end = None; let mut last_end = 0; for b in buf.iter() { match b { b'\n' => { let s = String::from_utf8(line_acc.clone()).unwrap(); line_acc.clear(); if !s.starts_with(&format!("{contig_name}\t")) { continue; } let s = s.split('\t').collect::>(); let cov: i32 = s.get(3).unwrap().parse().unwrap(); if start.is_none() && cov >= min_cov { let st: i32 = s.get(1).unwrap().parse().unwrap(); start = Some(st); } else if end.is_none() && start.is_some() && cov < min_cov { let en: i32 = s.get(1).unwrap().parse().unwrap(); end = Some(en); break; } last_end = s.get(2).unwrap().parse().unwrap(); } _ => line_acc.push(*b), } } (start.unwrap() as usize, end.unwrap_or(last_end) as usize) } pub fn assemble_flye(reads_path: &str) -> anyhow::Result>> { let min_cov = 2; let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4()); fs::create_dir(tmp_dir.clone()).unwrap(); run_flye(reads_path, &tmp_dir, "10M"); let contigs_path = format!("{tmp_dir}/assembly.fasta"); let mut res = None; if Path::new(&contigs_path).exists() { let assembly_path = format!("{tmp_dir}/assembly.fasta"); let flye_cov_path = format!("{tmp_dir}/40-polishing/base_coverage.bed.gz"); let mut reader = File::open(assembly_path) .map(BufReader::new) .map(noodles_fasta::Reader::new)?; let mut contigs = Vec::new(); for result in reader.records() { let record = result?; let contig_name = String::from_utf8(record.name().to_vec()).unwrap(); let (s, e) = read_flye_coverage(&flye_cov_path, min_cov, &contig_name); let seq = record.sequence().as_ref(); let seq = String::from_utf8(seq.to_vec()).unwrap(); let seq: String = seq[s..e].into(); contigs.push(seq.clone()); } res = Some(contigs); } fs::remove_dir_all(tmp_dir)?; Ok(res) }