|
@@ -1,6 +1,6 @@
|
|
|
pub mod flye;
|
|
pub mod flye;
|
|
|
|
|
|
|
|
-use anyhow::anyhow;
|
|
|
|
|
|
|
+use anyhow::{anyhow, Context};
|
|
|
use bam::{
|
|
use bam::{
|
|
|
header::HeaderEntry,
|
|
header::HeaderEntry,
|
|
|
record::tags::{StringType, TagValue},
|
|
record::tags::{StringType, TagValue},
|
|
@@ -126,6 +126,7 @@ pub fn lastz_remap(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
let query_path = format!("{}/{}.fa", &tmp_dir, query_name);
|
|
let query_path = format!("{}/{}.fa", &tmp_dir, query_name);
|
|
|
|
|
+
|
|
|
let query_seq = String::from_utf8(record.sequence().to_vec())?;
|
|
let query_seq = String::from_utf8(record.sequence().to_vec())?;
|
|
|
|
|
|
|
|
write_fasta(&query_path, &vec![(query_name.clone(), query_seq)]);
|
|
write_fasta(&query_path, &vec![(query_name.clone(), query_seq)]);
|
|
@@ -176,6 +177,61 @@ pub fn lastz_remap(
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+pub fn cp_mod_tags(input_records: &Vec<bam::Record>, output_path: &str) -> anyhow::Result<()> {
|
|
|
|
|
+ // Read tags from input tags
|
|
|
|
|
+ let mut mm_hm = HashMap::new();
|
|
|
|
|
+ let mut ml_hm = HashMap::new();
|
|
|
|
|
+ for record in input_records {
|
|
|
|
|
+ // let record = record?;
|
|
|
|
|
+ let query_name = String::from_utf8(record.name().to_vec())?;
|
|
|
|
|
+ if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
|
|
|
|
|
+ mm_hm.insert(query_name.clone(), value.to_vec());
|
|
|
|
|
+ }
|
|
|
|
|
+ if let Some(TagValue::IntArray(value)) = record.tags().get(b"ML") {
|
|
|
|
|
+ ml_hm.insert(query_name.clone(), value.raw().to_vec());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Read and keep records from destination bam
|
|
|
|
|
+ let reader =
|
|
|
|
|
+ bam::BamReader::from_path(output_path, 3).context(format!("Can't open {output_path}"))?;
|
|
|
|
|
+ let header = reader.header().to_owned();
|
|
|
|
|
+ let mut records = Vec::new();
|
|
|
|
|
+ for record in reader {
|
|
|
|
|
+ let record = record?;
|
|
|
|
|
+ records.push(record);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Write records add tags into tmp file
|
|
|
|
|
+ let tmp_bam = format!("/tmp/{}.bam", Uuid::new_v4());
|
|
|
|
|
+ let mut bam_writer = bam::bam_writer::BamWriterBuilder::new();
|
|
|
|
|
+ let mut w = bam_writer.from_path(&tmp_bam, header)?;
|
|
|
|
|
+ for record in records.iter() {
|
|
|
|
|
+ let mut record = record.clone();
|
|
|
|
|
+ let record_name = String::from_utf8(record.name().to_vec())?;
|
|
|
|
|
+ let tags = record.tags_mut();
|
|
|
|
|
+ if let Some(mm) = mm_hm.get(&record_name) {
|
|
|
|
|
+ tags.push_string(b"MM", mm)
|
|
|
|
|
+ }
|
|
|
|
|
+ let tags = record.tags_mut();
|
|
|
|
|
+ if let Some(ml) = ml_hm.get(&record_name) {
|
|
|
|
|
+ let v = ml.as_bytes();
|
|
|
|
|
+ tags.push_array(b"ML", v);
|
|
|
|
|
+ }
|
|
|
|
|
+ w.write(&record)?;
|
|
|
|
|
+ }
|
|
|
|
|
+ w.finish()?;
|
|
|
|
|
+
|
|
|
|
|
+ // Remove destination bam, copy tmp bam to destination path and remove tmp bam
|
|
|
|
|
+ fs::remove_file(output_path).context(format!("Can't remove {output_path}"))?;
|
|
|
|
|
+ fs::copy(&tmp_bam, output_path).context(format!("Can't copy {tmp_bam} to {output_path}"))?;
|
|
|
|
|
+ fs::remove_file(tmp_bam)?;
|
|
|
|
|
+
|
|
|
|
|
+ // Index destination bam
|
|
|
|
|
+ duct::cmd!("samtools", "index", output_path).run()?;
|
|
|
|
|
+ Ok(())
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
pub fn write_fasta(contig_fa: &str, d: &Vec<(String, String)>) {
|
|
pub fn write_fasta(contig_fa: &str, d: &Vec<(String, String)>) {
|
|
|
let file = File::create(contig_fa).unwrap();
|
|
let file = File::create(contig_fa).unwrap();
|
|
|
let mut writer = noodles_fasta::writer::Builder::default().build_with_writer(file);
|
|
let mut writer = noodles_fasta::writer::Builder::default().build_with_writer(file);
|
|
@@ -541,22 +597,18 @@ impl FlyeAsm {
|
|
|
info!("Mapping input reads to {contig_id}");
|
|
info!("Mapping input reads to {contig_id}");
|
|
|
let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
|
|
let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
|
|
|
duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
|
|
duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
|
|
|
-
|
|
|
|
|
let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
|
|
let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
|
|
|
let bwa = format!("bwa mem {contig_fa} {input_fa}");
|
|
let bwa = format!("bwa mem {contig_fa} {input_fa}");
|
|
|
let samtools = "samtools sort /dev/stdin";
|
|
let samtools = "samtools sort /dev/stdin";
|
|
|
let pipe = format!("{bwa} | {samtools} > {new_bam}");
|
|
let pipe = format!("{bwa} | {samtools} > {new_bam}");
|
|
|
duct::cmd!("bash", "-c", pipe).run()?;
|
|
duct::cmd!("bash", "-c", pipe).run()?;
|
|
|
- duct::cmd!("samtools", "index", new_bam).run()?;
|
|
|
|
|
-
|
|
|
|
|
- // let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
|
|
|
|
|
- // lastz_remap(
|
|
|
|
|
- // &self.input_records,
|
|
|
|
|
- // &contig_fa,
|
|
|
|
|
- // contig.len() as u32,
|
|
|
|
|
- // &new_bam,
|
|
|
|
|
- // &modkit_pileup,
|
|
|
|
|
- // )?;
|
|
|
|
|
|
|
+
|
|
|
|
|
+ // Copy modified base tags to new bam
|
|
|
|
|
+ cp_mod_tags(&self.input_records, &new_bam)?;
|
|
|
|
|
+
|
|
|
|
|
+ // Run modkit
|
|
|
|
|
+ let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
|
|
|
|
|
+ duct::cmd!("modkit", "pileup", new_bam, modkit_pileup).run()?;
|
|
|
}
|
|
}
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
@@ -581,7 +633,7 @@ pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<(
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
|
|
|
|
|
|
|
+pub fn dir_flye(dir: &str, force: bool) -> anyhow::Result<()> {
|
|
|
let pattern = format!("{}/*.bam", dir);
|
|
let pattern = format!("{}/*.bam", dir);
|
|
|
let re =
|
|
let re =
|
|
|
Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$").unwrap();
|
|
Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$").unwrap();
|
|
@@ -592,7 +644,7 @@ pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
|
|
|
if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
|
|
if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
|
|
|
if re.is_match(file_name) {
|
|
if re.is_match(file_name) {
|
|
|
if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
|
|
if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
|
|
|
- if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() {
|
|
|
|
|
|
|
+ if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() && !force {
|
|
|
warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
|
|
warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
|
|
|
} else {
|
|
} else {
|
|
|
let f = FlyeAsm::new(dir, input_id)?;
|
|
let f = FlyeAsm::new(dir, input_id)?;
|
|
@@ -714,9 +766,9 @@ mod tests {
|
|
|
#[test]
|
|
#[test]
|
|
|
fn flye_d() -> anyhow::Result<()> {
|
|
fn flye_d() -> anyhow::Result<()> {
|
|
|
init();
|
|
init();
|
|
|
- let id = "BECERRA";
|
|
|
|
|
|
|
+ let id = "SALICETTO";
|
|
|
let res_dir = "/data/longreads_basic_pipe";
|
|
let res_dir = "/data/longreads_basic_pipe";
|
|
|
let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
|
|
let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
|
|
|
- dir_flye(&asm_dir)
|
|
|
|
|
|
|
+ dir_flye(&asm_dir, true)
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|