瀏覽代碼

full working

Thomas 1 年之前
父節點
當前提交
78d6576f89
共有 1 個文件被更改,包括 68 次插入16 次删除
  1. 68 16
      src/lib.rs

+ 68 - 16
src/lib.rs

@@ -1,6 +1,6 @@
 pub mod flye;
 pub mod flye;
 
 
-use anyhow::anyhow;
+use anyhow::{anyhow, Context};
 use bam::{
 use bam::{
     header::HeaderEntry,
     header::HeaderEntry,
     record::tags::{StringType, TagValue},
     record::tags::{StringType, TagValue},
@@ -126,6 +126,7 @@ pub fn lastz_remap(
         }
         }
 
 
         let query_path = format!("{}/{}.fa", &tmp_dir, query_name);
         let query_path = format!("{}/{}.fa", &tmp_dir, query_name);
+
         let query_seq = String::from_utf8(record.sequence().to_vec())?;
         let query_seq = String::from_utf8(record.sequence().to_vec())?;
 
 
         write_fasta(&query_path, &vec![(query_name.clone(), query_seq)]);
         write_fasta(&query_path, &vec![(query_name.clone(), query_seq)]);
@@ -176,6 +177,61 @@ pub fn lastz_remap(
     Ok(())
     Ok(())
 }
 }
 
 
+pub fn cp_mod_tags(input_records: &Vec<bam::Record>, output_path: &str) -> anyhow::Result<()> {
+    // Read tags from input tags
+    let mut mm_hm = HashMap::new();
+    let mut ml_hm = HashMap::new();
+    for record in input_records {
+        // let record = record?;
+        let query_name = String::from_utf8(record.name().to_vec())?;
+        if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
+            mm_hm.insert(query_name.clone(), value.to_vec());
+        }
+        if let Some(TagValue::IntArray(value)) = record.tags().get(b"ML") {
+            ml_hm.insert(query_name.clone(), value.raw().to_vec());
+        }
+    }
+
+    // Read and keep records from destination bam
+    let reader =
+        bam::BamReader::from_path(output_path, 3).context(format!("Can't open {output_path}"))?;
+    let header = reader.header().to_owned();
+    let mut records = Vec::new();
+    for record in reader {
+        let record = record?;
+        records.push(record);
+    }
+
+    // Write records add tags into tmp file
+    let tmp_bam = format!("/tmp/{}.bam", Uuid::new_v4());
+    let mut bam_writer = bam::bam_writer::BamWriterBuilder::new();
+    let mut w = bam_writer.from_path(&tmp_bam, header)?;
+    for record in records.iter() {
+        let mut record = record.clone();
+        let record_name = String::from_utf8(record.name().to_vec())?;
+        let tags = record.tags_mut();
+        if let Some(mm) = mm_hm.get(&record_name) {
+            tags.push_string(b"MM", mm)
+        }
+        let tags = record.tags_mut();
+        if let Some(ml) = ml_hm.get(&record_name) {
+            let v = ml.as_bytes();
+            tags.push_array(b"ML", v);
+        }
+        w.write(&record)?;
+    }
+    w.finish()?;
+
+    // Remove destination bam, copy tmp bam to destination path and remove tmp bam
+    fs::remove_file(output_path).context(format!("Can't remove {output_path}"))?;
+    fs::copy(&tmp_bam, output_path).context(format!("Can't copy {tmp_bam} to {output_path}"))?;
+    fs::remove_file(tmp_bam)?;
+
+    // Index destination bam
+    duct::cmd!("samtools", "index", output_path).run()?;
+    Ok(())
+}
+
 pub fn write_fasta(contig_fa: &str, d: &Vec<(String, String)>) {
 pub fn write_fasta(contig_fa: &str, d: &Vec<(String, String)>) {
     let file = File::create(contig_fa).unwrap();
     let file = File::create(contig_fa).unwrap();
     let mut writer = noodles_fasta::writer::Builder::default().build_with_writer(file);
     let mut writer = noodles_fasta::writer::Builder::default().build_with_writer(file);
@@ -541,22 +597,18 @@ impl FlyeAsm {
             info!("Mapping  input reads to {contig_id}");
             info!("Mapping  input reads to {contig_id}");
             let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
             let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
             duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
             duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
-
             let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
             let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
             let bwa = format!("bwa mem {contig_fa} {input_fa}");
             let bwa = format!("bwa mem {contig_fa} {input_fa}");
             let samtools = "samtools sort /dev/stdin";
             let samtools = "samtools sort /dev/stdin";
             let pipe = format!("{bwa} | {samtools} > {new_bam}");
             let pipe = format!("{bwa} | {samtools} > {new_bam}");
             duct::cmd!("bash", "-c", pipe).run()?;
             duct::cmd!("bash", "-c", pipe).run()?;
-            duct::cmd!("samtools", "index", new_bam).run()?;
-
-            // let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
-            // lastz_remap(
-            //     &self.input_records,
-            //     &contig_fa,
-            //     contig.len() as u32,
-            //     &new_bam,
-            //     &modkit_pileup,
-            // )?;
+
+            // Copy modified base tags to new bam
+            cp_mod_tags(&self.input_records, &new_bam)?;
+
+            // Run modkit
+            let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
+            duct::cmd!("modkit", "pileup", new_bam, modkit_pileup).run()?;
         }
         }
         Ok(())
         Ok(())
     }
     }
@@ -581,7 +633,7 @@ pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<(
     Ok(())
     Ok(())
 }
 }
 
 
-pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
+pub fn dir_flye(dir: &str, force: bool) -> anyhow::Result<()> {
     let pattern = format!("{}/*.bam", dir);
     let pattern = format!("{}/*.bam", dir);
     let re =
     let re =
         Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$").unwrap();
         Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$").unwrap();
@@ -592,7 +644,7 @@ pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
                 if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
                 if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
                     if re.is_match(file_name) {
                     if re.is_match(file_name) {
                         if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
                         if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
-                            if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() {
+                            if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() && !force {
                                 warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
                                 warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
                             } else {
                             } else {
                                 let f = FlyeAsm::new(dir, input_id)?;
                                 let f = FlyeAsm::new(dir, input_id)?;
@@ -714,9 +766,9 @@ mod tests {
     #[test]
     #[test]
     fn flye_d() -> anyhow::Result<()> {
     fn flye_d() -> anyhow::Result<()> {
         init();
         init();
-        let id = "BECERRA";
+        let id = "SALICETTO";
         let res_dir = "/data/longreads_basic_pipe";
         let res_dir = "/data/longreads_basic_pipe";
         let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
         let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
-        dir_flye(&asm_dir)
+        dir_flye(&asm_dir, true)
     }
     }
 }
 }