Browse Source

-lastz remap +bwa mem workin

Thomas 1 năm trước cách đây
mục cha
commit
c76f4d7a32
3 tập tin đã thay đổi với 95 bổ sung24 xóa
  1. 25 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 69 24
      src/lib.rs

+ 25 - 0
Cargo.lock

@@ -2013,6 +2013,7 @@ dependencies = [
  "noodles-fasta 0.41.0",
  "noodles-fastq",
  "pandora_lib_blastn",
+ "pandora_lib_igv",
  "petgraph",
  "regex",
  "rust-htslib 0.46.0",
@@ -2035,6 +2036,19 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "pandora_lib_igv"
+version = "0.1.0"
+source = "git+https://git.t0m4.fr/Thomas/pandora_lib_igv.git#268f631dbe94132b8291f80864a8cbcef12e76ad"
+dependencies = [
+ "anyhow",
+ "base64",
+ "flate2",
+ "serde",
+ "serde_json",
+ "smart-default",
+]
+
 [[package]]
 name = "parking_lot"
 version = "0.12.3"
@@ -2760,6 +2774,17 @@ version = "1.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
 
+[[package]]
+name = "smart-default"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eb01866308440fc64d6c44d9e86c5cc17adfe33c4d6eed55da9145044d0ffc1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.72",
+]
+
 [[package]]
 name = "socket2"
 version = "0.5.7"

+ 1 - 0
Cargo.toml

@@ -13,6 +13,7 @@ noodles-fastq = "0.13.0"
 rust-htslib = "0.46.0"
 uuid = { version = "1.10.0", features = ["v4"] }
 pandora_lib_blastn = { git = "https://git.t0m4.fr/Thomas/pandora_lib_blastn.git" }
+pandora_lib_igv = { git = "https://git.t0m4.fr/Thomas/pandora_lib_igv.git" }
 nom = "7.1.3"
 petgraph = "0.6.5"
 bam = "0.1.4"

+ 69 - 24
src/lib.rs

@@ -10,6 +10,7 @@ use flye::{read_flye_coverage, run_flye};
 use log::{info, warn};
 use nom::AsBytes;
 use pandora_lib_blastn::BlastResult;
+use pandora_lib_igv::{BamTrack, BedTrack, Track};
 use petgraph::{
     algo::dijkstra,
     data::{Build, DataMap},
@@ -92,15 +93,15 @@ pub fn lastz_two_by_two(records: &[String], best: bool) -> anyhow::Result<Vec<Bl
 
 pub fn lastz_bam(query_path: &str, subject_path: &str) -> anyhow::Result<Vec<bam::Record>> {
     let lastz_bin = "lastz";
-    let pipe =
-        format!("{lastz_bin} --format=softsam {query_path} {subject_path} | samtools view -b");
+    let pipe = format!("{lastz_bin} --format=softsam {query_path} {subject_path} | samtools sort");
     let out = duct::cmd!("bash", "-c", pipe).reader()?;
     let reader = bam::BamReader::from_stream(out, 4)?;
     Ok(reader.into_iter().filter_map(anyhow::Result::ok).collect())
 }
 
 pub fn lastz_remap(
-    bam_path: &str,
+    // bam_path: &str,
+    input_records: &Vec<bam::Record>,
     ref_fa: &str,
     ref_len: u32,
     new_bam: &str,
@@ -113,9 +114,9 @@ pub fn lastz_remap(
     let mut records = Vec::new();
     let mut mm_hm = HashMap::new();
     let mut ml_hm = HashMap::new();
-    let reader = bam::BamReader::from_path(bam_path, 4).unwrap();
-    for record in reader {
-        let record = record?;
+    // let reader = bam::BamReader::from_path(bam_path, 4).unwrap();
+    for record in input_records {
+        // let record = record?;
         let query_name = String::from_utf8(record.name().to_vec())?;
         if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
             mm_hm.insert(query_name.clone(), value.to_vec());
@@ -465,9 +466,10 @@ impl FlyeAsm {
         let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
         info!("Creating tmp directory {tmp_dir}");
         fs::create_dir(&tmp_dir).unwrap();
-        let input_fa = format!("{tmp_dir}/input.fa");
+        let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
 
-        records_to_fasta(&self.input_records, &input_fa)?;
+        // let input_fa = format!("{tmp_dir}/input.fa");
+        // records_to_fasta(&self.input_records, &input_fa)?;
 
         let flye_tmp_dir = format!("{tmp_dir}/flye");
         fs::create_dir(&flye_tmp_dir).unwrap();
@@ -493,7 +495,7 @@ impl FlyeAsm {
                 let seq = record.sequence().as_ref();
                 let seq = String::from_utf8(seq.to_vec()).unwrap();
                 let seq: String = seq[s..e].into();
-                contigs.push(seq.clone());
+                contigs.push(seq);
             }
             self.contigs = Some(contigs);
         } else {
@@ -538,14 +540,23 @@ impl FlyeAsm {
             // Remaping input bam to contig
             info!("Mapping  input reads to {contig_id}");
             let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
-            let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
-            lastz_remap(
-                &self.input_bam,
-                &contig_fa,
-                contig.len() as u32,
-                &new_bam,
-                &modkit_pileup,
-            )?;
+            duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
+
+            let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
+            let bwa = format!("bwa mem {contig_fa} {input_fa}");
+            let samtools = "samtools sort /dev/stdin";
+            let pipe = format!("{bwa} | {samtools} > {new_bam}");
+            duct::cmd!("bash", "-c", pipe).run()?;
+            duct::cmd!("samtools", "index", new_bam).run()?;
+
+            // let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
+            // lastz_remap(
+            //     &self.input_records,
+            //     &contig_fa,
+            //     contig.len() as u32,
+            //     &new_bam,
+            //     &modkit_pileup,
+            // )?;
         }
         Ok(())
     }
@@ -572,28 +583,29 @@ pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<(
 
 pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
     let pattern = format!("{}/*.bam", dir);
-    let re = Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$")
-        .unwrap();
+    let re =
+        Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$").unwrap();
 
     for entry in glob::glob(&pattern).expect("Failed to read glob pattern") {
         match entry {
             Ok(path) => {
                 if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
                     if re.is_match(file_name) {
-                        if let Some(input_id ) = path.file_stem().and_then(|n| n.to_str()) {
+                        if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
                             if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() {
-                                warn!("{input_id} already assembled");
+                                warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
                             } else {
                                 let f = FlyeAsm::new(dir, input_id)?;
                                 if let Ok(f) = f.assemble() {
                                     match f.save() {
-                                        Ok(_) => info!("✅"),
+                                        Ok(_) => {
+                                            info!("✅ Assembled: {}", igv_link(dir, input_id)?);
+                                        }
                                         Err(e) => warn!("❌ {e}"),
                                     }
                                 }
                             }
                         }
-                        
                     }
                 }
             }
@@ -603,6 +615,37 @@ pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
     Ok(())
 }
 
+pub fn igv_link(dir: &str, contig_id: &str) -> anyhow::Result<String> {
+    let contig_fa = format!("{dir}/{contig_id}_flye.fa");
+    let bam_track = Track::Bam(BamTrack::new(
+        "Assembled reads",
+        &format!("{dir}/{contig_id}_flye.bam"),
+    ));
+    let bed_track = Track::Bed(BedTrack::new(
+        "Blastn",
+        &format!("{dir}/{contig_id}_flye.bed"),
+    ));
+    let mod_track = Track::Bed(BedTrack::new(
+        "Modified bases",
+        &format!("{dir}/{contig_id}_flye_mod.bed"),
+    ));
+
+    let reference = pandora_lib_igv::ReferenceValues {
+        id: format!("{contig_id}_flye.fa"),
+        name: format!("{contig_id}_flye.fa"),
+        index_url: format!("{contig_fa}.fai"),
+        fasta_url: contig_fa,
+        ..Default::default()
+    };
+    let igv = pandora_lib_igv::Session::default()
+        .with_reference(reference)
+        .add_track(bam_track)?
+        .add_track(bed_track)?
+        .add_track(mod_track)?;
+    let link = igv.link("http://store-desktop.local/igv/")?;
+    Ok(link)
+}
+
 #[cfg(test)]
 mod tests {
     use std::fs;
@@ -663,13 +706,15 @@ mod tests {
         let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
 
         FlyeAsm::new(&asm_dir, input_id)?.assemble()?.save()?;
+        let l = igv_link(&asm_dir, input_id)?;
+        info!("{l}");
         Ok(())
     }
 
     #[test]
     fn flye_d() -> anyhow::Result<()> {
         init();
-        let id = "SALICETTO";
+        let id = "BECERRA";
         let res_dir = "/data/longreads_basic_pipe";
         let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
         dir_flye(&asm_dir)