Ver código fonte

bwa_mem embeded

Thomas 2 anos atrás
pai
commit
bf05cb9e35
2 arquivos alterados com 71 adições e 7 exclusões
  1. 3 1
      Cargo.toml
  2. 68 6
      src/lib.rs

+ 3 - 1
Cargo.toml

@@ -6,4 +6,6 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-duct = "0.13.5"
+duct = "0.13.5"
+bwa = { git = "https://github.com/10XGenomics/rust-bwa" }
+rust-htslib = { version = ">=0.35.2", default-features = false, features = ["serde_feature"] }

+ 68 - 6
src/lib.rs

@@ -19,6 +19,7 @@
 // SOFTWARE.
 
 use duct::cmd;
+use rust_htslib::bam::Header;
 
 pub fn bwa_aln(ref_path: &str, query_sequence: &str) -> Vec<Sam> {
     let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", query_sequence))
@@ -157,9 +158,6 @@ impl Alignments {
             ranges: res_ranges
         }
     }
-    pub fn get_ranges(&self) {
-
-    }
 }
 
 pub fn revcomp(dna: &str) -> String {
@@ -186,6 +184,44 @@ pub fn switch_base(c: char) -> char {
     }
 }
 
+pub struct BwaAlign {
+    aligner: BwaAligner,
+    header: Header
+}
+
+impl BwaAlign {
+    pub fn init(ref_path: &str) -> BwaAlign {
+        let bwa_ref = BwaReference::open(ref_path).unwrap();
+        let header = bwa_ref.create_bam_header();
+        
+        BwaAlign { 
+            aligner: BwaAligner::new(
+                bwa_ref,
+                BwaSettings::new(),
+                PairedEndStats::default()
+            ),
+            header
+        }
+    }
+
+    pub fn get_ref_positions(&self, sequence: &str) -> Vec<((std::string::String, i32, i32), (i32, i32))> {
+        let mut all_ranges = Vec::new();
+        let (a, _) = self.aligner.align_read_pair(b"read_name", sequence.as_bytes(), &vec![b'2'; sequence.len()], String::new().as_bytes(), String::new().as_bytes());
+
+        for record in a.iter() {
+            let contig = self.bwa_tid_contig(record.tid() as usize);
+            let res = ranges_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
+            res.into_iter().for_each(|(reference, query)| {
+                all_ranges.push(((contig.clone(), reference.0, reference.1), query))
+            });
+        }
+        all_ranges
+    }
+    pub fn bwa_tid_contig(&self, tid: usize) -> String {
+        self.header.to_hashmap().get("SQ").unwrap().get(tid).unwrap().get("SN").unwrap().to_owned()
+    }
+}
+
 /// return [start, stop] 1-based positions relative to reference and query
 pub fn ranges_from_cigar(ref_start: i32, cigar: Cigar, is_rc: bool) -> Vec<((i32, i32), (i32, i32))> {
     let cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
@@ -268,6 +304,11 @@ pub fn format_seq(name: &str, sequence: &str, line_size: usize) -> String {
     format!(">{name}\n{res}")
 }
 
+
+
+use bwa::{BwaAligner, BwaReference, BwaSettings, PairedEndStats};
+
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -289,6 +330,7 @@ mod tests {
         .replace("\n", "")
         .replace(" ", "");
 
+
         let alns = Alignments::to_reference(&sequence, ref_path);
         assert_eq!(
             alns.ranges,
@@ -309,11 +351,31 @@ mod tests {
         .replace("\n", "")
         .replace(" ", "");
 
-        let alns = Alignments::to_reference(&sequence, ref_path);
+        //2011N160703-HAOAA 1
+        let sequence_2 = "
+        CCTCCCGCCCTGCCTTTCAGGTCGGGAAAGTCCCGGGGTTTGCAAAAGAGTGTCCGAGCG
+        CCCTGGAGGCGGGGAGGGCGGCAAGGAGGGCGCCGGTGTCGCGGTTGAGTTTCTCCACTG
+        CCGACCGCGGCCACGCTGCCCGGGGCTTCCCGGACAGGCTTCGCGCCGCCCACCTCGGCA
+        GCCGGGGCGGA
+        TGG
+        CCACACAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACACCCAGC
+        CGTGACCCGCTATGTATGTCTCAGCATTGGGAAGAGTCCTCTGAGTGTCATGGGAAAATA
+        ATATATGAGTT"
+        .replace("\n", "")
+        .replace(" ", "");
+
+        let aligner = BwaAlign::init(ref_path);
+        let res_1 = aligner.get_ref_positions(&sequence);
+        let res_2 = aligner.get_ref_positions(&sequence_2);
         
         assert_eq!(
-            alns.ranges,
-            vec![vec![(("chr14".to_string(), 22908232, 22908007), (1, 226))], vec![(("chr2".to_string(), 43453131, 43453227), (230, 326))]]
+            res_1,
+            vec![(("chr14".to_string(), 22908232, 22908007), (1, 226)), (("chr2".to_string(), 43453131, 43453227), (230, 326))]
+        );
+
+        assert_eq!(
+            res_2,
+            vec![(("chr2".to_string(), 43453321, 43453131), (1, 191)), (("chr14".to_string(), 22908007, 22908123), (195, 311))]
         );
     }
 }