|
@@ -19,6 +19,7 @@
|
|
|
// SOFTWARE.
|
|
// SOFTWARE.
|
|
|
|
|
|
|
|
use duct::cmd;
|
|
use duct::cmd;
|
|
|
|
|
+use rust_htslib::bam::Header;
|
|
|
|
|
|
|
|
pub fn bwa_aln(ref_path: &str, query_sequence: &str) -> Vec<Sam> {
|
|
pub fn bwa_aln(ref_path: &str, query_sequence: &str) -> Vec<Sam> {
|
|
|
let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", query_sequence))
|
|
let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", query_sequence))
|
|
@@ -157,9 +158,6 @@ impl Alignments {
|
|
|
ranges: res_ranges
|
|
ranges: res_ranges
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- pub fn get_ranges(&self) {
|
|
|
|
|
-
|
|
|
|
|
- }
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub fn revcomp(dna: &str) -> String {
|
|
pub fn revcomp(dna: &str) -> String {
|
|
@@ -186,6 +184,44 @@ pub fn switch_base(c: char) -> char {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+pub struct BwaAlign {
|
|
|
|
|
+ aligner: BwaAligner,
|
|
|
|
|
+ header: Header
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl BwaAlign {
|
|
|
|
|
+ pub fn init(ref_path: &str) -> BwaAlign {
|
|
|
|
|
+ let bwa_ref = BwaReference::open(ref_path).unwrap();
|
|
|
|
|
+ let header = bwa_ref.create_bam_header();
|
|
|
|
|
+
|
|
|
|
|
+ BwaAlign {
|
|
|
|
|
+ aligner: BwaAligner::new(
|
|
|
|
|
+ bwa_ref,
|
|
|
|
|
+ BwaSettings::new(),
|
|
|
|
|
+ PairedEndStats::default()
|
|
|
|
|
+ ),
|
|
|
|
|
+ header
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn get_ref_positions(&self, sequence: &str) -> Vec<((std::string::String, i32, i32), (i32, i32))> {
|
|
|
|
|
+ let mut all_ranges = Vec::new();
|
|
|
|
|
+ let (a, _) = self.aligner.align_read_pair(b"read_name", sequence.as_bytes(), &vec![b'2'; sequence.len()], String::new().as_bytes(), String::new().as_bytes());
|
|
|
|
|
+
|
|
|
|
|
+ for record in a.iter() {
|
|
|
|
|
+ let contig = self.bwa_tid_contig(record.tid() as usize);
|
|
|
|
|
+ let res = ranges_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
|
|
|
|
|
+ res.into_iter().for_each(|(reference, query)| {
|
|
|
|
|
+ all_ranges.push(((contig.clone(), reference.0, reference.1), query))
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ all_ranges
|
|
|
|
|
+ }
|
|
|
|
|
+ pub fn bwa_tid_contig(&self, tid: usize) -> String {
|
|
|
|
|
+ self.header.to_hashmap().get("SQ").unwrap().get(tid).unwrap().get("SN").unwrap().to_owned()
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
/// return [start, stop] 1-based positions relative to reference and query
|
|
/// return [start, stop] 1-based positions relative to reference and query
|
|
|
pub fn ranges_from_cigar(ref_start: i32, cigar: Cigar, is_rc: bool) -> Vec<((i32, i32), (i32, i32))> {
|
|
pub fn ranges_from_cigar(ref_start: i32, cigar: Cigar, is_rc: bool) -> Vec<((i32, i32), (i32, i32))> {
|
|
|
let cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
|
|
let cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
|
|
@@ -268,6 +304,11 @@ pub fn format_seq(name: &str, sequence: &str, line_size: usize) -> String {
|
|
|
format!(">{name}\n{res}")
|
|
format!(">{name}\n{res}")
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+use bwa::{BwaAligner, BwaReference, BwaSettings, PairedEndStats};
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
#[cfg(test)]
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
mod tests {
|
|
|
use super::*;
|
|
use super::*;
|
|
@@ -289,6 +330,7 @@ mod tests {
|
|
|
.replace("\n", "")
|
|
.replace("\n", "")
|
|
|
.replace(" ", "");
|
|
.replace(" ", "");
|
|
|
|
|
|
|
|
|
|
+
|
|
|
let alns = Alignments::to_reference(&sequence, ref_path);
|
|
let alns = Alignments::to_reference(&sequence, ref_path);
|
|
|
assert_eq!(
|
|
assert_eq!(
|
|
|
alns.ranges,
|
|
alns.ranges,
|
|
@@ -309,11 +351,31 @@ mod tests {
|
|
|
.replace("\n", "")
|
|
.replace("\n", "")
|
|
|
.replace(" ", "");
|
|
.replace(" ", "");
|
|
|
|
|
|
|
|
- let alns = Alignments::to_reference(&sequence, ref_path);
|
|
|
|
|
|
|
+ //2011N160703-HAOAA 1
|
|
|
|
|
+ let sequence_2 = "
|
|
|
|
|
+ CCTCCCGCCCTGCCTTTCAGGTCGGGAAAGTCCCGGGGTTTGCAAAAGAGTGTCCGAGCG
|
|
|
|
|
+ CCCTGGAGGCGGGGAGGGCGGCAAGGAGGGCGCCGGTGTCGCGGTTGAGTTTCTCCACTG
|
|
|
|
|
+ CCGACCGCGGCCACGCTGCCCGGGGCTTCCCGGACAGGCTTCGCGCCGCCCACCTCGGCA
|
|
|
|
|
+ GCCGGGGCGGA
|
|
|
|
|
+ TGG
|
|
|
|
|
+ CCACACAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACACCCAGC
|
|
|
|
|
+ CGTGACCCGCTATGTATGTCTCAGCATTGGGAAGAGTCCTCTGAGTGTCATGGGAAAATA
|
|
|
|
|
+ ATATATGAGTT"
|
|
|
|
|
+ .replace("\n", "")
|
|
|
|
|
+ .replace(" ", "");
|
|
|
|
|
+
|
|
|
|
|
+ let aligner = BwaAlign::init(ref_path);
|
|
|
|
|
+ let res_1 = aligner.get_ref_positions(&sequence);
|
|
|
|
|
+ let res_2 = aligner.get_ref_positions(&sequence_2);
|
|
|
|
|
|
|
|
assert_eq!(
|
|
assert_eq!(
|
|
|
- alns.ranges,
|
|
|
|
|
- vec![vec![(("chr14".to_string(), 22908232, 22908007), (1, 226))], vec![(("chr2".to_string(), 43453131, 43453227), (230, 326))]]
|
|
|
|
|
|
|
+ res_1,
|
|
|
|
|
+ vec![(("chr14".to_string(), 22908232, 22908007), (1, 226)), (("chr2".to_string(), 43453131, 43453227), (230, 326))]
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ assert_eq!(
|
|
|
|
|
+ res_2,
|
|
|
|
|
+ vec![(("chr2".to_string(), 43453321, 43453131), (1, 191)), (("chr14".to_string(), 22908007, 22908123), (195, 311))]
|
|
|
);
|
|
);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|