|
|
@@ -208,13 +208,21 @@ impl BwaAlign {
|
|
|
let mut all_ranges = Vec::new();
|
|
|
let (a, _) = self.aligner.align_read_pair(b"read_name", sequence.as_bytes(), &vec![b'2'; sequence.len()], String::new().as_bytes(), String::new().as_bytes());
|
|
|
|
|
|
- for record in a.iter() {
|
|
|
- let contig = self.bwa_tid_contig(record.tid() as usize);
|
|
|
- let res = ranges_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
|
|
|
- res.into_iter().for_each(|(reference, query)| {
|
|
|
- all_ranges.push(((contig.clone(), reference.0, reference.1), query))
|
|
|
- });
|
|
|
+ if a.len() > 1 { // if multiple alignements;
|
|
|
+ for record in a.iter() {
|
|
|
+ let contig = self.bwa_tid_contig(record.tid() as usize);
|
|
|
+ let res = ranges_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
|
|
|
+ res.into_iter().for_each(|(reference, query)| {
|
|
|
+ all_ranges.push(((contig.clone(), reference.0, reference.1), query))
|
|
|
+ });
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ let record = a.iter().next().unwrap();
|
|
|
+ // hgvs_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
|
|
|
+ let r = find_deletion_insertion_ranges(record.pos() as u32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
|
|
|
+ println!("{:?}", r);
|
|
|
}
|
|
|
+
|
|
|
all_ranges
|
|
|
}
|
|
|
pub fn bwa_tid_contig(&self, tid: usize) -> String {
|
|
|
@@ -304,11 +312,177 @@ pub fn format_seq(name: &str, sequence: &str, line_size: usize) -> String {
|
|
|
format!(">{name}\n{res}")
|
|
|
}
|
|
|
|
|
|
+fn find_deletion_insertion_ranges(start_position: u32, cigar: Cigar, is_rc: bool) -> (Vec<(u32, u32)>, Vec<(u32, u32)>) {
|
|
|
+ let mut cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
|
|
|
+
|
|
|
+
|
|
|
+ // :)
|
|
|
+ let mut position = start_position;
|
|
|
+ let mut deletion_ranges = Vec::new();
|
|
|
+ let mut insertion_ranges = Vec::new();
|
|
|
+ let mut deletion_start = 0;
|
|
|
+ let mut insertion_start = 0;
|
|
|
+ let mut previous_operation = 'M';
|
|
|
+
|
|
|
+ for operation in cigar {
|
|
|
+ match operation {
|
|
|
+ 'M' => {
|
|
|
+ position += 1;
|
|
|
+ if previous_operation == 'D' && deletion_start != 0 {
|
|
|
+ let deletion_end = position - 1;
|
|
|
+ if deletion_end > deletion_start {
|
|
|
+ deletion_ranges.push((deletion_start, deletion_end));
|
|
|
+ }
|
|
|
+ deletion_start = 0;
|
|
|
+ } else if previous_operation == 'I' && insertion_start != 0 {
|
|
|
+ let insertion_end = position - 1;
|
|
|
+ if insertion_end > insertion_start {
|
|
|
+ insertion_ranges.push((insertion_start, insertion_end));
|
|
|
+ }
|
|
|
+ insertion_start = 0;
|
|
|
+ }
|
|
|
+ previous_operation = 'M';
|
|
|
+ }
|
|
|
+ 'D' => {
|
|
|
+ if deletion_start == 0 {
|
|
|
+ deletion_start = position;
|
|
|
+ }
|
|
|
+ position += 1; // Increment position for deletion
|
|
|
+ previous_operation = 'D';
|
|
|
+ }
|
|
|
+ 'I' => {
|
|
|
+ if insertion_start == 0 {
|
|
|
+ insertion_start = position - 1; // Store the previous reference position as the insertion start
|
|
|
+ }
|
|
|
+ previous_operation = 'I';
|
|
|
+ }
|
|
|
+ _ => {
|
|
|
+ // Handle other CIGAR operations if necessary
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
+ if deletion_start != 0 {
|
|
|
+ let deletion_end = position;
|
|
|
+ if deletion_end > deletion_start {
|
|
|
+ deletion_ranges.push((deletion_start, deletion_end));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if insertion_start != 0 {
|
|
|
+ let insertion_end = position - 1; // Store the previous reference position as the insertion end
|
|
|
+ if insertion_end > insertion_start {
|
|
|
+ insertion_ranges.push((insertion_start, insertion_end));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ (deletion_ranges, insertion_ranges)
|
|
|
+}
|
|
|
|
|
|
-use bwa::{BwaAligner, BwaReference, BwaSettings, PairedEndStats};
|
|
|
|
|
|
|
|
|
+pub fn hgvs_from_cigar(ref_start: i32, cigar: Cigar, is_rc: bool) {
|
|
|
+ println!("{}", ref_start);
|
|
|
+ let cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
|
|
|
+ println!("{:?}", cigar);
|
|
|
+ let mut ref_counter = 0;
|
|
|
+ let mut query_counter = 0;
|
|
|
+
|
|
|
+ let mut query_matches: Vec<i32> = Vec::new();
|
|
|
+ let mut ref_matches: Vec<i32> = Vec::new();
|
|
|
+
|
|
|
+ let mut insertions_pos = Vec::new();
|
|
|
+ let mut insertions_query_pos = Vec::new();
|
|
|
+ let mut deletions_pos = Vec::new();
|
|
|
+ for op in cigar.iter() {
|
|
|
+ match op {
|
|
|
+ 'M' | '=' => {
|
|
|
+ query_matches.push(query_counter);
|
|
|
+ ref_matches.push(ref_counter);
|
|
|
+
|
|
|
+ ref_counter += 1;
|
|
|
+ query_counter += 1;
|
|
|
+ },
|
|
|
+ 'S' | 'H' | 'I' => {
|
|
|
+ query_counter += 1;
|
|
|
+ if op == &'I' {
|
|
|
+ insertions_pos.push(ref_counter);
|
|
|
+ insertions_query_pos.push(query_counter);
|
|
|
+ }
|
|
|
+ },
|
|
|
+ 'D' | 'N' => {
|
|
|
+ ref_counter += 1;
|
|
|
+ if op == &'D' {
|
|
|
+ deletions_pos.push(ref_counter);
|
|
|
+ }
|
|
|
+ },
|
|
|
+ _ => panic!("Unknown cigar operand")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ let mut ref_ranges: Vec<(String, i32, i32)> = Vec::new();
|
|
|
+ if insertions_pos.len() > 0 {
|
|
|
+ println!("insertions_pos {:?}", insertions_pos);
|
|
|
+ }
|
|
|
+
|
|
|
+ if deletions_pos.len() > 0 {
|
|
|
+ println!("deletions_pos {:?}", deletions_pos);
|
|
|
+ let mut range_start = None;
|
|
|
+ // let mut range_end = None;
|
|
|
+ let mut last = 0;
|
|
|
+ deletions_pos.iter().enumerate().for_each(|(i, &del_pos)| {
|
|
|
+ println!("{i}");
|
|
|
+ if let Some(pos) = range_start {
|
|
|
+ if del_pos == last + 1 && i + 1 != deletions_pos.len() {
|
|
|
+
|
|
|
+ } else {
|
|
|
+ ref_ranges.push(("del".to_string(), pos + ref_start - 1, last + ref_start + 1));
|
|
|
+ range_start = Some(pos);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ range_start = Some(del_pos);
|
|
|
+ }
|
|
|
+ last = del_pos;
|
|
|
+ });
|
|
|
+ }
|
|
|
+ println!("{:?}", ref_ranges);
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+fn generate_deletion_hgvs_notation(
|
|
|
+ seqname: &str,
|
|
|
+ deletion_ranges: &[(u32, u32)],
|
|
|
+) -> String {
|
|
|
+ let mut hgvs_notation = String::new();
|
|
|
+
|
|
|
+ for &(start, end) in deletion_ranges {
|
|
|
+ let deletion_start = start - 1;
|
|
|
+ let deletion_end = end + 1;
|
|
|
+ let deletion_notation = format!("{}:{}_{}", seqname, deletion_start, deletion_end);
|
|
|
+ hgvs_notation.push_str(&deletion_notation);
|
|
|
+ hgvs_notation.push(';');
|
|
|
+ }
|
|
|
+
|
|
|
+ hgvs_notation
|
|
|
+}
|
|
|
+
|
|
|
+fn generate_insertion_hgvs_notation(
|
|
|
+ seqname: &str,
|
|
|
+ insertion_ranges: &[(u32, u32)],
|
|
|
+ reference_sequence: &str,
|
|
|
+) -> String {
|
|
|
+ let mut hgvs_notation = String::new();
|
|
|
+
|
|
|
+ for &(start, _) in insertion_ranges {
|
|
|
+ let inserted_sequence = &reference_sequence[start as usize - 1..start as usize];
|
|
|
+ let insertion_notation = format!("{}:{}ins{}", seqname, start, inserted_sequence);
|
|
|
+ hgvs_notation.push_str(&insertion_notation);
|
|
|
+ hgvs_notation.push(';');
|
|
|
+ }
|
|
|
+
|
|
|
+ hgvs_notation
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+use bwa::{BwaAligner, BwaReference, BwaSettings, PairedEndStats};
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
use super::*;
|
|
|
@@ -378,4 +552,16 @@ mod tests {
|
|
|
vec![(("chr2".to_string(), 43453321, 43453131), (1, 191)), (("chr14".to_string(), 22908007, 22908123), (195, 311))]
|
|
|
);
|
|
|
}
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn deletion() {
|
|
|
+ let ref_path = "/home/thomas/NGS/ref/hg19.fa";
|
|
|
+
|
|
|
+ let sequence = "CTCGCTCGGCGGCCGCTCGGTCATCCTGTGGGCAGACAGACAAGCGGATGCCCGCTCTGACGACCGCCCCTGAC";
|
|
|
+
|
|
|
+ let aligner = BwaAlign::init(ref_path);
|
|
|
+ let res = aligner.get_ref_positions(&sequence);
|
|
|
+ println!("{:?}", res);
|
|
|
+
|
|
|
+ }
|
|
|
}
|