Thomas 2 năm trước cách đây
mục cha
commit
674679c94d
1 tập tin đã thay đổi với 193 bổ sung7 xóa
  1. 193 7
      src/lib.rs

+ 193 - 7
src/lib.rs

@@ -208,13 +208,21 @@ impl BwaAlign {
         let mut all_ranges = Vec::new();
         let (a, _) = self.aligner.align_read_pair(b"read_name", sequence.as_bytes(), &vec![b'2'; sequence.len()], String::new().as_bytes(), String::new().as_bytes());
 
-        for record in a.iter() {
-            let contig = self.bwa_tid_contig(record.tid() as usize);
-            let res = ranges_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
-            res.into_iter().for_each(|(reference, query)| {
-                all_ranges.push(((contig.clone(), reference.0, reference.1), query))
-            });
+        if a.len() > 1 { // if multiple alignements;
+            for record in a.iter() {
+                let contig = self.bwa_tid_contig(record.tid() as usize);
+                let res = ranges_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
+                res.into_iter().for_each(|(reference, query)| {
+                    all_ranges.push(((contig.clone(), reference.0, reference.1), query))
+                });
+            }
+        } else {
+            let record = a.iter().next().unwrap();
+            // hgvs_from_cigar(record.pos() as i32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
+            let r = find_deletion_insertion_ranges(record.pos() as u32 + 1, Cigar::new_from_str(&format!("{}", record.cigar())), record.is_reverse());
+            println!("{:?}", r);
         }
+
         all_ranges
     }
     pub fn bwa_tid_contig(&self, tid: usize) -> String {
@@ -304,11 +312,177 @@ pub fn format_seq(name: &str, sequence: &str, line_size: usize) -> String {
     format!(">{name}\n{res}")
 }
 
+fn find_deletion_insertion_ranges(start_position: u32, cigar: Cigar, is_rc: bool) -> (Vec<(u32, u32)>, Vec<(u32, u32)>) {
+    let mut cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
+
+
+    // :)
+    let mut position = start_position;
+    let mut deletion_ranges = Vec::new();
+    let mut insertion_ranges = Vec::new();
+    let mut deletion_start = 0;
+    let mut insertion_start = 0;
+    let mut previous_operation = 'M';
+
+    for operation in cigar {
+        match operation {
+            'M' => {
+                position += 1;
+                if previous_operation == 'D' && deletion_start != 0 {
+                    let deletion_end = position - 1;
+                    if deletion_end > deletion_start {
+                        deletion_ranges.push((deletion_start, deletion_end));
+                    }
+                    deletion_start = 0;
+                } else if previous_operation == 'I' && insertion_start != 0 {
+                    let insertion_end = position - 1;
+                    if insertion_end > insertion_start {
+                        insertion_ranges.push((insertion_start, insertion_end));
+                    }
+                    insertion_start = 0;
+                }
+                previous_operation = 'M';
+            }
+            'D' => {
+                if deletion_start == 0 {
+                    deletion_start = position;
+                }
+                position += 1; // Increment position for deletion
+                previous_operation = 'D';
+            }
+            'I' => {
+                if insertion_start == 0 {
+                    insertion_start = position - 1; // Store the previous reference position as the insertion start
+                }
+                previous_operation = 'I';
+            }
+            _ => {
+                // Handle other CIGAR operations if necessary
+            }
+        }
+    }
 
+    if deletion_start != 0 {
+        let deletion_end = position;
+        if deletion_end > deletion_start {
+            deletion_ranges.push((deletion_start, deletion_end));
+        }
+    }
+    if insertion_start != 0 {
+        let insertion_end = position - 1; // Store the previous reference position as the insertion end
+        if insertion_end > insertion_start {
+            insertion_ranges.push((insertion_start, insertion_end));
+        }
+    }
+
+    (deletion_ranges, insertion_ranges)
+}
 
-use bwa::{BwaAligner, BwaReference, BwaSettings, PairedEndStats};
 
 
+pub fn hgvs_from_cigar(ref_start: i32, cigar: Cigar, is_rc: bool) {
+    println!("{}", ref_start);
+    let cigar = if is_rc { cigar.expand().chars().rev().collect::<Vec<char>>() } else { cigar.expand().chars().collect::<Vec<char>>() };
+    println!("{:?}", cigar);
+    let mut ref_counter = 0;
+    let mut query_counter = 0;
+    
+    let mut query_matches: Vec<i32> = Vec::new();
+    let mut ref_matches: Vec<i32> = Vec::new();
+
+    let mut insertions_pos = Vec::new();
+    let mut insertions_query_pos = Vec::new();
+    let mut deletions_pos = Vec::new();
+    for op in cigar.iter() {
+        match op {
+            'M' | '=' => {
+                query_matches.push(query_counter);
+                ref_matches.push(ref_counter);
+                
+                ref_counter   += 1;
+                query_counter += 1;
+            },
+            'S' | 'H' | 'I' => {
+                query_counter += 1;
+                if op == &'I' {
+                    insertions_pos.push(ref_counter);
+                    insertions_query_pos.push(query_counter);
+                }
+            },
+            'D' | 'N' => {
+                ref_counter += 1;
+                if op == &'D' {
+                    deletions_pos.push(ref_counter);
+                }
+            },
+            _ => panic!("Unknown cigar operand")
+        }
+    }
+    let mut ref_ranges: Vec<(String, i32, i32)> = Vec::new();
+    if insertions_pos.len() > 0 {
+        println!("insertions_pos {:?}", insertions_pos);
+    }
+    
+    if deletions_pos.len() > 0 {
+        println!("deletions_pos {:?}", deletions_pos);
+        let mut range_start = None;
+        // let mut range_end = None;
+        let mut last = 0;
+        deletions_pos.iter().enumerate().for_each(|(i, &del_pos)| {
+            println!("{i}");
+            if let Some(pos) = range_start {
+                if del_pos == last + 1 && i + 1 != deletions_pos.len() {
+                    
+                } else {
+                    ref_ranges.push(("del".to_string(), pos + ref_start - 1, last + ref_start + 1));
+                    range_start = Some(pos);
+                }
+            } else {
+                range_start = Some(del_pos);
+            }
+            last = del_pos;
+        });
+    }
+    println!("{:?}", ref_ranges);
+    
+}
+
+fn generate_deletion_hgvs_notation(
+    seqname: &str,
+    deletion_ranges: &[(u32, u32)],
+) -> String {
+    let mut hgvs_notation = String::new();
+
+    for &(start, end) in deletion_ranges {
+        let deletion_start = start - 1;
+        let deletion_end = end + 1;
+        let deletion_notation = format!("{}:{}_{}", seqname, deletion_start, deletion_end);
+        hgvs_notation.push_str(&deletion_notation);
+        hgvs_notation.push(';');
+    }
+
+    hgvs_notation
+}
+
+fn generate_insertion_hgvs_notation(
+    seqname: &str,
+    insertion_ranges: &[(u32, u32)],
+    reference_sequence: &str,
+) -> String {
+    let mut hgvs_notation = String::new();
+
+    for &(start, _) in insertion_ranges {
+        let inserted_sequence = &reference_sequence[start as usize - 1..start as usize];
+        let insertion_notation = format!("{}:{}ins{}", seqname, start, inserted_sequence);
+        hgvs_notation.push_str(&insertion_notation);
+        hgvs_notation.push(';');
+    }
+
+    hgvs_notation
+}
+
+
+use bwa::{BwaAligner, BwaReference, BwaSettings, PairedEndStats};
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -378,4 +552,16 @@ mod tests {
             vec![(("chr2".to_string(), 43453321, 43453131), (1, 191)), (("chr14".to_string(), 22908007, 22908123), (195, 311))]
         );
     }
+
+    #[test]
+    fn deletion() {
+        let ref_path = "/home/thomas/NGS/ref/hg19.fa";
+
+        let sequence = "CTCGCTCGGCGGCCGCTCGGTCATCCTGTGGGCAGACAGACAAGCGGATGCCCGCTCTGACGACCGCCCCTGAC";
+
+        let aligner = BwaAlign::init(ref_path);
+        let res = aligner.get_ref_positions(&sequence);
+        println!("{:?}", res);
+
+    }
 }