Thomas há 2 anos atrás
pai
commit
21e1896330
1 ficheiros alterados com 55 adições e 26 exclusões
  1. 55 26
      src/main.rs

+ 55 - 26
src/main.rs

@@ -26,17 +26,18 @@ fn main() {
         println!("cigar len: {:?}", e.cigar.len());
         println!("pos: {}:{}", e.ref_name, e.pos);
         if is_reverse(e.flag) {
-            println!("{:?}", revcomp(&e.seq));
-            println!("{:?}", e.cigar.expand());
+            // println!("{:?}", &e.seq);
+            println!("rc {:?}", revcomp(&e.seq));
+            println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
+
+            // println!("{:?}", e.cigar.expand());
 
-            // println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
         } else {
             println!("{:?}", e.seq);
             println!("{:?}", e.cigar.expand());
         }
         
         println!("-------------------------------")
-        // println!("{:?}", e.seq.len() == e.cigar.len() as usize);
     });
 }
 
@@ -45,14 +46,27 @@ pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec<Sam> {
     .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
     .stdout_capture()
     .stderr_capture()
-    .read().unwrap();
-
-    let alignements: Vec<Sam> = stdout.split("\n")
-    .filter(|e| e.contains("sequence_to_query"))
-    .map(|e| parse_sam_output(e))
-    .collect();
+    .read()
+    .unwrap();
     
-    alignements
+    stdout.split("\n")
+    .filter(|e| e.contains("sequence_to_query"))
+    .map(|e| Sam::new_from_bwa_output(e))
+    .into_iter()
+    .map(|mut e| {
+        // if the sequence is returned by bwa with an N at first position remove it
+        if e.seq.len() != sequence.len() {
+            if e.seq.chars().nth(0).unwrap() == 'N' {
+                let (_, seq) = e.seq.split_at(1);
+                e.seq = seq.to_string();
+                let mut cig = e.cigar.0.get_mut(0).unwrap();
+                cig.1 -= 1;
+                // e.pos += 1;
+            }
+        }
+        e
+    })
+    .collect()
 }
 
 #[derive(Debug, Clone)]
@@ -69,19 +83,25 @@ pub struct Sam {
     pub seq     : String
 }
 
-fn parse_sam_output(sam_string: &str) -> Sam {
-    let mut split = sam_string.split("\t");
-    Sam {
-        qname   : split.next().unwrap().to_string(),
-        flag    : split.next().unwrap().parse().unwrap(),
-        ref_name: split.next().unwrap().to_string(),
-        pos     : split.next().unwrap().parse().unwrap(),
-        mapq    : split.next().unwrap().parse().unwrap(),
-        cigar   : Cigar::new_from_str(&split.next().unwrap().to_string()),
-        rnext   : split.next().unwrap().to_string(),
-        pnext   : split.next().unwrap().to_string(),
-        tlen    : split.next().unwrap().to_string(),
-        seq     : split.next().unwrap().to_string()
+impl Sam {
+    pub fn new_from_bwa_output(sam_string: &str) -> Sam {
+        let mut split = sam_string.split("\t");
+        Sam {
+            qname   : split.next().unwrap().to_string(),
+            flag    : split.next().unwrap().parse().unwrap(),
+            ref_name: split.next().unwrap().to_string(),
+            pos     : split.next().unwrap().parse().unwrap(),
+            mapq    : split.next().unwrap().parse().unwrap(),
+            cigar   : Cigar::new_from_str(&split.next().unwrap().to_string()),
+            rnext   : split.next().unwrap().to_string(),
+            pnext   : split.next().unwrap().to_string(),
+            tlen    : split.next().unwrap().to_string(),
+            seq     : split.next().unwrap().to_string()
+        }
+    }
+
+    pub fn is_reverse(&self) -> bool {
+        self.flag & 0x10 == 0x10
     }
 }
 
@@ -99,7 +119,6 @@ impl Cigar {
                     let current_add = num_acc.parse::<u32>().unwrap();
                     num_acc = "".to_string();
 
-                    // if begin by N...
                     res.push((c.to_string(), current_add));
                 }
             }}
@@ -156,5 +175,15 @@ fn switch_base(c:char) -> char {
 
 fn make_aln(seq_query: &str, alignements: &Vec<Sam>) {
     let consumes_query = vec!["M", "I", "S", "=", "X", "H"];
-    let consumes_ref = vec!["M", "D", "N", "=", "X"];
+    let consumes_ref   = vec!["M", "D", "N", "=", "X"];
+
+    let mut stack: Vec<Vec<u8>> = Vec::new();
+    stack.push(seq_query.as_bytes().to_vec());
+
+    for alignement in alignements {
+        // for pos in 0..seq_query.len() {
+        
+        // }
+
+    }
 }