Browse Source

first commit

Thomas 2 years ago
commit
622e9dfd1f
2 changed files with 167 additions and 0 deletions
  1. 9 0
      Cargo.toml
  2. 158 0
      src/main.rs

+ 9 - 0
Cargo.toml

@@ -0,0 +1,9 @@
+[package]
+name = "desc_sequence"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+duct = "0.13.5"

+ 158 - 0
src/main.rs

@@ -0,0 +1,158 @@
+use duct::cmd;
+
+fn main() {
+    let fasta_ref_path = "/home/thomas/NGS/ref/hg19.fa";
+    let sequence = "
+    CATTATATTCTTTTTTACAGTGCCACGGGTATATTTAGTAAACATTGAAATGTGTGGTTG
+    GAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTGTA
+    GACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATATTA
+    TTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACGGC
+    TGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTAGGTCCAAGAACTACAACGGCT
+    GCTGCCGGCCGGGCCCGCTCCCTGGCCTACCTTGCACAAGAAGTCGACATCGTAGAAGGC
+    GGACAGAAGTGTGGTCGACATGTTTCTGGAA"
+    .replace("\n", "")
+    .replace(" ", "");
+
+    println!("sequence len: {}", sequence.len());
+    println!(">sequence\n{}", sequence);
+
+    let aln = bwa_aln(fasta_ref_path, &sequence);
+
+    aln.iter()
+    .for_each(|e| {
+        println!("seq len: {:?}", e.seq.len());
+        println!("cigar: {:?}", e.cigar.0);
+        println!("reverse: {:?}", is_reverse(e.flag));
+        println!("cigar len: {:?}", e.cigar.len());
+        println!("pos: {}:{}", e.ref_name, e.pos);
+        if is_reverse(e.flag) {
+            println!("{:?}", revcomp(&e.seq));
+            println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
+        } else {
+            println!("{:?}", e.seq);
+            println!("{:?}", e.cigar.expand());
+        }
+        
+        println!("-------------------------------")
+        // println!("{:?}", e.seq.len() == e.cigar.len() as usize);
+    });
+}
+
+pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec<Sam> {
+    let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", sequence))
+    .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
+    .stdout_capture()
+    .stderr_capture()
+    .read().unwrap();
+
+    let alignements: Vec<Sam> = stdout.split("\n")
+    .filter(|e| e.contains("sequence_to_query"))
+    .map(|e| parse_sam_output(e))
+    .collect();
+    
+    alignements
+}
+
+#[derive(Debug, Clone)]
+pub struct Sam {
+    pub qname   : String,
+    pub flag    : u16,
+    pub ref_name: String,
+    pub pos     : i32,
+    pub mapq    : i32,
+    pub cigar   : Cigar,
+    pub rnext   : String,
+    pub pnext   : String,
+    pub tlen    : String,
+    pub seq     : String
+}
+
+fn parse_sam_output(sam_string: &str) -> Sam {
+    let mut split = sam_string.split("\t");
+    Sam {
+        qname   : split.next().unwrap().to_string(),
+        flag    : split.next().unwrap().parse().unwrap(),
+        ref_name: split.next().unwrap().to_string(),
+        pos     : split.next().unwrap().parse().unwrap(),
+        mapq    : split.next().unwrap().parse().unwrap(),
+        cigar   : Cigar::new_from_str(&split.next().unwrap().to_string()),
+        rnext   : split.next().unwrap().to_string(),
+        pnext   : split.next().unwrap().to_string(),
+        tlen    : split.next().unwrap().to_string(),
+        seq     : split.next().unwrap().to_string()
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct Cigar(Vec<(String, u32)>);
+
+impl Cigar {
+    pub fn new_from_str(cigar_str: &str) -> Cigar {
+        let mut res: Vec<(String, u32)> = Vec::new();
+        let mut num_acc = String::new();
+        for c in cigar_str.split("") {
+            if c != "" { match c.parse::<usize>() {
+                Ok(_) => num_acc.push_str(c), // if a number added to the accumulator
+                Err(_) => {
+                    let current_add = num_acc.parse::<u32>().unwrap();
+                    num_acc = "".to_string();
+
+                    // if begin by N...
+                    res.push((c.to_string(), current_add));
+                }
+            }}
+        }
+        Cigar(res)
+    }
+
+    pub fn len(&self) -> u32 {
+        self.0.iter()
+        .map(|e| e.1)
+        .reduce(|acc, e| acc + e)
+        .unwrap()
+    }
+
+    pub fn expand(&self) -> String {
+        let mut res = String::new();
+        for (op, n) in self.0.iter() {
+            let c = op.chars().next().unwrap();
+            for _ in 0..(*n) {
+                res.push(c);
+            }
+        }
+        res
+    }
+}
+
+fn is_reverse (flag: u16) -> bool {
+    flag & 0x10 == 0x10
+}
+
+pub fn revcomp(dna: &str) -> String{
+    let mut rdna: String = String::with_capacity(dna.len()); 
+    for c in dna.chars().rev() {
+        rdna.push(switch_base(c));
+    }
+    rdna
+}
+
+fn switch_base(c:char) -> char {
+    match c {
+        'a' => 't',
+        'c' => 'g',
+        't' => 'a',
+        'g' => 'c',
+        'u' => 'a',
+        'A' => 'T',
+        'C' => 'G',
+        'T' => 'A',
+        'G' => 'C',
+        'U' => 'A',
+        _   => 'N'
+    }
+}
+
+fn make_aln(seq_query: &str, alignements: &Vec<Sam>) {
+    let consumes_query = vec!["M", "I", "S", "=", "X", "H"];
+    let consumes_ref = vec!["M", "D", "N", "=", "X"];
+}