|
|
@@ -0,0 +1,158 @@
|
|
|
+use duct::cmd;
|
|
|
+
|
|
|
+fn main() {
|
|
|
+ let fasta_ref_path = "/home/thomas/NGS/ref/hg19.fa";
|
|
|
+ let sequence = "
|
|
|
+ CATTATATTCTTTTTTACAGTGCCACGGGTATATTTAGTAAACATTGAAATGTGTGGTTG
|
|
|
+ GAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTGTA
|
|
|
+ GACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATATTA
|
|
|
+ TTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACGGC
|
|
|
+ TGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTAGGTCCAAGAACTACAACGGCT
|
|
|
+ GCTGCCGGCCGGGCCCGCTCCCTGGCCTACCTTGCACAAGAAGTCGACATCGTAGAAGGC
|
|
|
+ GGACAGAAGTGTGGTCGACATGTTTCTGGAA"
|
|
|
+ .replace("\n", "")
|
|
|
+ .replace(" ", "");
|
|
|
+
|
|
|
+ println!("sequence len: {}", sequence.len());
|
|
|
+ println!(">sequence\n{}", sequence);
|
|
|
+
|
|
|
+ let aln = bwa_aln(fasta_ref_path, &sequence);
|
|
|
+
|
|
|
+ aln.iter()
|
|
|
+ .for_each(|e| {
|
|
|
+ println!("seq len: {:?}", e.seq.len());
|
|
|
+ println!("cigar: {:?}", e.cigar.0);
|
|
|
+ println!("reverse: {:?}", is_reverse(e.flag));
|
|
|
+ println!("cigar len: {:?}", e.cigar.len());
|
|
|
+ println!("pos: {}:{}", e.ref_name, e.pos);
|
|
|
+ if is_reverse(e.flag) {
|
|
|
+ println!("{:?}", revcomp(&e.seq));
|
|
|
+ println!("{:?}", e.cigar.expand().chars().rev().collect::<String>());
|
|
|
+ } else {
|
|
|
+ println!("{:?}", e.seq);
|
|
|
+ println!("{:?}", e.cigar.expand());
|
|
|
+ }
|
|
|
+
|
|
|
+ println!("-------------------------------")
|
|
|
+ // println!("{:?}", e.seq.len() == e.cigar.len() as usize);
|
|
|
+ });
|
|
|
+}
|
|
|
+
|
|
|
+pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec<Sam> {
|
|
|
+ let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", sequence))
|
|
|
+ .pipe(cmd!("bwa", "mem", fasta_ref_path, "-"))
|
|
|
+ .stdout_capture()
|
|
|
+ .stderr_capture()
|
|
|
+ .read().unwrap();
|
|
|
+
|
|
|
+ let alignements: Vec<Sam> = stdout.split("\n")
|
|
|
+ .filter(|e| e.contains("sequence_to_query"))
|
|
|
+ .map(|e| parse_sam_output(e))
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ alignements
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Sam {
|
|
|
+ pub qname : String,
|
|
|
+ pub flag : u16,
|
|
|
+ pub ref_name: String,
|
|
|
+ pub pos : i32,
|
|
|
+ pub mapq : i32,
|
|
|
+ pub cigar : Cigar,
|
|
|
+ pub rnext : String,
|
|
|
+ pub pnext : String,
|
|
|
+ pub tlen : String,
|
|
|
+ pub seq : String
|
|
|
+}
|
|
|
+
|
|
|
+fn parse_sam_output(sam_string: &str) -> Sam {
|
|
|
+ let mut split = sam_string.split("\t");
|
|
|
+ Sam {
|
|
|
+ qname : split.next().unwrap().to_string(),
|
|
|
+ flag : split.next().unwrap().parse().unwrap(),
|
|
|
+ ref_name: split.next().unwrap().to_string(),
|
|
|
+ pos : split.next().unwrap().parse().unwrap(),
|
|
|
+ mapq : split.next().unwrap().parse().unwrap(),
|
|
|
+ cigar : Cigar::new_from_str(&split.next().unwrap().to_string()),
|
|
|
+ rnext : split.next().unwrap().to_string(),
|
|
|
+ pnext : split.next().unwrap().to_string(),
|
|
|
+ tlen : split.next().unwrap().to_string(),
|
|
|
+ seq : split.next().unwrap().to_string()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Cigar(Vec<(String, u32)>);
|
|
|
+
|
|
|
+impl Cigar {
|
|
|
+ pub fn new_from_str(cigar_str: &str) -> Cigar {
|
|
|
+ let mut res: Vec<(String, u32)> = Vec::new();
|
|
|
+ let mut num_acc = String::new();
|
|
|
+ for c in cigar_str.split("") {
|
|
|
+ if c != "" { match c.parse::<usize>() {
|
|
|
+ Ok(_) => num_acc.push_str(c), // if a number added to the accumulator
|
|
|
+ Err(_) => {
|
|
|
+ let current_add = num_acc.parse::<u32>().unwrap();
|
|
|
+ num_acc = "".to_string();
|
|
|
+
|
|
|
+ // if begin by N...
|
|
|
+ res.push((c.to_string(), current_add));
|
|
|
+ }
|
|
|
+ }}
|
|
|
+ }
|
|
|
+ Cigar(res)
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn len(&self) -> u32 {
|
|
|
+ self.0.iter()
|
|
|
+ .map(|e| e.1)
|
|
|
+ .reduce(|acc, e| acc + e)
|
|
|
+ .unwrap()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn expand(&self) -> String {
|
|
|
+ let mut res = String::new();
|
|
|
+ for (op, n) in self.0.iter() {
|
|
|
+ let c = op.chars().next().unwrap();
|
|
|
+ for _ in 0..(*n) {
|
|
|
+ res.push(c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ res
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn is_reverse (flag: u16) -> bool {
|
|
|
+ flag & 0x10 == 0x10
|
|
|
+}
|
|
|
+
|
|
|
+pub fn revcomp(dna: &str) -> String{
|
|
|
+ let mut rdna: String = String::with_capacity(dna.len());
|
|
|
+ for c in dna.chars().rev() {
|
|
|
+ rdna.push(switch_base(c));
|
|
|
+ }
|
|
|
+ rdna
|
|
|
+}
|
|
|
+
|
|
|
+fn switch_base(c:char) -> char {
|
|
|
+ match c {
|
|
|
+ 'a' => 't',
|
|
|
+ 'c' => 'g',
|
|
|
+ 't' => 'a',
|
|
|
+ 'g' => 'c',
|
|
|
+ 'u' => 'a',
|
|
|
+ 'A' => 'T',
|
|
|
+ 'C' => 'G',
|
|
|
+ 'T' => 'A',
|
|
|
+ 'G' => 'C',
|
|
|
+ 'U' => 'A',
|
|
|
+ _ => 'N'
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn make_aln(seq_query: &str, alignements: &Vec<Sam>) {
|
|
|
+ let consumes_query = vec!["M", "I", "S", "=", "X", "H"];
|
|
|
+ let consumes_ref = vec!["M", "D", "N", "=", "X"];
|
|
|
+}
|