use duct::cmd; fn main() { let fasta_ref_path = "/home/thomas/NGS/ref/hg19.fa"; let sequence = " CATTATATTCTTTTTTACAGTGCCACGGGTATATTTAGTAAACATTGAAATGTGTGGTTG GAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTGTA GACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATATTA TTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACGGC TGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTAGGTCCAAGAACTACAACGGCT GCTGCCGGCCGGGCCCGCTCCCTGGCCTACCTTGCACAAGAAGTCGACATCGTAGAAGGC GGACAGAAGTGTGGTCGACATGTTTCTGGAA" .replace("\n", "") .replace(" ", ""); println!("sequence len: {}", sequence.len()); println!(">sequence\n{}", sequence); let aln = bwa_aln(fasta_ref_path, &sequence); aln.iter() .for_each(|e| { println!("seq len: {:?}", e.seq.len()); println!("cigar: {:?}", e.cigar.0); println!("reverse: {:?}", is_reverse(e.flag)); println!("cigar len: {:?}", e.cigar.len()); println!("pos: {}:{}", e.ref_name, e.pos); if is_reverse(e.flag) { println!("{:?}", revcomp(&e.seq)); println!("{:?}", e.cigar.expand()); // println!("{:?}", e.cigar.expand().chars().rev().collect::()); } else { println!("{:?}", e.seq); println!("{:?}", e.cigar.expand()); } println!("-------------------------------") // println!("{:?}", e.seq.len() == e.cigar.len() as usize); }); } pub fn bwa_aln(fasta_ref_path: &str, sequence: &str) -> Vec { let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", sequence)) .pipe(cmd!("bwa", "mem", fasta_ref_path, "-")) .stdout_capture() .stderr_capture() .read().unwrap(); let alignements: Vec = stdout.split("\n") .filter(|e| e.contains("sequence_to_query")) .map(|e| parse_sam_output(e)) .collect(); alignements } #[derive(Debug, Clone)] pub struct Sam { pub qname : String, pub flag : u16, pub ref_name: String, pub pos : i32, pub mapq : i32, pub cigar : Cigar, pub rnext : String, pub pnext : String, pub tlen : String, pub seq : String } fn parse_sam_output(sam_string: &str) -> Sam { let mut split = sam_string.split("\t"); Sam { qname : split.next().unwrap().to_string(), flag : split.next().unwrap().parse().unwrap(), ref_name: split.next().unwrap().to_string(), pos : split.next().unwrap().parse().unwrap(), mapq : split.next().unwrap().parse().unwrap(), cigar : Cigar::new_from_str(&split.next().unwrap().to_string()), rnext : split.next().unwrap().to_string(), pnext : split.next().unwrap().to_string(), tlen : split.next().unwrap().to_string(), seq : split.next().unwrap().to_string() } } #[derive(Debug, Clone)] pub struct Cigar(Vec<(String, u32)>); impl Cigar { pub fn new_from_str(cigar_str: &str) -> Cigar { let mut res: Vec<(String, u32)> = Vec::new(); let mut num_acc = String::new(); for c in cigar_str.split("") { if c != "" { match c.parse::() { Ok(_) => num_acc.push_str(c), // if a number added to the accumulator Err(_) => { let current_add = num_acc.parse::().unwrap(); num_acc = "".to_string(); // if begin by N... res.push((c.to_string(), current_add)); } }} } Cigar(res) } pub fn len(&self) -> u32 { self.0.iter() .map(|e| e.1) .reduce(|acc, e| acc + e) .unwrap() } pub fn expand(&self) -> String { let mut res = String::new(); for (op, n) in self.0.iter() { let c = op.chars().next().unwrap(); for _ in 0..(*n) { res.push(c); } } res } } fn is_reverse (flag: u16) -> bool { flag & 0x10 == 0x10 } pub fn revcomp(dna: &str) -> String{ let mut rdna: String = String::with_capacity(dna.len()); for c in dna.chars().rev() { rdna.push(switch_base(c)); } rdna } fn switch_base(c:char) -> char { match c { 'a' => 't', 'c' => 'g', 't' => 'a', 'g' => 'c', 'u' => 'a', 'A' => 'T', 'C' => 'G', 'T' => 'A', 'G' => 'C', 'U' => 'A', _ => 'N' } } fn make_aln(seq_query: &str, alignements: &Vec) { let consumes_query = vec!["M", "I", "S", "=", "X", "H"]; let consumes_ref = vec!["M", "D", "N", "=", "X"]; }