|
|
@@ -1,57 +1,5 @@
|
|
|
use duct::cmd;
|
|
|
|
|
|
-fn main() {
|
|
|
- let ref_path = "/home/thomas/NGS/ref/hg19.fa";
|
|
|
- // let sequence = "
|
|
|
- // CATTATATTCTTTTTTACAGTGCCACGGGTATATTTAGTAAACATTGAAATGTGTGGTTG
|
|
|
- // GAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTGTA
|
|
|
- // GACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATATTA
|
|
|
- // TTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACGGC
|
|
|
- // TGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTAGGTCCAAGAACTACAACGGCT
|
|
|
- // GCTGCCGGCCGGGCCCGCTCCCTGGCCTACCTTGCACAAGAAGTCGACATCGTAGAAGGC
|
|
|
- // GGACAGAAGTGTGGTCGACATGTTTCTGGAA"
|
|
|
- // .replace("\n", "")
|
|
|
- // .replace(" ", "");
|
|
|
-
|
|
|
- // let sequence = "
|
|
|
- // CGTTATTCGTCGTGGCTCAAGCCCGGCCACGCCGCCCCAAGGGCTCCTCCCGACCTCCCG
|
|
|
- // GCCTGCCGCTCCGGCCACTGCGGGATCCAGAAACATGTCGACCACACTTCTGTCCGCCTT
|
|
|
- // CTACGATGTCGACTTCTTGTGCAAGGTAGGCCAGGGAGCGGGCCCGGCCGGCAGCAGCCG
|
|
|
- // TTGTAGTTCTTGGACCTAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACACCCAGCCGT
|
|
|
- // GACCCGCTATGTATGTCTCAGCATTGGGAAGAGTCCTCTGAGTGTCATGGGAAAATAATA
|
|
|
- // TATGAGTTT"
|
|
|
- // .replace("\n", "")
|
|
|
- // .replace(" ", "");
|
|
|
-
|
|
|
- //2011N160703-HAOAA 1
|
|
|
- let sequence = "
|
|
|
- CCTCCCGCCCTGCCTTTCAGGTCGGGAAAGTCCCGGGGTTTGCAAAAGAGTGTCCGAGCG
|
|
|
- CCCTGGAGGCGGGGAGGGCGGCAAGGAGGGCGCCGGTGTCGCGGTTGAGTTTCTCCACTG
|
|
|
- CCGACCGCGGCCACGCTGCCCGGGGCTTCCCGGACAGGCTTCGCGCCGCCCACCTCGGCA
|
|
|
- GCCGGGGCGGA
|
|
|
- TGG
|
|
|
- CCACACAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACACCCAGC
|
|
|
- CGTGACCCGCTATGTATGTCTCAGCATTGGGAAGAGTCCTCTGAGTGTCATGGGAAAATA
|
|
|
- ATATATGAGTT"
|
|
|
- .replace("\n", "")
|
|
|
- .replace(" ", "");
|
|
|
- //2011N160703-HAOAA 2
|
|
|
- // let sequence = "
|
|
|
- // TGGAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTG
|
|
|
- // TAGACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATAT
|
|
|
- // TATTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACG
|
|
|
- // GCTGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTGTGTGGCCATCCGCCCCGGC
|
|
|
- // TGCCGAGGTGGGCGGCGCGAAGCCTGTCCGGGAAGCCCCGGGCAGCGTGGCCGCGGTCGG
|
|
|
- // CAGTGGAGAAACTCAACCGCGACACC"
|
|
|
- // .replace("\n", "")
|
|
|
- // .replace(" ", "");
|
|
|
-
|
|
|
- let alns = Alignments::to_reference(&sequence, ref_path);
|
|
|
-
|
|
|
- alns.ranges.iter().for_each(|r| println!("{:?}", r));
|
|
|
-
|
|
|
-}
|
|
|
-
|
|
|
pub fn bwa_aln(ref_path: &str, query_sequence: &str) -> Vec<Sam> {
|
|
|
let stdout = cmd!("echo", "-e", format!("\">sequence_to_query\n{}\"", query_sequence))
|
|
|
.pipe(cmd!("bwa", "mem", ref_path, "-"))
|
|
|
@@ -290,19 +238,52 @@ pub fn format_seq(name: &str, sequence: &str, line_size: usize) -> String {
|
|
|
format!(">{name}\n{res}")
|
|
|
}
|
|
|
|
|
|
-// pub fn compose_seq(ref_path: &str, ranges: Vec<(String, i32, i32)>) -> String {
|
|
|
-// let fa = IndexedFasta::from_file(ref_path).expect("Error opening fa");
|
|
|
+#[cfg(test)]
|
|
|
+mod tests {
|
|
|
+ use super::*;
|
|
|
|
|
|
-// let mut sequence = String::new();
|
|
|
-// for (contig, start, stop) in ranges {
|
|
|
-// let chr_index = fa.fai().tid(&contig).expect("Cannot find chr in index");
|
|
|
-// if start < stop {
|
|
|
-// let v = fa.view(chr_index, start as usize,stop as usize).expect("Cannot get .fa view");
|
|
|
-// sequence = format!("{}{}", sequence, v.to_string());
|
|
|
-// } else {
|
|
|
-// let v = fa.view(chr_index, stop as usize, start as usize).expect("Cannot get .fa view");
|
|
|
-// sequence = format!("{}{}", sequence, revcomp(&v.to_string()));
|
|
|
-// }
|
|
|
-// }
|
|
|
-// sequence
|
|
|
-// }
|
|
|
+ #[test]
|
|
|
+ fn test_1() {
|
|
|
+ let ref_path = "/home/thomas/NGS/ref/hg19.fa";
|
|
|
+
|
|
|
+ //2011N160703-HAOAA 1
|
|
|
+ let sequence = "
|
|
|
+ CCTCCCGCCCTGCCTTTCAGGTCGGGAAAGTCCCGGGGTTTGCAAAAGAGTGTCCGAGCG
|
|
|
+ CCCTGGAGGCGGGGAGGGCGGCAAGGAGGGCGCCGGTGTCGCGGTTGAGTTTCTCCACTG
|
|
|
+ CCGACCGCGGCCACGCTGCCCGGGGCTTCCCGGACAGGCTTCGCGCCGCCCACCTCGGCA
|
|
|
+ GCCGGGGCGGA
|
|
|
+ TGG
|
|
|
+ CCACACAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACACCCAGC
|
|
|
+ CGTGACCCGCTATGTATGTCTCAGCATTGGGAAGAGTCCTCTGAGTGTCATGGGAAAATA
|
|
|
+ ATATATGAGTT"
|
|
|
+ .replace("\n", "")
|
|
|
+ .replace(" ", "");
|
|
|
+
|
|
|
+ let alns = Alignments::to_reference(&sequence, ref_path);
|
|
|
+ assert_eq!(
|
|
|
+ alns.ranges,
|
|
|
+ vec![vec![((43453321, 43453131), (1, 191))], vec![((22908007, 22908123), (195, 311))]]
|
|
|
+ );
|
|
|
+ }
|
|
|
+ #[test]
|
|
|
+ fn test_2() {
|
|
|
+ let ref_path = "/home/thomas/NGS/ref/hg19.fa";
|
|
|
+ //2011N160703-HAOAA 2
|
|
|
+ let sequence = "
|
|
|
+ TGGAATTGTTCATCACTGATGTTTTTGGTGATATTTGTTTATGTTCTGAAGCTATTGCTG
|
|
|
+ TAGACCAACATGGAGTAAACAGAAAATAATTGGGACACAGGGGCAATAAAACTCATATAT
|
|
|
+ TATTTTCCCATGACACTCAGAGGACTCTTCCCAATGCTGAGACATACATAGCGGGTCACG
|
|
|
+ GCTGGGTGTTTTTGGACAAAGGCTTAATGCACTCCAACCTGTGTGGCCATCCGCCCCGGC
|
|
|
+ TGCCGAGGTGGGCGGCGCGAAGCCTGTCCGGGAAGCCCCGGGCAGCGTGGCCGCGGTCGG
|
|
|
+ CAGTGGAGAAACTCAACCGCGACACC"
|
|
|
+ .replace("\n", "")
|
|
|
+ .replace(" ", "");
|
|
|
+
|
|
|
+ let alns = Alignments::to_reference(&sequence, ref_path);
|
|
|
+
|
|
|
+ assert_eq!(
|
|
|
+ alns.ranges,
|
|
|
+ vec![vec![((22908232, 22908007), (1, 226))], vec![((43453131, 43453227), (230, 326))]]
|
|
|
+ );
|
|
|
+ }
|
|
|
+}
|