|
|
@@ -1,4 +1,5 @@
|
|
|
use anyhow::{anyhow, Ok, Result};
|
|
|
+use fasta::record::Sequence;
|
|
|
use log::info;
|
|
|
use minimap2::{Aligner, Mapping};
|
|
|
use noodles_fasta as fasta;
|
|
|
@@ -79,6 +80,48 @@ impl fmt::Display for ContigRef {
|
|
|
}
|
|
|
|
|
|
impl ContigRef {
|
|
|
+    /// Builds a compact, human-readable description of where this contig maps on the reference.
+    ///
+    /// * `Unique` is rendered as `target:start_end`.
+    /// * `Chimeric`, `ChimericTriple` and `Ambigous` are rendered as
+    ///   `[target:start_end;target:start_end_rev;...]`: the mappings are ordered by their start
+    ///   position on the query, and `_rev` is appended to reverse-strand mappings.
+    ///
+    /// A mapping without a target name is reported under the name `UNKNOWN`.
+    ///
+    /// # Panics
+    ///
+    /// Panics for `ChimericMultiple`, `LeftAmbiguity` and `RightAmbiguity`, which are not
+    /// implemented yet (`todo!()`).
+    pub fn desc(&self) -> Option<String> {
+        const UNKNOWN: &str = "UNKNOWN";
+
+        // Renders a group of mappings as `[a;b;...]`, ordered along the query. Working on
+        // references means no `Mapping` has to be cloned just to be sorted and formatted.
+        fn describe(mut mappings: Vec<&Mapping>) -> String {
+            // Stable sort: mappings sharing a query start keep their original relative order.
+            mappings.sort_by_key(|m| m.query_start);
+            let parts: Vec<String> = mappings
+                .iter()
+                .map(|m| {
+                    let strand = match m.strand {
+                        minimap2::Strand::Forward => "",
+                        minimap2::Strand::Reverse => "_rev",
+                    };
+                    format!(
+                        "{}:{}_{}{}",
+                        m.target_name.as_deref().unwrap_or(UNKNOWN),
+                        m.target_start,
+                        m.target_end,
+                        strand
+                    )
+                })
+                .collect();
+            format!("[{}]", parts.join(";"))
+        }
+
+        match self {
+            // A unique mapping is reported without brackets and without a strand suffix.
+            ContigRef::Unique(a) => Some(format!(
+                "{}:{}_{}",
+                a.target_name.as_deref().unwrap_or(UNKNOWN),
+                a.target_start,
+                a.target_end
+            )),
+            ContigRef::Chimeric((a, b)) => Some(describe(vec![a, b])),
+            ContigRef::ChimericTriple((a, b, c)) => Some(describe(vec![a, b, c])),
+            ContigRef::ChimericMultiple(_) => todo!(),
+            ContigRef::LeftAmbiguity(_) => todo!(),
+            ContigRef::RightAmbiguity(_) => todo!(),
+            ContigRef::Ambigous(a) => Some(describe(a.iter().collect())),
+        }
+    }
|
|
|
pub fn hgvs(&self) -> Option<String> {
|
|
|
let uk = "UNKNOWN".to_string();
|
|
|
match self {
|
|
|
@@ -193,7 +236,6 @@ pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
|
|
|
return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone()));
|
|
|
} else {
|
|
|
let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut mappings)?.into();
|
|
|
- println!("{grouped:?}");
|
|
|
|
|
|
if grouped.len() == 1 {
|
|
|
let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
@@ -224,7 +266,6 @@ pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
|
|
|
}
|
|
|
}
|
|
|
if first.len() == 1 && last.len() == 1 {
|
|
|
- println!("bim");
|
|
|
if grouped.len() == 1 {
|
|
|
return Ok(ContigRef::ChimericTriple((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
@@ -696,7 +737,7 @@ pub fn write_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()>
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-pub fn read_fasta(path: &str) -> Result<Vec<(String, Vec<u8>)>> {
|
|
|
+pub fn read_fasta(path: &str) -> Result<Vec<(String, Sequence)>> {
|
|
|
let mut reader = File::open(&path)
|
|
|
.map(BufReader::new)
|
|
|
.map(fasta::Reader::new)?;
|
|
|
@@ -705,41 +746,38 @@ pub fn read_fasta(path: &str) -> Result<Vec<(String, Vec<u8>)>> {
|
|
|
for result in reader.records() {
|
|
|
let record = result?;
|
|
|
let u = String::from_utf8(record.name().to_vec())?;
|
|
|
- let s = record.sequence().as_ref().to_vec();
|
|
|
+ let s = record.sequence().to_owned();
|
|
|
res.push((u, s));
|
|
|
}
|
|
|
|
|
|
Ok(res)
|
|
|
}
|
|
|
|
|
|
-pub fn dist_align(url: String) -> impl Fn(String) -> Result<Vec<Mapping>> {
|
|
|
- move |sequence: String| -> Result<Vec<Mapping>> {
|
|
|
- aligner_client::get_mappings(url.as_str(), sequence)
|
|
|
- }
|
|
|
-}
|
|
|
|
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
|
use super::*;
|
|
|
- use test_log::test;
|
|
|
|
|
|
- #[test_log::test]
|
|
|
+ #[test]
|
|
|
fn it_works() -> Result<()> {
|
|
|
let _ = env_logger::builder().is_test(true).try_init();
|
|
|
- let contig_fa = "./data_test/contig_1.fa";
|
|
|
+ let contig_fa = "./data_test/contig_2.fa";
|
|
|
let aligner_url = "http://localhost:4444/align";
|
|
|
|
|
|
let mut genome = Genome::new();
|
|
|
- let aligner = dist_align(aligner_url.to_string());
|
|
|
+ let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
|
|
|
let sequences = read_fasta(contig_fa)?;
|
|
|
for (name, seq) in sequences {
|
|
|
- genome.add_contig_from_seq(name, &seq, &aligner)?;
|
|
|
+ genome.add_contig_from_seq(name.clone(), &seq.as_ref().to_vec(), &aligner)?;
|
|
|
+ let mut seqc: Vec<u8> = seq.complement().map(|e| e.unwrap()).collect();
|
|
|
+ seqc.reverse();
|
|
|
+ genome.add_contig_from_seq(format!("{name}_rev"), &seqc, &aligner)?;
|
|
|
println!("Sending");
|
|
|
}
|
|
|
- genome.iter().for_each(|(n, c)| {
|
|
|
+ genome.iter().for_each(|(_, c)| {
|
|
|
c.iter().for_each(|cont| {
|
|
|
- println!("{}", cont.contig_ref);
|
|
|
+ println!("{}", cont.contig_ref.desc().unwrap());
|
|
|
});
|
|
|
});
|
|
|
|