|
|
@@ -0,0 +1,316 @@
|
|
|
+use anyhow::Result;
|
|
|
+use std::{fmt, collections::{HashMap, VecDeque}};
|
|
|
+use log::info;
|
|
|
+use minimap2::Mapping;
|
|
|
+
|
|
|
+
|
|
|
+#[derive(Debug)]
|
|
|
+pub struct Contig {
|
|
|
+ pub id: String,
|
|
|
+ pub mappings: Vec<Mapping>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub enum ContigsRefRes {
|
|
|
+ Unique(Mapping),
|
|
|
+ Chimeric((Mapping, Mapping)),
|
|
|
+ ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
|
|
|
+ LeftAmbiguity((Vec<Mapping>, Mapping)),
|
|
|
+ RightAmbiguity((Mapping, Vec<Mapping>)),
|
|
|
+ Ambigous(Vec<Mapping>),
|
|
|
+}
|
|
|
+
|
|
|
+impl fmt::Display for ContigsRefRes {
|
|
|
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
|
+ let str = match self {
|
|
|
+ ContigsRefRes::Unique(m) => mapping_to_string(m),
|
|
|
+ ContigsRefRes::Chimeric((a, b)) => {
|
|
|
+ format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
|
|
|
+ }
|
|
|
+ ContigsRefRes::ChimericMultiple((a, v, b)) => format!(
|
|
|
+ "{}<->{}<->{}",
|
|
|
+ mapping_to_string(a),
|
|
|
+ mappings_to_string(v),
|
|
|
+ mapping_to_string(b)
|
|
|
+ ),
|
|
|
+ ContigsRefRes::LeftAmbiguity((v, b)) => {
|
|
|
+ format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
|
|
|
+ }
|
|
|
+ ContigsRefRes::RightAmbiguity((a, v)) => {
|
|
|
+ format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
|
|
|
+ }
|
|
|
+ ContigsRefRes::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
+ };
|
|
|
+ fmt.write_str(&str).unwrap();
|
|
|
+
|
|
|
+ std::result::Result::Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn mapping_to_string(mapping: &Mapping) -> String {
|
|
|
+ let uk = "UNKNOWN".to_string();
|
|
|
+ format!(
|
|
|
+ "{}:{}-{}({}:{}-{})",
|
|
|
+ mapping.target_name.clone().unwrap_or(uk.clone()),
|
|
|
+ mapping.target_start,
|
|
|
+ mapping.target_end,
|
|
|
+ mapping.query_name.clone().unwrap_or(uk),
|
|
|
+ mapping.query_start,
|
|
|
+ mapping.query_end
|
|
|
+ )
|
|
|
+}
|
|
|
+
|
|
|
+fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
|
|
|
+ let v = mappings
|
|
|
+ .iter()
|
|
|
+ .map(mapping_to_string)
|
|
|
+ .collect::<Vec<String>>();
|
|
|
+ v.join("//")
|
|
|
+}
|
|
|
+
|
|
|
+impl Contig {
|
|
|
+ pub fn get_ref_pos(&mut self) -> Result<ContigsRefRes> {
|
|
|
+ if self.mappings.len() == 1 {
|
|
|
+ return Ok(ContigsRefRes::Unique(self.mappings.get(0).unwrap().clone()));
|
|
|
+ } else {
|
|
|
+ let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut self.mappings)?.into();
|
|
|
+
|
|
|
+ if grouped.len() == 1 {
|
|
|
+ let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
+ return Ok(ContigsRefRes::Ambigous(r));
|
|
|
+ } else if grouped.len() >= 2 {
|
|
|
+ let first = grouped.pop_back().unwrap();
|
|
|
+ let last = grouped.pop_front().unwrap();
|
|
|
+
|
|
|
+ if grouped.len() == 0 {
|
|
|
+ if first.len() == 1 && last.len() == 1 {
|
|
|
+ return Ok(ContigsRefRes::Chimeric((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else if first.len() == 1 {
|
|
|
+ return Ok(ContigsRefRes::RightAmbiguity((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ last.clone(),
|
|
|
+ )));
|
|
|
+ } else if last.len() == 1 {
|
|
|
+ return Ok(ContigsRefRes::LeftAmbiguity((
|
|
|
+ first.clone(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else {
|
|
|
+ let all: Vec<Mapping> =
|
|
|
+ vec![first, last].into_iter().flat_map(|e| e).collect();
|
|
|
+ return Ok(ContigsRefRes::Ambigous(all));
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ }
|
|
|
+ if first.len() == 1 && last.len() == 1 {
|
|
|
+ return Ok(ContigsRefRes::ChimericMultiple((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else if first.len() == 1 {
|
|
|
+ let right: Vec<Mapping> =
|
|
|
+ vec![grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigsRefRes::RightAmbiguity((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ right,
|
|
|
+ )));
|
|
|
+ } else if last.len() == 1 {
|
|
|
+ let left: Vec<Mapping> =
|
|
|
+ vec![first, grouped.into_iter().flat_map(|e| e).collect()]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigsRefRes::LeftAmbiguity((
|
|
|
+ left,
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else {
|
|
|
+ let all: Vec<Mapping> =
|
|
|
+ vec![first, grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigsRefRes::Ambigous(all));
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ return Ok(ContigsRefRes::Ambigous(
|
|
|
+ grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
+ ));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Genome {
|
|
|
+ chromosomes: HashMap<String, Chromosome>,
|
|
|
+}
|
|
|
+
|
|
|
+impl Genome {
|
|
|
+ pub fn new() -> Self {
|
|
|
+ Genome {
|
|
|
+ chromosomes: HashMap::new(),
|
|
|
+ }
|
|
|
+ }
|
|
|
+ pub fn add_contig(&mut self, id: String, mappings: Vec<Mapping>) -> Result<()> {
|
|
|
+ let mut new_contig = Contig { id, mappings };
|
|
|
+ // get the category of Mapping
|
|
|
+ let ref_res = new_contig.get_ref_pos()?;
|
|
|
+ match ref_res.clone() {
|
|
|
+ ContigsRefRes::Unique(contig_mapping) => {
|
|
|
+ match self
|
|
|
+ .chromosomes
|
|
|
+ .get_mut(&contig_mapping.target_name.unwrap())
|
|
|
+ {
|
|
|
+ Some(chromosome) => {
|
|
|
+ chromosome.contigs.push(ref_res);
|
|
|
+ }
|
|
|
+ None => (),
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ContigsRefRes::Chimeric((a, b)) => {
|
|
|
+ let a_target_name = a.target_name.unwrap();
|
|
|
+ let b_target_name = b.target_name.unwrap();
|
|
|
+ if a_target_name == b_target_name {
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&a_target_name) {
|
|
|
+ chromosome.contigs.push(ref_res);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ a_target_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![ref_res],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ let chimeric_name = format!("{}-{}", a_target_name, b_target_name);
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
+ chromosome.contigs.push(ref_res);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ chimeric_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![ref_res],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ContigsRefRes::ChimericMultiple((left, _, right)) => {
|
|
|
+ let left_target_name = left.target_name.unwrap();
|
|
|
+ let right_target_name = right.target_name.unwrap();
|
|
|
+ if left_target_name == right_target_name {
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&left_target_name) {
|
|
|
+ chromosome.contigs.push(ref_res);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ left_target_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![ref_res],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ let chimeric_name = format!("{}-{}", left_target_name, right_target_name);
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
+ chromosome.contigs.push(ref_res);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ chimeric_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![ref_res],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ _ => {
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut("Ambigous") {
|
|
|
+ chromosome.contigs.push(ref_res);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ "Ambigous".to_string(),
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![ref_res],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+ pub fn stats(&self) {
|
|
|
+ // let mut stats = HashMap::new();
|
|
|
+ for (k, v) in self.chromosomes.iter() {
|
|
|
+ info!("{}:{}", k, v.contigs.len());
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Chromosome {
|
|
|
+ contigs: Vec<ContigsRefRes>,
|
|
|
+}
|
|
|
+
|
|
|
+fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
|
|
|
+ // sort alignments by query_start
|
|
|
+ mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+ // let mut graph = Graph::<String,()>::new();
|
|
|
+ //
|
|
|
+ // mappings.iter().enumerate().for_each(|(i, e)| {
|
|
|
+ // let start = graph.add_node(format!("{}S:{}", i, e.query_start));
|
|
|
+ // let end = graph.add_node(format!("{}E:{}", i, e.query_end));
|
|
|
+ // graph.add_edge(start, end, ());
|
|
|
+ // });
|
|
|
+
|
|
|
+ let mut alignments: Vec<Vec<Mapping>> = vec![];
|
|
|
+ // group by overlapps > 30
|
|
|
+ for aln in mappings.iter() {
|
|
|
+ let mut last = alignments.last_mut();
|
|
|
+ if let Some(mut l) = last {
|
|
|
+ if l.iter()
|
|
|
+ .filter(|a| a.query_end - aln.query_start > 30)
|
|
|
+ .count()
|
|
|
+ > 0
|
|
|
+ {
|
|
|
+ l.push(aln.clone());
|
|
|
+ } else {
|
|
|
+ alignments.push(vec![aln.clone()]);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ alignments.push(vec![aln.clone()]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // let mut last_query_end = 0;
|
|
|
+ // let mut all_res = vec![];
|
|
|
+ // for map in alignments.iter() {
|
|
|
+ // if map.len() > 1 {
|
|
|
+ // let r: Vec<String> = map.iter().map(|m| format_map(m).unwrap()).collect();
|
|
|
+ // all_res.push(format!("[{}]", r.join(" ")));
|
|
|
+ // } else {
|
|
|
+ // all_res.push(format_map(map.get(0).unwrap()).unwrap());
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // warn!("{}", all_res.join(" - "));
|
|
|
+ Ok(alignments)
|
|
|
+}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod tests {
|
|
|
+ use super::*;
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn it_works() {
|
|
|
+ // let result = add(2, 2);
|
|
|
+ // assert_eq!(result, 4);
|
|
|
+ }
|
|
|
+}
|