|
|
@@ -1,8 +1,10 @@
|
|
|
use anyhow::Result;
|
|
|
-use std::{fmt, collections::{HashMap, VecDeque}};
|
|
|
use log::info;
|
|
|
use minimap2::Mapping;
|
|
|
-
|
|
|
+use std::{
|
|
|
+ collections::{HashMap, VecDeque},
|
|
|
+ fmt,
|
|
|
+};
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
pub struct Contig {
|
|
|
@@ -11,7 +13,7 @@ pub struct Contig {
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
-pub enum ContigsRefRes {
|
|
|
+pub enum ContigsRef {
|
|
|
Unique(Mapping),
|
|
|
Chimeric((Mapping, Mapping)),
|
|
|
ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
|
|
|
@@ -20,26 +22,26 @@ pub enum ContigsRefRes {
|
|
|
Ambigous(Vec<Mapping>),
|
|
|
}
|
|
|
|
|
|
-impl fmt::Display for ContigsRefRes {
|
|
|
+impl fmt::Display for ContigsRef {
|
|
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
|
let str = match self {
|
|
|
- ContigsRefRes::Unique(m) => mapping_to_string(m),
|
|
|
- ContigsRefRes::Chimeric((a, b)) => {
|
|
|
+ ContigsRef::Unique(m) => mapping_to_string(m),
|
|
|
+ ContigsRef::Chimeric((a, b)) => {
|
|
|
format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
|
|
|
}
|
|
|
- ContigsRefRes::ChimericMultiple((a, v, b)) => format!(
|
|
|
+ ContigsRef::ChimericMultiple((a, v, b)) => format!(
|
|
|
"{}<->{}<->{}",
|
|
|
mapping_to_string(a),
|
|
|
mappings_to_string(v),
|
|
|
mapping_to_string(b)
|
|
|
),
|
|
|
- ContigsRefRes::LeftAmbiguity((v, b)) => {
|
|
|
+ ContigsRef::LeftAmbiguity((v, b)) => {
|
|
|
format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
|
|
|
}
|
|
|
- ContigsRefRes::RightAmbiguity((a, v)) => {
|
|
|
+ ContigsRef::RightAmbiguity((a, v)) => {
|
|
|
format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
|
|
|
}
|
|
|
- ContigsRefRes::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
+ ContigsRef::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
};
|
|
|
fmt.write_str(&str).unwrap();
|
|
|
|
|
|
@@ -47,6 +49,28 @@ impl fmt::Display for ContigsRefRes {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl ContigsRef {
|
|
|
+ pub fn hgvs(&self) -> Option<String> {
|
|
|
+ match self {
|
|
|
+ ContigsRef::Unique(_) => None,
|
|
|
+ ContigsRef::Chimeric((a, b)) => {
|
|
|
+ if a.target_name == b.target_name {
|
|
|
+ let chr = a.target_name.clone().unwrap_or("UNKNOWN".to_string());
|
|
|
+ let end = a.target_end;
|
|
|
+ let start = b.target_start;
|
|
|
+ Some(format!("{chr}:{end}_{start}"))
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ContigsRef::ChimericMultiple(_) => None,
|
|
|
+ ContigsRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigsRef::RightAmbiguity(_) => None,
|
|
|
+ ContigsRef::Ambigous(_) => None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
pub fn mapping_to_string(mapping: &Mapping) -> String {
|
|
|
let uk = "UNKNOWN".to_string();
|
|
|
format!(
|
|
|
@@ -69,44 +93,44 @@ fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
|
|
|
}
|
|
|
|
|
|
impl Contig {
|
|
|
- pub fn get_ref_pos(&mut self) -> Result<ContigsRefRes> {
|
|
|
+ pub fn get_ref_pos(&mut self) -> Result<ContigsRef> {
|
|
|
if self.mappings.len() == 1 {
|
|
|
- return Ok(ContigsRefRes::Unique(self.mappings.get(0).unwrap().clone()));
|
|
|
+ return Ok(ContigsRef::Unique(self.mappings.get(0).unwrap().clone()));
|
|
|
} else {
|
|
|
let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut self.mappings)?.into();
|
|
|
|
|
|
if grouped.len() == 1 {
|
|
|
let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
- return Ok(ContigsRefRes::Ambigous(r));
|
|
|
+ return Ok(ContigsRef::Ambigous(r));
|
|
|
} else if grouped.len() >= 2 {
|
|
|
let first = grouped.pop_back().unwrap();
|
|
|
let last = grouped.pop_front().unwrap();
|
|
|
|
|
|
if grouped.len() == 0 {
|
|
|
if first.len() == 1 && last.len() == 1 {
|
|
|
- return Ok(ContigsRefRes::Chimeric((
|
|
|
+ return Ok(ContigsRef::Chimeric((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
last.get(0).unwrap().clone(),
|
|
|
)));
|
|
|
} else if first.len() == 1 {
|
|
|
- return Ok(ContigsRefRes::RightAmbiguity((
|
|
|
+ return Ok(ContigsRef::RightAmbiguity((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
last.clone(),
|
|
|
)));
|
|
|
} else if last.len() == 1 {
|
|
|
- return Ok(ContigsRefRes::LeftAmbiguity((
|
|
|
+ return Ok(ContigsRef::LeftAmbiguity((
|
|
|
first.clone(),
|
|
|
last.get(0).unwrap().clone(),
|
|
|
)));
|
|
|
} else {
|
|
|
let all: Vec<Mapping> =
|
|
|
vec![first, last].into_iter().flat_map(|e| e).collect();
|
|
|
- return Ok(ContigsRefRes::Ambigous(all));
|
|
|
+ return Ok(ContigsRef::Ambigous(all));
|
|
|
}
|
|
|
} else {
|
|
|
}
|
|
|
if first.len() == 1 && last.len() == 1 {
|
|
|
- return Ok(ContigsRefRes::ChimericMultiple((
|
|
|
+ return Ok(ContigsRef::ChimericMultiple((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
last.get(0).unwrap().clone(),
|
|
|
@@ -117,7 +141,7 @@ impl Contig {
|
|
|
.into_iter()
|
|
|
.flat_map(|e| e)
|
|
|
.collect();
|
|
|
- return Ok(ContigsRefRes::RightAmbiguity((
|
|
|
+ return Ok(ContigsRef::RightAmbiguity((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
right,
|
|
|
)));
|
|
|
@@ -127,7 +151,7 @@ impl Contig {
|
|
|
.into_iter()
|
|
|
.flat_map(|e| e)
|
|
|
.collect();
|
|
|
- return Ok(ContigsRefRes::LeftAmbiguity((
|
|
|
+ return Ok(ContigsRef::LeftAmbiguity((
|
|
|
left,
|
|
|
last.get(0).unwrap().clone(),
|
|
|
)));
|
|
|
@@ -137,10 +161,10 @@ impl Contig {
|
|
|
.into_iter()
|
|
|
.flat_map(|e| e)
|
|
|
.collect();
|
|
|
- return Ok(ContigsRefRes::Ambigous(all));
|
|
|
+ return Ok(ContigsRef::Ambigous(all));
|
|
|
}
|
|
|
} else {
|
|
|
- return Ok(ContigsRefRes::Ambigous(
|
|
|
+ return Ok(ContigsRef::Ambigous(
|
|
|
grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
));
|
|
|
}
|
|
|
@@ -159,7 +183,7 @@ impl Genome {
|
|
|
chromosomes: HashMap::new(),
|
|
|
}
|
|
|
}
|
|
|
- pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome>{
|
|
|
+ pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> {
|
|
|
self.chromosomes.iter()
|
|
|
}
|
|
|
pub fn add_contig(&mut self, id: String, mappings: Vec<Mapping>) -> Result<()> {
|
|
|
@@ -167,7 +191,7 @@ impl Genome {
|
|
|
// get the category of Mapping
|
|
|
let ref_res = new_contig.get_ref_pos()?;
|
|
|
match ref_res.clone() {
|
|
|
- ContigsRefRes::Unique(contig_mapping) => {
|
|
|
+ ContigsRef::Unique(contig_mapping) => {
|
|
|
match self
|
|
|
.chromosomes
|
|
|
.get_mut(&contig_mapping.target_name.unwrap())
|
|
|
@@ -178,7 +202,7 @@ impl Genome {
|
|
|
None => (),
|
|
|
}
|
|
|
}
|
|
|
- ContigsRefRes::Chimeric((a, b)) => {
|
|
|
+ ContigsRef::Chimeric((a, b)) => {
|
|
|
let a_target_name = a.target_name.unwrap();
|
|
|
let b_target_name = b.target_name.unwrap();
|
|
|
if a_target_name == b_target_name {
|
|
|
@@ -206,7 +230,7 @@ impl Genome {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- ContigsRefRes::ChimericMultiple((left, _, right)) => {
|
|
|
+ ContigsRef::ChimericMultiple((left, _, right)) => {
|
|
|
let left_target_name = left.target_name.unwrap();
|
|
|
let right_target_name = right.target_name.unwrap();
|
|
|
if left_target_name == right_target_name {
|
|
|
@@ -251,12 +275,11 @@ impl Genome {
|
|
|
Ok(())
|
|
|
}
|
|
|
pub fn stats(&self) {
|
|
|
- // let mut stats = HashMap::new();
|
|
|
for (k, v) in self.chromosomes.iter() {
|
|
|
info!("{}:{}", k, v.contigs.len());
|
|
|
}
|
|
|
}
|
|
|
- pub fn chromosome(&self, chromosome: &str) -> Option<Vec<ContigsRefRes>> {
|
|
|
+ pub fn chromosome(&self, chromosome: &str) -> Option<Vec<ContigsRef>> {
|
|
|
if let Some(chr) = self.chromosomes.get(chromosome) {
|
|
|
Some(chr.contigs.clone())
|
|
|
} else {
|
|
|
@@ -266,11 +289,11 @@ impl Genome {
|
|
|
}
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct Chromosome {
|
|
|
- contigs: Vec<ContigsRefRes>,
|
|
|
+ contigs: Vec<ContigsRef>,
|
|
|
}
|
|
|
|
|
|
impl Chromosome {
|
|
|
- pub fn iter(&self) -> std::slice::Iter<'_, ContigsRefRes> {
|
|
|
+ pub fn iter(&self) -> std::slice::Iter<'_, ContigsRef> {
|
|
|
self.contigs.iter()
|
|
|
}
|
|
|
}
|
|
|
@@ -278,13 +301,6 @@ impl Chromosome {
|
|
|
fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
|
|
|
// sort alignments by query_start
|
|
|
mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
- // let mut graph = Graph::<String,()>::new();
|
|
|
- //
|
|
|
- // mappings.iter().enumerate().for_each(|(i, e)| {
|
|
|
- // let start = graph.add_node(format!("{}S:{}", i, e.query_start));
|
|
|
- // let end = graph.add_node(format!("{}E:{}", i, e.query_end));
|
|
|
- // graph.add_edge(start, end, ());
|
|
|
- // });
|
|
|
|
|
|
let mut alignments: Vec<Vec<Mapping>> = vec![];
|
|
|
// group by overlapps > 30
|
|
|
@@ -305,18 +321,6 @@ fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // let mut last_query_end = 0;
|
|
|
- // let mut all_res = vec![];
|
|
|
- // for map in alignments.iter() {
|
|
|
- // if map.len() > 1 {
|
|
|
- // let r: Vec<String> = map.iter().map(|m| format_map(m).unwrap()).collect();
|
|
|
- // all_res.push(format!("[{}]", r.join(" ")));
|
|
|
- // } else {
|
|
|
- // all_res.push(format_map(map.get(0).unwrap()).unwrap());
|
|
|
- // }
|
|
|
- // }
|
|
|
- //
|
|
|
- // warn!("{}", all_res.join(" - "));
|
|
|
Ok(alignments)
|
|
|
}
|
|
|
|