|
|
@@ -1,22 +1,33 @@
|
|
|
-use anyhow::Result;
|
|
|
+use anyhow::{Result, Ok};
|
|
|
use log::info;
|
|
|
use minimap2::Mapping;
|
|
|
-use rust_htslib::bam::Record;
|
|
|
+use rust_htslib::bam::{self, Record};
|
|
|
use std::{
|
|
|
collections::{HashMap, VecDeque},
|
|
|
fmt,
|
|
|
};
|
|
|
|
|
|
-#[derive(Debug)]
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Genome {
|
|
|
+ pub chromosomes: HashMap<String, Chromosome>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Chromosome {
|
|
|
+ contigs: Vec<Contig>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
pub struct Contig {
|
|
|
pub id: String,
|
|
|
pub mappings: Vec<Mapping>,
|
|
|
pub supporting_records: Vec<Record>,
|
|
|
- pub sequence: String
|
|
|
+ pub sequence: String,
|
|
|
+ pub contig_ref: ContigRef,
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
-pub enum ContigsRef {
|
|
|
+pub enum ContigRef {
|
|
|
Unique(Mapping),
|
|
|
Chimeric((Mapping, Mapping)),
|
|
|
ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
|
|
|
@@ -25,26 +36,26 @@ pub enum ContigsRef {
|
|
|
Ambigous(Vec<Mapping>),
|
|
|
}
|
|
|
|
|
|
-impl fmt::Display for ContigsRef {
|
|
|
+impl fmt::Display for ContigRef {
|
|
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
|
let str = match self {
|
|
|
- ContigsRef::Unique(m) => mapping_to_string(m),
|
|
|
- ContigsRef::Chimeric((a, b)) => {
|
|
|
+ ContigRef::Unique(m) => mapping_to_string(m),
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
|
|
|
}
|
|
|
- ContigsRef::ChimericMultiple((a, v, b)) => format!(
|
|
|
+ ContigRef::ChimericMultiple((a, v, b)) => format!(
|
|
|
"{}<->{}<->{}",
|
|
|
mapping_to_string(a),
|
|
|
mappings_to_string(v),
|
|
|
mapping_to_string(b)
|
|
|
),
|
|
|
- ContigsRef::LeftAmbiguity((v, b)) => {
|
|
|
+ ContigRef::LeftAmbiguity((v, b)) => {
|
|
|
format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
|
|
|
}
|
|
|
- ContigsRef::RightAmbiguity((a, v)) => {
|
|
|
+ ContigRef::RightAmbiguity((a, v)) => {
|
|
|
format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
|
|
|
}
|
|
|
- ContigsRef::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
+ ContigRef::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
};
|
|
|
fmt.write_str(&str).unwrap();
|
|
|
|
|
|
@@ -52,11 +63,11 @@ impl fmt::Display for ContigsRef {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-impl ContigsRef {
|
|
|
+impl ContigRef {
|
|
|
pub fn hgvs(&self) -> Option<String> {
|
|
|
match self {
|
|
|
- ContigsRef::Unique(_) => None,
|
|
|
- ContigsRef::Chimeric((a, b)) => {
|
|
|
+ ContigRef::Unique(_) => None,
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
if a.target_name == b.target_name {
|
|
|
let chr = a.target_name.clone().unwrap_or("UNKNOWN".to_string());
|
|
|
let end = a.target_end;
|
|
|
@@ -66,10 +77,10 @@ impl ContigsRef {
|
|
|
None
|
|
|
}
|
|
|
}
|
|
|
- ContigsRef::ChimericMultiple(_) => None,
|
|
|
- ContigsRef::LeftAmbiguity(_) => None,
|
|
|
- ContigsRef::RightAmbiguity(_) => None,
|
|
|
- ContigsRef::Ambigous(_) => None,
|
|
|
+ ContigRef::ChimericMultiple(_) => None,
|
|
|
+ ContigRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigRef::RightAmbiguity(_) => None,
|
|
|
+ ContigRef::Ambigous(_) => None,
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -95,91 +106,82 @@ fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
|
|
|
v.join("//")
|
|
|
}
|
|
|
|
|
|
-impl Contig {
|
|
|
- pub fn get_ref_pos(&mut self) -> Result<ContigsRef> {
|
|
|
- if self.mappings.len() == 1 {
|
|
|
- return Ok(ContigsRef::Unique(self.mappings.get(0).unwrap().clone()));
|
|
|
- } else {
|
|
|
- let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut self.mappings)?.into();
|
|
|
+pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
|
|
|
+ let mut mappings = mappings;
|
|
|
+ if mappings.len() == 1 {
|
|
|
+ return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone()));
|
|
|
+ } else {
|
|
|
+ let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut mappings)?.into();
|
|
|
|
|
|
- if grouped.len() == 1 {
|
|
|
- let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
- return Ok(ContigsRef::Ambigous(r));
|
|
|
- } else if grouped.len() >= 2 {
|
|
|
- let first = grouped.pop_back().unwrap();
|
|
|
- let last = grouped.pop_front().unwrap();
|
|
|
+ if grouped.len() == 1 {
|
|
|
+ let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
+ return Ok(ContigRef::Ambigous(r));
|
|
|
+ } else if grouped.len() >= 2 {
|
|
|
+ let first = grouped.pop_back().unwrap();
|
|
|
+ let last = grouped.pop_front().unwrap();
|
|
|
|
|
|
- if grouped.len() == 0 {
|
|
|
- if first.len() == 1 && last.len() == 1 {
|
|
|
- return Ok(ContigsRef::Chimeric((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- } else if first.len() == 1 {
|
|
|
- return Ok(ContigsRef::RightAmbiguity((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- last.clone(),
|
|
|
- )));
|
|
|
- } else if last.len() == 1 {
|
|
|
- return Ok(ContigsRef::LeftAmbiguity((
|
|
|
- first.clone(),
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- } else {
|
|
|
- let all: Vec<Mapping> =
|
|
|
- vec![first, last].into_iter().flat_map(|e| e).collect();
|
|
|
- return Ok(ContigsRef::Ambigous(all));
|
|
|
- }
|
|
|
- } else {
|
|
|
- }
|
|
|
+ if grouped.len() == 0 {
|
|
|
if first.len() == 1 && last.len() == 1 {
|
|
|
- return Ok(ContigsRef::ChimericMultiple((
|
|
|
+ return Ok(ContigRef::Chimeric((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
- grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
last.get(0).unwrap().clone(),
|
|
|
)));
|
|
|
} else if first.len() == 1 {
|
|
|
- let right: Vec<Mapping> =
|
|
|
- vec![grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
- .into_iter()
|
|
|
- .flat_map(|e| e)
|
|
|
- .collect();
|
|
|
- return Ok(ContigsRef::RightAmbiguity((
|
|
|
+ return Ok(ContigRef::RightAmbiguity((
|
|
|
first.get(0).unwrap().clone(),
|
|
|
- right,
|
|
|
+ last.clone(),
|
|
|
)));
|
|
|
} else if last.len() == 1 {
|
|
|
- let left: Vec<Mapping> =
|
|
|
- vec![first, grouped.into_iter().flat_map(|e| e).collect()]
|
|
|
- .into_iter()
|
|
|
- .flat_map(|e| e)
|
|
|
- .collect();
|
|
|
- return Ok(ContigsRef::LeftAmbiguity((
|
|
|
- left,
|
|
|
+ return Ok(ContigRef::LeftAmbiguity((
|
|
|
+ first.clone(),
|
|
|
last.get(0).unwrap().clone(),
|
|
|
)));
|
|
|
} else {
|
|
|
- let all: Vec<Mapping> =
|
|
|
- vec![first, grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
- .into_iter()
|
|
|
- .flat_map(|e| e)
|
|
|
- .collect();
|
|
|
- return Ok(ContigsRef::Ambigous(all));
|
|
|
+ let all: Vec<Mapping> = vec![first, last].into_iter().flat_map(|e| e).collect();
|
|
|
+ return Ok(ContigRef::Ambigous(all));
|
|
|
}
|
|
|
} else {
|
|
|
- return Ok(ContigsRef::Ambigous(
|
|
|
+ }
|
|
|
+ if first.len() == 1 && last.len() == 1 {
|
|
|
+ return Ok(ContigRef::ChimericMultiple((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
- ));
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else if first.len() == 1 {
|
|
|
+ let right: Vec<Mapping> = vec![grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigRef::RightAmbiguity((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ right,
|
|
|
+ )));
|
|
|
+ } else if last.len() == 1 {
|
|
|
+ let left: Vec<Mapping> = vec![first, grouped.into_iter().flat_map(|e| e).collect()]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigRef::LeftAmbiguity((
|
|
|
+ left,
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else {
|
|
|
+ let all: Vec<Mapping> =
|
|
|
+ vec![first, grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigRef::Ambigous(all));
|
|
|
}
|
|
|
+ } else {
|
|
|
+ return Ok(ContigRef::Ambigous(
|
|
|
+ grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
+ ));
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub struct Genome {
|
|
|
- pub chromosomes: HashMap<String, Chromosome>,
|
|
|
-}
|
|
|
-
|
|
|
impl Genome {
|
|
|
pub fn new() -> Self {
|
|
|
Genome {
|
|
|
@@ -189,73 +191,84 @@ impl Genome {
|
|
|
pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> {
|
|
|
self.chromosomes.iter()
|
|
|
}
|
|
|
- pub fn add_contig(&mut self, id: String, mappings: Vec<Mapping>, supporting_records: Vec<Record>, sequence: String) -> Result<()> {
|
|
|
- let mut new_contig = Contig { id, mappings, supporting_records, sequence };
|
|
|
+ pub fn add_contig(
|
|
|
+ &mut self,
|
|
|
+ id: String,
|
|
|
+ mappings: Vec<Mapping>,
|
|
|
+ supporting_records: Vec<Record>,
|
|
|
+ sequence: String,
|
|
|
+ ) -> Result<()> {
|
|
|
+ let new_contig = Contig {
|
|
|
+ id,
|
|
|
+ mappings: mappings.clone(),
|
|
|
+ supporting_records,
|
|
|
+ sequence,
|
|
|
+ contig_ref: get_ref_pos(mappings)?,
|
|
|
+ };
|
|
|
// get the category of Mapping
|
|
|
- let ref_res = new_contig.get_ref_pos()?;
|
|
|
- match ref_res.clone() {
|
|
|
- ContigsRef::Unique(contig_mapping) => {
|
|
|
+ match new_contig.contig_ref.clone() {
|
|
|
+ ContigRef::Unique(contig_mapping) => {
|
|
|
match self
|
|
|
.chromosomes
|
|
|
.get_mut(&contig_mapping.target_name.unwrap())
|
|
|
{
|
|
|
Some(chromosome) => {
|
|
|
- chromosome.contigs.push(ref_res);
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
}
|
|
|
None => (),
|
|
|
}
|
|
|
}
|
|
|
- ContigsRef::Chimeric((a, b)) => {
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
let a_target_name = a.target_name.unwrap();
|
|
|
let b_target_name = b.target_name.unwrap();
|
|
|
if a_target_name == b_target_name {
|
|
|
if let Some(chromosome) = self.chromosomes.get_mut(&a_target_name) {
|
|
|
- chromosome.contigs.push(ref_res);
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
} else {
|
|
|
self.chromosomes.insert(
|
|
|
a_target_name,
|
|
|
Chromosome {
|
|
|
- contigs: vec![ref_res],
|
|
|
+ contigs: vec![new_contig],
|
|
|
},
|
|
|
);
|
|
|
}
|
|
|
} else {
|
|
|
let chimeric_name = format!("{}-{}", a_target_name, b_target_name);
|
|
|
if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
- chromosome.contigs.push(ref_res);
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
} else {
|
|
|
self.chromosomes.insert(
|
|
|
chimeric_name,
|
|
|
Chromosome {
|
|
|
- contigs: vec![ref_res],
|
|
|
+ contigs: vec![new_contig],
|
|
|
},
|
|
|
);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- ContigsRef::ChimericMultiple((left, _, right)) => {
|
|
|
+ ContigRef::ChimericMultiple((left, _, right)) => {
|
|
|
let left_target_name = left.target_name.unwrap();
|
|
|
let right_target_name = right.target_name.unwrap();
|
|
|
if left_target_name == right_target_name {
|
|
|
if let Some(chromosome) = self.chromosomes.get_mut(&left_target_name) {
|
|
|
- chromosome.contigs.push(ref_res);
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
} else {
|
|
|
self.chromosomes.insert(
|
|
|
left_target_name,
|
|
|
Chromosome {
|
|
|
- contigs: vec![ref_res],
|
|
|
+ contigs: vec![new_contig],
|
|
|
},
|
|
|
);
|
|
|
}
|
|
|
} else {
|
|
|
let chimeric_name = format!("{}-{}", left_target_name, right_target_name);
|
|
|
if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
- chromosome.contigs.push(ref_res);
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
} else {
|
|
|
self.chromosomes.insert(
|
|
|
chimeric_name,
|
|
|
Chromosome {
|
|
|
- contigs: vec![ref_res],
|
|
|
+ contigs: vec![new_contig],
|
|
|
},
|
|
|
);
|
|
|
}
|
|
|
@@ -263,12 +276,12 @@ impl Genome {
|
|
|
}
|
|
|
_ => {
|
|
|
if let Some(chromosome) = self.chromosomes.get_mut("Ambigous") {
|
|
|
- chromosome.contigs.push(ref_res);
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
} else {
|
|
|
self.chromosomes.insert(
|
|
|
"Ambigous".to_string(),
|
|
|
Chromosome {
|
|
|
- contigs: vec![ref_res],
|
|
|
+ contigs: vec![new_contig],
|
|
|
},
|
|
|
);
|
|
|
}
|
|
|
@@ -283,8 +296,8 @@ impl Genome {
|
|
|
info!("{}:{}", k, v.contigs.len());
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- pub fn chromosome(&self, chromosome: &str) -> Option<Vec<ContigsRef>> {
|
|
|
+
|
|
|
+ pub fn chromosome(&self, chromosome: &str) -> Option<Vec<Contig>> {
|
|
|
if let Some(chr) = self.chromosomes.get(chromosome) {
|
|
|
Some(chr.contigs.clone())
|
|
|
} else {
|
|
|
@@ -292,17 +305,37 @@ impl Genome {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub struct Chromosome {
|
|
|
- contigs: Vec<ContigsRef>,
|
|
|
-}
|
|
|
|
|
|
impl Chromosome {
|
|
|
- pub fn iter(&self) -> std::slice::Iter<'_, ContigsRef> {
|
|
|
+ pub fn iter(&self) -> std::slice::Iter<'_, Contig> {
|
|
|
self.contigs.iter()
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+impl Contig {
|
|
|
+ pub fn sort(&mut self) {
|
|
|
+ self.mappings.sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn write_bam(&self, path: &str) -> Result<()> {
|
|
|
+ let mut header = bam::Header::new();
|
|
|
+ let mut sq_record = rust_htslib::bam::header::HeaderRecord::new(b"@SQ");
|
|
|
+ sq_record.push_tag(b"SN", self.id.clone());
|
|
|
+ sq_record.push_tag(b"LN", self.sequence.len());
|
|
|
+ header.push_record(&sq_record);
|
|
|
+
|
|
|
+ let mut out = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap();
|
|
|
+
|
|
|
+ // copy reverse reads to new BAM file
|
|
|
+ for mapping in self.mappings.iter() {
|
|
|
+ let record = minimap2::htslib::mapping_to_record(Some(mapping), self.sequence.as_bytes(), header.clone(), None, Some(self.id.as_bytes()));
|
|
|
+ out.write(&record)?;
|
|
|
+ }
|
|
|
+ rust_htslib::bam::index::build(path, None, rust_htslib::bam::index::Type::Bai, 5)?;
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
|
|
|
// sort alignments by query_start
|
|
|
mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|