use anyhow::{Ok, Result}; use log::info; use minimap2::{Aligner, Mapping}; use rust_htslib::bam::{self, Record}; use std::{ collections::{HashMap, VecDeque}, fmt, }; #[derive(Debug, Clone)] pub struct Genome { pub chromosomes: HashMap, } #[derive(Debug, Clone)] pub struct Chromosome { contigs: Vec, } #[derive(Debug, Clone)] pub struct Contig { pub id: String, // contig seq on ref pub mappings: Vec, // reads on ref pub supporting_records: Vec, pub sequence: String, pub contig_ref: ContigRef, } #[derive(Debug, Clone)] pub enum ContigRef { Unique(Mapping), Chimeric((Mapping, Mapping)), ChimericMultiple((Mapping, Vec, Mapping)), LeftAmbiguity((Vec, Mapping)), RightAmbiguity((Mapping, Vec)), Ambigous(Vec), } impl fmt::Display for ContigRef { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { let str = match self { ContigRef::Unique(m) => mapping_to_string(m), ContigRef::Chimeric((a, b)) => { format!("{}<->{}", mapping_to_string(a), mapping_to_string(b)) } ContigRef::ChimericMultiple((a, v, b)) => format!( "{}<->{}<->{}", mapping_to_string(a), mappings_to_string(v), mapping_to_string(b) ), ContigRef::LeftAmbiguity((v, b)) => { format!("{}<->{}", mappings_to_string(v), mapping_to_string(b)) } ContigRef::RightAmbiguity((a, v)) => { format!("{}<->{}", mapping_to_string(a), mappings_to_string(v)) } ContigRef::Ambigous(v) => format!("{}", mappings_to_string(v)), }; fmt.write_str(&str).unwrap(); std::result::Result::Ok(()) } } impl ContigRef { pub fn hgvs(&self) -> Option { match self { ContigRef::Unique(_) => None, ContigRef::Chimeric((a, b)) => { if a.target_name == b.target_name { let chr = a.target_name.clone().unwrap_or("UNKNOWN".to_string()); let end = a.target_end; let start = b.target_start; Some(format!("{chr}:{end}_{start}")) } else { None } } ContigRef::ChimericMultiple(_) => None, ContigRef::LeftAmbiguity(_) => None, ContigRef::RightAmbiguity(_) => None, ContigRef::Ambigous(_) => None, } } } pub fn mapping_to_string(mapping: &Mapping) -> String { let uk = "UNKNOWN".to_string(); format!( "{}:{}-{}({}:{}-{})", mapping.target_name.clone().unwrap_or(uk.clone()), mapping.target_start, mapping.target_end, mapping.query_name.clone().unwrap_or(uk), mapping.query_start, mapping.query_end ) } fn mappings_to_string(mappings: &Vec) -> String { let v = mappings .iter() .map(mapping_to_string) .collect::>(); v.join("//") } pub fn get_ref_pos(mappings: Vec) -> Result { let mut mappings = mappings; if mappings.len() == 1 { return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone())); } else { let mut grouped: VecDeque> = group_mappings(&mut mappings)?.into(); if grouped.len() == 1 { let r = grouped.into_iter().flat_map(|e| e).collect(); return Ok(ContigRef::Ambigous(r)); } else if grouped.len() >= 2 { let first = grouped.pop_back().unwrap(); let last = grouped.pop_front().unwrap(); if grouped.len() == 0 { if first.len() == 1 && last.len() == 1 { return Ok(ContigRef::Chimeric(( first.get(0).unwrap().clone(), last.get(0).unwrap().clone(), ))); } else if first.len() == 1 { return Ok(ContigRef::RightAmbiguity(( first.get(0).unwrap().clone(), last.clone(), ))); } else if last.len() == 1 { return Ok(ContigRef::LeftAmbiguity(( first.clone(), last.get(0).unwrap().clone(), ))); } else { let all: Vec = vec![first, last].into_iter().flat_map(|e| e).collect(); return Ok(ContigRef::Ambigous(all)); } } else { } if first.len() == 1 && last.len() == 1 { return Ok(ContigRef::ChimericMultiple(( first.get(0).unwrap().clone(), grouped.into_iter().flat_map(|e| e).collect(), last.get(0).unwrap().clone(), ))); } else if first.len() == 1 { let right: Vec = vec![grouped.into_iter().flat_map(|e| e).collect(), last] .into_iter() .flat_map(|e| e) .collect(); return Ok(ContigRef::RightAmbiguity(( first.get(0).unwrap().clone(), right, ))); } else if last.len() == 1 { let left: Vec = vec![first, grouped.into_iter().flat_map(|e| e).collect()] .into_iter() .flat_map(|e| e) .collect(); return Ok(ContigRef::LeftAmbiguity(( left, last.get(0).unwrap().clone(), ))); } else { let all: Vec = vec![first, grouped.into_iter().flat_map(|e| e).collect(), last] .into_iter() .flat_map(|e| e) .collect(); return Ok(ContigRef::Ambigous(all)); } } else { return Ok(ContigRef::Ambigous( grouped.into_iter().flat_map(|e| e).collect(), )); } } } impl Genome { pub fn new() -> Self { Genome { chromosomes: HashMap::new(), } } pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> { self.chromosomes.iter() } pub fn add_contig( &mut self, id: String, mappings: Vec, supporting_records: Vec, sequence: String, ) -> Result<()> { let new_contig = Contig { id, mappings: mappings.clone(), supporting_records, sequence, contig_ref: get_ref_pos(mappings)?, }; // get the category of Mapping match new_contig.contig_ref.clone() { ContigRef::Unique(contig_mapping) => { match self .chromosomes .get_mut(&contig_mapping.target_name.unwrap()) { Some(chromosome) => { chromosome.contigs.push(new_contig); } None => (), } } ContigRef::Chimeric((a, b)) => { let a_target_name = a.target_name.unwrap(); let b_target_name = b.target_name.unwrap(); if a_target_name == b_target_name { if let Some(chromosome) = self.chromosomes.get_mut(&a_target_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( a_target_name, Chromosome { contigs: vec![new_contig], }, ); } } else { let chimeric_name = format!("{}-{}", a_target_name, b_target_name); if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( chimeric_name, Chromosome { contigs: vec![new_contig], }, ); } } } ContigRef::ChimericMultiple((left, _, right)) => { let left_target_name = left.target_name.unwrap(); let right_target_name = right.target_name.unwrap(); if left_target_name == right_target_name { if let Some(chromosome) = self.chromosomes.get_mut(&left_target_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( left_target_name, Chromosome { contigs: vec![new_contig], }, ); } } else { let chimeric_name = format!("{}-{}", left_target_name, right_target_name); if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( chimeric_name, Chromosome { contigs: vec![new_contig], }, ); } } } _ => { if let Some(chromosome) = self.chromosomes.get_mut("Ambigous") { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( "Ambigous".to_string(), Chromosome { contigs: vec![new_contig], }, ); } } }; Ok(()) } pub fn stats(&self) { for (k, v) in self.chromosomes.iter() { info!("{}:{}", k, v.contigs.len()); } } pub fn chromosome(&self, chromosome: &str) -> Option> { if let Some(chr) = self.chromosomes.get(chromosome) { Some(chr.contigs.clone()) } else { None } } } impl Chromosome { pub fn iter(&self) -> std::slice::Iter<'_, Contig> { self.contigs.iter() } } impl Contig { pub fn sort(&mut self) { self.mappings .sort_by(|a, b| a.target_start.cmp(&b.target_start)); } pub fn write_bam(&self, path: &str) -> Result<()> { let aligner = Aligner::builder() .asm5() .with_threads(8) .with_cigar() .with_seq(self.sequence.as_bytes()) .expect("Unable to build index"); let mut mappings = Vec::new(); for record in self.supporting_records.iter() { let seq = record.seq().as_bytes(); let alignment = aligner .map(&seq, false, false, None, None) .expect("Unable to align"); mappings.push(alignment); } let mut mappings: Vec<_> = mappings.into_iter().flatten().collect(); mappings.sort_by(|a, b| a.target_start.cmp(&b.target_start)); let mut header = bam::Header::new(); let mut sq_record = rust_htslib::bam::header::HeaderRecord::new(b"SQ"); sq_record.push_tag(b"SN", self.id.clone()); sq_record.push_tag(b"LN", self.sequence.len()); header.push_record(&sq_record); let mut out = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap(); // copy reverse reads to new BAM file for mapping in mappings.iter() { let record = minimap2::htslib::mapping_to_record( Some(mapping), self.sequence.as_bytes(), header.clone(), None, Some(self.id.as_bytes()), ); out.write(&record)?; } rust_htslib::bam::index::build(path, None, rust_htslib::bam::index::Type::Bai, 1)?; Ok(()) } pub fn hgvs(&self) -> Option { self.contig_ref.hgvs() } } fn group_mappings(mappings: &mut Vec) -> Result>> { // sort alignments by query_start mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start)); let mut alignments: Vec> = vec![]; // group by overlapps > 30 for aln in mappings.iter() { let last = alignments.last_mut(); if let Some(l) = last { if l.iter() .filter(|a| a.query_end - aln.query_start > 30) .count() > 0 { l.push(aln.clone()); } else { alignments.push(vec![aln.clone()]); } } else { alignments.push(vec![aln.clone()]); } } Ok(alignments) } #[cfg(test)] mod tests { use super::*; #[test] fn it_works() { // let result = add(2, 2); // assert_eq!(result, 4); } }