pub mod breakpoint; pub mod genomic_graph; // mod phase; use anyhow::{anyhow, Ok, Result}; use fasta::record::Sequence; use log::info; use minimap2::{Aligner, Mapping}; use noodles_fasta as fasta; use num_format::{CustomFormat, Grouping, ToFormattedString, WriteFormatted}; use petgraph::{dot::Dot, prelude::*}; use rust_htslib::bam::{self, Record}; use std::{ collections::HashMap, fmt, fs::{self, File}, io::{BufReader, BufWriter, Write}, path::PathBuf, process::{Command, Stdio}, }; use uuid::Uuid; #[derive(Debug, Clone)] pub struct Genome { pub chromosomes: HashMap, } #[derive(Debug, Clone)] pub struct Chromosome { contigs: Vec, } #[derive(Debug, Clone)] pub struct Contig { pub id: String, // contig seq on ref pub mappings: Vec, // reads on ref pub supporting_records: Option>, pub sequence: String, pub contig_ref: ContigRef, } #[derive(Debug, Clone)] pub enum ContigRef { Unique(Mapping), Chimeric((Mapping, Mapping)), ChimericTriple((Mapping, Mapping, Mapping)), ChimericMultiple((Mapping, Vec, Mapping)), LeftAmbiguity((Vec, Mapping)), RightAmbiguity((Mapping, Vec)), Ambigous(Vec), } impl fmt::Display for ContigRef { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { let str = match self { ContigRef::Unique(m) => mapping_to_string(m), ContigRef::Chimeric((a, b)) => { format!("{}<->{}", mapping_to_string(a), mapping_to_string(b)) } ContigRef::ChimericMultiple((a, v, b)) => format!( "{}<->{}<->{}", mapping_to_string(a), mappings_to_string(v), mapping_to_string(b) ), ContigRef::LeftAmbiguity((v, b)) => { format!("{}<->{}", mappings_to_string(v), mapping_to_string(b)) } ContigRef::RightAmbiguity((a, v)) => { format!("{}<->{}", mapping_to_string(a), mappings_to_string(v)) } ContigRef::Ambigous(v) => format!("{}", mappings_to_string(v)), ContigRef::ChimericTriple((a, b, c)) => format!( "{}<->{}<->{}", mapping_to_string(a), mapping_to_string(b), mapping_to_string(c) ), }; fmt.write_str(&str).unwrap(); std::result::Result::Ok(()) } } impl ContigRef { pub fn breakpoints(&self) -> Option> { match self { ContigRef::Unique(_) => None, ContigRef::Chimeric((a, b)) => Some(vec![a.clone(), b.clone()]), ContigRef::ChimericTriple((a, b, c)) => Some(vec![a.clone(), b.clone(), c.clone()]), ContigRef::ChimericMultiple(_) => None, ContigRef::LeftAmbiguity(_) => None, ContigRef::RightAmbiguity(_) => None, ContigRef::Ambigous(_) => None, } } pub fn breakpoints_repr(&self) -> Option> { let left = "►"; let right = "◄"; let bp_right = "▐"; let bp_left = "▌"; let format = CustomFormat::builder() .grouping(Grouping::Standard) .minus_sign("-") .separator("_") .build() .unwrap(); let get_sign = |m: &Mapping| -> &str { match m.strand { minimap2::Strand::Forward => left, minimap2::Strand::Reverse => right, } }; let uk = "UNKNOWN".to_string(); let mut res = Vec::new(); if let Some(breakpoints) = self.breakpoints() { for v in breakpoints.windows(2) { let mut bp_string = format!("{}:", v[0].target_name.clone().unwrap_or(uk.clone())); let _ = bp_string .write_formatted(&v[0].target_end, &format) .unwrap(); bp_string = format!( "{bp_string}{}{bp_left}{bp_right}{}{}:", get_sign(&v[0]), get_sign(&v[1]), v[1].target_name.clone().unwrap_or(uk.clone()) ); let _ = bp_string .write_formatted(&v[1].target_start, &format) .unwrap(); res.push(bp_string); } } if !res.is_empty() { Some(res) } else { None } } pub fn desc(&self) -> Option { let uk = "UNKNOWN".to_string(); let to_desc = |v: &mut Vec| -> String { v.sort_by(|a, b| a.query_start.cmp(&b.query_start)); let v: Vec = v .into_iter() .map(|e| { let strand = match e.strand { minimap2::Strand::Forward => "", minimap2::Strand::Reverse => "_rev", }; format!( "{}:{}_{}{}", e.target_name.clone().unwrap_or(uk.clone()), e.target_start, e.target_end, strand ) }) .collect(); format!("[{}]", v.join(";")) }; match self { ContigRef::Unique(a) => Some(format!( "{}:{}_{}", a.target_name.clone().unwrap_or(uk.clone()), a.target_start, a.target_end )), ContigRef::Chimeric((a, b)) => Some(to_desc(&mut vec![a.to_owned(), b.to_owned()])), ContigRef::ChimericTriple((a, b, c)) => { Some(to_desc(&mut vec![a.to_owned(), b.to_owned(), c.to_owned()])) } ContigRef::ChimericMultiple(_) => None, ContigRef::LeftAmbiguity(_) => None, ContigRef::RightAmbiguity(_) => None, ContigRef::Ambigous(a) => Some(to_desc(&mut a.to_owned())), } } pub fn hgvs(&self) -> Option { let uk = "UNKNOWN".to_string(); match self { ContigRef::Unique(_) => None, ContigRef::Chimeric((a, b)) => { if a.target_name == b.target_name { let chr = a.target_name.clone().unwrap_or(uk.clone()); let del_start = a.target_end; let del_end = b.target_start; let hgvs = format!("{chr}:{del_start}_{del_end}"); Some(hgvs) } else { let a_chr = a.target_name.clone().unwrap_or(uk.clone()); let a_bp = a.target_end; let b_chr = b.target_name.clone().unwrap_or(uk.clone()); let b_bp = b.target_end; let hgvs = format!("{a_chr}:{a_bp}delins[{b_chr}:{b_bp}]"); Some(hgvs) } } ContigRef::ChimericMultiple(_) => None, ContigRef::LeftAmbiguity(_) => None, ContigRef::RightAmbiguity(_) => None, ContigRef::Ambigous(_) => None, ContigRef::ChimericTriple((a, b, c)) => { let mut v = [a, b, c]; v.sort_by(|a, b| a.query_start.cmp(&b.query_start)); let (a, b, c) = ( *v.get(0).clone().unwrap(), *v.get(1).clone().unwrap(), *v.get(2).clone().unwrap(), ); let a_target_name = a.target_name.clone().unwrap_or(uk.clone()); let b_target_name = b.target_name.clone().unwrap_or(uk.clone()); let c_target_name = c.target_name.clone().unwrap_or(uk.clone()); // if a_target_name != b_target_name {} // Insertions // prioritize first len let (bp_a_1, bp_a_2) = if a.query_end <= b.query_end { // TODO add inserted nt ( (a.target_name.clone().unwrap_or(uk.clone()), a.target_end), (b.target_name.clone().unwrap_or(uk.clone()), b.target_start), ) } else { let diff = a.query_end - b.query_start; ( (a.target_name.clone().unwrap_or(uk.clone()), a.target_end), ( b.target_name.clone().unwrap_or(uk.clone()), b.target_start + diff, ), ) }; let (bp_b_1, bp_b_2) = if b.query_end <= c.query_end { // TODO add inserted nt ( (b.target_name.clone().unwrap_or(uk.clone()), b.target_end), (c.target_name.clone().unwrap_or(uk.clone()), c.target_start), ) } else { let diff = b.query_end - c.query_start; ( (b.target_name.clone().unwrap_or(uk.clone()), b.target_end), ( c.target_name.clone().unwrap_or(uk.clone()), c.target_start + diff, ), ) }; if bp_a_1.0 == bp_b_2.0 { let hgvs = format!( "{}:{}_{}ins[{}:{}_{}]", bp_a_1.0, bp_a_1.1, bp_b_2.1, bp_a_2.0, bp_a_2.1, bp_b_1.1 ); Some(hgvs) } else { None } } } } } pub fn mapping_to_string(mapping: &Mapping) -> String { let uk = "UNKNOWN".to_string(); format!( "{}:{}-{}({}:{}-{})", mapping.target_name.clone().unwrap_or(uk.clone()), mapping.target_start, mapping.target_end, mapping.query_name.clone().unwrap_or(uk), mapping.query_start, mapping.query_end ) } fn mappings_to_string(mappings: &Vec) -> String { let v = mappings .iter() .map(mapping_to_string) .collect::>(); v.join("//") } pub fn get_ref_pos(mappings: Vec) -> Result { let mut mappings = mappings; mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start)); if mappings.len() == 1 { return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone())); } else { let mut grouped: Vec> = group_mappings(&mut mappings)?; // let mut grouped: VecDeque> = group_mappings(&mut mappings)?.into(); if grouped.len() == 1 { let r = grouped.into_iter().flat_map(|e| e).collect(); return Ok(ContigRef::Ambigous(r)); } else if grouped.len() >= 2 { // let first = grouped.pop_back().unwrap(); // let last = grouped.pop_front().unwrap(); let first = grouped.first().unwrap().to_vec(); let last = grouped.last().unwrap().to_vec(); grouped.remove(0); grouped.remove(grouped.len() - 1); assert!(first[0].query_start < last[0].query_start); if grouped.len() == 0 { if first.len() == 1 && last.len() == 1 { return Ok(ContigRef::Chimeric(( first.get(0).unwrap().clone(), last.get(0).unwrap().clone(), ))); } else if first.len() == 1 { return Ok(ContigRef::RightAmbiguity(( first.get(0).unwrap().clone(), last.clone(), ))); } else if last.len() == 1 { return Ok(ContigRef::LeftAmbiguity(( first.clone(), last.get(0).unwrap().clone(), ))); } else { let all: Vec = vec![first, last].into_iter().flat_map(|e| e).collect(); return Ok(ContigRef::Ambigous(all)); } } if first.len() == 1 && last.len() == 1 { if grouped.len() == 1 { return Ok(ContigRef::ChimericTriple(( first.get(0).unwrap().clone(), grouped.get(0).unwrap().get(0).unwrap().clone(), last.get(0).unwrap().clone(), ))); } else { return Ok(ContigRef::ChimericMultiple(( first.get(0).unwrap().clone(), grouped.into_iter().flat_map(|e| e).collect(), last.get(0).unwrap().clone(), ))); } } else if first.len() == 1 { let right: Vec = vec![grouped.into_iter().flat_map(|e| e).collect(), last] .into_iter() .flat_map(|e| e) .collect(); return Ok(ContigRef::RightAmbiguity(( first.get(0).unwrap().clone(), right, ))); } else if last.len() == 1 { let left: Vec = vec![first, grouped.into_iter().flat_map(|e| e).collect()] .into_iter() .flat_map(|e| e) .collect(); return Ok(ContigRef::LeftAmbiguity(( left, last.get(0).unwrap().clone(), ))); } else { let all: Vec = vec![first, grouped.into_iter().flat_map(|e| e).collect(), last] .into_iter() .flat_map(|e| e) .collect(); return Ok(ContigRef::Ambigous(all)); } } else { return Ok(ContigRef::Ambigous( grouped.into_iter().flat_map(|e| e).collect(), )); } } } impl Genome { pub fn new() -> Self { Genome { chromosomes: HashMap::new(), } } pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> { self.chromosomes.iter() } pub fn contigs(&self) -> impl Iterator { self.chromosomes.iter().flat_map(|(_, e)| e.iter()) } pub fn add_contig( &mut self, id: String, mappings: Vec, supporting_records: Option>, sequence: String, ) -> Result<()> { let mut mappings = mappings; mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start)); let new_contig = Contig { id, mappings: mappings.clone(), supporting_records, sequence, contig_ref: get_ref_pos(mappings)?, }; // get the category of Mapping match new_contig.contig_ref.clone() { ContigRef::Unique(contig_mapping) => { match self .chromosomes .get_mut(&contig_mapping.target_name.unwrap()) { Some(chromosome) => { chromosome.contigs.push(new_contig); } None => (), } } ContigRef::Chimeric((a, b)) => { let a_target_name = a.target_name.unwrap(); let b_target_name = b.target_name.unwrap(); if a_target_name == b_target_name { if let Some(chromosome) = self.chromosomes.get_mut(&a_target_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( a_target_name, Chromosome { contigs: vec![new_contig], }, ); } } else { let chimeric_name = format!("{}-{}", a_target_name, b_target_name); if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( chimeric_name, Chromosome { contigs: vec![new_contig], }, ); } } } ContigRef::ChimericMultiple((left, _, right)) => { let left_target_name = left.target_name.unwrap(); let right_target_name = right.target_name.unwrap(); if left_target_name == right_target_name { if let Some(chromosome) = self.chromosomes.get_mut(&left_target_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( left_target_name, Chromosome { contigs: vec![new_contig], }, ); } } else { let chimeric_name = format!("{}-{}", left_target_name, right_target_name); if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( chimeric_name, Chromosome { contigs: vec![new_contig], }, ); } } } _ => { if let Some(chromosome) = self.chromosomes.get_mut("Ambigous") { chromosome.contigs.push(new_contig); } else { self.chromosomes.insert( "Ambigous".to_string(), Chromosome { contigs: vec![new_contig], }, ); } } }; Ok(()) } pub fn add_contig_from_seq( &mut self, name: String, sequence: &[u8], aligner: impl Fn(String) -> Result>, ) -> Result<()> { let mappings = aligner(String::from_utf8(sequence.to_vec())?)?; // println!("{mappings:?}"); self.add_contig(name, mappings, None, String::from_utf8(sequence.to_vec())?)?; Ok(()) } pub fn write_contigs_sequences(&self, dir: &str) { for contig in self.contigs() { contig.write_fasta(&format!("{dir}/{}.fasta", contig.id)) } // self.iter().for_each(|(_, chr)| { // chr.iter().for_each(|c| c.write_fasta(&format!("{dir}/{}.fasta", c.id))) // }); } pub fn from_contigs_sequences(dir: &str) -> Result { let aligner_url = "http://localhost:4444/align"; let aligner = aligner_client::dist_align(aligner_url.to_string()); let mut genome = Self::new(); // let paths = get_ext_paths(dir, "fasta")?; let paths = get_contigs_fa_paths(dir)?; for path in paths { let fa = read_fasta(path.to_str().unwrap())?; for (name, sequence) in fa { genome.add_contig_from_seq(name, sequence.as_ref(), &aligner)?; } } Ok(genome) } // // pub fn from_dir_bed(dir: &str) { // // } // pub fn write_records(&self, file: &str) { // let mut records = Vec::new(); // for (name, chromosome) in self.chromosomes.iter() { // for contig in chromosome.iter() { // if let Some(rec) = &contig.supporting_records { // records.extend(rec.iter()); // }; // } // } // // header // // // writer // rust_htslib::bam::Writer::from_path(path, header, format) // } pub fn stats(&self) { for (k, v) in self.chromosomes.iter() { info!("{}:{}", k, v.contigs.len()); } } pub fn chromosome(&self, chromosome: &str) -> Option> { self.chromosomes .get(chromosome) .map(|chr| chr.contigs.clone()) } } impl Chromosome { pub fn iter(&self) -> std::slice::Iter<'_, Contig> { self.contigs.iter() } } impl Contig { // pub fn sort(&mut self) { // // sorting by target order // self.mappings // .sort_by(|a, b| a.target_start.cmp(&b.target_start)); // } pub fn to_igv(&self, dir_path: &str) -> Result<()> { let supporting_records = self.supporting_records.clone().ok_or(anyhow!("no reads"))?; let contig_name = if let Some(hgvs) = self.hgvs() { hgvs } else { self.id.clone() }; let contig_dir = format!("{dir_path}/{contig_name}"); fs::create_dir_all(contig_dir.clone())?; let fasta_path = format!("{contig_dir}/contig.fa"); write_fasta(&fasta_path, &vec![(self.id.clone(), self.sequence.clone())]); write_fai(&fasta_path); let reads_path = format!("{contig_dir}/reads.fa"); let n_reads = supporting_records .clone() .into_iter() .map(|r| { ( String::from_utf8(r.qname().to_vec()).unwrap(), String::from_utf8(r.seq().as_bytes()).unwrap(), ) }) .collect(); write_fasta(&reads_path, &n_reads); let bam_path = format!("{contig_dir}/{}.bam", self.id); write_bam(&fasta_path, &reads_path, &bam_path)?; let bed_path = format!("{contig_dir}/contig.bed"); match &self.contig_ref { ContigRef::Chimeric((a, b)) => { let d = vec![ ( self.id.clone(), a.query_start, a.query_end, format!( "{}:{}-{}", a.target_name.clone().unwrap(), a.target_start, a.target_end ), ), ( self.id.clone(), b.query_start, b.query_end, format!( "{}:{}-{}", b.target_name.clone().unwrap(), b.target_start, b.target_end ), ), ]; write_bed(&bed_path, &d)?; } ContigRef::ChimericTriple((a, b, c)) => { let d: Vec<(String, i32, i32, String)> = [a, b, c] .iter() .map(|r| { ( self.id.clone(), r.query_start, r.query_end, format!( "{}:{}-{}", r.target_name.clone().unwrap(), r.target_start, r.target_end ), ) }) .collect(); write_bed(&bed_path, &d)?; } _ => (), } Ok(()) } // bug cigar len != seq len // pub fn write_bam(&self, path: &str) -> Result<()> { // let aligner = Aligner::builder() // .asm5() // .with_threads(8) // .with_cigar() // .with_seq(self.sequence.as_bytes()) // .expect("Unable to build index"); // // let mut mappings = Vec::new(); // let supporting_records = self // .supporting_records // .clone() // .ok_or(anyhow!("no supporting records"))?; // for record in supporting_records.iter() { // let seq = record.seq().as_bytes(); // let alignment = aligner // .map(&seq, false, false, None, None) // .expect("Unable to align"); // mappings.push(alignment); // } // let mut mappings: Vec<_> = mappings.into_iter().flatten().collect(); // mappings.sort_by(|a, b| a.target_start.cmp(&b.target_start)); // // let mut header = bam::Header::new(); // let mut sq_record = rust_htslib::bam::header::HeaderRecord::new(b"SQ"); // sq_record.push_tag(b"SN", self.id.clone()); // sq_record.push_tag(b"LN", self.sequence.len()); // header.push_record(&sq_record); // // let mut out = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap(); // // // copy reverse reads to new BAM file // for mapping in mappings.iter() { // let record = minimap2::htslib::mapping_to_record( // Some(mapping), // self.sequence.as_bytes(), // header.clone(), // None, // Some(Uuid::new_v4().as_bytes()), // ); // let _ = out.write(&record); // } // rust_htslib::bam::index::build(path, None, rust_htslib::bam::index::Type::Bai, 1)?; // Ok(()) // } pub fn hgvs(&self) -> Option { self.contig_ref.hgvs() } pub fn desc(&self) -> Option { self.contig_ref.desc() } pub fn breakpoints_repr(&self) -> Option> { self.contig_ref.breakpoints_repr() } pub fn write_fasta(&self, fasta_path: &str) { write_fasta(fasta_path, &vec![(self.id.clone(), self.sequence.clone())]); } } fn group_mappings(mappings: &mut [Mapping]) -> Result>> { // sort alignments by query_start mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start)); let mut alignments: Vec> = vec![]; // group by overlapps > 30 for aln in mappings.iter() { let last = alignments.last_mut(); if let Some(l) = last { if l.iter() .filter(|a| a.query_end - aln.query_start > 30) .count() > 0 { l.push(aln.clone()); } else { alignments.push(vec![aln.clone()]); } } else { alignments.push(vec![aln.clone()]); } } Ok(alignments) } pub fn write_bed(path: &str, d: &[(String, i32, i32, String)]) -> Result<()> { let file = File::create(path).unwrap(); let mut writer = BufWriter::new(file); for (chr, start, end, value) in d.iter() { let row = format!( "{}\n", [ chr.to_string(), start.to_string(), end.to_string(), value.to_string() ] .join("\t") ); writer.write_all(row.as_bytes())?; } Ok(()) } // unique pub fn write_fastq(fastq_path: &str, d: &Vec) -> Result<()> { let file = File::create(fastq_path)?; let mut writer = BufWriter::new(file); for record in d { let name = String::from_utf8(record.qname().to_vec()).unwrap(); writer.write_all(format!("@{name}\n").as_bytes())?; let seq = record.seq().as_bytes(); writer.write_all(&seq)?; writer.write_all(b"\n+\n")?; let qual = record.qual(); writer.write_all(qual)?; } Ok(()) } pub fn write_fasta(fasta_path: &str, d: &Vec<(String, String)>) { let file = File::create(fasta_path).unwrap(); let mut writer = fasta::writer::Builder::default().build_with_writer(file); let mut passed = Vec::new(); for (name, sequence) in d { let name = name.to_string(); if sequence.is_empty() { continue; } if passed.contains(&name) { continue; } passed.push(name.clone()); let record = fasta::Record::new( fasta::record::Definition::new(name.as_str(), None), fasta::record::Sequence::from(sequence.as_bytes().to_vec()), ); writer.write_record(&record).unwrap(); } } pub fn write_fai(path: &str) { let mut faidx = Command::new("samtools") .arg("faidx") .arg(path) .spawn() .expect("Samtools faidx failed to start"); faidx.wait().unwrap(); } pub fn write_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()> { let rg_id = uuid::Uuid::new_v4(); let mm2 = Command::new("minimap2") .arg("-t") .arg("128") .arg("-ax") .arg("map-ont") .arg("-R") .arg(format!( "@RG\\tPL:ONTASM_PROM\\tID:ONTASM_{rg_id}\\tSM:{rg_id}\\tLB:ONTASM_NB_PROM" )) .arg(ref_path) .arg(reads_path) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .spawn() .expect("Minimap2 failed to start"); let view = Command::new("sambamba") .arg("view") .arg("-h") .arg("-S") .arg("-t") .arg("20") .arg("--format=bam") .arg("/dev/stdin") .stdin(Stdio::from(mm2.stdout.unwrap())) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .spawn() .expect("Sambamba view failed to start"); let mut sort = Command::new("sambamba") .arg("sort") .arg("-t") .arg("20") .arg("/dev/stdin") .arg("-o") .arg(bam_path) .stderr(Stdio::piped()) .stdin(Stdio::from(view.stdout.unwrap())) .spawn() .expect("Sambamba sort failed to start"); sort.wait().unwrap(); Ok(()) } pub fn read_fasta(path: &str) -> Result> { let mut reader = File::open(path) .map(BufReader::new) .map(fasta::Reader::new)?; let mut res = Vec::new(); for result in reader.records() { let record = result?; let u = String::from_utf8(record.name().to_vec())?; let s = record.sequence().to_owned(); res.push((u, s)); } Ok(res) } // fn get_ext_paths(dir: &str, ext: &str) -> Result> { // let paths = std::fs::read_dir(dir)? // // Filter out all those directory entries which couldn't be read // .filter_map(|res| res.ok()) // // Map the directory entries to paths // .map(|dir_entry| dir_entry.path()) // // Filter out all paths with extensions other than `csv` // .filter_map(|path| { // if path.extension().map_or(false, |xt| xt == ext) { // Some(path) // } else { // None // } // }) // .collect::>(); // Ok(paths) // } fn get_contigs_fa_paths(dir: &str) -> Result> { let pattern = format!("{}/**/*_flye.fa", dir); let fa_paths: Vec = glob::glob(&pattern) .expect("Failed to read glob pattern") .filter_map(Result::ok) .collect(); Ok(fa_paths) } pub fn dot_graph( graph: &StableGraph, way: &[NodeIndex], value: &str, color: &str, erase: bool, ) -> String { let mut g = graph.clone(); // erase labels if erase { g.edge_weights_mut().for_each(|e| *e = "".to_string()); } let mut labels = Vec::new(); for window in way.windows(2) { let edge_id = g.find_edge(window[0], window[1]).unwrap(); let edge = g.edge_weight_mut(edge_id).unwrap(); let v = if !edge.is_empty() { let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect(); v.push(value); v.join(", ") } else { value.to_string() }; labels.push(v.clone()); *edge = v; } if erase { g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string()); g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0); } let mut dot = Dot::new(&g).to_string().replace("\\\"", ""); labels.sort_by_key(|b| std::cmp::Reverse(b.len())); // labels.sort_by(|a, b| b.len().cmp(&a.len())); for label in labels { let label_str = format!("label = \"{label}\""); dot = dot.replace( &format!("{label_str} ]\n"), &format!("{label_str}, color = \"{color}\" ]\n"), ); } dot } /// for non overlapping ways pub fn dot_graph_biall( graph: &StableGraph, ways: &[Vec], values: Vec<&str>, colors: Vec<&str>, erase: bool, ) -> String { let mut g = graph.clone(); // erase labels if erase { g.edge_weights_mut().for_each(|e| *e = "".to_string()); } let mut labels = Vec::new(); for ((way, value), color) in ways.iter().zip(values.into_iter()).zip(colors.into_iter()) { for window in way.windows(2) { let edge_id = g.find_edge(window[0], window[1]).unwrap(); let edge = g.edge_weight_mut(edge_id).unwrap(); let v = if !edge.is_empty() { let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect(); v.push(value); v.join(", ") } else { value.to_string() }; labels.push((v.clone(), color)); *edge = v; } } // g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string()); g.retain_edges(|g, i| !g.edge_weight(i).unwrap().is_empty()); g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0); let mut dot = Dot::new(&g).to_string().replace("\\\"", ""); labels.sort_by(|a, b| b.0.len().cmp(&a.0.len())); for (label, color) in labels { let label_str = format!("label = \"{label}\""); dot = dot.replace( &format!("{label_str} ]\n"), &format!("{label_str}, color = \"{color}\" ]\n"), ); } dot } #[cfg(test)] mod tests { use env_logger::Env; use super::*; use crate::{ genomic_graph::GenomicGraph, // phase::{variants_phasing, write_phases_bed}, }; fn init() { let _ = env_logger::Builder::from_env(Env::default().default_filter_or("info")) .is_test(true) .try_init(); } #[test] fn it_works() -> Result<()> { let _ = env_logger::builder().is_test(true).try_init(); let contig_fa = "./data_test/contig_2.fa"; let aligner_url = "http://localhost:4444/align"; let mut genome = Genome::new(); let aligner = aligner_client::dist_align(aligner_url.to_string()); let sequences = read_fasta(contig_fa)?; for (name, seq) in sequences { genome.add_contig_from_seq(name.clone(), &seq.as_ref().to_vec(), &aligner)?; let mut seqc: Vec = seq.complement().map(|e| e.unwrap()).collect(); seqc.reverse(); genome.add_contig_from_seq(format!("{name}_rev"), &seqc, &aligner)?; println!("Sending"); } genome.iter().for_each(|(_, c)| { c.iter().for_each(|cont| { println!("{}", cont.contig_ref.desc().unwrap()); }); }); Ok(()) } #[test] fn test_graph() -> Result<()> { init(); let case = "SALICETTO"; let chrom = vec!["chr10"]; info!("This record will be captured by `cargo test`"); let dir = format!("/data/longreads_basic_pipe/{case}/diag/asm_contis"); // Load from fasta in dir. let genome = Genome::from_contigs_sequences(&dir)?; genome.stats(); let mut genomic_graph = GenomicGraph::from_genome(&genome); let sens = vec![true, false]; let pos = vec![0, i32::MAX]; let mut all_ways = Vec::new(); if chrom.len() > 1 { (0..4).into_iter().for_each(|i| { let start_pos = if i < 2 { 0 } else { i32::MAX }; let end_pos = pos[i % 2]; (0..4).into_iter().for_each(|i| { let start_sens = if i < 2 { true } else { false }; let end_sens = sens[i % 2]; (0..4).into_iter().for_each(|i| { let start_chr = if i < 2 { chrom[0] } else { chrom[1] }; let end_chr = chrom[i % 2]; let start = (start_sens, start_chr, start_pos); let end = (end_sens, end_chr, end_pos); let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end); let dot = oriented_graph.dot_graph(); println!("dot\n{dot}"); for (_i, way) in ways.iter().enumerate() { let s = way .iter() .map(|(_, _, _, s)| s.to_string()) .collect::>() .join(""); all_ways.push(s); } }); }); }); } else { let start_chr = chrom[0]; let end_chr = chrom[0]; let start = (true, start_chr, 0); let end = (true, end_chr, i32::MAX); let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end); let dot = oriented_graph.dot_graph(); println!("dot\n{dot}"); for (_i, way) in ways.iter().enumerate() { let s = way .iter() .map(|(_, _, _, s)| s.to_string()) .collect::>() .join(""); all_ways.push(s); } } all_ways.dedup(); all_ways .iter() .enumerate() .for_each(|(i, s)| println!("{i}.\t{s}")); // let s = Dot::new(&integrated_graph).to_string().replace("\\\"", ""); // let x11_colors: Vec = vec![ // String::from("Red"), // String::from("Green"), // String::from("Blue"), // String::from("Cyan"), // String::from("Magenta"), // String::from("Yellow"), // String::from("DarkRed"), // String::from("DarkGreen"), // String::from("DarkBlue"), // String::from("DarkCyan"), // String::from("DarkMagenta"), // String::from("DarkYellow"), // String::from("LightRed"), // String::from("LightGreen"), // String::from("LightBlue"), // String::from("LightCyan"), // String::from("LightMagenta"), // String::from("LightYellow"), // String::from("Orange"), // String::from("Brown"), // String::from("Beige"), // ]; // let mut s = s.clone(); // ways.iter().enumerate().for_each(|(i, _)| { // s = s.replace( // &format!("[ label = \"{}\" ]", i + 1), // &format!( // "[ label = \"{}\" color = \"{}\" ]", // i + 1, // x11_colors[i].to_string() // ), // ); // }); // println!("{s}"); // // for (i, way) in ways.iter().enumerate() { // let s = way // .iter() // .map(|(_, _, _, s)| s.to_string()) // .collect::>() // .join(""); // println!("{}.\t{s}", i + 1); // } Ok(()) } #[test] fn dir() { init(); let id = "ROBIN"; info!("This record will be captured by `cargo test`"); let dir = format!("/data/longreads_basic_pipe/{id}/diag/scan/reads",); // Load from fasta in dir. let genome = Genome::from_contigs_sequences(&dir).unwrap(); genome.stats(); let mut res: Vec = genome .iter() .flat_map(|(_s, chrom)| { chrom.iter().filter_map(|c| c.hgvs()) // .map(|c| println!("{c}")) }) .collect(); res.sort(); res.dedup(); res.iter().for_each(|s| println!("{s}")); // println!("{genome:#?}"); } }