use std::{ops::Range, collections::{HashMap, HashSet}}; use petgraph::{graph::{Graph, NodeIndex, EdgeIndex}, Undirected}; use kseq::parse_path; use faimm::IndexedFasta; use duct::cmd; pub struct SequencesGraph { sequences: HashMap, seq_hash : HashSet, names : HashMap, overlaps : HashMap, offsets : HashMap, // node_index, stretch_id, offset graph : Graph, min_overlapping: usize, max_consecutive: usize, max_diffs : usize, } #[derive(Debug)] pub struct Overlap { id_a : NodeIndex, range_a: Range, id_b : NodeIndex, range_b: Range, } impl SequencesGraph { pub fn new (min_overlapping: usize, max_consecutive: usize, max_diffs: usize) -> SequencesGraph { SequencesGraph { sequences: HashMap::new(), seq_hash : HashSet::new(), names : HashMap::new(), overlaps : HashMap::new(), offsets : HashMap::new(), graph : Graph::new_undirected(), min_overlapping, max_consecutive, max_diffs } } pub fn from_fq_pe (&mut self, r1_path: &str, r2_path: &str, min_overlapping_len_pe: usize) { let mut r1p = parse_path(r1_path).unwrap(); let mut r2p = parse_path(r2_path).unwrap(); let mut n_pairs = 0; while let (Some(r1), Some(r2)) = (r1p.iter_record().unwrap(), r2p.iter_record().unwrap()) { let a = r1.seq().as_bytes().to_vec(); let b = r2.seq().as_bytes().to_vec(); self.add_from_pe(a, b, r1.head().to_string(), r2.head().to_string(), min_overlapping_len_pe); n_pairs += 1; } println!("{n_pairs}, pairs parsed."); self.draw_edges(); println!("{}, overlaps fund.", self.overlaps.len()); self.remove_single_nodes(); println!("{}, nodes to assemble", self.names.len()); self.find_leftmost_node(); } pub fn add_node (&mut self, sequence: String, name: String) { let id = self.graph.add_node(sequence.clone()); self.sequences.insert(id, sequence); self.names.insert(id, name); } pub fn add_edge (&mut self, id_a: &NodeIndex, id_b: &NodeIndex) { let a = self.sequences.get(id_a).unwrap().as_bytes().to_vec(); let b = self.sequences.get(id_b).unwrap().as_bytes().to_vec(); if let Some(r) = get_overlaps(&a, &b, self.min_overlapping, self.max_consecutive, self.max_diffs) { // normaly always like that self.overlaps.insert( self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()), Overlap { id_a: *id_a, range_a: r.0, id_b: *id_b, range_b: r.1} ); } else if let Some(r) = get_overlaps(&b, &a, self.min_overlapping, self.max_consecutive, self.max_diffs) { self.overlaps.insert( self.graph.add_edge(id_a.clone(), id_b.clone(), r.0.len()), Overlap { id_a: *id_a, range_a: r.1, id_b: *id_b, range_b: r.0} ); } } pub fn add_from_pe (&mut self, a: Vec, b: Vec, a_name: String, b_name: String, min_overlapping_len_pe: usize) { let a_rc = revcomp(std::str::from_utf8(&a).unwrap()).as_bytes().to_vec(); let b_rc = revcomp(std::str::from_utf8(&b).unwrap()).as_bytes().to_vec(); let max_consecutive = self.max_consecutive; let max_diffs = self.max_diffs; let mut seq: Option> = None; if let Some(r) = get_overlaps(&a_rc, &b, min_overlapping_len_pe, max_consecutive, max_diffs) { // normaly always like that seq = Some(fuse_overlaps(&a_rc, &b, &r.0, &r.1)); } else if let Some(r) = get_overlaps(&b, &a, min_overlapping_len_pe, max_consecutive, max_diffs) { seq = Some(fuse_overlaps(&b, &a, &r.0, &r.1)); } else if let Some(r) = get_overlaps(&a, &b, min_overlapping_len_pe, max_consecutive, max_diffs) { seq = Some(fuse_overlaps(&b, &a, &r.0, &r.1)); } else if let Some(r) = get_overlaps(&b, &a_rc, min_overlapping_len_pe, max_consecutive, max_diffs) { seq = Some(fuse_overlaps(&b, &a_rc, &r.0, &r.1)); } else if let Some(r) = get_overlaps(&a, &b_rc, min_overlapping_len_pe, max_consecutive, max_diffs) { seq = Some(fuse_overlaps(&a, &b_rc, &r.0, &r.1)); } else if let Some(r) = get_overlaps(&a_rc, &b_rc, min_overlapping_len_pe, max_consecutive, max_diffs) { seq = Some(fuse_overlaps(&a_rc, &b_rc, &r.0, &r.1)); } else if let Some(r) = get_overlaps(&b_rc, &a_rc, min_overlapping_len_pe, max_consecutive, max_diffs) { seq = Some(fuse_overlaps(&b_rc, &a_rc, &r.0, &r.1)); } if let Some(seq) = seq { let seq = String::from_utf8_lossy(&seq).to_string(); if !self.seq_hash.contains(&seq) { self.add_node(seq.clone(), a_name); self.seq_hash.insert(seq); } } else { let seq_r1 = String::from_utf8_lossy(&a).to_string(); if !self.seq_hash.contains(&seq_r1) { self.add_node(seq_r1.clone(), format!("{a_name}_R1")); self.seq_hash.insert(seq_r1); } let seq_r2 = String::from_utf8_lossy(&b_rc).to_string(); if !self.seq_hash.contains(&seq_r2) { self.add_node(seq_r2.clone(), format!("{b_name}_R2")); self.seq_hash.insert(seq_r2); } } } pub fn draw_edges (&mut self) { let idx: Vec = self.graph.node_indices().collect(); let n_nodes = idx.len(); for x in 0..n_nodes { for y in 0..n_nodes { if y < x { self.add_edge(&idx[x], &idx[y]); } } } } pub fn remove_single_nodes (&mut self) { let mut res: Vec<(NodeIndex, usize)> = Vec::new(); self.names.iter().for_each(|(node_index, _)|{ let mut neighbors = self.graph.neighbors(*node_index).detach(); let mut node_neighbors: Vec = Vec::new(); while let Some(neighbor_id) = neighbors.next_node(&self.graph) { node_neighbors.push(neighbor_id); } res.push((*node_index, node_neighbors.len())); }); res.iter().for_each(|(node_id, n_neighbors)|{ if *n_neighbors == 0 { self.graph.remove_node(*node_id); println!("Removing {} which doesnt have any overlapping reads.", self.names.get(&node_id).unwrap()); self.names.remove(node_id); self.sequences.remove(node_id); } }); } pub fn find_leftmost_node (&mut self) { let mut stretch_id = 0; self.names.iter().for_each(|(node_index, _)|{ let mut neighbors = self.graph.neighbors(node_index.clone()).detach(); let mut left = false; let mut n_neigh = 0; while let Some(edge_index) = neighbors.next_edge(&self.graph) { n_neigh += 1; let ov = self.overlaps.get(&edge_index).unwrap(); if ov.id_a == node_index.clone() && ov.range_a.start == 0 { left = true; break; } if ov.id_b == node_index.clone() && ov.range_b.start == 0 { left = true; break; } } if !left && n_neigh > 0 { let node_key = format!("{}_{stretch_id}", node_index.index()); self.offsets.insert(node_key.clone(), (node_index.clone(), stretch_id, 0)); stretch_id += 1; } }); let mut visited: HashSet = HashSet::new(); loop { let mut new_visit = 0; let mut to_insert: Vec<(String, NodeIndex, usize, i32)> = Vec::new(); for (node_key, (node_index, stretch_id, offset)) in self.offsets.iter() { if visited.contains(node_key) { continue; } new_visit += 1; let mut neighbors = self.graph.neighbors(*node_index).detach(); while let Some(edge_index) = neighbors.next_edge(&self.graph) { let overlaps = self.overlaps.get(&edge_index).unwrap(); let neighbor_id = if overlaps.id_a == *node_index { overlaps.id_b } else { overlaps.id_a }; let neighbor_key = format!("{}_{stretch_id}", neighbor_id.index()); if !self.offsets.contains_key(&neighbor_key) { if overlaps.id_a == *node_index { let new_offset = *offset + overlaps.range_a.start as i32 - overlaps.range_b.start as i32; to_insert.push((neighbor_key, neighbor_id, *stretch_id, new_offset)); } else { let new_offset = *offset + overlaps.range_b.start as i32 - overlaps.range_a.start as i32; to_insert.push((neighbor_key, neighbor_id, *stretch_id, new_offset)); } } } visited.insert(node_key.clone()); } to_insert.iter().for_each(|(key, node_index, stretch_id, offset)| { self.offsets.insert(key.clone(), (*node_index, *stretch_id, *offset)); }); if new_visit == 0 { break; } } } pub fn process (&mut self) { self.draw_edges(); self.remove_single_nodes(); self.find_leftmost_node(); } pub fn get_new_stretch_id (&self) -> usize { let mut v = self.offsets.iter() .map(|(_, (_, id, _))| *id).collect::>(); v.dedup(); v.sort_by(|a,b| b.cmp(a)); if v.len() > 0 { v[0] + 1 } else { 0 } } pub fn get_stretch_align (&self, stretch_id: usize) -> Vec<(NodeIndex, i32)> { let mut align: Vec<(NodeIndex, i32)> = Vec::new(); self.offsets.iter().for_each(|(_, (node_index, node_stretch_id, offset))| { if *node_stretch_id == stretch_id { align.push((*node_index, *offset)); } }); align.sort_by(|a,b| a.1.cmp(&b.1)); align } pub fn print_stretch (&self, stretch_id: usize) { let v = self.get_stretch_align(stretch_id); println!(">sequence_{}", stretch_id); if v.len() > 0 { let decal = -v[0].1; for (node_index, offset) in v { let seq = self.sequences.get(&node_index).unwrap(); println!("{}{} - {:?}", " ".repeat((offset + decal) as usize).to_string(), seq, node_index); } } let cons = self.get_stretch_consensus(stretch_id); println!("{}", String::from_utf8_lossy(&cons) ); } // Stack reads respecting their offsets // Return the consensus sequence pub fn get_stretch_consensus (&self, stretch_id: usize) -> Vec { let v = self.get_stretch_align(stretch_id); let mut reads_stack: Vec> = Vec::with_capacity(v.len()); let mut consensus_sequence: Vec = Vec::new(); if v.len() > 0 { // first read position will be used as reference let first_gap = -v[0].1; let mut max_len = 0; // add each read to the stack with n offset empty space for (node_index, offset) in v { let seq = self.sequences.get(&node_index).unwrap(); let seq = format!("{}{}", " ".repeat((offset + first_gap) as usize).to_string(), seq).as_bytes().to_vec(); // keep track of the size of reads stack if max_len < seq.len() { max_len = seq.len(); } reads_stack.push(seq); } // for each column of the reads stack for i in 0..max_len { let mut acc: Vec = Vec::new(); let atcg = vec![b'A', b'T', b'C', b'G']; let mut n_atcg = [0; 4]; reads_stack.iter().for_each(|s|{ if s.len() > i { acc.push(s[i].clone()); for nb in 0..4 { if s[i] == atcg[nb] { n_atcg[nb] += 1; } } } }); let n_base: usize = n_atcg.iter().sum(); let max_base = n_atcg.iter().max().unwrap(); let mut base = b'N'; if *max_base as f32 / n_base as f32 > 0.5 { n_atcg.iter().enumerate().for_each(|(pos, n)| if n == max_base { base = atcg[pos] }); } consensus_sequence.push(base); } } // println!("RES {:?}", String::from_utf8_lossy(&consensus_sequence)); // println!("last = {:?}", consensus_sequence.last().unwrap().to_owned()); // println!("last = {:?}", consensus_sequence.last().unwrap().to_owned() == b'N'); consensus_sequence } pub fn get_fasta (&self) -> String { let mut fasta = String::new(); for i in 0..self.get_new_stretch_id() { let cons = self.get_stretch_consensus(i); fasta.push_str(format!(">sequence_{}_length_{}\n", i + 1, cons.len()).as_str()); let mut acc: Vec = Vec::new(); cons.into_iter().for_each(|c| { acc.push(c); if acc.len() == 60 { fasta.push_str(format!("{}\n", String::from_utf8_lossy(&acc)).as_str()); acc = Vec::new(); } }); if acc.len() > 0 { fasta.push_str(format!("{}\n", String::from_utf8_lossy(&acc)).as_str()); } } fasta } pub fn make_neo_contig (&self, stretch_id: usize) -> NeoContig { let cons = String::from_utf8_lossy(&self.get_stretch_consensus(stretch_id)).to_string().trim().to_string(); let stretch = self.get_stretch_align(stretch_id); let sequences: HashMap = stretch.iter().map(|(id,_)|{ let (id, s) = self.sequences.get_key_value(id).unwrap(); (*id, s.clone()) }).collect(); let sequences_names: HashMap = stretch.iter().map(|(id,_)|{ let (id, s) = self.names.get_key_value(id).unwrap(); (*id, s.clone()) }).collect(); NeoContig { sequence_id: stretch_id, name: format!("sequence_{stretch_id}"), contig: cons.clone(), alignments: Vec::new(), sequences, sequences_names, stretch, } } } #[derive(Debug, Clone)] pub struct Sam { pub query_name : String, pub query_range: Range, pub ref_name : String, pub ref_pos : i64, pub ref_range : Range, pub sequence : String, pub ref_sequence: String, pub ref_cigar : String, } impl Sam { pub fn new(query_name: String, flag: i32, ref_name: String, ref_pos: i64, cigar: String, sequence: String, fa: &Fasta) -> Self { let mut sequence = sequence; let (mut ref_range, query_range, ref_cigar) = matched_range(&cigar, &flag, &mut sequence); // sam is 1-based // the range frames the sequence // the last position is excluded in rust ranges // [ref_range.start, ref_range.end[ // in bam the positions are inclusives [start, end] if is_reverse(flag) { let len = ref_range.len(); ref_range.end = (ref_pos as usize) - 1; ref_range.start = ref_pos as usize + len - 1; } else { let len = ref_range.len(); ref_range.start = ref_pos as usize; ref_range.end = (ref_pos as usize + len) + 1; } let ref_sequence = fa.get_sequence(&ref_name, ref_range.start.clone(), ref_range.end.clone()); Sam {query_name, query_range, ref_name, ref_pos, ref_range, sequence, ref_sequence, ref_cigar} } } #[derive(Debug, Clone)] pub struct NeoContig { pub sequence_id: usize, pub name: String, pub contig : String, pub alignments : Vec, pub sequences: HashMap, pub sequences_names: HashMap, pub stretch : Vec<(NodeIndex, i32)>, } impl NeoContig { pub fn bwa_aln (&mut self, fasta_ref_path: &str, fa: &Fasta) { let sam_keys = vec!["QNAME", "FLAG", "RNAME", "POS", "MAPQ", "CIGAR", "RNEXT", "PNEXT", "TLEN", "SEQ", "QUAL", "ALN"]; println!("BWA exec"); println!(" {}", format!("\">sequence_{}\n{}\"", self.name.to_string(), self.contig)); let stdout = cmd!("echo", "-e", format!("\">sequence_{}\n{}\"", self.name.to_string(), self.contig)) .pipe(cmd!("bwa", "mem", fasta_ref_path, "-")) .stdout_capture() .stderr_capture() .read().unwrap(); let mut alignments: Vec = Vec::new(); stdout.split("\n").for_each(|line|{ // println!("{:?}", line); if line.len() > 8 { if &line[0..9] == "sequence_" { let mut qname: Option = None; let mut flag: Option = None; let mut rname: Option = None; let mut pos: Option = None; let mut cigar: Option = None; let mut sequence: Option = None; for (i, value) in line.split("\t").enumerate() { if sam_keys.len() <= i { break; } match sam_keys[i] { "QNAME" => qname = Some(value.to_string()), "FLAG" => flag = value.parse::().ok(), "RNAME" => rname = Some(value.to_string()), "POS" => pos = value.parse::().ok(), "CIGAR" => cigar = Some(value.to_string()), "SEQ" => sequence = Some(value.to_string()), _ => () } } if let (Some(qname), Some(flag), Some(rname), Some(pos), Some(cigar), Some(sequence)) = (qname, flag, rname, pos, cigar, sequence) { alignments.push(Sam::new(qname, flag, rname, pos, cigar, self.contig.clone(), &fa)); } } } }); println!("n bwa {:?}", alignments.len()); println!("Alignments {:?}", alignments); self.alignments = alignments; } pub fn make_aln (&self, print_sequences: Option) -> Vec> { println!("== {} ============", self.name); let contig_len = self.contig.len(); let mut lines: Vec> = Vec::new(); // ref 1 if self.alignments.len() > 0 { let ref_spaces = " ".repeat(self.alignments[0].query_range.start).as_bytes().to_vec(); // ref sequence let mut l = ref_spaces.clone(); let ref_cigar = self.alignments[0].ref_cigar.as_bytes(); let mut filtered_ref: Vec = self.alignments[0].ref_sequence .as_bytes() .iter().enumerate() .filter(|(i, _)| ref_cigar[*i] == b'M') .map(|(_, c)| *c) .collect(); l.append(&mut filtered_ref); lines.push(l); // match pipes let mut match_pipes = self.alignments[0].ref_cigar .as_bytes().iter() .filter(|c| **c == b'M') .map(|_| b'|') .collect::>(); let mut l = ref_spaces.clone(); l.append(&mut match_pipes); let mut ll = format!("{} - {}:{}-{}{}", " ".repeat(contig_len - l.len()).to_string(), self.alignments[0].ref_name, self.alignments[0].ref_range.start, self.alignments[0].ref_range.end, if self.alignments[0].ref_range.start > self.alignments[0].ref_range.end { " (rc)" } else { "" } ).as_bytes().to_vec(); l.append(&mut ll); lines.push(l); } // Contig let contig = format!("{} - denovo assembled contig (cov = {})", self.contig, self.sequences.len() ).as_bytes().to_vec(); lines.push(contig.clone()); // Sequences if let Some(show_seq) = print_sequences { if show_seq && self.stretch.len() > 0 { let decal = -self.stretch[0].1; // first have to be leftmost for (node_index, offset) in self.stretch.iter() { let seq = self.sequences.get(&node_index).unwrap(); let name = self.sequences_names.get(&node_index).unwrap(); let mut l = format!("{}{}", " ".repeat((offset + decal) as usize).to_string(), seq).as_bytes().to_vec(); let mut ll = format!("{} - {}", " ".repeat(contig_len - l.len()).to_string(), name).as_bytes().to_vec(); l.append(&mut ll); lines.push(l); } } // Contig lines.push(contig); } // other ref if self.alignments.len() > 1 { let ref_spaces = " ".repeat(self.alignments[1].query_range.start).as_bytes().to_vec(); // match pipes let mut match_pipes = self.alignments[1].ref_cigar.as_bytes().iter().filter(|c| **c == b'M').map(|_| b'|') .collect::>(); let mut l = ref_spaces.clone(); l.append(&mut match_pipes); let mut ll = format!("{} - {}:{}-{}{}", " ".repeat(contig_len - l.len()).to_string(), self.alignments[1].ref_name, self.alignments[1].ref_range.start, self.alignments[1].ref_range.end, if self.alignments[1].ref_range.start > self.alignments[1].ref_range.end { " (rc)" } else { "" } ).as_bytes().to_vec(); l.append(&mut ll); lines.push(l); // ref sequence let mut l = ref_spaces.clone(); let ref_cigar = self.alignments[1].ref_cigar.as_bytes(); let mut filtered_ref: Vec = self.alignments[1].ref_sequence .as_bytes() .iter().enumerate() .filter(|(i, _)| ref_cigar[*i] == b'M') .map(|(_, c)| *c) .collect(); l.append(&mut filtered_ref); lines.push(l); } lines } pub fn print_stretch(&self) { for line in self.make_aln(Some(false)) { println!("{}", String::from_utf8_lossy(&line)); } } } pub struct Fasta { fa: IndexedFasta } impl Fasta { pub fn new (path: &str) -> Self { Fasta { fa: IndexedFasta::from_file(path).expect("Error opening fa") } } fn get_sequence (&self, contig: &str, start: usize, end: usize) -> String { // end is included let start = start - 1; let end = end - 1; let chr_index = self.fa.fai().tid(contig).expect("Cannot find chr in index"); if (end as i64 - start as i64) < (0 as i64) { let fv = self.fa.view(chr_index, end, start).expect("Cannot get .fa view"); revcomp(&fv.to_string().to_uppercase()) } else { let fv = self.fa.view(chr_index, start, end).expect("Cannot get .fa view"); fv.to_string().to_uppercase() } } } fn is_reverse (flag: i32) -> bool { flag & 0x10 == 0x10 } fn matched_range (cigar: &str, flag: &i32, matched_seq: &mut str) -> (Range, Range, String) { let mut n_head_to_rm = 0; for c in matched_seq.as_bytes().to_vec().iter() { if *c == b'N' { n_head_to_rm += 1 } else { break; } } let mut n_trail_to_rm = 0; for c in matched_seq.as_bytes().to_vec().iter().rev() { if *c == b'N' { n_trail_to_rm += 1 } else { break; } } let all_op = vec!["M", "I", "D", "N", "S", "=", "X", "H", "P"]; let consumes_query = vec!["M", "I", "S", "=", "X", "H"]; let consumes_ref = vec!["M", "D", "N", "=", "X"]; let mut range_query:Range = Range::default(); let mut range_ref:Range = Range::default(); let mut num_acc = String::new(); let mut query_pos = 0; let mut ref_pos = 0; let mut ref_cigar_string = "".to_string(); let mut first_m = true; let n_op = cigar.split("").filter(|c| all_op.contains(c)).count(); let mut curr_op = 1; for f in cigar.split("") { match f.parse::() { Ok(_) => num_acc.push_str(f), // if a number added to the accumulator Err(_) => { // if a letter if f != "" { //parse the accumulator let mut current_add = num_acc.parse::().unwrap(); num_acc = "".to_string(); if n_head_to_rm > 0 && curr_op == 1 { current_add = current_add - n_head_to_rm; } if n_trail_to_rm > 0 && curr_op == n_op { current_add = current_add - n_trail_to_rm; } curr_op += 1; if f == "M" { if first_m { range_query.start = query_pos; range_query.end = query_pos + current_add; range_ref.start = ref_pos; range_ref.end = ref_pos + current_add; first_m = false; } else { range_query.end = query_pos + current_add; range_ref.end = ref_pos + current_add; } } if consumes_query.contains(&f) { query_pos += current_add; } if consumes_ref.contains(&f) { ref_pos += current_add; let toadd = f.repeat(current_add); ref_cigar_string.push_str(&toadd); } } } } } if is_reverse(*flag) { let query_len = range_query.len(); range_query.start = query_pos - range_query.end; range_query.end = range_query.start + query_len - 1; } assert_eq!(range_ref.len(), ref_cigar_string.len()); (range_ref, range_query, ref_cigar_string) } fn revcomp(dna: &str) -> String{ let mut rdna: String = String::with_capacity(dna.len()); for c in dna.chars().rev() { rdna.push(switch_base(c)); } rdna } fn switch_base(c:char) -> char { match c { 'a' => 't', 'c' => 'g', 't' => 'a', 'g' => 'c', 'u' => 'a', 'A' => 'T', 'C' => 'G', 'T' => 'A', 'G' => 'C', 'U' => 'A', _ => 'N' } } fn get_overlaps (a: &Vec, b: &Vec, min_len: usize, max_consecutive: usize, max_diffs: usize) -> Option<(Range, Range)> { let (a_len, b_len) = (a.len(), b.len()); let (smallest_len, biggest_len) = if a_len <= b_len { (a_len, b_len) } else { (b_len, a_len) }; let mut res: Option<(Range, Range)> = None; for i in min_len..(smallest_len + (biggest_len - smallest_len)) { let range_a: Range; let range_b: Range; if i <= smallest_len { range_a = Range { start: 0, end: i }; range_b = Range { start: b_len - i, end: b_len }; } else { // check if inside if smallest_len == a_len { let after = i - smallest_len - 1; range_a = Range { start: 0, end: a_len }; range_b = Range { start: after, end: a_len + after}; } else { let after = i - smallest_len - 1; range_a = Range { start: after, end: b_len + after}; range_b = Range { start: 0, end: b_len}; } } if &a[range_a.clone()] == &b[range_b.clone()] { res = Some((range_a, range_b)); continue; } else if match_tol(&a[range_a.clone()], &b[range_b.clone()], max_consecutive, max_diffs) { res = Some((range_a, range_b)); continue; } else if res.is_some() { return res; } } res } fn fuse_overlaps (a: &Vec, b: &Vec, a_range: &Range, b_range: &Range) -> Vec { let mut res: Vec = Vec::new(); if a_range.start == 0 { res = [b.clone(), a[a_range.end..a.len()].to_vec() ].concat().to_vec(); } else if b_range.start == 0 { res = [a.clone(), b[b_range.end..b.len()].to_vec() ].concat().to_vec(); } res } // max one consecutive diff and 5 diffs fn match_tol (s1: &[u8], s2: &[u8], max_consecutive: usize, max_diffs: usize) -> bool { let max_consecutive= max_consecutive + 2; // let max_diffs: usize = 5; let mut diffs: Vec = Vec::new(); for (a, b) in std::iter::zip(s1, s2) { diffs.push((a == b) as usize); let sum: usize = diffs.iter().rev().take(max_consecutive).sum(); if sum == 0 && diffs.len() >= max_consecutive { return false } } let sum: usize = diffs.iter().rev().take(max_consecutive).sum(); if sum == max_consecutive { diffs.len() - diffs.iter().sum::() <= max_diffs } else { false } }