|
|
@@ -0,0 +1,1201 @@
|
|
|
+pub mod breakpoint;
|
|
|
+pub mod genomic_graph;
|
|
|
+// mod phase;
|
|
|
+
|
|
|
+use anyhow::{anyhow, Ok, Result};
|
|
|
+use fasta::record::Sequence;
|
|
|
+use log::info;
|
|
|
+use minimap2::{Aligner, Mapping};
|
|
|
+use noodles_fasta as fasta;
|
|
|
+use num_format::{CustomFormat, Grouping, ToFormattedString, WriteFormatted};
|
|
|
+use petgraph::{dot::Dot, prelude::*};
|
|
|
+use rust_htslib::bam::{self, Record};
|
|
|
+use std::{
|
|
|
+ collections::HashMap,
|
|
|
+ fmt,
|
|
|
+ fs::{self, File},
|
|
|
+ io::{BufReader, BufWriter, Write},
|
|
|
+ path::PathBuf,
|
|
|
+ process::{Command, Stdio},
|
|
|
+};
|
|
|
+use uuid::Uuid;
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Genome {
|
|
|
+ pub chromosomes: HashMap<String, Chromosome>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Chromosome {
|
|
|
+ contigs: Vec<Contig>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct Contig {
|
|
|
+ pub id: String,
|
|
|
+ // contig seq on ref
|
|
|
+ pub mappings: Vec<Mapping>,
|
|
|
+ // reads on ref
|
|
|
+ pub supporting_records: Option<Vec<Record>>,
|
|
|
+ pub sequence: String,
|
|
|
+ pub contig_ref: ContigRef,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub enum ContigRef {
|
|
|
+ Unique(Mapping),
|
|
|
+ Chimeric((Mapping, Mapping)),
|
|
|
+ ChimericTriple((Mapping, Mapping, Mapping)),
|
|
|
+ ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
|
|
|
+ LeftAmbiguity((Vec<Mapping>, Mapping)),
|
|
|
+ RightAmbiguity((Mapping, Vec<Mapping>)),
|
|
|
+ Ambigous(Vec<Mapping>),
|
|
|
+}
|
|
|
+
|
|
|
+impl fmt::Display for ContigRef {
|
|
|
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
|
+ let str = match self {
|
|
|
+ ContigRef::Unique(m) => mapping_to_string(m),
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
+ format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
|
|
|
+ }
|
|
|
+ ContigRef::ChimericMultiple((a, v, b)) => format!(
|
|
|
+ "{}<->{}<->{}",
|
|
|
+ mapping_to_string(a),
|
|
|
+ mappings_to_string(v),
|
|
|
+ mapping_to_string(b)
|
|
|
+ ),
|
|
|
+ ContigRef::LeftAmbiguity((v, b)) => {
|
|
|
+ format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
|
|
|
+ }
|
|
|
+ ContigRef::RightAmbiguity((a, v)) => {
|
|
|
+ format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
|
|
|
+ }
|
|
|
+ ContigRef::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
+ ContigRef::ChimericTriple((a, b, c)) => format!(
|
|
|
+ "{}<->{}<->{}",
|
|
|
+ mapping_to_string(a),
|
|
|
+ mapping_to_string(b),
|
|
|
+ mapping_to_string(c)
|
|
|
+ ),
|
|
|
+ };
|
|
|
+ fmt.write_str(&str).unwrap();
|
|
|
+
|
|
|
+ std::result::Result::Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl ContigRef {
|
|
|
+ pub fn breakpoints(&self) -> Option<Vec<Mapping>> {
|
|
|
+ match self {
|
|
|
+ ContigRef::Unique(_) => None,
|
|
|
+ ContigRef::Chimeric((a, b)) => Some(vec![a.clone(), b.clone()]),
|
|
|
+ ContigRef::ChimericTriple((a, b, c)) => Some(vec![a.clone(), b.clone(), c.clone()]),
|
|
|
+ ContigRef::ChimericMultiple(_) => None,
|
|
|
+ ContigRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigRef::RightAmbiguity(_) => None,
|
|
|
+ ContigRef::Ambigous(_) => None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+ pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
+ let left = "►";
|
|
|
+ let right = "◄";
|
|
|
+ let bp_right = "▐";
|
|
|
+ let bp_left = "▌";
|
|
|
+ let format = CustomFormat::builder()
|
|
|
+ .grouping(Grouping::Standard)
|
|
|
+ .minus_sign("-")
|
|
|
+ .separator("_")
|
|
|
+ .build()
|
|
|
+ .unwrap();
|
|
|
+
|
|
|
+ let get_sign = |m: &Mapping| -> &str {
|
|
|
+ match m.strand {
|
|
|
+ minimap2::Strand::Forward => left,
|
|
|
+ minimap2::Strand::Reverse => right,
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ let uk = "UNKNOWN".to_string();
|
|
|
+ let mut res = Vec::new();
|
|
|
+ if let Some(breakpoints) = self.breakpoints() {
|
|
|
+ for v in breakpoints.windows(2) {
|
|
|
+ let mut bp_string = format!("{}:", v[0].target_name.clone().unwrap_or(uk.clone()));
|
|
|
+ let _ = bp_string
|
|
|
+ .write_formatted(&v[0].target_end, &format)
|
|
|
+ .unwrap();
|
|
|
+ bp_string = format!(
|
|
|
+ "{bp_string}{}{bp_left}{bp_right}{}{}:",
|
|
|
+ get_sign(&v[0]),
|
|
|
+ get_sign(&v[1]),
|
|
|
+ v[1].target_name.clone().unwrap_or(uk.clone())
|
|
|
+ );
|
|
|
+ let _ = bp_string
|
|
|
+ .write_formatted(&v[1].target_start, &format)
|
|
|
+ .unwrap();
|
|
|
+ res.push(bp_string);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if !res.is_empty() {
|
|
|
+ Some(res)
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ }
|
|
|
+ pub fn desc(&self) -> Option<String> {
|
|
|
+ let uk = "UNKNOWN".to_string();
|
|
|
+ let to_desc = |v: &mut Vec<Mapping>| -> String {
|
|
|
+ v.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+ let v: Vec<String> = v
|
|
|
+ .into_iter()
|
|
|
+ .map(|e| {
|
|
|
+ let strand = match e.strand {
|
|
|
+ minimap2::Strand::Forward => "",
|
|
|
+ minimap2::Strand::Reverse => "_rev",
|
|
|
+ };
|
|
|
+ format!(
|
|
|
+ "{}:{}_{}{}",
|
|
|
+ e.target_name.clone().unwrap_or(uk.clone()),
|
|
|
+ e.target_start,
|
|
|
+ e.target_end,
|
|
|
+ strand
|
|
|
+ )
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ format!("[{}]", v.join(";"))
|
|
|
+ };
|
|
|
+
|
|
|
+ match self {
|
|
|
+ ContigRef::Unique(a) => Some(format!(
|
|
|
+ "{}:{}_{}",
|
|
|
+ a.target_name.clone().unwrap_or(uk.clone()),
|
|
|
+ a.target_start,
|
|
|
+ a.target_end
|
|
|
+ )),
|
|
|
+ ContigRef::Chimeric((a, b)) => Some(to_desc(&mut vec![a.to_owned(), b.to_owned()])),
|
|
|
+ ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
+ Some(to_desc(&mut vec![a.to_owned(), b.to_owned(), c.to_owned()]))
|
|
|
+ }
|
|
|
+ ContigRef::ChimericMultiple(_) => None,
|
|
|
+ ContigRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigRef::RightAmbiguity(_) => None,
|
|
|
+ ContigRef::Ambigous(a) => Some(to_desc(&mut a.to_owned())),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn hgvs(&self) -> Option<String> {
|
|
|
+ let uk = "UNKNOWN".to_string();
|
|
|
+ match self {
|
|
|
+ ContigRef::Unique(_) => None,
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
+ if a.target_name == b.target_name {
|
|
|
+ let chr = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let del_start = a.target_end;
|
|
|
+ let del_end = b.target_start;
|
|
|
+ let hgvs = format!("{chr}:{del_start}_{del_end}");
|
|
|
+ Some(hgvs)
|
|
|
+ } else {
|
|
|
+ let a_chr = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let a_bp = a.target_end;
|
|
|
+ let b_chr = b.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let b_bp = b.target_end;
|
|
|
+ let hgvs = format!("{a_chr}:{a_bp}delins[{b_chr}:{b_bp}]");
|
|
|
+ Some(hgvs)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ContigRef::ChimericMultiple(_) => None,
|
|
|
+ ContigRef::LeftAmbiguity(_) => None,
|
|
|
+ ContigRef::RightAmbiguity(_) => None,
|
|
|
+ ContigRef::Ambigous(_) => None,
|
|
|
+ ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
+ let mut v = [a, b, c];
|
|
|
+ v.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+ let (a, b, c) = (
|
|
|
+ *v.get(0).clone().unwrap(),
|
|
|
+ *v.get(1).clone().unwrap(),
|
|
|
+ *v.get(2).clone().unwrap(),
|
|
|
+ );
|
|
|
+ let a_target_name = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let b_target_name = b.target_name.clone().unwrap_or(uk.clone());
|
|
|
+ let c_target_name = c.target_name.clone().unwrap_or(uk.clone());
|
|
|
+
|
|
|
+ // if a_target_name != b_target_name {}
|
|
|
+
|
|
|
+ // Insertions
|
|
|
+ // prioritize first len
|
|
|
+ let (bp_a_1, bp_a_2) = if a.query_end <= b.query_end {
|
|
|
+ // TODO add inserted nt
|
|
|
+ (
|
|
|
+ (a.target_name.clone().unwrap_or(uk.clone()), a.target_end),
|
|
|
+ (b.target_name.clone().unwrap_or(uk.clone()), b.target_start),
|
|
|
+ )
|
|
|
+ } else {
|
|
|
+ let diff = a.query_end - b.query_start;
|
|
|
+ (
|
|
|
+ (a.target_name.clone().unwrap_or(uk.clone()), a.target_end),
|
|
|
+ (
|
|
|
+ b.target_name.clone().unwrap_or(uk.clone()),
|
|
|
+ b.target_start + diff,
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ };
|
|
|
+ let (bp_b_1, bp_b_2) = if b.query_end <= c.query_end {
|
|
|
+ // TODO add inserted nt
|
|
|
+ (
|
|
|
+ (b.target_name.clone().unwrap_or(uk.clone()), b.target_end),
|
|
|
+ (c.target_name.clone().unwrap_or(uk.clone()), c.target_start),
|
|
|
+ )
|
|
|
+ } else {
|
|
|
+ let diff = b.query_end - c.query_start;
|
|
|
+ (
|
|
|
+ (b.target_name.clone().unwrap_or(uk.clone()), b.target_end),
|
|
|
+ (
|
|
|
+ c.target_name.clone().unwrap_or(uk.clone()),
|
|
|
+ c.target_start + diff,
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ };
|
|
|
+ if bp_a_1.0 == bp_b_2.0 {
|
|
|
+ let hgvs = format!(
|
|
|
+ "{}:{}_{}ins[{}:{}_{}]",
|
|
|
+ bp_a_1.0, bp_a_1.1, bp_b_2.1, bp_a_2.0, bp_a_2.1, bp_b_1.1
|
|
|
+ );
|
|
|
+ Some(hgvs)
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+pub fn mapping_to_string(mapping: &Mapping) -> String {
|
|
|
+ let uk = "UNKNOWN".to_string();
|
|
|
+ format!(
|
|
|
+ "{}:{}-{}({}:{}-{})",
|
|
|
+ mapping.target_name.clone().unwrap_or(uk.clone()),
|
|
|
+ mapping.target_start,
|
|
|
+ mapping.target_end,
|
|
|
+ mapping.query_name.clone().unwrap_or(uk),
|
|
|
+ mapping.query_start,
|
|
|
+ mapping.query_end
|
|
|
+ )
|
|
|
+}
|
|
|
+
|
|
|
+fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
|
|
|
+ let v = mappings
|
|
|
+ .iter()
|
|
|
+ .map(mapping_to_string)
|
|
|
+ .collect::<Vec<String>>();
|
|
|
+ v.join("//")
|
|
|
+}
|
|
|
+
|
|
|
+pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
|
|
|
+ let mut mappings = mappings;
|
|
|
+ mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+
|
|
|
+ if mappings.len() == 1 {
|
|
|
+ return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone()));
|
|
|
+ } else {
|
|
|
+ let mut grouped: Vec<Vec<Mapping>> = group_mappings(&mut mappings)?;
|
|
|
+ // let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut mappings)?.into();
|
|
|
+
|
|
|
+ if grouped.len() == 1 {
|
|
|
+ let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
+ return Ok(ContigRef::Ambigous(r));
|
|
|
+ } else if grouped.len() >= 2 {
|
|
|
+ // let first = grouped.pop_back().unwrap();
|
|
|
+ // let last = grouped.pop_front().unwrap();
|
|
|
+ let first = grouped.first().unwrap().to_vec();
|
|
|
+ let last = grouped.last().unwrap().to_vec();
|
|
|
+ grouped.remove(0);
|
|
|
+ grouped.remove(grouped.len() - 1);
|
|
|
+ assert!(first[0].query_start < last[0].query_start);
|
|
|
+
|
|
|
+ if grouped.len() == 0 {
|
|
|
+ if first.len() == 1 && last.len() == 1 {
|
|
|
+ return Ok(ContigRef::Chimeric((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else if first.len() == 1 {
|
|
|
+ return Ok(ContigRef::RightAmbiguity((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ last.clone(),
|
|
|
+ )));
|
|
|
+ } else if last.len() == 1 {
|
|
|
+ return Ok(ContigRef::LeftAmbiguity((
|
|
|
+ first.clone(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else {
|
|
|
+ let all: Vec<Mapping> = vec![first, last].into_iter().flat_map(|e| e).collect();
|
|
|
+ return Ok(ContigRef::Ambigous(all));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if first.len() == 1 && last.len() == 1 {
|
|
|
+ if grouped.len() == 1 {
|
|
|
+ return Ok(ContigRef::ChimericTriple((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ grouped.get(0).unwrap().get(0).unwrap().clone(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else {
|
|
|
+ return Ok(ContigRef::ChimericMultiple((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ }
|
|
|
+ } else if first.len() == 1 {
|
|
|
+ let right: Vec<Mapping> = vec![grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigRef::RightAmbiguity((
|
|
|
+ first.get(0).unwrap().clone(),
|
|
|
+ right,
|
|
|
+ )));
|
|
|
+ } else if last.len() == 1 {
|
|
|
+ let left: Vec<Mapping> = vec![first, grouped.into_iter().flat_map(|e| e).collect()]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigRef::LeftAmbiguity((
|
|
|
+ left,
|
|
|
+ last.get(0).unwrap().clone(),
|
|
|
+ )));
|
|
|
+ } else {
|
|
|
+ let all: Vec<Mapping> =
|
|
|
+ vec![first, grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
+ .into_iter()
|
|
|
+ .flat_map(|e| e)
|
|
|
+ .collect();
|
|
|
+ return Ok(ContigRef::Ambigous(all));
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ return Ok(ContigRef::Ambigous(
|
|
|
+ grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
+ ));
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Genome {
|
|
|
+ pub fn new() -> Self {
|
|
|
+ Genome {
|
|
|
+ chromosomes: HashMap::new(),
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> {
|
|
|
+ self.chromosomes.iter()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn contigs(&self) -> impl Iterator<Item = &Contig> {
|
|
|
+ self.chromosomes.iter().flat_map(|(_, e)| e.iter())
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn add_contig(
|
|
|
+ &mut self,
|
|
|
+ id: String,
|
|
|
+ mappings: Vec<Mapping>,
|
|
|
+ supporting_records: Option<Vec<Record>>,
|
|
|
+ sequence: String,
|
|
|
+ ) -> Result<()> {
|
|
|
+ let mut mappings = mappings;
|
|
|
+ mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+ let new_contig = Contig {
|
|
|
+ id,
|
|
|
+ mappings: mappings.clone(),
|
|
|
+ supporting_records,
|
|
|
+ sequence,
|
|
|
+ contig_ref: get_ref_pos(mappings)?,
|
|
|
+ };
|
|
|
+ // get the category of Mapping
|
|
|
+ match new_contig.contig_ref.clone() {
|
|
|
+ ContigRef::Unique(contig_mapping) => {
|
|
|
+ match self
|
|
|
+ .chromosomes
|
|
|
+ .get_mut(&contig_mapping.target_name.unwrap())
|
|
|
+ {
|
|
|
+ Some(chromosome) => {
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
+ }
|
|
|
+ None => (),
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
+ let a_target_name = a.target_name.unwrap();
|
|
|
+ let b_target_name = b.target_name.unwrap();
|
|
|
+ if a_target_name == b_target_name {
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&a_target_name) {
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ a_target_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![new_contig],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ let chimeric_name = format!("{}-{}", a_target_name, b_target_name);
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ chimeric_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![new_contig],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ContigRef::ChimericMultiple((left, _, right)) => {
|
|
|
+ let left_target_name = left.target_name.unwrap();
|
|
|
+ let right_target_name = right.target_name.unwrap();
|
|
|
+ if left_target_name == right_target_name {
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&left_target_name) {
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ left_target_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![new_contig],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ let chimeric_name = format!("{}-{}", left_target_name, right_target_name);
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ chimeric_name,
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![new_contig],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ _ => {
|
|
|
+ if let Some(chromosome) = self.chromosomes.get_mut("Ambigous") {
|
|
|
+ chromosome.contigs.push(new_contig);
|
|
|
+ } else {
|
|
|
+ self.chromosomes.insert(
|
|
|
+ "Ambigous".to_string(),
|
|
|
+ Chromosome {
|
|
|
+ contigs: vec![new_contig],
|
|
|
+ },
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn add_contig_from_seq(
|
|
|
+ &mut self,
|
|
|
+ name: String,
|
|
|
+ sequence: &[u8],
|
|
|
+ aligner: impl Fn(String) -> Result<Vec<Mapping>>,
|
|
|
+ ) -> Result<()> {
|
|
|
+ let mappings = aligner(String::from_utf8(sequence.to_vec())?)?;
|
|
|
+ // println!("{mappings:?}");
|
|
|
+ self.add_contig(name, mappings, None, String::from_utf8(sequence.to_vec())?)?;
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn write_contigs_sequences(&self, dir: &str) {
|
|
|
+ for contig in self.contigs() {
|
|
|
+ contig.write_fasta(&format!("{dir}/{}.fasta", contig.id))
|
|
|
+ }
|
|
|
+ // self.iter().for_each(|(_, chr)| {
|
|
|
+ // chr.iter().for_each(|c| c.write_fasta(&format!("{dir}/{}.fasta", c.id)))
|
|
|
+ // });
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn from_contigs_sequences(dir: &str) -> Result<Self> {
|
|
|
+ let aligner_url = "http://localhost:4444/align";
|
|
|
+ let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
+ let mut genome = Self::new();
|
|
|
+ // let paths = get_ext_paths(dir, "fasta")?;
|
|
|
+ let paths = get_contigs_fa_paths(dir)?;
|
|
|
+ for path in paths {
|
|
|
+ let fa = read_fasta(path.to_str().unwrap())?;
|
|
|
+ for (name, sequence) in fa {
|
|
|
+ genome.add_contig_from_seq(name, sequence.as_ref(), &aligner)?;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Ok(genome)
|
|
|
+ }
|
|
|
+ //
|
|
|
+ // pub fn from_dir_bed(dir: &str) {
|
|
|
+ //
|
|
|
+ // }
|
|
|
+
|
|
|
+ // pub fn write_records(&self, file: &str) {
|
|
|
+ // let mut records = Vec::new();
|
|
|
+ // for (name, chromosome) in self.chromosomes.iter() {
|
|
|
+ // for contig in chromosome.iter() {
|
|
|
+ // if let Some(rec) = &contig.supporting_records {
|
|
|
+ // records.extend(rec.iter());
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // // header
|
|
|
+ //
|
|
|
+ // // writer
|
|
|
+ // rust_htslib::bam::Writer::from_path(path, header, format)
|
|
|
+ // }
|
|
|
+
|
|
|
+ pub fn stats(&self) {
|
|
|
+ for (k, v) in self.chromosomes.iter() {
|
|
|
+ info!("{}:{}", k, v.contigs.len());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn chromosome(&self, chromosome: &str) -> Option<Vec<Contig>> {
|
|
|
+ self.chromosomes
|
|
|
+ .get(chromosome)
|
|
|
+ .map(|chr| chr.contigs.clone())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Chromosome {
|
|
|
+ pub fn iter(&self) -> std::slice::Iter<'_, Contig> {
|
|
|
+ self.contigs.iter()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Contig {
|
|
|
+ // pub fn sort(&mut self) {
|
|
|
+ // // sorting by target order
|
|
|
+ // self.mappings
|
|
|
+ // .sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
+ // }
|
|
|
+
|
|
|
+ pub fn to_igv(&self, dir_path: &str) -> Result<()> {
|
|
|
+ let supporting_records = self.supporting_records.clone().ok_or(anyhow!("no reads"))?;
|
|
|
+ let contig_name = if let Some(hgvs) = self.hgvs() {
|
|
|
+ hgvs
|
|
|
+ } else {
|
|
|
+ self.id.clone()
|
|
|
+ };
|
|
|
+ let contig_dir = format!("{dir_path}/{contig_name}");
|
|
|
+ fs::create_dir_all(contig_dir.clone())?;
|
|
|
+
|
|
|
+ let fasta_path = format!("{contig_dir}/contig.fa");
|
|
|
+ write_fasta(&fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
|
|
|
+ write_fai(&fasta_path);
|
|
|
+
|
|
|
+ let reads_path = format!("{contig_dir}/reads.fa");
|
|
|
+ let n_reads = supporting_records
|
|
|
+ .clone()
|
|
|
+ .into_iter()
|
|
|
+ .map(|r| {
|
|
|
+ (
|
|
|
+ String::from_utf8(r.qname().to_vec()).unwrap(),
|
|
|
+ String::from_utf8(r.seq().as_bytes()).unwrap(),
|
|
|
+ )
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ write_fasta(&reads_path, &n_reads);
|
|
|
+
|
|
|
+ let bam_path = format!("{contig_dir}/{}.bam", self.id);
|
|
|
+ write_bam(&fasta_path, &reads_path, &bam_path)?;
|
|
|
+
|
|
|
+ let bed_path = format!("{contig_dir}/contig.bed");
|
|
|
+ match &self.contig_ref {
|
|
|
+ ContigRef::Chimeric((a, b)) => {
|
|
|
+ let d = vec![
|
|
|
+ (
|
|
|
+ self.id.clone(),
|
|
|
+ a.query_start,
|
|
|
+ a.query_end,
|
|
|
+ format!(
|
|
|
+ "{}:{}-{}",
|
|
|
+ a.target_name.clone().unwrap(),
|
|
|
+ a.target_start,
|
|
|
+ a.target_end
|
|
|
+ ),
|
|
|
+ ),
|
|
|
+ (
|
|
|
+ self.id.clone(),
|
|
|
+ b.query_start,
|
|
|
+ b.query_end,
|
|
|
+ format!(
|
|
|
+ "{}:{}-{}",
|
|
|
+ b.target_name.clone().unwrap(),
|
|
|
+ b.target_start,
|
|
|
+ b.target_end
|
|
|
+ ),
|
|
|
+ ),
|
|
|
+ ];
|
|
|
+ write_bed(&bed_path, &d)?;
|
|
|
+ }
|
|
|
+ ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
+ let d: Vec<(String, i32, i32, String)> = [a, b, c]
|
|
|
+ .iter()
|
|
|
+ .map(|r| {
|
|
|
+ (
|
|
|
+ self.id.clone(),
|
|
|
+ r.query_start,
|
|
|
+ r.query_end,
|
|
|
+ format!(
|
|
|
+ "{}:{}-{}",
|
|
|
+ r.target_name.clone().unwrap(),
|
|
|
+ r.target_start,
|
|
|
+ r.target_end
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ write_bed(&bed_path, &d)?;
|
|
|
+ }
|
|
|
+ _ => (),
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ // bug cigar len != seq len
|
|
|
+ // pub fn write_bam(&self, path: &str) -> Result<()> {
|
|
|
+ // let aligner = Aligner::builder()
|
|
|
+ // .asm5()
|
|
|
+ // .with_threads(8)
|
|
|
+ // .with_cigar()
|
|
|
+ // .with_seq(self.sequence.as_bytes())
|
|
|
+ // .expect("Unable to build index");
|
|
|
+ //
|
|
|
+ // let mut mappings = Vec::new();
|
|
|
+ // let supporting_records = self
|
|
|
+ // .supporting_records
|
|
|
+ // .clone()
|
|
|
+ // .ok_or(anyhow!("no supporting records"))?;
|
|
|
+ // for record in supporting_records.iter() {
|
|
|
+ // let seq = record.seq().as_bytes();
|
|
|
+ // let alignment = aligner
|
|
|
+ // .map(&seq, false, false, None, None)
|
|
|
+ // .expect("Unable to align");
|
|
|
+ // mappings.push(alignment);
|
|
|
+ // }
|
|
|
+ // let mut mappings: Vec<_> = mappings.into_iter().flatten().collect();
|
|
|
+ // mappings.sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
+ //
|
|
|
+ // let mut header = bam::Header::new();
|
|
|
+ // let mut sq_record = rust_htslib::bam::header::HeaderRecord::new(b"SQ");
|
|
|
+ // sq_record.push_tag(b"SN", self.id.clone());
|
|
|
+ // sq_record.push_tag(b"LN", self.sequence.len());
|
|
|
+ // header.push_record(&sq_record);
|
|
|
+ //
|
|
|
+ // let mut out = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap();
|
|
|
+ //
|
|
|
+ // // copy reverse reads to new BAM file
|
|
|
+ // for mapping in mappings.iter() {
|
|
|
+ // let record = minimap2::htslib::mapping_to_record(
|
|
|
+ // Some(mapping),
|
|
|
+ // self.sequence.as_bytes(),
|
|
|
+ // header.clone(),
|
|
|
+ // None,
|
|
|
+ // Some(Uuid::new_v4().as_bytes()),
|
|
|
+ // );
|
|
|
+ // let _ = out.write(&record);
|
|
|
+ // }
|
|
|
+ // rust_htslib::bam::index::build(path, None, rust_htslib::bam::index::Type::Bai, 1)?;
|
|
|
+ // Ok(())
|
|
|
+ // }
|
|
|
+
|
|
|
+ pub fn hgvs(&self) -> Option<String> {
|
|
|
+ self.contig_ref.hgvs()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn desc(&self) -> Option<String> {
|
|
|
+ self.contig_ref.desc()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
+ self.contig_ref.breakpoints_repr()
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn write_fasta(&self, fasta_path: &str) {
|
|
|
+ write_fasta(fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn group_mappings(mappings: &mut [Mapping]) -> Result<Vec<Vec<Mapping>>> {
|
|
|
+ // sort alignments by query_start
|
|
|
+ mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
+
|
|
|
+ let mut alignments: Vec<Vec<Mapping>> = vec![];
|
|
|
+ // group by overlapps > 30
|
|
|
+ for aln in mappings.iter() {
|
|
|
+ let last = alignments.last_mut();
|
|
|
+ if let Some(l) = last {
|
|
|
+ if l.iter()
|
|
|
+ .filter(|a| a.query_end - aln.query_start > 30)
|
|
|
+ .count()
|
|
|
+ > 0
|
|
|
+ {
|
|
|
+ l.push(aln.clone());
|
|
|
+ } else {
|
|
|
+ alignments.push(vec![aln.clone()]);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ alignments.push(vec![aln.clone()]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(alignments)
|
|
|
+}
|
|
|
+
|
|
|
+pub fn write_bed(path: &str, d: &[(String, i32, i32, String)]) -> Result<()> {
|
|
|
+ let file = File::create(path).unwrap();
|
|
|
+ let mut writer = BufWriter::new(file);
|
|
|
+ for (chr, start, end, value) in d.iter() {
|
|
|
+ let row = format!(
|
|
|
+ "{}\n",
|
|
|
+ [
|
|
|
+ chr.to_string(),
|
|
|
+ start.to_string(),
|
|
|
+ end.to_string(),
|
|
|
+ value.to_string()
|
|
|
+ ]
|
|
|
+ .join("\t")
|
|
|
+ );
|
|
|
+ writer.write_all(row.as_bytes())?;
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|
|
|
+// unique
|
|
|
+pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) -> Result<()> {
|
|
|
+ let file = File::create(fastq_path)?;
|
|
|
+ let mut writer = BufWriter::new(file);
|
|
|
+ for record in d {
|
|
|
+ let name = String::from_utf8(record.qname().to_vec()).unwrap();
|
|
|
+ writer.write_all(format!("@{name}\n").as_bytes())?;
|
|
|
+ let seq = record.seq().as_bytes();
|
|
|
+ writer.write_all(&seq)?;
|
|
|
+ writer.write_all(b"\n+\n")?;
|
|
|
+ let qual = record.qual();
|
|
|
+ writer.write_all(qual)?;
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|
|
|
+pub fn write_fasta(fasta_path: &str, d: &Vec<(String, String)>) {
|
|
|
+ let file = File::create(fasta_path).unwrap();
|
|
|
+ let mut writer = fasta::writer::Builder::default().build_with_writer(file);
|
|
|
+ let mut passed = Vec::new();
|
|
|
+ for (name, sequence) in d {
|
|
|
+ let name = name.to_string();
|
|
|
+ if sequence.is_empty() {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if passed.contains(&name) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ passed.push(name.clone());
|
|
|
+ let record = fasta::Record::new(
|
|
|
+ fasta::record::Definition::new(name.as_str(), None),
|
|
|
+ fasta::record::Sequence::from(sequence.as_bytes().to_vec()),
|
|
|
+ );
|
|
|
+ writer.write_record(&record).unwrap();
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+pub fn write_fai(path: &str) {
|
|
|
+ let mut faidx = Command::new("samtools")
|
|
|
+ .arg("faidx")
|
|
|
+ .arg(path)
|
|
|
+ .spawn()
|
|
|
+ .expect("Samtools faidx failed to start");
|
|
|
+ faidx.wait().unwrap();
|
|
|
+}
|
|
|
+
|
|
|
+pub fn write_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()> {
|
|
|
+ let rg_id = uuid::Uuid::new_v4();
|
|
|
+
|
|
|
+ let mm2 = Command::new("minimap2")
|
|
|
+ .arg("-t")
|
|
|
+ .arg("128")
|
|
|
+ .arg("-ax")
|
|
|
+ .arg("map-ont")
|
|
|
+ .arg("-R")
|
|
|
+ .arg(format!(
|
|
|
+ "@RG\\tPL:ONTASM_PROM\\tID:ONTASM_{rg_id}\\tSM:{rg_id}\\tLB:ONTASM_NB_PROM"
|
|
|
+ ))
|
|
|
+ .arg(ref_path)
|
|
|
+ .arg(reads_path)
|
|
|
+ .stdout(Stdio::piped())
|
|
|
+ .stderr(Stdio::piped())
|
|
|
+ .spawn()
|
|
|
+ .expect("Minimap2 failed to start");
|
|
|
+
|
|
|
+ let view = Command::new("sambamba")
|
|
|
+ .arg("view")
|
|
|
+ .arg("-h")
|
|
|
+ .arg("-S")
|
|
|
+ .arg("-t")
|
|
|
+ .arg("20")
|
|
|
+ .arg("--format=bam")
|
|
|
+ .arg("/dev/stdin")
|
|
|
+ .stdin(Stdio::from(mm2.stdout.unwrap()))
|
|
|
+ .stdout(Stdio::piped())
|
|
|
+ .stderr(Stdio::piped())
|
|
|
+ .spawn()
|
|
|
+ .expect("Sambamba view failed to start");
|
|
|
+
|
|
|
+ let mut sort = Command::new("sambamba")
|
|
|
+ .arg("sort")
|
|
|
+ .arg("-t")
|
|
|
+ .arg("20")
|
|
|
+ .arg("/dev/stdin")
|
|
|
+ .arg("-o")
|
|
|
+ .arg(bam_path)
|
|
|
+ .stderr(Stdio::piped())
|
|
|
+ .stdin(Stdio::from(view.stdout.unwrap()))
|
|
|
+ .spawn()
|
|
|
+ .expect("Sambamba sort failed to start");
|
|
|
+
|
|
|
+ sort.wait().unwrap();
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|
|
|
+pub fn read_fasta(path: &str) -> Result<Vec<(String, Sequence)>> {
|
|
|
+ let mut reader = File::open(path)
|
|
|
+ .map(BufReader::new)
|
|
|
+ .map(fasta::Reader::new)?;
|
|
|
+
|
|
|
+ let mut res = Vec::new();
|
|
|
+ for result in reader.records() {
|
|
|
+ let record = result?;
|
|
|
+ let u = String::from_utf8(record.name().to_vec())?;
|
|
|
+ let s = record.sequence().to_owned();
|
|
|
+ res.push((u, s));
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(res)
|
|
|
+}
|
|
|
+
|
|
|
+// fn get_ext_paths(dir: &str, ext: &str) -> Result<Vec<PathBuf>> {
|
|
|
+// let paths = std::fs::read_dir(dir)?
|
|
|
+// // Filter out all those directory entries which couldn't be read
|
|
|
+// .filter_map(|res| res.ok())
|
|
|
+// // Map the directory entries to paths
|
|
|
+// .map(|dir_entry| dir_entry.path())
|
|
|
+// // Filter out all paths with extensions other than `csv`
|
|
|
+// .filter_map(|path| {
|
|
|
+// if path.extension().map_or(false, |xt| xt == ext) {
|
|
|
+// Some(path)
|
|
|
+// } else {
|
|
|
+// None
|
|
|
+// }
|
|
|
+// })
|
|
|
+// .collect::<Vec<_>>();
|
|
|
+// Ok(paths)
|
|
|
+// }
|
|
|
+
|
|
|
+fn get_contigs_fa_paths(dir: &str) -> Result<Vec<PathBuf>> {
|
|
|
+ let pattern = format!("{}/**/*_flye.fa", dir);
|
|
|
+ let fa_paths: Vec<PathBuf> = glob::glob(&pattern)
|
|
|
+ .expect("Failed to read glob pattern")
|
|
|
+ .filter_map(Result::ok)
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ Ok(fa_paths)
|
|
|
+}
|
|
|
+
|
|
|
+pub fn dot_graph(
|
|
|
+ graph: &StableGraph<String, String>,
|
|
|
+ way: &[NodeIndex],
|
|
|
+ value: &str,
|
|
|
+ color: &str,
|
|
|
+ erase: bool,
|
|
|
+) -> String {
|
|
|
+ let mut g = graph.clone();
|
|
|
+
|
|
|
+ // erase labels
|
|
|
+ if erase {
|
|
|
+ g.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut labels = Vec::new();
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = g.find_edge(window[0], window[1]).unwrap();
|
|
|
+ let edge = g.edge_weight_mut(edge_id).unwrap();
|
|
|
+ let v = if !edge.is_empty() {
|
|
|
+ let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect();
|
|
|
+ v.push(value);
|
|
|
+ v.join(", ")
|
|
|
+ } else {
|
|
|
+ value.to_string()
|
|
|
+ };
|
|
|
+ labels.push(v.clone());
|
|
|
+ *edge = v;
|
|
|
+ }
|
|
|
+
|
|
|
+ if erase {
|
|
|
+ g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
+ g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
+
|
|
|
+ labels.sort_by_key(|b| std::cmp::Reverse(b.len()));
|
|
|
+ // labels.sort_by(|a, b| b.len().cmp(&a.len()));
|
|
|
+ for label in labels {
|
|
|
+ let label_str = format!("label = \"{label}\"");
|
|
|
+ dot = dot.replace(
|
|
|
+ &format!("{label_str} ]\n"),
|
|
|
+ &format!("{label_str}, color = \"{color}\" ]\n"),
|
|
|
+ );
|
|
|
+ }
|
|
|
+ dot
|
|
|
+}
|
|
|
+
|
|
|
+/// for non overlapping ways
|
|
|
+pub fn dot_graph_biall(
|
|
|
+ graph: &StableGraph<String, String>,
|
|
|
+ ways: &[Vec<NodeIndex>],
|
|
|
+ values: Vec<&str>,
|
|
|
+ colors: Vec<&str>,
|
|
|
+ erase: bool,
|
|
|
+) -> String {
|
|
|
+ let mut g = graph.clone();
|
|
|
+ // erase labels
|
|
|
+ if erase {
|
|
|
+ g.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut labels = Vec::new();
|
|
|
+ for ((way, value), color) in ways.iter().zip(values.into_iter()).zip(colors.into_iter()) {
|
|
|
+ for window in way.windows(2) {
|
|
|
+ let edge_id = g.find_edge(window[0], window[1]).unwrap();
|
|
|
+ let edge = g.edge_weight_mut(edge_id).unwrap();
|
|
|
+ let v = if !edge.is_empty() {
|
|
|
+ let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect();
|
|
|
+ v.push(value);
|
|
|
+ v.join(", ")
|
|
|
+ } else {
|
|
|
+ value.to_string()
|
|
|
+ };
|
|
|
+ labels.push((v.clone(), color));
|
|
|
+ *edge = v;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
+ g.retain_edges(|g, i| !g.edge_weight(i).unwrap().is_empty());
|
|
|
+ g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
+
|
|
|
+ let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
+
|
|
|
+ labels.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
|
|
|
+ for (label, color) in labels {
|
|
|
+ let label_str = format!("label = \"{label}\"");
|
|
|
+ dot = dot.replace(
|
|
|
+ &format!("{label_str} ]\n"),
|
|
|
+ &format!("{label_str}, color = \"{color}\" ]\n"),
|
|
|
+ );
|
|
|
+ }
|
|
|
+ dot
|
|
|
+}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod tests {
|
|
|
+ use env_logger::Env;
|
|
|
+
|
|
|
+ use super::*;
|
|
|
+ use crate::{
|
|
|
+ genomic_graph::GenomicGraph,
|
|
|
+ // phase::{variants_phasing, write_phases_bed},
|
|
|
+ };
|
|
|
+
|
|
|
+ fn init() {
|
|
|
+ let _ = env_logger::Builder::from_env(Env::default().default_filter_or("info"))
|
|
|
+ .is_test(true)
|
|
|
+ .try_init();
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn it_works() -> Result<()> {
|
|
|
+ let _ = env_logger::builder().is_test(true).try_init();
|
|
|
+ let contig_fa = "./data_test/contig_2.fa";
|
|
|
+ let aligner_url = "http://localhost:4444/align";
|
|
|
+
|
|
|
+ let mut genome = Genome::new();
|
|
|
+ let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
+
|
|
|
+ let sequences = read_fasta(contig_fa)?;
|
|
|
+ for (name, seq) in sequences {
|
|
|
+ genome.add_contig_from_seq(name.clone(), &seq.as_ref().to_vec(), &aligner)?;
|
|
|
+ let mut seqc: Vec<u8> = seq.complement().map(|e| e.unwrap()).collect();
|
|
|
+ seqc.reverse();
|
|
|
+ genome.add_contig_from_seq(format!("{name}_rev"), &seqc, &aligner)?;
|
|
|
+ println!("Sending");
|
|
|
+ }
|
|
|
+ genome.iter().for_each(|(_, c)| {
|
|
|
+ c.iter().for_each(|cont| {
|
|
|
+ println!("{}", cont.contig_ref.desc().unwrap());
|
|
|
+ });
|
|
|
+ });
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn test_graph() -> Result<()> {
|
|
|
+ init();
|
|
|
+
|
|
|
+ let case = "SALICETTO";
|
|
|
+ let chrom = vec!["chr10"];
|
|
|
+ info!("This record will be captured by `cargo test`");
|
|
|
+
|
|
|
+ let dir = format!("/data/longreads_basic_pipe/{case}/diag/asm_contis");
|
|
|
+
|
|
|
+ // Load from fasta in dir.
|
|
|
+ let genome = Genome::from_contigs_sequences(&dir)?;
|
|
|
+ genome.stats();
|
|
|
+ let mut genomic_graph = GenomicGraph::from_genome(&genome);
|
|
|
+
|
|
|
+ let sens = vec![true, false];
|
|
|
+ let pos = vec![0, i32::MAX];
|
|
|
+ let mut all_ways = Vec::new();
|
|
|
+ if chrom.len() > 1 {
|
|
|
+ (0..4).into_iter().for_each(|i| {
|
|
|
+ let start_pos = if i < 2 { 0 } else { i32::MAX };
|
|
|
+ let end_pos = pos[i % 2];
|
|
|
+
|
|
|
+ (0..4).into_iter().for_each(|i| {
|
|
|
+ let start_sens = if i < 2 { true } else { false };
|
|
|
+ let end_sens = sens[i % 2];
|
|
|
+ (0..4).into_iter().for_each(|i| {
|
|
|
+ let start_chr = if i < 2 { chrom[0] } else { chrom[1] };
|
|
|
+ let end_chr = chrom[i % 2];
|
|
|
+ let start = (start_sens, start_chr, start_pos);
|
|
|
+ let end = (end_sens, end_chr, end_pos);
|
|
|
+
|
|
|
+ let (oriented_graph, _integrated_graph, ways) =
|
|
|
+ genomic_graph.ways(start, end);
|
|
|
+
|
|
|
+ let dot = oriented_graph.dot_graph();
|
|
|
+ println!("dot\n{dot}");
|
|
|
+
|
|
|
+ for (_i, way) in ways.iter().enumerate() {
|
|
|
+ let s = way
|
|
|
+ .iter()
|
|
|
+ .map(|(_, _, _, s)| s.to_string())
|
|
|
+ .collect::<Vec<String>>()
|
|
|
+ .join("");
|
|
|
+ all_ways.push(s);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ });
|
|
|
+ });
|
|
|
+ } else {
|
|
|
+ let start_chr = chrom[0];
|
|
|
+ let end_chr = chrom[0];
|
|
|
+ let start = (true, start_chr, 0);
|
|
|
+ let end = (true, end_chr, i32::MAX);
|
|
|
+
|
|
|
+ let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end);
|
|
|
+
|
|
|
+ let dot = oriented_graph.dot_graph();
|
|
|
+ println!("dot\n{dot}");
|
|
|
+
|
|
|
+ for (_i, way) in ways.iter().enumerate() {
|
|
|
+ let s = way
|
|
|
+ .iter()
|
|
|
+ .map(|(_, _, _, s)| s.to_string())
|
|
|
+ .collect::<Vec<String>>()
|
|
|
+ .join("");
|
|
|
+ all_ways.push(s);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ all_ways.dedup();
|
|
|
+ all_ways
|
|
|
+ .iter()
|
|
|
+ .enumerate()
|
|
|
+ .for_each(|(i, s)| println!("{i}.\t{s}"));
|
|
|
+
|
|
|
+ // let s = Dot::new(&integrated_graph).to_string().replace("\\\"", "");
|
|
|
+ // let x11_colors: Vec<String> = vec![
|
|
|
+ // String::from("Red"),
|
|
|
+ // String::from("Green"),
|
|
|
+ // String::from("Blue"),
|
|
|
+ // String::from("Cyan"),
|
|
|
+ // String::from("Magenta"),
|
|
|
+ // String::from("Yellow"),
|
|
|
+ // String::from("DarkRed"),
|
|
|
+ // String::from("DarkGreen"),
|
|
|
+ // String::from("DarkBlue"),
|
|
|
+ // String::from("DarkCyan"),
|
|
|
+ // String::from("DarkMagenta"),
|
|
|
+ // String::from("DarkYellow"),
|
|
|
+ // String::from("LightRed"),
|
|
|
+ // String::from("LightGreen"),
|
|
|
+ // String::from("LightBlue"),
|
|
|
+ // String::from("LightCyan"),
|
|
|
+ // String::from("LightMagenta"),
|
|
|
+ // String::from("LightYellow"),
|
|
|
+ // String::from("Orange"),
|
|
|
+ // String::from("Brown"),
|
|
|
+ // String::from("Beige"),
|
|
|
+ // ];
|
|
|
+ // let mut s = s.clone();
|
|
|
+ // ways.iter().enumerate().for_each(|(i, _)| {
|
|
|
+ // s = s.replace(
|
|
|
+ // &format!("[ label = \"{}\" ]", i + 1),
|
|
|
+ // &format!(
|
|
|
+ // "[ label = \"{}\" color = \"{}\" ]",
|
|
|
+ // i + 1,
|
|
|
+ // x11_colors[i].to_string()
|
|
|
+ // ),
|
|
|
+ // );
|
|
|
+ // });
|
|
|
+ // println!("{s}");
|
|
|
+ //
|
|
|
+ // for (i, way) in ways.iter().enumerate() {
|
|
|
+ // let s = way
|
|
|
+ // .iter()
|
|
|
+ // .map(|(_, _, _, s)| s.to_string())
|
|
|
+ // .collect::<Vec<String>>()
|
|
|
+ // .join("");
|
|
|
+ // println!("{}.\t{s}", i + 1);
|
|
|
+ // }
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn dir() {
|
|
|
+ init();
|
|
|
+ let id = "ROBIN";
|
|
|
+ info!("This record will be captured by `cargo test`");
|
|
|
+
|
|
|
+ let dir = format!("/data/longreads_basic_pipe/{id}/diag/scan/reads",);
|
|
|
+
|
|
|
+ // Load from fasta in dir.
|
|
|
+ let genome = Genome::from_contigs_sequences(&dir).unwrap();
|
|
|
+ genome.stats();
|
|
|
+ let mut res: Vec<String> = genome
|
|
|
+ .iter()
|
|
|
+ .flat_map(|(_s, chrom)| {
|
|
|
+ chrom.iter().filter_map(|c| c.hgvs())
|
|
|
+ // .map(|c| println!("{c}"))
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ res.sort();
|
|
|
+ res.dedup();
|
|
|
+ res.iter().for_each(|s| println!("{s}"));
|
|
|
+
|
|
|
+ // println!("{genome:#?}");
|
|
|
+ }
|
|
|
+}
|