|
|
@@ -1,1201 +0,0 @@
|
|
|
-pub mod breakpoint;
|
|
|
-pub mod genomic_graph;
|
|
|
-// mod phase;
|
|
|
-
|
|
|
-use anyhow::{anyhow, Ok, Result};
|
|
|
-use fasta::record::Sequence;
|
|
|
-use log::info;
|
|
|
-use minimap2::{Aligner, Mapping};
|
|
|
-use noodles_fasta as fasta;
|
|
|
-use num_format::{CustomFormat, Grouping, ToFormattedString, WriteFormatted};
|
|
|
-use petgraph::{dot::Dot, prelude::*};
|
|
|
-use rust_htslib::bam::{self, Record};
|
|
|
-use std::{
|
|
|
- collections::HashMap,
|
|
|
- fmt,
|
|
|
- fs::{self, File},
|
|
|
- io::{BufReader, BufWriter, Write},
|
|
|
- path::PathBuf,
|
|
|
- process::{Command, Stdio},
|
|
|
-};
|
|
|
-use uuid::Uuid;
|
|
|
-
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub struct Genome {
|
|
|
- pub chromosomes: HashMap<String, Chromosome>,
|
|
|
-}
|
|
|
-
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub struct Chromosome {
|
|
|
- contigs: Vec<Contig>,
|
|
|
-}
|
|
|
-
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub struct Contig {
|
|
|
- pub id: String,
|
|
|
- // contig seq on ref
|
|
|
- pub mappings: Vec<Mapping>,
|
|
|
- // reads on ref
|
|
|
- pub supporting_records: Option<Vec<Record>>,
|
|
|
- pub sequence: String,
|
|
|
- pub contig_ref: ContigRef,
|
|
|
-}
|
|
|
-
|
|
|
-#[derive(Debug, Clone)]
|
|
|
-pub enum ContigRef {
|
|
|
- Unique(Mapping),
|
|
|
- Chimeric((Mapping, Mapping)),
|
|
|
- ChimericTriple((Mapping, Mapping, Mapping)),
|
|
|
- ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
|
|
|
- LeftAmbiguity((Vec<Mapping>, Mapping)),
|
|
|
- RightAmbiguity((Mapping, Vec<Mapping>)),
|
|
|
- Ambigous(Vec<Mapping>),
|
|
|
-}
|
|
|
-
|
|
|
-impl fmt::Display for ContigRef {
|
|
|
- fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
|
|
- let str = match self {
|
|
|
- ContigRef::Unique(m) => mapping_to_string(m),
|
|
|
- ContigRef::Chimeric((a, b)) => {
|
|
|
- format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
|
|
|
- }
|
|
|
- ContigRef::ChimericMultiple((a, v, b)) => format!(
|
|
|
- "{}<->{}<->{}",
|
|
|
- mapping_to_string(a),
|
|
|
- mappings_to_string(v),
|
|
|
- mapping_to_string(b)
|
|
|
- ),
|
|
|
- ContigRef::LeftAmbiguity((v, b)) => {
|
|
|
- format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
|
|
|
- }
|
|
|
- ContigRef::RightAmbiguity((a, v)) => {
|
|
|
- format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
|
|
|
- }
|
|
|
- ContigRef::Ambigous(v) => format!("{}", mappings_to_string(v)),
|
|
|
- ContigRef::ChimericTriple((a, b, c)) => format!(
|
|
|
- "{}<->{}<->{}",
|
|
|
- mapping_to_string(a),
|
|
|
- mapping_to_string(b),
|
|
|
- mapping_to_string(c)
|
|
|
- ),
|
|
|
- };
|
|
|
- fmt.write_str(&str).unwrap();
|
|
|
-
|
|
|
- std::result::Result::Ok(())
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-impl ContigRef {
|
|
|
- pub fn breakpoints(&self) -> Option<Vec<Mapping>> {
|
|
|
- match self {
|
|
|
- ContigRef::Unique(_) => None,
|
|
|
- ContigRef::Chimeric((a, b)) => Some(vec![a.clone(), b.clone()]),
|
|
|
- ContigRef::ChimericTriple((a, b, c)) => Some(vec![a.clone(), b.clone(), c.clone()]),
|
|
|
- ContigRef::ChimericMultiple(_) => None,
|
|
|
- ContigRef::LeftAmbiguity(_) => None,
|
|
|
- ContigRef::RightAmbiguity(_) => None,
|
|
|
- ContigRef::Ambigous(_) => None,
|
|
|
- }
|
|
|
- }
|
|
|
- pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
- let left = "►";
|
|
|
- let right = "◄";
|
|
|
- let bp_right = "▐";
|
|
|
- let bp_left = "▌";
|
|
|
- let format = CustomFormat::builder()
|
|
|
- .grouping(Grouping::Standard)
|
|
|
- .minus_sign("-")
|
|
|
- .separator("_")
|
|
|
- .build()
|
|
|
- .unwrap();
|
|
|
-
|
|
|
- let get_sign = |m: &Mapping| -> &str {
|
|
|
- match m.strand {
|
|
|
- minimap2::Strand::Forward => left,
|
|
|
- minimap2::Strand::Reverse => right,
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- let uk = "UNKNOWN".to_string();
|
|
|
- let mut res = Vec::new();
|
|
|
- if let Some(breakpoints) = self.breakpoints() {
|
|
|
- for v in breakpoints.windows(2) {
|
|
|
- let mut bp_string = format!("{}:", v[0].target_name.clone().unwrap_or(uk.clone()));
|
|
|
- let _ = bp_string
|
|
|
- .write_formatted(&v[0].target_end, &format)
|
|
|
- .unwrap();
|
|
|
- bp_string = format!(
|
|
|
- "{bp_string}{}{bp_left}{bp_right}{}{}:",
|
|
|
- get_sign(&v[0]),
|
|
|
- get_sign(&v[1]),
|
|
|
- v[1].target_name.clone().unwrap_or(uk.clone())
|
|
|
- );
|
|
|
- let _ = bp_string
|
|
|
- .write_formatted(&v[1].target_start, &format)
|
|
|
- .unwrap();
|
|
|
- res.push(bp_string);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if !res.is_empty() {
|
|
|
- Some(res)
|
|
|
- } else {
|
|
|
- None
|
|
|
- }
|
|
|
- }
|
|
|
- pub fn desc(&self) -> Option<String> {
|
|
|
- let uk = "UNKNOWN".to_string();
|
|
|
- let to_desc = |v: &mut Vec<Mapping>| -> String {
|
|
|
- v.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
- let v: Vec<String> = v
|
|
|
- .into_iter()
|
|
|
- .map(|e| {
|
|
|
- let strand = match e.strand {
|
|
|
- minimap2::Strand::Forward => "",
|
|
|
- minimap2::Strand::Reverse => "_rev",
|
|
|
- };
|
|
|
- format!(
|
|
|
- "{}:{}_{}{}",
|
|
|
- e.target_name.clone().unwrap_or(uk.clone()),
|
|
|
- e.target_start,
|
|
|
- e.target_end,
|
|
|
- strand
|
|
|
- )
|
|
|
- })
|
|
|
- .collect();
|
|
|
- format!("[{}]", v.join(";"))
|
|
|
- };
|
|
|
-
|
|
|
- match self {
|
|
|
- ContigRef::Unique(a) => Some(format!(
|
|
|
- "{}:{}_{}",
|
|
|
- a.target_name.clone().unwrap_or(uk.clone()),
|
|
|
- a.target_start,
|
|
|
- a.target_end
|
|
|
- )),
|
|
|
- ContigRef::Chimeric((a, b)) => Some(to_desc(&mut vec![a.to_owned(), b.to_owned()])),
|
|
|
- ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
- Some(to_desc(&mut vec![a.to_owned(), b.to_owned(), c.to_owned()]))
|
|
|
- }
|
|
|
- ContigRef::ChimericMultiple(_) => None,
|
|
|
- ContigRef::LeftAmbiguity(_) => None,
|
|
|
- ContigRef::RightAmbiguity(_) => None,
|
|
|
- ContigRef::Ambigous(a) => Some(to_desc(&mut a.to_owned())),
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn hgvs(&self) -> Option<String> {
|
|
|
- let uk = "UNKNOWN".to_string();
|
|
|
- match self {
|
|
|
- ContigRef::Unique(_) => None,
|
|
|
- ContigRef::Chimeric((a, b)) => {
|
|
|
- if a.target_name == b.target_name {
|
|
|
- let chr = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
- let del_start = a.target_end;
|
|
|
- let del_end = b.target_start;
|
|
|
- let hgvs = format!("{chr}:{del_start}_{del_end}");
|
|
|
- Some(hgvs)
|
|
|
- } else {
|
|
|
- let a_chr = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
- let a_bp = a.target_end;
|
|
|
- let b_chr = b.target_name.clone().unwrap_or(uk.clone());
|
|
|
- let b_bp = b.target_end;
|
|
|
- let hgvs = format!("{a_chr}:{a_bp}delins[{b_chr}:{b_bp}]");
|
|
|
- Some(hgvs)
|
|
|
- }
|
|
|
- }
|
|
|
- ContigRef::ChimericMultiple(_) => None,
|
|
|
- ContigRef::LeftAmbiguity(_) => None,
|
|
|
- ContigRef::RightAmbiguity(_) => None,
|
|
|
- ContigRef::Ambigous(_) => None,
|
|
|
- ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
- let mut v = [a, b, c];
|
|
|
- v.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
- let (a, b, c) = (
|
|
|
- *v.get(0).clone().unwrap(),
|
|
|
- *v.get(1).clone().unwrap(),
|
|
|
- *v.get(2).clone().unwrap(),
|
|
|
- );
|
|
|
- let a_target_name = a.target_name.clone().unwrap_or(uk.clone());
|
|
|
- let b_target_name = b.target_name.clone().unwrap_or(uk.clone());
|
|
|
- let c_target_name = c.target_name.clone().unwrap_or(uk.clone());
|
|
|
-
|
|
|
- // if a_target_name != b_target_name {}
|
|
|
-
|
|
|
- // Insertions
|
|
|
- // prioritize first len
|
|
|
- let (bp_a_1, bp_a_2) = if a.query_end <= b.query_end {
|
|
|
- // TODO add inserted nt
|
|
|
- (
|
|
|
- (a.target_name.clone().unwrap_or(uk.clone()), a.target_end),
|
|
|
- (b.target_name.clone().unwrap_or(uk.clone()), b.target_start),
|
|
|
- )
|
|
|
- } else {
|
|
|
- let diff = a.query_end - b.query_start;
|
|
|
- (
|
|
|
- (a.target_name.clone().unwrap_or(uk.clone()), a.target_end),
|
|
|
- (
|
|
|
- b.target_name.clone().unwrap_or(uk.clone()),
|
|
|
- b.target_start + diff,
|
|
|
- ),
|
|
|
- )
|
|
|
- };
|
|
|
- let (bp_b_1, bp_b_2) = if b.query_end <= c.query_end {
|
|
|
- // TODO add inserted nt
|
|
|
- (
|
|
|
- (b.target_name.clone().unwrap_or(uk.clone()), b.target_end),
|
|
|
- (c.target_name.clone().unwrap_or(uk.clone()), c.target_start),
|
|
|
- )
|
|
|
- } else {
|
|
|
- let diff = b.query_end - c.query_start;
|
|
|
- (
|
|
|
- (b.target_name.clone().unwrap_or(uk.clone()), b.target_end),
|
|
|
- (
|
|
|
- c.target_name.clone().unwrap_or(uk.clone()),
|
|
|
- c.target_start + diff,
|
|
|
- ),
|
|
|
- )
|
|
|
- };
|
|
|
- if bp_a_1.0 == bp_b_2.0 {
|
|
|
- let hgvs = format!(
|
|
|
- "{}:{}_{}ins[{}:{}_{}]",
|
|
|
- bp_a_1.0, bp_a_1.1, bp_b_2.1, bp_a_2.0, bp_a_2.1, bp_b_1.1
|
|
|
- );
|
|
|
- Some(hgvs)
|
|
|
- } else {
|
|
|
- None
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-pub fn mapping_to_string(mapping: &Mapping) -> String {
|
|
|
- let uk = "UNKNOWN".to_string();
|
|
|
- format!(
|
|
|
- "{}:{}-{}({}:{}-{})",
|
|
|
- mapping.target_name.clone().unwrap_or(uk.clone()),
|
|
|
- mapping.target_start,
|
|
|
- mapping.target_end,
|
|
|
- mapping.query_name.clone().unwrap_or(uk),
|
|
|
- mapping.query_start,
|
|
|
- mapping.query_end
|
|
|
- )
|
|
|
-}
|
|
|
-
|
|
|
-fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
|
|
|
- let v = mappings
|
|
|
- .iter()
|
|
|
- .map(mapping_to_string)
|
|
|
- .collect::<Vec<String>>();
|
|
|
- v.join("//")
|
|
|
-}
|
|
|
-
|
|
|
-pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
|
|
|
- let mut mappings = mappings;
|
|
|
- mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
-
|
|
|
- if mappings.len() == 1 {
|
|
|
- return Ok(ContigRef::Unique(mappings.get(0).unwrap().clone()));
|
|
|
- } else {
|
|
|
- let mut grouped: Vec<Vec<Mapping>> = group_mappings(&mut mappings)?;
|
|
|
- // let mut grouped: VecDeque<Vec<Mapping>> = group_mappings(&mut mappings)?.into();
|
|
|
-
|
|
|
- if grouped.len() == 1 {
|
|
|
- let r = grouped.into_iter().flat_map(|e| e).collect();
|
|
|
- return Ok(ContigRef::Ambigous(r));
|
|
|
- } else if grouped.len() >= 2 {
|
|
|
- // let first = grouped.pop_back().unwrap();
|
|
|
- // let last = grouped.pop_front().unwrap();
|
|
|
- let first = grouped.first().unwrap().to_vec();
|
|
|
- let last = grouped.last().unwrap().to_vec();
|
|
|
- grouped.remove(0);
|
|
|
- grouped.remove(grouped.len() - 1);
|
|
|
- assert!(first[0].query_start < last[0].query_start);
|
|
|
-
|
|
|
- if grouped.len() == 0 {
|
|
|
- if first.len() == 1 && last.len() == 1 {
|
|
|
- return Ok(ContigRef::Chimeric((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- } else if first.len() == 1 {
|
|
|
- return Ok(ContigRef::RightAmbiguity((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- last.clone(),
|
|
|
- )));
|
|
|
- } else if last.len() == 1 {
|
|
|
- return Ok(ContigRef::LeftAmbiguity((
|
|
|
- first.clone(),
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- } else {
|
|
|
- let all: Vec<Mapping> = vec![first, last].into_iter().flat_map(|e| e).collect();
|
|
|
- return Ok(ContigRef::Ambigous(all));
|
|
|
- }
|
|
|
- }
|
|
|
- if first.len() == 1 && last.len() == 1 {
|
|
|
- if grouped.len() == 1 {
|
|
|
- return Ok(ContigRef::ChimericTriple((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- grouped.get(0).unwrap().get(0).unwrap().clone(),
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- } else {
|
|
|
- return Ok(ContigRef::ChimericMultiple((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- }
|
|
|
- } else if first.len() == 1 {
|
|
|
- let right: Vec<Mapping> = vec![grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
- .into_iter()
|
|
|
- .flat_map(|e| e)
|
|
|
- .collect();
|
|
|
- return Ok(ContigRef::RightAmbiguity((
|
|
|
- first.get(0).unwrap().clone(),
|
|
|
- right,
|
|
|
- )));
|
|
|
- } else if last.len() == 1 {
|
|
|
- let left: Vec<Mapping> = vec![first, grouped.into_iter().flat_map(|e| e).collect()]
|
|
|
- .into_iter()
|
|
|
- .flat_map(|e| e)
|
|
|
- .collect();
|
|
|
- return Ok(ContigRef::LeftAmbiguity((
|
|
|
- left,
|
|
|
- last.get(0).unwrap().clone(),
|
|
|
- )));
|
|
|
- } else {
|
|
|
- let all: Vec<Mapping> =
|
|
|
- vec![first, grouped.into_iter().flat_map(|e| e).collect(), last]
|
|
|
- .into_iter()
|
|
|
- .flat_map(|e| e)
|
|
|
- .collect();
|
|
|
- return Ok(ContigRef::Ambigous(all));
|
|
|
- }
|
|
|
- } else {
|
|
|
- return Ok(ContigRef::Ambigous(
|
|
|
- grouped.into_iter().flat_map(|e| e).collect(),
|
|
|
- ));
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-impl Genome {
|
|
|
- pub fn new() -> Self {
|
|
|
- Genome {
|
|
|
- chromosomes: HashMap::new(),
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> {
|
|
|
- self.chromosomes.iter()
|
|
|
- }
|
|
|
-
|
|
|
- pub fn contigs(&self) -> impl Iterator<Item = &Contig> {
|
|
|
- self.chromosomes.iter().flat_map(|(_, e)| e.iter())
|
|
|
- }
|
|
|
-
|
|
|
- pub fn add_contig(
|
|
|
- &mut self,
|
|
|
- id: String,
|
|
|
- mappings: Vec<Mapping>,
|
|
|
- supporting_records: Option<Vec<Record>>,
|
|
|
- sequence: String,
|
|
|
- ) -> Result<()> {
|
|
|
- let mut mappings = mappings;
|
|
|
- mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
- let new_contig = Contig {
|
|
|
- id,
|
|
|
- mappings: mappings.clone(),
|
|
|
- supporting_records,
|
|
|
- sequence,
|
|
|
- contig_ref: get_ref_pos(mappings)?,
|
|
|
- };
|
|
|
- // get the category of Mapping
|
|
|
- match new_contig.contig_ref.clone() {
|
|
|
- ContigRef::Unique(contig_mapping) => {
|
|
|
- match self
|
|
|
- .chromosomes
|
|
|
- .get_mut(&contig_mapping.target_name.unwrap())
|
|
|
- {
|
|
|
- Some(chromosome) => {
|
|
|
- chromosome.contigs.push(new_contig);
|
|
|
- }
|
|
|
- None => (),
|
|
|
- }
|
|
|
- }
|
|
|
- ContigRef::Chimeric((a, b)) => {
|
|
|
- let a_target_name = a.target_name.unwrap();
|
|
|
- let b_target_name = b.target_name.unwrap();
|
|
|
- if a_target_name == b_target_name {
|
|
|
- if let Some(chromosome) = self.chromosomes.get_mut(&a_target_name) {
|
|
|
- chromosome.contigs.push(new_contig);
|
|
|
- } else {
|
|
|
- self.chromosomes.insert(
|
|
|
- a_target_name,
|
|
|
- Chromosome {
|
|
|
- contigs: vec![new_contig],
|
|
|
- },
|
|
|
- );
|
|
|
- }
|
|
|
- } else {
|
|
|
- let chimeric_name = format!("{}-{}", a_target_name, b_target_name);
|
|
|
- if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
- chromosome.contigs.push(new_contig);
|
|
|
- } else {
|
|
|
- self.chromosomes.insert(
|
|
|
- chimeric_name,
|
|
|
- Chromosome {
|
|
|
- contigs: vec![new_contig],
|
|
|
- },
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- ContigRef::ChimericMultiple((left, _, right)) => {
|
|
|
- let left_target_name = left.target_name.unwrap();
|
|
|
- let right_target_name = right.target_name.unwrap();
|
|
|
- if left_target_name == right_target_name {
|
|
|
- if let Some(chromosome) = self.chromosomes.get_mut(&left_target_name) {
|
|
|
- chromosome.contigs.push(new_contig);
|
|
|
- } else {
|
|
|
- self.chromosomes.insert(
|
|
|
- left_target_name,
|
|
|
- Chromosome {
|
|
|
- contigs: vec![new_contig],
|
|
|
- },
|
|
|
- );
|
|
|
- }
|
|
|
- } else {
|
|
|
- let chimeric_name = format!("{}-{}", left_target_name, right_target_name);
|
|
|
- if let Some(chromosome) = self.chromosomes.get_mut(&chimeric_name) {
|
|
|
- chromosome.contigs.push(new_contig);
|
|
|
- } else {
|
|
|
- self.chromosomes.insert(
|
|
|
- chimeric_name,
|
|
|
- Chromosome {
|
|
|
- contigs: vec![new_contig],
|
|
|
- },
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- _ => {
|
|
|
- if let Some(chromosome) = self.chromosomes.get_mut("Ambigous") {
|
|
|
- chromosome.contigs.push(new_contig);
|
|
|
- } else {
|
|
|
- self.chromosomes.insert(
|
|
|
- "Ambigous".to_string(),
|
|
|
- Chromosome {
|
|
|
- contigs: vec![new_contig],
|
|
|
- },
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
- };
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- pub fn add_contig_from_seq(
|
|
|
- &mut self,
|
|
|
- name: String,
|
|
|
- sequence: &[u8],
|
|
|
- aligner: impl Fn(String) -> Result<Vec<Mapping>>,
|
|
|
- ) -> Result<()> {
|
|
|
- let mappings = aligner(String::from_utf8(sequence.to_vec())?)?;
|
|
|
- // println!("{mappings:?}");
|
|
|
- self.add_contig(name, mappings, None, String::from_utf8(sequence.to_vec())?)?;
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- pub fn write_contigs_sequences(&self, dir: &str) {
|
|
|
- for contig in self.contigs() {
|
|
|
- contig.write_fasta(&format!("{dir}/{}.fasta", contig.id))
|
|
|
- }
|
|
|
- // self.iter().for_each(|(_, chr)| {
|
|
|
- // chr.iter().for_each(|c| c.write_fasta(&format!("{dir}/{}.fasta", c.id)))
|
|
|
- // });
|
|
|
- }
|
|
|
-
|
|
|
- pub fn from_contigs_sequences(dir: &str) -> Result<Self> {
|
|
|
- let aligner_url = "http://localhost:4444/align";
|
|
|
- let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
- let mut genome = Self::new();
|
|
|
- // let paths = get_ext_paths(dir, "fasta")?;
|
|
|
- let paths = get_contigs_fa_paths(dir)?;
|
|
|
- for path in paths {
|
|
|
- let fa = read_fasta(path.to_str().unwrap())?;
|
|
|
- for (name, sequence) in fa {
|
|
|
- genome.add_contig_from_seq(name, sequence.as_ref(), &aligner)?;
|
|
|
- }
|
|
|
- }
|
|
|
- Ok(genome)
|
|
|
- }
|
|
|
- //
|
|
|
- // pub fn from_dir_bed(dir: &str) {
|
|
|
- //
|
|
|
- // }
|
|
|
-
|
|
|
- // pub fn write_records(&self, file: &str) {
|
|
|
- // let mut records = Vec::new();
|
|
|
- // for (name, chromosome) in self.chromosomes.iter() {
|
|
|
- // for contig in chromosome.iter() {
|
|
|
- // if let Some(rec) = &contig.supporting_records {
|
|
|
- // records.extend(rec.iter());
|
|
|
- // };
|
|
|
- // }
|
|
|
- // }
|
|
|
- // // header
|
|
|
- //
|
|
|
- // // writer
|
|
|
- // rust_htslib::bam::Writer::from_path(path, header, format)
|
|
|
- // }
|
|
|
-
|
|
|
- pub fn stats(&self) {
|
|
|
- for (k, v) in self.chromosomes.iter() {
|
|
|
- info!("{}:{}", k, v.contigs.len());
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn chromosome(&self, chromosome: &str) -> Option<Vec<Contig>> {
|
|
|
- self.chromosomes
|
|
|
- .get(chromosome)
|
|
|
- .map(|chr| chr.contigs.clone())
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-impl Chromosome {
|
|
|
- pub fn iter(&self) -> std::slice::Iter<'_, Contig> {
|
|
|
- self.contigs.iter()
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-impl Contig {
|
|
|
- // pub fn sort(&mut self) {
|
|
|
- // // sorting by target order
|
|
|
- // self.mappings
|
|
|
- // .sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
- // }
|
|
|
-
|
|
|
- pub fn to_igv(&self, dir_path: &str) -> Result<()> {
|
|
|
- let supporting_records = self.supporting_records.clone().ok_or(anyhow!("no reads"))?;
|
|
|
- let contig_name = if let Some(hgvs) = self.hgvs() {
|
|
|
- hgvs
|
|
|
- } else {
|
|
|
- self.id.clone()
|
|
|
- };
|
|
|
- let contig_dir = format!("{dir_path}/{contig_name}");
|
|
|
- fs::create_dir_all(contig_dir.clone())?;
|
|
|
-
|
|
|
- let fasta_path = format!("{contig_dir}/contig.fa");
|
|
|
- write_fasta(&fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
|
|
|
- write_fai(&fasta_path);
|
|
|
-
|
|
|
- let reads_path = format!("{contig_dir}/reads.fa");
|
|
|
- let n_reads = supporting_records
|
|
|
- .clone()
|
|
|
- .into_iter()
|
|
|
- .map(|r| {
|
|
|
- (
|
|
|
- String::from_utf8(r.qname().to_vec()).unwrap(),
|
|
|
- String::from_utf8(r.seq().as_bytes()).unwrap(),
|
|
|
- )
|
|
|
- })
|
|
|
- .collect();
|
|
|
- write_fasta(&reads_path, &n_reads);
|
|
|
-
|
|
|
- let bam_path = format!("{contig_dir}/{}.bam", self.id);
|
|
|
- write_bam(&fasta_path, &reads_path, &bam_path)?;
|
|
|
-
|
|
|
- let bed_path = format!("{contig_dir}/contig.bed");
|
|
|
- match &self.contig_ref {
|
|
|
- ContigRef::Chimeric((a, b)) => {
|
|
|
- let d = vec![
|
|
|
- (
|
|
|
- self.id.clone(),
|
|
|
- a.query_start,
|
|
|
- a.query_end,
|
|
|
- format!(
|
|
|
- "{}:{}-{}",
|
|
|
- a.target_name.clone().unwrap(),
|
|
|
- a.target_start,
|
|
|
- a.target_end
|
|
|
- ),
|
|
|
- ),
|
|
|
- (
|
|
|
- self.id.clone(),
|
|
|
- b.query_start,
|
|
|
- b.query_end,
|
|
|
- format!(
|
|
|
- "{}:{}-{}",
|
|
|
- b.target_name.clone().unwrap(),
|
|
|
- b.target_start,
|
|
|
- b.target_end
|
|
|
- ),
|
|
|
- ),
|
|
|
- ];
|
|
|
- write_bed(&bed_path, &d)?;
|
|
|
- }
|
|
|
- ContigRef::ChimericTriple((a, b, c)) => {
|
|
|
- let d: Vec<(String, i32, i32, String)> = [a, b, c]
|
|
|
- .iter()
|
|
|
- .map(|r| {
|
|
|
- (
|
|
|
- self.id.clone(),
|
|
|
- r.query_start,
|
|
|
- r.query_end,
|
|
|
- format!(
|
|
|
- "{}:{}-{}",
|
|
|
- r.target_name.clone().unwrap(),
|
|
|
- r.target_start,
|
|
|
- r.target_end
|
|
|
- ),
|
|
|
- )
|
|
|
- })
|
|
|
- .collect();
|
|
|
- write_bed(&bed_path, &d)?;
|
|
|
- }
|
|
|
- _ => (),
|
|
|
- }
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- // bug cigar len != seq len
|
|
|
- // pub fn write_bam(&self, path: &str) -> Result<()> {
|
|
|
- // let aligner = Aligner::builder()
|
|
|
- // .asm5()
|
|
|
- // .with_threads(8)
|
|
|
- // .with_cigar()
|
|
|
- // .with_seq(self.sequence.as_bytes())
|
|
|
- // .expect("Unable to build index");
|
|
|
- //
|
|
|
- // let mut mappings = Vec::new();
|
|
|
- // let supporting_records = self
|
|
|
- // .supporting_records
|
|
|
- // .clone()
|
|
|
- // .ok_or(anyhow!("no supporting records"))?;
|
|
|
- // for record in supporting_records.iter() {
|
|
|
- // let seq = record.seq().as_bytes();
|
|
|
- // let alignment = aligner
|
|
|
- // .map(&seq, false, false, None, None)
|
|
|
- // .expect("Unable to align");
|
|
|
- // mappings.push(alignment);
|
|
|
- // }
|
|
|
- // let mut mappings: Vec<_> = mappings.into_iter().flatten().collect();
|
|
|
- // mappings.sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
- //
|
|
|
- // let mut header = bam::Header::new();
|
|
|
- // let mut sq_record = rust_htslib::bam::header::HeaderRecord::new(b"SQ");
|
|
|
- // sq_record.push_tag(b"SN", self.id.clone());
|
|
|
- // sq_record.push_tag(b"LN", self.sequence.len());
|
|
|
- // header.push_record(&sq_record);
|
|
|
- //
|
|
|
- // let mut out = bam::Writer::from_path(path, &header, bam::Format::Bam).unwrap();
|
|
|
- //
|
|
|
- // // copy reverse reads to new BAM file
|
|
|
- // for mapping in mappings.iter() {
|
|
|
- // let record = minimap2::htslib::mapping_to_record(
|
|
|
- // Some(mapping),
|
|
|
- // self.sequence.as_bytes(),
|
|
|
- // header.clone(),
|
|
|
- // None,
|
|
|
- // Some(Uuid::new_v4().as_bytes()),
|
|
|
- // );
|
|
|
- // let _ = out.write(&record);
|
|
|
- // }
|
|
|
- // rust_htslib::bam::index::build(path, None, rust_htslib::bam::index::Type::Bai, 1)?;
|
|
|
- // Ok(())
|
|
|
- // }
|
|
|
-
|
|
|
- pub fn hgvs(&self) -> Option<String> {
|
|
|
- self.contig_ref.hgvs()
|
|
|
- }
|
|
|
-
|
|
|
- pub fn desc(&self) -> Option<String> {
|
|
|
- self.contig_ref.desc()
|
|
|
- }
|
|
|
-
|
|
|
- pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
- self.contig_ref.breakpoints_repr()
|
|
|
- }
|
|
|
-
|
|
|
- pub fn write_fasta(&self, fasta_path: &str) {
|
|
|
- write_fasta(fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-fn group_mappings(mappings: &mut [Mapping]) -> Result<Vec<Vec<Mapping>>> {
|
|
|
- // sort alignments by query_start
|
|
|
- mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
|
|
|
-
|
|
|
- let mut alignments: Vec<Vec<Mapping>> = vec![];
|
|
|
- // group by overlapps > 30
|
|
|
- for aln in mappings.iter() {
|
|
|
- let last = alignments.last_mut();
|
|
|
- if let Some(l) = last {
|
|
|
- if l.iter()
|
|
|
- .filter(|a| a.query_end - aln.query_start > 30)
|
|
|
- .count()
|
|
|
- > 0
|
|
|
- {
|
|
|
- l.push(aln.clone());
|
|
|
- } else {
|
|
|
- alignments.push(vec![aln.clone()]);
|
|
|
- }
|
|
|
- } else {
|
|
|
- alignments.push(vec![aln.clone()]);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- Ok(alignments)
|
|
|
-}
|
|
|
-
|
|
|
-pub fn write_bed(path: &str, d: &[(String, i32, i32, String)]) -> Result<()> {
|
|
|
- let file = File::create(path).unwrap();
|
|
|
- let mut writer = BufWriter::new(file);
|
|
|
- for (chr, start, end, value) in d.iter() {
|
|
|
- let row = format!(
|
|
|
- "{}\n",
|
|
|
- [
|
|
|
- chr.to_string(),
|
|
|
- start.to_string(),
|
|
|
- end.to_string(),
|
|
|
- value.to_string()
|
|
|
- ]
|
|
|
- .join("\t")
|
|
|
- );
|
|
|
- writer.write_all(row.as_bytes())?;
|
|
|
- }
|
|
|
- Ok(())
|
|
|
-}
|
|
|
-
|
|
|
-// unique
|
|
|
-pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) -> Result<()> {
|
|
|
- let file = File::create(fastq_path)?;
|
|
|
- let mut writer = BufWriter::new(file);
|
|
|
- for record in d {
|
|
|
- let name = String::from_utf8(record.qname().to_vec()).unwrap();
|
|
|
- writer.write_all(format!("@{name}\n").as_bytes())?;
|
|
|
- let seq = record.seq().as_bytes();
|
|
|
- writer.write_all(&seq)?;
|
|
|
- writer.write_all(b"\n+\n")?;
|
|
|
- let qual = record.qual();
|
|
|
- writer.write_all(qual)?;
|
|
|
- }
|
|
|
- Ok(())
|
|
|
-}
|
|
|
-
|
|
|
-pub fn write_fasta(fasta_path: &str, d: &Vec<(String, String)>) {
|
|
|
- let file = File::create(fasta_path).unwrap();
|
|
|
- let mut writer = fasta::writer::Builder::default().build_with_writer(file);
|
|
|
- let mut passed = Vec::new();
|
|
|
- for (name, sequence) in d {
|
|
|
- let name = name.to_string();
|
|
|
- if sequence.is_empty() {
|
|
|
- continue;
|
|
|
- }
|
|
|
- if passed.contains(&name) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- passed.push(name.clone());
|
|
|
- let record = fasta::Record::new(
|
|
|
- fasta::record::Definition::new(name.as_str(), None),
|
|
|
- fasta::record::Sequence::from(sequence.as_bytes().to_vec()),
|
|
|
- );
|
|
|
- writer.write_record(&record).unwrap();
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-pub fn write_fai(path: &str) {
|
|
|
- let mut faidx = Command::new("samtools")
|
|
|
- .arg("faidx")
|
|
|
- .arg(path)
|
|
|
- .spawn()
|
|
|
- .expect("Samtools faidx failed to start");
|
|
|
- faidx.wait().unwrap();
|
|
|
-}
|
|
|
-
|
|
|
-pub fn write_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()> {
|
|
|
- let rg_id = uuid::Uuid::new_v4();
|
|
|
-
|
|
|
- let mm2 = Command::new("minimap2")
|
|
|
- .arg("-t")
|
|
|
- .arg("128")
|
|
|
- .arg("-ax")
|
|
|
- .arg("map-ont")
|
|
|
- .arg("-R")
|
|
|
- .arg(format!(
|
|
|
- "@RG\\tPL:ONTASM_PROM\\tID:ONTASM_{rg_id}\\tSM:{rg_id}\\tLB:ONTASM_NB_PROM"
|
|
|
- ))
|
|
|
- .arg(ref_path)
|
|
|
- .arg(reads_path)
|
|
|
- .stdout(Stdio::piped())
|
|
|
- .stderr(Stdio::piped())
|
|
|
- .spawn()
|
|
|
- .expect("Minimap2 failed to start");
|
|
|
-
|
|
|
- let view = Command::new("sambamba")
|
|
|
- .arg("view")
|
|
|
- .arg("-h")
|
|
|
- .arg("-S")
|
|
|
- .arg("-t")
|
|
|
- .arg("20")
|
|
|
- .arg("--format=bam")
|
|
|
- .arg("/dev/stdin")
|
|
|
- .stdin(Stdio::from(mm2.stdout.unwrap()))
|
|
|
- .stdout(Stdio::piped())
|
|
|
- .stderr(Stdio::piped())
|
|
|
- .spawn()
|
|
|
- .expect("Sambamba view failed to start");
|
|
|
-
|
|
|
- let mut sort = Command::new("sambamba")
|
|
|
- .arg("sort")
|
|
|
- .arg("-t")
|
|
|
- .arg("20")
|
|
|
- .arg("/dev/stdin")
|
|
|
- .arg("-o")
|
|
|
- .arg(bam_path)
|
|
|
- .stderr(Stdio::piped())
|
|
|
- .stdin(Stdio::from(view.stdout.unwrap()))
|
|
|
- .spawn()
|
|
|
- .expect("Sambamba sort failed to start");
|
|
|
-
|
|
|
- sort.wait().unwrap();
|
|
|
- Ok(())
|
|
|
-}
|
|
|
-
|
|
|
-pub fn read_fasta(path: &str) -> Result<Vec<(String, Sequence)>> {
|
|
|
- let mut reader = File::open(path)
|
|
|
- .map(BufReader::new)
|
|
|
- .map(fasta::Reader::new)?;
|
|
|
-
|
|
|
- let mut res = Vec::new();
|
|
|
- for result in reader.records() {
|
|
|
- let record = result?;
|
|
|
- let u = String::from_utf8(record.name().to_vec())?;
|
|
|
- let s = record.sequence().to_owned();
|
|
|
- res.push((u, s));
|
|
|
- }
|
|
|
-
|
|
|
- Ok(res)
|
|
|
-}
|
|
|
-
|
|
|
-// fn get_ext_paths(dir: &str, ext: &str) -> Result<Vec<PathBuf>> {
|
|
|
-// let paths = std::fs::read_dir(dir)?
|
|
|
-// // Filter out all those directory entries which couldn't be read
|
|
|
-// .filter_map(|res| res.ok())
|
|
|
-// // Map the directory entries to paths
|
|
|
-// .map(|dir_entry| dir_entry.path())
|
|
|
-// // Filter out all paths with extensions other than `csv`
|
|
|
-// .filter_map(|path| {
|
|
|
-// if path.extension().map_or(false, |xt| xt == ext) {
|
|
|
-// Some(path)
|
|
|
-// } else {
|
|
|
-// None
|
|
|
-// }
|
|
|
-// })
|
|
|
-// .collect::<Vec<_>>();
|
|
|
-// Ok(paths)
|
|
|
-// }
|
|
|
-
|
|
|
-fn get_contigs_fa_paths(dir: &str) -> Result<Vec<PathBuf>> {
|
|
|
- let pattern = format!("{}/**/*_flye.fa", dir);
|
|
|
- let fa_paths: Vec<PathBuf> = glob::glob(&pattern)
|
|
|
- .expect("Failed to read glob pattern")
|
|
|
- .filter_map(Result::ok)
|
|
|
- .collect();
|
|
|
-
|
|
|
- Ok(fa_paths)
|
|
|
-}
|
|
|
-
|
|
|
-pub fn dot_graph(
|
|
|
- graph: &StableGraph<String, String>,
|
|
|
- way: &[NodeIndex],
|
|
|
- value: &str,
|
|
|
- color: &str,
|
|
|
- erase: bool,
|
|
|
-) -> String {
|
|
|
- let mut g = graph.clone();
|
|
|
-
|
|
|
- // erase labels
|
|
|
- if erase {
|
|
|
- g.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
- }
|
|
|
-
|
|
|
- let mut labels = Vec::new();
|
|
|
- for window in way.windows(2) {
|
|
|
- let edge_id = g.find_edge(window[0], window[1]).unwrap();
|
|
|
- let edge = g.edge_weight_mut(edge_id).unwrap();
|
|
|
- let v = if !edge.is_empty() {
|
|
|
- let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect();
|
|
|
- v.push(value);
|
|
|
- v.join(", ")
|
|
|
- } else {
|
|
|
- value.to_string()
|
|
|
- };
|
|
|
- labels.push(v.clone());
|
|
|
- *edge = v;
|
|
|
- }
|
|
|
-
|
|
|
- if erase {
|
|
|
- g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
- g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
- }
|
|
|
-
|
|
|
- let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
-
|
|
|
- labels.sort_by_key(|b| std::cmp::Reverse(b.len()));
|
|
|
- // labels.sort_by(|a, b| b.len().cmp(&a.len()));
|
|
|
- for label in labels {
|
|
|
- let label_str = format!("label = \"{label}\"");
|
|
|
- dot = dot.replace(
|
|
|
- &format!("{label_str} ]\n"),
|
|
|
- &format!("{label_str}, color = \"{color}\" ]\n"),
|
|
|
- );
|
|
|
- }
|
|
|
- dot
|
|
|
-}
|
|
|
-
|
|
|
-/// for non overlapping ways
|
|
|
-pub fn dot_graph_biall(
|
|
|
- graph: &StableGraph<String, String>,
|
|
|
- ways: &[Vec<NodeIndex>],
|
|
|
- values: Vec<&str>,
|
|
|
- colors: Vec<&str>,
|
|
|
- erase: bool,
|
|
|
-) -> String {
|
|
|
- let mut g = graph.clone();
|
|
|
- // erase labels
|
|
|
- if erase {
|
|
|
- g.edge_weights_mut().for_each(|e| *e = "".to_string());
|
|
|
- }
|
|
|
-
|
|
|
- let mut labels = Vec::new();
|
|
|
- for ((way, value), color) in ways.iter().zip(values.into_iter()).zip(colors.into_iter()) {
|
|
|
- for window in way.windows(2) {
|
|
|
- let edge_id = g.find_edge(window[0], window[1]).unwrap();
|
|
|
- let edge = g.edge_weight_mut(edge_id).unwrap();
|
|
|
- let v = if !edge.is_empty() {
|
|
|
- let mut v: Vec<&str> = edge.split(",").filter(|e| !e.is_empty()).collect();
|
|
|
- v.push(value);
|
|
|
- v.join(", ")
|
|
|
- } else {
|
|
|
- value.to_string()
|
|
|
- };
|
|
|
- labels.push((v.clone(), color));
|
|
|
- *edge = v;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // g.retain_edges(|g, i| g.edge_weight(i).unwrap().to_string() != "".to_string());
|
|
|
- g.retain_edges(|g, i| !g.edge_weight(i).unwrap().is_empty());
|
|
|
- g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
|
|
|
-
|
|
|
- let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
|
|
|
-
|
|
|
- labels.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
|
|
|
- for (label, color) in labels {
|
|
|
- let label_str = format!("label = \"{label}\"");
|
|
|
- dot = dot.replace(
|
|
|
- &format!("{label_str} ]\n"),
|
|
|
- &format!("{label_str}, color = \"{color}\" ]\n"),
|
|
|
- );
|
|
|
- }
|
|
|
- dot
|
|
|
-}
|
|
|
-
|
|
|
-#[cfg(test)]
|
|
|
-mod tests {
|
|
|
- use env_logger::Env;
|
|
|
-
|
|
|
- use super::*;
|
|
|
- use crate::{
|
|
|
- genomic_graph::GenomicGraph,
|
|
|
- // phase::{variants_phasing, write_phases_bed},
|
|
|
- };
|
|
|
-
|
|
|
- fn init() {
|
|
|
- let _ = env_logger::Builder::from_env(Env::default().default_filter_or("info"))
|
|
|
- .is_test(true)
|
|
|
- .try_init();
|
|
|
- }
|
|
|
-
|
|
|
- #[test]
|
|
|
- fn it_works() -> Result<()> {
|
|
|
- let _ = env_logger::builder().is_test(true).try_init();
|
|
|
- let contig_fa = "./data_test/contig_2.fa";
|
|
|
- let aligner_url = "http://localhost:4444/align";
|
|
|
-
|
|
|
- let mut genome = Genome::new();
|
|
|
- let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
-
|
|
|
- let sequences = read_fasta(contig_fa)?;
|
|
|
- for (name, seq) in sequences {
|
|
|
- genome.add_contig_from_seq(name.clone(), &seq.as_ref().to_vec(), &aligner)?;
|
|
|
- let mut seqc: Vec<u8> = seq.complement().map(|e| e.unwrap()).collect();
|
|
|
- seqc.reverse();
|
|
|
- genome.add_contig_from_seq(format!("{name}_rev"), &seqc, &aligner)?;
|
|
|
- println!("Sending");
|
|
|
- }
|
|
|
- genome.iter().for_each(|(_, c)| {
|
|
|
- c.iter().for_each(|cont| {
|
|
|
- println!("{}", cont.contig_ref.desc().unwrap());
|
|
|
- });
|
|
|
- });
|
|
|
-
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- #[test]
|
|
|
- fn test_graph() -> Result<()> {
|
|
|
- init();
|
|
|
-
|
|
|
- let case = "SALICETTO";
|
|
|
- let chrom = vec!["chr10"];
|
|
|
- info!("This record will be captured by `cargo test`");
|
|
|
-
|
|
|
- let dir = format!("/data/longreads_basic_pipe/{case}/diag/asm_contis");
|
|
|
-
|
|
|
- // Load from fasta in dir.
|
|
|
- let genome = Genome::from_contigs_sequences(&dir)?;
|
|
|
- genome.stats();
|
|
|
- let mut genomic_graph = GenomicGraph::from_genome(&genome);
|
|
|
-
|
|
|
- let sens = vec![true, false];
|
|
|
- let pos = vec![0, i32::MAX];
|
|
|
- let mut all_ways = Vec::new();
|
|
|
- if chrom.len() > 1 {
|
|
|
- (0..4).into_iter().for_each(|i| {
|
|
|
- let start_pos = if i < 2 { 0 } else { i32::MAX };
|
|
|
- let end_pos = pos[i % 2];
|
|
|
-
|
|
|
- (0..4).into_iter().for_each(|i| {
|
|
|
- let start_sens = if i < 2 { true } else { false };
|
|
|
- let end_sens = sens[i % 2];
|
|
|
- (0..4).into_iter().for_each(|i| {
|
|
|
- let start_chr = if i < 2 { chrom[0] } else { chrom[1] };
|
|
|
- let end_chr = chrom[i % 2];
|
|
|
- let start = (start_sens, start_chr, start_pos);
|
|
|
- let end = (end_sens, end_chr, end_pos);
|
|
|
-
|
|
|
- let (oriented_graph, _integrated_graph, ways) =
|
|
|
- genomic_graph.ways(start, end);
|
|
|
-
|
|
|
- let dot = oriented_graph.dot_graph();
|
|
|
- println!("dot\n{dot}");
|
|
|
-
|
|
|
- for (_i, way) in ways.iter().enumerate() {
|
|
|
- let s = way
|
|
|
- .iter()
|
|
|
- .map(|(_, _, _, s)| s.to_string())
|
|
|
- .collect::<Vec<String>>()
|
|
|
- .join("");
|
|
|
- all_ways.push(s);
|
|
|
- }
|
|
|
- });
|
|
|
- });
|
|
|
- });
|
|
|
- } else {
|
|
|
- let start_chr = chrom[0];
|
|
|
- let end_chr = chrom[0];
|
|
|
- let start = (true, start_chr, 0);
|
|
|
- let end = (true, end_chr, i32::MAX);
|
|
|
-
|
|
|
- let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end);
|
|
|
-
|
|
|
- let dot = oriented_graph.dot_graph();
|
|
|
- println!("dot\n{dot}");
|
|
|
-
|
|
|
- for (_i, way) in ways.iter().enumerate() {
|
|
|
- let s = way
|
|
|
- .iter()
|
|
|
- .map(|(_, _, _, s)| s.to_string())
|
|
|
- .collect::<Vec<String>>()
|
|
|
- .join("");
|
|
|
- all_ways.push(s);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- all_ways.dedup();
|
|
|
- all_ways
|
|
|
- .iter()
|
|
|
- .enumerate()
|
|
|
- .for_each(|(i, s)| println!("{i}.\t{s}"));
|
|
|
-
|
|
|
- // let s = Dot::new(&integrated_graph).to_string().replace("\\\"", "");
|
|
|
- // let x11_colors: Vec<String> = vec![
|
|
|
- // String::from("Red"),
|
|
|
- // String::from("Green"),
|
|
|
- // String::from("Blue"),
|
|
|
- // String::from("Cyan"),
|
|
|
- // String::from("Magenta"),
|
|
|
- // String::from("Yellow"),
|
|
|
- // String::from("DarkRed"),
|
|
|
- // String::from("DarkGreen"),
|
|
|
- // String::from("DarkBlue"),
|
|
|
- // String::from("DarkCyan"),
|
|
|
- // String::from("DarkMagenta"),
|
|
|
- // String::from("DarkYellow"),
|
|
|
- // String::from("LightRed"),
|
|
|
- // String::from("LightGreen"),
|
|
|
- // String::from("LightBlue"),
|
|
|
- // String::from("LightCyan"),
|
|
|
- // String::from("LightMagenta"),
|
|
|
- // String::from("LightYellow"),
|
|
|
- // String::from("Orange"),
|
|
|
- // String::from("Brown"),
|
|
|
- // String::from("Beige"),
|
|
|
- // ];
|
|
|
- // let mut s = s.clone();
|
|
|
- // ways.iter().enumerate().for_each(|(i, _)| {
|
|
|
- // s = s.replace(
|
|
|
- // &format!("[ label = \"{}\" ]", i + 1),
|
|
|
- // &format!(
|
|
|
- // "[ label = \"{}\" color = \"{}\" ]",
|
|
|
- // i + 1,
|
|
|
- // x11_colors[i].to_string()
|
|
|
- // ),
|
|
|
- // );
|
|
|
- // });
|
|
|
- // println!("{s}");
|
|
|
- //
|
|
|
- // for (i, way) in ways.iter().enumerate() {
|
|
|
- // let s = way
|
|
|
- // .iter()
|
|
|
- // .map(|(_, _, _, s)| s.to_string())
|
|
|
- // .collect::<Vec<String>>()
|
|
|
- // .join("");
|
|
|
- // println!("{}.\t{s}", i + 1);
|
|
|
- // }
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- #[test]
|
|
|
- fn dir() {
|
|
|
- init();
|
|
|
- let id = "ROBIN";
|
|
|
- info!("This record will be captured by `cargo test`");
|
|
|
-
|
|
|
- let dir = format!("/data/longreads_basic_pipe/{id}/diag/scan/reads",);
|
|
|
-
|
|
|
- // Load from fasta in dir.
|
|
|
- let genome = Genome::from_contigs_sequences(&dir).unwrap();
|
|
|
- genome.stats();
|
|
|
- let mut res: Vec<String> = genome
|
|
|
- .iter()
|
|
|
- .flat_map(|(_s, chrom)| {
|
|
|
- chrom.iter().filter_map(|c| c.hgvs())
|
|
|
- // .map(|c| println!("{c}"))
|
|
|
- })
|
|
|
- .collect();
|
|
|
- res.sort();
|
|
|
- res.dedup();
|
|
|
- res.iter().for_each(|s| println!("{s}"));
|
|
|
-
|
|
|
- // println!("{genome:#?}");
|
|
|
- }
|
|
|
-}
|