|
|
@@ -4,6 +4,7 @@ pub mod genomic_graph;
|
|
|
|
|
|
use anyhow::{anyhow, Ok, Result};
|
|
|
use fasta::record::Sequence;
|
|
|
+use genomic_graph::GenomicGraphParams;
|
|
|
use log::info;
|
|
|
use minimap2::{Aligner, Mapping};
|
|
|
use noodles_fasta as fasta;
|
|
|
@@ -102,10 +103,13 @@ impl ContigRef {
|
|
|
let right = "◄";
|
|
|
let bp_right = "▐";
|
|
|
let bp_left = "▌";
|
|
|
+
|
|
|
+ let bp_right = "|";
|
|
|
+ let bp_left = "|";
|
|
|
let format = CustomFormat::builder()
|
|
|
.grouping(Grouping::Standard)
|
|
|
.minus_sign("-")
|
|
|
- .separator("_")
|
|
|
+ .separator(",")
|
|
|
.build()
|
|
|
.unwrap();
|
|
|
|
|
|
@@ -120,19 +124,26 @@ impl ContigRef {
|
|
|
let mut res = Vec::new();
|
|
|
if let Some(breakpoints) = self.breakpoints() {
|
|
|
for v in breakpoints.windows(2) {
|
|
|
+ assert!(v[0].target_start < v[0].target_end);
|
|
|
+ assert!(v[1].target_start < v[1].target_end);
|
|
|
let mut bp_string = format!("{}:", v[0].target_name.clone().unwrap_or(uk.clone()));
|
|
|
- let _ = bp_string
|
|
|
- .write_formatted(&v[0].target_end, &format)
|
|
|
- .unwrap();
|
|
|
+ let bp_pos = match v[1].strand {
|
|
|
+ minimap2::Strand::Forward => v[0].target_end,
|
|
|
+ minimap2::Strand::Reverse => v[1].target_start,
|
|
|
+ };
|
|
|
+
|
|
|
+ let _ = bp_string.write_formatted(&bp_pos, &format).unwrap();
|
|
|
bp_string = format!(
|
|
|
"{bp_string}{}{bp_left}{bp_right}{}{}:",
|
|
|
get_sign(&v[0]),
|
|
|
get_sign(&v[1]),
|
|
|
v[1].target_name.clone().unwrap_or(uk.clone())
|
|
|
);
|
|
|
- let _ = bp_string
|
|
|
- .write_formatted(&v[1].target_start, &format)
|
|
|
- .unwrap();
|
|
|
+ let bp_pos = match v[1].strand {
|
|
|
+ minimap2::Strand::Forward => v[1].target_start,
|
|
|
+ minimap2::Strand::Reverse => v[1].target_end,
|
|
|
+ };
|
|
|
+ let _ = bp_string.write_formatted(&bp_pos, &format).unwrap();
|
|
|
res.push(bp_string);
|
|
|
}
|
|
|
}
|
|
|
@@ -271,15 +282,23 @@ impl ContigRef {
|
|
|
}
|
|
|
|
|
|
pub fn mapping_to_string(mapping: &Mapping) -> String {
|
|
|
+ let config = GenomicGraphParams::default();
|
|
|
+
|
|
|
+ let sens = match mapping.strand {
|
|
|
+ minimap2::Strand::Forward => config.sens_symb.clone(),
|
|
|
+ minimap2::Strand::Reverse => config.rc_symb.clone(),
|
|
|
+ };
|
|
|
+
|
|
|
let uk = "UNKNOWN".to_string();
|
|
|
format!(
|
|
|
- "{}:{}-{}({}:{}-{})",
|
|
|
+ "{}:{}-{}({}-{}){}",
|
|
|
mapping.target_name.clone().unwrap_or(uk.clone()),
|
|
|
mapping.target_start,
|
|
|
mapping.target_end,
|
|
|
- mapping.query_name.clone().unwrap_or(uk),
|
|
|
+ // mapping.query_name.clone().unwrap_or(uk),
|
|
|
mapping.query_start,
|
|
|
- mapping.query_end
|
|
|
+ mapping.query_end,
|
|
|
+ sens
|
|
|
)
|
|
|
}
|
|
|
|
|
|
@@ -1210,8 +1229,7 @@ mod tests {
|
|
|
#[test]
|
|
|
fn dir() {
|
|
|
init();
|
|
|
-
|
|
|
- let id = "RIVOALEN";
|
|
|
+ let id = "CHAMPION";
|
|
|
|
|
|
let mut chr: Vec<String> = (1..=22).map(|p| format!("chr{p}")).collect();
|
|
|
chr.extend(["chrX", "chrY", "chrM"].iter().map(|&s| s.to_string()));
|
|
|
@@ -1260,4 +1278,66 @@ mod tests {
|
|
|
|
|
|
// println!("{genome:#?}");
|
|
|
}
|
|
|
+ #[test]
|
|
|
+ fn contig() {
|
|
|
+ init();
|
|
|
+ let id = "SALICETTO";
|
|
|
+ let chr = "chr10";
|
|
|
+
|
|
|
+ // Diag
|
|
|
+ let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
|
|
|
+ let mut genome = Genome::new();
|
|
|
+ genome.add_dir(&dir).unwrap();
|
|
|
+ let mut bp_representations: Vec<String> = genome
|
|
|
+ .iter()
|
|
|
+ .flat_map(|(_, c)| {
|
|
|
+ c.contigs
|
|
|
+ .iter()
|
|
|
+ .filter_map(|contig| contig.breakpoints_repr().and_then(|r| r.first().cloned()))
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ bp_representations.sort();
|
|
|
+ bp_representations.dedup();
|
|
|
+ info!("Diag bp {}", bp_representations.len());
|
|
|
+
|
|
|
+ // MRD
|
|
|
+ let dir = format!("/data/longreads_basic_pipe/{id}/mrd/assemblies/{chr}",);
|
|
|
+ let mut genome = Genome::new();
|
|
|
+ genome.add_dir(&dir).unwrap();
|
|
|
+ let mut bp_representations_mrd: Vec<String> = genome
|
|
|
+ .iter()
|
|
|
+ .flat_map(|(_, c)| {
|
|
|
+ c.contigs
|
|
|
+ .iter()
|
|
|
+ .filter_map(|contig| contig.breakpoints_repr().and_then(|r| r.first().cloned()))
|
|
|
+ })
|
|
|
+ .flat_map(|bp| {
|
|
|
+ bp.split("||")
|
|
|
+ .map(|s| s.replace("►", "").replace("◄", ""))
|
|
|
+ .collect::<Vec<String>>()
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+ bp_representations_mrd.sort();
|
|
|
+ bp_representations_mrd.dedup();
|
|
|
+
|
|
|
+ info!("Mrd bp {}", bp_representations_mrd.len());
|
|
|
+
|
|
|
+ let bp: Vec<String> = bp_representations
|
|
|
+ .into_iter()
|
|
|
+ .filter(|bp| !bp_representations_mrd.contains(bp))
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ info!("Final BP {}", bp.len());
|
|
|
+
|
|
|
+ bp.iter()
|
|
|
+ .filter(|bp| {
|
|
|
+ bp.split("||")
|
|
|
+ .map(|s| s.replace("►", "").replace("◄", ""))
|
|
|
+ .filter(|bp| bp_representations_mrd.contains(bp))
|
|
|
+ .count()
|
|
|
+ == 0
|
|
|
+ })
|
|
|
+ .for_each(|bp| println!("{bp}"));
|
|
|
+ }
|
|
|
}
|