|
|
@@ -98,6 +98,7 @@ impl ContigRef {
|
|
|
ContigRef::Ambigous(_) => None,
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
|
|
|
let left = "►";
|
|
|
let right = "◄";
|
|
|
@@ -524,6 +525,7 @@ impl Genome {
|
|
|
sequence: &[u8],
|
|
|
aligner: impl Fn(String) -> Result<Vec<Mapping>>,
|
|
|
) -> Result<()> {
|
|
|
+ println!("adding: {name}");
|
|
|
let mappings = aligner(String::from_utf8(sequence.to_vec())?)?;
|
|
|
// println!("{mappings:?}");
|
|
|
self.add_contig(name, mappings, None, String::from_utf8(sequence.to_vec())?)?;
|
|
|
@@ -546,6 +548,7 @@ impl Genome {
|
|
|
}
|
|
|
|
|
|
pub fn add_dir(&mut self, dir: &str) -> Result<()> {
|
|
|
+ info!("Adding dir: {dir}");
|
|
|
let aligner_url = "http://localhost:4444/align";
|
|
|
let aligner = aligner_client::dist_align(aligner_url.to_string());
|
|
|
for path in get_contigs_fa_paths(dir)? {
|
|
|
@@ -632,6 +635,7 @@ impl Contig {
|
|
|
let bam_path = format!("{contig_dir}/{}.bam", self.id);
|
|
|
write_bam(&fasta_path, &reads_path, &bam_path)?;
|
|
|
|
|
|
+ // TODO: modify
|
|
|
let bed_path = format!("{contig_dir}/contig.bed");
|
|
|
match &self.contig_ref {
|
|
|
ContigRef::Chimeric((a, b)) => {
|
|
|
@@ -1102,134 +1106,134 @@ mod tests {
|
|
|
}
|
|
|
|
|
|
#[test]
|
|
|
- fn test_graph() -> Result<()> {
|
|
|
- init();
|
|
|
-
|
|
|
- let case = "SALICETTO";
|
|
|
- let chrom = ["chr10"];
|
|
|
- info!("This record will be captured by `cargo test`");
|
|
|
- let genome = load_genome(case);
|
|
|
-
|
|
|
- // let dir = format!("/data/longreads_basic_pipe/{case}/diag/asm_contis");
|
|
|
- //
|
|
|
- // // Load from fasta in dir.
|
|
|
- // let genome = Genome::from_dir(&dir)?;
|
|
|
- // genome.stats();
|
|
|
- let mut genomic_graph = GenomicGraph::from_genome(&genome);
|
|
|
-
|
|
|
- let sens = vec![true, false];
|
|
|
- let pos = vec![0, i32::MAX];
|
|
|
- let mut all_ways = Vec::new();
|
|
|
- if chrom.len() > 1 {
|
|
|
- (0..4).into_iter().for_each(|i| {
|
|
|
- let start_pos = if i < 2 { 0 } else { i32::MAX };
|
|
|
- let end_pos = pos[i % 2];
|
|
|
-
|
|
|
- (0..4).into_iter().for_each(|i| {
|
|
|
- let start_sens = if i < 2 { true } else { false };
|
|
|
- let end_sens = sens[i % 2];
|
|
|
- (0..4).into_iter().for_each(|i| {
|
|
|
- let start_chr = if i < 2 { chrom[0] } else { chrom[1] };
|
|
|
- let end_chr = chrom[i % 2];
|
|
|
- let start = (start_sens, start_chr, start_pos);
|
|
|
- let end = (end_sens, end_chr, end_pos);
|
|
|
-
|
|
|
- let (oriented_graph, _integrated_graph, ways) =
|
|
|
- genomic_graph.ways(start, end);
|
|
|
-
|
|
|
- let dot = oriented_graph.dot_graph();
|
|
|
- println!("dot\n{dot}");
|
|
|
-
|
|
|
- for (_i, way) in ways.iter().enumerate() {
|
|
|
- let s = way
|
|
|
- .iter()
|
|
|
- .map(|(_, _, _, s)| s.to_string())
|
|
|
- .collect::<Vec<String>>()
|
|
|
- .join("");
|
|
|
- all_ways.push(s);
|
|
|
- }
|
|
|
- });
|
|
|
- });
|
|
|
- });
|
|
|
- } else {
|
|
|
- let start_chr = chrom[0];
|
|
|
- let end_chr = chrom[0];
|
|
|
- let start = (true, start_chr, 0);
|
|
|
- let end = (true, end_chr, i32::MAX);
|
|
|
-
|
|
|
- let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end);
|
|
|
-
|
|
|
- let dot = oriented_graph.dot_graph();
|
|
|
- println!("dot\n{dot}");
|
|
|
-
|
|
|
- for (_i, way) in ways.iter().enumerate() {
|
|
|
- let s = way
|
|
|
- .iter()
|
|
|
- .map(|(_, _, _, s)| s.to_string())
|
|
|
- .collect::<Vec<String>>()
|
|
|
- .join("");
|
|
|
- all_ways.push(s);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- all_ways.dedup();
|
|
|
- all_ways
|
|
|
- .iter()
|
|
|
- .enumerate()
|
|
|
- .for_each(|(i, s)| println!("{i}.\t{s}"));
|
|
|
-
|
|
|
- // let s = Dot::new(&integrated_graph).to_string().replace("\\\"", "");
|
|
|
- // let x11_colors: Vec<String> = vec![
|
|
|
- // String::from("Red"),
|
|
|
- // String::from("Green"),
|
|
|
- // String::from("Blue"),
|
|
|
- // String::from("Cyan"),
|
|
|
- // String::from("Magenta"),
|
|
|
- // String::from("Yellow"),
|
|
|
- // String::from("DarkRed"),
|
|
|
- // String::from("DarkGreen"),
|
|
|
- // String::from("DarkBlue"),
|
|
|
- // String::from("DarkCyan"),
|
|
|
- // String::from("DarkMagenta"),
|
|
|
- // String::from("DarkYellow"),
|
|
|
- // String::from("LightRed"),
|
|
|
- // String::from("LightGreen"),
|
|
|
- // String::from("LightBlue"),
|
|
|
- // String::from("LightCyan"),
|
|
|
- // String::from("LightMagenta"),
|
|
|
- // String::from("LightYellow"),
|
|
|
- // String::from("Orange"),
|
|
|
- // String::from("Brown"),
|
|
|
- // String::from("Beige"),
|
|
|
- // ];
|
|
|
- // let mut s = s.clone();
|
|
|
- // ways.iter().enumerate().for_each(|(i, _)| {
|
|
|
- // s = s.replace(
|
|
|
- // &format!("[ label = \"{}\" ]", i + 1),
|
|
|
- // &format!(
|
|
|
- // "[ label = \"{}\" color = \"{}\" ]",
|
|
|
- // i + 1,
|
|
|
- // x11_colors[i].to_string()
|
|
|
- // ),
|
|
|
- // );
|
|
|
- // });
|
|
|
- // println!("{s}");
|
|
|
- //
|
|
|
- // for (i, way) in ways.iter().enumerate() {
|
|
|
- // let s = way
|
|
|
- // .iter()
|
|
|
- // .map(|(_, _, _, s)| s.to_string())
|
|
|
- // .collect::<Vec<String>>()
|
|
|
- // .join("");
|
|
|
- // println!("{}.\t{s}", i + 1);
|
|
|
- // }
|
|
|
- Ok(())
|
|
|
- }
|
|
|
+ // fn test_graph() -> Result<()> {
|
|
|
+ // init();
|
|
|
+ //
|
|
|
+ // let case = "SALICETTO";
|
|
|
+ // let chrom = ["chr10"];
|
|
|
+ // info!("This record will be captured by `cargo test`");
|
|
|
+ // let genome = load_genome(case);
|
|
|
+ //
|
|
|
+ // // let dir = format!("/data/longreads_basic_pipe/{case}/diag/asm_contis");
|
|
|
+ // //
|
|
|
+ // // // Load from fasta in dir.
|
|
|
+ // // let genome = Genome::from_dir(&dir)?;
|
|
|
+ // // genome.stats();
|
|
|
+ // let mut genomic_graph = GenomicGraph::from_genome(&genome);
|
|
|
+ //
|
|
|
+ // let sens = vec![true, false];
|
|
|
+ // let pos = vec![0, i32::MAX];
|
|
|
+ // let mut all_ways = Vec::new();
|
|
|
+ // if chrom.len() > 1 {
|
|
|
+ // (0..4).into_iter().for_each(|i| {
|
|
|
+ // let start_pos = if i < 2 { 0 } else { i32::MAX };
|
|
|
+ // let end_pos = pos[i % 2];
|
|
|
+ //
|
|
|
+ // (0..4).into_iter().for_each(|i| {
|
|
|
+ // let start_sens = if i < 2 { true } else { false };
|
|
|
+ // let end_sens = sens[i % 2];
|
|
|
+ // (0..4).into_iter().for_each(|i| {
|
|
|
+ // let start_chr = if i < 2 { chrom[0] } else { chrom[1] };
|
|
|
+ // let end_chr = chrom[i % 2];
|
|
|
+ // let start = (start_sens, start_chr, start_pos);
|
|
|
+ // let end = (end_sens, end_chr, end_pos);
|
|
|
+ //
|
|
|
+ // let (oriented_graph, _integrated_graph, ways) =
|
|
|
+ // genomic_graph.ways(start, end);
|
|
|
+ //
|
|
|
+ // let dot = oriented_graph.dot_graph();
|
|
|
+ // println!("dot\n{dot}");
|
|
|
+ //
|
|
|
+ // for (_i, way) in ways.iter().enumerate() {
|
|
|
+ // let s = way
|
|
|
+ // .iter()
|
|
|
+ // .map(|(_, _, _, s)| s.to_string())
|
|
|
+ // .collect::<Vec<String>>()
|
|
|
+ // .join("");
|
|
|
+ // all_ways.push(s);
|
|
|
+ // }
|
|
|
+ // });
|
|
|
+ // });
|
|
|
+ // });
|
|
|
+ // } else {
|
|
|
+ // let start_chr = chrom[0];
|
|
|
+ // let end_chr = chrom[0];
|
|
|
+ // let start = (true, start_chr, 0);
|
|
|
+ // let end = (true, end_chr, i32::MAX);
|
|
|
+ //
|
|
|
+ // let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end);
|
|
|
+ //
|
|
|
+ // let dot = oriented_graph.dot_graph();
|
|
|
+ // println!("dot\n{dot}");
|
|
|
+ //
|
|
|
+ // for (_i, way) in ways.iter().enumerate() {
|
|
|
+ // let s = way
|
|
|
+ // .iter()
|
|
|
+ // .map(|(_, _, _, s)| s.to_string())
|
|
|
+ // .collect::<Vec<String>>()
|
|
|
+ // .join("");
|
|
|
+ // all_ways.push(s);
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // all_ways.dedup();
|
|
|
+ // all_ways
|
|
|
+ // .iter()
|
|
|
+ // .enumerate()
|
|
|
+ // .for_each(|(i, s)| println!("{i}.\t{s}"));
|
|
|
+ //
|
|
|
+ // // let s = Dot::new(&integrated_graph).to_string().replace("\\\"", "");
|
|
|
+ // // let x11_colors: Vec<String> = vec![
|
|
|
+ // // String::from("Red"),
|
|
|
+ // // String::from("Green"),
|
|
|
+ // // String::from("Blue"),
|
|
|
+ // // String::from("Cyan"),
|
|
|
+ // // String::from("Magenta"),
|
|
|
+ // // String::from("Yellow"),
|
|
|
+ // // String::from("DarkRed"),
|
|
|
+ // // String::from("DarkGreen"),
|
|
|
+ // // String::from("DarkBlue"),
|
|
|
+ // // String::from("DarkCyan"),
|
|
|
+ // // String::from("DarkMagenta"),
|
|
|
+ // // String::from("DarkYellow"),
|
|
|
+ // // String::from("LightRed"),
|
|
|
+ // // String::from("LightGreen"),
|
|
|
+ // // String::from("LightBlue"),
|
|
|
+ // // String::from("LightCyan"),
|
|
|
+ // // String::from("LightMagenta"),
|
|
|
+ // // String::from("LightYellow"),
|
|
|
+ // // String::from("Orange"),
|
|
|
+ // // String::from("Brown"),
|
|
|
+ // // String::from("Beige"),
|
|
|
+ // // ];
|
|
|
+ // // let mut s = s.clone();
|
|
|
+ // // ways.iter().enumerate().for_each(|(i, _)| {
|
|
|
+ // // s = s.replace(
|
|
|
+ // // &format!("[ label = \"{}\" ]", i + 1),
|
|
|
+ // // &format!(
|
|
|
+ // // "[ label = \"{}\" color = \"{}\" ]",
|
|
|
+ // // i + 1,
|
|
|
+ // // x11_colors[i].to_string()
|
|
|
+ // // ),
|
|
|
+ // // );
|
|
|
+ // // });
|
|
|
+ // // println!("{s}");
|
|
|
+ // //
|
|
|
+ // // for (i, way) in ways.iter().enumerate() {
|
|
|
+ // // let s = way
|
|
|
+ // // .iter()
|
|
|
+ // // .map(|(_, _, _, s)| s.to_string())
|
|
|
+ // // .collect::<Vec<String>>()
|
|
|
+ // // .join("");
|
|
|
+ // // println!("{}.\t{s}", i + 1);
|
|
|
+ // // }
|
|
|
+ // Ok(())
|
|
|
+ // }
|
|
|
|
|
|
#[test]
|
|
|
fn dir() {
|
|
|
init();
|
|
|
- let id = "CHAMPION";
|
|
|
+ let id = "BITANG";
|
|
|
|
|
|
let mut chr: Vec<String> = (1..=22).map(|p| format!("chr{p}")).collect();
|
|
|
chr.extend(["chrX", "chrY", "chrM"].iter().map(|&s| s.to_string()));
|
|
|
@@ -1281,15 +1285,14 @@ mod tests {
|
|
|
#[test]
|
|
|
fn contig() {
|
|
|
init();
|
|
|
- let id = "SALICETTO";
|
|
|
- let chr = "chr10";
|
|
|
-
|
|
|
- // Diag
|
|
|
+ let id = "BITANG";
|
|
|
+ let chr = "chr3";
|
|
|
|
|
|
// MRD
|
|
|
let dir = format!("/data/longreads_basic_pipe/{id}/mrd/assemblies/{chr}",);
|
|
|
let mut genome = Genome::new();
|
|
|
genome.add_dir(&dir).unwrap();
|
|
|
+ print!("{genome:?}");
|
|
|
let mut bp_representations_mrd: Vec<String> = genome
|
|
|
.iter()
|
|
|
.flat_map(|(_, c)| {
|
|
|
@@ -1305,9 +1308,9 @@ mod tests {
|
|
|
.collect();
|
|
|
bp_representations_mrd.sort();
|
|
|
bp_representations_mrd.dedup();
|
|
|
-
|
|
|
info!("Mrd bp {}", bp_representations_mrd.len());
|
|
|
|
|
|
+ let bp_representations_mrd = Vec::new();
|
|
|
let dir = format!("/data/longreads_basic_pipe/{id}/diag/assemblies/{chr}",);
|
|
|
let mut genome = Genome::new();
|
|
|
genome.add_dir(&dir).unwrap();
|
|
|
@@ -1315,6 +1318,10 @@ mod tests {
|
|
|
.iter()
|
|
|
.flat_map(|(_, c)| {
|
|
|
c.contigs.iter().filter_map(|contig| {
|
|
|
+ println!("{:#?}", contig.hgvs());
|
|
|
+ if contig.id == "47108362-81682505_8090_wtdbg2".to_string() {
|
|
|
+ println!("{contig:#?}");
|
|
|
+ }
|
|
|
contig
|
|
|
.breakpoints_repr()
|
|
|
.and_then(|r| r.first().cloned())
|
|
|
@@ -1330,12 +1337,10 @@ mod tests {
|
|
|
|
|
|
let mut grouped: HashMap<String, Vec<String>> = HashMap::new();
|
|
|
|
|
|
- // Groupement des IDs par breakpoint
|
|
|
for (id, bp) in bp_representations {
|
|
|
grouped.entry(bp).or_default().push(id);
|
|
|
}
|
|
|
|
|
|
- // Conversion en Vec, tri des IDs dans chaque groupe, et tri final
|
|
|
let mut result: Vec<(Vec<String>, String)> = grouped
|
|
|
.into_iter()
|
|
|
.map(|(bp, mut ids)| {
|
|
|
@@ -1350,5 +1355,6 @@ mod tests {
|
|
|
result
|
|
|
.iter()
|
|
|
.for_each(|(ids, bp)| { println!("{bp}\t{}", ids.join("|")) });
|
|
|
+ println!("{id}");
|
|
|
}
|
|
|
}
|