|
@@ -10,6 +10,7 @@ use flye::{read_flye_coverage, run_flye};
|
|
|
use log::{info, warn};
|
|
use log::{info, warn};
|
|
|
use nom::AsBytes;
|
|
use nom::AsBytes;
|
|
|
use pandora_lib_blastn::BlastResult;
|
|
use pandora_lib_blastn::BlastResult;
|
|
|
|
|
+use pandora_lib_igv::{BamTrack, BedTrack, Track};
|
|
|
use petgraph::{
|
|
use petgraph::{
|
|
|
algo::dijkstra,
|
|
algo::dijkstra,
|
|
|
data::{Build, DataMap},
|
|
data::{Build, DataMap},
|
|
@@ -92,15 +93,15 @@ pub fn lastz_two_by_two(records: &[String], best: bool) -> anyhow::Result<Vec<Bl
|
|
|
|
|
|
|
|
pub fn lastz_bam(query_path: &str, subject_path: &str) -> anyhow::Result<Vec<bam::Record>> {
|
|
pub fn lastz_bam(query_path: &str, subject_path: &str) -> anyhow::Result<Vec<bam::Record>> {
|
|
|
let lastz_bin = "lastz";
|
|
let lastz_bin = "lastz";
|
|
|
- let pipe =
|
|
|
|
|
- format!("{lastz_bin} --format=softsam {query_path} {subject_path} | samtools view -b");
|
|
|
|
|
|
|
+ let pipe = format!("{lastz_bin} --format=softsam {query_path} {subject_path} | samtools sort");
|
|
|
let out = duct::cmd!("bash", "-c", pipe).reader()?;
|
|
let out = duct::cmd!("bash", "-c", pipe).reader()?;
|
|
|
let reader = bam::BamReader::from_stream(out, 4)?;
|
|
let reader = bam::BamReader::from_stream(out, 4)?;
|
|
|
Ok(reader.into_iter().filter_map(anyhow::Result::ok).collect())
|
|
Ok(reader.into_iter().filter_map(anyhow::Result::ok).collect())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub fn lastz_remap(
|
|
pub fn lastz_remap(
|
|
|
- bam_path: &str,
|
|
|
|
|
|
|
+ // bam_path: &str,
|
|
|
|
|
+ input_records: &Vec<bam::Record>,
|
|
|
ref_fa: &str,
|
|
ref_fa: &str,
|
|
|
ref_len: u32,
|
|
ref_len: u32,
|
|
|
new_bam: &str,
|
|
new_bam: &str,
|
|
@@ -113,9 +114,9 @@ pub fn lastz_remap(
|
|
|
let mut records = Vec::new();
|
|
let mut records = Vec::new();
|
|
|
let mut mm_hm = HashMap::new();
|
|
let mut mm_hm = HashMap::new();
|
|
|
let mut ml_hm = HashMap::new();
|
|
let mut ml_hm = HashMap::new();
|
|
|
- let reader = bam::BamReader::from_path(bam_path, 4).unwrap();
|
|
|
|
|
- for record in reader {
|
|
|
|
|
- let record = record?;
|
|
|
|
|
|
|
+ // let reader = bam::BamReader::from_path(bam_path, 4).unwrap();
|
|
|
|
|
+ for record in input_records {
|
|
|
|
|
+ // let record = record?;
|
|
|
let query_name = String::from_utf8(record.name().to_vec())?;
|
|
let query_name = String::from_utf8(record.name().to_vec())?;
|
|
|
if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
|
|
if let Some(TagValue::String(value, StringType::String)) = record.tags().get(b"MM") {
|
|
|
mm_hm.insert(query_name.clone(), value.to_vec());
|
|
mm_hm.insert(query_name.clone(), value.to_vec());
|
|
@@ -465,9 +466,10 @@ impl FlyeAsm {
|
|
|
let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
|
|
let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
|
|
|
info!("Creating tmp directory {tmp_dir}");
|
|
info!("Creating tmp directory {tmp_dir}");
|
|
|
fs::create_dir(&tmp_dir).unwrap();
|
|
fs::create_dir(&tmp_dir).unwrap();
|
|
|
- let input_fa = format!("{tmp_dir}/input.fa");
|
|
|
|
|
|
|
+ let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
|
|
|
|
|
|
|
|
- records_to_fasta(&self.input_records, &input_fa)?;
|
|
|
|
|
|
|
+ // let input_fa = format!("{tmp_dir}/input.fa");
|
|
|
|
|
+ // records_to_fasta(&self.input_records, &input_fa)?;
|
|
|
|
|
|
|
|
let flye_tmp_dir = format!("{tmp_dir}/flye");
|
|
let flye_tmp_dir = format!("{tmp_dir}/flye");
|
|
|
fs::create_dir(&flye_tmp_dir).unwrap();
|
|
fs::create_dir(&flye_tmp_dir).unwrap();
|
|
@@ -493,7 +495,7 @@ impl FlyeAsm {
|
|
|
let seq = record.sequence().as_ref();
|
|
let seq = record.sequence().as_ref();
|
|
|
let seq = String::from_utf8(seq.to_vec()).unwrap();
|
|
let seq = String::from_utf8(seq.to_vec()).unwrap();
|
|
|
let seq: String = seq[s..e].into();
|
|
let seq: String = seq[s..e].into();
|
|
|
- contigs.push(seq.clone());
|
|
|
|
|
|
|
+ contigs.push(seq);
|
|
|
}
|
|
}
|
|
|
self.contigs = Some(contigs);
|
|
self.contigs = Some(contigs);
|
|
|
} else {
|
|
} else {
|
|
@@ -538,14 +540,23 @@ impl FlyeAsm {
|
|
|
// Remaping input bam to contig
|
|
// Remaping input bam to contig
|
|
|
info!("Mapping input reads to {contig_id}");
|
|
info!("Mapping input reads to {contig_id}");
|
|
|
let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
|
|
let new_bam = format!("{}/{contig_id}.bam", self.asm_dir);
|
|
|
- let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
|
|
|
|
|
- lastz_remap(
|
|
|
|
|
- &self.input_bam,
|
|
|
|
|
- &contig_fa,
|
|
|
|
|
- contig.len() as u32,
|
|
|
|
|
- &new_bam,
|
|
|
|
|
- &modkit_pileup,
|
|
|
|
|
- )?;
|
|
|
|
|
|
|
+ duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
|
|
|
|
|
+
|
|
|
|
|
+ let input_fa = format!("{}/{}.fasta", self.asm_dir, self.input_id);
|
|
|
|
|
+ let bwa = format!("bwa mem {contig_fa} {input_fa}");
|
|
|
|
|
+ let samtools = "samtools sort /dev/stdin";
|
|
|
|
|
+ let pipe = format!("{bwa} | {samtools} > {new_bam}");
|
|
|
|
|
+ duct::cmd!("bash", "-c", pipe).run()?;
|
|
|
|
|
+ duct::cmd!("samtools", "index", new_bam).run()?;
|
|
|
|
|
+
|
|
|
|
|
+ // let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.asm_dir);
|
|
|
|
|
+ // lastz_remap(
|
|
|
|
|
+ // &self.input_records,
|
|
|
|
|
+ // &contig_fa,
|
|
|
|
|
+ // contig.len() as u32,
|
|
|
|
|
+ // &new_bam,
|
|
|
|
|
+ // &modkit_pileup,
|
|
|
|
|
+ // )?;
|
|
|
}
|
|
}
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
@@ -572,28 +583,29 @@ pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<(
|
|
|
|
|
|
|
|
pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
|
|
pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
|
|
|
let pattern = format!("{}/*.bam", dir);
|
|
let pattern = format!("{}/*.bam", dir);
|
|
|
- let re = Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$")
|
|
|
|
|
- .unwrap();
|
|
|
|
|
|
|
+ let re =
|
|
|
|
|
+ Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\.bam$").unwrap();
|
|
|
|
|
|
|
|
for entry in glob::glob(&pattern).expect("Failed to read glob pattern") {
|
|
for entry in glob::glob(&pattern).expect("Failed to read glob pattern") {
|
|
|
match entry {
|
|
match entry {
|
|
|
Ok(path) => {
|
|
Ok(path) => {
|
|
|
if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
|
|
if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
|
|
|
if re.is_match(file_name) {
|
|
if re.is_match(file_name) {
|
|
|
- if let Some(input_id ) = path.file_stem().and_then(|n| n.to_str()) {
|
|
|
|
|
|
|
+ if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
|
|
|
if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() {
|
|
if PathBuf::from(format!("{dir}/{input_id}_flye.bam")).exists() {
|
|
|
- warn!("{input_id} already assembled");
|
|
|
|
|
|
|
+ warn!("{input_id} already assembled: {}", igv_link(dir, input_id)?);
|
|
|
} else {
|
|
} else {
|
|
|
let f = FlyeAsm::new(dir, input_id)?;
|
|
let f = FlyeAsm::new(dir, input_id)?;
|
|
|
if let Ok(f) = f.assemble() {
|
|
if let Ok(f) = f.assemble() {
|
|
|
match f.save() {
|
|
match f.save() {
|
|
|
- Ok(_) => info!("✅"),
|
|
|
|
|
|
|
+ Ok(_) => {
|
|
|
|
|
+ info!("✅ Assembled: {}", igv_link(dir, input_id)?);
|
|
|
|
|
+ }
|
|
|
Err(e) => warn!("❌ {e}"),
|
|
Err(e) => warn!("❌ {e}"),
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -603,6 +615,37 @@ pub fn dir_flye(dir: &str) -> anyhow::Result<()> {
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+pub fn igv_link(dir: &str, contig_id: &str) -> anyhow::Result<String> {
|
|
|
|
|
+ let contig_fa = format!("{dir}/{contig_id}_flye.fa");
|
|
|
|
|
+ let bam_track = Track::Bam(BamTrack::new(
|
|
|
|
|
+ "Assembled reads",
|
|
|
|
|
+ &format!("{dir}/{contig_id}_flye.bam"),
|
|
|
|
|
+ ));
|
|
|
|
|
+ let bed_track = Track::Bed(BedTrack::new(
|
|
|
|
|
+ "Blastn",
|
|
|
|
|
+ &format!("{dir}/{contig_id}_flye.bed"),
|
|
|
|
|
+ ));
|
|
|
|
|
+ let mod_track = Track::Bed(BedTrack::new(
|
|
|
|
|
+ "Modified bases",
|
|
|
|
|
+ &format!("{dir}/{contig_id}_flye_mod.bed"),
|
|
|
|
|
+ ));
|
|
|
|
|
+
|
|
|
|
|
+ let reference = pandora_lib_igv::ReferenceValues {
|
|
|
|
|
+ id: format!("{contig_id}_flye.fa"),
|
|
|
|
|
+ name: format!("{contig_id}_flye.fa"),
|
|
|
|
|
+ index_url: format!("{contig_fa}.fai"),
|
|
|
|
|
+ fasta_url: contig_fa,
|
|
|
|
|
+ ..Default::default()
|
|
|
|
|
+ };
|
|
|
|
|
+ let igv = pandora_lib_igv::Session::default()
|
|
|
|
|
+ .with_reference(reference)
|
|
|
|
|
+ .add_track(bam_track)?
|
|
|
|
|
+ .add_track(bed_track)?
|
|
|
|
|
+ .add_track(mod_track)?;
|
|
|
|
|
+ let link = igv.link("http://store-desktop.local/igv/")?;
|
|
|
|
|
+ Ok(link)
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
#[cfg(test)]
|
|
#[cfg(test)]
|
|
|
mod tests {
|
|
mod tests {
|
|
|
use std::fs;
|
|
use std::fs;
|
|
@@ -663,13 +706,15 @@ mod tests {
|
|
|
let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
|
|
let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
|
|
|
|
|
|
|
|
FlyeAsm::new(&asm_dir, input_id)?.assemble()?.save()?;
|
|
FlyeAsm::new(&asm_dir, input_id)?.assemble()?.save()?;
|
|
|
|
|
+ let l = igv_link(&asm_dir, input_id)?;
|
|
|
|
|
+ info!("{l}");
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
#[test]
|
|
|
fn flye_d() -> anyhow::Result<()> {
|
|
fn flye_d() -> anyhow::Result<()> {
|
|
|
init();
|
|
init();
|
|
|
- let id = "SALICETTO";
|
|
|
|
|
|
|
+ let id = "BECERRA";
|
|
|
let res_dir = "/data/longreads_basic_pipe";
|
|
let res_dir = "/data/longreads_basic_pipe";
|
|
|
let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
|
|
let asm_dir = format!("{res_dir}/{id}/diag/asm_contigs");
|
|
|
dir_flye(&asm_dir)
|
|
dir_flye(&asm_dir)
|