|
@@ -2,11 +2,14 @@ use anyhow::{Ok, Result};
|
|
|
use log::info;
|
|
use log::info;
|
|
|
use minimap2::{Aligner, Mapping};
|
|
use minimap2::{Aligner, Mapping};
|
|
|
use rust_htslib::bam::{self, Record};
|
|
use rust_htslib::bam::{self, Record};
|
|
|
-use uuid::Uuid;
|
|
|
|
|
use std::{
|
|
use std::{
|
|
|
collections::{HashMap, VecDeque},
|
|
collections::{HashMap, VecDeque},
|
|
|
- fmt, fs::File, io::BufWriter,
|
|
|
|
|
|
|
+ fmt,
|
|
|
|
|
+ fs::{File, self},
|
|
|
|
|
+ io::BufWriter, process::{Command, Stdio},
|
|
|
};
|
|
};
|
|
|
|
|
+use uuid::Uuid;
|
|
|
|
|
+use noodles_fasta as fasta;
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct Genome {
|
|
pub struct Genome {
|
|
@@ -321,9 +324,21 @@ impl Contig {
|
|
|
.sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
.sort_by(|a, b| a.target_start.cmp(&b.target_start));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // pub fn to_igv(&mut seld) -> {
|
|
|
|
|
- //
|
|
|
|
|
- // }
|
|
|
|
|
|
|
+ pub fn to_igv(&self, dir_path: &str) -> Result<()> {
|
|
|
|
|
+ let contig_dir = format!("{dir_path}/{}", self.id);
|
|
|
|
|
+ fs::create_dir_all(contig_dir.clone())?;
|
|
|
|
|
+
|
|
|
|
|
+ let fasta_path = format!("{contig_dir}/contig.fa");
|
|
|
|
|
+ write_fasta(&fasta_path, &vec![( self.id.clone(), self.sequence.clone() )]);
|
|
|
|
|
+
|
|
|
|
|
+ let reads_path = format!("{contig_dir}/reads.fq");
|
|
|
|
|
+ write_fastq(&reads_path, &self.supporting_records)?;
|
|
|
|
|
+
|
|
|
|
|
+ let bam_path = format!("{contig_dir}/{}.bam", self.id);
|
|
|
|
|
+ create_bam(&fasta_path, &reads_path, &bam_path)?;
|
|
|
|
|
+
|
|
|
|
|
+ Ok(())
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
// bug cigar len != seq len
|
|
// bug cigar len != seq len
|
|
|
pub fn write_bam(&self, path: &str) -> Result<()> {
|
|
pub fn write_bam(&self, path: &str) -> Result<()> {
|
|
@@ -400,12 +415,86 @@ fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// unique
|
|
// unique
|
|
|
-pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) {
|
|
|
|
|
- let file = File::create(fastq_path).unwrap();
|
|
|
|
|
|
|
+pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) -> Result<()> {
|
|
|
|
|
+ let file = File::create(fastq_path)?;
|
|
|
let mut writer = BufWriter::new(file);
|
|
let mut writer = BufWriter::new(file);
|
|
|
for record in d {
|
|
for record in d {
|
|
|
- seq_io::fastq::write_parts(&mut writer, record.qname(), None, &record.seq().as_bytes(), record.qual());
|
|
|
|
|
|
|
+ seq_io::fastq::write_parts(
|
|
|
|
|
+ &mut writer,
|
|
|
|
|
+ record.qname(),
|
|
|
|
|
+ None,
|
|
|
|
|
+ &record.seq().as_bytes(),
|
|
|
|
|
+ record.qual(),
|
|
|
|
|
+ )?;
|
|
|
}
|
|
}
|
|
|
|
|
+ Ok(())
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// unique
|
|
|
|
|
+pub fn write_fasta(fasta_path: &str, d: &Vec<(String, String)>) {
|
|
|
|
|
+ let file = File::create(fasta_path).unwrap();
|
|
|
|
|
+ let mut writer = fasta::writer::Builder::default().build_with_writer(file);
|
|
|
|
|
+ let mut passed = Vec::new();
|
|
|
|
|
+ for (name, sequence) in d {
|
|
|
|
|
+ let name = name.to_string();
|
|
|
|
|
+ if sequence.len() == 0 {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ if passed.contains(&name) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ passed.push(name.clone());
|
|
|
|
|
+ let record = fasta::Record::new(
|
|
|
|
|
+ fasta::record::Definition::new(name.as_str(), None),
|
|
|
|
|
+ fasta::record::Sequence::from(sequence.as_bytes().to_vec()),
|
|
|
|
|
+ );
|
|
|
|
|
+ writer.write_record(&record).unwrap();
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+pub fn create_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()> {
|
|
|
|
|
+ let rg_id = uuid::Uuid::new_v4();
|
|
|
|
|
+ let mm2 = Command::new("minimap2")
|
|
|
|
|
+ .arg("-t")
|
|
|
|
|
+ .arg("128")
|
|
|
|
|
+ .arg("-ax")
|
|
|
|
|
+ .arg("map-ont")
|
|
|
|
|
+ .arg("-R")
|
|
|
|
|
+ .arg(format!(
|
|
|
|
|
+ "@RG\tPL:ONTASM_PROM\tID:ONTASM_${rg_id}\tSM:${rg_id}\\tLB:ONTASM_NB_PROM"
|
|
|
|
|
+ ))
|
|
|
|
|
+ .arg(ref_path)
|
|
|
|
|
+ .arg(reads_path)
|
|
|
|
|
+ .stdout(Stdio::piped())
|
|
|
|
|
+ .spawn()
|
|
|
|
|
+ .expect("Minimap2 failed to start");
|
|
|
|
|
+
|
|
|
|
|
+ let view = Command::new("sambamba")
|
|
|
|
|
+ .arg("view")
|
|
|
|
|
+ .arg("-h")
|
|
|
|
|
+ .arg("-S")
|
|
|
|
|
+ .arg("-t")
|
|
|
|
|
+ .arg("20")
|
|
|
|
|
+ .arg("--format=bam")
|
|
|
|
|
+ .arg("/dev/stdin")
|
|
|
|
|
+ .stdin(Stdio::from(mm2.stdout.unwrap()))
|
|
|
|
|
+ .stdout(Stdio::piped())
|
|
|
|
|
+ .spawn()
|
|
|
|
|
+ .expect("Sambamba view failed to start");
|
|
|
|
|
+
|
|
|
|
|
+ let mut sort = Command::new("sambamba")
|
|
|
|
|
+ .arg("sort")
|
|
|
|
|
+ .arg("-t")
|
|
|
|
|
+ .arg("20")
|
|
|
|
|
+ .arg("/dev/stdin")
|
|
|
|
|
+ .arg("-o")
|
|
|
|
|
+ .arg(bam_path)
|
|
|
|
|
+ .stdin(Stdio::from(view.stdout.unwrap()))
|
|
|
|
|
+ .spawn()
|
|
|
|
|
+ .expect("Sambamba sort failed to start");
|
|
|
|
|
+
|
|
|
|
|
+ sort.wait().unwrap();
|
|
|
|
|
+ Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
#[cfg(test)]
|