Thomas 2 gadi atpakaļ
vecāks
revīzija
091a3e7c63
3 mainītis faili ar 144 papildinājumiem un 8 dzēšanām
  1. 46 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 97 8
      src/lib.rs

+ 46 - 0
Cargo.lock

@@ -57,6 +57,12 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
+[[package]]
+name = "bytes"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
+
 [[package]]
 name = "bzip2"
 version = "0.4.4"
@@ -112,6 +118,15 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.19"
@@ -157,6 +172,7 @@ dependencies = [
  "anyhow",
  "log",
  "minimap2",
+ "noodles-fasta",
  "rust-htslib",
  "seq_io",
  "uuid",
@@ -363,6 +379,36 @@ dependencies = [
  "rustc_version",
 ]
 
+[[package]]
+name = "noodles-bgzf"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43ce62d1e012aa3793e17be1c286b8b71dad5a902a19524eb21729ed16113a9b"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "crossbeam-channel",
+ "flate2",
+]
+
+[[package]]
+name = "noodles-core"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7336c3be652de4e05444c9b12a32331beb5ba3316e8872d92bfdd8ef3b06c282"
+
+[[package]]
+name = "noodles-fasta"
+version = "0.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a838081c9f88c96c7a2c14c8c8a3303f282964b9e91fb53de2f3f9ef8c272b21"
+dependencies = [
+ "bytes",
+ "memchr",
+ "noodles-bgzf",
+ "noodles-core",
+]
+
 [[package]]
 name = "openssl-src"
 version = "300.2.3+3.2.1"

+ 1 - 0
Cargo.toml

@@ -12,3 +12,4 @@ anyhow = "1.0.75"
 log = "0.4.19"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }
 seq_io = "0.3.2"
+noodles-fasta = "0.34.0"

+ 97 - 8
src/lib.rs

@@ -2,11 +2,14 @@ use anyhow::{Ok, Result};
 use log::info;
 use minimap2::{Aligner, Mapping};
 use rust_htslib::bam::{self, Record};
-use uuid::Uuid;
 use std::{
     collections::{HashMap, VecDeque},
-    fmt, fs::File, io::BufWriter,
+    fmt,
+    fs::{File, self},
+    io::BufWriter, process::{Command, Stdio},
 };
+use uuid::Uuid;
+use noodles_fasta as fasta;
 
 #[derive(Debug, Clone)]
 pub struct Genome {
@@ -321,9 +324,21 @@ impl Contig {
             .sort_by(|a, b| a.target_start.cmp(&b.target_start));
     }
 
-    // pub fn to_igv(&mut seld) -> {
-    //
-    // }
+    /// Writes the files needed to inspect this contig in IGV under `dir_path/<id>/`.
+    ///
+    /// Three files are produced inside the contig directory: `contig.fa` (the
+    /// contig sequence), `reads.fq` (the supporting records) and `<id>.bam`
+    /// (built by `create_bam` from the two files above).
+    ///
+    /// # Errors
+    /// Returns an error if the directory cannot be created, if the FASTQ cannot
+    /// be written, or if `create_bam` fails.
+    ///
+    /// # Panics
+    /// `write_fasta` returns `()` and panics on I/O failure, so a failure while
+    /// writing `contig.fa` aborts instead of being returned as an error.
+    pub fn to_igv(&self, dir_path: &str) -> Result<()> {
+        let contig_dir = format!("{dir_path}/{}", self.id);
+        // `create_dir_all` accepts any `AsRef<Path>`; borrowing avoids cloning the string.
+        fs::create_dir_all(&contig_dir)?;
+
+        let fasta_path = format!("{contig_dir}/contig.fa");
+        write_fasta(&fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
+
+        let reads_path = format!("{contig_dir}/reads.fq");
+        write_fastq(&reads_path, &self.supporting_records)?;
+
+        let bam_path = format!("{contig_dir}/{}.bam", self.id);
+        create_bam(&fasta_path, &reads_path, &bam_path)?;
+
+        Ok(())
+    }
 
     // bug cigar len != seq len
     pub fn write_bam(&self, path: &str) -> Result<()> {
@@ -400,12 +415,86 @@ fn group_mappings(mappings: &mut Vec<Mapping>) -> Result<Vec<Vec<Mapping>>> {
 }
 
 // unique
-pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) {
-    let file = File::create(fastq_path).unwrap();
+/// Writes every record in `d` to `fastq_path` in FASTQ format.
+///
+/// Each entry is emitted with the record's query name, sequence bytes and
+/// qualities as returned by `Record::qual`; no description field is written.
+///
+/// # Errors
+/// Returns an error if the file cannot be created, a record cannot be
+/// written, or the final flush of the buffered writer fails.
+pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) -> Result<()> {
+    let file = File::create(fastq_path)?;
     let mut writer = BufWriter::new(file);
     for record in d {
-        seq_io::fastq::write_parts(&mut writer, record.qname(), None, &record.seq().as_bytes(), record.qual());
+        seq_io::fastq::write_parts(
+            &mut writer,
+            record.qname(),
+            None,
+            &record.seq().as_bytes(),
+            record.qual(),
+        )?;
     }
+    // Flush explicitly: `BufWriter` also flushes on drop, but it discards any error
+    // there, which would let a truncated file be reported as success. The call is
+    // fully qualified so it does not depend on `std::io::Write` being imported.
+    std::io::Write::flush(&mut writer)?;
+    Ok(())
+}
+
+// unique
+/// Writes `(name, sequence)` pairs to `fasta_path` as FASTA records.
+///
+/// Pairs with an empty sequence are skipped. When a name occurs more than
+/// once, only its first occurrence with a non-empty sequence is written.
+///
+/// # Panics
+/// Panics if the file cannot be created or a record cannot be written; the
+/// function returns `()`, so these failures cannot be reported to the caller.
+pub fn write_fasta(fasta_path: &str, d: &Vec<(String, String)>) {
+    let file = File::create(fasta_path)
+        .unwrap_or_else(|e| panic!("cannot create FASTA file {fasta_path}: {e}"));
+    let mut writer = fasta::writer::Builder::default().build_with_writer(file);
+    // Names already written. Borrowing `&str` into a hash set keeps the duplicate
+    // check constant-time per record and avoids allocating a `String` per name.
+    let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
+    for (name, sequence) in d {
+        if sequence.is_empty() {
+            continue;
+        }
+        // `insert` returns `false` when the name was already in the set.
+        if !seen.insert(name.as_str()) {
+            continue;
+        }
+        let record = fasta::Record::new(
+            fasta::record::Definition::new(name.as_str(), None),
+            fasta::record::Sequence::from(sequence.as_bytes().to_vec()),
+        );
+        writer
+            .write_record(&record)
+            .unwrap_or_else(|e| panic!("cannot write FASTA record {name}: {e}"));
+    }
+}
+
+/// Aligns `reads_path` against `ref_path` and writes the sorted result to `bam_path`.
+///
+/// Runs the pipeline `minimap2 -ax map-ont | sambamba view | sambamba sort`,
+/// connecting the stages through pipes. Both executables must be found on
+/// `PATH`. A fresh UUID is generated per call and used in the read-group
+/// `ID` and `SM` fields.
+///
+/// # Errors
+/// Returns an error if any stage cannot be started, cannot be waited on, or
+/// exits with a non-success status.
+pub fn create_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()> {
+    let rg_id = uuid::Uuid::new_v4();
+    // minimap2 refuses a `-R` value containing literal tab characters; the fields
+    // must be separated by the two-character escape `\t`, hence `\\t` in this
+    // literal. Rust interpolates `{rg_id}` directly, so no `$` prefix is used.
+    let read_group =
+        format!("@RG\\tPL:ONTASM_PROM\\tID:ONTASM_{rg_id}\\tSM:{rg_id}\\tLB:ONTASM_NB_PROM");
+
+    let mut mm2 = Command::new("minimap2")
+        .args(["-t", "128", "-ax", "map-ont", "-R"])
+        .arg(&read_group)
+        .arg(ref_path)
+        .arg(reads_path)
+        .stdout(Stdio::piped())
+        .spawn()
+        .map_err(|e| anyhow::anyhow!("minimap2 failed to start: {e}"))?;
+    let mm2_out = mm2
+        .stdout
+        .take()
+        .ok_or_else(|| anyhow::anyhow!("minimap2 stdout was not captured"))?;
+
+    let mut view = Command::new("sambamba")
+        .args(["view", "-h", "-S", "-t", "20", "--format=bam", "/dev/stdin"])
+        .stdin(Stdio::from(mm2_out))
+        .stdout(Stdio::piped())
+        .spawn()
+        .map_err(|e| anyhow::anyhow!("sambamba view failed to start: {e}"))?;
+    let view_out = view
+        .stdout
+        .take()
+        .ok_or_else(|| anyhow::anyhow!("sambamba view stdout was not captured"))?;
+
+    let mut sort = Command::new("sambamba")
+        .args(["sort", "-t", "20", "/dev/stdin", "-o"])
+        .arg(bam_path)
+        .stdin(Stdio::from(view_out))
+        .spawn()
+        .map_err(|e| anyhow::anyhow!("sambamba sort failed to start: {e}"))?;
+
+    // Wait on every stage so no child is left unreaped, then report the first
+    // failing stage in pipeline order: a failure upstream is usually the root
+    // cause of the failures after it.
+    let sort_status = sort.wait()?;
+    let view_status = view.wait()?;
+    let mm2_status = mm2.wait()?;
+    let stages = [
+        ("minimap2", mm2_status),
+        ("sambamba view", view_status),
+        ("sambamba sort", sort_status),
+    ];
+    for (stage, status) in stages {
+        if !status.success() {
+            anyhow::bail!("{stage} exited with {status}");
+        }
+    }
+    Ok(())
 }
 
 #[cfg(test)]