Thomas hace 1 año
padre
commit
eb7105f059
Se han modificado 8 ficheros con 233 adiciones y 139 borrados
  1. 0 60
      Cargo.lock
  2. 0 1
      Cargo.toml
  3. 33 28
      src/assembler/flye.rs
  4. 14 2
      src/assembler/mod.rs
  5. 12 12
      src/assembler/spades.rs
  6. 12 18
      src/assembler/wtdbg2.rs
  7. 111 1
      src/io/bam.rs
  8. 51 17
      src/io/fasta.rs

+ 0 - 60
Cargo.lock

@@ -134,16 +134,6 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
 
-[[package]]
-name = "bstr"
-version = "1.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
-dependencies = [
- "memchr",
- "serde",
-]
-
 [[package]]
 name = "buffer-redux"
 version = "1.0.2"
@@ -159,12 +149,6 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
-[[package]]
-name = "bytes"
-version = "1.7.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
-
 [[package]]
 name = "bzip2-sys"
 version = "0.1.11+1.0.8"
@@ -249,15 +233,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "crossbeam-channel"
-version = "0.5.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2"
-dependencies = [
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-deque"
 version = "0.8.5"
@@ -671,40 +646,6 @@ dependencies = [
  "minimal-lexical",
 ]
 
-[[package]]
-name = "noodles-bgzf"
-version = "0.32.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b2fba0f4a64cc897d9396d730a0c444d148daed7de31ad5904ecc673178fc9d"
-dependencies = [
- "byteorder",
- "bytes",
- "crossbeam-channel",
- "flate2",
-]
-
-[[package]]
-name = "noodles-core"
-version = "0.15.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5a8c6b020d1205abef2b0fab4463a6c5ecc3c8f4d561ca8b0d1a42323376200"
-dependencies = [
- "bstr",
-]
-
-[[package]]
-name = "noodles-fasta"
-version = "0.42.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d60db7c4c514211598f2d7eb38e499e3b42d3eb690779fd0b36f224650c75c82"
-dependencies = [
- "bstr",
- "bytes",
- "memchr",
- "noodles-bgzf",
- "noodles-core",
-]
-
 [[package]]
 name = "num-format"
 version = "0.4.4"
@@ -772,7 +713,6 @@ dependencies = [
  "glob",
  "log",
  "nom",
- "noodles-fasta",
  "pandora_lib_blastn",
  "pandora_lib_igv",
  "petgraph",

+ 0 - 1
Cargo.toml

@@ -8,7 +8,6 @@ log = "^0.4.22"
 env_logger = "^0.11.5"
 anyhow = "1.0.86"
 duct = "0.13.7"
-noodles-fasta = "0.42.0"
 rust-htslib = "0.47.0"
 uuid = { version = "1.10.0", features = ["v4"] }
 pandora_lib_blastn = { git = "https://git.t0m4.fr/Thomas/pandora_lib_blastn.git" }

+ 33 - 28
src/assembler/flye.rs

@@ -1,15 +1,19 @@
+use super::Assemble;
+use crate::{
+    assembler::{calculate_shannon_entropy, remove_bwa_indices, AssembleError},
+    io::{
+        bam::{cp_mod_tags, read_bam, run_modkit},
+        fasta::{fai, records_to_fasta, write_fasta},
+    },
+};
 use log::{info, warn};
+use seq_io::fasta::Record;
 use std::{
     fs::{self, File},
     io::{BufRead, BufReader, Read},
     path::{Path, PathBuf},
     process::{Command, Stdio},
 };
-use super::Assemble;
-use crate::{assembler::AssembleError, io::{
-    bam::{cp_mod_tags, read_bam},
-    fasta::{fai, records_to_fasta, write_fasta},
-}};
 
 #[derive(Debug, Clone)]
 pub struct FlyeConfig {
@@ -17,6 +21,7 @@ pub struct FlyeConfig {
 
     pub min_cov: u16,
     pub min_reads: u32,
+    pub min_entropy: f64,
     pub threads: u16,
     pub min_overlap: u32,
     pub flye_bin: String,
@@ -28,6 +33,8 @@ impl Default for FlyeConfig {
             output_dir: PathBuf::new(),
             min_cov: 2,
             min_reads: 3,
+            min_entropy: 0.2,
+
             threads: 12,
             min_overlap: 1000,
             flye_bin: "/data/tools/Flye/bin/flye".to_string(),
@@ -37,7 +44,10 @@ impl Default for FlyeConfig {
 
 impl FlyeConfig {
     pub fn new(output_dir: &str) -> Self {
-        FlyeConfig { output_dir: PathBuf::from(output_dir), ..Default::default() }
+        FlyeConfig {
+            output_dir: PathBuf::from(output_dir),
+            ..Default::default()
+        }
     }
 }
 
@@ -52,10 +62,7 @@ pub struct Flye {
 }
 
 impl Assemble for Flye {
-    fn init(
-        input_bam: &Path,
-        config: &super::AssembleConfig,
-    ) -> anyhow::Result<Self> {
+    fn init(input_bam: &Path, config: &super::AssembleConfig) -> anyhow::Result<Self> {
         match config {
             super::AssembleConfig::Flye(config) => {
                 let input_id = input_bam.file_stem().unwrap().to_str().unwrap().to_string();
@@ -74,8 +81,6 @@ impl Assemble for Flye {
     }
 
     fn assemble(mut self) -> anyhow::Result<Self> {
-        let min_cov = self.config.min_cov;
-
         let tmp_dir = format!("/tmp/ass_{}", uuid::Uuid::new_v4());
         info!("Creating tmp directory {tmp_dir}");
         fs::create_dir(&tmp_dir).unwrap();
@@ -93,23 +98,16 @@ impl Assemble for Flye {
         let contigs_path = format!("{flye_tmp_dir}/assembly.fasta");
 
         if Path::new(&contigs_path).exists() {
-            let assembly_path = format!("{flye_tmp_dir}/assembly.fasta");
-            let flye_cov_path = format!("{flye_tmp_dir}/40-polishing/base_coverage.bed.gz");
-
-            // Read the assembly fasta
-            let mut reader = File::open(assembly_path)
-                .map(BufReader::new)
-                .map(noodles_fasta::Reader::new)?;
+            let mut reader = seq_io::fasta::Reader::from_path(&contigs_path)?;
 
             let mut contigs = Vec::new();
             for result in reader.records() {
                 let record = result?;
-                let contig_name = String::from_utf8(record.name().to_vec()).unwrap();
-                let (s, e) = read_flye_coverage(&flye_cov_path, min_cov.into(), &contig_name);
-                let seq = record.sequence().as_ref();
-                let seq = String::from_utf8(seq.to_vec()).unwrap();
-                let seq: String = seq[s..e].into();
-                contigs.push(seq);
+                let seq = record.seq();
+                let seq = String::from_utf8(seq.to_vec())?;
+                if calculate_shannon_entropy(&seq) >= self.config.min_entropy {
+                    contigs.push(seq);
+                }
             }
             self.contigs = Some(contigs);
         } else {
@@ -143,7 +141,7 @@ impl Assemble for Flye {
             let contig_fa = format!("{}/{contig_id}.fa", self.config.output_dir.display());
 
             info!("Saving contig {contig_id} in {contig_fa}");
-            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
+            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())])?;
             fai(&contig_fa)?;
 
             // Writing bed file from best blastn results
@@ -159,18 +157,25 @@ impl Assemble for Flye {
             info!("Mapping  input reads to {contig_id}");
             let new_bam = format!("{}/{contig_id}.bam", self.config.output_dir.display());
             duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
-            let input_fa = format!("{}/{}.fasta", self.config.output_dir.display(), self.input_id);
+            let input_fa = format!(
+                "{}/{}.fasta",
+                self.config.output_dir.display(),
+                self.input_id
+            );
             let bwa = format!("bwa mem {contig_fa} {input_fa}");
             let samtools = "samtools sort /dev/stdin";
             let pipe = format!("{bwa} | {samtools} > {new_bam}");
             duct::cmd!("bash", "-c", pipe).run()?;
 
+            // clean bwa indices
+            remove_bwa_indices(&contig_fa)?;
+
             // Copy modified base tags to new bam
             cp_mod_tags(&self.input_records, &new_bam)?;
 
             // Run modkit
             let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.config.output_dir.display());
-            duct::cmd!("modkit", "pileup", new_bam, modkit_pileup).run()?;
+            run_modkit(&new_bam, &contig_fa, &modkit_pileup)?;
         }
         Ok(())
     }

+ 14 - 2
src/assembler/mod.rs

@@ -1,4 +1,6 @@
-use std::{collections::HashMap, path::{Path, PathBuf}};
+use std::{
+    collections::HashMap, fs, path::{Path, PathBuf}
+};
 
 use anyhow::{Context, Ok};
 use log::warn;
@@ -8,7 +10,8 @@ use crate::io::bam::{all_bam_paths, create_bam_leave_two_out};
 
 use self::{
     flye::{Flye, FlyeConfig},
-    spades::{Spades, SpadesConfig}, wtdbg2::{Wtdbg2, Wtdbg2Config},
+    spades::{Spades, SpadesConfig},
+    wtdbg2::{Wtdbg2, Wtdbg2Config},
 };
 
 pub mod flye;
@@ -154,3 +157,12 @@ pub fn calculate_shannon_entropy(sequence: &str) -> f64 {
     // Round to 4 decimal places
     (normalized_entropy * 10000.0).round() / 10000.0
 }
+
+pub fn remove_bwa_indices(contig_fa: &str) -> anyhow::Result<()> {
+    fs::remove_file(format!("{contig_fa}.amb"))?;
+    fs::remove_file(format!("{contig_fa}.ann"))?;
+    fs::remove_file(format!("{contig_fa}.bwt"))?;
+    fs::remove_file(format!("{contig_fa}.pac"))?;
+    fs::remove_file(format!("{contig_fa}.sa"))?;
+    Ok(())
+}

+ 12 - 12
src/assembler/spades.rs

@@ -1,16 +1,17 @@
 use std::{
-    fs::{self, File},
+    fs,
     io::{BufRead, BufReader},
     path::{Path, PathBuf},
     process::{Command, Stdio},
 };
 
 use log::{info, warn};
+use seq_io::fasta::Record;
 
 use crate::{
-    assembler::AssembleError,
+    assembler::{remove_bwa_indices, AssembleError},
     io::{
-        bam::{cp_mod_tags, read_bam},
+        bam::{cp_mod_tags, read_bam, run_modkit},
         fasta::{fai, write_fasta},
         fastq::records_to_fastq,
     },
@@ -105,16 +106,13 @@ impl Assemble for Spades {
         let contigs_path = format!("{spades_tmp_dir}/contigs.fasta");
 
         if Path::new(&contigs_path).exists() {
-            // Read the assembly fasta
-            let mut reader = File::open(contigs_path)
-                .map(BufReader::new)
-                .map(noodles_fasta::Reader::new)?;
+            let mut reader = seq_io::fasta::Reader::from_path(&contigs_path)?;
 
             let mut contigs = Vec::new();
             for result in reader.records() {
                 let record = result?;
-                let seq = record.sequence().as_ref();
-                let seq = String::from_utf8(seq.to_vec()).unwrap();
+                let seq = record.seq();
+                let seq = String::from_utf8(seq.to_vec())?;
                 if calculate_shannon_entropy(&seq) >= self.config.min_entropy {
                     contigs.push(seq);
                 }
@@ -124,7 +122,6 @@ impl Assemble for Spades {
             anyhow::bail!(AssembleError::NoContig(self.input_id));
         }
 
-        
         Ok(self)
     }
 
@@ -148,7 +145,7 @@ impl Assemble for Spades {
             let contig_fa = format!("{}/{contig_id}.fa", self.config.output_dir.display());
 
             info!("Saving contig {contig_id} in {contig_fa}");
-            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
+            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())])?;
             fai(&contig_fa)?;
 
             // Remaping input bam to contig
@@ -160,12 +157,15 @@ impl Assemble for Spades {
             let pipe = format!("{bwa} | {samtools} > {new_bam}");
             duct::cmd!("bash", "-c", pipe).run()?;
 
+            // clean bwa indices
+            remove_bwa_indices(&contig_fa)?;
+
             // Copy modified base tags to new bam
             cp_mod_tags(&self.input_records, &new_bam)?;
 
             // Run modkit
             let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.config.output_dir.display());
-            duct::cmd!("modkit", "pileup", "--cpg", "--ref", contig_fa, new_bam, modkit_pileup).run()?;
+            run_modkit(&new_bam, &contig_fa, &modkit_pileup)?;
         }
 
         // Cleaning

+ 12 - 18
src/assembler/wtdbg2.rs

@@ -1,17 +1,18 @@
 // https://github.com/ruanjue/wtdbg2/
 use std::{
-    fs::{self, File},
+    fs,
     io::{BufRead, BufReader},
     path::{Path, PathBuf},
     process::{Command, Stdio},
 };
 
 use log::{info, warn};
+use seq_io::fasta::Record;
 
 use crate::{
     assembler::AssembleError,
     io::{
-        bam::{cp_mod_tags, read_bam},
+        bam::{cp_mod_tags, read_bam, remap_bam, run_modkit},
         fasta::{fai, write_fasta},
         fastq::records_to_fastq,
     },
@@ -107,16 +108,13 @@ impl Assemble for Wtdbg2 {
         warn!("reading {}", contigs_path);
 
         if Path::new(&contigs_path).exists() {
-            // Read the assembly fasta
-            let mut reader = File::open(contigs_path)
-                .map(BufReader::new)
-                .map(noodles_fasta::Reader::new)?;
+            let mut reader = seq_io::fasta::Reader::from_path(&contigs_path)?;
 
             let mut contigs = Vec::new();
             for result in reader.records() {
                 let record = result?;
-                let seq = record.sequence().as_ref();
-                let seq = String::from_utf8(seq.to_vec()).unwrap();
+                let seq = record.seq();
+                let seq = String::from_utf8(seq.to_vec())?;
                 if calculate_shannon_entropy(&seq) >= self.config.min_entropy {
                     contigs.push(seq);
                 }
@@ -126,7 +124,6 @@ impl Assemble for Wtdbg2 {
             anyhow::bail!(AssembleError::NoContig(self.input_id));
         }
 
-        
         Ok(self)
     }
 
@@ -150,24 +147,23 @@ impl Assemble for Wtdbg2 {
             let contig_fa = format!("{}/{contig_id}.fa", self.config.output_dir.display());
 
             info!("Saving contig {contig_id} in {contig_fa}");
-            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())]);
+            write_fasta(&contig_fa, &vec![(contig_id.clone(), contig.clone())])?;
             fai(&contig_fa)?;
 
             // Remaping input bam to contig
             info!("Mapping  input reads to {contig_id}");
             let new_bam = format!("{}/{contig_id}.bam", self.config.output_dir.display());
-            duct::cmd!("bwa", "index", contig_fa.clone()).run()?;
-            let bwa = format!("bwa mem {contig_fa} {}", self.input_fq);
-            let samtools = "samtools sort /dev/stdin";
-            let pipe = format!("{bwa} | {samtools} > {new_bam}");
-            duct::cmd!("bash", "-c", pipe).run()?;
+            remap_bam(&contig_fa, &self.input_fq, &new_bam)?;
+
+            // clean bwa indices
+            // remove_bwa_indices(&contig_fa)?;
 
             // Copy modified base tags to new bam
             cp_mod_tags(&self.input_records, &new_bam)?;
 
             // Run modkit
             let modkit_pileup = format!("{}/{contig_id}_mod.bed", self.config.output_dir.display());
-            duct::cmd!("modkit", "pileup", "--cpg", "--ref", contig_fa, new_bam, modkit_pileup).run()?;
+            run_modkit(&new_bam, &contig_fa, &modkit_pileup)?;
         }
 
         // Cleaning
@@ -205,5 +201,3 @@ pub fn run_wtdbg2(input_fq: &str, tmp_dir: &str, config: &Wtdbg2Config) {
     cmd.wait().unwrap();
     cmd.kill().unwrap();
 }
-
-

+ 111 - 1
src/io/bam.rs

@@ -1,13 +1,16 @@
 use anyhow::Context;
 use bam::{
-    record::tags::{StringType, TagValue}, BamReader, Record, RecordWriter
+    record::tags::{StringType, TagValue},
+    BamReader, Record, RecordWriter,
 };
 use log::{info, warn};
 // use nom::AsBytes;
 use std::{
     collections::HashMap,
     fs,
+    io::{BufRead, BufReader, Read},
     path::{Path, PathBuf},
+    process::{Command, Stdio},
 };
 use uuid::Uuid;
 
@@ -155,4 +158,111 @@ pub fn create_bam_leave_two_out(input_path: &str) -> anyhow::Result<Vec<PathBuf>
     Ok(output_paths)
 }
 
+// pub fn remap_bam(reference: &str, input_seq: &str, output_bam: &str) -> anyhow::Result<()> {
+//     duct::cmd!("bwa", "index", reference).run()?;
+//     let bwa = format!("bwa mem {reference} {input_seq}");
+//     let samtools = "samtools sort /dev/stdin";
+//     let pipe = format!("{bwa} | {samtools} > {output_bam}");
+//     duct::cmd!("bash", "-c", pipe).run()?;
+//     Ok(())
+// }
+pub fn remap_bam(reference: &str, input_seq: &str, output_bam: &str) -> anyhow::Result<()> {
+    info!("Remaping {input_seq} to {reference} into {output_bam}");
+     // Index the reference
+    let index_output = Command::new("bwa")
+        .args(["index", reference])
+        .output()
+        .context("Failed to run bwa index")?;
 
+    if !index_output.status.success() {
+        anyhow::bail!("bwa index failed: {}", String::from_utf8_lossy(&index_output.stderr));
+    }
+
+    // Prepare the bwa mem command
+    let mut bwa_command = Command::new("bwa");
+    bwa_command.args(["mem", reference, input_seq]);
+    bwa_command.stdout(Stdio::piped());
+    bwa_command.stderr(Stdio::piped());
+
+    let mut bwa_process = bwa_command.spawn()
+        .context("Failed to spawn bwa mem command")?;
+
+    // Prepare the samtools sort command
+    let mut samtools_command = Command::new("samtools");
+    samtools_command.args(["sort", "/dev/stdin"]);
+    samtools_command.stdin(Stdio::piped());
+    samtools_command.stdout(Stdio::from(
+        std::fs::File::create(output_bam).context("Failed to create output BAM file")?
+    ));
+    samtools_command.stderr(Stdio::piped());
+
+    let mut samtools_process = samtools_command.spawn()
+        .context("Failed to spawn samtools command")?;
+
+    // Connect bwa stdout to samtools stdin
+    if let (Some(mut bwa_stdout), Some(mut samtools_stdin)) = (bwa_process.stdout.take(), samtools_process.stdin.take()) {
+        std::thread::spawn(move || {
+            std::io::copy(&mut bwa_stdout, &mut samtools_stdin)
+        });
+    }
+
+    // Capture stderr from bwa
+    let mut bwa_stderr = String::new();
+    bwa_process.stderr.take().unwrap().read_to_string(&mut bwa_stderr)?;
+
+    // Wait for bwa to finish
+    let bwa_status = bwa_process.wait().context("Failed to wait for bwa mem")?;
+    if !bwa_status.success() {
+        anyhow::bail!("bwa mem failed: {}", bwa_stderr);
+    }
+
+    // Capture stderr from samtools
+    let mut samtools_stderr = String::new();
+    samtools_process.stderr.take().unwrap().read_to_string(&mut samtools_stderr)?;
+
+    // Wait for samtools to finish
+    let samtools_status = samtools_process.wait().context("Failed to wait for samtools")?;
+    if !samtools_status.success() {
+        anyhow::bail!("samtools sort failed: {}", samtools_stderr);
+    }
+
+    fs::remove_file(format!("{reference}.amb"))?;
+    fs::remove_file(format!("{reference}.ann"))?;
+    fs::remove_file(format!("{reference}.bwt"))?;
+    fs::remove_file(format!("{reference}.pac"))?;
+    fs::remove_file(format!("{reference}.sa"))?;
+
+    // Log the captured stderr
+    // println!("bwa mem stderr: {}", bwa_stderr);
+    // println!("samtools sort stderr: {}", samtools_stderr);
+
+    Ok(())
+}
+
+pub fn run_modkit(bam_path: &str, contig_fa: &str, pileup_path: &str) -> anyhow::Result<()> {
+    info!("Running Modkit pileup for {bam_path}");
+    let mut cmd = Command::new("modkit")
+        .arg("pileup")
+        .arg("--with-header")
+        .arg("--cpg")
+        .arg("--ref")
+        .arg(contig_fa)
+        .arg(bam_path)
+        .arg(pileup_path)
+        .stderr(Stdio::piped())
+        .spawn()
+        .expect("Modkit pileup failed to start");
+
+    let stderr = cmd.stderr.take().unwrap();
+    let reader = BufReader::new(stderr);
+    reader
+        .lines()
+        .map_while(Result::ok)
+        // .inspect(|o| info!("{o}"))
+        .filter(|line| line.contains("ERROR"))
+        .for_each(|line| warn!("[Modkit pileup] {line}"));
+
+    cmd.wait().unwrap();
+    cmd.kill().unwrap();
+    Ok(())
+}

+ 51 - 17
src/io/fasta.rs

@@ -1,31 +1,66 @@
 use log::info;
-use noodles_fasta::{
-    record::{Definition, Sequence},
-    writer::Builder,
-    Record,
-};
-use std::fs::File;
+use seq_io::fasta::write_parts;
+use std::{fs::File, io::BufWriter};
+
+// pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<()> {
+//     let file = File::create(fa_path).unwrap();
+//     info!("Writing fasta to {fa_path}");
+//     let mut writer = Builder::default().build_with_writer(file);
+//     let mut names = Vec::new();
+//     for bam_record in r {
+//         let name = bam_record.name();
+//         if !names.contains(&name) {
+//             names.push(name);
+//             let sequence = bam_record.sequence().to_vec();
+//             let record = Record::new(Definition::new(name, None), Sequence::from(sequence));
+//             writer.write_record(&record)?;
+//         }
+//     }
+//     Ok(())
+// }
 
 pub fn records_to_fasta(r: &Vec<bam::Record>, fa_path: &str) -> anyhow::Result<()> {
     let file = File::create(fa_path).unwrap();
+    let mut writer = BufWriter::new(file);
     info!("Writing fasta to {fa_path}");
-    let mut writer = Builder::default().build_with_writer(file);
     let mut names = Vec::new();
     for bam_record in r {
         let name = bam_record.name();
         if !names.contains(&name) {
-            names.push(name);
             let sequence = bam_record.sequence().to_vec();
-            let record = Record::new(Definition::new(name, None), Sequence::from(sequence));
-            writer.write_record(&record)?;
+            write_parts(&mut writer, name, None, &sequence)?;
+            names.push(name);
         }
     }
     Ok(())
 }
 
-pub fn write_fasta(output_fa: &str, d: &Vec<(String, String)>) {
+// pub fn write_fasta(output_fa: &str, d: &Vec<(String, String)>) {
+//     let file = File::create(output_fa).unwrap();
+//     let mut writer = Builder::default().build_with_writer(file);
+//     let mut passed = Vec::new();
+//     for (name, sequence) in d {
+//         let name = name.to_string();
+//         if sequence.is_empty() {
+//             continue;
+//         }
+//         if passed.contains(&name) {
+//             continue;
+//         }
+//         passed.push(name.clone());
+//         let record = Record::new(
+//             Definition::new(name.as_str(), None),
+//             Sequence::from(sequence.as_bytes().to_vec()),
+//         );
+//         writer.write_record(&record).unwrap();
+//     }
+// }
+
+pub fn write_fasta(output_fa: &str, d: &Vec<(String, String)>) -> anyhow::Result<()> {
     let file = File::create(output_fa).unwrap();
-    let mut writer = Builder::default().build_with_writer(file);
+    let mut writer = BufWriter::new(file);
+    info!("Writing fasta to {output_fa}");
+
     let mut passed = Vec::new();
     for (name, sequence) in d {
         let name = name.to_string();
@@ -35,13 +70,12 @@ pub fn write_fasta(output_fa: &str, d: &Vec<(String, String)>) {
         if passed.contains(&name) {
             continue;
         }
+        
+        write_parts(&mut writer, name.as_bytes(), None, sequence.as_bytes())?;
+
         passed.push(name.clone());
-        let record = Record::new(
-            Definition::new(name.as_str(), None),
-            Sequence::from(sequence.as_bytes().to_vec()),
-        );
-        writer.write_record(&record).unwrap();
     }
+    Ok(())
 }
 
 pub fn fai(path: &str) -> Result<std::process::Output, std::io::Error> {