|
|
@@ -1,3 +1,8 @@
|
|
|
+//! BGZF and plain-text file writer helpers.
|
|
|
+//!
|
|
|
+//! All BGZF writers produce files compatible with `bgzip`/HTSlib.
|
|
|
+//! Pass each BGZF writer created here to [`finalize_bgzf_file`] to flush and sync it.
|
|
|
+
|
|
|
use std::{
|
|
|
fs::{self, File, OpenOptions},
|
|
|
io::{BufWriter, Write},
|
|
|
@@ -5,42 +10,32 @@ use std::{
|
|
|
};
|
|
|
|
|
|
use anyhow::Context;
|
|
|
-// use bgzip::{BGZFWriter, Compression};
|
|
|
use log::info;
|
|
|
-
|
|
|
-use crate::io::readers::get_reader;
|
|
|
-
|
|
|
-// pub fn get_gz_writer(path: &str, force: bool) -> anyhow::Result<BGZFWriter<File>> {
|
|
|
-// if !path.ends_with(".gz") {
|
|
|
-// anyhow::bail!("The file should end with gz");
|
|
|
-// }
|
|
|
-//
|
|
|
-// if force && Path::new(path).exists() {
|
|
|
-// fs::remove_file(path).with_context(|| anyhow::anyhow!("Failed to remove file: {path}"))?;
|
|
|
-// }
|
|
|
-//
|
|
|
-// let file = OpenOptions::new()
|
|
|
-// .write(true) // Open the file for writing
|
|
|
-// .create_new(true)
|
|
|
-// .truncate(true)
|
|
|
-// .open(path)
|
|
|
-// .with_context(|| anyhow::anyhow!("failed to open the file: {path}"))?;
|
|
|
-//
|
|
|
-// info!("Writing into {path}");
|
|
|
-// Ok(BGZFWriter::new(file, Compression::default()))
|
|
|
-// }
|
|
|
-
|
|
|
use noodles_bgzf as bgzf;
|
|
|
|
|
|
+use crate::{helpers::TempFileGuard, io::readers::get_reader};
|
|
|
+
|
|
|
+/// Create a new BGZF writer for `path` (must end with `.gz`).
|
|
|
+///
|
|
|
+/// If `force` is `true` and the file already exists it is removed first.
|
|
|
+/// Otherwise an existing file causes an error (`create_new` semantics).
|
|
|
+///
|
|
|
+/// Call [`finalize_bgzf_file`] on the returned writer to flush, sync, and
|
|
|
+/// write the BGZF EOF block.
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+///
|
|
|
+/// Returns an error if the path does not end in `.gz`, removal fails when
|
|
|
+/// `force` is set, or the file cannot be created.
|
|
|
pub fn get_gz_writer(
|
|
|
path: &str,
|
|
|
force: bool,
|
|
|
-) -> anyhow::Result<bgzf::io::Writer<BufWriter<std::fs::File>>> {
|
|
|
+) -> anyhow::Result<bgzf::io::Writer<BufWriter<File>>> {
|
|
|
if !path.ends_with(".gz") {
|
|
|
anyhow::bail!("file should end with .gz: {path}");
|
|
|
}
|
|
|
|
|
|
- if force && std::path::Path::new(path).exists() {
|
|
|
+ if force && Path::new(path).exists() {
|
|
|
fs::remove_file(path).with_context(|| format!("failed to remove file: {path}"))?;
|
|
|
}
|
|
|
|
|
|
@@ -53,47 +48,85 @@ pub fn get_gz_writer(
|
|
|
Ok(bgzf::io::Writer::new(BufWriter::new(file)))
|
|
|
}
|
|
|
|
|
|
+/// Create a new plain-text buffered writer for `path`.
|
|
|
+///
|
|
|
+/// Fails if the file already exists (`create_new` semantics).
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+///
|
|
|
+/// Returns an error if the file cannot be created.
|
|
|
pub fn get_writer(path: &str) -> anyhow::Result<BufWriter<File>> {
|
|
|
let file = OpenOptions::new()
|
|
|
- .write(true) // Open the file for writing
|
|
|
+ .write(true)
|
|
|
.create_new(true)
|
|
|
- .truncate(true)
|
|
|
.open(path)
|
|
|
- .with_context(|| anyhow::anyhow!("failed to open the file: {path}"))?;
|
|
|
+ .with_context(|| format!("failed to create file: {path}"))?;
|
|
|
|
|
|
info!("Writing into {path}");
|
|
|
Ok(BufWriter::new(file))
|
|
|
}
|
|
|
|
|
|
+/// Compress a plain-text file to BGZF, writing atomically via a UUID temp file.
|
|
|
+///
|
|
|
+/// The output is `<input>.gz`. If the output already exists and `force` is
|
|
|
+/// `false`, an error is returned before any writing begins. If `force` is
|
|
|
+/// `true`, the existing file is deleted just before the rename, so the swap is not atomic.
|
|
|
+///
|
|
|
+/// A UUID-named temp file is created in the same directory as the output
|
|
|
+/// (same filesystem, guaranteeing an atomic rename). A [`TempFileGuard`] ensures
|
|
|
+/// the temp file is removed automatically on failure or panic.
|
|
|
+///
|
|
|
+/// # Arguments
|
|
|
+///
|
|
|
+/// * `input` - Path to the input file (plain text or BGZF as accepted by [`get_reader`])
|
|
|
+/// * `force` - Overwrite the output `.gz` if it already exists
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+///
|
|
|
+/// Returns an error if the output exists and `force` is `false`, the input
|
|
|
+/// cannot be read, the output cannot be written, the old output cannot be removed, or the rename fails.
|
|
|
pub fn convert_bgz(input: impl AsRef<Path>, force: bool) -> anyhow::Result<()> {
|
|
|
let input = input.as_ref();
|
|
|
+ let output_path = format!("{}.gz", input.display());
|
|
|
|
|
|
- let mut reader = get_reader(&input.to_string_lossy())?;
|
|
|
- let mut writer = get_gz_writer(&format!("{}.gz", input.display()), force)?;
|
|
|
+ if !force && Path::new(&output_path).exists() {
|
|
|
+ anyhow::bail!("output already exists (use force=true to overwrite): {output_path}");
|
|
|
+ }
|
|
|
|
|
|
- std::io::copy(&mut reader, &mut writer)?;
|
|
|
+ let tmp_dir = input.parent().unwrap_or(Path::new("."));
|
|
|
+ let mut guard = TempFileGuard::new();
|
|
|
+ let tmp_path = guard.tmp_path(".gz", tmp_dir);
|
|
|
+ let tmp_str = tmp_path.to_string_lossy();
|
|
|
|
|
|
- writer
|
|
|
- .try_finish()
|
|
|
- .with_context(|| format!("failed finishing BGZF writer: {}", input.display()))?;
|
|
|
+ let mut reader = get_reader(&input.to_string_lossy())?;
|
|
|
+ let mut writer = get_gz_writer(&tmp_str, false)?;
|
|
|
|
|
|
- let mut inner = writer
|
|
|
- .finish()
|
|
|
- .with_context(|| format!("failed returning inner BGZF writer: {}", input.display()))?;
|
|
|
+ std::io::copy(&mut reader, &mut writer)
|
|
|
+ .with_context(|| format!("failed copying {} → {tmp_str}", input.display()))?;
|
|
|
|
|
|
- inner
|
|
|
- .flush()
|
|
|
- .with_context(|| format!("failed flushing inner writer: {}", input.display()))?;
|
|
|
+ finalize_bgzf_file(writer, &tmp_str)?;
|
|
|
|
|
|
- inner
|
|
|
- .into_inner()
|
|
|
- .with_context(|| format!("failed unwrapping BufWriter: {}", input.display()))?
|
|
|
- .sync_all()
|
|
|
- .with_context(|| format!("failed syncing file: {}", input.display()))?;
|
|
|
+ if force && Path::new(&output_path).exists() {
|
|
|
+ fs::remove_file(&output_path)
|
|
|
+ .with_context(|| format!("failed to remove existing file: {output_path}"))?;
|
|
|
+ }
|
|
|
+
|
|
|
+ fs::rename(&tmp_path, &output_path)
|
|
|
+ .with_context(|| format!("failed to rename {tmp_str} → {output_path}"))?;
|
|
|
|
|
|
+ guard.disarm();
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+/// Flush, sync, and write the BGZF EOF block for a writer created by [`get_gz_writer`].
|
|
|
+///
|
|
|
+/// Must be called after all data has been written; omitting it leaves the file
|
|
|
+/// without a valid BGZF terminator.
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+///
|
|
|
+/// Returns an error if flushing, unwrapping the inner `BufWriter`, or syncing
|
|
|
+/// the underlying file fails.
|
|
|
pub fn finalize_bgzf_file(
|
|
|
writer: bgzf::io::Writer<BufWriter<File>>,
|
|
|
path: &str,
|