|
|
@@ -6,6 +6,7 @@
|
|
|
//! in a single atomic pass.
|
|
|
|
|
|
use std::{
|
|
|
+ collections::HashSet,
|
|
|
fs::{self, File, OpenOptions},
|
|
|
io::{BufWriter, Write},
|
|
|
path::{Path, PathBuf},
|
|
|
@@ -217,6 +218,10 @@ pub struct BgzTabixWriter {
|
|
|
tmp_path: PathBuf,
|
|
|
output_path: String,
|
|
|
guard: TempFileGuard,
|
|
|
+ // sort-order tracking
|
|
|
+ prev_rname: String,
|
|
|
+ prev_start: Option<Position>,
|
|
|
+ finished_rnames: HashSet<String>,
|
|
|
}
|
|
|
|
|
|
impl BgzTabixWriter {
|
|
|
@@ -264,6 +269,9 @@ impl BgzTabixWriter {
|
|
|
tmp_path,
|
|
|
output_path: output_path.to_string(),
|
|
|
guard,
|
|
|
+ prev_rname: String::new(),
|
|
|
+ prev_start: None,
|
|
|
+ finished_rnames: HashSet::new(),
|
|
|
})
|
|
|
}
|
|
|
|
|
|
@@ -286,6 +294,11 @@ impl BgzTabixWriter {
|
|
|
/// Tabix. For BED files (0-based half-open), use [`write_bed_record`](Self::write_bed_record)
|
|
|
/// instead.
|
|
|
///
|
|
|
+ /// Enforces tabix sort order: within a contig records must be in non-decreasing
|
|
|
+ /// start order; a contig may not reappear after another contig has begun.
|
|
|
+ /// Violations return an error immediately — the [`TempFileGuard`] ensures
|
|
|
+ /// the partial temp file is cleaned up automatically.
|
|
|
+ ///
|
|
|
/// # Arguments
|
|
|
///
|
|
|
/// * `line` - Raw bytes of the line (including the trailing newline)
|
|
|
@@ -295,7 +308,7 @@ impl BgzTabixWriter {
|
|
|
///
|
|
|
/// # Errors
|
|
|
///
|
|
|
- /// Returns an error if the write or index insertion fails.
|
|
|
+ /// Returns an error if the sort order is violated, or if the write or index insertion fails.
|
|
|
pub fn write_record(
|
|
|
&mut self,
|
|
|
line: &[u8],
|
|
|
@@ -303,6 +316,32 @@ impl BgzTabixWriter {
|
|
|
start: Position,
|
|
|
end: Position,
|
|
|
) -> anyhow::Result<()> {
|
|
|
+ if rname == self.prev_rname {
|
|
|
+ if let Some(prev) = self.prev_start {
|
|
|
+ if start < prev {
|
|
|
+ anyhow::bail!(
|
|
|
+ "BED/tabix input is not sorted: {rname} position {} comes after {} \
|
|
|
+ (tabix requires records sorted by contig then start position)",
|
|
|
+ usize::from(start),
|
|
|
+ usize::from(prev),
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if self.finished_rnames.contains(rname) {
|
|
|
+ anyhow::bail!(
|
|
|
+ "BED/tabix input is not sorted: contig {rname} appears non-contiguously \
|
|
|
+ (tabix requires all records for the same contig to be consecutive)"
|
|
|
+ );
|
|
|
+ }
|
|
|
+ if !self.prev_rname.is_empty() {
|
|
|
+ self.finished_rnames.insert(self.prev_rname.clone());
|
|
|
+ }
|
|
|
+ self.prev_rname = rname.to_string();
|
|
|
+ self.prev_start = None;
|
|
|
+ }
|
|
|
+ self.prev_start = Some(start);
|
|
|
+
|
|
|
let chunk_start = self.writer.virtual_position();
|
|
|
self.writer.write_all(line).context("failed writing record")?;
|
|
|
let chunk_end = self.writer.virtual_position();
|
|
|
@@ -366,7 +405,7 @@ impl BgzTabixWriter {
|
|
|
///
|
|
|
/// Returns an error if BGZF finalisation, index writing, or the rename fails.
|
|
|
pub fn finish(self) -> anyhow::Result<()> {
|
|
|
- let Self { writer, indexer, format, tmp_path, output_path, mut guard } = self;
|
|
|
+ let Self { writer, indexer, format, tmp_path, output_path, mut guard, .. } = self;
|
|
|
|
|
|
let tmp_str = tmp_path.to_string_lossy().to_string();
|
|
|
|