преди 2 месеца · e4ef7537d2
--- a/src/io/fastq.rs
+++ b/src/io/fastq.rs
@@ -1,35 +1,50 @@
 
				-use std::{io::Write, path::Path};
			
 
				+//! FASTQ writer from BAM/CRAM records.
			
 
				+
			
 
				+use std::{
			
 
				+    io::{BufWriter, Write},
			
 
				+    path::Path,
			
 
				+};
			
 
				 
			
 
				 use anyhow::Context;
			
 
				 use rust_htslib::bam;
			
 
				 
			
 
				-// ─── FASTQ writer ─────────────────────────────────────────────────────────────
			
 
				- 
			
 
				-/// Write htslib records to FASTQ.
			
 
				+/// Write a slice of BAM records to a FASTQ file.
			
 
				+///
			
 
				+/// - Sequence is taken as stored in the BAM record (always forward-strand,
			
 
				+///   regardless of alignment flags; reverse-strand reads are not reverse-complemented)
			
 
				+/// - Quality scores are converted from phred+0 (HTSlib internal) to phred+33 ASCII
			
 
				+/// - Records with empty sequences are silently skipped
			
 
				+///
			
 
				+/// # Arguments
			
 
				+///
			
 
				+/// * `records` - BAM records to write
			
 
				+/// * `out` - Destination FASTQ file path (created or overwritten)
			
 
				+///
			
 
				+/// # Errors
			
 
				 ///
			
 
				-/// - htslib always stores the forward-strand sequence regardless of flag
			
 
				-/// - Converts phred+0 qual to phred+33 ASCII
			
 
				-/// - Skips records with empty sequence
			
 
				+/// Returns an error if the file cannot be created, a read name is not valid UTF-8, or a write fails.
			
 
				 pub fn write_fastq(records: &[bam::Record], out: &Path) -> anyhow::Result<()> {
			
 
				-    let mut f = std::fs::File::create(out)
			
 
				-        .with_context(|| format!("Cannot create FASTQ: {}", out.display()))?;
			
 
				- 
			
 
				+    let mut f = BufWriter::new(
			
 
				+        std::fs::File::create(out)
			
 
				+            .with_context(|| format!("Cannot create FASTQ: {}", out.display()))?,
			
 
				+    );
			
 
				+
			
 
				     for rec in records {
			
 
				         let seq = rec.seq().as_bytes();
			
 
				         if seq.is_empty() {
			
 
				             continue;
			
 
				         }
			
 
				- 
			
 
				-        let name       = std::str::from_utf8(rec.qname()).context("Non-UTF8 read name")?;
			
 
				+
			
 
				+        let name = std::str::from_utf8(rec.qname()).context("Non-UTF8 read name")?;
			
 
				         let qual_ascii: Vec<u8> = rec.qual().iter().map(|&q| q + 33).collect();
			
 
				- 
			
 
				-        writeln!(f, "@{}", name)?;
			
 
				+
			
 
				+        writeln!(f, "@{name}")?;
			
 
				         f.write_all(&seq)?;
			
 
				         writeln!(f)?;
			
 
				         writeln!(f, "+")?;
			
 
				         f.write_all(&qual_ascii)?;
			
 
				         writeln!(f)?;
			
 
				     }
			
 
				- 
			
 
				+
			
 
				     Ok(())
			
 
				 }
			
--- a/src/io/mod.rs
+++ b/src/io/mod.rs
@@ -1,15 +1,51 @@
 
				-pub mod pod5_infos;
			
 
				-pub mod readers;
			
 
				-pub mod vcf;
			
 
				+//! File I/O for all genomic formats used by Pandora.
			
 
				+//!
			
 
				+//! # Coordinate convention
			
 
				+//!
			
 
				+//! All types and functions in this module use **0-based, half-open `[start, end)`**
			
 
				+//! coordinates unless explicitly documented otherwise. This matches the BED format,
			
 
				+//! Rust's `Range<u32>`, and the internal [`GenomeRange`](crate::positions::GenomeRange)
			
 
				+//! representation. Conversions to/from 1-based formats (GFF3, VCF POS, SAM POS,
			
 
				+//! Tabix positions) are handled internally and noted in each function's documentation.
			
 
				+//!
			
 
				+//! # BGZF vs standard gzip
			
 
				+//!
			
 
				+//! All `.gz` files are treated as **BGZF** (block gzip), not standard gzip.
			
 
				+//! BGZF is produced by `bgzip` and used by BAM, VCF.gz, BED.gz, etc.
			
 
				+//! Plain `gzip` output will not decompress correctly. See [`readers`] for details.
			
 
				+//!
			
 
				+//! # Submodules
			
 
				+//!
			
 
				+//! | Module | Purpose |
			
 
				+//! |--------|---------|
			
 
				+//! | [`bam`] | BAM/CRAM reading, SA-tag parsing, fold-back inversion detection |
			
 
				+//! | [`bed`] | BED file I/O, overlap queries, gene annotation, tabix compression |
			
 
				+//! | [`vcf`] | VCF file I/O with BGZF + Tabix index |
			
 
				+//! | [`fasta`] | Indexed FASTA access, contig splitting |
			
 
				+//! | [`gff`] | GFF3 feature range extraction |
			
 
				+//! | [`modkit`] | Modkit bedMethyl pileup parsing, epigenetic activity computation |
			
 
				+//! | [`straglr`] | Straglr STR genotyper TSV parsing |
			
 
				+//! | [`liftover`] | UCSC chain file parsing and coordinate liftover |
			
 
				+//! | [`readers`] | Generic BGZF/plain readers, Tabix region fetch (`fetch_tabix_lines_with`) |
			
 
				+//! | [`writers`] | BGZF writers, `BgzTabixWriter` for combined BGZF + Tabix output |
			
 
				+//! | [`tsv`] | `TsvLine` — reusable delimiter-agnostic line buffer (replaces `csv::ByteRecord`) |
			
 
				+//! | [`dict`] | Sequence dictionary (`.dict`) reader |
			
 
				+//! | [`fastq`] | FASTQ writer from BAM records |
			
 
				+//! | [`pod5_infos`] | POD5 run metadata extraction via Arrow IPC + flatbuffers |
			
 
				+//! | [`pod5_footer_generated`] | Auto-generated flatbuffers types for the POD5 footer |
			
 
				+
			
 
				+pub mod bam;
			
 
				 pub mod bed;
			
 
				 pub mod dict;
			
 
				 pub mod fasta;
			
 
				-pub mod pod5_footer_generated;
			
 
				+pub mod fastq;
			
 
				 pub mod gff;
			
 
				-pub mod bam;
			
 
				-pub mod writers;
			
 
				+pub mod liftover;
			
 
				+pub mod modkit;
			
 
				+pub mod pod5_footer_generated;
			
 
				+pub mod pod5_infos;
			
 
				+pub mod readers;
			
 
				 pub mod straglr;
			
 
				 pub mod tsv;
			
 
				-pub mod modkit;
			
 
				-pub mod liftover;
			
 
				-pub mod fastq;
			
 
				+pub mod vcf;
			
 
				+pub mod writers;