|
@@ -1,15 +1,51 @@
|
|
|
-pub mod pod5_infos;
|
|
|
|
|
-pub mod readers;
|
|
|
|
|
-pub mod vcf;
|
|
|
|
|
|
|
+//! File I/O for all genomic formats used by Pandora.
|
|
|
|
|
+//!
|
|
|
|
|
+//! # Coordinate convention
|
|
|
|
|
+//!
|
|
|
|
|
+//! All types and functions in this module use **0-based, half-open `[start, end)`**
|
|
|
|
|
+//! coordinates unless explicitly documented otherwise. This matches the BED format,
|
|
|
|
|
+//! Rust's `Range<u32>`, and the internal [`GenomeRange`](crate::positions::GenomeRange)
|
|
|
|
|
+//! representation. Conversions to/from 1-based formats (GFF3, VCF POS, SAM POS,
|
|
|
|
|
+//! Tabix positions) are handled internally and noted in each function's documentation.
|
|
|
|
|
+//!
|
|
|
|
|
+//! # BGZF vs standard gzip
|
|
|
|
|
+//!
|
|
|
|
|
+//! All `.gz` files are treated as **BGZF** (Blocked GNU Zip Format), not standard gzip.
|
|
|
|
|
+//! BGZF is produced by `bgzip` and used by BAM, VCF.gz, BED.gz, etc.
|
|
|
|
|
+//! Files compressed with plain `gzip` are not BGZF and will not decompress correctly. See [`readers`] for details.
|
|
|
|
|
+//!
|
|
|
|
|
+//! # Submodules
|
|
|
|
|
+//!
|
|
|
|
|
+//! | Module | Purpose |
|
|
|
|
|
+//! |--------|---------|
|
|
|
|
|
+//! | [`bam`] | BAM/CRAM reading, SA-tag parsing, fold-back inversion detection |
|
|
|
|
|
+//! | [`bed`] | BED file I/O, overlap queries, gene annotation, tabix compression |
|
|
|
|
|
+//! | [`vcf`] | VCF file I/O with BGZF + Tabix index |
|
|
|
|
|
+//! | [`fasta`] | Indexed FASTA access, contig splitting |
|
|
|
|
|
+//! | [`gff`] | GFF3 feature range extraction |
|
|
|
|
|
+//! | [`modkit`] | Modkit bedMethyl pileup parsing, epigenetic activity computation |
|
|
|
|
|
+//! | [`straglr`] | Straglr STR genotyper TSV parsing |
|
|
|
|
|
+//! | [`liftover`] | UCSC chain file parsing and coordinate liftover |
|
|
|
|
|
+//! | [`readers`] | Generic BGZF/plain readers, Tabix region fetch (`fetch_tabix_lines_with`) |
|
|
|
|
|
+//! | [`writers`] | BGZF writers, `BgzTabixWriter` for combined BGZF + Tabix output |
|
|
|
|
|
+//! | [`tsv`] | `TsvLine` — reusable delimiter-agnostic line buffer (replaces `csv::ByteRecord`) |
|
|
|
|
|
+//! | [`dict`] | Sequence dictionary (`.dict`) reader |
|
|
|
|
|
+//! | [`fastq`] | FASTQ writer from BAM records |
|
|
|
|
|
+//! | [`pod5_infos`] | POD5 run metadata extraction via Arrow IPC + flatbuffers |
|
|
|
|
|
+//! | [`pod5_footer_generated`] | Auto-generated flatbuffers types for the POD5 footer |
|
|
|
|
|
+
|
|
|
|
|
+pub mod bam;
|
|
|
pub mod bed;
|
|
pub mod bed;
|
|
|
pub mod dict;
|
|
pub mod dict;
|
|
|
pub mod fasta;
|
|
pub mod fasta;
|
|
|
-pub mod pod5_footer_generated;
|
|
|
|
|
|
|
+pub mod fastq;
|
|
|
pub mod gff;
|
|
pub mod gff;
|
|
|
-pub mod bam;
|
|
|
|
|
-pub mod writers;
|
|
|
|
|
|
|
+pub mod liftover;
|
|
|
|
|
+pub mod modkit;
|
|
|
|
|
+pub mod pod5_footer_generated;
|
|
|
|
|
+pub mod pod5_infos;
|
|
|
|
|
+pub mod readers;
|
|
|
pub mod straglr;
|
|
pub mod straglr;
|
|
|
pub mod tsv;
|
|
pub mod tsv;
|
|
|
-pub mod modkit;
|
|
|
|
|
-pub mod liftover;
|
|
|
|
|
-pub mod fastq;
|
|
|
|
|
|
|
+pub mod vcf;
|
|
|
|
|
+pub mod writers;
|