Ver código fonte

pandora file reader now accepts any reader source via the `SectionSource` trait

STEIMLE Thomas 14 horas atrás
pai
commit
c3d27c4862
2 arquivos alterados com 73 adições e 33 exclusões
  1. 0 4
      Cargo.lock
  2. 73 29
      src/io/somaticpipe_container.rs

+ 0 - 4
Cargo.lock

@@ -1376,8 +1376,6 @@ dependencies = [
 [[package]]
 name = "hts-sys"
 version = "2.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e38d7f1c121cd22aa214cb4dadd4277dc5447391eac518b899b29ba6356fbbb2"
 dependencies = [
  "bindgen",
  "bzip2-sys",
@@ -2630,8 +2628,6 @@ dependencies = [
 [[package]]
 name = "rust-htslib"
 version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f22161678c3d72e6434c5f3383325dbf88c3cacce665f0c7b4b077fc6e957ba9"
 dependencies = [
  "bio-types",
  "byteorder",

+ 73 - 29
src/io/somaticpipe_container.rs

@@ -685,6 +685,31 @@ pub struct PandoraSectionValidation {
     pub error: Option<String>,
 }
 
+/// Object-safe `Read + Seek`, so sources can hand back a boxed reader.
+pub trait ReadSeek: Read + Seek {}
+impl<T: Read + Seek> ReadSeek for T {}
+
+/// A source that produces a fresh `Read + Seek` over a `.pandora` container.
+///
+/// `PandoraReader` reads each section by opening a reader and seeking to the
+/// section offset, so a source is a *factory*: local files re-`File::open`,
+/// while a remote source (e.g. SMB) can hand back a new cursor over a
+/// kept-open handle. Must be `Send + Sync` so `PandoraReader` stays so.
+pub trait SectionSource: Send + Sync {
+    fn open(&self) -> anyhow::Result<Box<dyn ReadSeek>>;
+}
+
+/// Default local-file section source.
+struct PathSource(PathBuf);
+
+impl SectionSource for PathSource {
+    fn open(&self) -> anyhow::Result<Box<dyn ReadSeek>> {
+        let file = File::open(&self.0)
+            .with_context(|| format!("failed to open {}", self.0.display()))?;
+        Ok(Box::new(file))
+    }
+}
+
 /// High-level random-access reader for `.pandora` containers.
 ///
 /// The reader provides:
@@ -695,9 +720,8 @@ pub struct PandoraSectionValidation {
 /// - Integrity-aware decoding
 ///
 /// Sections are decoded on demand.
-#[derive(Debug, Clone)]
 pub struct PandoraReader {
-    path: PathBuf,
+    source: Box<dyn SectionSource>,
     prelude: ContainerPrelude,
     header: ContainerHeader,
 }
@@ -723,20 +747,21 @@ impl PandoraReader {
     /// - The header checksum fails
     /// - The container structure is malformed
     pub fn open(path: impl AsRef<Path>) -> anyhow::Result<Self> {
-        let path = path.as_ref().to_path_buf();
-        let (prelude, header) = read_header(&path)?;
+        Self::from_source(Box::new(PathSource(path.as_ref().to_path_buf())))
+    }
+
+    /// Opens a container from an arbitrary section source (local file, SMB, …).
+    /// The header is read eagerly; section payloads stay lazy.
+    pub fn from_source(source: Box<dyn SectionSource>) -> anyhow::Result<Self> {
+        let mut reader = source.open()?;
+        let (prelude, header) = read_header_from_reader(&mut reader)?;
         Ok(Self {
-            path,
+            source,
             prelude,
             header,
         })
     }
 
-    /// Returns the underlying container path.
-    pub fn path(&self) -> &Path {
-        &self.path
-    }
-
     /// Returns the decoded container prelude.
     pub fn prelude(&self) -> &ContainerPrelude {
         &self.prelude
@@ -773,7 +798,8 @@ impl PandoraReader {
         let Some(descriptor) = self.section_descriptor(name).cloned() else {
             return Ok(None);
         };
-        read_described_section(&self.path, descriptor).map(Some)
+        let mut reader = self.source.open()?;
+        read_described_section_from_reader(&mut reader, descriptor).map(Some)
     }
 
     /// Reads a section and fails if it is missing.
@@ -815,7 +841,8 @@ impl PandoraReader {
     ///
     /// Only the required compressed block is decoded.
     pub fn read_variant_record(&self, pointer: &VariantRecordPointer) -> anyhow::Result<Variant> {
-        read_variant_record_from_path(&self.path, &self.header, pointer)
+        let mut reader = self.source.open()?;
+        read_variant_record_from_reader(&mut reader, &self.header, pointer)
     }
 
     /// Reads multiple lazily stored variant records.
@@ -826,7 +853,8 @@ impl PandoraReader {
         &self,
         pointers: &[VariantRecordPointer],
     ) -> anyhow::Result<Vec<Variant>> {
-        read_variant_records_from_path(&self.path, &self.header, pointers)
+        let mut reader = self.source.open()?;
+        read_variant_records_from_reader(&mut reader, &self.header, pointers)
     }
 
     /// Reads the optional copy-number payload.
@@ -1104,7 +1132,17 @@ pub fn write_container(
 pub fn read_header(path: impl AsRef<Path>) -> anyhow::Result<(ContainerPrelude, ContainerHeader)> {
     let mut reader = File::open(path.as_ref())
         .with_context(|| format!("failed to open {}", path.as_ref().display()))?;
-    let prelude = ContainerPrelude::from_reader(&mut reader)?;
+    read_header_from_reader(&mut reader)
+}
+
+/// Reads and validates the prelude + header from any `Read + Seek` source.
+/// The total container length (needed for layout validation) is obtained via
+/// `Seek`, so this works for non-file sources such as SMB. The reader must be
+/// positioned at the start of the container.
+pub fn read_header_from_reader<R: Read + Seek>(
+    reader: &mut R,
+) -> anyhow::Result<(ContainerPrelude, ContainerHeader)> {
+    let prelude = ContainerPrelude::from_reader(&mut *reader)?;
     validate_prelude(&prelude)?;
 
     let mut header_bytes = vec![0; prelude.header_len as usize];
@@ -1113,10 +1151,11 @@ pub fn read_header(path: impl AsRef<Path>) -> anyhow::Result<(ContainerPrelude,
 
     let header = decode_header(&header_bytes)?;
     validate_header(&header)?;
+    let total_len = reader.seek(SeekFrom::End(0))?;
     validate_section_layout(
         &header,
         (PANDORA_PRELUDE_LEN + header_bytes.len()) as u64,
-        reader.metadata()?.len(),
+        total_len,
     )?;
     Ok((prelude, header))
 }
@@ -1146,6 +1185,15 @@ fn read_described_section(
 ) -> anyhow::Result<DecodedSection> {
     let mut reader = File::open(path.as_ref())
         .with_context(|| format!("failed to open {}", path.as_ref().display()))?;
+    read_described_section_from_reader(&mut reader, descriptor)
+}
+
+/// Reads, checksum-verifies and decompresses a section from any `Read + Seek`
+/// source, seeking to the descriptor's offset first.
+fn read_described_section_from_reader<R: Read + Seek>(
+    reader: &mut R,
+    descriptor: SectionDescriptor,
+) -> anyhow::Result<DecodedSection> {
     reader.seek(SeekFrom::Start(descriptor.offset))?;
 
     let mut stored = vec![0; descriptor.length as usize];
@@ -1669,9 +1717,9 @@ fn encode_variant_index_arrow(
     Ok(payload)
 }
 
-/// Reads and decodes a single lazily stored variant record from disk.
-fn read_variant_record_from_path(
-    path: impl AsRef<Path>,
+/// Reads and decodes a single lazily stored variant record from any source.
+fn read_variant_record_from_reader<R: Read + Seek>(
+    reader: &mut R,
     header: &ContainerHeader,
     pointer: &VariantRecordPointer,
 ) -> anyhow::Result<Variant> {
@@ -1681,9 +1729,7 @@ fn read_variant_record_from_path(
 
     ensure_variant_record_pointer(descriptor, pointer)?;
 
-    let mut reader = File::open(path.as_ref())
-        .with_context(|| format!("failed to open {}", path.as_ref().display()))?;
-    let block = read_variant_record_block(&mut reader, descriptor, pointer)?;
+    let block = read_variant_record_block(&mut *reader, descriptor, pointer)?;
 
     decode_variant_record_from_block(&block, pointer)
 }
@@ -1691,8 +1737,8 @@ fn read_variant_record_from_path(
 /// Reads multiple lazily stored variant records with shared block caching.
 ///
 /// Compressed blocks are decoded at most once during the operation.
-fn read_variant_records_from_path(
-    path: impl AsRef<Path>,
+fn read_variant_records_from_reader<R: Read + Seek>(
+    reader: &mut R,
     header: &ContainerHeader,
     pointers: &[VariantRecordPointer],
 ) -> anyhow::Result<Vec<Variant>> {
@@ -1703,8 +1749,6 @@ fn read_variant_records_from_path(
         ensure_variant_record_pointer(descriptor, pointer)?;
     }
 
-    let mut reader = File::open(path.as_ref())
-        .with_context(|| format!("failed to open {}", path.as_ref().display()))?;
     let mut block_cache: BTreeMap<(u64, u64, u64), Vec<u8>> = BTreeMap::new();
     let mut variants = Vec::with_capacity(pointers.len());
 
@@ -1715,7 +1759,7 @@ fn read_variant_records_from_path(
             pointer.block_uncompressed_length,
         );
         if let std::collections::btree_map::Entry::Vacant(e) = block_cache.entry(block_key) {
-            let block = read_variant_record_block(&mut reader, descriptor, pointer)?;
+            let block = read_variant_record_block(&mut *reader, descriptor, pointer)?;
             e.insert(block);
         }
 
@@ -1728,9 +1772,9 @@ fn read_variant_records_from_path(
     Ok(variants)
 }
 
-/// Reads and decompresses a variant-record block.
-fn read_variant_record_block(
-    reader: &mut File,
+/// Reads and decompresses a variant-record block from any `Read + Seek` source.
+fn read_variant_record_block<R: Read + Seek>(
+    reader: &mut R,
     descriptor: &SectionDescriptor,
     pointer: &VariantRecordPointer,
 ) -> anyhow::Result<Vec<u8>> {