|
@@ -6,8 +6,8 @@
|
|
|
use std::{
|
|
use std::{
|
|
|
collections::BTreeMap,
|
|
collections::BTreeMap,
|
|
|
fs::File,
|
|
fs::File,
|
|
|
- io::{Read, Seek, SeekFrom, Write},
|
|
|
|
|
- path::Path,
|
|
|
|
|
|
|
+ io::{Cursor, Read, Seek, SeekFrom, Write},
|
|
|
|
|
+ path::{Path, PathBuf},
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
use anyhow::{bail, Context};
|
|
use anyhow::{bail, Context};
|
|
@@ -251,6 +251,141 @@ pub struct DecodedSection {
|
|
|
pub payload: Vec<u8>,
|
|
pub payload: Vec<u8>,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
+pub struct PandoraSummary {
|
|
|
|
|
+ pub prelude: ContainerPrelude,
|
|
|
|
|
+ pub header: ContainerHeader,
|
|
|
|
|
+ pub sections: Vec<PandoraSectionSummary>,
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
|
|
|
|
+pub struct PandoraSectionSummary {
|
|
|
|
|
+ pub name: SectionName,
|
|
|
|
|
+ pub kind: SectionKind,
|
|
|
|
|
+ pub offset: u64,
|
|
|
|
|
+ pub length: u64,
|
|
|
|
|
+ pub checksum: String,
|
|
|
|
|
+ pub required: bool,
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+#[derive(Debug, Clone)]
|
|
|
|
|
+pub struct PandoraReader {
|
|
|
|
|
+ path: PathBuf,
|
|
|
|
|
+ prelude: ContainerPrelude,
|
|
|
|
|
+ header: ContainerHeader,
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl PandoraReader {
|
|
|
|
|
+ pub fn open(path: impl AsRef<Path>) -> anyhow::Result<Self> {
|
|
|
|
|
+ let path = path.as_ref().to_path_buf();
|
|
|
|
|
+ let (prelude, header) = read_header(&path)?;
|
|
|
|
|
+ Ok(Self {
|
|
|
|
|
+ path,
|
|
|
|
|
+ prelude,
|
|
|
|
|
+ header,
|
|
|
|
|
+ })
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn path(&self) -> &Path {
|
|
|
|
|
+ &self.path
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn prelude(&self) -> &ContainerPrelude {
|
|
|
|
|
+ &self.prelude
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn header(&self) -> &ContainerHeader {
|
|
|
|
|
+ &self.header
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn section_descriptor(&self, name: &SectionName) -> Option<&SectionDescriptor> {
|
|
|
|
|
+ self.header.section(name)
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn summary(&self) -> PandoraSummary {
|
|
|
|
|
+ PandoraSummary::from_parts(self.prelude.clone(), self.header.clone())
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn read_section(&self, name: &SectionName) -> anyhow::Result<Option<DecodedSection>> {
|
|
|
|
|
+ let Some(descriptor) = self.section_descriptor(name).cloned() else {
|
|
|
|
|
+ return Ok(None);
|
|
|
|
|
+ };
|
|
|
|
|
+ read_described_section(&self.path, descriptor).map(Some)
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn read_required_section(&self, name: &SectionName) -> anyhow::Result<DecodedSection> {
|
|
|
|
|
+ self.read_section(name)?
|
|
|
|
|
+ .with_context(|| format!("missing required section: {name:?}"))
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn variants(&self) -> anyhow::Result<Variants> {
|
|
|
|
|
+ let section = self.read_required_section(&SectionName::Variants)?;
|
|
|
|
|
+ decode_variants_section(§ion)
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn copy_number(&self) -> anyhow::Result<Option<SavanaCN>> {
|
|
|
|
|
+ self.read_section(&SectionName::CopyNumber)?
|
|
|
|
|
+ .map(|section| decode_copy_number_section(§ion))
|
|
|
|
|
+ .transpose()
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn bam_qc(&self) -> anyhow::Result<Option<BamQcPayload>> {
|
|
|
|
|
+ self.read_section(&SectionName::BamQc)?
|
|
|
|
|
+ .map(|section| decode_bam_qc_section(§ion))
|
|
|
|
|
+ .transpose()
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn pipe_qc(&self) -> anyhow::Result<Option<PipeQcPayload>> {
|
|
|
|
|
+ self.read_section(&SectionName::PipeQc)?
|
|
|
|
|
+ .map(|section| decode_pipe_qc_section(§ion))
|
|
|
|
|
+ .transpose()
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ pub fn provenance(&self) -> anyhow::Result<Option<ProvenancePayload>> {
|
|
|
|
|
+ self.read_section(&SectionName::Provenance)?
|
|
|
|
|
+ .map(|section| decode_provenance_section(§ion))
|
|
|
|
|
+ .transpose()
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl PandoraSummary {
|
|
|
|
|
+ pub fn from_parts(prelude: ContainerPrelude, header: ContainerHeader) -> Self {
|
|
|
|
|
+ let sections = header
|
|
|
|
|
+ .sections
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .map(PandoraSectionSummary::from)
|
|
|
|
|
+ .collect();
|
|
|
|
|
+
|
|
|
|
|
+ Self {
|
|
|
|
|
+ prelude,
|
|
|
|
|
+ header,
|
|
|
|
|
+ sections,
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+impl From<&SectionDescriptor> for PandoraSectionSummary {
|
|
|
|
|
+ fn from(section: &SectionDescriptor) -> Self {
|
|
|
|
|
+ Self {
|
|
|
|
|
+ name: section.name.clone(),
|
|
|
|
|
+ kind: section.kind.clone(),
|
|
|
|
|
+ offset: section.offset,
|
|
|
|
|
+ length: section.length,
|
|
|
|
|
+ checksum: section.checksum.clone(),
|
|
|
|
|
+ required: section.required,
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+pub fn open_pandora(path: impl AsRef<Path>) -> anyhow::Result<PandoraReader> {
|
|
|
|
|
+ PandoraReader::open(path)
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+pub fn read_pandora_summary(path: impl AsRef<Path>) -> anyhow::Result<PandoraSummary> {
|
|
|
|
|
+ let (prelude, header) = read_header(path)?;
|
|
|
|
|
+ Ok(PandoraSummary::from_parts(prelude, header))
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
pub fn encode_header(header: &ContainerHeader) -> anyhow::Result<Vec<u8>> {
|
|
pub fn encode_header(header: &ContainerHeader) -> anyhow::Result<Vec<u8>> {
|
|
|
let packed = rmp_serde::to_vec_named(header)?;
|
|
let packed = rmp_serde::to_vec_named(header)?;
|
|
|
zstd::bulk::compress(&packed, header.compression.level.unwrap_or(3))
|
|
zstd::bulk::compress(&packed, header.compression.level.unwrap_or(3))
|
|
@@ -258,7 +393,7 @@ pub fn encode_header(header: &ContainerHeader) -> anyhow::Result<Vec<u8>> {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub fn decode_header(bytes: &[u8]) -> anyhow::Result<ContainerHeader> {
|
|
pub fn decode_header(bytes: &[u8]) -> anyhow::Result<ContainerHeader> {
|
|
|
- let unpacked = zstd::bulk::decompress(bytes, usize::MAX)
|
|
|
|
|
|
|
+ let unpacked = zstd::stream::decode_all(Cursor::new(bytes))
|
|
|
.context("failed to decompress .pandora header")?;
|
|
.context("failed to decompress .pandora header")?;
|
|
|
rmp_serde::from_slice(&unpacked).context("failed to decode .pandora header")
|
|
rmp_serde::from_slice(&unpacked).context("failed to decode .pandora header")
|
|
|
}
|
|
}
|
|
@@ -283,7 +418,7 @@ pub fn write_container(
|
|
|
.map(encode_pending_section)
|
|
.map(encode_pending_section)
|
|
|
.collect::<anyhow::Result<Vec<_>>>()?;
|
|
.collect::<anyhow::Result<Vec<_>>>()?;
|
|
|
|
|
|
|
|
- let (header_bytes, descriptors) = finalize_header_sections(&header, &stored_sections)?;
|
|
|
|
|
|
|
+ let (_, descriptors) = finalize_header_sections(&header, &stored_sections)?;
|
|
|
header.sections = descriptors;
|
|
header.sections = descriptors;
|
|
|
let header_bytes = encode_header(&header)?;
|
|
let header_bytes = encode_header(&header)?;
|
|
|
let header_checksum = blake3::hash(&header_bytes);
|
|
let header_checksum = blake3::hash(&header_bytes);
|
|
@@ -328,6 +463,13 @@ pub fn read_section(
|
|
|
return Ok(None);
|
|
return Ok(None);
|
|
|
};
|
|
};
|
|
|
|
|
|
|
|
|
|
+ read_described_section(path, descriptor).map(Some)
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+fn read_described_section(
|
|
|
|
|
+ path: impl AsRef<Path>,
|
|
|
|
|
+ descriptor: SectionDescriptor,
|
|
|
|
|
+) -> anyhow::Result<DecodedSection> {
|
|
|
let mut reader = File::open(path.as_ref())
|
|
let mut reader = File::open(path.as_ref())
|
|
|
.with_context(|| format!("failed to open {}", path.as_ref().display()))?;
|
|
.with_context(|| format!("failed to open {}", path.as_ref().display()))?;
|
|
|
reader.seek(SeekFrom::Start(descriptor.offset))?;
|
|
reader.seek(SeekFrom::Start(descriptor.offset))?;
|
|
@@ -345,10 +487,10 @@ pub fn read_section(
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
let payload = decode_section_payload(&stored, &descriptor.compression)?;
|
|
let payload = decode_section_payload(&stored, &descriptor.compression)?;
|
|
|
- Ok(Some(DecodedSection {
|
|
|
|
|
|
|
+ Ok(DecodedSection {
|
|
|
descriptor,
|
|
descriptor,
|
|
|
payload,
|
|
payload,
|
|
|
- }))
|
|
|
|
|
|
|
+ })
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub fn read_required_section(
|
|
pub fn read_required_section(
|
|
@@ -553,7 +695,7 @@ fn decode_section_payload(
|
|
|
) -> anyhow::Result<Vec<u8>> {
|
|
) -> anyhow::Result<Vec<u8>> {
|
|
|
match compression.algorithm {
|
|
match compression.algorithm {
|
|
|
CompressionAlgorithm::None => Ok(bytes.to_vec()),
|
|
CompressionAlgorithm::None => Ok(bytes.to_vec()),
|
|
|
- CompressionAlgorithm::Zstd => zstd::bulk::decompress(bytes, usize::MAX)
|
|
|
|
|
|
|
+ CompressionAlgorithm::Zstd => zstd::stream::decode_all(Cursor::new(bytes))
|
|
|
.context("failed to zstd-decompress .pandora section"),
|
|
.context("failed to zstd-decompress .pandora section"),
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -675,6 +817,31 @@ mod tests {
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Writes a container with one provenance section, then checks that
/// `PandoraReader` reports it in the summary and returns its payload.
#[test]
fn reader_exposes_summary_and_sections() -> anyhow::Result<()> {
    // A random file name keeps this test from colliding with other runs.
    let file_name = format!("{}.pandora", uuid::Uuid::new_v4());
    let path = std::env::temp_dir().join(file_name);
    let payload = b"hello reader".to_vec();

    let sections = vec![PendingSection::new(
        SectionName::Provenance,
        SectionKind::RawBytes,
        payload.clone(),
    )];
    write_container(&path, test_header(), sections)?;

    let reader = PandoraReader::open(&path)?;

    // The summary reflects the header and the single written section.
    let summary = reader.summary();
    assert_eq!(summary.header.sample.sample_id, "sample_001");
    assert_eq!(summary.sections.len(), 1);
    assert_eq!(summary.sections[0].name, SectionName::Provenance);

    // The payload comes back exactly as written.
    let read = reader.read_required_section(&SectionName::Provenance)?;
    assert_eq!(read.payload, payload);

    std::fs::remove_file(path)?;
    Ok(())
}
|
|
|
|
|
+
|
|
|
#[test]
|
|
#[test]
|
|
|
fn rejects_bad_magic() -> anyhow::Result<()> {
|
|
fn rejects_bad_magic() -> anyhow::Result<()> {
|
|
|
let bytes = [0u8; PANDORA_PRELUDE_LEN];
|
|
let bytes = [0u8; PANDORA_PRELUDE_LEN];
|