|
|
@@ -1,7 +1,8 @@
|
|
|
use std::{
|
|
|
- collections::HashSet, fmt, fs, path::{Path, PathBuf}
|
|
|
+ collections::HashSet,
|
|
|
+ fmt, fs,
|
|
|
+ path::{Path, PathBuf},
|
|
|
};
|
|
|
-use log::warn;
|
|
|
|
|
|
use chrono::{DateTime, Utc};
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
@@ -44,17 +45,23 @@ impl Pod5 {
|
|
|
/// corresponding fields in `Pod5`.
|
|
|
pub fn from_path<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
|
|
|
let path_ref = path.as_ref();
|
|
|
-
|
|
|
+
|
|
|
// Convert path to string, returning an error if it contains invalid UTF-8
|
|
|
- let path_str = path_ref
|
|
|
- .to_str()
|
|
|
- .ok_or_else(|| anyhow::anyhow!("Path contains invalid UTF-8: {}", path_ref.display()))?;
|
|
|
-
|
|
|
+ let path_str = path_ref.to_str().ok_or_else(|| {
|
|
|
+ anyhow::anyhow!("Path contains invalid UTF-8: {}", path_ref.display())
|
|
|
+ })?;
|
|
|
+
|
|
|
// Pod5Info::from_pod5 now returns Result
|
|
|
let info = Pod5Info::from_pod5(path_str)?;
|
|
|
-
|
|
|
+
|
|
|
let file_size = std::fs::metadata(path_ref)
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to read metadata for '{}': {}", path_ref.display(), e))?
|
|
|
+ .map_err(|e| {
|
|
|
+ anyhow::anyhow!(
|
|
|
+ "Failed to read metadata for '{}': {}",
|
|
|
+ path_ref.display(),
|
|
|
+ e
|
|
|
+ )
|
|
|
+ })?
|
|
|
.len();
|
|
|
|
|
|
Ok(Self {
|
|
|
@@ -105,9 +112,12 @@ impl Pod5sRun {
|
|
|
/// Each file is parsed using `Pod5::from_path`.
|
|
|
pub fn load_from_dir<P: AsRef<Path>>(dir: P) -> anyhow::Result<Self> {
|
|
|
let pod_paths = list_files_with_ext(dir.as_ref(), "pod5")?;
|
|
|
-
|
|
|
+
|
|
|
if pod_paths.is_empty() {
|
|
|
- anyhow::bail!("No .pod5 files found in directory: {}", dir.as_ref().display());
|
|
|
+ anyhow::bail!(
|
|
|
+ "No .pod5 files found in directory: {}",
|
|
|
+ dir.as_ref().display()
|
|
|
+ );
|
|
|
}
|
|
|
|
|
|
let mut pod5s = Vec::with_capacity(pod_paths.len());
|
|
|
@@ -120,12 +130,13 @@ impl Pod5sRun {
|
|
|
let pod = match Pod5::from_path(&p) {
|
|
|
Ok(pod) => pod,
|
|
|
Err(e) => {
|
|
|
- warn!("Skipping corrupted POD5 file '{}': {}", p.display(), e);
|
|
|
+ // Log corrupted files at debug level
|
|
|
+ log::debug!("Skipping corrupted POD5 file '{}': {}", p.display(), e);
|
|
|
skipped += 1;
|
|
|
continue;
|
|
|
}
|
|
|
};
|
|
|
-
|
|
|
+
|
|
|
// run_id uniqueness check
|
|
|
match &run_id {
|
|
|
None => run_id = Some(pod.protocol_run_id.clone()),
|
|
|
@@ -176,7 +187,11 @@ impl Pod5sRun {
|
|
|
let sequencing_kit = sequencing_kit.ok_or(anyhow::anyhow!("No valid pod5 files loaded"))?;
|
|
|
|
|
|
if skipped > 0 {
|
|
|
- warn!("Skipped {} corrupted POD5 file(s)", skipped);
|
|
|
+ log::debug!(
|
|
|
+ "Skipped {} corrupted POD5 file(s) in directory '{}'",
|
|
|
+ skipped,
|
|
|
+ dir.as_ref().display()
|
|
|
+ );
|
|
|
}
|
|
|
|
|
|
Ok(Self {
|
|
|
@@ -370,13 +385,29 @@ impl Pod5sRuns {
|
|
|
|
|
|
/// Load metadata JSON and restore each run via scanning its directory.
|
|
|
///
|
|
|
- /// Rebuilds `pod5s` by calling `load_from_dir` for each `dir`.
|
|
|
+ /// Rebuilds `pod5s` by calling `load_from_dir` for each `dir`,
|
|
|
+ /// but preserves the `cases` from the saved JSON.
|
|
|
pub fn load_json<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
|
|
|
let raw: Pod5sRuns = serde_json::from_str(&fs::read_to_string(path)?)?;
|
|
|
|
|
|
let mut rebuilt = Pod5sRuns::new();
|
|
|
- for r in raw.data {
|
|
|
- rebuilt.add_from_dir(&r.dir)?;
|
|
|
+ for saved_run in raw.data {
|
|
|
+ // Rebuild pod5s from directory
|
|
|
+ match Pod5sRun::load_from_dir(&saved_run.dir) {
|
|
|
+ Ok(mut fresh_run) => {
|
|
|
+ // Preserve the cases from the saved JSON
|
|
|
+ fresh_run.cases = saved_run.cases;
|
|
|
+ rebuilt.data.push(fresh_run);
|
|
|
+ }
|
|
|
+ Err(e) => {
|
|
|
+ // Log error but continue with other runs
|
|
|
+ log::warn!(
|
|
|
+ "Failed to reload run from '{}': {}",
|
|
|
+ saved_run.dir.display(),
|
|
|
+ e
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
Ok(rebuilt)
|
|
|
}
|
|
|
@@ -392,10 +423,9 @@ mod tests {
|
|
|
fn load_pod5s() -> anyhow::Result<()> {
|
|
|
test_init();
|
|
|
|
|
|
- let dir = "/mnt/beegfs02/scratch/t_steimle/prom_runs/B/20251117_0915_P2I-00461-B_PBI54633_d4d7692a/pod5_recovered";
|
|
|
+ let dir = "/mnt/beegfs02/scratch/t_steimle/prom_runs/A/20251117_0915_P2I-00461-A_PBI55810_22582b29/pod5_recovered";
|
|
|
let saved_runs = "~/data/seq_runs_cases.json";
|
|
|
|
|
|
-
|
|
|
let flow_cell = Pod5sRun::load_from_dir(dir)?;
|
|
|
println!("{:#?}", flow_cell.pod5s.first());
|
|
|
let stats = flow_cell.stats();
|