Thomas 3 місяців тому
батько
коміт
79ef966d5d
1 змінених файлів з 49 додано та 19 видалено
  1. 49 19
      src/collection/pod5.rs

+ 49 - 19
src/collection/pod5.rs

@@ -1,7 +1,8 @@
 use std::{
-    collections::HashSet, fmt, fs, path::{Path, PathBuf}
+    collections::HashSet,
+    fmt, fs,
+    path::{Path, PathBuf},
 };
-use log::warn;
 
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
@@ -44,17 +45,23 @@ impl Pod5 {
     /// corresponding fields in `Pod5`.
     pub fn from_path<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
         let path_ref = path.as_ref();
-        
+
         // Convert path to string, returning an error if it contains invalid UTF-8
-        let path_str = path_ref
-            .to_str()
-            .ok_or_else(|| anyhow::anyhow!("Path contains invalid UTF-8: {}", path_ref.display()))?;
-        
+        let path_str = path_ref.to_str().ok_or_else(|| {
+            anyhow::anyhow!("Path contains invalid UTF-8: {}", path_ref.display())
+        })?;
+
         // Pod5Info::from_pod5 now returns Result
         let info = Pod5Info::from_pod5(path_str)?;
-        
+
         let file_size = std::fs::metadata(path_ref)
-            .map_err(|e| anyhow::anyhow!("Failed to read metadata for '{}': {}", path_ref.display(), e))?
+            .map_err(|e| {
+                anyhow::anyhow!(
+                    "Failed to read metadata for '{}': {}",
+                    path_ref.display(),
+                    e
+                )
+            })?
             .len();
 
         Ok(Self {
@@ -105,9 +112,12 @@ impl Pod5sRun {
     /// Each file is parsed using `Pod5::from_path`.
     pub fn load_from_dir<P: AsRef<Path>>(dir: P) -> anyhow::Result<Self> {
         let pod_paths = list_files_with_ext(dir.as_ref(), "pod5")?;
-        
+
         if pod_paths.is_empty() {
-            anyhow::bail!("No .pod5 files found in directory: {}", dir.as_ref().display());
+            anyhow::bail!(
+                "No .pod5 files found in directory: {}",
+                dir.as_ref().display()
+            );
         }
 
         let mut pod5s = Vec::with_capacity(pod_paths.len());
@@ -120,12 +130,13 @@ impl Pod5sRun {
             let pod = match Pod5::from_path(&p) {
                 Ok(pod) => pod,
                 Err(e) => {
-                    warn!("Skipping corrupted POD5 file '{}': {}", p.display(), e);
+                    // Log corrupted files at debug level
+                    log::debug!("Skipping corrupted POD5 file '{}': {}", p.display(), e);
                     skipped += 1;
                     continue;
                 }
             };
-            
+
             // run_id uniqueness check
             match &run_id {
                 None => run_id = Some(pod.protocol_run_id.clone()),
@@ -176,7 +187,11 @@ impl Pod5sRun {
         let sequencing_kit = sequencing_kit.ok_or(anyhow::anyhow!("No valid pod5 files loaded"))?;
 
         if skipped > 0 {
-            warn!("Skipped {} corrupted POD5 file(s)", skipped);
+            log::debug!(
+                "Skipped {} corrupted POD5 file(s) in directory '{}'",
+                skipped,
+                dir.as_ref().display()
+            );
         }
 
         Ok(Self {
@@ -370,13 +385,29 @@ impl Pod5sRuns {
 
     /// Load metadata JSON and restore each run via scanning its directory.
     ///
-    /// Rebuilds `pod5s` by calling `load_from_dir` for each `dir`.
+    /// Rebuilds `pod5s` by calling `load_from_dir` for each `dir`,
+    /// but preserves the `cases` from the saved JSON.
     pub fn load_json<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
         let raw: Pod5sRuns = serde_json::from_str(&fs::read_to_string(path)?)?;
 
         let mut rebuilt = Pod5sRuns::new();
-        for r in raw.data {
-            rebuilt.add_from_dir(&r.dir)?;
+        for saved_run in raw.data {
+            // Rebuild pod5s from directory
+            match Pod5sRun::load_from_dir(&saved_run.dir) {
+                Ok(mut fresh_run) => {
+                    // Preserve the cases from the saved JSON
+                    fresh_run.cases = saved_run.cases;
+                    rebuilt.data.push(fresh_run);
+                }
+                Err(e) => {
+                    // Log error but continue with other runs
+                    log::warn!(
+                        "Failed to reload run from '{}': {}",
+                        saved_run.dir.display(),
+                        e
+                    );
+                }
+            }
         }
         Ok(rebuilt)
     }
@@ -392,10 +423,9 @@ mod tests {
     fn load_pod5s() -> anyhow::Result<()> {
         test_init();
 
-        let dir = "/mnt/beegfs02/scratch/t_steimle/prom_runs/B/20251117_0915_P2I-00461-B_PBI54633_d4d7692a/pod5_recovered";
+        let dir = "/mnt/beegfs02/scratch/t_steimle/prom_runs/A/20251117_0915_P2I-00461-A_PBI55810_22582b29/pod5_recovered";
         let saved_runs = "~/data/seq_runs_cases.json";
 
-
         let flow_cell = Pod5sRun::load_from_dir(dir)?;
         println!("{:#?}", flow_cell.pod5s.first());
         let stats = flow_cell.stats();