Kaynağa Gözat

Pod5Info corrupted file skipping

Thomas 3 ay önce
ebeveyn
işleme
4429cfcf4f
1 değiştirilmiş dosya ile 18 ekleme ve 6 silme
  1. 18 6
      src/collection/pod5.rs

+ 18 - 6
src/collection/pod5.rs

@@ -1,6 +1,7 @@
 use std::{
     collections::HashSet, fmt, fs, path::{Path, PathBuf}
 };
+use log::warn;
 
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
@@ -113,10 +114,17 @@ impl Pod5sRun {
         let mut flow_cell_id: Option<String> = None;
         let mut sequencing_kit: Option<String> = None;
         let mut run_id: Option<String> = None;
+        let mut skipped = 0;
 
         for p in pod_paths.iter() {
-            let pod = Pod5::from_path(&p)
-                .map_err(|e| anyhow::anyhow!("Failed to parse POD5 file '{}': {:#}", p.display(), e))?;
+            let pod = match Pod5::from_path(&p) {
+                Ok(pod) => pod,
+                Err(e) => {
+                    warn!("Skipping corrupted POD5 file '{}': {}", p.display(), e);
+                    skipped += 1;
+                    continue;
+                }
+            };
             
             // run_id uniqueness check
             match &run_id {
@@ -163,9 +171,13 @@ impl Pod5sRun {
             pod5s.push(pod);
         }
 
-        let run_id = run_id.ok_or(anyhow::anyhow!("No pod5 files loaded"))?;
-        let flow_cell_id = flow_cell_id.ok_or(anyhow::anyhow!("No pod5 files loaded"))?;
-        let sequencing_kit = sequencing_kit.ok_or(anyhow::anyhow!("No pod5 files loaded"))?;
+        let run_id = run_id.ok_or(anyhow::anyhow!("No valid pod5 files loaded"))?;
+        let flow_cell_id = flow_cell_id.ok_or(anyhow::anyhow!("No valid pod5 files loaded"))?;
+        let sequencing_kit = sequencing_kit.ok_or(anyhow::anyhow!("No valid pod5 files loaded"))?;
+
+        if skipped > 0 {
+            eprintln!("⚠️  Skipped {} corrupted POD5 file(s)", skipped);
+        }
 
         Ok(Self {
             run_id,
@@ -380,7 +392,7 @@ mod tests {
     fn load_pod5s() -> anyhow::Result<()> {
         test_init();
 
-        let dir = "/mnt/beegfs02/scratch/t_steimle/prom_runs/A/20251117_0915_P2I-00461-A_PBI55810_22582b29/pod5_recovered";
+        let dir = "/mnt/beegfs02/scratch/t_steimle/prom_runs/B/20251117_0915_P2I-00461-B_PBI54633_d4d7692a/pod5_recovered";
         let saved_runs = "~/data/seq_runs_cases.json";