|
|
@@ -288,109 +288,63 @@ impl FlowCells {
|
|
|
self.flow_cells = map.into_values().collect();
|
|
|
}
|
|
|
|
|
|
- /// Updates a JSON archive of `FlowCel` objects by scanning `.tar` archives in a directory.
|
|
|
- ///
|
|
|
- /// This function is used to **discover new archived flowcells** by scanning all `.tar` files
|
|
|
- /// in a given directory, parsing their contents using [`scan_archive`] and [`FlowCel::new`],
|
|
|
- /// and then appending the results to an existing JSON file (if present).
|
|
|
- /// Flowcells are **deduplicated** by `flowcell_id`, and the updated result is saved back to disk.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// - `archive_path`: Path to a directory containing `.tar` archives produced by MinKNOW.
|
|
|
- /// - `save_path`: Path to a JSON file where the deduplicated list of `FlowCel` objects will be saved.
|
|
|
- ///
|
|
|
- /// # Behavior
|
|
|
- /// - If `save_path` exists, the function loads existing flowcells from it.
|
|
|
- /// - Then it scans all `.tar` files in `archive_path`, one by one:
|
|
|
- /// - Extracts `sample_sheet` and `.pod5` file metadata using [`scan_archive`]
|
|
|
- /// - Builds a new [`FlowCel`] using [`FlowCel::new`] with location `FlowCellLocation::Archived(...)`
|
|
|
- /// - Logs and skips entries that fail to parse
|
|
|
- /// - All new flowcells are added to the existing list and deduplicated.
|
|
|
- /// - The updated list is sorted and written back to `save_path`.
|
|
|
- ///
|
|
|
- /// # Deduplication
|
|
|
- /// - Flowcells are deduplicated using `.dedup_by_key(|fc| fc.flowcell_id.clone())`.
|
|
|
- /// - The last encountered entry is kept if duplicates exist.
|
|
|
- ///
|
|
|
- /// # Returns
|
|
|
- /// - `Ok(())` if scanning and update succeeds.
|
|
|
- /// - `Err` if the archive directory, `.tar` files, or save path cannot be processed.
|
|
|
- ///
|
|
|
- /// # Example
|
|
|
- /// ```
|
|
|
- /// update_archive_from_scan("archives/", "flowcell_cache.json")?;
|
|
|
- /// ```
|
|
|
- ///
|
|
|
- /// # See Also
|
|
|
- /// - [`scan_archive`]
|
|
|
- /// - [`FlowCell`]
|
|
|
- /// - [`FlowCellLocation::Archived`]
|
|
|
- pub fn update_archive_from_scan(archive_path: &str, save_path: &str) -> anyhow::Result<()> {
|
|
|
- // Load existing archive, if any
|
|
|
- let mut all: Vec<FlowCell> = if Path::new(save_path).exists() {
|
|
|
- let file = File::open(save_path)?;
|
|
|
- serde_json::from_reader(BufReader::new(file))
|
|
|
- .map_err(|e| anyhow::anyhow!("Failed to parse: {save_path}.\n\t{e}"))?
|
|
|
- } else {
|
|
|
- Vec::new()
|
|
|
- };
|
|
|
-
|
|
|
- let n_before = all.len();
|
|
|
- let pattern = format!("{archive_path}/*.tar");
|
|
|
-
|
|
|
- // Scan all .tar archives
|
|
|
- let res: Vec<FlowCell> = glob(&pattern)?
|
|
|
- .filter_map(Result::ok)
|
|
|
- .filter_map(|path| {
|
|
|
- let archive_str = path.to_string_lossy();
|
|
|
- let (sample_sheet, pore_activity, throughput, files) =
|
|
|
- match scan_archive(&archive_str) {
|
|
|
- Ok(r) => r,
|
|
|
+ /// Discover new archived flowcells in `archive_dir` and update the JSON at `store_path`.
|
|
|
+ ///
|
|
|
+ /// - Loads existing archive (gzip-aware) into `self`.
|
|
|
+ /// - Scans all top-level `.tar` files directly under `archive_dir` (non-recursive).
|
|
|
+ /// - For each archive, runs `scan_archive`, builds a `FlowCell` with
|
|
|
+ /// `FlowCellLocation::Archived(archive_dir)`, and merges it via `insert_flowcells`.
|
|
|
+ /// - Logs how many new flowcells were added.
|
|
|
+ /// - Saves back to `store_path` (gzip-compressed).
|
|
|
+ pub fn update_archive_from_scan(
|
|
|
+ &mut self,
|
|
|
+ archive_dir: &str,
|
|
|
+ store_path: &str,
|
|
|
+ ) -> anyhow::Result<()> {
|
|
|
+ // 1) Load whatever’s already in the archive (if present)
|
|
|
+ self.load_from_archive(store_path)
|
|
|
+ .with_context(|| format!("Loading existing archive from {}", store_path))?;
|
|
|
+
|
|
|
+ let before = self.flow_cells.len();
|
|
|
+ info!("Scanning '{}' for .tar archives…", archive_dir);
|
|
|
+
|
|
|
+ // 2) Find & process every top-level .tar (glob `*.tar` is not recursive)
|
|
|
+ let pattern = format!("{}/*.tar", archive_dir);
|
|
|
+ let entries =
|
|
|
+ glob(&pattern).with_context(|| format!("Invalid glob pattern: {}", &pattern))?;
|
|
|
+ for entry in entries.filter_map(Result::ok) {
|
|
|
+ let path = entry.to_string_lossy();
|
|
|
+ match scan_archive(&path) {
|
|
|
+ Ok((sheet, pore, throughput, files)) => {
|
|
|
+ match FlowCell::new(
|
|
|
+ sheet,
|
|
|
+ pore,
|
|
|
+ throughput,
|
|
|
+ FlowCellLocation::Archived(archive_dir.to_string()),
|
|
|
+ files,
|
|
|
+ ) {
|
|
|
+ Ok(fc) => {
|
|
|
+ // merge & dedupe
|
|
|
+ self.insert_flowcells(vec![fc]);
|
|
|
+ }
|
|
|
Err(e) => {
|
|
|
- warn!("Failed to scan archive {}: {e}", archive_str);
|
|
|
- return None;
|
|
|
+ warn!("Failed to build FlowCell from '{}': {}", path, e);
|
|
|
}
|
|
|
- };
|
|
|
- match FlowCell::new(
|
|
|
- sample_sheet,
|
|
|
- pore_activity,
|
|
|
- throughput,
|
|
|
- FlowCellLocation::Archived(archive_path.to_string()),
|
|
|
- files,
|
|
|
- ) {
|
|
|
- Ok(fc) => Some(fc),
|
|
|
- Err(e) => {
|
|
|
- warn!("Failed to create FlowCel from {}: {e}", archive_str);
|
|
|
- None
|
|
|
}
|
|
|
}
|
|
|
- })
|
|
|
- .collect();
|
|
|
-
|
|
|
- // Merge, deduplicate, and write updated archive
|
|
|
- all.extend(res);
|
|
|
- all.sort_by(|a, b| {
|
|
|
- a.sample_sheet
|
|
|
- .flow_cell_id
|
|
|
- .cmp(&b.sample_sheet.flow_cell_id)
|
|
|
- });
|
|
|
- all.dedup_by_key(|v| v.sample_sheet.flow_cell_id.clone());
|
|
|
-
|
|
|
- let n_final = all.len();
|
|
|
- info!(
|
|
|
- "{} new archive(s) discovered.",
|
|
|
- n_final.saturating_sub(n_before)
|
|
|
- );
|
|
|
+ Err(e) => warn!("Failed to scan archive '{}': {}", path, e),
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- let json = serde_json::to_string_pretty(&all)
|
|
|
- .map_err(|e| anyhow::anyhow!("Can't convert into json.\n{e}"))?;
|
|
|
+ let added = self.flow_cells.len().saturating_sub(before);
|
|
|
+ info!("Discovered {} new archived flowcell(s).", added);
|
|
|
|
|
|
- fs::write(save_path, json)
|
|
|
- .map_err(|e| anyhow::anyhow!("Can't write file: {save_path}.\n{e}"))?;
|
|
|
+ // 3) Save the merged list back
|
|
|
+ self.save_to_archive(store_path)
|
|
|
+ .with_context(|| format!("Saving updated archive to {}", store_path))?;
|
|
|
|
|
|
Ok(())
|
|
|
}
|
|
|
-
|
|
|
/// Sorts `flow_cells` in-place from oldest (`modified` smallest) to newest.
|
|
|
pub fn sort_by_modified(&mut self) {
|
|
|
self.flow_cells.sort_by_key(|fc| fc.modified);
|