|
|
@@ -45,45 +45,27 @@ pub struct IdInput {
|
|
|
|
|
|
impl IdsInput {
|
|
|
/// Load `IdsInput` from a JSON file.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// * `path` - Path to a JSON file containing an array of `IdInput`.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if the file cannot be opened or parsed.
|
|
|
- pub fn load_json(path: &str) -> anyhow::Result<Self> {
|
|
|
- let f = File::open(path)?;
|
|
|
- let s: Self = serde_json::from_reader(f)?;
|
|
|
- Ok(s)
|
|
|
+ pub fn load_json<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
|
|
|
+ let file = File::open(&path)?;
|
|
|
+ Ok(serde_json::from_reader(file)?)
|
|
|
}
|
|
|
|
|
|
- /// Save `IdsInput` to a JSON file.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// * `path` - Destination file path.
|
|
|
- ///
|
|
|
- /// # Errors
|
|
|
- /// Returns an error if the file cannot be created or written.
|
|
|
- pub fn save_json(&self, path: &str) -> anyhow::Result<()> {
|
|
|
- let f = File::create(path)?;
|
|
|
- serde_json::to_writer_pretty(f, self)?;
|
|
|
+ /// Save `IdsInput` to a JSON file (pretty-printed).
|
|
|
+ pub fn save_json<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
|
|
|
+ let file = File::create(&path)?;
|
|
|
+ serde_json::to_writer_pretty(file, self)?;
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
- /// Remove duplicate `IdInput` entries.
|
|
|
- ///
|
|
|
- /// Keeps the first occurrence of each unique `IdInput`.
|
|
|
+ /// Remove duplicate `IdInput` entries, retaining the first occurrence.
|
|
|
pub fn dedup(&mut self) {
|
|
|
- let mut unique = HashSet::new();
|
|
|
- self.data.retain(|item| unique.insert(item.clone()));
|
|
|
+ let mut seen = HashSet::new();
|
|
|
+ self.data.retain(|item| seen.insert(item.clone()));
|
|
|
}
|
|
|
|
|
|
- /// Add a new `IdInput` and deduplicate the collection.
|
|
|
- ///
|
|
|
- /// # Arguments
|
|
|
- /// * `values` - A new `IdInput` record.
|
|
|
- pub fn add_input(&mut self, values: IdInput) {
|
|
|
- self.data.push(values);
|
|
|
+ /// Add a new `IdInput` and deduplicate.
|
|
|
+ pub fn add_input(&mut self, entry: IdInput) {
|
|
|
+ self.data.push(entry);
|
|
|
self.dedup();
|
|
|
}
|
|
|
}
|
|
|
@@ -136,62 +118,146 @@ pub struct FlowCells {
|
|
|
}
|
|
|
|
|
|
impl FlowCells {
|
|
|
- /// Loads and merges `FlowCel` objects from both archive and local filesystem, deduplicating by `flowcell_id`.
|
|
|
+ /// Load and merge flowcells from archive and local directories, then annotate with `IdsInput`.
|
|
|
///
|
|
|
- /// This function combines flowcells from:
|
|
|
- /// - a precomputed archive (JSON),
|
|
|
- /// - and a dynamic scan of local run directories.
|
|
|
+ /// # Arguments
|
|
|
+ /// * `local_run_dir` - Root directory containing local flowcell run folders.
|
|
|
+ /// * `inputs_path` - Path to a JSON file with [`IdInput`] annotations.
|
|
|
+ /// * `archive_store_path` - Path to a cached JSON file of archived flowcells.
|
|
|
///
|
|
|
- /// The result is deduplicated by `flow_cell_id`, and enriched with case-level annotations
|
|
|
- /// from an `IdsInput` file.
|
|
|
+ /// # Returns
|
|
|
+ /// A deduplicated and annotated [`FlowCells`] collection.
|
|
|
///
|
|
|
- /// # Deduplication Logic
|
|
|
- /// If a flowcell appears in both sources, the one with the more recent `modified` timestamp is retained.
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if any input file cannot be read or parsed, or if local scanning fails.
|
|
|
pub fn load(
|
|
|
local_run_dir: &str,
|
|
|
inputs_path: &str,
|
|
|
archive_store_path: &str,
|
|
|
) -> anyhow::Result<Self> {
|
|
|
- let mut merged_map: HashMap<String, FlowCell> = HashMap::new();
|
|
|
+ let mut instance = Self {
|
|
|
+ flow_cells: Vec::new(),
|
|
|
+ };
|
|
|
|
|
|
- // Load Vec<FlowCell> from archive if present
|
|
|
- if Path::new(archive_store_path).exists() {
|
|
|
- let file = File::open(archive_store_path)
|
|
|
- .with_context(|| format!("Failed to open File: {archive_store_path}"))?;
|
|
|
- let archived: Vec<FlowCell> = serde_json::from_reader(BufReader::new(file))
|
|
|
- .with_context(|| format!("Failed to parse FlowCells from: {archive_store_path}"))?;
|
|
|
+ instance.load_from_archive(archive_store_path)?;
|
|
|
+ instance.load_from_local(local_run_dir)?;
|
|
|
+ instance.annotate_with_inputs(inputs_path)?;
|
|
|
|
|
|
- for fc in archived {
|
|
|
- merged_map.insert(fc.sample_sheet.flow_cell_id.clone(), fc);
|
|
|
- }
|
|
|
+ Ok(instance)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Load flowcells from a cached archive and merge into `self.flow_cells`.
|
|
|
+ ///
|
|
|
+ /// Retains only the most recently modified entry for each `flowcell_id`.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `archive_path` - Path to archive JSON file.
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if the file cannot be read or parsed.
|
|
|
+ fn load_from_archive(&mut self, archive_path: &str) -> anyhow::Result<()> {
|
|
|
+ if !Path::new(archive_path).exists() {
|
|
|
+ return Ok(());
|
|
|
}
|
|
|
|
|
|
- // Scan local sample_sheets
|
|
|
- let sample_sheets = find_files(&format!("{local_run_dir}/**/sample_sheet*"))?;
|
|
|
- for sample_sheet_path in sample_sheets {
|
|
|
- let dir = sample_sheet_path.parent().ok_or_else(|| {
|
|
|
- anyhow::anyhow!(
|
|
|
- "Failed to get directory from path: {}",
|
|
|
- sample_sheet_path.display()
|
|
|
- )
|
|
|
- })?;
|
|
|
+ let file = File::open(archive_path)
|
|
|
+ .with_context(|| format!("Failed to open archive file: {archive_path}"))?;
|
|
|
+ let archived: Vec<FlowCell> = serde_json::from_reader(BufReader::new(file))
|
|
|
+ .with_context(|| format!("Failed to parse FlowCells from: {archive_path}"))?;
|
|
|
+
|
|
|
+ self.insert_flowcells(archived);
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Scan local run directories for flowcells and merge into `self.flow_cells`.
|
|
|
+ ///
|
|
|
+ /// Uses `scan_local()` to parse local metadata and selects the most recently modified
|
|
|
+ /// version if duplicates exist.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `local_dir` - Root directory containing flowcell runs.
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if scanning fails or if directory layout is invalid.
|
|
|
+ fn load_from_local(&mut self, local_dir: &str) -> anyhow::Result<()> {
|
|
|
+ let sample_sheets = find_files(&format!("{local_dir}/**/sample_sheet*"))?;
|
|
|
+
|
|
|
+ for path in sample_sheets {
|
|
|
+ let dir = path
|
|
|
+ .parent()
|
|
|
+ .ok_or_else(|| anyhow::anyhow!("Invalid sample_sheet path: {}", path.display()))?;
|
|
|
+ let dir_str = dir.to_string_lossy();
|
|
|
|
|
|
- let dir_str = dir.to_string_lossy().to_string();
|
|
|
+ let (sheet, pore, throughput, files) = scan_local(&dir_str)
|
|
|
+ .with_context(|| format!("Failed to scan local dir: {dir_str}"))?;
|
|
|
|
|
|
- let (sample_sheet, pore_activity, throughput, files) = scan_local(&dir_str)
|
|
|
- .with_context(|| format!("Failed to scan local run dir: {dir_str}"))?;
|
|
|
let fc = FlowCell::new(
|
|
|
- sample_sheet,
|
|
|
- pore_activity,
|
|
|
+ sheet,
|
|
|
+ pore,
|
|
|
throughput,
|
|
|
- FlowCellLocation::Local(dir_str.clone()),
|
|
|
+ FlowCellLocation::Local(dir_str.to_string()),
|
|
|
files,
|
|
|
)
|
|
|
- .with_context(|| format!("Failed to load FlowCell from dir: {dir_str}"))?;
|
|
|
+ .with_context(|| format!("Failed to create FlowCell from dir: {dir_str}"))?;
|
|
|
|
|
|
- // Dedup by flowcell_id, retain most recently modified
|
|
|
- merged_map
|
|
|
- .entry(fc.sample_sheet.flow_cell_id.clone())
|
|
|
+ self.insert_flowcells(vec![fc]);
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Annotate flowcells with matching `IdInput` records from JSON.
|
|
|
+ ///
|
|
|
+ /// Assigns to `FlowCell.cases` all matching `IdInput`s by `flow_cell_id`.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `inputs_path` - Path to JSON file containing a list of `IdInput` records.
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if the file is unreadable or malformed.
|
|
|
+ fn annotate_with_inputs(&mut self, inputs_path: &str) -> anyhow::Result<()> {
|
|
|
+ if !Path::new(inputs_path).exists() {
|
|
|
+ warn!("IdsInput file not found at {}", inputs_path);
|
|
|
+ return Ok(());
|
|
|
+ }
|
|
|
+
|
|
|
+ let inputs = IdsInput::load_json(inputs_path)
|
|
|
+ .with_context(|| format!("Failed to load IdsInput from: {inputs_path}"))?;
|
|
|
+
|
|
|
+ self.cross_cases(inputs)
|
|
|
+ .with_context(|| format!("Failed to cross with IdsInput from: {inputs_path}"))?;
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn cross_cases(&mut self, inputs: IdsInput) -> anyhow::Result<()> {
|
|
|
+ for fc in &mut self.flow_cells {
|
|
|
+ fc.cases = inputs
|
|
|
+ .data
|
|
|
+ .iter()
|
|
|
+ .filter(|id| id.flow_cell_id == fc.sample_sheet.flow_cell_id)
|
|
|
+ .cloned()
|
|
|
+ .collect();
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Insert new flowcells into the current collection, deduplicating by `flowcell_id`.
|
|
|
+ ///
|
|
|
+ /// For duplicates, retains the flowcell with the newer `.modified` timestamp.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `new_cells` - A vector of parsed `FlowCell` instances.
|
|
|
+ fn insert_flowcells(&mut self, new_cells: Vec<FlowCell>) {
|
|
|
+ let mut map: HashMap<String, FlowCell> = self
|
|
|
+ .flow_cells
|
|
|
+ .drain(..)
|
|
|
+ .map(|fc| (fc.sample_sheet.flow_cell_id.clone(), fc))
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ for fc in new_cells {
|
|
|
+ map.entry(fc.sample_sheet.flow_cell_id.clone())
|
|
|
.and_modify(|existing| {
|
|
|
if fc.modified > existing.modified {
|
|
|
*existing = fc.clone();
|
|
|
@@ -200,25 +266,7 @@ impl FlowCells {
|
|
|
.or_insert(fc);
|
|
|
}
|
|
|
|
|
|
- // Load IdsInput and annotate flowcells
|
|
|
- if Path::new(inputs_path).exists() {
|
|
|
- let inputs = IdsInput::load_json(inputs_path)
|
|
|
- .with_context(|| format!("Failed to load json from: {inputs_path}"))?;
|
|
|
- for fc in merged_map.values_mut() {
|
|
|
- fc.cases = inputs
|
|
|
- .data
|
|
|
- .iter()
|
|
|
- .filter(|info| info.flow_cell_id == fc.sample_sheet.flow_cell_id)
|
|
|
- .cloned()
|
|
|
- .collect();
|
|
|
- }
|
|
|
- } else {
|
|
|
- warn!("No inputs json...");
|
|
|
- }
|
|
|
-
|
|
|
- Ok(Self {
|
|
|
- flow_cells: merged_map.into_values().collect(),
|
|
|
- })
|
|
|
+ self.flow_cells = map.into_values().collect();
|
|
|
}
|
|
|
|
|
|
/// Updates a JSON archive of `FlowCel` objects by scanning `.tar` archives in a directory.
|