|
|
@@ -1,12 +1,14 @@
|
|
|
-use anyhow::{anyhow, Context};
|
|
|
+use anyhow::{anyhow, Context, Result};
|
|
|
use chrono::{DateTime, Utc};
|
|
|
+use csv::ReaderBuilder;
|
|
|
use glob::glob;
|
|
|
-use log::warn;
|
|
|
+use log::{info, warn};
|
|
|
use pandora_lib_pod5::Pod5Info;
|
|
|
+use serde::Deserialize;
|
|
|
use std::{
|
|
|
collections::HashMap,
|
|
|
fmt::Display,
|
|
|
- fs::{self, Metadata},
|
|
|
+ fs::{self, File, Metadata},
|
|
|
os::unix::fs::MetadataExt,
|
|
|
path::PathBuf,
|
|
|
usize,
|
|
|
@@ -35,7 +37,6 @@ impl Display for Pod5Type {
|
|
|
};
|
|
|
f.write_str(s)
|
|
|
}
|
|
|
- // add code here
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
@@ -60,7 +61,7 @@ impl Default for Pod5Config {
|
|
|
}
|
|
|
|
|
|
impl Pod5 {
|
|
|
- pub fn from_path(path: &PathBuf, config: Pod5Config) -> anyhow::Result<Self> {
|
|
|
+ pub fn from_path(path: &PathBuf, config: &Pod5Config) -> Result<Self> {
|
|
|
let s = path
|
|
|
.to_str()
|
|
|
.context("Can't convert PathBuf to str {path:?}")?;
|
|
|
@@ -86,8 +87,6 @@ impl Pod5 {
|
|
|
.context("Can't get flowcell_name")?
|
|
|
.to_string();
|
|
|
|
|
|
- // let info = Pod5Info::from_pod5(s);
|
|
|
-
|
|
|
Ok(Self {
|
|
|
path: s.to_string(),
|
|
|
pod5_type,
|
|
|
@@ -98,13 +97,22 @@ impl Pod5 {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-pub fn list_pod_files(dir: &str) -> anyhow::Result<Vec<Pod5>> {
|
|
|
+pub fn list_pod_files(dir: &str) -> Result<Vec<Pod5>> {
|
|
|
let pattern = format!("{}/**/*.pod5", dir);
|
|
|
let mut pod_files = Vec::new();
|
|
|
|
|
|
+ let conf = Pod5Config {
|
|
|
+ base_dir: if dir.ends_with('/') {
|
|
|
+ dir.to_string()
|
|
|
+ } else {
|
|
|
+ format!("{dir}/")
|
|
|
+ },
|
|
|
+ ..Pod5Config::default()
|
|
|
+ };
|
|
|
+
|
|
|
for entry in glob(&pattern).expect("Failed to read glob pattern") {
|
|
|
match entry {
|
|
|
- Ok(path) => match Pod5::from_path(&path, Pod5Config::default()) {
|
|
|
+ Ok(path) => match Pod5::from_path(&path, &conf) {
|
|
|
Ok(pod5) => pod_files.push(pod5),
|
|
|
Err(e) => warn!("{e}"),
|
|
|
},
|
|
|
@@ -123,6 +131,8 @@ pub struct Run {
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct FlowCell {
|
|
|
pub flowcell_name: String,
|
|
|
+ pub corrected_name: String,
|
|
|
+ pub cases: Vec<FlowCellCase>,
|
|
|
pub run_name: String,
|
|
|
pub pod5_type: Pod5Type,
|
|
|
pub pod5_info: Pod5Info,
|
|
|
@@ -135,9 +145,17 @@ pub struct Runs {
|
|
|
pub runs: Vec<Run>,
|
|
|
}
|
|
|
|
|
|
+#[derive(Debug, Clone)]
|
|
|
+pub struct FlowCellCase { // One entry of `FlowCell::cases`; `Runs::import_dir` builds one per corrected-names row whose `flow_cell` equals the flow cell's name.
|
|
|
+ pub id: String, // Copied from `FCLine::id`; presumably a case/sample identifier — TODO confirm.
|
|
|
+ pub time_point: String, // Copied from `FCLine::time_point`.
|
|
|
+ pub barcode: String, // Copied from `FCLine::barcode_number`.
|
|
|
+}
|
|
|
+
|
|
|
impl Runs {
|
|
|
- pub fn import_dir(dir: &str) -> anyhow::Result<Self> {
|
|
|
+ pub fn import_dir(dir: &str, corrected_fc_path: &str) -> Result<Self> {
|
|
|
let pod5 = list_pod_files(dir)?;
|
|
|
+ println!("N pod5 {}", pod5.len());
|
|
|
|
|
|
let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
|
|
|
for pod in pod5 {
|
|
|
@@ -145,11 +163,51 @@ impl Runs {
|
|
|
fc.entry(k).or_default().push(pod);
|
|
|
}
|
|
|
|
|
|
- let flow_cells: Vec<FlowCell> = fc.into_values().map(|v| {
|
|
|
+ let corrected_fc = load_flowcells_corrected_names(corrected_fc_path)?;
|
|
|
+ let flow_cells: Vec<FlowCell> = fc
|
|
|
+ .into_values()
|
|
|
+ .map(|v| {
|
|
|
let first = &v[0];
|
|
|
let pod5_info = Pod5Info::from_pod5(&first.path);
|
|
|
+ let flowcell_name = first.flowcell_name.clone();
|
|
|
+
|
|
|
+ let sel: Vec<FCLine> = corrected_fc
|
|
|
+ .iter()
|
|
|
+ .filter(|e| e.flow_cell == flowcell_name)
|
|
|
+ .cloned()
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ let mut corrected_name: Vec<String> = sel
|
|
|
+ .clone()
|
|
|
+ .into_iter()
|
|
|
+ .map(|e| e.ref_flow_cell)
|
|
|
+ .filter(|e| !e.is_empty())
|
|
|
+ .collect();
|
|
|
+ corrected_name.dedup();
|
|
|
+
|
|
|
+ if corrected_name.len() > 1 {
|
|
|
+ panic!("Multiple corrected flow_cells for {v:?}");
|
|
|
+ }
|
|
|
+
|
|
|
+ let corrected_name = if !corrected_name.is_empty() {
|
|
|
+ corrected_name.first().unwrap().to_string()
|
|
|
+ } else {
|
|
|
+ "".to_string()
|
|
|
+ };
|
|
|
+
|
|
|
+ let cases: Vec<FlowCellCase> = sel
|
|
|
+ .iter()
|
|
|
+ .map(|e| FlowCellCase {
|
|
|
+ id: e.id.clone(),
|
|
|
+ time_point: e.time_point.clone(),
|
|
|
+ barcode: e.barcode_number.clone(),
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
FlowCell {
|
|
|
- flowcell_name: first.flowcell_name.clone(),
|
|
|
+ flowcell_name,
|
|
|
+ corrected_name,
|
|
|
+ cases,
|
|
|
run_name: first.run_name.clone(),
|
|
|
pod5_type: first.pod5_type.clone(),
|
|
|
pod5_info,
|
|
|
@@ -187,7 +245,7 @@ impl Runs {
|
|
|
let dates: Vec<DateTime<Utc>> = fc
|
|
|
.pod5
|
|
|
.iter()
|
|
|
- .map(|p| p.file_metadata.created().unwrap().into())
|
|
|
+ .map(|p| p.file_metadata.modified().unwrap().into())
|
|
|
.collect();
|
|
|
let from = dates.iter().min().unwrap();
|
|
|
let to = dates.iter().max().unwrap();
|
|
|
@@ -200,6 +258,7 @@ impl Runs {
|
|
|
fc.flowcell_name.to_string(),
|
|
|
fc.pod5_type.to_string(),
|
|
|
fc.pod5_info.acquisition_id.clone(),
|
|
|
+ format!("{:?}", fc.cases),
|
|
|
]
|
|
|
.join("\t");
|
|
|
println!("{s}");
|
|
|
@@ -208,3 +267,30 @@ impl Runs {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+#[derive(Debug, Deserialize, Clone)] // `Deserialize` lets the csv reader in `load_flowcells_corrected_names` build this struct from a row.
|
|
|
+pub struct FCLine { // One data row of the tab-separated corrected flow-cell names file.
|
|
|
+ pub id: String, // Forwarded to `FlowCellCase::id`.
|
|
|
+ pub time_point: String, // Forwarded to `FlowCellCase::time_point`.
|
|
|
+ pub barcode_number: String, // Forwarded to `FlowCellCase::barcode`.
|
|
|
+ pub flow_cell: String, // Compared with a POD5 file's `flowcell_name` to select the rows belonging to a flow cell.
|
|
|
+ pub run: String, // Not read by the code visible here; presumably the run name — TODO confirm.
|
|
|
+ pub path: String, // Not read by the code visible here; meaning to be confirmed against the input file.
|
|
|
+ pub ref_flow_cell: String, // Non-empty values become `FlowCell::corrected_name`; empty values are ignored.
|
|
|
+}
|
|
|
+
|
|
|
+pub fn load_flowcells_corrected_names(file_path: &str) -> anyhow::Result<Vec<FCLine>> { // Reads every data row of the tab-separated file at `file_path` into `FCLine` records, in file order.
|
|
|
+ let file = File::open(file_path)?; // An open failure is returned to the caller as the error.
|
|
|
+
|
|
|
+ let mut rdr = ReaderBuilder::new()
|
|
|
+ .delimiter(b'\t') // Fields are separated by tabs, not commas.
|
|
|
+ .has_headers(true) // The first row is a header line, not a data record.
|
|
|
+ .from_reader(file);
|
|
|
+
|
|
|
+ let mut records = Vec::new();
|
|
|
+ for result in rdr.deserialize() {
|
|
|
+ let record: FCLine = result?; // The first row that fails to deserialize aborts the whole load with that error.
|
|
|
+ records.push(record);
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(records)
|
|
|
+}
|