Thomas 1 anno fa
parent
commit
0776ccd633
4 file modificati con 189 aggiunte e 56 eliminazioni
  1. 81 37
      Cargo.lock
  2. 5 2
      Cargo.toml
  3. 4 4
      src/lib.rs
  4. 99 13
      src/pod5.rs

+ 81 - 37
Cargo.lock

@@ -40,6 +40,55 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "anstream"
+version = "0.6.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
+dependencies = [
+ "anstyle",
+ "windows-sys",
+]
+
 [[package]]
 name = "anyhow"
 version = "1.0.86"
@@ -545,6 +594,12 @@ dependencies = [
  "windows-targets",
 ]
 
+[[package]]
+name = "colorchoice"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
+
 [[package]]
 name = "const-random"
 version = "0.1.18"
@@ -598,17 +653,27 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "env_filter"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
+dependencies = [
+ "log",
+ "regex",
+]
+
 [[package]]
 name = "env_logger"
-version = "0.10.2"
+version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
+checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
 dependencies = [
+ "anstream",
+ "anstyle",
+ "env_filter",
  "humantime",
- "is-terminal",
  "log",
- "regex",
- "termcolor",
 ]
 
 [[package]]
@@ -671,12 +736,6 @@ version = "0.14.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
 
-[[package]]
-name = "hermit-abi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
-
 [[package]]
 name = "humantime"
 version = "2.1.0"
@@ -717,15 +776,10 @@ dependencies = [
 ]
 
 [[package]]
-name = "is-terminal"
-version = "0.4.12"
+name = "is_terminal_polyfill"
+version = "1.70.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
-dependencies = [
- "hermit-abi",
- "libc",
- "windows-sys",
-]
+checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
 
 [[package]]
 name = "itoa"
@@ -932,11 +986,13 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "chrono",
+ "csv",
  "env_logger",
  "glob",
  "log",
  "pandora_lib_pod5",
  "regex",
+ "serde",
 ]
 
 [[package]]
@@ -1067,15 +1123,6 @@ dependencies = [
  "unicode-ident",
 ]
 
-[[package]]
-name = "termcolor"
-version = "1.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
-dependencies = [
- "winapi-util",
-]
-
 [[package]]
 name = "tiny-keccak"
 version = "2.0.2"
@@ -1091,6 +1138,12 @@ version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
 
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
 [[package]]
 name = "uuid"
 version = "1.10.0"
@@ -1166,15 +1219,6 @@ version = "0.2.92"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
 
-[[package]]
-name = "winapi-util"
-version = "0.1.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
-dependencies = [
- "windows-sys",
-]
-
 [[package]]
 name = "windows-core"
 version = "0.52.0"

+ 5 - 2
Cargo.toml

@@ -4,10 +4,13 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-log = "^0.4.20"
-env_logger = "^0.10.1"
+log = "^0.4.22"
+env_logger = "^0.11.3"
 anyhow = "1.0.86"
 glob = "0.3.1"
 pandora_lib_pod5 = { git = "https://git.t0m4.fr/Thomas/pandora_lib_pod5.git" }
 regex = "1.10.5"
 chrono = "0.4.38"
+csv = "1.3.0"
+serde = { version = "1.0.204", features = ["derive"] }
+

+ 4 - 4
src/lib.rs

@@ -27,10 +27,10 @@ mod tests {
 
     #[test]
     fn pod5() -> anyhow::Result<()> {
-        let _ =
-            env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
-                .build();
-        let runs = Runs::import_dir("/data/run_data")?;
+        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+            .build();
+
+        let runs = Runs::import_dir("/data/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
         runs.print_info();
         Ok(())
     }

+ 99 - 13
src/pod5.rs

@@ -1,12 +1,14 @@
-use anyhow::{anyhow, Context};
+use anyhow::{anyhow, Context, Result};
 use chrono::{DateTime, Utc};
+use csv::ReaderBuilder;
 use glob::glob;
-use log::warn;
+use log::{info, warn};
 use pandora_lib_pod5::Pod5Info;
+use serde::Deserialize;
 use std::{
     collections::HashMap,
     fmt::Display,
-    fs::{self, Metadata},
+    fs::{self, File, Metadata},
     os::unix::fs::MetadataExt,
     path::PathBuf,
     usize,
@@ -35,7 +37,6 @@ impl Display for Pod5Type {
         };
         f.write_str(s)
     }
-    // add code here
 }
 
 #[derive(Debug, Clone)]
@@ -60,7 +61,7 @@ impl Default for Pod5Config {
 }
 
 impl Pod5 {
-    pub fn from_path(path: &PathBuf, config: Pod5Config) -> anyhow::Result<Self> {
+    pub fn from_path(path: &PathBuf, config: &Pod5Config) -> Result<Self> {
         let s = path
             .to_str()
             .context("Can't convert PathBuf to str {path:?}")?;
@@ -86,8 +87,6 @@ impl Pod5 {
             .context("Can't get flowcell_name")?
             .to_string();
 
-        // let info = Pod5Info::from_pod5(s);
-
         Ok(Self {
             path: s.to_string(),
             pod5_type,
@@ -98,13 +97,22 @@ impl Pod5 {
     }
 }
 
-pub fn list_pod_files(dir: &str) -> anyhow::Result<Vec<Pod5>> {
+pub fn list_pod_files(dir: &str) -> Result<Vec<Pod5>> {
     let pattern = format!("{}/**/*.pod5", dir);
     let mut pod_files = Vec::new();
 
+    let conf = Pod5Config {
+        base_dir: if dir.ends_with('/') {
+            dir.to_string()
+        } else {
+            format!("{dir}/")
+        },
+        ..Pod5Config::default()
+    };
+
     for entry in glob(&pattern).expect("Failed to read glob pattern") {
         match entry {
-            Ok(path) => match Pod5::from_path(&path, Pod5Config::default()) {
+            Ok(path) => match Pod5::from_path(&path, &conf) {
                 Ok(pod5) => pod_files.push(pod5),
                 Err(e) => warn!("{e}"),
             },
@@ -123,6 +131,8 @@ pub struct Run {
 #[derive(Debug, Clone)]
 pub struct FlowCell {
     pub flowcell_name: String,
+    pub corrected_name: String,
+    pub cases: Vec<FlowCellCase>,
     pub run_name: String,
     pub pod5_type: Pod5Type,
     pub pod5_info: Pod5Info,
@@ -135,9 +145,17 @@ pub struct Runs {
     pub runs: Vec<Run>,
 }
 
+#[derive(Debug, Clone)]
+pub struct FlowCellCase { // One entry of `FlowCell::cases`; built from an `FCLine` in `Runs::import_dir`.
+    pub id: String, // copied from `FCLine::id`
+    pub time_point: String, // copied from `FCLine::time_point`
+    pub barcode: String, // copied from `FCLine::barcode_number`
+}
+
 impl Runs {
-    pub fn import_dir(dir: &str) -> anyhow::Result<Self> {
+    pub fn import_dir(dir: &str, corrected_fc_path: &str) -> Result<Self> {
         let pod5 = list_pod_files(dir)?;
+        println!("N pod5 {}", pod5.len());
 
         let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
         for pod in pod5 {
@@ -145,11 +163,51 @@ impl Runs {
             fc.entry(k).or_default().push(pod);
         }
 
-        let flow_cells: Vec<FlowCell> = fc.into_values().map(|v| {
+        let corrected_fc = load_flowcells_corrected_names(corrected_fc_path)?;
+        let flow_cells: Vec<FlowCell> = fc
+            .into_values()
+            .map(|v| {
                 let first = &v[0];
                 let pod5_info = Pod5Info::from_pod5(&first.path);
+                let flowcell_name = first.flowcell_name.clone();
+
+                let sel: Vec<FCLine> = corrected_fc
+                    .iter()
+                    .filter(|e| e.flow_cell == flowcell_name)
+                    .cloned()
+                    .collect();
+
+                let mut corrected_name: Vec<String> = sel
+                    .clone()
+                    .into_iter()
+                    .map(|e| e.ref_flow_cell)
+                    .filter(|e| !e.is_empty())
+                    .collect();
+                corrected_name.dedup();
+
+                if corrected_name.len() > 1 {
+                    panic!("Multiple corrected flow_cells for {v:?}");
+                }
+
+                let corrected_name = if !corrected_name.is_empty() {
+                    corrected_name.first().unwrap().to_string()
+                } else {
+                    "".to_string()
+                };
+
+                let cases: Vec<FlowCellCase> = sel
+                    .iter()
+                    .map(|e| FlowCellCase {
+                        id: e.id.clone(),
+                        time_point: e.time_point.clone(),
+                        barcode: e.barcode_number.clone(),
+                    })
+                    .collect();
+
                 FlowCell {
-                    flowcell_name: first.flowcell_name.clone(),
+                    flowcell_name,
+                    corrected_name,
+                    cases,
                     run_name: first.run_name.clone(),
                     pod5_type: first.pod5_type.clone(),
                     pod5_info,
@@ -187,7 +245,7 @@ impl Runs {
                 let dates: Vec<DateTime<Utc>> = fc
                     .pod5
                     .iter()
-                    .map(|p| p.file_metadata.created().unwrap().into())
+                    .map(|p| p.file_metadata.modified().unwrap().into())
                     .collect();
                 let from = dates.iter().min().unwrap();
                 let to = dates.iter().max().unwrap();
@@ -200,6 +258,7 @@ impl Runs {
                     fc.flowcell_name.to_string(),
                     fc.pod5_type.to_string(),
                     fc.pod5_info.acquisition_id.clone(),
+                    format!("{:?}", fc.cases),
                 ]
                 .join("\t");
                 println!("{s}");
@@ -208,3 +267,30 @@ impl Runs {
     }
 }
 
+#[derive(Debug, Deserialize, Clone)]
+pub struct FCLine { // One row of the tab-separated table read by `load_flowcells_corrected_names`.
+    pub id: String, // becomes `FlowCellCase::id`
+    pub time_point: String, // becomes `FlowCellCase::time_point`
+    pub barcode_number: String, // becomes `FlowCellCase::barcode`
+    pub flow_cell: String, // compared with the pod5-derived `flowcell_name` in `Runs::import_dir`
+    pub run: String, // not read by the code visible in this diff
+    pub path: String, // not read by the code visible in this diff
+    pub ref_flow_cell: String, // source of `FlowCell::corrected_name`; empty values are filtered out
+}
+
+/// Reads the tab-separated flow-cell table at `file_path` into [`FCLine`] records.
+///
+/// The first line of the file is treated as a header row.
+///
+/// # Errors
+///
+/// Returns an error naming `file_path` if the file cannot be opened or if any
+/// row fails to deserialize into an [`FCLine`].
+pub fn load_flowcells_corrected_names(file_path: &str) -> anyhow::Result<Vec<FCLine>> {
+    // Attach the path so a missing or unreadable table is identifiable from the error alone.
+    let file = File::open(file_path)
+        .with_context(|| format!("Can't open flow cell table {file_path}"))?;
+
+    let mut rdr = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .has_headers(true)
+        .from_reader(file);
+
+    // Collecting into a `Result` stops at the first row that fails to parse.
+    let records = rdr
+        .deserialize::<FCLine>()
+        .map(|row| row.with_context(|| format!("Can't parse a row of {file_path}")))
+        .collect::<anyhow::Result<Vec<FCLine>>>()?;
+
+    Ok(records)
+}