Thomas, cách đây 1 năm
mục cha
commit
494efb6235
9 tập tin đã thay đổi với 1061 bổ sung và 256 xóa
  1. 1 0
      .gitignore
  2. 487 12
      Cargo.lock
  3. 4 0
      Cargo.toml
  4. 44 13
      src/bam.rs
  5. 4 0
      src/commands/dorado.rs
  6. 2 0
      src/commands/mod.rs
  7. 158 38
      src/lib.rs
  8. 224 193
      src/pod5.rs
  9. 137 0
      src/vcf.rs

+ 1 - 0
.gitignore

@@ -1 +1,2 @@
 /target
+.temp_*

+ 487 - 12
Cargo.lock

@@ -17,6 +17,17 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
+[[package]]
+name = "ahash"
+version = "0.7.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
+dependencies = [
+ "getrandom",
+ "once_cell",
+ "version_check",
+]
+
 [[package]]
 name = "ahash"
 version = "0.8.11"
@@ -110,13 +121,19 @@ version = "1.0.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
 
+[[package]]
+name = "arrayvec"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+
 [[package]]
 name = "arrow"
 version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-arith 49.0.0",
  "arrow-array 49.0.0",
  "arrow-buffer 49.0.0",
@@ -189,13 +206,13 @@ version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-buffer 49.0.0",
  "arrow-data 49.0.0",
  "arrow-schema 49.0.0",
  "chrono",
  "half",
- "hashbrown",
+ "hashbrown 0.14.5",
  "num",
 ]
 
@@ -205,13 +222,13 @@ version = "52.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81c16ec702d3898c2f5cfdc148443c6cd7dbe5bac28399859eb0a3d38f072827"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-buffer 52.1.0",
  "arrow-data 52.1.0",
  "arrow-schema 52.1.0",
  "chrono",
  "half",
- "hashbrown",
+ "hashbrown 0.14.5",
  "num",
 ]
 
@@ -441,13 +458,13 @@ version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-array 49.0.0",
  "arrow-buffer 49.0.0",
  "arrow-data 49.0.0",
  "arrow-schema 49.0.0",
  "half",
- "hashbrown",
+ "hashbrown 0.14.5",
 ]
 
 [[package]]
@@ -456,13 +473,13 @@ version = "52.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ca5e3a6b7fda8d9fe03f3b18a2d946354ea7f3c8e4076dbdb502ad50d9d44824"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-array 52.1.0",
  "arrow-buffer 52.1.0",
  "arrow-data 52.1.0",
  "arrow-schema 52.1.0",
  "half",
- "hashbrown",
+ "hashbrown 0.14.5",
 ]
 
 [[package]]
@@ -483,7 +500,7 @@ version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-array 49.0.0",
  "arrow-buffer 49.0.0",
  "arrow-data 49.0.0",
@@ -497,7 +514,7 @@ version = "52.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e80159088ffe8c48965cb9b1a7c968b2729f29f37363df7eca177fc3281fe7c3"
 dependencies = [
- "ahash",
+ "ahash 0.8.11",
  "arrow-array 52.1.0",
  "arrow-buffer 52.1.0",
  "arrow-data 52.1.0",
@@ -624,6 +641,12 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "bit-vec"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -636,12 +659,97 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
 
+[[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
+[[package]]
+name = "block"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a"
+
+[[package]]
+name = "borsh"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed"
+dependencies = [
+ "borsh-derive",
+ "cfg_aliases",
+]
+
+[[package]]
+name = "borsh-derive"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b"
+dependencies = [
+ "once_cell",
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.71",
+ "syn_derive",
+]
+
+[[package]]
+name = "bstr"
+version = "1.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
 
+[[package]]
+name = "byte-unit"
+version = "5.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ac19bdf0b2665407c39d82dbc937e951e7e2001609f0fb32edd0af45a2d63e"
+dependencies = [
+ "rust_decimal",
+ "serde",
+ "utf8-width",
+]
+
+[[package]]
+name = "bytecheck"
+version = "0.6.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2"
+dependencies = [
+ "bytecheck_derive",
+ "ptr_meta",
+ "simdutf8",
+]
+
+[[package]]
+name = "bytecheck_derive"
+version = "0.6.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "byteorder"
 version = "1.5.0"
@@ -691,6 +799,12 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
 [[package]]
 name = "chrono"
 version = "0.4.38"
@@ -770,6 +884,24 @@ version = "0.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
 
+[[package]]
+name = "crc32fast"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-deque"
 version = "0.8.5"
@@ -948,6 +1080,16 @@ dependencies = [
  "rustc_version 0.4.0",
 ]
 
+[[package]]
+name = "flate2"
+version = "1.0.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
 [[package]]
 name = "float-ord"
 version = "0.3.2"
@@ -972,6 +1114,12 @@ dependencies = [
  "quick-error",
 ]
 
+[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
 [[package]]
 name = "futures"
 version = "0.3.30"
@@ -1095,6 +1243,15 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+dependencies = [
+ "ahash 0.7.8",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.14.5"
@@ -1182,7 +1339,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
 dependencies = [
  "equivalent",
- "hashbrown",
+ "hashbrown 0.14.5",
 ]
 
 [[package]]
@@ -1374,6 +1531,19 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee"
 
+[[package]]
+name = "locale_config"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d2c35b16f4483f6c26f0e4e9550717a2f6575bcd6f12a53ff0c490a94a6934"
+dependencies = [
+ "lazy_static",
+ "objc",
+ "objc-foundation",
+ "regex",
+ "winapi",
+]
+
 [[package]]
 name = "lock_api"
 version = "0.4.12"
@@ -1414,6 +1584,15 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "malloc_buf"
+version = "0.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "matchers"
 version = "0.1.0"
@@ -1474,6 +1653,40 @@ dependencies = [
  "minimal-lexical",
 ]
 
+[[package]]
+name = "noodles-bgzf"
+version = "0.32.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b2fba0f4a64cc897d9396d730a0c444d148daed7de31ad5904ecc673178fc9d"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "crossbeam-channel",
+ "flate2",
+]
+
+[[package]]
+name = "noodles-core"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5a8c6b020d1205abef2b0fab4463a6c5ecc3c8f4d561ca8b0d1a42323376200"
+dependencies = [
+ "bstr",
+]
+
+[[package]]
+name = "noodles-csi"
+version = "0.37.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4bc8001c54f1d8e47e1ac6041a5f27edc99b68bacea3fade9c89059de285aea"
+dependencies = [
+ "bit-vec",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core",
+]
+
 [[package]]
 name = "nu-ansi-term"
 version = "0.46.0"
@@ -1517,6 +1730,16 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "num-format"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
+dependencies = [
+ "arrayvec",
+ "itoa",
+]
+
 [[package]]
 name = "num-integer"
 version = "0.1.46"
@@ -1574,6 +1797,35 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
 
+[[package]]
+name = "objc"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1"
+dependencies = [
+ "malloc_buf",
+]
+
+[[package]]
+name = "objc-foundation"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1add1b659e36c9607c7aab864a76c7a4c2760cd0cd2e120f3fb8b952c7e22bf9"
+dependencies = [
+ "block",
+ "objc",
+ "objc_id",
+]
+
+[[package]]
+name = "objc_id"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c92d4ddb4bd7b50d730c215ff871754d0da6b2178849f8a2a2ab69712d0c073b"
+dependencies = [
+ "objc",
+]
+
 [[package]]
 name = "object"
 version = "0.36.1"
@@ -1660,12 +1912,16 @@ name = "pandora_lib_promethion"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "byte-unit",
  "chrono",
  "csv",
  "env_logger 0.11.3",
  "glob",
+ "locale_config",
  "log",
  "logtest",
+ "noodles-csi",
+ "num-format",
  "pandora_lib_bindings",
  "pandora_lib_pileup",
  "pandora_lib_pod5",
@@ -1741,6 +1997,44 @@ version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
 
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proc-macro-crate"
+version = "3.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284"
+dependencies = [
+ "toml_edit",
+]
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.86"
@@ -1750,6 +2044,26 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "ptr_meta"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1"
+dependencies = [
+ "ptr_meta_derive",
+]
+
+[[package]]
+name = "ptr_meta_derive"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "quick-error"
 version = "1.2.3"
@@ -1765,6 +2079,42 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
 [[package]]
 name = "rayon"
 version = "1.10.0"
@@ -1838,6 +2188,44 @@ version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
 
+[[package]]
+name = "rend"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c"
+dependencies = [
+ "bytecheck",
+]
+
+[[package]]
+name = "rkyv"
+version = "0.7.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0"
+dependencies = [
+ "bitvec",
+ "bytecheck",
+ "bytes",
+ "hashbrown 0.12.3",
+ "ptr_meta",
+ "rend",
+ "rkyv_derive",
+ "seahash",
+ "tinyvec",
+ "uuid",
+]
+
+[[package]]
+name = "rkyv_derive"
+version = "0.7.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "rust-htslib"
 version = "0.47.0"
@@ -1860,6 +2248,22 @@ dependencies = [
  "url",
 ]
 
+[[package]]
+name = "rust_decimal"
+version = "1.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a"
+dependencies = [
+ "arrayvec",
+ "borsh",
+ "bytes",
+ "num-traits",
+ "rand",
+ "rkyv",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.24"
@@ -1908,6 +2312,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "seahash"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
+
 [[package]]
 name = "semver"
 version = "0.1.20"
@@ -1975,6 +2385,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "simdutf8"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
+
 [[package]]
 name = "slab"
 version = "0.4.9"
@@ -2041,6 +2457,24 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "syn_derive"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b"
+dependencies = [
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.71",
+]
+
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
 [[package]]
 name = "termcolor"
 version = "1.4.1"
@@ -2156,6 +2590,23 @@ dependencies = [
  "syn 2.0.71",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
+
+[[package]]
+name = "toml_edit"
+version = "0.21.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1"
+dependencies = [
+ "indexmap",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "tracing"
 version = "0.1.40"
@@ -2276,6 +2727,12 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "utf8-width"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
@@ -2555,6 +3012,24 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "winnow"
+version = "0.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.7.35"

+ 4 - 0
Cargo.toml

@@ -19,4 +19,8 @@ tracing-test = "0.2.5"
 tracing = "0.1.40"
 logtest = "2.0.0"
 test-log = "0.2.16"
+noodles-csi = "0.37.0"
+num-format = "0.4.4"
+locale_config = "0.3.0"
+byte-unit = "5.1.4"
 

+ 44 - 13
src/bam.rs

@@ -1,4 +1,5 @@
 use std::{
+    collections::HashMap,
     fs::{self, Metadata},
     path::PathBuf,
     str::FromStr,
@@ -6,7 +7,7 @@ use std::{
 
 use anyhow::{anyhow, Context};
 use glob::glob;
-use log::{info, warn};
+use log::warn;
 use pandora_lib_bindings::{
     progs::cramino::{Cramino, CraminoRes},
     utils::RunBin,
@@ -21,6 +22,7 @@ pub struct Bam {
     pub path: PathBuf,
     pub file_metadata: Metadata,
     pub cramino: Option<CraminoRes>,
+    pub composition: Vec<(String, f64)>,
 }
 
 #[derive(Debug, PartialEq)]
@@ -41,12 +43,15 @@ impl Bam {
         let stem: Vec<&str> = stem.split('_').collect();
 
         if stem.len() > 4 || stem.len() < 3 {
-            return Err(anyhow!("Error in bam name formating"));
+            return Err(anyhow!("Error in bam name formating {}", path.display()));
         }
 
         let id = stem[0].to_string();
         let time_point = stem[1].to_string();
-        let reference_genome = stem.last().context("Can't get last from stem {stem}")?.to_string();
+        let reference_genome = stem
+            .last()
+            .context("Can't get last from stem {stem}")?
+            .to_string();
 
         let bam_type = if stem.len() == 4 {
             match stem[2] {
@@ -84,6 +89,8 @@ impl Bam {
             None
         };
 
+        let composition = pandora_lib_pileup::bam_compo(path.to_string_lossy().as_ref(), 10000)?;
+
         Ok(Self {
             path,
             file_metadata,
@@ -92,6 +99,7 @@ impl Bam {
             time_point: time_point.to_string(),
             bam_type,
             reference_genome,
+            composition,
         })
     }
 }
@@ -101,20 +109,43 @@ pub struct BamCollection {
     pub bams: Vec<Bam>,
 }
 
-pub fn load_bam_collection(
-    bam_dir: &str,
-) -> BamCollection {
+impl BamCollection {
+    pub fn new(result_dir: &str) -> Self {
+        load_bam_collection(result_dir)
+    }
+
+    pub fn by_acquisition_id(&self) -> HashMap<String, Vec<&Bam>> {
+        let mut acq: HashMap<String, Vec<&Bam>> = HashMap::new();
+        for bam in self.bams.iter() {
+            for (acq_id, _) in bam.composition.iter() {
+                if let Some(entry) = acq.get_mut(acq_id) {
+                    entry.push(bam);
+                } else {
+                    acq.insert(acq_id.to_string(), vec![bam]);
+                }
+            }
+        }
+        acq
+    }
+
+    pub fn get(&self, id: &str, time_point: &str) -> Vec<&Bam> {
+        self.bams
+            .iter()
+            .filter(|b| b.id == id && b.time_point == time_point)
+            .collect()
+    }
+}
+
+pub fn load_bam_collection(result_dir: &str) -> BamCollection {
     let mut bams = Vec::new();
-    let pattern = format!("{}/**/*.bam", bam_dir);
+    let pattern = format!("{}/*/*/*.bam", result_dir);
 
     for entry in glob(&pattern).expect("Failed to read glob pattern") {
         match entry {
-            Ok(path) => {
-                match Bam::new(path) {
-                    Ok(bam) => bams.push(bam),
-                    Err(e) => warn!("{e}"),
-                }
-            }
+            Ok(path) => match Bam::new(path) {
+                Ok(bam) => bams.push(bam),
+                Err(e) => warn!("{e}"),
+            },
             Err(e) => warn!("Error: {:?}", e),
         }
     }

+ 4 - 0
src/dorado.rs → src/commands/dorado.rs

@@ -5,6 +5,8 @@ use std::{
     time::SystemTime,
 };
 
+use log::info;
+
 pub trait Run {
     fn run(self);
 }
@@ -44,6 +46,8 @@ impl Run for Dorado {
         let start_time = std::time::SystemTime::now();
         self.start_time = start_time;
 
+        info!("Running Dorado with params: {:#?}", self.config);
+
         let name = &self.config.name;
         let time = &self.config.time;
         let pod_dir = &self.config.pod_dir;

+ 2 - 0
src/commands/mod.rs

@@ -0,0 +1,2 @@
+pub mod dorado;
+

+ 158 - 38
src/lib.rs

@@ -1,18 +1,141 @@
+use std::path::PathBuf;
+
+use bam::BamCollection;
+use commands::dorado::{Dorado, DoradoConfig, Run};
+use log::{info, warn};
+use pod5::{Pod5Collection, Pod5Type};
+use vcf::{load_vcf_collection, VcfCollection};
+
 pub mod bam;
-pub mod dorado;
+pub mod commands;
 pub mod modkit;
 pub mod pod5;
+mod vcf;
+
+#[derive(Debug)]
+pub struct Collections {
+    pub pod5: Pod5Collection,
+    pub bam: BamCollection,
+    pub vcf: VcfCollection,
+    pub tasks: Vec<CollectionsTasks>
+}
 
-#[cfg(test)]
-mod tests {
+impl Collections {
+    pub fn new(pod_dir: &str, corrected_fc_path: &str, result_dir: &str) -> anyhow::Result<Self> {
+        let pod5 = Pod5Collection::import_dir(
+            "/data/run_data",
+            "/data/flow_cells.tsv",
+            "/data/longreads_basic_pipe",
+        )?;
+        let bam = BamCollection::new(result_dir);
+        let vcf = load_vcf_collection(result_dir);
+        Ok(Self { pod5, bam, vcf, tasks: Vec::new() })
+    }
 
-    use std::fs;
+    pub fn todo(&mut self) {
+        info!("Looking for base calling tasks...");
+
+        // let bams_acquisitions_ids = self.bam.by_acquisition_id();
+        for run in self.pod5.runs.iter() {
+            for fc in run.flowcells.iter() {
+                let acq_id = fc.pod5_info.acquisition_id.clone();
+                for case in fc.cases.iter() {
+                    let bams = self.bam.get(&case.id, &case.time_point);
+                    if bams.is_empty() {
+                        self.tasks.push(CollectionsTasks::CreateBam {
+                            id: case.id.clone(),
+                            time_point: case.time_point.clone(),
+                            pod5_type: fc.pod5_type.clone(),
+                            pod5_dir: case.pod_dir.clone(),
+                        })
+                    } else {
+                        let acq_ids: Vec<String> = bams
+                            .iter()
+                            .flat_map(|b| {
+                                b.composition.iter().map(|(acq_id, _)| acq_id.to_string())
+                            })
+                            .collect();
+
+                        if !acq_ids.contains(&acq_id) {
+                            self.tasks.push(CollectionsTasks::CompleteBam {
+                                id: case.id.clone(),
+                                time_point: case.time_point.clone(),
+                                pod5_type: fc.pod5_type.clone(),
+                                pod5_dir: case.pod_dir.clone(),
+                            });
+                        }
+                    }
+                }
+            }
+        }
+    }
 
-    use log::{info, warn};
+    pub fn run(&mut self) {
+        if self.tasks.is_empty() {
+            self.todo();
+            self.run();
+        } else {
+            while let Some(task) = self.tasks.pop() {
+                
+                task.run();
+            }
+        }
+    }
+}
 
+#[derive(Debug)]
+pub enum CollectionsTasks {
+    CreateBam {
+        id: String,
+        time_point: String,
+        pod5_type: Pod5Type,
+        pod5_dir: PathBuf,
+    },
+    CompleteBam {
+        id: String,
+        time_point: String,
+        pod5_type: Pod5Type,
+        pod5_dir: PathBuf,
+    },
+}
+
+impl CollectionsTasks {
+    pub fn run(&self) {
+        match self {
+            CollectionsTasks::CreateBam {
+                id,
+                time_point,
+                pod5_type,
+                pod5_dir,
+            } => {
+                if *pod5_type != Pod5Type::Raw {
+                    let d = Dorado::init(DoradoConfig {
+                        ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
+                        ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
+                        name: id.to_string(),
+                        time: time_point.to_string(),
+                        pod_dir: pod5_dir.display().to_string(),
+                    });
+                    d.run();
+                }
+            }
+
+            CollectionsTasks::CompleteBam {
+                id,
+                time_point,
+                pod5_type,
+                pod5_dir,
+            } => warn!("TODO"),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
     use crate::bam::BamType;
 
-    use self::{bam::Bam, dorado::Run, pod5::Runs};
+    use self::commands::dorado::{self, Run};
+
     use super::*;
 
     #[test]
@@ -38,56 +161,53 @@ mod tests {
         let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
             .build();
 
-        let runs = Runs::import_dir(
+        let runs = Pod5Collection::import_dir(
             "/data/run_data",
             "/data/flow_cells.tsv",
             "/data/longreads_basic_pipe",
         )?;
         // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
-        runs.check_local()?;
         Ok(())
     }
 
-    #[test]
-    fn todo() -> anyhow::Result<()> {
-        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
-            .build();
+    #[test_log::test]
+    fn bam() -> anyhow::Result<()> {
+        let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
+
+        bam_collection
+            .bams
+            .iter()
+            .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
+            .for_each(|b| println!("{b:#?}"));
+
+        let u = bam_collection.get("PARACHINI", "mrd");
+        println!("{u:#?}");
 
-        let runs = Runs::import_dir(
-            "/data/run_data",
-            "/data/flow_cells.tsv",
-            "/data/longreads_basic_pipe",
-        )?;
-        // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
-        runs.todo();
         Ok(())
     }
 
-    #[test]
-    fn done() -> anyhow::Result<()> {
-        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
-            .build();
+    #[test_log::test]
+    fn vcf() -> anyhow::Result<()> {
+        let mut vcf_collection = vcf::load_vcf_collection("/data/longreads_basic_pipe");
+        vcf_collection.sort_by_id();
+
+        vcf_collection
+            .vcfs
+            .iter()
+            .for_each(|v| v.println().unwrap());
 
-        let runs = Runs::import_dir(
-            "/data/run_data",
-            "/data/flow_cells.tsv",
-            "/data/longreads_basic_pipe",
-        )?;
-        // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
-        runs.fc_done();
         Ok(())
     }
 
     #[test_log::test]
-    fn bam() -> anyhow::Result<()> {
-        let bam_collection = bam::load_bam_collection(
+    fn collections() -> anyhow::Result<()> {
+        let mut collections = Collections::new(
+            "/data/run_data",
+            "/data/flow_cells.tsv",
             "/data/longreads_basic_pipe",
-        );
-
-        // bam_collection.bams.iter().filter(|b| b.bam_type == BamType::Panel(_)).for_each(|b| println!("{b:#?}"));
-        bam_collection.bams.iter().filter(|b| matches!(b.bam_type, BamType::Panel(_))).for_each(|b| println!("{b:#?}"));
-
-        // println!("{:#?}", bam_collection.bams);
+        )?;
+        // collections.vcf.print_tsv();
+        collections.run();
         Ok(())
     }
 }

+ 224 - 193
src/pod5.rs

@@ -2,7 +2,7 @@ use anyhow::{anyhow, Context, Result};
 use chrono::{DateTime, Utc};
 use csv::ReaderBuilder;
 use glob::glob;
-use log::warn;
+use log::{info, warn};
 use pandora_lib_pod5::Pod5Info;
 use serde::Deserialize;
 use std::{
@@ -17,7 +17,7 @@ use std::{
 
 #[derive(Debug, Clone)]
 pub struct Pod5 {
-    pub path: String,
+    pub path: PathBuf,
     pub pod5_type: Pod5Type,
     pub run_name: String,
     pub flowcell_name: String,
@@ -89,7 +89,7 @@ impl Pod5 {
             .to_string();
 
         Ok(Self {
-            path: s.to_string(),
+            path: path.to_path_buf(),
             pod5_type,
             run_name,
             flowcell_name,
@@ -146,8 +146,24 @@ pub struct FlowCell {
     pub pod5: Vec<Pod5>,
 }
 
+// impl FlowCell {
+//     pub fn cases_pod5_dir(&self) -> Vec<PathBuf> {
+//         match self.pod5_type {
+//             Pod5Type::Raw => {
+//                 let p = self.pod5.first().unwrap();
+//                 vec![p.path.parent().unwrap().to_path_buf()]
+//             },
+//             Pod5Type::Demuxed => {
+//                 self.cases.iter().map(|c| {
+//                     let str_barcode = format!("barcode{}", c.barcode);
+//                 })
+//             },
+//         }
+//     }
+// }
+
 #[derive(Debug)]
-pub struct Runs {
+pub struct Pod5Collection {
     pub importation_date: DateTime<Utc>,
     pub runs: Vec<Run>,
     pub bam_dir: String,
@@ -159,49 +175,50 @@ pub struct FlowCellCase {
     pub id: String,
     pub time_point: String,
     pub barcode: String,
-    pub basecalled: Option<bool>,
+    pub pod_dir: PathBuf,
+    // pub basecalled: Option<bool>,
 }
 
 impl FlowCellCase {
-    pub fn basecalled(&mut self, bam_dir: &str, acquisition_id: String) -> bool {
-        if let Some(b) = self.basecalled {
-            return b;
-        } else if let std::result::Result::Ok(p) = PathBuf::from_str(&format!(
-            "{bam_dir}/{}/{}/{}_{}_hs1.bam",
-            self.id,
-            self.time_point.to_lowercase(),
-            self.id,
-            self.time_point.to_lowercase()
-        )) {
-            if p.exists() {
-                let has_id = pandora_lib_pileup::bam_compo(p.to_str().unwrap(), 20000)
-                    .unwrap()
-                    .iter()
-                    .flat_map(|(rg, _)| {
-                        if let Some(index) = rg.find('_') {
-                            let fc_id: &str = &rg[..index];
-                            vec![fc_id.to_string()]
-                        } else {
-                            vec![]
-                        }
-                    })
-                    .filter(|i| *i == acquisition_id)
-                    .count()
-                    > 0;
-                if has_id {
-                    self.basecalled = Some(true);
-                    return true;
-                }
-            }
-        }
-        false
-    }
+    // pub fn basecalled(&mut self, bam_dir: &str, acquisition_id: String) -> bool {
+    //     if let Some(b) = self.basecalled {
+    //         return b;
+    //     } else if let std::result::Result::Ok(p) = PathBuf::from_str(&format!(
+    //         "{bam_dir}/{}/{}/{}_{}_hs1.bam",
+    //         self.id,
+    //         self.time_point.to_lowercase(),
+    //         self.id,
+    //         self.time_point.to_lowercase()
+    //     )) {
+    //         if p.exists() {
+    //             let has_id = pandora_lib_pileup::bam_compo(p.to_str().unwrap(), 20000)
+    //                 .unwrap()
+    //                 .iter()
+    //                 .flat_map(|(rg, _)| {
+    //                     if let Some(index) = rg.find('_') {
+    //                         let fc_id: &str = &rg[..index];
+    //                         vec![fc_id.to_string()]
+    //                     } else {
+    //                         vec![]
+    //                     }
+    //                 })
+    //                 .filter(|i| *i == acquisition_id)
+    //                 .count()
+    //                 > 0;
+    //             if has_id {
+    //                 self.basecalled = Some(true);
+    //                 return true;
+    //             }
+    //         }
+    //     }
+    //     false
+    // }
 }
 
-impl Runs {
+impl Pod5Collection {
     pub fn import_dir(pod5_dir: &str, corrected_fc_path: &str, bam_dir: &str) -> Result<Self> {
         let pod5 = list_pod_files(pod5_dir)?;
-        println!("N pod5 {}", pod5.len());
+        info!("n pod5 {}", pod5.len());
 
         let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
         for pod in pod5 {
@@ -214,7 +231,7 @@ impl Runs {
             .into_values()
             .map(|v| {
                 let first = &v[0];
-                let pod5_info = Pod5Info::from_pod5(&first.path);
+                let pod5_info = Pod5Info::from_pod5(first.path.to_str().unwrap());
                 let flowcell_name = first.flowcell_name.clone();
 
                 let sel: Vec<FCLine> = corrected_fc
@@ -244,14 +261,23 @@ impl Runs {
                 let cases: Vec<FlowCellCase> = sel
                     .iter()
                     .map(|e| {
-                        let mut c = FlowCellCase {
+                        let pod_dir = match first.pod5_type {
+                            Pod5Type::Raw => {
+                                first.path.parent().unwrap().to_path_buf()
+                            }
+                            Pod5Type::Demuxed => {
+                                let mut bc_dir = first.path.parent().unwrap().parent().unwrap().to_path_buf();
+                                bc_dir.push(format!("barcode{}", e.barcode_number.replace("NB", "")));
+                                bc_dir
+                            },
+                        };
+
+                        FlowCellCase {
                             id: e.id.clone(),
                             time_point: e.time_point.clone(),
                             barcode: e.barcode_number.clone(),
-                            basecalled: None,
-                        };
-                        c.basecalled(bam_dir, pod5_info.acquisition_id.clone());
-                        c
+                            pod_dir,
+                        }
                     })
                     .collect();
 
@@ -319,152 +345,152 @@ impl Runs {
         });
     }
 
-    pub fn check_local(&self) -> anyhow::Result<()> {
-        let mut res = Vec::new();
-        for run in self.runs.iter() {
-            for fc in run.flowcells.iter() {
-                for c in fc.cases.iter() {
-                    let bases_called = if let Some(b) = c.basecalled {
-                        if b {
-                            "✅".to_string()
-                        } else {
-                            "❌".to_string()
-                        }
-                    } else {
-                        "❌".to_string()
-                    };
-
-                    let s = [
-                        c.id.to_string(),
-                        c.time_point.to_string(),
-                        c.barcode.to_string(),
-                        run.run_name.clone(),
-                        fc.flowcell_name.to_string(),
-                        fc.pod5_type.to_string(),
-                        fc.pod5_info.acquisition_id.clone(),
-                        bases_called,
-                    ]
-                    .join("\t");
-                    res.push(s);
-                }
-            }
-        }
-        res.sort();
-        println!("{}", res.join("\n"));
-        Ok(())
-    }
-
-    pub fn fc_done(&self) {
-        for run in self.runs.iter() {
-            for fc in run.flowcells.iter() {
-                let n_called = fc
-                    .cases
-                    .iter()
-                    .filter(|c| if let Some(b) = c.basecalled { b } else { false })
-                    .count();
-                if n_called != 0 && n_called == fc.cases.len() {
-                    let s = [
-                        format!("{}/{}", run.run_name, fc.flowcell_name),
-                        fc.pod5_info.acquisition_id.to_string(),
-                        format!("{:#?}", fc.cases),
-                    ]
-                    .join("\t");
-                    println!("{s}");
-                }
-            }
-        }
-    }
-
-    pub fn todo(&self) {
-        let run_dir = &self.pod5_dir;
-        for run in self.runs.iter() {
-            for fc in run.flowcells.iter() {
-                let to_call: Vec<_> = fc
-                    .cases
-                    .iter()
-                    .filter(|c| if let Some(b) = c.basecalled { !b } else { true })
-                    .collect();
-
-                if !to_call.is_empty() {
-                    if fc.pod5_type == Pod5Type::Raw && to_call.len() != fc.cases.len() {
-                        println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
-                    } else {
-                        match fc.pod5_type {
-                            Pod5Type::Raw => {
-                                let cases: Vec<String> = to_call
-                                    .iter()
-                                    .map(|c| {
-                                        let bc = c.barcode.replace("NB", "");
-                                        let tp = c.time_point.to_lowercase();
-                                        [bc, c.id.to_string(), tp].join(" ")
-                                    })
-                                    .collect();
-                                println!(
-                                    "from_mux.sh {}/{}/{} {}",
-                                    run_dir,
-                                    run.run_name,
-                                    fc.flowcell_name,
-                                    cases.join(" ")
-                                );
-                            }
-                            Pod5Type::Demuxed => to_call.iter().for_each(|c| {
-                                let bc = c.barcode.replace("NB", "");
-                                let tp = c.time_point.to_lowercase();
-                                let bam = format!(
-                                    "{}/{}/{}/{}_{}_hs1.bam",
-                                    self.bam_dir, c.id, c.time_point, c.id, c.time_point
-                                );
-                                if PathBuf::from(bam).exists() {
-                                    let pod_dir: Vec<String> = fc
-                                        .pod5
-                                        .iter()
-                                        .filter(|p| {
-                                            p.path.contains(&format!("barcode{}", bc.clone()))
-                                        })
-                                        .take(1)
-                                        .map(|p| p.path.to_string())
-                                        .collect();
-
-                                    let pod_dir = pod_dir.first().unwrap();
-                                    let mut pod_dir = PathBuf::from(pod_dir);
-                                    pod_dir.pop();
-
-                                    // TODO sheduler
-                                    println!(
-                                        "complete_bam.sh {} {} {}",
-                                        c.id,
-                                        tp,
-                                        pod_dir.to_string_lossy()
-                                    )
-                                } else {
-                                    let pod_dir: Vec<String> = fc
-                                        .pod5
-                                        .iter()
-                                        .filter(|p| {
-                                            p.path.contains(&format!("barcode{}", bc.clone()))
-                                        })
-                                        .take(1)
-                                        .map(|p| p.path.to_string())
-                                        .collect();
-
-                                    let pod_dir = pod_dir.first().unwrap();
-                                    let mut pod_dir = PathBuf::from(pod_dir);
-                                    pod_dir.pop();
-
-                                    println!(
-                                        "dorado.sh {} {} {}",
-                                        c.id,
-                                        tp,
-                                        pod_dir.to_string_lossy()
-                                    )
-                                }
-                            }),
-                        };
-                    }
-                }
-            }
-        }
-    }
+    // pub fn check_local(&self) -> anyhow::Result<()> {
+    //     let mut res = Vec::new();
+    //     for run in self.runs.iter() {
+    //         for fc in run.flowcells.iter() {
+    //             for c in fc.cases.iter() {
+    //                 let bases_called = if let Some(b) = c.basecalled {
+    //                     if b {
+    //                         "✅".to_string()
+    //                     } else {
+    //                         "❌".to_string()
+    //                     }
+    //                 } else {
+    //                     "❌".to_string()
+    //                 };
+    //
+    //                 let s = [
+    //                     c.id.to_string(),
+    //                     c.time_point.to_string(),
+    //                     c.barcode.to_string(),
+    //                     run.run_name.clone(),
+    //                     fc.flowcell_name.to_string(),
+    //                     fc.pod5_type.to_string(),
+    //                     fc.pod5_info.acquisition_id.clone(),
+    //                     bases_called,
+    //                 ]
+    //                 .join("\t");
+    //                 res.push(s);
+    //             }
+    //         }
+    //     }
+    //     res.sort();
+    //     println!("{}", res.join("\n"));
+    //     Ok(())
+    // }
+
+    // pub fn fc_done(&self) {
+    //     for run in self.runs.iter() {
+    //         for fc in run.flowcells.iter() {
+    //             let n_called = fc
+    //                 .cases
+    //                 .iter()
+    //                 .filter(|c| if let Some(b) = c.basecalled { b } else { false })
+    //                 .count();
+    //             if n_called != 0 && n_called == fc.cases.len() {
+    //                 let s = [
+    //                     format!("{}/{}", run.run_name, fc.flowcell_name),
+    //                     fc.pod5_info.acquisition_id.to_string(),
+    //                     format!("{:#?}", fc.cases),
+    //                 ]
+    //                 .join("\t");
+    //                 println!("{s}");
+    //             }
+    //         }
+    //     }
+    // }
+
+    // pub fn todo(&self) {
+    //     let run_dir = &self.pod5_dir;
+    //     for run in self.runs.iter() {
+    //         for fc in run.flowcells.iter() {
+    //             let to_call: Vec<_> = fc
+    //                 .cases
+    //                 .iter()
+    //                 .filter(|c| if let Some(b) = c.basecalled { !b } else { true })
+    //                 .collect();
+    //
+    //             if !to_call.is_empty() {
+    //                 if fc.pod5_type == Pod5Type::Raw && to_call.len() != fc.cases.len() {
+    //                     println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
+    //                 } else {
+    //                     match fc.pod5_type {
+    //                         Pod5Type::Raw => {
+    //                             let cases: Vec<String> = to_call
+    //                                 .iter()
+    //                                 .map(|c| {
+    //                                     let bc = c.barcode.replace("NB", "");
+    //                                     let tp = c.time_point.to_lowercase();
+    //                                     [bc, c.id.to_string(), tp].join(" ")
+    //                                 })
+    //                                 .collect();
+    //                             println!(
+    //                                 "from_mux.sh {}/{}/{} {}",
+    //                                 run_dir,
+    //                                 run.run_name,
+    //                                 fc.flowcell_name,
+    //                                 cases.join(" ")
+    //                             );
+    //                         }
+    //                         Pod5Type::Demuxed => to_call.iter().for_each(|c| {
+    //                             let bc = c.barcode.replace("NB", "");
+    //                             let tp = c.time_point.to_lowercase();
+    //                             let bam = format!(
+    //                                 "{}/{}/{}/{}_{}_hs1.bam",
+    //                                 self.bam_dir, c.id, c.time_point, c.id, c.time_point
+    //                             );
+    //                             if PathBuf::from(bam).exists() {
+    //                                 let pod_dir: Vec<String> = fc
+    //                                     .pod5
+    //                                     .iter()
+    //                                     .filter(|p| {
+    //                                         p.path.contains(&format!("barcode{}", bc.clone()))
+    //                                     })
+    //                                     .take(1)
+    //                                     .map(|p| p.path.to_string())
+    //                                     .collect();
+    //
+    //                                 let pod_dir = pod_dir.first().unwrap();
+    //                                 let mut pod_dir = PathBuf::from(pod_dir);
+    //                                 pod_dir.pop();
+    //
+    //                                 // TODO sheduler
+    //                                 println!(
+    //                                     "complete_bam.sh {} {} {}",
+    //                                     c.id,
+    //                                     tp,
+    //                                     pod_dir.to_string_lossy()
+    //                                 )
+    //                             } else {
+    //                                 let pod_dir: Vec<String> = fc
+    //                                     .pod5
+    //                                     .iter()
+    //                                     .filter(|p| {
+    //                                         p.path.contains(&format!("barcode{}", bc.clone()))
+    //                                     })
+    //                                     .take(1)
+    //                                     .map(|p| p.path.to_string())
+    //                                     .collect();
+    //
+    //                                 let pod_dir = pod_dir.first().unwrap();
+    //                                 let mut pod_dir = PathBuf::from(pod_dir);
+    //                                 pod_dir.pop();
+    //
+    //                                 println!(
+    //                                     "dorado.sh {} {} {}",
+    //                                     c.id,
+    //                                     tp,
+    //                                     pod_dir.to_string_lossy()
+    //                                 )
+    //                             }
+    //                         }),
+    //                     };
+    //                 }
+    //             }
+    //         }
+    //     }
+    // }
 
     pub fn ids(&self) -> Vec<String> {
         let mut ids: Vec<String> = self
@@ -510,7 +536,12 @@ pub fn load_flowcells_corrected_names(file_path: &str) -> anyhow::Result<Vec<FCL
 
     let mut records = Vec::new();
     for result in rdr.deserialize() {
-        let record: FCLine = result?;
+        let mut record: FCLine = result?;
+
+        // Normalize formatting: lower-case the time point, upper-case the id.
+        record.time_point = record.time_point.to_lowercase();
+        record.id = record.id.to_uppercase();
+
         records.push(record);
     }
 

+ 137 - 0
src/vcf.rs

@@ -0,0 +1,137 @@
+use anyhow::{anyhow, Context};
+use chrono::{DateTime, Utc};
+use csi::binning_index::ReferenceSequence;
+use glob::glob;
+use log::warn;
+use std::{fs::Metadata, os::unix::fs::MetadataExt, path::PathBuf};
+
+use noodles_csi as csi;
+use num_format::{Locale, ToFormattedString};
+
+/// A VCF file described by the fields parsed from its file name, the record
+/// count read from its CSI index and its filesystem metadata.
+#[derive(Debug)]
+pub struct Vcf {
+    /// First `_`-separated field of the file stem.
+    pub id: String,
+    /// Fields between the time point and the last field of the stem,
+    /// re-joined with `_` (empty when the stem has exactly three fields).
+    pub caller: String,
+    /// Second `_`-separated field of the file stem.
+    pub time_point: String,
+    /// Path of the VCF file; its index is looked up at `<path>.csi`.
+    pub path: PathBuf,
+    /// Filesystem metadata captured when the value was built.
+    pub file_metadata: Metadata,
+    /// Record count returned by [`n_variants`] for this file.
+    pub n_variants: u64,
+}
+
+impl Vcf {
+    /// Builds a [`Vcf`] from the path of a VCF file.
+    ///
+    /// The file stem is split on `_` into `id`, `time_point`, a caller made of
+    /// every remaining field except the last one, and a trailing field that is
+    /// ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the path has no file stem, when the stem has
+    /// fewer than three `_`-separated fields, when `<path>.csi` does not
+    /// exist, when the path is not valid UTF-8, or when reading the index or
+    /// the file metadata fails.
+    pub fn new(path: PathBuf) -> anyhow::Result<Self> {
+        let stem = path
+            .file_stem()
+            .context("Can't parse stem")?
+            .to_string_lossy()
+            .to_string();
+        let stem_splt: Vec<&str> = stem.split('_').collect();
+
+        // A slice pattern turns a too-short file name into an error instead of
+        // an out-of-bounds panic on direct indexing.
+        let (id, time_point, caller) = match stem_splt.as_slice() {
+            [id, time_point, caller @ .., _last] => {
+                (id.to_string(), time_point.to_string(), caller.join("_"))
+            }
+            _ => {
+                return Err(anyhow!(
+                    "Can't parse id, time point and caller from {}",
+                    path.display()
+                ))
+            }
+        };
+
+        if !PathBuf::from(format!("{}.csi", path.display())).exists() {
+            return Err(anyhow!("No csi for {}", path.display()));
+        }
+
+        let n_variants = n_variants(path.to_str().context("Can't convert path to str")?)?;
+        let file_metadata = path.metadata()?;
+
+        Ok(Self {
+            id,
+            caller,
+            time_point,
+            path,
+            file_metadata,
+            n_variants,
+        })
+    }
+
+    /// Returns the last modification time recorded in the stored metadata.
+    ///
+    /// # Errors
+    ///
+    /// Propagates the I/O error reported by `Metadata::modified` instead of
+    /// panicking on it.
+    pub fn modified(&self) -> anyhow::Result<DateTime<Utc>> {
+        Ok(self.file_metadata.modified()?.into())
+    }
+
+    /// Returns the file size taken from the stored metadata.
+    pub fn size(&self) -> u64 {
+        self.file_metadata.size()
+    }
+
+    /// Renders the entry as one tab-separated line: id, time point, caller,
+    /// variant count, modification time, size and path, all unformatted.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the modification time cannot be read.
+    pub fn tsv(&self) -> anyhow::Result<String> {
+        let columns = [
+            self.id.clone(),
+            self.time_point.clone(),
+            self.caller.clone(),
+            self.n_variants.to_string(),
+            self.modified()?.to_string(),
+            self.size().to_string(),
+            self.path.display().to_string(),
+        ];
+        Ok(columns.join("\t"))
+    }
+
+    /// Prints the entry to stdout as one tab-separated line, with the variant
+    /// count, modification time and size formatted for reading.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the modification time cannot be read.
+    pub fn println(&self) -> anyhow::Result<()> {
+        let formatted_n_variants = self.n_variants.to_formatted_string(&Locale::en);
+        let formatted_modified = self.modified()?.naive_local().to_string();
+        let formatted_size = format!("{:#}", byte_unit::Byte::from_u64(self.size()));
+        println!(
+            "{}",
+            [
+                self.id.to_string(),
+                self.time_point.to_string(),
+                self.caller.to_string(),
+                formatted_n_variants,
+                formatted_modified,
+                formatted_size,
+                self.path.display().to_string()
+            ]
+            .join("\t")
+        );
+        Ok(())
+    }
+}
+
+/// A list of [`Vcf`] entries.
+#[derive(Debug)]
+pub struct VcfCollection {
+    /// The collected entries, in the order they were added or last sorted.
+    pub vcfs: Vec<Vcf>,
+}
+
+impl VcfCollection {
+    /// Prints every entry to stdout as one tab-separated line (see `Vcf::tsv`).
+    ///
+    /// An entry whose line cannot be built is reported with `warn!` and
+    /// skipped, so one bad entry does not hide the others.
+    pub fn print_tsv(&self) {
+        for vcf in self.vcfs.iter() {
+            match vcf.tsv() {
+                Ok(line) => println!("{line}"),
+                Err(e) => warn!("{e}"),
+            }
+        }
+    }
+
+    /// Sorts the entries by `id` in ascending order; entries with equal ids
+    /// keep their relative order.
+    pub fn sort_by_id(&mut self) {
+        // Comparing borrowed ids avoids allocating a `String` per key lookup.
+        self.vcfs.sort_by(|a, b| a.id.cmp(&b.id));
+    }
+}
+
+/// Collects every file matching `<result_dir>/*/*/*/*_PASSED.vcf.gz` into a
+/// [`VcfCollection`].
+///
+/// Paths the glob iterator cannot read and files rejected by `Vcf::new` are
+/// logged with `warn!` and left out of the result.
+///
+/// # Panics
+///
+/// Panics when the pattern built from `result_dir` is not a valid glob
+/// pattern.
+pub fn load_vcf_collection(result_dir: &str) -> VcfCollection {
+    let pattern = format!("{}/*/*/*/*_PASSED.vcf.gz", result_dir);
+
+    let vcfs: Vec<Vcf> = glob(&pattern)
+        .expect("Failed to read glob pattern")
+        .filter_map(|entry| match entry {
+            Ok(path) => Vcf::new(path).map_err(|e| warn!("{e}")).ok(),
+            Err(e) => {
+                warn!("Error: {:?}", e);
+                None
+            }
+        })
+        .collect();
+
+    VcfCollection { vcfs }
+}
+
+/// Sums the mapped record counts stored in the CSI index found at
+/// `<path>.csi`.
+///
+/// Reference sequences without index metadata contribute nothing to the
+/// total.
+///
+/// # Errors
+///
+/// Returns an error when the index file cannot be read.
+pub fn n_variants(path: &str) -> anyhow::Result<u64> {
+    let index = csi::read(format!("{path}.csi"))?;
+
+    let total: u64 = index
+        .reference_sequences()
+        .iter()
+        .filter_map(|reference_sequence| reference_sequence.metadata())
+        .map(|metadata| metadata.mapped_record_count())
+        .sum();
+
+    Ok(total)
+}