
annotations

Thomas 1 year ago
commit
b83fd913c5

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+/target

+ 1894 - 0
Cargo.lock

@@ -0,0 +1,1894 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "adler"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+
+[[package]]
+name = "ahash"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
+
+[[package]]
+name = "anstream"
+version = "0.6.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
+dependencies = [
+ "anstyle",
+ "windows-sys",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.81"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
+
+[[package]]
+name = "approx"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
+
+[[package]]
+name = "bgzip"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b64fd8980fb64af5951bc05de7772b598150a6f7eac42ec17f73e8489915f99b"
+dependencies = [
+ "flate2",
+ "log",
+ "rayon",
+ "thiserror",
+]
+
+[[package]]
+name = "bio-types"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d45749b87f21808051025e9bf714d14ff4627f9d8ca967eade6946ea769aa4a"
+dependencies = [
+ "derive-new",
+ "lazy_static",
+ "regex",
+ "strum_macros",
+ "thiserror",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bitflags"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
+
+[[package]]
+name = "bytemuck"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bytes"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
+
+[[package]]
+name = "bzip2-sys"
+version = "0.1.11+1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "cc"
+version = "1.0.90"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
+dependencies = [
+ "jobserver",
+ "libc",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "clap"
+version = "4.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64"
+dependencies = [
+ "heck 0.5.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.57",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
+
+[[package]]
+name = "cmake"
+version = "0.1.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "colorchoice"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
+
+[[package]]
+name = "confy"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e37668cb35145dcfaa1931a5f37fde375eeae8068b4c0d2f289da28a270b2d2c"
+dependencies = [
+ "directories",
+ "serde",
+ "thiserror",
+ "toml",
+]
+
+[[package]]
+name = "console"
+version = "0.15.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
+dependencies = [
+ "encode_unicode 0.3.6",
+ "lazy_static",
+ "libc",
+ "unicode-width",
+ "windows-sys",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
+
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
+[[package]]
+name = "csv"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "curl-sys"
+version = "0.4.72+curl-8.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29cbdc8314c447d11e8fd156dcdd031d9e02a7a976163e396b548c03153bc9ea"
+dependencies = [
+ "cc",
+ "libc",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
+ "vcpkg",
+ "windows-sys",
+]
+
+[[package]]
+name = "custom_derive"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9"
+
+[[package]]
+name = "dashmap"
+version = "5.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+dependencies = [
+ "cfg-if",
+ "hashbrown",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+ "rayon",
+ "serde",
+]
+
+[[package]]
+name = "derive-new"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "directories"
+version = "4.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f51c5d4ddabd36886dd3e1438cb358cdcb0d7c499cb99cb4ac2e38e18b5cb210"
+dependencies = [
+ "dirs-sys",
+]
+
+[[package]]
+name = "dirs-next"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
+dependencies = [
+ "cfg-if",
+ "dirs-sys-next",
+]
+
+[[package]]
+name = "dirs-sys"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6"
+dependencies = [
+ "libc",
+ "redox_users",
+ "winapi",
+]
+
+[[package]]
+name = "dirs-sys-next"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
+dependencies = [
+ "libc",
+ "redox_users",
+ "winapi",
+]
+
+[[package]]
+name = "either"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
+
+[[package]]
+name = "encode_unicode"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
+
+[[package]]
+name = "encode_unicode"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
+
+[[package]]
+name = "env_logger"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580"
+dependencies = [
+ "humantime",
+ "is-terminal",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "fallible-iterator"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
+
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
+[[package]]
+name = "flate2"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "fs-utils"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fc7a9dc005c944c98a935e7fd626faf5bf7e5a609f94bc13e42fc4a02e52593"
+dependencies = [
+ "quick-error",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
+
+[[package]]
+name = "half"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.14.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+ "rayon",
+ "serde",
+]
+
+[[package]]
+name = "hashlink"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
+dependencies = [
+ "hashbrown",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
+
+[[package]]
+name = "hts-sys"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "deebfb779c734d542e7f14c298597914b9b5425e4089aef482eacb5cab941915"
+dependencies = [
+ "bzip2-sys",
+ "cc",
+ "curl-sys",
+ "fs-utils",
+ "glob",
+ "libz-sys",
+ "lzma-sys",
+ "openssl-sys",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "idna"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "ieee754"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9007da9cacbd3e6343da136e98b0d2df013f553d35bdec8b518f07bea768e19c"
+
+[[package]]
+name = "indexmap"
+version = "2.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "indicatif"
+version = "0.17.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
+dependencies = [
+ "console",
+ "instant",
+ "number_prefix",
+ "portable-atomic",
+ "rayon",
+ "unicode-width",
+]
+
+[[package]]
+name = "indicatif-log-bridge"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2963046f28a204e3e3fd7e754fd90a6235da05b5378f24707ff0ec9513725ce3"
+dependencies = [
+ "indicatif",
+ "log",
+]
+
+[[package]]
+name = "instant"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "is-terminal"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
+
+[[package]]
+name = "jobserver"
+version = "0.1.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "lexical-core"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46"
+dependencies = [
+ "lexical-parse-float",
+ "lexical-parse-integer",
+ "lexical-util",
+ "lexical-write-float",
+ "lexical-write-integer",
+]
+
+[[package]]
+name = "lexical-parse-float"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f"
+dependencies = [
+ "lexical-parse-integer",
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-parse-integer"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9"
+dependencies = [
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-util"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc"
+dependencies = [
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-write-float"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862"
+dependencies = [
+ "lexical-util",
+ "lexical-write-integer",
+ "static_assertions",
+]
+
+[[package]]
+name = "lexical-write-integer"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446"
+dependencies = [
+ "lexical-util",
+ "static_assertions",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.153"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+
+[[package]]
+name = "libm"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
+
+[[package]]
+name = "libredox"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
+dependencies = [
+ "bitflags 2.5.0",
+ "libc",
+]
+
+[[package]]
+name = "libsqlite3-sys"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716"
+dependencies = [
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "libz-sys"
+version = "1.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9"
+dependencies = [
+ "cc",
+ "cmake",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "linear-map"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee"
+
+[[package]]
+name = "lock_api"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
+dependencies = [
+ "autocfg",
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+
+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "matrixmultiply"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2"
+dependencies = [
+ "autocfg",
+ "rawpointer",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
+dependencies = [
+ "adler",
+]
+
+[[package]]
+name = "nalgebra"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff"
+dependencies = [
+ "approx",
+ "matrixmultiply",
+ "nalgebra-macros",
+ "num-complex",
+ "num-rational",
+ "num-traits",
+ "rand",
+ "rand_distr",
+ "simba",
+ "typenum",
+]
+
+[[package]]
+name = "nalgebra-macros"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "newtype_derive"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec"
+dependencies = [
+ "rustc_version",
+]
+
+[[package]]
+name = "noodles-bam"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e75824c4fad4713c177148543d96893212c2b8b6efc3cd9fc19934bb9334c97"
+dependencies = [
+ "bit-vec",
+ "byteorder",
+ "bytes",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core 0.13.0",
+ "noodles-csi 0.29.0",
+ "noodles-sam",
+]
+
+[[package]]
+name = "noodles-bgzf"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8970db2e84adb1007377dd3988258d7a64e3fc4c05602ebf94e1f8cba207c030"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "crossbeam-channel",
+ "flate2",
+]
+
+[[package]]
+name = "noodles-core"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2993a01927b449e191670446b8a36e153e89fc4527a246a84eed9057adeefe1b"
+
+[[package]]
+name = "noodles-core"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7336c3be652de4e05444c9b12a32331beb5ba3316e8872d92bfdd8ef3b06c282"
+
+[[package]]
+name = "noodles-csi"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9abd5616c374ad3da6677603dc1637ef518388537ec4a0263b8e4471ee5b0801"
+dependencies = [
+ "bit-vec",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core 0.13.0",
+]
+
+[[package]]
+name = "noodles-csi"
+version = "0.30.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a60dfe0919f7ecbd081a82eb1d32e8f89f9041932d035fe8309073c8c01277bf"
+dependencies = [
+ "bit-vec",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core 0.14.0",
+]
+
+[[package]]
+name = "noodles-fasta"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e9e953e4e90e6c96e6a384598ebf2ab6d2f5add259ff05a194cf635e892c980"
+dependencies = [
+ "bytes",
+ "memchr",
+ "noodles-bgzf",
+ "noodles-core 0.14.0",
+]
+
+[[package]]
+name = "noodles-gff"
+version = "0.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14f8ec87fe3630f57d6d8ea24cbc2cbd0bfed1fe66238bda7a7c3fb6a36d3713"
+dependencies = [
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core 0.14.0",
+ "noodles-csi 0.30.0",
+ "percent-encoding",
+]
+
+[[package]]
+name = "noodles-sam"
+version = "0.49.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b94966806ac7aec118d41eea7080bfbd0e8b843ba64f46522c57f0f55cfb1f0"
+dependencies = [
+ "bitflags 2.5.0",
+ "indexmap",
+ "lexical-core",
+ "memchr",
+ "noodles-bgzf",
+ "noodles-core 0.13.0",
+ "noodles-csi 0.29.0",
+]
+
+[[package]]
+name = "noodles-tabix"
+version = "0.36.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc1ab29335a68d0c2bdf41460a67714ca69e23a1cbeb950ac5c38a9afa446a62"
+dependencies = [
+ "bit-vec",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf",
+ "noodles-core 0.14.0",
+ "noodles-csi 0.30.0",
+]
+
+[[package]]
+name = "noodles-vcf"
+version = "0.49.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e1f2fa749afaccadc596ec55ccb7bdcd8101fa79f8382384223c0dbae3e245b"
+dependencies = [
+ "indexmap",
+ "memchr",
+ "noodles-bgzf",
+ "noodles-core 0.14.0",
+ "noodles-csi 0.30.0",
+ "noodles-tabix",
+ "percent-encoding",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
+[[package]]
+name = "number_prefix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "openssl-src"
+version = "300.2.3+3.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5cff92b6f71555b61bb9315f7c64da3ca43d87531622120fea0195fc761b4843"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "openssl-sys"
+version = "0.9.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2"
+dependencies = [
+ "cc",
+ "libc",
+ "openssl-src",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "pandora_lib_variants"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bgzip",
+ "clap",
+ "confy",
+ "crossbeam-deque",
+ "csv",
+ "dashmap",
+ "env_logger",
+ "hashbrown",
+ "indicatif",
+ "indicatif-log-bridge",
+ "log",
+ "noodles-bam",
+ "noodles-bgzf",
+ "noodles-core 0.14.0",
+ "noodles-csi 0.30.0",
+ "noodles-fasta",
+ "noodles-gff",
+ "noodles-sam",
+ "noodles-tabix",
+ "noodles-vcf",
+ "num-integer",
+ "pot",
+ "prettytable-rs",
+ "rayon",
+ "rust-htslib",
+ "rust-lapper",
+ "serde",
+ "serde_json",
+ "serde_rusqlite",
+ "statrs",
+ "trc",
+ "uuid",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
+
+[[package]]
+name = "portable-atomic"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
+
+[[package]]
+name = "pot"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df842bdb3b0553a411589e64aaa1a7d0c0259f72fabcedfaa841683ae3019d80"
+dependencies = [
+ "byteorder",
+ "half",
+ "serde",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "prettytable-rs"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a"
+dependencies = [
+ "csv",
+ "encode_unicode 1.0.0",
+ "is-terminal",
+ "lazy_static",
+ "term",
+ "unicode-width",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.79"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
+[[package]]
+name = "quote"
+version = "1.0.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rand_distr"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
+dependencies = [
+ "num-traits",
+ "rand",
+]
+
+[[package]]
+name = "rawpointer"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
+[[package]]
+name = "redox_users"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891"
+dependencies = [
+ "getrandom",
+ "libredox",
+ "thiserror",
+]
+
+[[package]]
+name = "regex"
+version = "1.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
+
+[[package]]
+name = "rusqlite"
+version = "0.30.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a78046161564f5e7cd9008aff3b2990b3850dc8e0349119b98e8f251e099f24d"
+dependencies = [
+ "bitflags 2.5.0",
+ "fallible-iterator",
+ "fallible-streaming-iterator",
+ "hashlink",
+ "libsqlite3-sys",
+ "smallvec",
+]
+
+[[package]]
+name = "rust-htslib"
+version = "0.44.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c7eb0f29fce64a4e22578905efef3d72389058016023279a58b282eb5c0c467"
+dependencies = [
+ "bio-types",
+ "byteorder",
+ "custom_derive",
+ "derive-new",
+ "hts-sys",
+ "ieee754",
+ "lazy_static",
+ "libc",
+ "linear-map",
+ "newtype_derive",
+ "regex",
+ "thiserror",
+ "url",
+]
+
+[[package]]
+name = "rust-lapper"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee43d8e721ac803031dbab6a944b957b49a3b11eadbc099880c8aaaebf23ed27"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "rustc_version"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
+
+[[package]]
+name = "ryu"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"
+
+[[package]]
+name = "safe_arch"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "semver"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac"
+
+[[package]]
+name = "serde"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.197"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.57",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.115"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "serde_rusqlite"
+version = "0.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4600dac14aada464c5584d327baa164e372153309bc4c0fb1498bbfbaa5a028b"
+dependencies = [
+ "rusqlite",
+ "serde",
+]
+
+[[package]]
+name = "simba"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f"
+dependencies = [
+ "approx",
+ "num-complex",
+ "num-traits",
+ "paste",
+ "wide",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
+[[package]]
+name = "statrs"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e"
+dependencies = [
+ "approx",
+ "lazy_static",
+ "nalgebra",
+ "num-traits",
+ "rand",
+]
+
+[[package]]
+name = "strsim"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01"
+
+[[package]]
+name = "strum_macros"
+version = "0.25.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
+dependencies = [
+ "heck 0.4.1",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.57",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "term"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
+dependencies = [
+ "dirs-next",
+ "rustversion",
+ "winapi",
+]
+
+[[package]]
+name = "termcolor"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.57",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "toml"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "trc"
+version = "1.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91dd8e922a6b95b816407eb8c37e17a409512f759561ac48ec2b7281b395a3c6"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+]
+
+[[package]]
+name = "typenum"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-width"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
+
+[[package]]
+name = "url"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
+[[package]]
+name = "utf8parse"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
+
+[[package]]
+name = "uuid"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0"
+dependencies = [
+ "getrandom",
+ "serde",
+]
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "wide"
+version = "0.7.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89beec544f246e679fc25490e3f8e08003bc4bf612068f325120dad4cea02c1c"
+dependencies = [
+ "bytemuck",
+ "safe_arch",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.4",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.4",
+ "windows_aarch64_msvc 0.52.4",
+ "windows_i686_gnu 0.52.4",
+ "windows_i686_msvc 0.52.4",
+ "windows_x86_64_gnu 0.52.4",
+ "windows_x86_64_gnullvm 0.52.4",
+ "windows_x86_64_msvc 0.52.4",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
+
+[[package]]
+name = "zerocopy"
+version = "0.7.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.57",
+]

+ 43 - 0
Cargo.toml

@@ -0,0 +1,43 @@
+[package]
+name = "pandora_lib_variants"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+log = "^0.4.20"
+env_logger = "^0.10.1"
+clap = { version = "^4.3.16", features = ["derive"] }
+anyhow = "^1.0.75"
+indicatif = {version = "0.17.8", features = ["rayon"]}
+indicatif-log-bridge = "0.2.2"
+num-integer = "0.1.46"
+# kdam = "0.5.1"
+# pbr = "1.1.1"
+serde = { version = "^1.0.188", features = ["derive"] }
+confy = "0.5.1"
+hashbrown = { version = "0.14.3", features = ["rayon", "serde"] }
+serde_json = "1.0"
+bgzip = "0.3.1"
+rust-lapper = "1.1.0"
+csv = "1.3.0"
+statrs = "0.16.0"
+rust-htslib = "0.44.1"
+uuid = { version = "1.6.1", features = ["serde", "v4"] }
+prettytable-rs = "^0.10"
+noodles-core = "0.14.0"
+noodles-gff = "0.27.0"
+noodles-bgzf = "0.26.0"
+noodles-csi = "0.30.0"
+noodles-fasta = "0.33.0"
+noodles-sam = "0.49.0"
+noodles-bam = "0.52.0"
+noodles-vcf = "0.49.0"
+noodles-tabix = "0.36.0"
+rayon = "1.8.0"
+serde_rusqlite = "0.34.0"
+dashmap = { version = "5.5.3", features = ["rayon", "serde"] }
+crossbeam-deque = "0.8.5"
+trc = "1.2.4"
+pot = "=3.0.0"

+ 33 - 0
src/annotations/cosmic.rs

@@ -0,0 +1,33 @@
+use std::str::FromStr;
+
+use serde::{Serialize, Deserialize};
+
+use anyhow::{anyhow, Context, Ok, Result};
+
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct Cosmic {
+    pub cosmic_cnt: u64,
+}
+
+impl FromStr for Cosmic {
+    // Expects three ';'-separated fields; the third is a key=value pair carrying the count.
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let vs: Vec<&str> = s.split(";").collect();
+        if vs.len() != 3 {
+            return Err(anyhow!("Error not the right number of parts for {}", s));
+        }
+
+        if vs[0].contains("MISSING") {
+            return Err(anyhow!("MISSING values {}", s));
+        } else {
+            let v: Vec<&str> = vs[2].split("=").collect();
+
+            Ok(Cosmic {
+                cosmic_cnt: v[1].parse().context("parsing cosmic cnt")?,
+            })
+        }
+    }
+}
+
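For illustration, a minimal test that could sit at the bottom of cosmic.rs. The field names in the sample string are hypothetical; from_str only requires three ';'-separated fields, with a key=value count in the third:

    #[cfg(test)]
    mod tests {
        use super::Cosmic;

        #[test]
        fn parses_count_from_third_field() {
            // Hypothetical keys; only the ';'-separated key=value layout matters.
            let c: Cosmic = "cosmic_id=COSV12345;cosmic_gene=TP53;cosmic_cnt=42"
                .parse()
                .unwrap();
            assert_eq!(c.cosmic_cnt, 42);
        }
    }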

+ 60 - 0
src/annotations/echtvar.rs

@@ -0,0 +1,60 @@
+use std::{process::{Command, Stdio}, io::{BufReader, BufRead}};
+
+use anyhow::{Context, Ok, Result};
+use log::{info, warn};
+
+use super::{cosmic::Cosmic, gnomad::GnomAD};
+
+// /data/tools/echtvar anno -e /data/ref/hs1/CosmicCodingMuts.echtvar.zip -e /data/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip BENGUIRAT_diag_clairs_PASSED.vcf.gz test.bcf
+pub fn run_echtvar(in_path: &str, out_path: &str) -> Result<()> {
+    let bin_dir = "/data/tools";
+
+    let annot_sources: Vec<&str> = vec![
+        "/data/ref/hs1/CosmicCodingMuts.echtvar.zip",
+        "/data/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
+    ]
+    .iter()
+    .flat_map(|e| vec!["-e", e])
+    .collect();
+
+    // info!("Running echtvar anno for {}", in_path);
+    let mut cmd = Command::new(format!("{}/echtvar", bin_dir))
+        .arg("anno")
+        .args(annot_sources)
+        .arg(in_path)
+        .arg(out_path)
+        .stderr(Stdio::piped())
+        .spawn()
+        .context("echtvar anno failed to start")?;
+
+    // Stream echtvar's stderr and surface any line containing "error" as a warning.
+    let stderr = cmd.stderr.take().unwrap();
+    let reader = BufReader::new(stderr);
+    reader
+        .lines()
+        .filter_map(|line| line.ok())
+        .filter(|line| line.contains("error"))
+        .for_each(|line| warn!("{}", line));
+
+    cmd.wait()?;
+    Ok(())
+}
+
+pub fn parse_echtvar_val(s: &str) -> Result<(Option<Cosmic>, Option<GnomAD>)> {
+    let mi: Vec<_> = s.match_indices(r";gnomad").collect();
+    let (i, _) = mi[0];
+    let str_cosmic = &s[..i];
+    let str_gnomad = &s[i + 1..];
+
+    let cosmic = if str_cosmic.contains("MISSING") {
+        None
+    } else {
+        Some(str_cosmic.parse::<Cosmic>()?)
+    };
+
+    let gnomad = if str_gnomad.contains("-1") {
+        None
+    } else {
+        Some(str_gnomad.parse::<GnomAD>()?)
+    };
+    Ok((cosmic, gnomad))
+}
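A hedged usage sketch of the two functions above. The file names and annotation keys are made up; parse_echtvar_val assumes the value contains a ";gnomad..." segment, with the COSMIC fields before it and 13 gnomAD key=value fields after it:

    fn annotate_and_parse() -> anyhow::Result<()> {
        // Hypothetical input/output names; the echtvar binary and the two
        // annotation archives are hard-coded in run_echtvar above.
        run_echtvar("sample.pass.vcf.gz", "sample.annotated.bcf")?;

        // Layout expected by parse_echtvar_val: COSMIC fields, then 13 gnomAD fields.
        let value = concat!(
            "cosmic_id=COSV12345;cosmic_gene=TP53;cosmic_cnt=42",
            ";gnomad_ac=3;gnomad_an=1000;gnomad_af=0.003;gnomad_af_oth=0;gnomad_af_ami=0",
            ";gnomad_af_sas=0;gnomad_af_fin=0;gnomad_af_eas=0;gnomad_af_amr=0",
            ";gnomad_af_afr=0;gnomad_af_mid=0;gnomad_af_asj=0;gnomad_af_nfe=0",
        );
        let (cosmic, gnomad) = parse_echtvar_val(value)?;
        assert!(cosmic.is_some() && gnomad.is_some());
        Ok(())
    }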

+ 62 - 0
src/annotations/gnomad.rs

@@ -0,0 +1,62 @@
+use std::str::FromStr;
+use serde::{Serialize, Deserialize};
+use anyhow::{anyhow, Ok, Result};
+
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct GnomAD {
+    pub gnomad_ac: u64,
+    pub gnomad_an: u64,
+    pub gnomad_af: f64,
+    pub gnomad_af_oth: f64,
+    pub gnomad_af_ami: f64,
+    pub gnomad_af_sas: f64,
+    pub gnomad_af_fin: f64,
+    pub gnomad_af_eas: f64,
+    pub gnomad_af_amr: f64,
+    pub gnomad_af_afr: f64,
+    pub gnomad_af_mid: f64,
+    pub gnomad_af_asj: f64,
+    pub gnomad_af_nfe: f64,
+}
+
+impl FromStr for GnomAD {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let vs: Vec<_> = s.split(";").collect();
+        if vs.len() < 13 {
+            return Err(anyhow!("Error not the right number of parts for {:?}", s));
+        }
+        if vs[0].contains("-1") {
+            return Err(anyhow!(
+                "MISSING values check for -1 before parsing for {:?}",
+                s
+            ));
+        }
+
+        let vv: Vec<&str> = vs
+            .iter()
+            .map(|e| {
+                let v: Vec<_> = e.split("=").collect();
+                v[1]
+            })
+            .collect();
+
+        Ok(GnomAD {
+            gnomad_ac: vv[0].parse()?,
+            gnomad_an: vv[1].parse()?,
+            gnomad_af: vv[2].parse()?,
+            gnomad_af_oth: vv[3].parse()?,
+            gnomad_af_ami: vv[4].parse()?,
+            gnomad_af_sas: vv[5].parse()?,
+            gnomad_af_fin: vv[6].parse()?,
+            gnomad_af_eas: vv[7].parse()?,
+            gnomad_af_amr: vv[8].parse()?,
+            gnomad_af_afr: vv[9].parse()?,
+            gnomad_af_mid: vv[10].parse()?,
+            gnomad_af_asj: vv[11].parse()?,
+            gnomad_af_nfe: vv[12].parse()?,
+        })
+    }
+}
+
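Note that parsing here is positional: each ';'-separated field is split on '=' and only the value part is used, so the 13 fields must appear in the order of the struct above. A small illustrative check (the keys are made up) showing that a short or '-1'-flagged value is rejected rather than parsed:

    #[cfg(test)]
    mod tests {
        use super::GnomAD;

        #[test]
        fn short_or_missing_values_are_rejected() {
            // Too few fields (and a -1 sentinel): from_str returns an error.
            assert!("gnomad_ac=-1;gnomad_an=-1;gnomad_af=-1".parse::<GnomAD>().is_err());
        }
    }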

+ 5 - 0
src/annotations/mod.rs

@@ -0,0 +1,5 @@
+pub mod vep;
+pub mod echtvar;
+pub mod ncbi_gff;
+pub mod cosmic;
+pub mod gnomad;

+ 89 - 0
src/annotations/ncbi_gff.rs

@@ -0,0 +1,89 @@
+use std::str::FromStr;
+use anyhow::{Context, Ok, Result};
+use serde::{Serialize, Deserialize};
+
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct NCBIGFF {
+    pub feature: String,
+    pub name: Option<String>,
+    pub standard_name: Option<String>,
+    pub function: Option<String>,
+    pub experiment: Option<String>,
+    pub note: Option<String>,
+    pub regulatory_class: Option<String>,
+}
+
+impl From<noodles_gff::Record> for NCBIGFF {
+    fn from(r: noodles_gff::Record) -> Self {
+        let attr = r.attributes();
+
+        let inner_string = |name: &str| attr.get(name).map(|e| e.to_string());
+
+        NCBIGFF {
+            feature: r.ty().to_string(),
+            name: inner_string("Name"),
+            standard_name: inner_string("standard_name"),
+            function: inner_string("function"),
+            experiment: inner_string("experiment"),
+            note: inner_string("Note"),
+            regulatory_class: inner_string("regulatory_class"),
+        }
+    }
+}
+
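+/// An NCBI accession split into prefix, numeric part and version,
+/// e.g. "NM_000546.6" -> prefix "NM", number 546, version 6.0.
+/// Unparseable accessions keep the whole string as prefix and sort last (number = u64::MAX).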
+#[derive(Debug, Clone)]
+pub struct NCBIAcc {
+    pub prefix: String,
+    pub number: u64,
+    pub version: f32,
+}
+
+impl FromStr for NCBIAcc {
+    type Err = anyhow::Error;
+    fn from_str(s: &str) -> Result<Self> {
+        if s.contains("unassigned_transcript_") {
+            let s = s.replace("unassigned_transcript_", "");
+            let (num, v) = if s.contains("_") {
+                s.split_once("_").context("first split error")?
+            } else {
+                (s.as_str(), "0")
+            };
+            Ok(NCBIAcc {
+                prefix: "unassigned_transcript".to_string(),
+                number: num.parse().context("Error parsing NCBI accession number")?,
+                version: v.parse().context("Error parsing NCBI accession version")?,
+            })
+        } else if s.contains('_') && s.contains('.') {
+            let (rest, v) = s.split_once('.').context("first split error")?;
+            let (pref, num) = rest.split_once('_').context("second split error")?;
+            let v = v.replace('_', ".");
+            Ok(NCBIAcc {
+                prefix: pref.to_string(),
+                number: num.parse().context("Error parsing NCBI accession number")?,
+                version: v.parse().context("Error parsing NCBI accession version")?,
+            })
+        } else {
+            // Anything else is kept whole as the prefix and sorted last.
+            Ok(NCBIAcc {
+                prefix: s.to_string(),
+                number: u64::MAX,
+                version: 0.0,
+            })
+        }
+    }
+}

+ 364 - 0
src/annotations/vep.rs

@@ -0,0 +1,364 @@
+use anyhow::{anyhow, Context, Ok, Result};
+use csv::ReaderBuilder;
+use hashbrown::HashMap;
+use log::{info, warn};
+use serde::{Deserialize, Serialize};
+use std::io::Write;
+use std::{
+    env::temp_dir,
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    process::{Command, Stdio},
+    str::FromStr,
+};
+
+use crate::variants::{AnnotationType, Variant};
+
+use super::ncbi_gff::NCBIAcc;
+
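+/// One data row of the tab-separated VEP output file, in column order.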
+#[derive(Debug, PartialEq, Serialize, Deserialize)]
+pub struct VEPLine {
+    pub uploaded_variation: String,
+    pub location: String,
+    pub allele: String,
+    pub gene: String,
+    pub feature: String,
+    pub feature_type: String,
+    pub consequence: String,
+    pub cdna_position: String,
+    pub cds_position: String,
+    pub protein_position: String,
+    pub amino_acids: String,
+    pub codons: String,
+    pub existing_variation: String,
+    pub extra: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct VEP {
+    pub gene: Option<String>,
+    pub feature: Option<String>,
+    pub feature_type: Option<String>,
+    pub consequence: Option<Vec<String>>,
+    pub cdna_position: Option<String>,
+    pub cds_position: Option<String>,
+    pub protein_position: Option<String>,
+    pub amino_acids: Option<String>,
+    pub codons: Option<String>,
+    pub existing_variation: Option<String>,
+    pub extra: VEPExtra,
+}
+impl VEP {
+    fn from_vep_line(d: &VEPLine) -> Result<VEP> {
+        let or_opt = |s: &str| match s {
+            "-" => None,
+            _ => Some(s.to_string()),
+        };
+
+        let consequence = if let Some(c) = or_opt(&d.consequence) {
+            Some(c.split(",").map(|e| e.to_string()).collect::<Vec<String>>())
+        } else {
+            None
+        };
+
+        Ok(VEP {
+            gene: or_opt(&d.gene),
+            feature: or_opt(&d.feature),
+            feature_type: or_opt(&d.feature_type),
+            consequence,
+            cdna_position: or_opt(&d.cdna_position),
+            cds_position: or_opt(&d.cds_position),
+            protein_position: or_opt(&d.protein_position),
+            amino_acids: or_opt(&d.amino_acids),
+            codons: or_opt(&d.codons),
+            existing_variation: or_opt(&d.existing_variation),
+            extra: d.extra.parse()?,
+        })
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct VEPExtra {
+    pub impact: Option<VEPImpact>,
+    pub symbol: Option<String>,
+    pub distance: Option<u32>,
+    pub hgvs_c: Option<String>,
+    pub hgvs_p: Option<String>,
+}
+impl FromStr for VEPExtra {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let err = |c| anyhow!("Error {} parsing VEP Extra field {}", c, s);
+
+        let elements = s.split(";").collect::<Vec<&str>>();
+
+        let mut kv = HashMap::new();
+
+        for e in elements.iter() {
+            let (k, v) = e.split_once("=").ok_or(err("in split '='"))?;
+            if kv.insert(k, v).is_some() {
+                return Err(err("kv insert"));
+            };
+        }
+
+        // Optional keys: parse when present, propagate any parse error.
+        let impact: Option<VEPImpact> = kv.get("IMPACT").map(|v| v.parse()).transpose()?;
+        let symbol: Option<String> = kv.get("SYMBOL").map(|v| v.to_string());
+        let distance: Option<u32> = kv.get("DISTANCE").map(|v| v.parse()).transpose()?;
+        let hgvs_c: Option<String> = kv.get("HGVSc").map(|v| v.to_string());
+        let hgvs_p: Option<String> = kv.get("HGVSp").map(|v| v.to_string());
+
+        Ok(VEPExtra {
+            impact,
+            symbol,
+            distance,
+            hgvs_c,
+            hgvs_p,
+        })
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub enum VEPImpact {
+    Low,
+    Moderate,
+    High,
+    Modifier,
+}
+
+impl FromStr for VEPImpact {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        match s {
+            "LOW" => Ok(VEPImpact::Low),
+            "MODERATE" => Ok(VEPImpact::Moderate),
+            "HIGH" => Ok(VEPImpact::High),
+            "MODIFIER" => Ok(VEPImpact::Modifier),
+            _ => Err(anyhow!("Unexpected VEP Impact value")),
+        }
+    }
+}
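+/// Annotates a chunk of variants with VEP: writes them to a temporary VCF
+/// (with 1-based row indices as IDs), runs VEP, parses the tab-separated output
+/// back and attaches the matching `AnnotationType::VEP` entries to each variant.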
+pub fn vep_chunk(data: &mut [Variant]) -> Result<()> {
+    let in_vcf = format!(
+        "{}/vep_{}.vcf",
+        temp_dir().to_str().unwrap(),
+        uuid::Uuid::new_v4()
+    );
+    let out_vep = format!(
+        "{}/vep_{}.txt",
+        temp_dir().to_str().unwrap(),
+        uuid::Uuid::new_v4()
+    );
+
+    let mut vcf = File::create(&in_vcf).context("creating temporary VCF for VEP")?;
+    // write_vcf_rayon(&in_vcf, data, dict_file)?;
+    let vcf_header = r"##fileformat=VCFv4.2
+##contig=<ID=chr1,length=248387328>
+##contig=<ID=chr2,length=242696752>
+##contig=<ID=chr3,length=201105948>
+##contig=<ID=chr4,length=193574945>
+##contig=<ID=chr5,length=182045439>
+##contig=<ID=chr6,length=172126628>
+##contig=<ID=chr7,length=160567428>
+##contig=<ID=chr8,length=146259331>
+##contig=<ID=chr9,length=150617247>
+##contig=<ID=chr10,length=134758134>
+##contig=<ID=chr11,length=135127769>
+##contig=<ID=chr12,length=133324548>
+##contig=<ID=chr13,length=113566686>
+##contig=<ID=chr14,length=101161492>
+##contig=<ID=chr15,length=99753195>
+##contig=<ID=chr16,length=96330374>
+##contig=<ID=chr17,length=84276897>
+##contig=<ID=chr18,length=80542538>
+##contig=<ID=chr19,length=61707364>
+##contig=<ID=chr20,length=66210255>
+##contig=<ID=chr21,length=45090682>
+##contig=<ID=chr22,length=51324926>
+##contig=<ID=chrX,length=154259566>
+##contig=<ID=chrY,length=62460029>
+##contig=<ID=chrM,length=16569>
+#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE";
+
+    writeln!(vcf, "{}", vcf_header)?;
+
+    for (i, row) in data.iter().enumerate() {
+        writeln!(
+            vcf,
+            "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}\t{}",
+            row.contig,
+            row.position,
+            i + 1,
+            row.reference,
+            row.alternative,
+            ".",
+            ".",
+            "."
+        )?;
+    }
+
+    run_vep(&in_vcf, &out_vep)?;
+
+    // read the results in txt file, parse and add to HashMap
+    let mut reader_vep = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .has_headers(false)
+        .comment(Some(b'#'))
+        .flexible(true)
+        .from_reader(fs::File::open(out_vep.clone())?);
+
+    let mut lines: HashMap<u64, Vec<VEPLine>> = HashMap::new();
+    for line in reader_vep.deserialize::<VEPLine>() {
+        if let std::result::Result::Ok(line) = line {
+            if let std::result::Result::Ok(k) = line.uploaded_variation.parse::<u64>() {
+                lines
+                    .raw_entry_mut()
+                    .from_key(&k)
+                    .or_insert_with(|| (k, vec![]))
+                    .1
+                    .push(line);
+            } else {
+                return Err(anyhow!("Error while parsing: {:?}", line));
+            }
+        } else {
+            return Err(anyhow!("Error while parsing: {:?}", line));
+        }
+    }
+
+    // remove input and result file
+    fs::remove_file(in_vcf)?;
+    fs::remove_file(out_vep)?;
+
+    let mut n_not_vep = 0;
+    data.iter_mut().enumerate().for_each(|(i, entry)| {
+        let k = (i + 1) as u64;
+
+        match lines.get(&k) {
+            Some(vep_lines) => {
+                let vep: Vec<VEP> = vep_lines
+                    .iter()
+                    .map(|e| match VEP::from_vep_line(e) {
+                        std::result::Result::Ok(r) => r,
+                        Err(err) => panic!("Error while parsing: {} line: {:?}", err, e),
+                    })
+                    .collect();
+                entry.annotations.push(AnnotationType::VEP(vep.to_vec()));
+            }
+            None => {
+                n_not_vep += 1;
+            }
+        };
+    });
+
+    if n_not_vep > 0 {
+        warn!("{} variants not annotated by VEP", n_not_vep);
+    }
+
+    Ok(())
+}
+
+fn run_vep(in_path: &str, out_path: &str) -> Result<()> {
+    let bin_dir = "/data/tools/ensembl-vep";
+    let dir_cache = "/data/ref/hs1/vepcache/";
+    let fasta = "/data/ref/hs1/chm13v2.0.fa";
+    let gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz";
+    // let gff = "/data/ref/hs1/ncbi_dataset/data/GCF_009914755.1/genomic_chr_sorted.gff.gz";
+
+    // info!("Running VEP for {}", in_path);
+    let mut cmd = Command::new(format!("{}/vep", bin_dir))
+        .arg("--dir_cache")
+        .arg(dir_cache)
+        .arg("--cache")
+        .arg("--offline")
+        .arg("--fasta")
+        .arg(fasta)
+        .arg("--gff")
+        .arg(gff)
+        .arg("--symbol")
+        .arg("--plugin")
+        .arg("SpliceRegion")
+        .arg("--plugin")
+        .arg("Downstream")
+        .arg("--hgvs")
+        .arg("-i")
+        .arg(in_path)
+        .arg("-o")
+        .arg(out_path)
+        .stderr(Stdio::piped())
+        // .stderr(Stdio::null())
+        .spawn()
+        .context("VEP failed to start")?;
+        // .stderr
+        // .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "Could not capture standard output.")).unwrap();
+
+    let stderr = cmd.stderr.take().unwrap();
+    let reader = BufReader::new(stderr);
+    reader
+        .lines()
+        .filter_map(|line| line.ok())
+        .filter(|line| line.contains("error"))
+        .for_each(|line| warn!("{}", line));
+
+    let status = cmd.wait()?;
+    if !status.success() {
+        warn!("VEP exited with non-zero status: {}", status);
+    }
+    Ok(())
+}
+
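+/// Picks the most relevant VEP entry: transcript features are parsed as NCBI
+/// accessions, sorted by accession number, and RefSeq mRNA ("NM") entries are
+/// preferred over everything else.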
+pub fn get_best_vep(d: &Vec<VEP>) -> Result<VEP> {
+    if d.is_empty() {
+        return Err(anyhow!("No element in VEP vector"));
+    }
+    if d.len() == 1 {
+        return Ok(d[0].clone());
+    }
+
+    let mut parsed: Vec<(usize, NCBIAcc)> = Vec::new();
+    for (i, vep) in d.iter().enumerate() {
+        if let Some(feat) = &vep.feature {
+            if let std::result::Result::Ok(f) = feat
+                .parse::<NCBIAcc>()
+                .context("Error parsing NCBI accession")
+            {
+                parsed.push((i, f));
+            } else {
+                warn!("Can't parse {}", feat);
+            }
+        }
+    }
+
+    parsed.sort_by(|(_, a), (_, b)| a.number.cmp(&b.number));
+
+    // Prefer the lowest-numbered RefSeq mRNA ("NM") accession; otherwise fall back
+    // to the lowest-numbered accession of any kind.
+    if let Some((k, _)) = parsed.iter().find(|(_, e)| e.prefix == "NM") {
+        return Ok(d[*k].clone());
+    }
+    let (k, _) = parsed
+        .first()
+        .ok_or_else(|| anyhow!("No parsable NCBI accession among the VEP features"))?;
+    Ok(d[*k].clone())
+}

+ 63 - 0
src/callers/clairs.rs

@@ -0,0 +1,63 @@
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+
+pub type ClairSInfo = String;
+
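+/// ClairS per-sample FORMAT values, parsed positionally from a colon-separated
+/// string of 16 fields.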
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
+pub struct ClairSFormat {
+    pub gt: String,
+    pub gq: u32,
+    pub dp: u32,
+    pub af: f64,
+    pub ad: Vec<u32>,
+    pub naf: f64,
+    pub ndp: u32,
+    pub nad: Vec<u32>,
+    pub au: u32,
+    pub cu: u32,
+    pub gu: u32,
+    pub tu: u32,
+    pub nau: u32,
+    pub ncu: u32,
+    pub ngu: u32,
+    pub ntu: u32,
+}
+
+impl FromStr for ClairSFormat {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let vals = s.split(':').collect::<Vec<&str>>();
+        if vals.len() < 16 {
+            return Err(anyhow::anyhow!(
+                "Expected 16 colon-separated ClairS FORMAT fields, got {} in {:?}",
+                vals.len(),
+                s
+            ));
+        }
+
+        Ok(ClairSFormat {
+            gt: vals.get(0).unwrap().to_string(),
+            gq: vals.get(1).unwrap().parse()?,
+            dp: vals.get(2).unwrap().parse()?,
+            af: vals.get(3).unwrap().parse()?,
+            ad: vals
+                .get(4)
+                .unwrap()
+                .split(',')
+                .map(|e| e.parse().unwrap())
+                .collect(),
+            naf: vals.get(5).unwrap().parse()?,
+            ndp: vals.get(6).unwrap().parse()?,
+            nad: vals
+                .get(7)
+                .unwrap()
+                .split(',')
+                .map(|e| e.parse().unwrap())
+                .collect(),
+            au: vals.get(8).unwrap().parse()?,
+            cu: vals.get(9).unwrap().parse()?,
+            gu: vals.get(10).unwrap().parse()?,
+            tu: vals.get(11).unwrap().parse()?,
+            nau: vals.get(12).unwrap().parse()?,
+            ncu: vals.get(13).unwrap().parse()?,
+            ngu: vals.get(14).unwrap().parse()?,
+            ntu: vals.get(15).unwrap().parse()?,
+        })
+    }
+}
+
+

+ 63 - 0
src/callers/deepvariant.rs

@@ -0,0 +1,63 @@
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+
+pub type DeepVariantInfo = String;
+
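+/// DeepVariant per-sample FORMAT values (GT:GQ:DP:AD:VAF:PL), parsed positionally
+/// from the colon-separated string.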
+#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
+pub struct DeepVariantFormat {
+    pub gt: String,
+    pub gq: u32,
+    pub dp: u32,
+    pub ad: Vec<u32>,
+    pub vaf: f32,
+    pub pl: Vec<u32>,
+}
+
+impl FromStr for DeepVariantFormat {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let vals = s.split(':').collect::<Vec<&str>>();
+        if vals.len() < 6 {
+            return Err(anyhow::anyhow!(
+                "Expected 6 colon-separated DeepVariant FORMAT fields, got {} in {:?}",
+                vals.len(),
+                s
+            ));
+        }
+
+        Ok(DeepVariantFormat {
+            gt: vals[0].to_string(),
+            gq: vals[1].parse()?,
+            dp: vals[2].parse()?,
+            ad: vals[3]
+                .split(',')
+                .map(|e| e.parse())
+                .collect::<Result<_, _>>()?,
+            vaf: vals[4].parse()?,
+            pl: vals[5]
+                .split(',')
+                .map(|e| e.parse())
+                .collect::<Result<_, _>>()?,
+        })
+    }
+}
+
+impl ToString for DeepVariantFormat {
+    fn to_string(&self) -> String {
+        vec![
+            self.gt.to_string(),
+            self.gq.to_string(),
+            self.dp.to_string(),
+            self.ad
+                .iter()
+                .map(|e| e.to_string())
+                .collect::<Vec<String>>()
+                .join(","),
+            self.vaf.to_string(),
+            self.pl
+                .iter()
+                .map(|e| e.to_string())
+                .collect::<Vec<String>>()
+                .join(","),
+        ]
+        .join(":")
+    }
+}

+ 4 - 0
src/callers/mod.rs

@@ -0,0 +1,4 @@
+pub mod deepvariant;
+pub mod clairs;
+pub mod sniffles;
+pub mod nanomonsv;

+ 129 - 0
src/callers/nanomonsv.rs

@@ -0,0 +1,129 @@
+use std::str::FromStr;
+use anyhow::{anyhow, Ok, Result};
+use hashbrown::HashMap;
+use serde::{Serialize, Deserialize};
+
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct NanomonsvFormat {
+    pub tr: u32,
+    pub vr: u32,
+}
+
+impl FromStr for NanomonsvFormat {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let elements = s.split(":").collect::<Vec<&str>>();
+
+        if elements.len() != 2 {
+            return Err(anyhow!(
+                "Error of NanomonsvFormat expecting 2 tokens while parsing: {}",
+                s
+            ));
+        }
+
+        let tr = elements[0].parse()?;
+        let vr = elements[1].parse()?;
+
+        Ok(NanomonsvFormat { tr, vr })
+    }
+}
+
+impl ToString for NanomonsvFormat {
+    fn to_string(&self) -> String {
+        format!("{}:{}", self.tr, self.vr)
+    }
+}
+
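+/// Nanomonsv INFO fields: SVTYPE is required, the remaining keys
+/// (SVLEN, END, SVINSLEN, SVINSSEQ, MATEID) are optional.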
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct NanomonsvInfo {
+    pub svtype: String,
+    pub svlen: Option<i32>,
+    pub end: Option<u32>,
+    pub svinslen: Option<u32>,
+    pub svinsseq: Option<String>,
+    pub mateid: Option<String>,
+}
+
+impl FromStr for NanomonsvInfo {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let err = |c| anyhow!("Error {} parsing NanomonsvInfo {}", c, s);
+
+        let elements = s.split(";").collect::<Vec<&str>>();
+
+        if elements.len() < 1 {
+            return Err(err("unexpected Info length"));
+        }
+
+        let mut kv = HashMap::new();
+
+        for e in elements.iter() {
+            let (k, v) = e.split_once("=").ok_or(err("split ="))?;
+            if kv.insert(k, v).is_some() {
+                return Err(err("kv insert"));
+            };
+        }
+
+        // Optional INFO keys: parse them when present, propagate any parse error.
+        let svlen = kv.get("SVLEN").map(|v| v.parse()).transpose()?;
+        let end = kv.get("END").map(|v| v.parse()).transpose()?;
+        let svinslen = kv.get("SVINSLEN").map(|v| v.parse()).transpose()?;
+        let svinsseq = kv.get("SVINSSEQ").map(|v| v.to_string());
+        let mateid = kv.get("MATEID").map(|v| v.to_string());
+
+        Ok(NanomonsvInfo {
+            svtype: kv.get("SVTYPE").ok_or(err("SVTYPE"))?.to_string(),
+            svlen,
+            end,
+            svinslen,
+            svinsseq,
+            mateid,
+        })
+    }
+}
+
+impl ToString for NanomonsvInfo {
+    fn to_string(&self) -> String {
+        let mut s = Vec::new();
+
+        s.push(format!("SVTYPE={}", self.svtype));
+        if let Some(v) = self.svlen {
+            s.push(format!("SVLEN={}", v));
+        }
+        if let Some(v) = self.end {
+            s.push(format!("END={}", v));
+        }
+        if let Some(v) = self.svinslen {
+            s.push(format!("SVINSLEN={}", v));
+        }
+        if let Some(v) = &self.svinsseq {
+            s.push(format!("SVINSSEQ={}", v));
+        }
+        if let Some(v) = &self.mateid {
+            s.push(format!("MATEID={}", v));
+        }
+
+        s.join(";")
+    }
+}
+

+ 179 - 0
src/callers/sniffles.rs

@@ -0,0 +1,179 @@
+use std::str::FromStr;
+use std::string::ToString;
+use anyhow::{anyhow, Ok, Result};
+use hashbrown::{HashSet, HashMap};
+use serde::{Serialize, Deserialize};
+
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct SnifflesFormat {
+    pub gt: String,
+    pub gq: u32,
+    pub dr: u32,
+    pub dv: u32,
+}
+
+impl FromStr for SnifflesFormat {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let elements = s.split(":").collect::<Vec<&str>>();
+
+        if elements.len() != 4 {
+            return Err(anyhow!("Error of SnifflesFormat parsing: {}", s));
+        }
+
+        let gt = elements[0].to_string();
+        let gq = elements[1].parse()?;
+        let dr = elements[2].parse()?;
+        let dv = elements[3].parse()?;
+
+        Ok(SnifflesFormat { gt, gq, dr, dv })
+    }
+}
+
+impl ToString for SnifflesFormat {
+    fn to_string(&self) -> String {
+        format!("{}:{}:{}:{}", self.gt, self.gq, self.dr, self.dv)
+    }
+}
+
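+/// Sniffles INFO fields: flag-style tokens are collected into `tags`, key=value
+/// pairs are parsed with SVTYPE, SUPPORT, RNAMES, COVERAGE, STRAND, NM, AF and
+/// STDEV_POS required and the remaining keys optional.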
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct SnifflesInfo {
+    pub tags: Vec<String>,
+    pub svtype: String,
+    pub svlen: Option<i32>,
+    pub end: Option<u32>,
+    pub support: u32,
+    pub rnames: Vec<String>,
+    pub coverage: Vec<u32>,
+    pub strand: String,
+    pub nm: f32,
+    pub af: f32,
+    pub stdev_len: Option<f32>,
+    pub stdev_pos: f32,
+    pub support_long: Option<u32>,
+    pub chr2: Option<String>,
+}
+
+impl FromStr for SnifflesInfo {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let err = |c| anyhow!("Error {} parsing SnifflesInfo {}", c, s);
+
+        let elements = s.split(";").collect::<Vec<&str>>();
+
+        if elements.len() < 11 {
+            return Err(err("length"));
+        }
+
+        let mut tags = HashSet::new();
+        let mut kv = HashMap::new();
+
+        for e in elements.iter() {
+            if e.contains("=") {
+                let (k, v) = e.split_once("=").ok_or(err("split ="))?;
+                if kv.insert(k, v).is_some() {
+                    return Err(err("kv insert"));
+                };
+            } else {
+                tags.insert(e);
+            }
+        }
+
+        // Optional INFO keys: parse them when present, propagate any parse error.
+        let svlen = kv.get("SVLEN").map(|v| v.parse()).transpose()?;
+        let end = kv.get("END").map(|v| v.parse()).transpose()?;
+        let stdev_len = kv.get("STDEV_LEN").map(|v| v.parse()).transpose()?;
+        let support_long = kv.get("SUPPORT_LONG").map(|v| v.parse()).transpose()?;
+        let chr2 = kv.get("CHR2").map(|v| v.to_string());
+
+        Ok(SnifflesInfo {
+            tags: tags.into_iter().map(|e| e.to_string()).collect(),
+            svtype: kv.get("SVTYPE").ok_or(err("SVTYPE"))?.parse()?,
+            svlen,
+            end,
+            support: kv.get("SUPPORT").ok_or(err("SUPPORT"))?.parse()?,
+            rnames: kv
+                .get("RNAMES")
+                .ok_or(err("RNAMES"))?
+                .to_string()
+                .split(",")
+                .map(|e| e.to_string())
+                .collect(),
+            coverage: kv
+                .get("COVERAGE")
+                .ok_or(err("COVERAGE"))?
+                .to_string()
+                .split(",")
+                .map(|e| e.parse())
+                .collect::<Result<Vec<u32>, _>>()?,
+            strand: kv.get("STRAND").ok_or(err("STRAND"))?.parse()?,
+            nm: kv.get("NM").ok_or(err("NM"))?.parse()?,
+            af: kv.get("AF").ok_or(err("AF"))?.parse()?,
+            stdev_len,
+            stdev_pos: kv.get("STDEV_POS").ok_or(err("STDEV_POS"))?.parse()?,
+            support_long,
+            chr2,
+        })
+    }
+}
+
+impl ToString for SnifflesInfo {
+    fn to_string(&self) -> String {
+        let mut s = Vec::new();
+
+        s.push(self.tags.join(","));
+        s.push(format!("SVTYPE={}", self.svtype));
+        if let Some(v) = self.svlen {
+            s.push(format!("SVLEN={}", v));
+        }
+        if let Some(v) = self.end {
+            s.push(format!("END={}", v));
+        }
+        s.push(format!("SUPPORT={}", self.support));
+        s.push(format!("RNAMES={}", self.rnames.join(",")));
+        s.push(format!(
+            "COVERAGE={}",
+            self.coverage
+                .iter()
+                .map(|e| e.to_string())
+                .collect::<Vec<String>>()
+                .join(",")
+        ));
+        s.push(format!("STRAND={}", self.strand));
+        s.push(format!("NM={}", self.nm));
+        s.push(format!("AF={}", self.af));
+        if let Some(v) = self.stdev_len {
+            s.push(format!("STDEV_LEN={}", v));
+        }
+        s.push(format!("STDEV_POS={}", self.stdev_pos));
+        if let Some(v) = self.support_long {
+            s.push(format!("SUPPORT_LONG={}", v));
+        }
+        if let Some(v) = &self.chr2 {
+            s.push(format!("CHR2={}", v));
+        }
+
+        s.join(";")
+    }
+}
+

+ 49 - 0
src/config.rs

@@ -0,0 +1,49 @@
+use std::path::PathBuf;
+
+use serde::{Serialize, Deserialize};
+use anyhow::Result;
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct Config {
+    pub db: String,
+    pub reference_fa: String,
+    pub longreads_results_dir: String,
+    pub panel_results_dir: String,
+    pub dict_file: String,
+    pub gff_path: String,
+    pub min_loh_diff: f32,
+    pub deepvariant_loh_pval: f64,
+    pub min_mrd_depth: u32,
+    pub min_diversity: f64,
+    pub vep_chunk_size: usize,
+    pub max_gnomad_af: f64,
+}
+
+impl ::std::default::Default for Config {
+    fn default() -> Self {
+        Self {
+            db: "/data/db_results.sqlite".to_string(),
+            reference_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
+            longreads_results_dir: "/data/longreads_basic_pipe".to_string(),
+            panel_results_dir: "/data/oncoT".to_string(),
+            dict_file: "/data/ref/hs1/chm13v2.0.dict".to_string(),
+            gff_path: "/data/ref/hs1/features_not_in_vep.gff.gz".to_string(),
+            min_loh_diff: 0.25,
+            deepvariant_loh_pval: 0.001,
+            min_mrd_depth: 6,
+            min_diversity: 1.6,
+            vep_chunk_size: 1_000,
+            max_gnomad_af: 0.01,
+        }
+    }
+}
+
+impl Config {
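+    /// Loads the configuration via `confy` (app name "pandora_lib_variants");
+    /// if no config file exists yet, confy typically creates one from `Default`.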
+    pub fn get() -> Result<Self> {
+        Ok(confy::load("pandora_lib_variants", None)?)
+    }
+    pub fn path() -> Result<PathBuf> {
+        Ok(confy::get_configuration_file_path("pandora_lib_variants", None)?)
+    } 
+}
+

+ 28 - 0
src/in_out/dict_reader.rs

@@ -0,0 +1,28 @@
+use std::fs;
+
+use csv::ReaderBuilder;
+use anyhow::{ Ok, Result};
+use log::info;
+
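+/// Parses a sequence dictionary (.dict) file and returns (contig name, length)
+/// pairs taken from its @SQ lines.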
+pub fn read_dict(path: &str) -> Result<Vec<(String, u32)>> {
+    info!("Parsing {}", path);
+
+    let mut reader = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .flexible(true)
+        .has_headers(false)
+        .from_reader(fs::File::open(path)?);
+
+    let mut res = Vec::new();
+    for line in reader.records() {
+        let line = line?;
+        if line.get(0) == Some("@SQ") {
+            res.push((
+                line.get(1).unwrap().replace("SN:", ""),
+                line.get(2).unwrap().replace("LN:", "").parse().unwrap(),
+            ));
+        }
+    }
+    Ok(res)
+}
+

+ 64 - 0
src/in_out/mod.rs

@@ -0,0 +1,64 @@
+use anyhow::{Ok, Result};
+use bgzip::BGZFReader;
+use indicatif::MultiProgress;
+use std::{
+    fs::{File, Metadata},
+    io::BufReader,
+};
+
+use crate::utils::{new_pg_speed, new_pg_bytes};
+
+pub mod dict_reader;
+pub mod vcf_reader;
+pub mod vcf_writer;
+
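+/// Opens `path` and returns a buffered reader, transparently decompressing
+/// bgzip-compressed ".gz" files; plain ".vcf" files are read as-is.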
+pub fn get_reader(path: &str) -> Result<Box<dyn std::io::Read>> {
+    let file_type = path.split('.').last().unwrap();
+
+    assert!(file_type == "gz" || file_type == "vcf");
+
+    let raw_reader: Box<dyn std::io::Read> = Box::new(File::open(path)?);
+
+    match file_type {
+        "gz" => {
+            let reader = Box::new(BGZFReader::new(raw_reader)?);
+            Ok(Box::new(BufReader::new(reader)))
+        }
+        "vcf" => {
+            // let reader = Box::new(BzDecoder::new(raw_reader));
+            Ok(Box::new(BufReader::new(raw_reader)))
+        }
+        t => {
+            panic!("unknown file type: {}", t)
+        }
+    }
+}
+pub fn get_reader_progress(
+    path: &str,
+    mp: &MultiProgress,
+) -> Result<Box<dyn std::io::Read>> {
+    let file_type = path.split('.').last().unwrap();
+
+    assert!(file_type == "gz" || file_type == "vcf");
+    let file = File::open(path)?;
+    let metadata = file.metadata()?;
+    let pg = mp.add(new_pg_bytes(metadata.len() as u64));
+    pg.set_message(format!("Reading {path}"));
+
+    let raw_reader: Box<dyn std::io::Read> = Box::new(file);
+    let raw_reader = pg.wrap_read(raw_reader);
+
+    match file_type {
+        "gz" => {
+            let reader = Box::new(BGZFReader::new(raw_reader)?);
+            Ok(Box::new(BufReader::new(reader)))
+        }
+        "vcf" => {
+            // let reader = Box::new(BzDecoder::new(raw_reader));
+            Ok(Box::new(BufReader::new(raw_reader)))
+        }
+        t => {
+            panic!("unknown file type: {}", t)
+        }
+    }
+}

+ 199 - 0
src/in_out/vcf_reader.rs

@@ -0,0 +1,199 @@
+use std::{fs, fmt::Write};
+
+use crate::{
+    in_out::get_reader,
+    utils::new_pg_speed,
+    variants::{VCFSource, Variant, VariantType},
+};
+use anyhow::{Ok, Result};
+use csv::ReaderBuilder;
+use indicatif::{MultiProgress, ProgressBar, ProgressStyle, ProgressState};
+use log::{info, warn};
+use rayon::prelude::*;
+
+#[derive(Debug, serde::Deserialize, Eq, PartialEq, Clone)]
+pub struct VCFRow {
+    pub chr: String,
+    pub pos: u32,
+    pub id: String,
+    pub reference: String,
+    pub alt: String,
+    pub qual: String,
+    pub filter: String,
+    pub info: String,
+    pub format: String,
+    pub value: String,
+}
+
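+/// Reads a VCF into `Variant`s. Multi-allelic rows are split into one row per
+/// alternate allele; the per-sample value is re-assembled assuming a
+/// DeepVariant-style GT:GQ:DP:AD:VAF:PL FORMAT with three PL values per allele.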
+pub fn read_vcf(
+    path: &str,
+    source: &VCFSource,
+    variant_type: &VariantType,
+) -> Result<Vec<Variant>> {
+    info!("Reading VCF {}", path);
+
+    let mut reader = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .comment(Some(b'#'))
+        .has_headers(false)
+        .flexible(true)
+        .from_reader(get_reader(&path)?);
+    let mut iter = reader.deserialize();
+
+    let mut all = Vec::new();
+
+    while let Some(result) = iter.next() {
+        let record: VCFRow = result?;
+
+        // Normalize into multirows
+        if record.alt.contains(",") {
+            let alts = record.alt.split(',').collect::<Vec<&str>>();
+            let n = alts.len();
+
+            let vals = record.value.split(':').collect::<Vec<&str>>();
+            let ads = vals.get(3).unwrap().split(',').collect::<Vec<&str>>();
+            let vafs = vals.get(4).unwrap().split(',').collect::<Vec<&str>>();
+            let pls = vals.get(5).unwrap().split(',').collect::<Vec<&str>>();
+
+            for i in 0..n {
+                let cp = &pls[(i * 3)..(i * 3 + 3)];
+                let nval = format!(
+                    "{}:{}:{}:{}:{}:{}",
+                    vals[0],
+                    vals[1],
+                    vals[2],
+                    vec![ads[0], ads[i + 1]].join(","),
+                    vafs[i],
+                    cp.join(",")
+                );
+
+                let mut rec = record.clone();
+                rec.value = nval.clone();
+                rec.alt = alts[i].to_string();
+
+                all.push(rec);
+            }
+        } else {
+            all.push(record);
+        }
+    }
+
+    info!("{} VCF rows ", all.len());
+    let base_n = "N".to_string();
+    let res: Vec<Variant> = all
+        .par_iter_mut()
+        .map(|row| {
+            // for Sniffles normalize insertion/deletion position (after the pos)
+            if source == &VCFSource::Sniffles && row.reference == base_n && row.alt.len() > 1 {
+                row.pos -= 1;
+            }
+            return Variant::from_vcfrow(row, source.clone(), variant_type.clone()).unwrap();
+        })
+        .filter(|v| {
+            for cd in v.callers_data.iter() {
+                if cd.should_filter() {
+                    return false;
+                }
+            }
+            return true;
+        })
+        .collect();
+
+    Ok(res)
+}
+
+pub fn read_vcf_progress(
+    path: &str,
+    source: &VCFSource,
+    variant_type: &VariantType,
+    mp: MultiProgress,
+) -> Result<Vec<Variant>> {
+    info!("Reading VCF {}", path);
+    let metadata = fs::metadata(path)?;
+
+    let total_size = metadata.len();
+
+    let pb = ProgressBar::new(total_size);
+    pb.set_style(ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({eta})")
+        .unwrap()
+        .with_key("eta", |state: &ProgressState, w: &mut dyn Write| write!(w, "{:.1}s", state.eta().as_secs_f64()).unwrap())
+        .progress_chars("=>-"));
+
+    let mut reader = ReaderBuilder::new()
+        .delimiter(b'\t')
+        .comment(Some(b'#'))
+        .has_headers(false)
+        .flexible(true)
+        .from_reader(get_reader(&path)?);
+    let mut iter = reader.deserialize();
+    // let r = iter.reader();
+
+    let mut all = Vec::new();
+    let pg = mp.add(pb);
+    pg.set_message("Reading VCF");
+    while let Some(result) = iter.next() {
+        let record: VCFRow = result?;
+
+        // Normalize into multirows
+        if record.alt.contains(",") {
+            let alts = record.alt.split(',').collect::<Vec<&str>>();
+            let n = alts.len();
+
+            let vals = record.value.split(':').collect::<Vec<&str>>();
+            let ads = vals.get(3).unwrap().split(',').collect::<Vec<&str>>();
+            let vafs = vals.get(4).unwrap().split(',').collect::<Vec<&str>>();
+            let pls = vals.get(5).unwrap().split(',').collect::<Vec<&str>>();
+
+            for i in 0..n {
+                let cp = &pls[(i * 3)..(i * 3 + 3)];
+                let nval = format!(
+                    "{}:{}:{}:{}:{}:{}",
+                    vals[0],
+                    vals[1],
+                    vals[2],
+                    vec![ads[0], ads[i + 1]].join(","),
+                    vafs[i],
+                    cp.join(",")
+                );
+
+                let mut rec = record.clone();
+                rec.value = nval.clone();
+                rec.alt = alts[i].to_string();
+
+                all.push(rec);
+            }
+        } else {
+            all.push(record);
+        }
+        let b = iter.reader().position().byte();
+        pg.set_position(b);
+    }
+
+    info!("{} VCF rows ", all.len());
+    let base_n = "N".to_string();
+    let res: Vec<Variant> = all
+        .par_iter_mut()
+        .map(|row| {
+            // for Sniffles normalize insertion/deletion position (after the pos)
+            if source == &VCFSource::Sniffles && row.reference == base_n && row.alt.len() > 1 {
+                row.pos -= 1;
+            }
+            return Variant::from_vcfrow(row, source.clone(), variant_type.clone()).unwrap();
+        })
+        .filter(|v| {
+            for cd in v.callers_data.iter() {
+                if cd.should_filter() {
+                    return false;
+                }
+            }
+            return true;
+        })
+        .collect();
+
+    Ok(res)
+}

+ 344 - 0
src/in_out/vcf_writer.rs

@@ -0,0 +1,344 @@
+use bgzf::{VirtualPosition, Writer};
+use log::info;
+use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
+// use noodles_csi::{self as csi, index::reference_sequence::bin::Chunk};
+use noodles_bgzf as bgzf;
+use noodles_tabix as tabix;
+use tabix::index::Indexer;
+
+use std::io::{BufRead, Write};
+use std::{fs::File, io};
+
+use anyhow::{Ok, Result};
+use noodles_vcf::{
+    self as vcf,
+    header::record::value::{
+        map::{Contig, Format},
+        Map,
+    },
+    record::{genotypes::keys::key, Genotypes, Position},
+};
+use vcf::Header;
+
+use crate::in_out::get_reader;
+use crate::{in_out::dict_reader::read_dict, variants::Variant};
+
+fn get_vcf_header(dict_file: &str) -> Result<Header> {
+    // let mut header: Vec<String> = vec!["##fileformat=VCFv4.2".to_string()];
+    //
+    // header.extend(
+    //     read_dict(&dict_file)?
+    //         .iter()
+    //         .map(|(sn, len)| format!("##contig=<ID={},length={}>", sn, len)),
+    // );
+    //
+    // header.push("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read depth\">".to_string());
+    // header.push(
+    //     "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Read depth for each allele\">"
+    //         .to_string(),
+    // );
+    //
+    // header.push(
+    //     vec![
+    //         "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "SAMPLE",
+    //     ]
+    //     .join("\t"),
+    // );
+
+    let mut header = vcf::Header::builder()
+        .add_format(key::READ_DEPTH, Map::<Format>::from(&key::READ_DEPTH))
+        .add_format(key::READ_DEPTHS, Map::<Format>::from(&key::READ_DEPTHS))
+        .add_sample_name("LOH")
+        .build();
+
+    for (ctg, len) in read_dict(&dict_file)? {
+        let mut contig = Map::<Contig>::new();
+        *contig.length_mut() = Some(len as usize);
+        header.contigs_mut().insert(ctg.parse()?, contig);
+
+        // header.add_contig(ctg.parse().unwrap(), contig);
+    }
+    // read_dict(&dict_file)?.iter().for_each(|(ctg, len)| {
+    //     let mut contig = Map::<Contig>::new();
+    //     *contig.length_mut() = Some(*len as usize);
+    //     header.add_contig(ctg.parse().unwrap(), contig);
+    // });
+
+    // header.add_format(key::READ_DEPTH, Map::<Format>::from(&key::READ_DEPTH));
+    // header.formats_mut().insert(key::READ_DEPTH, Map::<Format>::from(&key::READ_DEPTH));
+
+    // header.add_format(key::READ_DEPTHS, Map::<Format>::from(&key::READ_DEPTHS));
+    Ok(header)
+    // Ok(header.join("\n"))
+}
+
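+/// Writes the variants as a bgzip-compressed VCF at `path` plus a tabix index
+/// next to it ("<path>.tbi"), using the contigs from `dict_file` for the header.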
+pub fn write_vcf(path: &str, data: &mut Vec<Variant>, dict_file: &str) -> Result<()> {
+    let mut writer = File::create(path).map(bgzf::Writer::new)?;
+    // let mut indexer = csi::binning_index::Indexer::default().set_header(csi::binning_index::index::header::Builder::vcf().build());
+
+    let mut indexer = tabix::index::Indexer::default();
+    indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
+
+    // indexer.set_header(csi::binning_index::index::header::Builder::vcf())
+    // indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
+
+    let header = get_vcf_header(dict_file)?;
+    // indexer.set_header(csi::binning_index::index::header::Builder::);
+    let hu = header.to_string();
+    writer.write_all(hu.as_bytes())?;
+
+    let mut start_position = writer.virtual_position();
+    // let mut actual_contig = String::new();
+    // let mut actual_id = 0;
+
+    for (i, row) in data.iter_mut().enumerate() {
+        // if actual_contig != row.contig {
+        //     actual_contig = row.contig.clone();
+        //     actual_id += 1;
+        // }
+        let record = vcf::Record::builder()
+            .set_chromosome(row.contig.parse()?)
+            .set_position(Position::from(row.position as usize))
+            .set_ids(i.to_string().parse()?)
+            .set_reference_bases(format!("{}", row.reference).parse()?)
+            .set_alternate_bases(format!("{}", row.alternative).parse()?)
+            .set_genotypes(Genotypes::parse(&row.to_min_string(), &header)?)
+            .build()?;
+
+        writer.write_all(record.to_string().as_bytes())?;
+        writer.write_all(b"\n")?;
+        let end_position = writer.virtual_position();
+
+        let chunk = Chunk::new(start_position, end_position);
+
+        // let reference_sequence_name = record.chromosome().to_string();
+        let start = noodles_core::Position::try_from(usize::from(record.position()))
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+        let end = record
+            .end()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+            .and_then(|position| {
+                noodles_core::Position::try_from(usize::from(position))
+                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+            })?;
+
+        // indexer.add_record(Some((actual_id - 1, start, end, true)), chunk)?;
+        indexer.add_record(&row.contig, start, end, chunk)?;
+
+        // writer.write_record(&header, &record);
+
+        start_position = end_position;
+    }
+    // let index = indexer.build(read_dict(&dict_file)?.len());
+    let index = indexer.build();
+
+    // let index_file = File::create(&format!("{}.csi", path)).expect("error creating index file");
+    // let mut writer = csi::Writer::new(index_file);
+    // csi::write(&format!("{}.csi", path), &index)?;
+    tabix::write(&format!("{}.tbi", path), &index)?;
+
+    // writer.write_index(&index)?;
+
+    // writeln!(vcf, "{}", get_vcf_header(dict_file)?).unwrap();
+
+    // for (i, row) in data.iter_mut().enumerate() {
+    //     writeln!(
+    //         vcf,
+    //         "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}",
+    //         row.contig.to_string(),
+    //         row.position.to_string(),
+    //         i + 1,
+    //         row.reference.to_string(),
+    //         row.alternative.to_string(),
+    //         ".", // qual
+    //         row.to_min_string()
+    //     )?;
+    // }
+    // let index = vcf::index(path)?;
+    Ok(())
+}
+
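+/// Incremental VCF writer: streams bgzip-compressed records one at a time while
+/// keeping a tabix indexer up to date; call `write_index_finish` at the end to
+/// write the "<path>.tbi" index.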
+pub struct VariantWritter {
+    path: String,
+    writer: Writer<File>,
+    header: Header,
+    indexer: Indexer,
+    start_position: VirtualPosition,
+    id: usize,
+}
+
+impl VariantWritter {
+    pub fn new(path: &str, dict_file: &str) -> Result<Self> {
+        let mut writer = File::create(path).map(bgzf::Writer::new)?;
+        let mut indexer = tabix::index::Indexer::default();
+        indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
+        let header = get_vcf_header(dict_file)?;
+        let hs = header.to_string();
+        writer.write_all(hs.as_bytes())?;
+
+        let start_position = writer.virtual_position();
+        Ok(Self {
+            path: path.to_string(),
+            writer,
+            header,
+            indexer,
+            start_position,
+            id: 0,
+        })
+    }
+    pub fn write_variant(&mut self, row: &mut Variant) -> Result<()> {
+        let record = vcf::Record::builder()
+            .set_chromosome(row.contig.parse()?)
+            .set_position(Position::from(row.position as usize))
+            // .set_ids(Ids::default())
+            .set_ids(self.id.to_string().parse()?)
+            .set_reference_bases(format!("{}", row.reference).parse()?)
+            .set_alternate_bases(format!("{}", row.alternative).parse()?)
+            .set_genotypes(Genotypes::parse(&row.to_min_string(), &self.header)?)
+            .build()?;
+
+        // write_all guarantees the whole record is written; write() may be partial.
+        self.writer.write_all(record.to_string().as_bytes())?;
+        self.writer.write_all(b"\n")?;
+        self.writer.flush()?;
+        self.id += 1;
+        let end_position = self.writer.virtual_position();
+
+        let chunk = Chunk::new(self.start_position, end_position);
+
+        // let reference_sequence_name = record.chromosome().to_string();
+        let start = noodles_core::Position::try_from(usize::from(record.position()))
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+        let end = record
+            .end()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+            .and_then(|position| {
+                noodles_core::Position::try_from(usize::from(position))
+                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+            })?;
+
+        // indexer.add_record(Some((actual_id - 1, start, end, true)), chunk)?;
+        self.indexer.add_record(&row.contig, start, end, chunk)?;
+
+        // writer.write_record(&header, &record);
+
+        self.start_position = end_position;
+        Ok(())
+    }
+    pub fn write_index_finish(&mut self) -> Result<()> {
+        // self.writer.finish();
+        let mut idx = Indexer::default();
+        std::mem::swap(&mut idx, &mut self.indexer);
+        // std::mem::replace(&mut self.indexer, Indexer::default());
+        let index = idx.build();
+        tabix::write(&format!("{}.tbi", &self.path), &index)?;
+
+        Ok(())
+    }
+}
+
+// pub fn write_vcf_rayon(path: &str, data: &mut [Variant], dict_file: &str) -> Result<()> {
+//     let mut writer = File::create(path).map(bgzf::Writer::new)?;
+//     // let mut indexer = csi::binning_index::Indexer::default().set_header(csi::binning_index::index::header::Builder::vcf().build());
+//
+//     let mut indexer = tabix::index::Indexer::default();
+//     indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
+//
+//     // indexer.set_header(csi::binning_index::index::header::Builder::vcf())
+//     // indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
+//
+//     let header = get_vcf_header(dict_file)?;
+//     // indexer.set_header(csi::binning_index::index::header::Builder::);
+//     let hu = header.to_string();
+//     writer.write_all(hu.as_bytes())?;
+//
+//     let mut start_position = writer.virtual_position();
+//     // let mut actual_contig = String::new();
+//     // let mut actual_id = 0;
+//
+//     for (i, row) in data.iter_mut().enumerate() {
+//         // if actual_contig != row.contig {
+//         //     actual_contig = row.contig.clone();
+//         //     actual_id += 1;
+//         // }
+//         let record = vcf::Record::builder()
+//             .set_chromosome(row.contig.parse()?)
+//             .set_position(Position::from(row.position as usize))
+//             .set_ids(i.to_string().parse()?)
+//             .set_reference_bases(format!("{}", row.reference).parse()?)
+//             .set_alternate_bases(format!("{}", row.alternative).parse()?)
+//             .set_genotypes(Genotypes::parse(&row.to_min_string(), &header)?)
+//             .build()?;
+//
+//         writer.write_all(record.to_string().as_bytes())?;
+//         writer.write_all("\n".to_string().as_bytes())?;
+//         let end_position = writer.virtual_position();
+//
+//         let chunk = Chunk::new(start_position, end_position);
+//
+//         // let reference_sequence_name = record.chromosome().to_string();
+//         let start = noodles_core::Position::try_from(usize::from(record.position()))
+//             .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+//         let end = record
+//             .end()
+//             .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+//             .and_then(|position| {
+//                 noodles_core::Position::try_from(usize::from(position))
+//                     .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+//             })?;
+//
+//         // indexer.add_record(Some((actual_id - 1, start, end, true)), chunk)?;
+//         indexer.add_record(&row.contig, start, end, chunk)?;
+//
+//         // writer.write_record(&header, &record);
+//
+//         start_position = end_position;
+//     }
+//     // let index = indexer.build(read_dict(&dict_file)?.len());
+//     let index = indexer.build();
+//
+//     // let index_file = File::create(&format!("{}.csi", path)).expect("error creating index file");
+//     // let mut writer = csi::Writer::new(index_file);
+//     // csi::write(&format!("{}.csi", path), &index)?;
+//     tabix::write(&format!("{}.csi", path), &index)?;
+//
+//     // writer.write_index(&index)?;
+//
+//     // writeln!(vcf, "{}", get_vcf_header(dict_file)?).unwrap();
+//
+//     // for (i, row) in data.iter_mut().enumerate() {
+//     //     writeln!(
+//     //         vcf,
+//     //         "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}",
+//     //         row.contig.to_string(),
+//     //         row.position.to_string(),
+//     //         i + 1,
+//     //         row.reference.to_string(),
+//     //         row.alternative.to_string(),
+//     //         ".", // qual
+//     //         row.to_min_string()
+//     //     )?;
+//     // }
+//     // let index = vcf::index(path)?;
+//     Ok(())
+// }
+
+pub fn vcf_header_from(path: &str) -> Result<String> {
+    info!("Parsing {} for Header", path);
+
+    let reader = io::BufReader::new(get_reader(&path)?);
+
+    let mut res = Vec::new();
+    for line in reader.lines() {
+        if let std::result::Result::Ok(line) = line {
+            if line.starts_with("#") {
+                res.push(line);
+            } else {
+                break;
+            }
+        }
+    }
+
+    Ok(res.join("\n"))
+}

+ 66 - 0
src/lib.rs

@@ -0,0 +1,66 @@
+pub mod annotations;
+pub mod callers;
+pub mod config;
+pub mod in_out;
+pub mod sql;
+pub mod utils;
+pub mod variants;
+
+#[cfg(test)]
+mod tests {
+    use anyhow::{Ok, Result};
+    use indicatif::MultiProgress;
+    use indicatif_log_bridge::LogWrapper;
+    use log::info;
+
+    use crate::{
+        config::Config,
+        sql::variants_sql::{load_variants_name, remove_variants_names},
+        variants::{VCFSource, Variant, VariantType, Variants},
+    };
+
+    use super::*;
+    #[test]
+    fn get_config() -> Result<()> {
+        let conf_path = Config::path()?;
+        println!("Configuration path {}", conf_path.to_str().unwrap());
+        Ok(())
+    }
+
+    #[test]
+    fn run_pipe() -> Result<()> {
+        let name = "GALLET";
+
+        let logger =
+            env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+                .build();
+        let multi = MultiProgress::new();
+        LogWrapper::new(multi.clone(), logger).try_init().unwrap();
+
+        variants::run_pipe(name, &multi)?;
+        
+        Ok(())
+    }
+
+    #[test]
+    fn load_from_db() -> Result<()> {
+        let name = "CAMARA";
+        let logger =
+            env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+                .build();
+        let multi = MultiProgress::new();
+        LogWrapper::new(multi.clone(), logger).try_init().unwrap();
+
+        let mut variants = load_variants_name(name, &multi)?;
+        let v = variants.get_cat(&variants::VariantCategory::Constit);
+        // let v = v.iter().filter(|v| 0.25 < v.vaf.unwrap() && v.vaf.unwrap() < 0.75 )
+        //     .map(|v| {
+        //
+        // }).collect::<Vec<Variant>>();
+        //
+
+        variants.write_vcf_cat("test.vcf.gz", &variants::VariantCategory::Somatic)?;
+        println!("{} variants loaded from db.", variants.len());
+        Ok(())
+    }
+}

+ 2 - 0
src/sql/mod.rs

@@ -0,0 +1,2 @@
+pub mod variants_sql;
+pub mod stats_sql;

+ 47 - 0
src/sql/stats_sql.rs

@@ -0,0 +1,47 @@
+use anyhow::{anyhow, Context, Result};
+use serde::{Deserialize, Serialize};
+use serde_rusqlite::*;
+
+#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
+struct StatsSQL {
+    pub key: String,
+    pub value: String,
+}
+
+impl StatsSQL {
+    pub fn insert_into_stats(&self, connection: &rusqlite::Connection) -> Result<()> {
+        let row = self;
+        let r = connection.execute(
+            "INSERT INTO Stats (key, value) VALUES (:key, :value)",
+            to_params_named(&self).unwrap().to_slice().as_slice(),
+        );
+        if let Err(e) = r {
+            return Err(anyhow!("Error inserting a StatsSQL row: {} {:?}", e, row));
+        }
+
+        Ok(())
+    }
+}
+pub fn init_stats_table(connection: &rusqlite::Connection) -> Result<usize> {
+    connection
+        .execute(
+            r"
+           CREATE TABLE IF NOT EXISTS Stats (
+               id INTEGER PRIMARY KEY AUTOINCREMENT,
+               key TEXT,
+               value TEXT
+            )",
+            [],
+        )
+        .context(anyhow!("Failed to create TABLE Stats"))
+}
+
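+/// Opens (or creates) the SQLite database at `path`, ensures the Stats table
+/// exists and inserts a single key/value row.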
+pub fn insert_stats(key: String, value: String, path: &str) -> Result<()> {
+    let connection = rusqlite::Connection::open(&path)?;
+    init_stats_table(&connection)?;
+
+    let v = StatsSQL { key, value };
+    v.insert_into_stats(&connection)?;
+    Ok(())
+}

+ 244 - 0
src/sql/variants_sql.rs

@@ -0,0 +1,244 @@
+use std::str::FromStr;
+
+use anyhow::{anyhow, Context, Ok, Result};
+use indicatif::MultiProgress;
+use serde::{Deserialize, Serialize};
+use serde_rusqlite::*;
+
+use crate::{variants::{
+    AnnotationType, Format, ReferenceAlternative, VCFSource, Variant, Variants
+}, config::Config, utils::new_pg_speed};
+
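+/// Flattened, mostly JSON-serialized representation of a `Variant` as stored in
+/// the SQLite Variants table, with a few frequently queried fields denormalized
+/// (mean VAF, gene, consequence, gnomAD AF, COSMIC count, NCBI feature).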
+#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
+pub struct VariantSQL {
+    name: String,
+    contig: String,
+    position: u32,
+    reference: String,
+    alternative: String,
+    m_vaf: f64,
+    callers: String,
+    callers_data: String,
+    variant_type: String,
+    vep: Option<String>,
+    selected_vep: Option<String>,
+    consequence: Option<String>,
+    gene: Option<String>,
+    gene_distance: Option<u32>,
+    hgvs_c: Option<String>,
+    hgvs_p: Option<String>,
+    annotations: String,
+    gnomad_af: Option<f64>,
+    cosmic_n: Option<u64>,
+    ncbi_feature: Option<String>,
+}
+
+impl TryFrom<&VariantSQL> for Variant {
+    type Error = anyhow::Error;
+
+    fn try_from(value: &VariantSQL) -> Result<Self> {
+        let source: Vec<VCFSource> = value
+            .callers
+            .split(",")
+            .map(|e| VCFSource::from_str(e).unwrap())
+            .collect();
+        Ok(Self {
+            contig: value.contig.clone(),
+            position: value.position.clone(),
+            reference: ReferenceAlternative::from_str(&value.reference)?,
+            alternative: ReferenceAlternative::from_str(&value.alternative)?,
+            callers_data: serde_json::from_str(&value.callers_data)?,
+            n_alt: None,
+            n_ref: None,
+            vaf: None,
+            depth: None,
+            variant_type: serde_json::from_str(&value.variant_type)?,
+            source,
+            annotations: serde_json::from_str(&value.annotations)?,
+        })
+    }
+}
+
+impl TryFrom<&Variant> for VariantSQL {
+    type Error = anyhow::Error;
+
+    fn try_from(v: &Variant) -> Result<Self> {
+        let vaf_sum = v.callers_data.iter().map(|e| e.get_vaf()).sum::<f64>();
+        let m_vaf = vaf_sum / v.callers_data.len() as f64;
+
+        // Get the callers name as joined string
+        let mut callers: Vec<String> = v
+            .callers_data
+            .iter()
+            .map(|cd| match cd.format {
+                Format::DeepVariant(_) => "DeepVariant".to_string(),
+                Format::ClairS(_) => "ClairS".to_string(),
+                Format::Sniffles(_) => "Sniffles".to_string(),
+                Format::Nanomonsv(_) => "Nanomonsv".to_string(),
+            })
+            .collect();
+        callers.sort();
+        let callers = callers.join(",");
+
+        // Put the relevant VEP result
+        let (vep, selected_vep, consequence, gene, gene_distance, hgvs_c, hgvs_p) =
+            if let std::result::Result::Ok(best_vep) = v.get_best_vep() {
+                // let best_vep = get_best_vep(&v.vep).context("best vep")?;
+                let csq = if let Some(mut csq) = best_vep.consequence {
+                    csq.sort();
+                    Some(csq.join(","))
+                } else {
+                    None
+                };
+                let extra = best_vep.extra;
+                (
+                    Some(serde_json::to_string(&v.get_veps()).context("vep data")?),
+                    best_vep.feature,
+                    csq,
+                    extra.symbol.clone(),
+                    extra.distance,
+                    extra.hgvs_c,
+                    extra.hgvs_p,
+                )
+            } else {
+                (None, None, None, None, None, None, None)
+            };
+
+        let (gnomad_af, cosmic_n, ncbi_feature) = if v.annotations.len() > 0 {
+            let mut gnomad_af: Option<f64> = None;
+            let mut cosmic_n: Option<u64> = None;
+            let mut ncbi_feature: Option<String> = None;
+            for annot in v.annotations.iter() {
+                match annot {
+                    AnnotationType::Cosmic(c) => cosmic_n = Some(c.cosmic_cnt),
+                    AnnotationType::GnomAD(g) => gnomad_af = Some(g.gnomad_af),
+                    AnnotationType::NCBIGFF(n) => ncbi_feature = Some(n.feature.to_string()),
+                    _ => (),
+                }
+            }
+
+            (gnomad_af, cosmic_n, ncbi_feature)
+        } else {
+            (None, None, None)
+        };
+
+        Ok(VariantSQL {
+            name: "".to_string(),
+            contig: v.contig.to_string(),
+            position: v.position,
+            reference: v.reference.to_string(),
+            alternative: v.alternative.to_string(),
+            m_vaf,
+            callers,
+            callers_data: serde_json::to_string(&v.callers_data).context("callers_data")?,
+            variant_type: serde_json::to_string(&v.variant_type)
+                .context("Error while serializing variant_type from Variant to VariantSQL")?,
+            vep,
+            selected_vep,
+            consequence,
+            gene,
+            gene_distance,
+            hgvs_c,
+            hgvs_p,
+            annotations: serde_json::to_string(&v.annotations).context("annotations")?,
+            gnomad_af,
+            cosmic_n,
+            ncbi_feature,
+        })
+    }
+}
+
+impl VariantSQL {
+    pub fn insert_into_variants(
+        &mut self,
+        connection: &rusqlite::Connection,
+        name: String,
+    ) -> Result<()> {
+        self.name = name.clone();
+
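+        // serde_rusqlite's to_params_named maps the struct fields onto the
+        // :name placeholders used in the INSERT statement below.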
+        let r = connection.execute(
+                "INSERT INTO Variants (name, contig, position, reference, alternative, m_vaf, callers, callers_data, variant_type, vep, selected_vep, annotations, consequence, gene, gene_distance, hgvs_c, hgvs_p, gnomad_af, cosmic_n, ncbi_feature) VALUES (:name, :contig, :position, :reference, :alternative, :m_vaf, :callers, :callers_data, :variant_type, :vep, :selected_vep, :annotations, :consequence, :gene, :gene_distance, :hgvs_c, :hgvs_p, :gnomad_af, :cosmic_n, :ncbi_feature)",
+                to_params_named(&self).unwrap().to_slice().as_slice(),
+            );
+        match r {
+            std::result::Result::Ok(_) => return Ok(()),
+            Err(r) => panic!("Error with VariantSQL inserting: {} {:?}", r, self),
+        }
+    }
+}
+pub fn init_variants_table(connection: &rusqlite::Connection) -> Result<usize> {
+    connection
+        .execute(
+            r"
+           CREATE TABLE IF NOT EXISTS Variants (
+               id INTEGER PRIMARY KEY AUTOINCREMENT,
+               name TEXT,
+               contig TEXT,
+               position INT,
+               reference TEXT,
+               alternative TEXT,
+               m_vaf REAL,
+               callers TEXT,
+               callers_data BLOB,
+               variant_type BLOB,
+               vep BLOB,
+               selected_vep TEXT,
+               annotations BLOB,
+               consequence TEXT,
+               gene TEXT,
+               gene_distance INT,
+               hgvs_c TEXT,
+               hgvs_p TEXT,
+               gnomad_af REAL,
+               cosmic_n INT,
+               ncbi_feature TEXT
+            )",
+            [],
+        )
+        .context(anyhow!("Failed to create TABLE Variants"))
+}
+
+pub fn insert_variants(variants: &Variants, path: &str) -> Result<()> {
+    let connection = rusqlite::Connection::open(&path)?;
+    init_variants_table(&connection)?;
+    let pg = variants.mp.add(new_pg_speed(variants.len() as u64));
+    pg.set_message(format!("Inserting data into DB: {path}"));
+
+    for v in variants.data.iter() {
+        let mut var_sql: VariantSQL = v.try_into()?;
+        var_sql.insert_into_variants(&connection, variants.name.clone())?;
+        pg.inc(1);
+    }
+    pg.finish();
+    Ok(())
+}
+
+pub fn remove_variants_names(db_path: &str, name: &str) -> Result<()> {
+    let connection = rusqlite::Connection::open(&db_path)?;
+    connection.execute("DELETE FROM Variants WHERE name = :name", &[(":name", name)])?;
+    Ok(())
+}
+
+pub fn load_variants_name(name: &str, mp: &MultiProgress) -> Result<Variants> {
+    let cfg = Config::get()?;
+    let connection = rusqlite::Connection::open(&cfg.db)?;
+    let mut stmt = connection.prepare("SELECT * FROM Variants WHERE name = (?1)")?;
+    let rows = stmt.query_and_then([name], |r| {
+        match from_row::<VariantSQL>(r) {
+            std::result::Result::Ok(row) => {
+                match Variant::try_from(&row) {
+                    std::result::Result::Ok(v) => Ok(v),
+                    Err(e) => Err(anyhow!(e)),
+                }
+            },
+            Err(e) => Err(anyhow!(e)),
+        }
+    })?;
+
+    let mut data = Vec::new();
+    for res in rows {
+        data.push(res?);
+    }
+
+    Ok(Variants::from_vec(name.to_string(), mp, data))
+}

+ 221 - 0
src/utils.rs

@@ -0,0 +1,221 @@
+use std::time::Duration;
+
+use anyhow::{Context, Ok, Result};
+use hashbrown::HashMap;
+use indicatif::{ProgressBar, ProgressStyle};
+use statrs::distribution::{ChiSquared, ContinuousCDF};
+
+pub fn chi_square_test_impl(observed: &[f64], expected: &[f64]) -> anyhow::Result<f64> {
+    if observed.len() != expected.len() {
+        return Err(anyhow::anyhow!("Input vectors must have the same length"));
+    }
+
+    // Calculate the chi-squared statistic
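+    // (with Yates' continuity correction: ((|obs - exp| - 0.5)^2 / exp) per cell)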
+    let chi_squared_statistic: f64 = observed
+        .iter()
+        .zip(expected.iter())
+        .map(|(obs, exp)| ((obs - exp).abs() - 0.5).powi(2) / exp)
+        .sum();
+
+    // Degrees of freedom is fixed at 1 for the 2x2 proportions test below
+    // (the generic rule would be `observed.len() - 1`).
+    let degrees_of_freedom = 1.0;
+
+    // Calculate p-value using chi-squared distribution
+    let chi_squared_distribution = ChiSquared::new(degrees_of_freedom).unwrap();
+    let p_value = 1.0 - chi_squared_distribution.cdf(chi_squared_statistic);
+
+    // You can use the p-value to make decisions based on your significance level
+    // For example, with a significance level of 0.05, if p_value < 0.05, reject the null hypothesis
+    Ok(p_value)
+}
+
+/// 2-sample test for equality of proportions with continuity correction
+/// thanks to ChatGPT
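+/// e.g. `chi_square_test_for_proportions(8.0, 200.0, 1.0, 250.0)` gives the
+/// p-value for 8/200 alt reads in one sample against 1/250 in the other.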
+pub fn chi_square_test_for_proportions(
+    success_a: f64,
+    total_a: f64,
+    success_b: f64,
+    total_b: f64,
+) -> anyhow::Result<f64> {
+    let observed_counts = vec![
+        success_a,
+        total_a - success_a,
+        success_b,
+        total_b - success_b,
+    ];
+    let expected_counts = vec![
+        total_a * (success_a + success_b) / (total_a + total_b),
+        total_a * (total_a - success_a + total_b - success_b) / (total_a + total_b),
+        total_b * (success_a + success_b) / (total_a + total_b),
+        total_b * (total_a - success_a + total_b - success_b) / (total_a + total_b),
+    ];
+
+    chi_square_test_impl(&observed_counts, &expected_counts)
+}
+
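+/// Pile up the reads covering `chr:start` in an indexed BAM and return one byte
+/// per read: the aligned base, b'D' over a deletion, or b'I' when
+/// `with_next_ins` is set and an insertion starts right after the position.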
+pub fn get_hts_nt_pileup(
+    bam: &mut rust_htslib::bam::IndexedReader,
+    chr: &str,
+    start: i32,
+    with_next_ins: bool,
+) -> Result<Vec<u8>> {
+    use rust_htslib::{bam, bam::Read};
+    let stop = start + 1;
+    let mut bases = Vec::new();
+    bam.fetch((chr, start, stop))?;
+    let mut bam_pileup = Vec::new();
+    for p in bam.pileup() {
+        let pileup = p.context(format!(
+            "Can't pileup BAM at position {}:{}-{}",
+            chr, start, stop
+        ))?;
+        let position = pileup.pos() as i32;
+        if position == start {
+            for alignment in pileup.alignments() {
+                match alignment.indel() {
+                    bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
+                    bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
+                    _ => {
+                        let record = alignment.record();
+                        if record.seq_len() > 0 {
+                            if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)? {
+                                bases.push(b);
+                            }
+                        } else if alignment.is_del() {
+                            bases.push(b'D');
+                        }
+                    }
+                }
+            }
+        }
+    }
+    Ok(bases)
+}
+
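+/// Walk the CIGAR of `record` to find the base aligned at reference position
+/// `at_pos`; returns b'D' over a deletion, b'I' when `with_next_ins` is set and
+/// an insertion follows, or None if the read does not cover the position.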
+pub fn hts_base_at(
+    record: &rust_htslib::bam::record::Record,
+    at_pos: u32,
+    with_next_ins: bool,
+) -> Result<Option<u8>> {
+    use rust_htslib::bam::record::Cigar;
+
+    let cigar = record.cigar();
+    let seq = record.seq();
+    let pos = cigar.pos() as u32;
+
+    let mut read_i = 0u32;
+    let at_pos = at_pos - 1;
+    let mut ref_pos = pos;
+    if ref_pos > at_pos {
+        return Ok(None);
+    }
+
+    for (id, op) in cigar.iter().enumerate() {
+        let (add_read, add_ref) = match *op {
+            Cigar::Match(len) | Cigar::Equal(len) | Cigar::Diff(len) => (len, len),
+            Cigar::Ins(len) => (len, 0),
+            Cigar::Del(len) => (0, len),
+            Cigar::RefSkip(len) => (0, len),
+            Cigar::SoftClip(len) => (len, 0),
+            Cigar::HardClip(_) | Cigar::Pad(_) => (0, 0),
+        };
+        // If at the end of the op len and next op is Ins return I
+        if with_next_ins && ref_pos + add_read == at_pos + 1 {
+            if let Some(Cigar::Ins(_)) = cigar.get(id + 1) {
+                return Ok(Some(b'I'));
+            }
+        }
+
+        if ref_pos + add_ref > at_pos {
+            // Handle deletions directly
+            if let Cigar::Del(_) = *op {
+                return Ok(Some(b'D'));
+            } else if let Cigar::RefSkip(_) = op {
+                return Ok(None);
+            } else {
+                let diff = at_pos - ref_pos;
+                let p = read_i + diff;
+                return Ok(Some(seq[p as usize]));
+            }
+        }
+
+        read_i += add_read;
+        ref_pos += add_ref;
+    }
+    Ok(None)
+}
+
+// thanks to chatGPT (the best)
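+/// Shannon entropy (in bits) of the base composition, H = -sum(p_i * log2(p_i));
+/// e.g. "acgt" -> 2.0, "aaaa" -> 0.0.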
+pub fn estimate_shannon_entropy(dna_sequence: &str) -> f64 {
+    let m = dna_sequence.len() as f64;
+
+    // Count occurrences of each base
+    let mut bases = HashMap::<char, usize>::new();
+    for base in dna_sequence.chars() {
+        *bases.entry(base).or_insert(0) += 1;
+    }
+
+    // Calculate Shannon entropy
+    let mut shannon_entropy_value = 0.0;
+    for &n_i in bases.values() {
+        let p_i = n_i as f64 / m;
+        shannon_entropy_value -= p_i * p_i.log2();
+    }
+
+    shannon_entropy_value
+}
+
+pub fn print_stat_cat(s: &HashMap<String, u32>, denum: u32) {
+    let denum = denum as f32;
+    let mut v: Vec<(&String, &u32)> = s.iter().collect();
+    v.sort_by(|(_, a), (_, b)| b.cmp(a));
+
+    let mut table = prettytable::table!(["category", "n", "%"]);
+
+    v.iter().for_each(|(k, v)| {
+        let p = (**v as f32) * 100.0 / denum;
+        let p = format!("{:.2}", p);
+        table.add_row([*k, &v.to_string(), &p].into());
+    });
+
+    table.printstd();
+}
+
+pub fn new_pg(len: u64) -> ProgressBar {
+    let sty = ProgressStyle::with_template(
+        " {spinner}  {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {human_pos:>7}/{human_len:7}",
+    )
+    .unwrap()
+    .progress_chars("=>-");
+    let pg = ProgressBar::new(len);
+    pg.set_style(sty);
+    pg.enable_steady_tick(Duration::from_millis(200));
+    pg
+}
+
+pub fn new_pg_speed(len: u64) -> ProgressBar {
+    let sty = ProgressStyle::with_template(
+        "  {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {human_pos:>7}/{human_len:7} {per_sec}",
+    )
+    .unwrap()
+    .progress_chars("=>-");
+    let pg = ProgressBar::new(len);
+    pg.set_style(sty);
+    pg.enable_steady_tick(Duration::from_millis(200));
+    pg
+}
+
+pub fn new_pg_bytes(len: u64) -> ProgressBar {
+    let sty = ProgressStyle::with_template(
+        "  {msg:>7.cyan} [{elapsed_precise}] [{bar:40}] {decimal_bytes:>7}/{decimal_total_bytes:7} {decimal_bytes_per_sec}",
+    )
+    .unwrap()
+    .progress_chars("=>-");
+    let pg = ProgressBar::new(len);
+    pg.set_style(sty);
+    pg.enable_steady_tick(Duration::from_millis(200));
+    pg
+}

+ 1483 - 0
src/variants.rs

@@ -0,0 +1,1483 @@
+use crate::{
+    annotations::{
+        cosmic::Cosmic,
+        echtvar::{parse_echtvar_val, run_echtvar},
+        gnomad::GnomAD,
+        ncbi_gff::NCBIGFF,
+        vep::{get_best_vep, vep_chunk, VEP},
+    },
+    callers::{
+        clairs::{ClairSFormat, ClairSInfo},
+        deepvariant::{DeepVariantFormat, DeepVariantInfo},
+        nanomonsv::{NanomonsvFormat, NanomonsvInfo},
+        sniffles::{SnifflesFormat, SnifflesInfo},
+    },
+    config::{self, Config},
+    in_out::{
+        self,
+        dict_reader::read_dict,
+        get_reader,
+        vcf_reader::{read_vcf, VCFRow},
+        vcf_writer::{vcf_header_from, VariantWritter},
+    },
+    sql::{stats_sql::insert_stats, variants_sql::insert_variants},
+    utils::{
+        chi_square_test_for_proportions, estimate_shannon_entropy, get_hts_nt_pileup, new_pg,
+        new_pg_speed, print_stat_cat,
+    },
+};
+use anyhow::{anyhow, Context, Ok, Result};
+use csv::ReaderBuilder;
+use dashmap::DashMap;
+use hashbrown::HashMap;
+use indicatif::{MultiProgress, ParallelProgressIterator};
+use log::{info, warn};
+use noodles_core::{region::Region, Position};
+use noodles_fasta::indexed_reader::Builder as FastaBuilder;
+use noodles_gff as gff;
+
+use rayon::prelude::*;
+use serde::{Deserialize, Serialize};
+use std::io::Write;
+use std::{
+    env::temp_dir,
+    fmt,
+    fs::File,
+    str::FromStr,
+    sync::{
+        atomic::{AtomicI32, Ordering},
+        Arc,
+    },
+};
+
+// chr12:25116542|G>T KRAS
+#[derive(Debug, Clone)]
+pub struct Variants {
+    pub name: String,
+    pub data: Vec<Variant>,
+    pub constit: DashMap<String, Variant>,
+    pub stats_vcf: StatsVCF,
+    pub stats_bam: StatsBAM,
+    pub cfg: Config,
+    pub mp: MultiProgress,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct StatsVCF {
+    n_tumoral_init: usize,
+    n_constit_init: usize,
+    n_constit: i32,
+    n_loh: i32,
+    n_low_mrd_depth: i32,
+}
+
+impl fmt::Display for StatsVCF {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let k = 100.0 / self.n_tumoral_init as f64;
+        let string = format!(
+            "VCF filters found {} ({:.1}%) constit, {} ({:.1}%) LOH, {} ({:.1}%) Low depth for constit variants",
+            self.n_constit, self.n_constit as f64 * k,
+            self.n_loh, self.n_loh as f64 * k,
+            self.n_low_mrd_depth, self.n_low_mrd_depth as f64 * k
+        );
+        write!(f, "{}", string)
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct StatsBAM {
+    n_lasting: i32,
+    n_constit: i32,
+    n_low_mrd_depth: i32,
+    n_low_diversity: i32,
+    n_somatic: i32,
+}
+
+impl fmt::Display for StatsBAM {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let k = 100.0 / self.n_lasting as f64;
+        let string = format!(
+            "BAM filters found {} ({:.1}%) constit, {} ({:.1}%) low depth for constit variants, {} ({:.1}%) low diversity of sequence at the variant position, {} ({:.1}%) somatic variants",
+            self.n_constit, self.n_constit as f64 * k,
+            self.n_low_mrd_depth, self.n_low_mrd_depth as f64 * k,
+            self.n_low_diversity, self.n_low_diversity as f64 * k,
+            self.n_somatic, self.n_somatic as f64 * k
+        );
+        write!(f, "{}", string)
+    }
+}
+
+impl Variants {
+    pub fn from_vec(name: String, mp: &MultiProgress, data: Vec<Variant>) -> Self {
+        Self {
+            name,
+            data,
+            constit: DashMap::new(),
+            stats_vcf: StatsVCF::default(),
+            stats_bam: StatsBAM::default(),
+            cfg: Config::get().unwrap(),
+            mp: mp.clone(),
+        }
+    }
+
+    pub fn from_vcfs(
+        name: String,
+        v: Vec<(&str, &VCFSource, &VariantType)>,
+        cfg: &Config,
+        mp: MultiProgress,
+    ) -> Result<Self> {
+        let pg = mp.add(new_pg(v.len() as u64));
+        pg.set_message("Reading VCF");
+
+        let constit: Arc<DashMap<String, Variant>> = Arc::new(DashMap::new());
+        let n_constit = AtomicI32::new(0);
+        let data: Vec<Variant> = v
+            .par_iter()
+            // .progress_count(v.len() as u64)
+            .flat_map(|(path, source, variant_type)| {
+                let r = match variant_type {
+                    VariantType::Somatic => read_vcf(path, source, variant_type).unwrap(),
+                    VariantType::Constitutionnal => {
+                        read_vcf(path, source, variant_type)
+                            .unwrap()
+                            .par_iter()
+                            .for_each(|e| {
+                                n_constit.fetch_add(1, Ordering::SeqCst);
+                                constit.insert(
+                                    format!(
+                                        "{}:{}|{}>{}",
+                                        e.contig, e.position, e.reference, e.alternative
+                                    ),
+                                    e.clone(),
+                                );
+                            });
+                        vec![]
+                    }
+                };
+                pg.inc(1);
+                r
+            })
+            .collect();
+
+        let stats_vcf = StatsVCF::default();
+        let stats_bam = StatsBAM::default();
+
+        let constit = Arc::try_unwrap(constit).unwrap();
+        let elapsed = pg.elapsed();
+        pg.finish();
+        info!("{} variants parsed from somatic VCFs and {} variant positions parsed from constitutional VCFs. Executed in {}s", data.len(), constit.len(), elapsed.as_secs());
+        let cfg = cfg.clone();
+
+        return Ok(Self {
+            name,
+            data,
+            constit,
+            stats_vcf,
+            stats_bam,
+            cfg,
+            mp: mp.clone(),
+        });
+    }
+
+    pub fn vcf_filters(&mut self) {
+        let cfg = &self.cfg;
+        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
+        pg.set_message("VCF filtering");
+
+        let n_tumoral_init = self.len();
+        let n_constit_init = self.constit_len();
+        let min_loh_diff = cfg.deepvariant_loh_pval as f64;
+        let min_mrd_depth = cfg.min_mrd_depth;
+
+        info!("Filtering Constitutional (reported variant in constit), LOH (VAF proportion test < {}), LowMRDDepth (< {} in constit) variants by VCF annotations of {} likely somatic variants", min_loh_diff, min_mrd_depth, n_tumoral_init);
+        let n_constit = AtomicI32::new(0);
+        let n_loh = AtomicI32::new(0);
+        let n_low_mrd_depth = AtomicI32::new(0);
+        self.data = self
+            .data
+            .par_iter()
+            .map(|e| {
+                let mut tumoral = e.clone();
+                let k = format!(
+                    "{}:{}|{}>{}",
+                    tumoral.contig, tumoral.position, tumoral.reference, tumoral.alternative
+                );
+
+                if let Some(mut constit) = self.constit.get_mut(&k) {
+                    if constit.get_depth() < min_mrd_depth {
+                        n_low_mrd_depth.fetch_add(1, Ordering::SeqCst);
+                        tumoral.annotations.push(AnnotationType::VariantCategory(
+                            VariantCategory::LowMRDDepth,
+                        ));
+                    } else if constit.get_n_alt() == constit.get_depth()
+                        && tumoral.get_n_alt() == tumoral.get_depth()
+                    {
+                        n_constit.fetch_add(1, Ordering::SeqCst);
+                        tumoral
+                            .annotations
+                            .push(AnnotationType::VariantCategory(VariantCategory::Constit));
+                    } else {
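+                        // Compare tumor vs constit allele fractions; a small
+                        // p-value suggests LOH rather than a shared germline call.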
+                        let pval = chi_square_test_for_proportions(
+                            tumoral.get_n_alt() as f64,
+                            tumoral.get_depth() as f64,
+                            constit.get_n_alt() as f64,
+                            constit.get_depth() as f64,
+                        )
+                        .unwrap();
+                        if pval != 0.0 && pval <= min_loh_diff {
+                            n_loh.fetch_add(1, Ordering::SeqCst);
+                            tumoral
+                                .annotations
+                                .push(AnnotationType::VariantCategory(VariantCategory::LOH));
+                        } else {
+                            n_constit.fetch_add(1, Ordering::SeqCst);
+                            tumoral
+                                .annotations
+                                .push(AnnotationType::VariantCategory(VariantCategory::Constit));
+                        }
+                    }
+                // If not in the constit registry, for ClairS fall back to the constit depth and n_alt reported in the VCF
+                } else if let Format::ClairS(format) = &tumoral.callers_data.get(0).unwrap().format
+                {
+                    if format.ndp < min_mrd_depth {
+                        n_low_mrd_depth.fetch_add(1, Ordering::SeqCst);
+                        tumoral.annotations.push(AnnotationType::VariantCategory(
+                            VariantCategory::LowMRDDepth,
+                        ));
+                    } else if let ReferenceAlternative::Nucleotide(alt_base) = &tumoral.alternative
+                    {
+                        let mrd_n_alt = match alt_base {
+                            Base::A => format.nau,
+                            Base::T => format.ntu,
+                            Base::C => format.ncu,
+                            Base::G => format.ngu,
+                            _ => 0,
+                        };
+                        if mrd_n_alt != 0 {
+                            n_constit.fetch_add(1, Ordering::SeqCst);
+                            tumoral
+                                .annotations
+                                .push(AnnotationType::VariantCategory(VariantCategory::Constit));
+                        }
+                    }
+                }
+                pg.inc(1);
+                tumoral
+            })
+            .collect();
+
+        let n_constit = n_constit.load(Ordering::SeqCst);
+        let n_loh = n_loh.load(Ordering::SeqCst);
+        let n_low_mrd_depth = n_low_mrd_depth.load(Ordering::SeqCst);
+
+        self.stats_vcf = StatsVCF {
+            n_tumoral_init,
+            n_constit_init,
+            n_constit,
+            n_loh,
+            n_low_mrd_depth,
+        };
+        // let elapsed = start.elapsed();
+        let elapsed = pg.elapsed();
+        pg.finish();
+        info!("{}. Executed in {}s", self.stats_vcf, elapsed.as_secs());
+    }
+
+    /// Filter variants by reading information from the constit BAM.
+    pub fn bam_filters(&mut self, mrd_bam: &str) {
+        let cfg = &self.cfg;
+        // let start = Instant::now();
+        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
+        pg.set_message("BAM filtering");
+
+        let min_mrd_depth = cfg.min_mrd_depth;
+        info!("Filtering Constitutional (Alt base found in BAM pileup), LowDiversity (sequence +/- 20nt around variant with entropy < {}), LowMRDDepth (BAM pileup depth < {}) variants by BAM pileup fetching of {} likely somatic variants", cfg.min_diversity, min_mrd_depth, self.stats_vcf.n_tumoral_init - (self.stats_vcf.n_constit + self.stats_vcf.n_loh + self.stats_vcf.n_low_mrd_depth) as usize);
+
+        let n_already = AtomicI32::new(0);
+        let n_constit = AtomicI32::new(0);
+        let n_low_mrd_depth = AtomicI32::new(0);
+        let n_low_diversity = AtomicI32::new(0);
+        let n_somatic = AtomicI32::new(0);
+        self.data.par_chunks_mut(10_000).for_each(|chunk| {
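+            // Each 10k-variant chunk opens its own BAM and FASTA readers so the
+            // per-variant pileup lookups can run in parallel.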
+            let mut bam = rust_htslib::bam::IndexedReader::from_path(mrd_bam)
+                .context(anyhow!("Reading {}", mrd_bam))
+                .unwrap();
+            let mut genome_reader = FastaBuilder::default()
+                .build_from_path(&cfg.reference_fa)
+                .unwrap();
+
+            for tumoral in chunk.iter_mut() {
+                pg.inc(1);
+
+                if tumoral.annotations.len() > 0 {
+                    n_already.fetch_add(1, Ordering::SeqCst);
+                    continue;
+                }
+                let (pos, is_ins) = match tumoral.alt_cat() {
+                    AlterationCategory::INS => (tumoral.position, true),
+                    AlterationCategory::DEL => (tumoral.position, false),
+                    _ => (tumoral.position, false),
+                };
+                match get_hts_nt_pileup(
+                    &mut bam,
+                    &tumoral.contig,
+                    pos as i32,
+                    is_ins, // tumoral.position as i32,
+                ) {
+                    std::result::Result::Ok(bases) => {
+                        let depth = bases.len() as u32;
+
+                        if depth < min_mrd_depth {
+                            n_low_mrd_depth.fetch_add(1, Ordering::SeqCst);
+                            tumoral.annotations.push(AnnotationType::VariantCategory(
+                                VariantCategory::LowMRDDepth,
+                            ));
+                        } else {
+                            // Check local diversity
+                            let start =
+                                Position::try_from((tumoral.position - 20) as usize).unwrap();
+                            let end = Position::try_from((tumoral.position + 19) as usize).unwrap();
+                            let r = Region::new(tumoral.contig.to_string(), start..=end);
+                            if let std::result::Result::Ok(reg) = genome_reader.query(&r) {
+                                let s = reg.sequence();
+                                let u = s.as_ref();
+                                let s = String::from_utf8(u.to_vec()).unwrap();
+                                let ent = estimate_shannon_entropy(&s.to_lowercase());
+
+                                if ent < cfg.min_diversity {
+                                    if tumoral.position == 148725437 {
+                                        warn!("POS {}", ent);
+                                    }
+                                    n_low_diversity.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::LowDiversity,
+                                    ));
+                                    continue;
+                                }
+                            }
+
+                            // Check if the base is in constitutionnal pileup
+                            if let ReferenceAlternative::Nucleotide(alt_b) = &tumoral.alternative {
+                                let alt_b = alt_b.clone().into_u8();
+                                let n_alt_mrd = bases
+                                    .clone()
+                                    .into_iter()
+                                    .filter(|e| *e == alt_b)
+                                    .collect::<Vec<_>>()
+                                    .len();
+                                if n_alt_mrd > 0 {
+                                    n_constit.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::Constit,
+                                    ));
+                                } else {
+                                    n_somatic.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::Somatic,
+                                    ));
+                                }
+                            } else if tumoral.is_ins() {
+                                let n_alt_mrd =
+                                    bases.clone().into_iter().filter(|e| *e == b'I').count();
+                                if n_alt_mrd > 0 {
+                                    n_constit.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::Constit,
+                                    ));
+                                } else {
+                                    n_somatic.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::Somatic,
+                                    ));
+                                }
+                            } else if tumoral.alt_cat() == AlterationCategory::DEL {
+                                let n_alt_mrd =
+                                    bases.clone().into_iter().filter(|e| *e == b'D').count();
+                                if n_alt_mrd > 0 {
+                                    n_constit.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::Constit,
+                                    ));
+                                } else {
+                                    n_somatic.fetch_add(1, Ordering::SeqCst);
+                                    tumoral.annotations.push(AnnotationType::VariantCategory(
+                                        VariantCategory::Somatic,
+                                    ));
+                                }
+                            }
+                        }
+                    }
+                    Err(r) => panic!("{}", r),
+                }
+            }
+        });
+        let n_constit = n_constit.load(Ordering::SeqCst);
+        let n_low_mrd_depth = n_low_mrd_depth.load(Ordering::SeqCst);
+        let n_low_diversity = n_low_diversity.load(Ordering::SeqCst);
+        let n_somatic = n_somatic.load(Ordering::SeqCst);
+        let n_lasting = self.data.len() as i32 - n_already.load(Ordering::SeqCst);
+        self.stats_bam = StatsBAM {
+            n_lasting,
+            n_constit,
+            n_low_mrd_depth,
+            n_low_diversity,
+            n_somatic,
+        };
+        let elapsed = pg.elapsed();
+        pg.finish();
+        info!("{}. Executed in {}s", self.stats_bam, elapsed.as_secs());
+    }
+
+    pub fn get_cat(&mut self, cat: &VariantCategory) -> Vec<Variant> {
+        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
+        pg.set_message(format!("Get cat {:?}", cat));
+        self.data
+            .par_iter()
+            .progress_with(pg)
+            .flat_map(|e| {
+                if e.annotations
+                    .iter()
+                    .filter(|e| match e {
+                        AnnotationType::VariantCategory(vc) => vc == cat,
+                        _ => false,
+                    })
+                    .count()
+                    > 0
+                {
+                    vec![e.clone()]
+                } else {
+                    vec![]
+                }
+            })
+            .collect::<Vec<Variant>>()
+    }
+
+    pub fn write_vcf_cat(&mut self, path: &str, cat: &VariantCategory) -> Result<()> {
+        info!("Writing VCF {}", path);
+
+        let mut to_write = sort_variants(self.get_cat(cat), &self.cfg.dict_file)?;
+        let pg = self.mp.add(new_pg_speed(to_write.len() as u64));
+        pg.set_message("Writing VCF");
+
+        let mut w = VariantWritter::new(path, &self.cfg.dict_file)?;
+        for row in to_write.iter_mut() {
+            w.write_variant(row)?;
+            pg.inc(1);
+        }
+        w.write_index_finish()?;
+        Ok(())
+    }
+
+    /// Keep variants annotated Somatic
+    pub fn keep_somatics_un(&mut self) {
+        let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
+        pg.set_message("Filtering Variants");
+
+        self.data = self
+            .data
+            .par_iter_mut()
+            .progress_with(pg)
+            .flat_map(|e| {
+                // keep unannotated and somatic
+                if e.annotations
+                    .iter()
+                    .filter(|a| match a {
+                        AnnotationType::VariantCategory(vc) => match vc {
+                            VariantCategory::Somatic => false,
+                            _ => true,
+                        },
+                        _ => false,
+                    })
+                    .count()
+                    == 0
+                {
+                    vec![e]
+                } else {
+                    vec![]
+                }
+            })
+            .map(|e| e.clone())
+            .collect();
+    }
+
+    /// Annotate with VEP
+    pub fn vep(&mut self) {
+        let pg = self.mp.add(new_pg_speed(self.len() as u64));
+        pg.set_message("VEP");
+        self.data
+            .par_chunks_mut(self.cfg.vep_chunk_size)
+            .progress_with(pg)
+            .for_each(|chunks| vep_chunk(chunks).unwrap());
+    }
+
+    /// Sort variants in place by contig (in dict order) and by position within each contig.
+    pub fn sort(&mut self) -> Result<()> {
+        let cfg = &self.cfg;
+        self.data = sort_variants(self.data.clone(), &cfg.dict_file)?;
+        Ok(())
+    }
+
+    /// Merge variants sharing contig, position, reference and alternative, combining their caller data and sources.
+    pub fn merge(&mut self) {
+        let pg = self.mp.add(new_pg_speed(self.len() as u64));
+        pg.set_message("Merging Variants by contig, positions, ref, alt");
+        let hm: DashMap<String, Variant> = DashMap::new();
+        self.data.par_iter().progress_with(pg).for_each(|e| {
+            let k = format!(
+                "{}:{}|{}>{}",
+                e.contig, e.position, e.reference, e.alternative
+            );
+
+            if let Some(mut v) = hm.get_mut(&k) {
+                let v = v.value_mut();
+                e.callers_data.iter().for_each(|cd| {
+                    v.callers_data.push(cd.clone());
+                    v.callers_data.dedup();
+                });
+                v.source.extend(e.source.clone());
+                v.source.dedup();
+            } else {
+                hm.insert(k, e.clone());
+            }
+        });
+        self.data = hm.iter().map(|e| e.value().clone()).collect();
+    }
+
+    pub fn annotate_gff_feature(&mut self, gff_path: &str) -> Result<()> {
+        let gff_path = gff_path.to_string();
+        let len = self.data.len();
+        let pg = self.mp.add(new_pg_speed(self.len() as u64));
+        pg.set_message("GFF Annotate");
+
+        self.data
+            .par_chunks_mut(len / 33)
+            .progress_with(pg)
+            .for_each(|chunk| {
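+                // Each chunk opens its own bgzf GFF reader and CSI index so the
+                // per-variant region queries can run in parallel.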
+                let mut reader = File::open(gff_path.to_string())
+                    .map(noodles_bgzf::Reader::new)
+                    .map(gff::Reader::new)
+                    .unwrap();
+
+                let index = noodles_csi::read(format!("{}.csi", gff_path)).unwrap();
+
+                for v in chunk.iter_mut() {
+                    let start = Position::try_from(v.position as usize).unwrap();
+                    let r = Region::new(v.contig.to_string(), start..=start);
+                    if let std::result::Result::Ok(rows) = reader.query(&index, &r.clone()) {
+                        for row in rows {
+                            let ncbi = NCBIGFF::try_from(row.unwrap()).unwrap();
+                            v.annotations.push(AnnotationType::NCBIGFF(ncbi));
+                        }
+                    }
+                }
+            });
+        Ok(())
+    }
+
+    pub fn echtvar_annotate(&mut self, header_path: &str) -> Result<()> {
+        let len = self.len();
+        let header = vcf_header_from(header_path)?;
+        let pg = self.mp.add(new_pg_speed(len as u64));
+        pg.set_message("Echtvar Annotate");
+
+        self.data
+            .par_chunks_mut(len / 33)
+            .progress_with(pg)
+            .for_each(|chunk| {
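+                // Write this chunk to a temporary VCF, annotate it with echtvar,
+                // then read the annotations back in the same order.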
+                let in_tmp = format!(
+                    "{}/echtvar_in_{}.vcf",
+                    temp_dir().to_str().unwrap(),
+                    uuid::Uuid::new_v4()
+                );
+
+                let out_tmp = format!(
+                    "{}/echtvar_in_{}.vcf.gz",
+                    temp_dir().to_str().unwrap(),
+                    uuid::Uuid::new_v4()
+                );
+                let mut vcf = File::create(&in_tmp).unwrap();
+
+                let _ = writeln!(vcf, "{}", header);
+
+                for (i, row) in chunk.iter().enumerate() {
+                    let _ = writeln!(
+                        vcf,
+                        "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}\t{}",
+                        row.contig,
+                        row.position,
+                        i + 1,
+                        row.reference,
+                        row.alternative,
+                        ".",
+                        ".",
+                        "."
+                    );
+                }
+
+                run_echtvar(&in_tmp, &out_tmp).unwrap();
+
+                let mut reader = ReaderBuilder::new()
+                    .delimiter(b'\t')
+                    .has_headers(false)
+                    .comment(Some(b'#'))
+                    .flexible(true)
+                    .from_reader(get_reader(&out_tmp).unwrap());
+
+                // let mut lines: HashMap<u64, Vec<VEPLine>> = HashMap::new();
+                let mut last: usize = 1;
+                for line in reader.deserialize::<VCFRow>() {
+                    if let std::result::Result::Ok(row) = line {
+                        let (cosmic, gnomad) = parse_echtvar_val(&row.info).unwrap();
+                        let id: usize = row.id.parse().unwrap();
+                        if id != last {
+                            panic!("Echtvar output not in input order!");
+                        }
+                        if let Some(c) = cosmic {
+                            chunk[id - 1].annotations.push(AnnotationType::Cosmic(c));
+                        }
+                        if let Some(g) = gnomad {
+                            chunk[id - 1].annotations.push(AnnotationType::GnomAD(g));
+                        }
+                        last += 1;
+                    }
+                }
+            });
+        Ok(())
+    }
+
+    pub fn category_iter(&self, category: &VariantCategory) -> Vec<&Variant> {
+        self.data
+            .par_iter()
+            .filter(|v| {
+                for annotation in v.annotations.iter() {
+                    match annotation {
+                        AnnotationType::VariantCategory(cat) => {
+                            if cat == category {
+                                return true;
+                            }
+                        }
+                        _ => (),
+                    }
+                }
+                return false;
+            })
+            .collect::<Vec<&Variant>>()
+    }
+
+    /// Filter based on GnomAD if gnomad_af < max_gnomad_af
+    pub fn filter_snp(&mut self) -> Result<i32> {
+        let n_snp = AtomicI32::new(0);
+        self.data = self
+            .data
+            .clone()
+            .into_par_iter()
+            .filter(|e| {
+                let mut res = true;
+                e.annotations.iter().for_each(|a| {
+                    match a {
+                        AnnotationType::GnomAD(g) => {
+                            res = g.gnomad_af < self.cfg.max_gnomad_af;
+                        }
+                        _ => (),
+                    };
+                });
+                if !res {
+                    n_snp.fetch_add(1, Ordering::SeqCst);
+                }
+                res
+            })
+            .collect();
+        let n = n_snp.load(Ordering::SeqCst);
+        Ok(n)
+    }
+
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    pub fn constit_len(&self) -> usize {
+        self.constit.len()
+    }
+
+    pub fn get_variant(&self, contig: &str, pos: u32) -> Vec<Variant> {
+        self.data
+            .par_iter()
+            .filter(|v| v.contig == contig && v.position == pos)
+            .map(|v| v.clone())
+            .collect()
+    }
+
+    pub fn stats(&self) -> Result<()> {
+        let mut callers_cat = HashMap::new();
+        let mut n_caller_data = 0;
+
+        let mut ncbi_feature = HashMap::new();
+        let mut n_ncbi_feature = 0;
+
+        let mut cosmic_sup_1 = HashMap::new();
+        let mut n_cosmic_sup_1 = 0;
+
+        let mut cons_cat = HashMap::new();
+        let mut n_csq = 0;
+
+        let add_hm = |hm: &mut HashMap<String, u32>, k: &str| {
+            let (_, v) = hm.raw_entry_mut().from_key(k).or_insert(k.to_string(), 1);
+            *v += 1;
+        };
+
+        for ele in self.data.iter() {
+            // Callers
+            let mut callers = Vec::new();
+            for cd in &ele.callers_data {
+                callers.push(
+                    match cd.format {
+                        Format::DeepVariant(_) => "DeepVariant",
+                        Format::ClairS(_) => "ClairS",
+                        Format::Sniffles(_) => "Sniffles",
+                        Format::Nanomonsv(_) => "Nanomonsv",
+                    }
+                    .to_string(),
+                );
+            }
+
+            if callers.len() > 0 {
+                n_caller_data += 1;
+                callers.sort();
+                let k = callers.join(",");
+
+                let (_, v) = callers_cat
+                    .raw_entry_mut()
+                    .from_key(&k)
+                    .or_insert(k.clone(), 1);
+                *v += 1;
+            }
+
+            // Annotations
+            for annot in ele.annotations.iter() {
+                let mut features = Vec::new();
+                let mut cosmic_m1 = false;
+
+                match annot {
+                    AnnotationType::NCBIGFF(ncbi) => {
+                        features.push(ncbi.feature.to_string());
+                    }
+                    AnnotationType::Cosmic(c) => {
+                        if c.cosmic_cnt > 1 {
+                            cosmic_m1 = true;
+                        }
+                    }
+                    _ => (),
+                };
+                if features.len() > 0 {
+                    features.sort();
+                    add_hm(&mut ncbi_feature, &features.join(","));
+                    n_ncbi_feature += 1;
+                }
+
+                if cosmic_m1 {
+                    add_hm(&mut cosmic_sup_1, "Cosmic > 1");
+                    n_cosmic_sup_1 += 1;
+                }
+            }
+
+            // VEP
+            let d: Vec<VEP> = ele
+                .annotations
+                .iter()
+                .flat_map(|e| {
+                    if let AnnotationType::VEP(e) = e {
+                        e.clone()
+                    } else {
+                        vec![]
+                    }
+                })
+                .collect();
+            if let std::result::Result::Ok(vep) = get_best_vep(&d) {
+                if let Some(csq) = vep.consequence {
+                    n_csq += 1;
+                    let csq = csq.join(",");
+                    let (_, v) = cons_cat
+                        .raw_entry_mut()
+                        .from_key(&csq)
+                        .or_insert(csq.clone(), 1);
+                    *v += 1;
+                }
+            }
+        }
+
+        print_stat_cat(&cons_cat, n_csq as u32);
+        print_stat_cat(&ncbi_feature, n_ncbi_feature as u32);
+        print_stat_cat(&cosmic_sup_1, n_cosmic_sup_1 as u32);
+        print_stat_cat(&callers_cat, n_caller_data as u32);
+
+        // let file = File::create(path)?;
+        // let mut writer = BufWriter::new(file);
+        // let tow = Stats::new(
+        //     (n_csq, cons_cat),
+        //     (n_ncbi_feature, ncbi_feature),
+        //     (n_caller_data, callers_cat),
+        //     n_cosmic_sup_1,
+        //     n_total,
+        //     n_constit,
+        //     n_tumoral,
+        //     n_constit_first,
+        //     n_loh_first,
+        //     n_low_mrd_depth_first,
+        //     n_constit_sec,
+        //     n_low_diversity_sec,
+        //     n_low_mrd_depth_sec,
+        //     n_somatic_sec,
+        // );
+        // serde_json::to_writer(&mut writer, &tow)?;
+
+        Ok(())
+    }
+
+    pub fn save_sql(&self, path: &str) -> Result<()> {
+        insert_variants(&self, path)
+    }
+
+    pub fn stats_sql(&self, path: &str) -> Result<()> {
+        insert_stats(
+            "VCF".to_string(),
+            serde_json::to_string(&self.stats_vcf)?,
+            path,
+        )?;
+        insert_stats(
+            "BAM".to_string(),
+            serde_json::to_string(&self.stats_bam)?,
+            path,
+        )?;
+        Ok(())
+    }
+
+    pub fn save_bytes(&self, path: &str) -> Result<()> {
+        let serialized = pot::to_vec(&self.data)?;
+        let mut w = noodles_bgzf::writer::Builder::default().build_with_writer(File::create(path)?);
+        w.write_all(&serialized)?;
+        Ok(())
+    }
+
+    pub fn new_from_bytes(name: &str, path: &str, mp: MultiProgress) -> Result<Self> {
+        info!("Loading variants from: {path}");
+        let r = in_out::get_reader_progress(path, &mp)?;
+
+        let data: Vec<Variant> = pot::from_reader(r)?;
+        Ok(Self {
+            name: name.to_string(),
+            data,
+            constit: DashMap::new(),
+            stats_vcf: StatsVCF::default(),
+            stats_bam: StatsBAM::default(),
+            cfg: Config::get()?,
+            mp,
+        })
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct Variant {
+    pub contig: String,
+    pub position: u32,
+    pub reference: ReferenceAlternative,
+    pub alternative: ReferenceAlternative,
+    pub callers_data: Vec<CallerData>,
+    pub n_alt: Option<u32>,
+    pub n_ref: Option<u32>,
+    pub vaf: Option<f32>,
+    pub depth: Option<u32>,
+    pub variant_type: VariantType,
+    pub source: Vec<VCFSource>,
+    pub annotations: Vec<AnnotationType>,
+}
+
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+pub struct CallerData {
+    pub qual: Option<f32>,
+    pub format: Format,
+    pub info: Info,
+}
+
+impl CallerData {
+    pub fn get_vaf(&self) -> f64 {
+        match &self.format {
+            Format::DeepVariant(v) => v.vaf as f64,
+            Format::ClairS(v) => v.af,
+            Format::Sniffles(v) => v.dv as f64 / (v.dv as f64 + v.dr as f64),
+            Format::Nanomonsv(v) => v.vr as f64 / v.tr as f64,
+        }
+    }
+    pub fn get_depth(&mut self) -> u32 {
+        match &self.format {
+            Format::DeepVariant(v) => v.dp,
+            Format::ClairS(v) => v.dp,
+            Format::Sniffles(v) => v.dv + v.dr,
+            Format::Nanomonsv(v) => v.tr,
+        }
+    }
+    pub fn get_n_alt(&mut self) -> u32 {
+        match &self.format {
+            Format::DeepVariant(v) => v.ad.get(1).unwrap().to_owned(),
+            Format::ClairS(v) => v.ad.get(1).unwrap().to_owned(),
+            Format::Sniffles(v) => v.dv,
+            Format::Nanomonsv(v) => v.vr, // VR = variant-supporting reads, consistent with get_vaf
+        }
+    }
+
+    /// Caller-specific filter rule: a Sniffles call is filtered when it is
+    /// flagged IMPRECISE or supported by fewer than 3 reads; other callers are kept.
+    pub fn should_filter(&self) -> bool {
+        if let Info::Sniffles(info) = &self.info {
+            let imprecise = info
+                .tags
+                .iter()
+                .filter(|s| s.to_string() == "IMPRECISE".to_string())
+                .count();
+            let mut n_alt = 0;
+            if let Format::Sniffles(f) = &self.format {
+                n_alt = f.dv;
+            }
+            if imprecise == 0 && n_alt >= 3 {
+                return false;
+            } else {
+                return true;
+            }
+        } else {
+            return false;
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Eq, PartialEq, Deserialize)]
+pub enum VariantType {
+    Somatic,
+    Constitutionnal,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum VCFSource {
+    DeepVariant,
+    ClairS,
+    Sniffles,
+    Nanomonsv,
+}
+
+impl FromStr for VCFSource {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        match s {
+            "DeepVariant" => Ok(VCFSource::DeepVariant),
+            "ClairS" => Ok(VCFSource::ClairS),
+            "Sniffles" => Ok(VCFSource::Sniffles),
+            "Nanomonsv" => Ok(VCFSource::Nanomonsv),
+            _ => Err(anyhow!("Error parsing VCFSource")),
+        }
+    }
+}
+
+impl Variant {
+    pub fn from_vcfrow(row: &VCFRow, source: VCFSource, variant_type: VariantType) -> Result<Self> {
+        let callers_data = vec![CallerData {
+            qual: row.qual.parse::<f32>().ok(),
+            info: parse_info(&row.info, &source).context(anyhow!(
+                "Can't parse {:?} info for {}",
+                source,
+                row.info
+            ))?,
+            format: parse_format(&source, &row.value).context(anyhow!(
+                "Can't parse {:?} format for {}",
+                source,
+                row.value
+            ))?,
+        }];
+        Ok(Variant {
+            contig: row.chr.to_string(),
+            position: row.pos,
+            reference: row
+                .reference
+                .parse()
+                .context(anyhow!("Error while parsing {}", row.reference))?,
+            alternative: row
+                .alt
+                .parse()
+                .context(anyhow!("Error while parsing {}", row.alt))?,
+            n_ref: None,
+            n_alt: None,
+            vaf: None,
+            depth: None,
+            callers_data,
+            source: vec![source],
+            variant_type,
+            annotations: Vec::new(),
+        })
+    }
+
+    pub fn get_depth(&mut self) -> u32 {
+        if let Some(depth) = self.depth {
+            return depth;
+        } else {
+            let depth = self
+                .callers_data
+                .iter_mut()
+                .map(|v| v.get_depth())
+                .max()
+                .unwrap();
+            self.depth = Some(depth);
+            return depth;
+        }
+    }
+
+    pub fn get_n_alt(&mut self) -> u32 {
+        if let Some(n_alt) = self.n_alt {
+            return n_alt;
+        } else {
+            let n_alt = self
+                .callers_data
+                .iter_mut()
+                .map(|v| v.get_n_alt())
+                .max()
+                .unwrap();
+            self.n_alt = Some(n_alt);
+            return n_alt;
+        }
+    }
+
+    pub fn vaf(&mut self) -> f64 {
+        let n_alt = self.get_n_alt() as f64;
+        let depth = self.get_depth() as f64;
+        n_alt / depth
+    }
+
+    fn is_ins(&self) -> bool {
+        match (&self.reference, &self.alternative) {
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => true,
+            _ => false,
+        }
+    }
+
+    fn alt_cat(&self) -> AlterationCategory {
+        match (&self.reference, &self.alternative) {
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
+                AlterationCategory::SNV
+            }
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
+                AlterationCategory::INS
+            }
+            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
+                AlterationCategory::DEL
+            }
+            (ReferenceAlternative::Nucleotides(a), ReferenceAlternative::Nucleotides(b)) => {
+                let a = a.len();
+                let b = b.len();
+                if a < b {
+                    AlterationCategory::INS
+                } else if a > b {
+                    AlterationCategory::DEL
+                } else {
+                    AlterationCategory::REP
+                }
+            }
+            (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
+                AlterationCategory::Other
+            }
+            (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
+                AlterationCategory::Other
+            }
+        }
+    }
+
+    pub fn to_min_string(&mut self) -> String {
+        let depth = self.get_depth();
+        let n_alt = self.get_n_alt();
+
+        format!(
+            "DP:AD\t{}:{}",
+            depth,
+            vec![(depth - n_alt).to_string(), n_alt.to_string()].join(",")
+        )
+    }
+
+    pub fn get_veps(&self) -> Vec<VEP> {
+        self.annotations
+            .iter()
+            .flat_map(|e| {
+                if let AnnotationType::VEP(e) = e {
+                    e.clone()
+                } else {
+                    vec![]
+                }
+            })
+            .collect()
+    }
+    pub fn get_best_vep(&self) -> Result<VEP> {
+        get_best_vep(&self.get_veps())
+    }
+}
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+enum AlterationCategory {
+    SNV,
+    INS,
+    DEL,
+    REP,
+    Other,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum AnnotationType {
+    VariantCategory(VariantCategory),
+    VEP(Vec<VEP>),
+    Cluster(i32),
+    Cosmic(Cosmic),
+    GnomAD(GnomAD),
+    NCBIGFF(NCBIGFF),
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum VariantCategory {
+    Somatic,
+    LowMRDDepth,
+    LOH,
+    Constit,
+    LowDiversity,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+pub enum ReferenceAlternative {
+    Nucleotide(Base),
+    Nucleotides(Vec<Base>),
+    Unstructured(String),
+}
+
+impl FromStr for ReferenceAlternative {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self> {
+        let mut possible_bases = s.as_bytes().iter();
+        let mut res: Vec<Base> = Vec::new();
+        while let Some(&base) = possible_bases.next() {
+            match base.try_into() {
+                std::result::Result::Ok(b) => res.push(b),
+                Err(_) => {
+                    return Ok(Self::Unstructured(s.to_string()));
+                }
+            }
+        }
+
+        if res.len() == 1 {
+            return Ok(Self::Nucleotide(res.pop().unwrap()));
+        } else {
+            return Ok(Self::Nucleotides(res));
+        }
+    }
+}
+
+impl fmt::Display for ReferenceAlternative {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let string = match self {
+            ReferenceAlternative::Nucleotide(b) => b.to_string(),
+            ReferenceAlternative::Nucleotides(bases) => {
+                bases.iter().map(|b| b.to_string()).collect::<String>()
+            }
+            ReferenceAlternative::Unstructured(s) => s.to_string(),
+        };
+        write!(f, "{}", string)
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+pub enum Base {
+    A,
+    T,
+    C,
+    G,
+    N,
+}
+
+impl TryFrom<u8> for Base {
+    type Error = anyhow::Error;
+    fn try_from(base: u8) -> Result<Self> {
+        match base {
+            b'A' => Ok(Base::A),
+            b'T' => Ok(Base::T),
+            b'C' => Ok(Base::C),
+            b'G' => Ok(Base::G),
+            b'N' => Ok(Base::N),
+            _ => Err(anyhow!(
+                "Unknown base: {}",
+                String::from_utf8_lossy(&[base])
+            )),
+        }
+    }
+}
+
+impl Base {
+    pub fn into_u8(self) -> u8 {
+        match self {
+            Base::A => b'A',
+            Base::T => b'T',
+            Base::C => b'C',
+            Base::G => b'G',
+            Base::N => b'N',
+        }
+    }
+}
+
+impl fmt::Display for Base {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let str = match self {
+            Base::A => "A",
+            Base::T => "T",
+            Base::C => "C",
+            Base::G => "G",
+            Base::N => "N",
+        };
+        write!(f, "{}", str)
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
+pub enum Format {
+    DeepVariant(DeepVariantFormat),
+    ClairS(ClairSFormat),
+    Sniffles(SnifflesFormat),
+    Nanomonsv(NanomonsvFormat),
+}
+
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
+pub enum Info {
+    DeepVariant(DeepVariantInfo),
+    ClairS(ClairSInfo),
+    Sniffles(SnifflesInfo),
+    Nanomonsv(NanomonsvInfo),
+}
+
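+// INFO and FORMAT columns are caller-specific, so parsing is dispatched on the
+// VCF source: e.g. the INFO string of a Sniffles record is handed to
+// `SnifflesInfo`'s `FromStr`, a nanomonsv record to `NanomonsvInfo`, and so on
+// (the concrete field layouts live in the per-caller types).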
+fn parse_info(s: &str, source: &VCFSource) -> Result<Info> {
+    match source {
+        VCFSource::DeepVariant => Ok(Info::DeepVariant(s.parse()?)),
+        VCFSource::ClairS => Ok(Info::ClairS(s.parse()?)),
+        VCFSource::Sniffles => Ok(Info::Sniffles(s.parse()?)),
+        VCFSource::Nanomonsv => Ok(Info::Nanomonsv(s.parse()?)),
+    }
+}
+
+fn parse_format(vcf_source: &VCFSource, data: &str) -> Result<Format> {
+    let res = match vcf_source {
+        VCFSource::DeepVariant => Format::DeepVariant(data.parse()?),
+        VCFSource::ClairS => Format::ClairS(data.parse()?),
+        VCFSource::Sniffles => Format::Sniffles(data.parse()?),
+        VCFSource::Nanomonsv => Format::Nanomonsv(data.parse()?),
+    };
+    Ok(res)
+}
+
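+/// Groups variants by contig, sorts each group by position, and returns the
+/// groups in the contig order of the dictionary at `dict_path`; variants on
+/// contigs absent from the dictionary are dropped.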
+pub fn sort_variants(d: Vec<Variant>, dict_path: &str) -> Result<Vec<Variant>> {
+    info!("Sorting {} entries", d.len());
+    let dict = read_dict(dict_path)?;
+
+    // Group variants by contig
+    let mut store: HashMap<String, Vec<Variant>> = HashMap::new();
+    for e in d {
+        store.entry(e.contig.to_string()).or_default().push(e);
+    }
+
+    // Sort each contig's variants by position
+    store
+        .iter_mut()
+        .for_each(|(_, vec)| vec.sort_by(|a, b| a.position.partial_cmp(&b.position).unwrap()));
+
+    // Emit contigs in the order given by the dict file
+    Ok(dict
+        .iter()
+        .flat_map(|(chr, _)| {
+            if let Some((_, vec)) = store.remove_entry(chr) {
+                vec
+            } else {
+                vec![]
+            }
+        })
+        .collect())
+}
+
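+/// Per-sample annotation pipeline: checks that the expected DeepVariant,
+/// ClairS, Sniffles and nanomonsv outputs exist, loads them, applies VCF- and
+/// BAM-level filters, writes the LOH and constitutional subsets, then merges,
+/// sorts, VEP-annotates, adds GFF features and echtvar annotations, and
+/// serializes the remaining somatic variants to `{name}_variants.bytes.gz`.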
+pub fn run_pipe(name: &str, multi: &MultiProgress) -> Result<()> {
+    let cfg = config::Config::get()?;
+    let deepvariant_diag_vcf = format!(
+        "{}/{name}/diag/DeepVariant/{name}_diag_DeepVariant_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&deepvariant_diag_vcf).exists() {
+        panic!("{deepvariant_diag_vcf} is required")
+    }
+    let deepvariant_mrd_vcf = format!(
+        "{}/{name}/mrd/DeepVariant/{name}_mrd_DeepVariant_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&deepvariant_mrd_vcf).exists() {
+        panic!("{deepvariant_mrd_vcf} is required")
+    }
+    let mrd_bam = format!(
+        "{}/{name}/mrd/{name}_mrd_hs1.bam",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&mrd_bam).exists() {
+        panic!("{mrd_bam} is required")
+    }
+    let clairs_vcf = format!(
+        "{}/{name}/diag/ClairS/{name}_diag_clairs_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&clairs_vcf).exists() {
+        panic!("{clairs_vcf} is required")
+    }
+    let clairs_indels_vcf = format!(
+        "{}/{name}/diag/ClairS/{name}_diag_clairs_indel_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&clairs_indels_vcf).exists() {
+        panic!("{clairs_indels_vcf} is required")
+    }
+    let sniffles_vcf = format!(
+        "{}/{name}/diag/Sniffles/{name}_diag_sniffles.vcf",
+        cfg.longreads_results_dir
+    );
+    let sniffles_mrd_vcf = format!(
+        "{}/{name}/mrd/Sniffles/{name}_mrd_sniffles.vcf",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&sniffles_vcf).exists() {
+        panic!("{sniffles_vcf} is required")
+    }
+    let nanomonsv_vcf = format!(
+        "{}/{name}/diag/nanomonsv/{name}_diag_nanomonsv_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&nanomonsv_vcf).exists() {
+        panic!("{nanomonsv_vcf} is required")
+    }
+
+    // let db_path = "/data/db_results.sqlite".to_string();
+    // `${data_dir}/${name}/diag/${name}_variants.sqlite`
+    let db_path = format!(
+        "{}/{name}/diag/{name}_variants.sqlite",
+        cfg.longreads_results_dir
+    );
+    let bytes_path = format!(
+        "{}/{name}/diag/{name}_variants.bytes.gz",
+        cfg.longreads_results_dir
+    );
+
+    let loh_path = format!(
+        "{}/{name}/diag/{name}_loh.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    // let db_constit_path = format!(
+    //     "{}/{name}/diag/{name}_constit.sqlite",
+    //     cfg.longreads_results_dir
+    // );
+    let bytes_constit_path = format!(
+        "{}/{name}/diag/{name}_constit.bytes.gz",
+        cfg.longreads_results_dir
+    );
+
+    let sources = vec![
+        (
+            deepvariant_diag_vcf.as_str(),
+            &VCFSource::DeepVariant,
+            &VariantType::Somatic,
+        ),
+        (
+            deepvariant_mrd_vcf.as_str(),
+            &VCFSource::DeepVariant,
+            &VariantType::Constitutionnal,
+        ),
+        (
+            clairs_vcf.as_str(),
+            &VCFSource::ClairS,
+            &VariantType::Somatic,
+        ),
+        (
+            sniffles_vcf.as_str(),
+            &VCFSource::Sniffles,
+            &VariantType::Somatic,
+        ),
+        (
+            sniffles_mrd_vcf.as_str(),
+            &VCFSource::Sniffles,
+            &VariantType::Constitutionnal,
+        ),
+        (
+            nanomonsv_vcf.as_str(),
+            &VCFSource::Nanomonsv,
+            &VariantType::Somatic,
+        ),
+    ];
+    let mut variants = Variants::from_vcfs(name.to_string(), sources, &cfg, multi.clone())?;
+
+    variants.vcf_filters();
+    variants.write_vcf_cat(&loh_path, &VariantCategory::LOH)?;
+    variants.bam_filters(&mrd_bam);
+
+    let constits = variants.get_cat(&VariantCategory::Constit);
+    let constits = Variants::from_vec(name.to_string(), &multi, constits);
+    // constits.save_sql(&db_constit_path)?;
+    constits.save_bytes(&bytes_constit_path)?;
+
+    variants.keep_somatics_un();
+    info!("Variants retained: {}", variants.len());
+
+    variants.merge();
+    variants.sort()?;
+    variants.vep();
+    info!("Variants retained: {}", variants.len());
+
+    variants.annotate_gff_feature(&cfg.gff_path)?;
+    variants.echtvar_annotate(&deepvariant_mrd_vcf)?;
+
+    variants.save_bytes(&bytes_path)?;
+    // variants.filter_snp()?;
+    // variants.stats()?;
+    //
+    // if std::path::Path::new(&db_path).exists() {
+    //     crate::sql::variants_sql::remove_variants_names(&db_path, &name)?;
+    // }
+    //
+    // variants.save_sql(&db_path)?;
+    // variants.stats_sql(&db_path)?;
+    info!("Variants : {}", variants.len());
+
+    Ok(())
+}
+
+// pub fn cluster_variants(d: &mut Vec<Variant>, max_dist: u32) -> i32 {
+//     let mut cluster_id = 0;
+//     let first = d.get(0).unwrap();
+//     let mut last_pos = first.position;
+//     let mut last_contig = first.contig.to_string();
+//
+//     d.iter_mut().for_each(|e| {
+//         if e.contig != last_contig {
+//             cluster_id += 1;
+//             last_contig = e.contig.to_string();
+//         } else if e.position - last_pos > max_dist {
+//             cluster_id += 1;
+//         }
+//         e.annotations.push(AnnotationType::Cluster(cluster_id));
+//         last_pos = e.position;
+//     });
+//
+//     cluster_id
+// }