Browse Source

scan outliers

Thomas 1 year ago
commit
e74f938e84
5 changed files with 1415 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 819 0
      Cargo.lock
  3. 11 0
      Cargo.toml
  4. 327 0
      src/bin.rs
  5. 257 0
      src/lib.rs

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+/target

+ 819 - 0
Cargo.lock

@@ -0,0 +1,819 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
+dependencies = [
+ "anstyle",
+ "windows-sys",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
+[[package]]
+name = "bindgen"
+version = "0.69.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "itertools",
+ "lazy_static",
+ "lazycell",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn 2.0.72",
+]
+
+[[package]]
+name = "bio-types"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cc7edd677651969cc262a8dfb870f0c2266c3ceeaf863d742982e39699ff460"
+dependencies = [
+ "derive-new 0.6.0",
+ "lazy_static",
+ "regex",
+ "strum_macros",
+ "thiserror",
+]
+
+[[package]]
+name = "bitflags"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bzip2-sys"
+version = "0.1.11+1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "cc"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f"
+dependencies = [
+ "jobserver",
+ "libc",
+]
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "cmake"
+version = "0.1.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "colorchoice"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
+
+[[package]]
+name = "curl-sys"
+version = "0.4.73+curl-8.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "450ab250ecf17227c39afb9a2dd9261dc0035cb80f2612472fc0c4aac2dcb84d"
+dependencies = [
+ "cc",
+ "libc",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
+ "vcpkg",
+ "windows-sys",
+]
+
+[[package]]
+name = "custom_derive"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9"
+
+[[package]]
+name = "derive-new"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "derive-new"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.72",
+]
+
+[[package]]
+name = "either"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
+
+[[package]]
+name = "env_filter"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab"
+dependencies = [
+ "log",
+ "regex",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "env_filter",
+ "humantime",
+ "log",
+]
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "fs-utils"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fc7a9dc005c944c98a935e7fd626faf5bf7e5a609f94bc13e42fc4a02e52593"
+dependencies = [
+ "quick-error",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hts-sys"
+version = "2.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e9f348d14cb4e50444e39fcd6b00302fe2ed2bc88094142f6278391d349a386d"
+dependencies = [
+ "bindgen",
+ "bzip2-sys",
+ "cc",
+ "curl-sys",
+ "fs-utils",
+ "glob",
+ "libz-sys",
+ "lzma-sys",
+ "openssl-sys",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "idna"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "ieee754"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9007da9cacbd3e6343da136e98b0d2df013f553d35bdec8b518f07bea768e19c"
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
+
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "jobserver"
+version = "0.1.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
+[[package]]
+name = "libc"
+version = "0.2.155"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+
+[[package]]
+name = "libloading"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
+dependencies = [
+ "cfg-if",
+ "windows-targets",
+]
+
+[[package]]
+name = "libz-sys"
+version = "1.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c15da26e5af7e25c90b37a2d75cdbf940cf4a55316de9d84c679c9b8bfabf82e"
+dependencies = [
+ "cc",
+ "cmake",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "linear-map"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfae20f6b19ad527b550c223fddc3077a547fc70cda94b9b566575423fd303ee"
+
+[[package]]
+name = "log"
+version = "0.4.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
+
+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "newtype_derive"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec"
+dependencies = [
+ "rustc_version",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "openssl-src"
+version = "300.3.1+3.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "openssl-sys"
+version = "0.9.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6"
+dependencies = [
+ "cc",
+ "libc",
+ "openssl-src",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "pandora_lib_scan"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "env_logger",
+ "log",
+ "rayon",
+ "rust-htslib",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.30"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quick-error"
+version = "1.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
+
+[[package]]
+name = "quote"
+version = "1.0.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rayon"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
+
+[[package]]
+name = "rust-htslib"
+version = "0.47.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f1796800e73ebb282c6fc5c03f1fe160e867e01114a58a7e115ee3c1d02482"
+dependencies = [
+ "bio-types",
+ "byteorder",
+ "custom_derive",
+ "derive-new 0.5.9",
+ "hts-sys",
+ "ieee754",
+ "lazy_static",
+ "libc",
+ "libz-sys",
+ "linear-map",
+ "newtype_derive",
+ "regex",
+ "thiserror",
+ "url",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "rustc_version"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084"
+dependencies = [
+ "semver",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6"
+
+[[package]]
+name = "semver"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac"
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "strum_macros"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.72",
+]
+
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.72"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.63"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.63"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.72",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "unicode-bidi"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "url"
+version = "2.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

+ 11 - 0
Cargo.toml

@@ -0,0 +1,11 @@
+[package]
+name = "pandora_lib_scan"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = "1.0.86"
+env_logger = "0.11.5"
+log = "0.4.22"
+rayon = "1.10.0"
+rust-htslib = "0.47.0"

+ 327 - 0
src/bin.rs

@@ -0,0 +1,327 @@
+use rayon::prelude::*;
+use std::{collections::HashMap, usize};
+
+use anyhow::Context;
+use rust_htslib::bam::{ext::BamRecordExtensions, record::Aux, Read, Record};
+
+/// A genomic window together with the BAM records collected for it.
+///
+/// Records are stored by query name, so read names are enforced to be unique:
+/// inserting a record whose qname is already present replaces the earlier one.
+#[derive(Debug)]
+pub struct Bin {
+    /// Name of the contig the window lies on.
+    pub contig: String,
+    /// First position of the window (0-based, inclusive).
+    pub start: u32,
+    /// Last position of the window (inclusive).
+    pub end: u32,
+    /// Records kept for the window, keyed by their query name.
+    pub reads_store: HashMap<Vec<u8>, Record>,
+}
+
+impl Bin {
+    /// Builds the bin covering `length` bases of `contig` starting at `start`
+    /// (0-based) from the indexed BAM at `bam_path`.
+    ///
+    /// Records whose mapping quality is below 50 are skipped. The remaining
+    /// records are stored by query name, so a later record with an already
+    /// seen qname replaces the earlier one.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when `length` is 0 or the window does not fit in `u32`
+    /// coordinates, when the indexed BAM cannot be opened, when the region
+    /// cannot be fetched, or when a record fails to parse.
+    pub fn new(bam_path: &str, contig: &str, start: u32, length: u32) -> anyhow::Result<Self> {
+        // Minimum mapping quality a record needs in order to be kept.
+        const MIN_MAPQ: u8 = 50;
+
+        // Last position of the window, inclusive. Checked arithmetic turns an
+        // empty or overflowing window into an error instead of a panic/wrap.
+        let end = length
+            .checked_sub(1)
+            .and_then(|span| start.checked_add(span))
+            .with_context(|| format!("Invalid bin {contig}:{start} with length {length}"))?;
+
+        let mut bam_reader = rust_htslib::bam::IndexedReader::from_path(bam_path)
+            .with_context(|| format!("Can't open {}", bam_path))?;
+
+        // `fetch` expects a half-open region (stop is exclusive), so the
+        // inclusive `end` has to be passed as `end + 1` to cover the last base.
+        bam_reader
+            .fetch((contig, i64::from(start), i64::from(end) + 1))
+            .with_context(|| format!("Can't fetch {contig}:{start}-{end}"))?;
+
+        let mut reads_store: HashMap<Vec<u8>, Record> = HashMap::new();
+        for read in bam_reader.records() {
+            let record = read.context("Error while parsing record")?;
+            // Skip reads with low mapping quality
+            if record.mapq() < MIN_MAPQ {
+                continue;
+            }
+            reads_store.insert(record.qname().to_vec(), record);
+        }
+
+        Ok(Bin {
+            contig: contig.to_string(),
+            start,
+            end,
+            reads_store,
+        })
+    }
+
+    /// Number of records held by this bin.
+    pub fn n_reads(&self) -> usize {
+        let Self { reads_store, .. } = self;
+        reads_store.len()
+    }
+
+    /// Counts the records that carry an `SA` auxiliary tag stored as a string.
+    pub fn n_sa(&self) -> usize {
+        let mut with_sa = 0;
+        for record in self.reads_store.values() {
+            if let Ok(Aux::String(_)) = record.aux(b"SA") {
+                with_sa += 1;
+            }
+        }
+        with_sa
+    }
+
+    /// Finds the position inside the bin where the most reads start or end.
+    ///
+    /// Alignment starts and alignment ends are tallied separately, and only
+    /// positions within `start..=end` of the bin are considered.
+    ///
+    /// Returns `(position, count)` for the highest tally among all start and
+    /// end positions. Ties are resolved in favour of the lowest position so
+    /// the result does not depend on hash-map iteration order. Returns
+    /// `(0, 0)` when no read starts or ends inside the bin.
+    pub fn max_start_or_end(&self) -> (u32, usize) {
+        let window = self.start..=self.end;
+        let mut starts: HashMap<u32, usize> = HashMap::new();
+        let mut ends: HashMap<u32, usize> = HashMap::new();
+
+        for record in self.reads_store.values() {
+            // `try_from` drops coordinates that do not fit in `u32` (e.g.
+            // negative ones) instead of letting an `as` cast wrap them.
+            if let Ok(reference_start) = u32::try_from(record.reference_start()) {
+                if window.contains(&reference_start) {
+                    *starts.entry(reference_start).or_default() += 1;
+                }
+            }
+            if let Ok(reference_end) = u32::try_from(record.reference_end()) {
+                if window.contains(&reference_end) {
+                    *ends.entry(reference_end).or_default() += 1;
+                }
+            }
+        }
+
+        // Rank by count first (not by position), then by lowest position.
+        starts
+            .into_iter()
+            .chain(ends)
+            .max_by_key(|&(position, count)| (count, std::cmp::Reverse(position)))
+            .unwrap_or((0, 0))
+    }
+
+    // Initiate
+    // let mut reads_starts: Vec<Vec<Record>> = Vec::new();
+    // reads_starts.resize(length as usize, vec![]);
+    // let mut reads_ends: Vec<Vec<Record>> = Vec::new();
+    // reads_ends.resize(length as usize, vec![]);
+    //
+    // for read in bam_reader.records() {
+    //     let record = read.context(format!("Error while parsing record"))?;
+    //     // Skip reads with low mapping quality
+    //     if record.mapq() < mapq {
+    //         continue;
+    //     }
+    //
+    //     let read_start = record.reference_start() as u32;
+    //     let read_end = record.reference_end() as u32;
+    //
+    //     if read_start >= start && read_start < end {
+    //         let index = read_start - start;
+    //         let at_pos = reads_starts.get_mut(index as usize).unwrap();
+    //         at_pos.push(record.clone());
+    //     }
+    //
+    //     if read_end >= start && read_end < end {
+    //         let index = read_end - start;
+    //         let at_pos = reads_ends.get_mut(index as usize).unwrap();
+    //         at_pos.push(record.clone());
+    //     }
+    // }
+
+    // let mut bam_pileup = Vec::new();
+    // for p in bam.pileup() {
+    //     let pileup = p.context(format!(
+    //         "Can't pilup bam at position {}:{}-{}",
+    //         chr, start, stop
+    //     ))?;
+    //     let position = pileup.pos() as i32;
+    //     if position == start {
+    //         for alignment in pileup.alignments() {
+    //             match alignment.indel() {
+    //                 bam::pileup::Indel::Ins(_len) => bam_pileup.push(b'I'),
+    //                 bam::pileup::Indel::Del(_len) => bam_pileup.push(b'D'),
+    //                 _ => {
+    //                     let record = alignment.record();
+    //                     if record.seq_len() > 0 {
+    //                         if let Some(b) = hts_base_at(&record, start as u32, with_next_ins)?
+    //                         {
+    //                             bases.push((record.clone(), b));
+    //                         }
+    //                     } else if alignment.is_del() {
+    //                         bases.push((record.clone(), b'D'));
+    //                     }
+    //                 }
+    //             }
+    //         }
+    //     }
+    // }
+}
+
+/// Splits `start..=end` of `contig` into consecutive bins of `length` bases
+/// and summarises each bin in parallel.
+///
+/// Bins begin at `start`, `start + length`, ... as long as the bin start does
+/// not exceed `end`; the last bin may therefore extend past `end`.
+///
+/// Each returned tuple holds the bin start, the number of reads, the number of
+/// reads with an `SA` tag and the result of [`Bin::max_start_or_end`].
+/// Bins that cannot be loaded are left out of the result (best effort).
+/// Returns an empty vector when `length` is 0 or `start > end`.
+pub fn scan(
+    bam_path: &str,
+    contig: &str,
+    start: u32,
+    end: u32,
+    length: u32,
+) -> Vec<(u32, usize, usize, (u32, usize))> {
+    // A zero step would never advance (and `step_by` panics on 0).
+    if length == 0 {
+        return Vec::new();
+    }
+
+    // `step_by` stops cleanly at the end of the range, so a bin start close
+    // to `u32::MAX` cannot overflow the way a manual `current += length` can.
+    let starts: Vec<u32> = (start..=end).step_by(length as usize).collect();
+
+    starts
+        .into_par_iter()
+        .filter_map(|start| {
+            let bin = Bin::new(bam_path, contig, start, length).ok()?;
+            Some((start, bin.n_reads(), bin.n_sa(), bin.max_start_or_end()))
+        })
+        .collect()
+}
+
+/// Removes from `data`, in place, every value whose z-score exceeds 3 in
+/// absolute value.
+///
+/// The mean and the (population) standard deviation are computed over the
+/// whole input. When the standard deviation is zero (all values identical,
+/// or a single value) there is no spread to measure against, so nothing is
+/// removed. Empty input is left untouched.
+pub fn filter_outliers_z_score(data: &mut Vec<f64>) {
+    // Largest absolute z-score still considered an inlier.
+    const MAX_ABS_Z_SCORE: f64 = 3.0;
+
+    if data.is_empty() {
+        return;
+    }
+
+    let n = data.len() as f64;
+    let mean = data.iter().sum::<f64>() / n;
+    let variance = data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n;
+    let std_dev = variance.sqrt();
+
+    // Dividing by a zero deviation yields NaN z-scores, which would fail the
+    // comparison below and wrongly discard every value.
+    if std_dev == 0.0 {
+        return;
+    }
+
+    data.retain(|&x| ((x - mean) / std_dev).abs() <= MAX_ABS_Z_SCORE);
+}
+
+/// Returns the `indices` of the values in `data` that are NOT outliers
+/// according to the modified z-score `0.6745 * |x - median| / MAD`.
+///
+/// `data` and `indices` are paired element by element; a value is kept when
+/// its score is at most 1.5. Note that this function returns the inliers,
+/// whereas `filter_outliers_modified_z_score_with_indices` returns the
+/// outliers.
+///
+/// When the MAD is zero (more than half of the values equal the median) the
+/// values equal to the median are kept and every other value is dropped.
+/// NaN values are never kept. Returns an empty vector for empty `data`.
+pub fn filter_outliers_modified_z_score(data: Vec<f64>, indices: Vec<u32>) -> Vec<u32> {
+    // Largest modified z-score still considered an inlier.
+    const MAX_MODIFIED_Z_SCORE: f64 = 1.5;
+    // Conventional scaling constant of the modified z-score.
+    const MAD_SCALE: f64 = 0.6745;
+
+    /// Median of a non-empty vector; `total_cmp` never panics on NaN.
+    fn median_of(mut values: Vec<f64>) -> f64 {
+        values.sort_unstable_by(f64::total_cmp);
+        let mid = values.len() / 2;
+        if values.len() % 2 == 0 {
+            (values[mid - 1] + values[mid]) / 2.0
+        } else {
+            values[mid]
+        }
+    }
+
+    if data.is_empty() {
+        return Vec::new();
+    }
+
+    let median = median_of(data.clone());
+    // Median Absolute Deviation (MAD)
+    let mad = median_of(data.iter().map(|&x| (x - median).abs()).collect());
+
+    indices
+        .into_iter()
+        .zip(data)
+        .filter(|&(_, x)| {
+            let deviation = (x - median).abs();
+            // A value sitting exactly on the median has score 0 even when the
+            // MAD is 0; computing 0/0 would give NaN and wrongly reject it.
+            deviation == 0.0 || MAD_SCALE * deviation / mad <= MAX_MODIFIED_Z_SCORE
+        })
+        .map(|(index, _)| index)
+        .collect()
+}
+
+/// Returns the `indices` of the values in `data` that ARE outliers according
+/// to the modified z-score `0.6745 * |x - median| / MAD`.
+///
+/// `data` and `indices` are paired element by element; an index is reported
+/// when the score of its value is strictly greater than 3.5. Returns an empty
+/// vector for empty `data`.
+pub fn filter_outliers_modified_z_score_with_indices(
+    data: Vec<f64>,
+    indices: Vec<u32>,
+) -> Vec<u32> {
+    // Scores strictly above this cut-off are reported as outliers.
+    const CUTOFF: f64 = 3.5;
+
+    if data.is_empty() {
+        return vec![];
+    }
+
+    // Robust centre and spread of the sample.
+    let center = compute_median(&data);
+    let spread = compute_mad(&data, center);
+
+    let mut outliers = Vec::new();
+    for (index, value) in indices.into_iter().zip(data) {
+        let score = 0.6745 * (value - center).abs() / spread;
+        if score > CUTOFF {
+            outliers.push(index);
+        }
+    }
+    outliers
+}
+
+/// Computes the median of `data`, ignoring NaN values.
+///
+/// For an even number of values the mean of the two middle values is
+/// returned. Returns NaN when `data` is empty or contains only NaN, instead
+/// of panicking.
+fn compute_median(data: &[f64]) -> f64 {
+    // NaN has no place in an ordering; leaving it in would either panic on
+    // comparison or shift the middle element unpredictably.
+    let mut sorted_data: Vec<f64> = data.iter().copied().filter(|x| !x.is_nan()).collect();
+    if sorted_data.is_empty() {
+        return f64::NAN;
+    }
+
+    sorted_data.sort_unstable_by(f64::total_cmp);
+    let mid = sorted_data.len() / 2;
+    if sorted_data.len() % 2 == 0 {
+        (sorted_data[mid - 1] + sorted_data[mid]) / 2.0
+    } else {
+        sorted_data[mid]
+    }
+}
+
+/// Median Absolute Deviation (MAD): the median of the absolute distances
+/// between each value of `data` and the supplied `median`.
+fn compute_mad(data: &[f64], median: f64) -> f64 {
+    let mut abs_deviations = Vec::with_capacity(data.len());
+    for value in data {
+        abs_deviations.push((value - median).abs());
+    }
+    compute_median(&abs_deviations)
+}
+
+/// Scans `start..=end` of `contig` in bins of `length` bases and flags the
+/// bins whose read statistics are outliers.
+///
+/// Bins begin at `start`, `start + length`, ... as long as the bin start does
+/// not exceed `end`; the last bin may therefore extend past `end`. For each
+/// bin two ratios are computed relative to its number of reads:
+///
+/// * the fraction of reads carrying an `SA` tag, and
+/// * the fraction of reads sharing the most frequent start or end position.
+///
+/// Both ratios are 0.0 for a bin without reads. Outliers are detected
+/// independently on each ratio with
+/// `filter_outliers_modified_z_score_with_indices`.
+///
+/// Each returned tuple is `(region, n_reads, sa_ratio, sa_is_outlier,
+/// se_ratio, se_is_outlier)` where `region` is formatted as
+/// `contig:start-end`. Bins that cannot be loaded are left out of the result
+/// (best effort). Returns an empty vector when `length` is 0 or
+/// `start > end`.
+pub fn scan_outliers(
+    bam_path: &str,
+    contig: &str,
+    start: u32,
+    end: u32,
+    length: u32,
+) -> Vec<(String, usize, f64, bool, f64, bool)> {
+    // A zero step would never advance (and `step_by` panics on 0).
+    if length == 0 {
+        return Vec::new();
+    }
+
+    // `step_by` stops cleanly at the end of the range, so a bin start close
+    // to `u32::MAX` cannot overflow the way a manual `current += length` can.
+    let starts: Vec<u32> = (start..=end).step_by(length as usize).collect();
+
+    let ratios: Vec<(u32, usize, f64, f64)> = starts
+        .into_par_iter()
+        .filter_map(|start| {
+            let bin = Bin::new(bam_path, contig, start, length).ok()?;
+            let n = bin.n_reads();
+            let (_, se) = bin.max_start_or_end();
+            // An empty bin would give 0/0 = NaN, which poisons the median
+            // computation downstream; report a ratio of 0 instead.
+            let ratio = |count: usize| if n == 0 { 0.0 } else { count as f64 / n as f64 };
+            Some((start, n, ratio(bin.n_sa()), ratio(se)))
+        })
+        .collect();
+
+    // Split the per-bin tuples into the parallel vectors the filter expects.
+    let indices: Vec<u32> = ratios.iter().map(|&(p, ..)| p).collect();
+    let sa_ratios: Vec<f64> = ratios.iter().map(|&(_, _, sa, _)| sa).collect();
+    let se_ratios: Vec<f64> = ratios.iter().map(|&(_, _, _, se)| se).collect();
+
+    // Hash sets make the per-bin membership test below O(1).
+    let sa_outliers: std::collections::HashSet<u32> =
+        filter_outliers_modified_z_score_with_indices(sa_ratios, indices.clone())
+            .into_iter()
+            .collect();
+    let se_outliers: std::collections::HashSet<u32> =
+        filter_outliers_modified_z_score_with_indices(se_ratios, indices)
+            .into_iter()
+            .collect();
+
+    ratios
+        .into_iter()
+        .map(|(p, n, sa, se)| {
+            // `length >= 1` here; saturate rather than overflow at `u32::MAX`.
+            let end = p.saturating_add(length - 1);
+            (
+                format!("{contig}:{p}-{end}"),
+                n,
+                sa,
+                sa_outliers.contains(&p),
+                se,
+                se_outliers.contains(&p),
+            )
+        })
+        .collect()
+}

+ 257 - 0
src/lib.rs

@@ -0,0 +1,257 @@
+pub mod bin;
+
+// fn get_se_diag_mrd(
+//     diag_bam_path: &str,
+//     mrd_bam_path: &str,
+//     chr: &str,
+//     start: i32,
+//     stop: i32,
+//     mapq: u8,
+//     name: String,
+// ) {
+//     let min_reads = 3;
+//     let bin_size = 1_000;
+//     let n_bins = stop - start;
+//
+//     let mut se_raw = vec![(0, 0)];
+//     se_raw.resize(n_bins as usize, (0, 0));
+//
+//     let get_pos = |i: usize| -> i32 { start + (i as i32 * bin_size as i32) };
+//     let se_raw: Vec<(i32, i32)> = se_raw
+//         .par_chunks(bin_size)
+//         .enumerate()
+//         .flat_map(|(i, _)| {
+//             let mut diag_bam = rust_htslib::bam::IndexedReader::from_path(diag_bam_path)
+//                 .context(anyhow!("Reading {}", diag_bam_path))
+//                 .unwrap();
+//             let mut mrd_bam = rust_htslib::bam::IndexedReader::from_path(mrd_bam_path)
+//                 .context(anyhow!("Reading {}", mrd_bam_path))
+//                 .unwrap();
+//
+//             let s = get_pos(i);
+//             let e = s + bin_size as i32;
+//             let d = get_start_end_qual(&mut diag_bam, chr, s, e, mapq).unwrap();
+//             let m = get_start_end_qual(&mut mrd_bam, chr, s, e, mapq).unwrap();
+//
+//             d.iter()
+//                 .zip(m.iter())
+//                 .map(|(d, m)| (*d, *m))
+//                 .collect::<Vec<(i32, i32)>>()
+//         })
+//         .collect();
+//
+//     let (diag_sum, mrd_sum) = se_raw
+//         .iter()
+//         .fold((0i32, 0i32), |acc, (d, m)| (acc.0 + d, acc.1 + m));
+//
+//     let diag = se_raw.iter().map(|(d, _)| *d as f64).collect::<Vec<f64>>();
+//     let diag_mean = mean(&diag).unwrap();
+//     let diag_std = std_deviation(&diag).unwrap();
+//
+//     let mrd = se_raw.iter().map(|(_, m)| *m as f64).collect::<Vec<f64>>();
+//     let mrd_mean = mean(&mrd).unwrap();
+//     let mrd_std = std_deviation(&mrd).unwrap();
+//
+//     info!(
+//         "N/nt diag {} mrd {}",
+//         diag_sum as f64 / (stop - start) as f64,
+//         mrd_sum as f64 / (stop - start) as f64
+//     );
+//     info!("Mean diag {diag_mean} mrd {mrd_mean}");
+//     info!("std dev diag {diag_std} mrd {mrd_std}");
+//
+//     let ratio_se: Vec<f64> = diag
+//         .iter()
+//         .zip(mrd.iter())
+//         .map(|(d, m)| (((d / diag_mean) + 1.0) / ((m / mrd_mean) + 1.0)) - 1.0)
+//         .collect();
+//
+//     let r_stddev = std_deviation(&ratio_se).unwrap();
+//     info!("ratio mean {} std dev {r_stddev}", mean(&ratio_se).unwrap());
+//
+//     let all: Vec<_> = se_raw
+//         .into_iter()
+//         .enumerate()
+//         .zip(ratio_se.iter())
+//         .filter(|((_i, (d, m)), _r)| *d > 3 && *m == 0)
+//         .collect();
+//
+//     info!("{} locations to assemble", all.len());
+//
+//     // getting primary reads from bam at given positions
+//     let se_data: Vec<(Vec<Record>, Vec<Record>)> = all
+//         .par_chunks(100)
+//         .flat_map(|chunks| {
+//             // Loading bam reader.
+//             let mut diag_bam = rust_htslib::bam::IndexedReader::from_path(diag_bam_path)
+//                 .context(anyhow!("Reading {}", diag_bam_path))
+//                 .unwrap();
+//             chunks
+//                 .to_vec()
+//                 .iter()
+//                 .map(|((i, (d, m)), r)| {
+//                     let pos = *i as i32 + start;
+//                     let (mut s, mut e) =
+//                         pileup::get_start_end_qual_rec(&mut diag_bam, chr, pos, pos + 1, mapq)
+//                             .unwrap();
+//                     let s = s.pop().unwrap();
+//                     let s = pileup::swap_by_primary(diag_bam_path, s);
+//                     let e = e.pop().unwrap();
+//                     let e = pileup::swap_by_primary(diag_bam_path, e);
+//                     // info!("{chr}:{pos}\t{r}\t{d} (s:{} e:{:?})\t{m}", s.len(), e.len());
+//                     // println!("e {e:?}");
+//                     // println!("s {s:?}");
+//
+//                     (s, e)
+//                 })
+//                 .collect::<Vec<(Vec<Record>, Vec<Record>)>>()
+//         })
+//         .collect();
+// }
+//
+/// Arithmetic mean of `data`.
+///
+/// Returns `None` for an empty slice, since the mean is undefined there.
+fn mean(data: &[f64]) -> Option<f64> {
+    if data.is_empty() {
+        return None;
+    }
+
+    let total: f64 = data.iter().sum();
+    Some(total / data.len() as f64)
+}
+/// Population standard deviation of `data` (the squared deviations are
+/// divided by the number of samples, not by `n - 1`).
+///
+/// Returns `None` when the slice is empty or when `mean` yields no value.
+fn std_deviation(data: &[f64]) -> Option<f64> {
+    if data.is_empty() {
+        return None;
+    }
+    let center = mean(data)?;
+
+    // Sum of squared distances from the mean.
+    let squared_error: f64 = data
+        .iter()
+        .map(|&sample| {
+            let delta = center - sample;
+            delta * delta
+        })
+        .sum();
+
+    Some((squared_error / data.len() as f64).sqrt())
+}
+#[cfg(test)]
+mod tests {
+    // Manual smoke tests for the `bin` module.
+    //
+    // Every test opens a BAM file at a hard-coded path under `RESULT_DIR` and
+    // only logs what it finds; nothing is asserted on the values themselves.
+    use log::info;
+
+    use super::bin::{filter_outliers_modified_z_score_with_indices, scan, scan_outliers, Bin};
+
+    // Sample, contig and results directory shared by all tests.
+    const ID: &str = "SALICETTO";
+    const CONTIG: &str = "chr10";
+    const RESULT_DIR: &str = "/data/longreads_basic_pipe";
+
+    /// Installs the test logger (default level `info`); repeated calls are harmless
+    /// because the `try_init` error is ignored.
+    fn init() {
+        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+            .is_test(true)
+            .try_init();
+    }
+
+    /// Path of the diagnostic BAM file for sample `ID`.
+    fn diag_bam_path() -> String {
+        format!("{RESULT_DIR}/{ID}/diag/{ID}_diag_hs1.bam")
+    }
+
+    /// Loads a single bin and logs its read count, SA count and max start/end count.
+    #[test]
+    fn bin() -> anyhow::Result<()> {
+        init();
+        // Window starting 10 bases before position 91,902,888.
+        let start = 91_902_888 - 10;
+        let bin_length = 1000;
+        let bam_path = diag_bam_path();
+
+        let bin = Bin::new(&bam_path, CONTIG, start, bin_length)?;
+        info!(
+            "{}:{}-{}\tn_reads: {}\tn_sa: {}\tmax_start_end: {}",
+            bin.contig,
+            bin.start,
+            bin.end,
+            bin.n_reads(),
+            bin.n_sa(),
+            bin.max_start_or_end().1
+        );
+        Ok(())
+    }
+
+    /// Scans a 1 Mb window and logs every bin, printing in red the counts whose
+    /// per-read ratio is returned by the modified z-score filter.
+    #[test]
+    fn par() {
+        init();
+        let start = 91_000_000;
+        let end = start + 1_000_000;
+        let bin_length = 1000;
+        let bam_path = diag_bam_path();
+
+        let res = scan(&bam_path, CONTIG, start, end, bin_length);
+
+        // Per-bin ratios (count / number of reads), kept aligned with the bin starts.
+        let mut indices: Vec<u32> = Vec::new();
+        let mut sa_ratios: Vec<f64> = Vec::new();
+        let mut se_ratios: Vec<f64> = Vec::new();
+        for (index, n, sa, se) in res.iter() {
+            let n_reads = *n as f64;
+            indices.push(*index);
+            sa_ratios.push(*sa as f64 / n_reads);
+            se_ratios.push(se.1 as f64 / n_reads);
+        }
+
+        let filtered_sa_indices =
+            filter_outliers_modified_z_score_with_indices(sa_ratios, indices.clone());
+        let filtered_se_indices = filter_outliers_modified_z_score_with_indices(se_ratios, indices);
+
+        const RESET: &str = "\x1b[0m"; // Reset to default color
+        const RED: &str = "\x1b[31m"; // Red text
+
+        res.iter().for_each(|(p, n, sa, se)| {
+            let se = se.1;
+            let sa = if filtered_sa_indices.contains(p) {
+                format!("{RED}{sa}{RESET}")
+            } else {
+                sa.to_string()
+            };
+            let se = if filtered_se_indices.contains(p) {
+                format!("{RED}{se}{RESET}")
+            } else {
+                se.to_string()
+            };
+            info!("{}:{}-{}\t{n}\t{sa}\t{se}", CONTIG, p, p + bin_length - 1,);
+        });
+    }
+
+    /// Runs `scan_outliers` over a 1 Mb window and logs only the bins flagged
+    /// as SA or start/end outliers.
+    #[test]
+    fn outliers() {
+        init();
+        let start = 91_000_000;
+        let end = start + 1_000_000;
+        let bin_length = 1000;
+        let bam_path = diag_bam_path();
+
+        scan_outliers(&bam_path, CONTIG, start, end, bin_length)
+            .iter()
+            .filter(|(_, _, _, sa_outlier, _, se_outlier)| *sa_outlier || *se_outlier)
+            .for_each(|(pos, n, sa, _, se, _)| info!("{pos}\t{n}\t{sa}\t{se}"));
+    }
+}