Thomas 1 year ago
parent
commit
1ff296803b
4 changed files with 298 additions and 24 deletions
  1. 240 10
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 32 0
      src/config.rs
  4. 25 14
      src/lib.rs

+ 240 - 10
Cargo.lock

@@ -17,6 +17,51 @@ version = "1.0.86"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
 
+[[package]]
+name = "bitflags"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "confy"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45b1f4c00870f07dc34adcac82bb6a72cc5aabca8536ba1797e01df51d2ce9a0"
+dependencies = [
+ "directories",
+ "serde",
+ "thiserror",
+ "toml",
+]
+
+[[package]]
+name = "directories"
+version = "5.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35"
+dependencies = [
+ "dirs-sys",
+]
+
+[[package]]
+name = "dirs-sys"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
+dependencies = [
+ "libc",
+ "option-ext",
+ "redox_users",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.10.2"
@@ -30,6 +75,29 @@ dependencies = [
  "termcolor",
 ]
 
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
+[[package]]
+name = "getrandom"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.9"
@@ -42,6 +110,16 @@ version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
 
+[[package]]
+name = "indexmap"
+version = "2.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
 [[package]]
 name = "is-terminal"
 version = "0.4.12"
@@ -50,7 +128,7 @@ checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
 dependencies = [
  "hermit-abi",
  "libc",
- "windows-sys",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -65,6 +143,16 @@ version = "0.2.155"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
 
+[[package]]
+name = "libredox"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
+dependencies = [
+ "bitflags",
+ "libc",
+]
+
 [[package]]
 name = "log"
 version = "0.4.22"
@@ -77,11 +165,18 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "option-ext"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+
 [[package]]
 name = "pandora_lib_blastn"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "confy",
  "env_logger",
  "log",
  "serde",
@@ -107,6 +202,17 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "redox_users"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891"
+dependencies = [
+ "getrandom",
+ "libredox",
+ "thiserror",
+]
+
 [[package]]
 name = "regex"
 version = "1.10.5"
@@ -173,6 +279,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_spanned"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.70"
@@ -213,19 +328,68 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "toml"
+version = "0.8.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335"
+dependencies = [
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_edit",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.22.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d59a3a72298453f564e2b111fa896f8d07fabb36f51f06d7e875fc5e0b5a3ef1"
+dependencies = [
+ "indexmap",
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
 
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
 [[package]]
 name = "winapi-util"
 version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -234,7 +398,22 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
 dependencies = [
- "windows-targets",
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
 ]
 
 [[package]]
@@ -243,28 +422,46 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
 dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
  "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
 ]
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.52.6"
@@ -277,26 +474,59 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "winnow"
+version = "0.6.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1"
+dependencies = [
+ "memchr",
+]

+ 1 - 0
Cargo.toml

@@ -10,3 +10,4 @@ env_logger = "^0.10.1"
 anyhow = "^1.0.75"
 serde_json = "1.0.120"
 serde = { version = "1.0.204", features = ["derive"] }
+confy = "0.6.1"

+ 32 - 0
src/config.rs

@@ -0,0 +1,32 @@
+use std::path::PathBuf;
+
+use serde::{Serialize, Deserialize};
+use anyhow::Result;
+
+/// Runtime settings for the blastn wrapper, (de)serialized with serde.
+///
+/// Instances are obtained through `Config::get`; the `Default` impl carries
+/// the values that used to be hard-coded in `lib.rs`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Config {
+    /// BLAST database read by `run_blastn`.
+    pub blast_db: String,
+    /// Location of the `blastn` executable read by `run_blastn`.
+    pub blastn_bin: String,
+    /// Value forwarded to blastn's `-num_threads` argument.
+    pub num_threads: usize,
+    /// Upper bound on the number of parsed hits `best_blastn` keeps before
+    /// laying them out.
+    pub max_results: usize,
+}
+
+/// Defaults matching the values that were previously hard-coded in `lib.rs`.
+impl Default for Config {
+    fn default() -> Self {
+        Config {
+            blast_db: String::from("/data/ref/hs1/hs1_simple_chr.fa"),
+            blastn_bin: String::from("/usr/bin/blastn"),
+            num_threads: 6,
+            max_results: 1000,
+        }
+    }
+}
+
+impl Config {
+    /// Loads the configuration that confy stores under the application name
+    /// `pandora_lib_blastn`, using confy's default configuration name.
+    ///
+    /// # Errors
+    ///
+    /// Any error returned by `confy::load` is propagated as an `anyhow::Error`.
+    pub fn get() -> Result<Self> {
+        let loaded: Self = confy::load("pandora_lib_blastn", None)?;
+        Ok(loaded)
+    }
+
+    /// Returns the configuration file path confy resolves for the same
+    /// application name and default configuration name.
+    ///
+    /// # Errors
+    ///
+    /// Any error returned by `confy::get_configuration_file_path` is
+    /// propagated as an `anyhow::Error`.
+    pub fn path() -> Result<PathBuf> {
+        let location = confy::get_configuration_file_path("pandora_lib_blastn", None)?;
+        Ok(location)
+    }
+}

+ 25 - 14
src/lib.rs

@@ -1,3 +1,5 @@
+mod config;
+
 use std::{
     fs::{self, File},
     io::Write,
@@ -5,14 +7,16 @@ use std::{
 };
 
 use anyhow::{Context, Ok};
+use config::Config;
 use log::info;
-use serde::{Deserialize, Serialize};
+use serde::Serialize;
 
 // cat /home/prom/Documents/Programmes/desc_seq_lib/data_test/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta | blastn -db hs1_simple_chr.fa -outfmt 6
 pub fn run_blastn(query_fa_path: &str) -> anyhow::Result<String> {
     info!("Running blastn in {query_fa_path}");
-    let blastn_bin = "/usr/bin/blastn";
-    let blast_db = "/data/ref/hs1/hs1_simple_chr.fa";
+    let config = Config::get()?;
+    let blastn_bin = config.blastn_bin;
+    let blast_db = config.blast_db;
 
     let file_contents = fs::read_to_string(query_fa_path).context("Failed to read the file")?;
 
@@ -22,7 +26,7 @@ pub fn run_blastn(query_fa_path: &str) -> anyhow::Result<String> {
         .arg("-outfmt")
         .arg("6")
         .arg("-num_threads")
-        .arg("6")
+        .arg(config.num_threads.to_string())
         .stdin(Stdio::piped())
         .stdout(Stdio::piped())
         .spawn()
@@ -52,7 +56,7 @@ pub fn parse_blastn_touput(output: String) -> Vec<BlastResult> {
     results
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize)]
 pub struct BlastResult {
     pub query_id: String,
     pub subject_id: String,
@@ -135,15 +139,14 @@ fn sort_blast_pile(objects: &mut Vec<BlastPile>) {
     objects.sort_by(|a, b| a.x.partial_cmp(&b.x).unwrap());
 }
 
-fn allocate_y_positions(objects: &mut Vec<BlastPile>) {
-    sort_blast_pile(objects);
+fn allocate_y_positions(pile: &mut Vec<BlastPile>) {
+    sort_blast_pile(pile);
 
     let mut active_intervals: Vec<(usize, usize)> = vec![]; // Stores (x + length, y) tuples
 
-    for obj in objects.iter_mut() {
+    for obj in pile.iter_mut() {
         let mut y_position = 0;
 
-        // Find the lowest y position where it does not overlap
         loop {
             let overlap = active_intervals
                 .iter()
@@ -154,30 +157,35 @@ fn allocate_y_positions(objects: &mut Vec<BlastPile>) {
             y_position += 1;
         }
 
-        // Assign the y position
         obj.y = y_position;
 
-        // Add the current object to active intervals
         active_intervals.push((obj.x + obj.length, obj.y));
 
-        // Remove intervals that are no longer active
         active_intervals.retain(|(end_x, _)| obj.x < *end_x);
     }
 }
 
 pub fn best_blastn(fa_path: &str) -> anyhow::Result<Vec<BlastResult>> {
+    let config = Config::get()?;
+
     let res = run_blastn(fa_path)?;
-    let mut results: Vec<BlastPile> = parse_blastn_touput(res)
+    let res = parse_blastn_touput(res);
+    // res.sort_by(|a, b| b.bit_score.total_cmp(&a.bit_score));
+    // res.sort_by(|a, b| b.percent_identity.total_cmp(&a.percent_identity));
+
+    let mut results: Vec<BlastPile> = res
         .iter()
-        .take(1000)
+        .take(config.max_results)
         .map(|v| BlastPile::new(v))
         .collect();
     allocate_y_positions(&mut results);
+
     let results: Vec<BlastResult> = results
         .into_iter()
         .filter(|r| r.y == 0)
         .map(|r| r.data)
         .collect();
+
     Ok(results)
 }
 
@@ -190,9 +198,12 @@ mod tests {
         let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
             .build();
 
+        info!("best blastn results with config: {}", config::Config::path()?.to_string_lossy());
+
         let fa = "test_data/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta";
 
         let res = best_blastn(fa)?;
+        println!("{res:?}");
 
         assert_eq!(res[0].q_end, 2838);
         assert_eq!(res[1].q_start, 2935);