Browse Source

first commit

Thomas 2 năm trước cách đây
commit
22dc578344
3 tập tin đã thay đổi với 481 bổ sung và 0 xóa
  1. 335 0
      Cargo.lock
  2. 13 0
      Cargo.toml
  3. 133 0
      src/main.rs

+ 335 - 0
Cargo.lock

@@ -0,0 +1,335 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "ahash"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+]
+
+[[package]]
+name = "allocator-api2"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4f263788a35611fba42eb41ff811c5d0360c58b97402570312a350736e2542e"
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "corr"
+version = "0.1.0"
+dependencies = [
+ "hashbrown 0.14.0",
+ "ndarray",
+ "ndarray-stats",
+ "rayon",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
+dependencies = [
+ "cfg-if",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
+dependencies = [
+ "autocfg",
+ "cfg-if",
+ "crossbeam-utils",
+ "memoffset",
+ "scopeguard",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "either"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
+
+[[package]]
+name = "getrandom"
+version = "0.2.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+ "rayon",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
+dependencies = [
+ "autocfg",
+ "hashbrown 0.12.3",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.146"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b"
+
+[[package]]
+name = "matrixmultiply"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77"
+dependencies = [
+ "autocfg",
+ "rawpointer",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "ndarray"
+version = "0.15.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
+dependencies = [
+ "matrixmultiply",
+ "num-complex",
+ "num-integer",
+ "num-traits",
+ "rawpointer",
+]
+
+[[package]]
+name = "ndarray-stats"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af5a8477ac96877b5bd1fd67e0c28736c12943aba24eda92b127e036b0c8f400"
+dependencies = [
+ "indexmap",
+ "itertools",
+ "ndarray",
+ "noisy_float",
+ "num-integer",
+ "num-traits",
+ "rand",
+]
+
+[[package]]
+name = "noisy_float"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978fe6e6ebc0bf53de533cd456ca2d9de13de13856eda1518a285d7705a213af"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-complex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
+dependencies = [
+ "autocfg",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
+dependencies = [
+ "hermit-abi",
+ "libc",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rawpointer"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
+
+[[package]]
+name = "rayon"
+version = "1.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-deque",
+ "crossbeam-utils",
+ "num_cpus",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

+ 13 - 0
Cargo.toml

@@ -0,0 +1,13 @@
+[package]
+name = "corr"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+ndarray = "0.15.6"
+ndarray-stats = "0.5.1"
+rayon = "1.7.0"
+hashbrown = {version = "0.14.0", features = ["rayon"]}

+ 133 - 0
src/main.rs

@@ -0,0 +1,133 @@
+use std::error::Error;
+use std::fs::File;
+use std::io::{BufReader, BufRead};
+use std::sync::{Arc, Mutex};
+
+use rayon::prelude::*;
+use hashbrown::HashMap;
+
+use ndarray::Array2;
+use ndarray_stats::CorrelationExt;
+use std::io::Write;
+
+/// Correlates every enhancer row with every RNA row of a tab-separated table and writes the
+/// resulting score matrix to `output.tsv`.
+///
+/// The input path is hard-coded below. The first line is skipped as a header; in every other
+/// line column 0 is the row identifier, columns 1-4 are ignored and columns 5 onwards are
+/// parsed as `f32` values. Rows whose identifier starts with `EH38E` are stored as enhancers,
+/// all remaining rows as RNAs.
+///
+/// The output has one header line (`ID` followed by the sorted RNA identifiers) and one line
+/// per enhancer, sorted by identifier, holding that enhancer's score against each RNA. Every
+/// cell, including the last one of a line, is terminated by a tab.
+///
+/// # Errors
+///
+/// Fails if the input cannot be opened or read, a value cannot be parsed as `f32`, two rows
+/// being correlated differ in length or hold no values, or the output cannot be written.
+fn main() -> Result<(), Box<dyn Error>> {
+    let filename = "/Turbine-pool/LAL-T_ChIP/data/all_sub.tsv";
+    let enhancer_prefix = "EH38E";
+
+    let reader = BufReader::new(File::open(filename)?);
+
+    let mut rna_set: HashMap<String, Vec<f32>> = HashMap::default();
+    let mut enhancer_set: HashMap<String, Vec<f32>> = HashMap::default();
+
+    // `skip(1)` drops the header line.
+    for line in reader.lines().skip(1) {
+        // Propagate read errors (I/O failure, invalid UTF-8) instead of silently dropping the
+        // affected line, which would produce an incomplete matrix without any warning.
+        let line = line?;
+        let mut fields = line.split('\t');
+        // `split` always yields at least one item, so the fallback is never actually used.
+        let key = fields.next().unwrap_or_default().to_string();
+        // Columns 1-4 are not used; the values start at column 5.
+        let values = fields
+            .skip(4)
+            .map(str::parse::<f32>)
+            .collect::<Result<Vec<f32>, _>>()?;
+
+        if key.starts_with(enhancer_prefix) {
+            enhancer_set.insert(key, values);
+        } else {
+            rna_set.insert(key, values);
+        }
+    }
+    println!("{} enhancers", enhancer_set.len());
+    println!("{} rnas", rna_set.len());
+
+    // Each enhancer/RNA pair is independent, so the parallel iterators build the result maps
+    // directly with `collect`. This avoids funnelling every insert through a shared mutex and
+    // turns shape and empty-input failures into errors instead of worker-thread panics.
+    let enhancers_to_rnas = enhancer_set
+        .par_iter()
+        .map(|(a_k, a_v)| -> Result<(String, HashMap<String, f32>), String> {
+            println!("correlation for {}", a_k);
+
+            let correlations = rna_set
+                .par_iter()
+                .map(|(b_k, b_v)| -> Result<(String, f32), String> {
+                    // Stack the two series as the rows of a 2 x n matrix.
+                    let mut data = Vec::with_capacity(a_v.len() + b_v.len());
+                    data.extend_from_slice(a_v);
+                    data.extend_from_slice(b_v);
+                    let pair = Array2::from_shape_vec((2, a_v.len()), data)
+                        .map_err(|e| format!("{} vs {}: {}", a_k, b_k, e))?;
+
+                    let corr = pair
+                        .pearson_correlation()
+                        .map_err(|e| format!("{} vs {}: {}", a_k, b_k, e))?;
+                    // Keep the smallest entry of the 2 x 2 correlation matrix, exactly as
+                    // before. `f32::min` ignores NaN operands unless every entry is NaN.
+                    let score = corr
+                        .iter()
+                        .copied()
+                        .reduce(f32::min)
+                        .ok_or_else(|| format!("{} vs {}: no correlation", a_k, b_k))?;
+
+                    Ok((b_k.clone(), score))
+                })
+                .collect::<Result<HashMap<String, f32>, String>>()?;
+
+            Ok((a_k.clone(), correlations))
+        })
+        .collect::<Result<HashMap<String, HashMap<String, f32>>, String>>()?;
+    println!("{:?} enhancers results to write...", enhancers_to_rnas.len());
+
+    let mut rna_keys: Vec<&String> = rna_set.keys().collect();
+    rna_keys.sort();
+
+    // Hash map iteration order is arbitrary; sort the rows so repeated runs write the same file.
+    let mut rows: Vec<(&String, &HashMap<String, f32>)> = enhancers_to_rnas.iter().collect();
+    rows.sort_unstable_by(|left, right| left.0.cmp(right.0));
+
+    // The matrix is written one cell at a time, so buffer the writes instead of handing every
+    // single cell to the underlying file separately.
+    let mut file = std::io::BufWriter::new(File::create("output.tsv")?);
+
+    // Write the header row
+    write!(file, "ID\t")?;
+    for rna_key in &rna_keys {
+        write!(file, "{}\t", rna_key)?;
+    }
+    writeln!(file)?;
+
+    for (enhancer, inner_map) in rows {
+        write!(file, "{}\t", enhancer)?;
+
+        for rna_key in &rna_keys {
+            // Write the value if present, otherwise write an empty cell
+            match inner_map.get(*rna_key) {
+                Some(value) => write!(file, "{}\t", value)?,
+                None => write!(file, "\t")?,
+            }
+        }
+        writeln!(file)?;
+    }
+
+    // Flush explicitly: errors raised while dropping a `BufWriter` are silently discarded.
+    file.flush()?;
+    Ok(())
+}