Thomas 1 سال پیش
والد
کامیت
b115eecf5c
3 فایلهای تغییر یافته به همراه 114 افزوده شده و 115 حذف شده
  1. 68 56
      Cargo.lock
  2. 6 6
      src/bin.rs
  3. 40 53
      src/lib.rs

+ 68 - 56
Cargo.lock

@@ -3,10 +3,10 @@
 version = 3
 
 [[package]]
-name = "adler"
-version = "1.0.2"
+name = "adler2"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
+checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
 
 [[package]]
 name = "ahash"
@@ -101,9 +101,9 @@ dependencies = [
 
 [[package]]
 name = "arrayvec"
-version = "0.7.4"
+version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "atomic-polyfill"
@@ -160,7 +160,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -200,9 +200,9 @@ dependencies = [
 
 [[package]]
 name = "bytemuck"
-version = "1.16.3"
+version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83"
+checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31"
 
 [[package]]
 name = "byteorder"
@@ -229,12 +229,13 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.1.8"
+version = "1.1.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549"
+checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48"
 dependencies = [
  "jobserver",
  "libc",
+ "shlex",
 ]
 
 [[package]]
@@ -265,9 +266,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.14"
+version = "4.5.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c937d4061031a6d0c8da4b9a4f98a172fc2976dfb1c19213a9cf7d0d3c837e36"
+checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -275,9 +276,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.14"
+version = "4.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85379ba512b21a328adf887e85f7742d12e96eb31f3ef077df4ffc26b506ffed"
+checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
 dependencies = [
  "anstream",
  "anstyle",
@@ -294,7 +295,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -305,9 +306,9 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
 
 [[package]]
 name = "cmake"
-version = "0.1.50"
+version = "0.1.51"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
+checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a"
 dependencies = [
  "cc",
 ]
@@ -481,7 +482,7 @@ checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -547,6 +548,12 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
 
+[[package]]
+name = "embedded-io"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
+
 [[package]]
 name = "encode_unicode"
 version = "0.3.6"
@@ -602,9 +609,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
 
 [[package]]
 name = "flate2"
-version = "1.0.31"
+version = "1.0.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920"
+checksum = "9c0596c1eac1f9e04ed902702e9878208b336edc9d6fddc8a48387349bab3666"
 dependencies = [
  "crc32fast",
  "miniz_oxide",
@@ -713,9 +720,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
 [[package]]
 name = "hermit-abi"
-version = "0.3.9"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
+checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
 
 [[package]]
 name = "hts-sys"
@@ -758,9 +765,9 @@ checksum = "9007da9cacbd3e6343da136e98b0d2df013f553d35bdec8b518f07bea768e19c"
 
 [[package]]
 name = "indexmap"
-version = "2.3.0"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0"
+checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c"
 dependencies = [
  "equivalent",
  "hashbrown",
@@ -802,9 +809,9 @@ dependencies = [
 
 [[package]]
 name = "is-terminal"
-version = "0.4.12"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
+checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b"
 dependencies = [
  "hermit-abi",
  "libc",
@@ -919,9 +926,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.155"
+version = "0.2.158"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
+checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
 
 [[package]]
 name = "libloading"
@@ -961,9 +968,9 @@ dependencies = [
 
 [[package]]
 name = "libz-sys"
-version = "1.1.18"
+version = "1.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c15da26e5af7e25c90b37a2d75cdbf940cf4a55316de9d84c679c9b8bfabf82e"
+checksum = "fdc53a7799a7496ebc9fd29f31f7df80e83c9bda5299768af5f9e59eeea74647"
 dependencies = [
  "cc",
  "cmake",
@@ -1029,11 +1036,11 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 
 [[package]]
 name = "miniz_oxide"
-version = "0.7.4"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08"
+checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
 dependencies = [
- "adler",
+ "adler2",
 ]
 
 [[package]]
@@ -1062,7 +1069,7 @@ checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -1309,7 +1316,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
 [[package]]
 name = "pandora_lib_pileup"
 version = "0.1.0"
-source = "git+https://git.t0m4.fr/Thomas/pandora_lib_pileup.git#896ce97f874980cdcc22f3c477454e714363caf3"
+source = "git+https://git.t0m4.fr/Thomas/pandora_lib_pileup.git#381addffe723a828effcb5aabc00ada14586ba5f"
 dependencies = [
  "anyhow",
  "average",
@@ -1345,16 +1352,18 @@ dependencies = [
 [[package]]
 name = "pandora_lib_variants"
 version = "0.1.0"
-source = "git+https://git.t0m4.fr/Thomas/pandora_lib_variants.git#eaefae4807aadf2753973870a555a309176fdf5c"
+source = "git+https://git.t0m4.fr/Thomas/pandora_lib_variants.git#df63d1b17adbb16b85fb68ba4dd1a515868635c4"
 dependencies = [
  "anyhow",
  "bgzip",
  "clap",
  "confy",
+ "crossbeam-channel",
  "crossbeam-deque",
  "csv",
  "dashmap",
  "env_logger",
+ "flate2",
  "hashbrown",
  "indicatif",
  "indicatif-log-bridge",
@@ -1368,8 +1377,10 @@ dependencies = [
  "noodles-sam",
  "noodles-tabix",
  "noodles-vcf",
+ "num-format",
  "num-integer",
  "pandora_lib_pileup",
+ "postcard",
  "pot",
  "prettytable-rs",
  "rayon",
@@ -1424,12 +1435,13 @@ checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
 
 [[package]]
 name = "postcard"
-version = "1.0.8"
+version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a55c51ee6c0db07e68448e336cf8ea4131a620edefebf9893e759b2d793420f8"
+checksum = "20ee10b999a00ca189ac2cb99f5db1ca71fb7371e3d5f493b879ca95d2a67220"
 dependencies = [
  "cobs",
- "embedded-io",
+ "embedded-io 0.4.0",
+ "embedded-io 0.6.1",
  "heapless",
  "serde",
 ]
@@ -1593,9 +1605,9 @@ dependencies = [
 
 [[package]]
 name = "redox_users"
-version = "0.4.5"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891"
+checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
 dependencies = [
  "getrandom",
  "libredox",
@@ -1741,29 +1753,29 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
 
 [[package]]
 name = "serde"
-version = "1.0.205"
+version = "1.0.208"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150"
+checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.205"
+version = "1.0.208"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1"
+checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.122"
+version = "1.0.125"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
+checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed"
 dependencies = [
  "itoa",
  "memchr",
@@ -1864,7 +1876,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -1880,9 +1892,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.72"
+version = "2.0.75"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
+checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1917,7 +1929,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -2050,7 +2062,7 @@ dependencies = [
  "proc-macro-error",
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]
 
 [[package]]
@@ -2083,9 +2095,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
 [[package]]
 name = "wide"
-version = "0.7.26"
+version = "0.7.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "901e8597c777fa042e9e245bd56c0dc4418c5db3f845b6ff94fbac732c6a0692"
+checksum = "b828f995bf1e9622031f8009f8481a85406ce1f4d4588ff746d872043e855690"
 dependencies = [
  "bytemuck",
  "safe_arch",
@@ -2279,5 +2291,5 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.72",
+ "syn 2.0.75",
 ]

+ 6 - 6
src/bin.rs

@@ -237,7 +237,7 @@ pub fn scan_outliers(
     start: u32,
     end: u32,
     length: u32,
-) -> Vec<(u32, usize, f64, bool, f64, bool)> {
+) -> Vec<(u32, usize, u32, f64, bool, f64, bool)> {
     let mut starts = Vec::new();
     let mut current = start;
     while current <= end {
@@ -246,7 +246,7 @@ pub fn scan_outliers(
     }
     // println!("group {contig}:{}-{}", starts.first().unwrap(), starts.last().unwrap() + length - 1);
 
-    let ratios: Vec<(u32, usize, f64, f64)> = starts
+    let ratios: Vec<(u32, usize, u32, f64, f64)> = starts
         .into_par_iter()
         .filter_map(|start| {
             match Bin::new(bam_path, contig, start, length) {
@@ -259,7 +259,7 @@ pub fn scan_outliers(
                     } else {
                         (0.0, 0.0)
                     };
-                    return Some((start, n, r_sa, r_se));
+                    return Some((start, n, bin.n_low_mapq, r_sa, r_se));
                 }
                 Err(e) => warn!("{e}"),
             }
@@ -271,7 +271,7 @@ pub fn scan_outliers(
         .par_iter()
         .fold(
             || (Vec::new(), Vec::new(), Vec::new()), // Initial value
-            |(mut indices, mut sa_ratios, mut se_ratios), (index, _, sa_ratio, se_ratio)| {
+            |(mut indices, mut sa_ratios, mut se_ratios), (index, _, _, sa_ratio, se_ratio)| {
                 indices.push(*index);
                 sa_ratios.push(*sa_ratio);
                 se_ratios.push(*se_ratio);
@@ -297,10 +297,10 @@ pub fn scan_outliers(
     // }
     ratios
         .iter()
-        .map(|(p, n, sa, se)| {
+        .map(|(p, n, n_low_mapq, sa, se)| {
             let sa_outlier = filtered_sa_indices.contains(p);
             let se_outlier = filtered_se_indices.contains(p);
-            (*p, *n, *sa, sa_outlier, *se, se_outlier)
+            (*p, *n, *n_low_mapq, *sa, sa_outlier, *se, se_outlier)
         })
         .collect()
 }

+ 40 - 53
src/lib.rs

@@ -14,7 +14,6 @@ use std::{
 
 pub mod bam;
 pub mod bin;
-pub mod phase;
 
 #[derive(Debug)]
 pub struct Config {
@@ -50,11 +49,11 @@ pub fn scan_save(
     let outliers_records: Vec<Vec<Record>> =
         scan_outliers(bam_path, contig, start, end, bin_length)
             .iter()
-            .map(|(start, n, sa, sa_outlier, se, se_outlier)| {
+            .map(|(start, n, n_low_mapq, sa, sa_outlier, se, se_outlier)| {
                 writer
                     .write_all(
                         format!(
-                            "{}:{start}-{}\t{n}\t{sa}\t{}\t{se}\t{}\n",
+                            "{}:{start}-{}\t{n}\t{n_low_mapq}\t{sa}\t{}\t{se}\t{}\n",
                             contig,
                             start + bin_length - 1,
                             sa_outlier,
@@ -63,10 +62,11 @@ pub fn scan_save(
                         .as_bytes(),
                     )
                     .unwrap();
-                (start, *n, *sa, *sa_outlier, *se, *se_outlier)
+                (start, *n, n_low_mapq, *sa, *sa_outlier, *se, *se_outlier)
             })
-            .filter(|(_, _, _, sa_outlier, _, se_outlier)| *sa_outlier || *se_outlier)
-            .map(|(start, _n, _sa, sa_outlier, _se, se_outlier)| {
+            .filter(|(_, n, n_low_mapq, _, _, _, _)| *n > **n_low_mapq as usize)
+            .filter(|(_, _, _, _, sa_outlier, _, se_outlier)| *sa_outlier || *se_outlier)
+            .map(|(start, _n, _, _sa, sa_outlier, _se, se_outlier)| {
                 let mut bin = Bin::new(bam_path, contig, *start, bin_length).unwrap();
                 let mut records = Vec::new();
                 if sa_outlier {
@@ -75,16 +75,6 @@ pub fn scan_save(
                     let (pos, _) = bin.max_start_or_end();
                     records.extend(bin.se_primary(pos));
                 };
-                // writeln!(
-                //     writer,
-                //     "{}:{start}-{}\t{n}\t{sa}\t{}\t{se}\t{}\t{}",
-                //     contig,
-                //     start + bin_length - 1,
-                //     sa_outlier,
-                //     se_outlier,
-                //     records.len(),
-                // )
-                // .unwrap();
                 records
             })
             .collect();
@@ -279,10 +269,6 @@ pub fn par_whole_scan(contig: &str, bam_path: &str, out_dir: &str) -> anyhow::Re
 
 #[cfg(test)]
 mod tests {
-    use indicatif::MultiProgress;
-    use indicatif_log_bridge::LogWrapper;
-
-    use crate::phase::{load_phases, phase, PhaserConfig};
 
     use super::*;
 
@@ -295,47 +281,48 @@ mod tests {
     #[test]
     fn par() {
         init();
+        let id = "SPINATO";
         let tmp_dir = format!("/data/tmp/scan_{}", uuid::Uuid::new_v4());
         fs::create_dir_all(&tmp_dir).unwrap();
         info!("Creating {tmp_dir}");
         par_whole_scan(
             "chr9",
-            "/data/longreads_basic_pipe/SALICETTO/diag/SALICETTO_diag_hs1.bam",
+            &format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam"),
             &tmp_dir,
         )
         .unwrap();
     }
 
-    #[test]
-    fn phasing() -> anyhow::Result<()> {
-        let id = "SALICETTO";
-        let min_records = 2;
-
-        let logger =
-            env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
-                .build();
-        let multi = MultiProgress::new();
-        LogWrapper::new(multi.clone(), logger).try_init().unwrap();
-
-        let config = PhaserConfig::new(id, "/data/longreads_basic_pipe", min_records, 0.33);
-        phase(config, multi)
-    }
-
-    #[test]
-    fn load_phase() -> anyhow::Result<()> {
-        init();
-        let id = "SALICETTO";
-        let contig = "chr7";
-        let phases_dir = format!("/data/longreads_basic_pipe/{id}/diag/phases");
-        let phase_path = format!("{phases_dir}/{id}_{contig}_phases.postcard.gz");
-        let p = load_phases(&phase_path)?;
-        info!("{} phases", p.len());
-
-        for phase in p {
-            if let Some(phase_id) = &phase.id {
-                info!("{}\t{}", phase_id, phase.mean_vaf());
-            }
-        }
-        Ok(())
-    }
+    // #[test]
+    // fn phasing() -> anyhow::Result<()> {
+    //     let id = "SALICETTO";
+    //     let min_records = 2;
+    //
+    //     let logger =
+    //         env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+    //             .build();
+    //     let multi = MultiProgress::new();
+    //     LogWrapper::new(multi.clone(), logger).try_init().unwrap();
+    //
+    //     let config = PhaserConfig::new(id, "/data/longreads_basic_pipe", min_records, 0.33);
+    //     phase(config, multi)
+    // }
+    //
+    // #[test]
+    // fn load_phase() -> anyhow::Result<()> {
+    //     init();
+    //     let id = "SALICETTO";
+    //     let contig = "chr7";
+    //     let phases_dir = format!("/data/longreads_basic_pipe/{id}/diag/phases");
+    //     let phase_path = format!("{phases_dir}/{id}_{contig}_phases.postcard.gz");
+    //     let p = load_phases(&phase_path)?;
+    //     info!("{} phases", p.len());
+    //
+    //     for phase in p {
+    //         if let Some(phase_id) = &phase.id {
+    //             info!("{}\t{}", phase_id, phase.mean_vaf());
+    //         }
+    //     }
+    //     Ok(())
+    // }
 }