Thomas 3 weeks ago
Parent
Commit
7ba042df8d
11 changed files with 60 additions and 163 deletions
  1. Cargo.lock (+31 -74)
  2. Cargo.toml (+5 -7)
  3. pandora-config.example.toml (+2 -2)
  4. src/annotation/ncbi.rs (+10 -5)
  5. src/annotation/vep.rs (+1 -1)
  6. src/io/dict.rs (+1 -32)
  7. src/io/fasta.rs (+6 -6)
  8. src/io/gff.rs (+1 -1)
  9. src/lib.rs (+1 -1)
  10. src/locker.rs (+0 -32)
  11. src/variant/variant_collection.rs (+2 -2)

+ 31 - 74
Cargo.lock

@@ -1370,12 +1370,6 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
 
-[[package]]
-name = "fastrand"
-version = "2.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
-
 [[package]]
 name = "fdeflate"
 version = "0.3.7"
@@ -1454,6 +1448,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
 [[package]]
 name = "fontconfig-parser"
 version = "0.5.8"
@@ -1746,7 +1746,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
  "allocator-api2",
  "equivalent",
- "foldhash",
+ "foldhash 0.1.5",
  "rayon",
  "serde",
 ]
@@ -1756,6 +1756,12 @@ name = "hashbrown"
 version = "0.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash 0.2.0",
+ "rayon",
+]
 
 [[package]]
 name = "hashlink"
@@ -2521,18 +2527,6 @@ dependencies = [
  "flate2",
 ]
 
-[[package]]
-name = "noodles-bgzf"
-version = "0.35.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c6786136e224bdb8550b077ad44ef2bd5ebc8b06d07fab69aaa7f47d06f0da75"
-dependencies = [
- "byteorder",
- "bytes",
- "crossbeam-channel",
- "flate2",
-]
-
 [[package]]
 name = "noodles-bgzf"
 version = "0.45.0"
@@ -2553,15 +2547,6 @@ dependencies = [
  "bstr",
 ]
 
-[[package]]
-name = "noodles-core"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "962b13b79312f773a12ffcb0cdaccab6327f8343b6f440a888eff10c749d52b0"
-dependencies = [
- "bstr",
-]
-
 [[package]]
 name = "noodles-core"
 version = "0.18.0"
@@ -2585,20 +2570,6 @@ dependencies = [
  "noodles-core 0.15.0",
 ]
 
-[[package]]
-name = "noodles-csi"
-version = "0.43.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "197f4c332f233135159b62bd9a6c35d0bf8366ccf0d7b9cbed3c6ec92a8e4464"
-dependencies = [
- "bit-vec 0.8.0",
- "bstr",
- "byteorder",
- "indexmap",
- "noodles-bgzf 0.35.0",
- "noodles-core 0.16.0",
-]
-
 [[package]]
 name = "noodles-csi"
 version = "0.53.0"
@@ -2627,15 +2598,14 @@ dependencies = [
 
 [[package]]
 name = "noodles-fasta"
-version = "0.48.0"
+version = "0.58.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce052e3ec1623aac185890eac7c6c76bca100876dce3507a58fbb29957ba1af2"
+checksum = "3e929a025d64f44c5e063cd8fc370fd002eb771942a458205d4fd6836fb22fe6"
 dependencies = [
  "bstr",
- "bytes",
  "memchr",
- "noodles-bgzf 0.35.0",
- "noodles-core 0.16.0",
+ "noodles-bgzf 0.45.0",
+ "noodles-core 0.18.0",
 ]
 
 [[package]]
@@ -2653,14 +2623,16 @@ dependencies = [
 
 [[package]]
 name = "noodles-gff"
-version = "0.43.0"
+version = "0.54.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d968405cf400568b24a47587b6e916d0cf118d7d38625c29472f491461f094e"
+checksum = "6b803bc5dac749a8d7c16a337255f27a0a755c9bde28cd362fe47643e6b5e557"
 dependencies = [
+ "bstr",
  "indexmap",
- "noodles-bgzf 0.35.0",
- "noodles-core 0.16.0",
- "noodles-csi 0.43.0",
+ "lexical-core",
+ "noodles-bgzf 0.45.0",
+ "noodles-core 0.18.0",
+ "noodles-csi 0.53.0",
  "percent-encoding",
 ]
 
@@ -2939,21 +2911,20 @@ dependencies = [
  "dashmap",
  "directories 6.0.0",
  "dirs",
- "duct",
  "env_logger",
  "flatbuffers",
  "glob",
- "hashbrown 0.15.5",
+ "hashbrown 0.16.1",
  "hex",
  "hostname",
  "indicatif 0.17.11",
  "itertools 0.14.0",
  "lazy_static",
  "log",
- "noodles-core 0.16.0",
+ "noodles-core 0.18.0",
  "noodles-csi 0.53.0",
- "noodles-fasta 0.48.0",
- "noodles-gff 0.43.0",
+ "noodles-fasta 0.58.0",
+ "noodles-gff 0.54.0",
  "num-format",
  "ordered-float",
  "pandora_lib_assembler",
@@ -2964,13 +2935,12 @@ dependencies = [
  "rayon",
  "regex",
  "rusqlite",
- "rust-htslib 0.50.0",
+ "rust-htslib 0.51.0",
  "rustc-hash 2.1.1",
  "semver 1.0.27",
  "serde",
  "serde_json",
  "tar",
- "tempfile",
  "thiserror 2.0.17",
  "toml 0.9.10+spec-1.1.0",
  "tracing",
@@ -3732,9 +3702,9 @@ dependencies = [
 
 [[package]]
 name = "rust-htslib"
-version = "0.50.0"
+version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7ec28c79f6b9ba90c45b33a519724708558dcaf73a3e37179586b8da9890630"
+checksum = "354416dd2300ff9e7aff8ddc747c875d6c5086f83c2cb2599f3692421c2b77fd"
 dependencies = [
  "bio-types",
  "byteorder",
@@ -4430,19 +4400,6 @@ dependencies = [
  "xattr",
 ]
 
-[[package]]
-name = "tempfile"
-version = "3.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
-dependencies = [
- "fastrand",
- "getrandom 0.3.4",
- "once_cell",
- "rustix 1.1.2",
- "windows-sys 0.61.2",
-]
-
 [[package]]
 name = "term"
 version = "0.7.0"

+ 5 - 7
Cargo.toml

@@ -19,24 +19,22 @@ tracing = "0.1.40"
 noodles-csi = "0.53.0"
 num-format = "0.4.4"
 byte-unit = "5.1.4"
-duct = "0.13.7"
 uuid = { version = "1.13.1", features = ["v4"] }
 rayon = "1.10.0"
-hashbrown = { version = "0.15.0", features = ["rayon"] }
+hashbrown = { version = "0.16.1", features = ["rayon"] }
 lazy_static = "1.5.0"
 indicatif = "0.17.8"
-rust-htslib = "0.50.0"
+rust-htslib = "0.51.0"
 arrow = { git = "https://github.com/apache/arrow-rs" }
 # arrow = "54.2.1"
 bgzip = "0.3.1"
-tempfile = "3.14.0"
 dashmap = { version = "6.1.0", features = ["rayon", "serde"] }
-noodles-fasta = "0.48.0"
-noodles-core = "0.16.0"
+noodles-fasta = "0.58.0"
+noodles-core = "0.18.0"
 blake3 = "1.5.5"
 rusqlite = { version = "0.33.0", features = ["chrono", "serde_json"] }
 dirs = "6.0.0"
-noodles-gff = "0.43.0"
+noodles-gff = "0.54.0"
 itertools = "0.14.0"
 rand = "0.9.0"
 tar = "0.4.43"

+ 2 - 2
pandora-config.example.toml

@@ -42,7 +42,7 @@ pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"
 dict_file = "/home/t_steimle/ref/hs1/chm13v2.0.dict"
 
 # RefSeq GFF3 annotation (sorted/indexed).
-refseq_gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
+refseq_gff = "/home/t_steimle/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
 
 # Template for mask BED file (low-quality / filtered regions).
 # {result_dir} -> global result directory
@@ -333,7 +333,7 @@ longphase_threads = 20
 
 # Threads for longphase modcall step.
 # limit memory usage here
-longphase_modcall_threads = 6
+longphase_modcall_threads = 4
 
 # Force longphase recomputation (haplotagging/phasing).
 longphase_force = false

+ 10 - 5
src/annotation/ncbi.rs

@@ -1,4 +1,5 @@
 use anyhow::{Context, Ok, Result};
+use noodles_gff::feature::{record_buf::attributes::field::Value, RecordBuf};
 use semver::Version;
 use serde::{Deserialize, Serialize};
 use std::str::FromStr;
@@ -25,7 +26,7 @@ pub struct NCBIGFF {
     pub regulatory_class: Option<String>,
 }
 
-impl From<noodles_gff::RecordBuf> for NCBIGFF {
+impl From<RecordBuf> for NCBIGFF {
     /// Converts a noodles_gff::RecordBuf into an NCBIGFF struct.
     ///
     /// This implementation extracts relevant information from a GFF record
@@ -40,13 +41,17 @@ impl From<noodles_gff::RecordBuf> for NCBIGFF {
     /// # Note
     /// This conversion handles both string and array attributes, joining array
     /// values with a space if necessary.
-    fn from(r: noodles_gff::RecordBuf) -> Self {
+    fn from(r: RecordBuf) -> Self {
         let attr = r.attributes();
 
         let inner_string = |name: &str| {
-            attr.get(name).map(|e| match e {
-                noodles_gff::record_buf::attributes::field::Value::String(s) => s.to_string(),
-                noodles_gff::record_buf::attributes::field::Value::Array(v) => v.join(" "),
+            attr.get(name.as_bytes()).map(|e| match e {
+                Value::String(s) => s.to_string(),
+                Value::Array(v) => v
+                    .iter()
+                    .map(|e| e.to_string())
+                    .collect::<Vec<String>>()
+                    .join(" "),
             })
         };
 

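The hunk above amounts to the noodles-gff 0.54 attribute API: keys are now looked up as byte strings, and array values must be stringified explicitly. A minimal standalone sketch of the same pattern, mirroring the `inner_string` closure in ncbi.rs (the helper name is illustrative, not part of the commit):

use noodles_gff::feature::{record_buf::attributes::field::Value, RecordBuf};

/// Returns the attribute `name` of a GFF record as a plain String,
/// joining array values with a single space (same behaviour as the
/// `inner_string` closure above).
fn attribute_string(record: &RecordBuf, name: &str) -> Option<String> {
    record.attributes().get(name.as_bytes()).map(|value| match value {
        Value::String(s) => s.to_string(),
        Value::Array(values) => values
            .iter()
            .map(|v| v.to_string())
            .collect::<Vec<String>>()
            .join(" "),
    })
}
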
+ 1 - 1
src/annotation/vep.rs

@@ -704,7 +704,7 @@ impl SbatchRunner for VepJob {
             job_name: Some("VEP".into()),
             partition: Some("shortq".into()),
             cpus_per_task: Some(10),
-            mem: Some("30G".into()),
+            mem: Some("10G".into()),
             gres: None,
         }
     }

+ 1 - 32
src/io/dict.rs

@@ -1,36 +1,5 @@
-use anyhow::{Context, Ok, Result};
-use csv::ReaderBuilder;
+use anyhow::{Context, Ok};
 use log::debug;
-use std::fs;
-
-// pub fn read_dict(path: &str) -> Result<Vec<(String, u32)>> {
-//     debug!("Parsing {path}.");
-//
-//     let mut reader = ReaderBuilder::new()
-//         .delimiter(b'\t')
-//         .flexible(true)
-//         .has_headers(false)
-//         .from_reader(fs::File::open(path)?);
-//
-//     let mut res = Vec::new();
-//     for line in reader.records() {
-//         let line = line.context("Can't parse dict file.")?;
-//         if line.get(0).context("Can't parse dict file.")? == "@SQ" {
-//             res.push((
-//                 line.get(1)
-//                     .map(|s| s.replace("SN:", ""))
-//                     .context("Can't parse dict file.")?
-//                     .parse()?,
-//                 line.get(2)
-//                     .map(|s| s.replace("LN:", ""))
-//                     .context("Can't parse dict file.")?
-//                     .parse()?,
-//             ));
-//         }
-//     }
-//
-//     Ok(res)
-// }
 
 pub fn read_dict(path: &str) -> anyhow::Result<Vec<(String, u32)>> {
     debug!("Parsing {path}.");

+ 6 - 6
src/io/fasta.rs

@@ -1,8 +1,10 @@
 use std::fs::File;
 
+use noodles_fasta::io::IndexedReader;
+
 // 0-based position in input
 pub fn sequence_at(
-    fasta_reader: &mut noodles_fasta::IndexedReader<noodles_fasta::io::BufReader<File>>,
+    fasta_reader: &mut IndexedReader<noodles_fasta::io::BufReader<File>>,
     contig: &str,
     position: usize,
     len: usize,
@@ -18,26 +20,24 @@ pub fn sequence_at(
     let end = noodles_core::Position::try_from(end)?;
     let interval = noodles_core::region::interval::Interval::from(start..=end);
 
-    let r = noodles_core::Region::new(contig.to_string(), interval);
+    let r = noodles_core::region::Region::new(contig.to_string(), interval);
     let record = fasta_reader.query(&r)?;
     let s = String::from_utf8(record.sequence().as_ref().to_vec())?.to_uppercase();
 
     Ok(s)
 }
 
-
 pub fn sequence_range(
-    fasta_reader: &mut noodles_fasta::IndexedReader<noodles_fasta::io::BufReader<File>>,
+    fasta_reader: &mut IndexedReader<noodles_fasta::io::BufReader<File>>,
     contig: &str,
     start: usize,
     end: usize,
 ) -> anyhow::Result<String> {
-
     let start = noodles_core::Position::try_from(start + 1)?;
     let end = noodles_core::Position::try_from(end + 1)?;
     let interval = noodles_core::region::interval::Interval::from(start..=end);
 
-    let r = noodles_core::Region::new(contig.to_string(), interval);
+    let r = noodles_core::region::Region::new(contig.to_string(), interval);
     let record = fasta_reader.query(&r)?;
     let s = String::from_utf8(record.sequence().as_ref().to_vec())?.to_uppercase();
 

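For callers, the relevant change is that both the IndexedReader type and its Builder now live under noodles_fasta::io (see the lib.rs and variant_collection.rs hunks below). A minimal usage sketch of sequence_at with the relocated builder, assuming it runs inside this crate; the reference path is a placeholder:

// Query 3 bases at a 0-based position from an indexed FASTA.
fn example() -> anyhow::Result<()> {
    let mut fasta_reader = noodles_fasta::io::indexed_reader::Builder::default()
        .build_from_path("/path/to/reference.fa")?;

    // sequence_at is the function defined in src/io/fasta.rs above.
    let s = crate::io::fasta::sequence_at(&mut fasta_reader, "chr1", 16_761, 3)?;
    assert_eq!(s.len(), 3);

    Ok(())
}
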
+ 1 - 1
src/io/gff.rs

@@ -32,7 +32,7 @@ pub fn features_ranges(feature_type: &str, config: &Config) -> anyhow::Result<Ve
         }
 
         res.push(GenomeRange::from_1_inclusive(
-            record.reference_sequence_name(),
+            &record.reference_sequence_name().to_string(),
             record.start().get() as u32,
             record.end().get() as u32,
         ));

+ 1 - 1
src/lib.rs

@@ -921,7 +921,7 @@ mod tests {
         let position = 16761;
 
         let mut fasta_reader =
-            noodles_fasta::indexed_reader::Builder::default().build_from_path(c.reference)?;
+            noodles_fasta::io::indexed_reader::Builder::default().build_from_path(c.reference)?;
         let r = io::fasta::sequence_at(&mut fasta_reader, chr, position, 3)?;
         println!(
             "{r} ({} {:.2})",

+ 0 - 32
src/locker.rs

@@ -193,35 +193,3 @@ impl Drop for SampleLock {
         }
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use tempfile::TempDir;
-
-    #[test]
-    fn test_lock_acquire_release() -> anyhow::Result<()> {
-        let tmp = TempDir::new()?;
-        let lock_dir = tmp.path().to_str().unwrap();
-
-        {
-            let lock = SampleLock::acquire(lock_dir, "sample1", "clairs")?;
-            assert!(lock.path.exists());
-
-            // Second acquire should fail
-            let result = SampleLock::acquire(lock_dir, "sample1", "clairs");
-            assert!(result.is_err());
-
-            // Different sample should succeed
-            let _lock2 = SampleLock::acquire(lock_dir, "sample2", "clairs")?;
-
-            // Different pipeline should succeed
-            let _lock3 = SampleLock::acquire(lock_dir, "sample1", "deepvariant")?;
-        }
-
-        // After drop, should be able to acquire again
-        let _lock = SampleLock::acquire(lock_dir, "sample1", "clairs")?;
-
-        Ok(())
-    }
-}

+ 2 - 2
src/variant/variant_collection.rs

@@ -281,7 +281,7 @@ impl VariantCollection {
         self.variants
             .par_chunks(self.chunk_size(max_threads))
             .for_each(|chunk| {
-                let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default()
+                let mut fasta_reader = noodles_fasta::io::indexed_reader::Builder::default()
                     .build_from_path(reference)
                     .unwrap();
 
@@ -439,7 +439,7 @@ impl VariantCollection {
 
                 let c = crate::config::Config::default();
 
-                let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default()
+                let mut fasta_reader = noodles_fasta::io::indexed_reader::Builder::default()
                     .build_from_path(c.reference)?;
 
                 for var in chunk {