Thomas 1 kuukausi sitten
vanhempi
commit
6f12252396
3 muutettua tiedostoa jossa 32 lisäystä ja 54 poistoa
  1. 1 0
      pandora-config.example.toml
  2. 30 53
      src/collection/prom_run.rs
  3. 1 1
      src/helpers.rs

+ 1 - 0
pandora-config.example.toml

@@ -52,6 +52,7 @@ mask_bed = "{result_dir}/{id}/diag/mask.bed"
 # Panels of interest: [ [name, bed_path], ... ]
 panels = [
   ["Cytoband", "/home/t_steimle/ref/hs1/chm13v2.0_cytobands_allchrs.bed"],
+	["PanelCM", "/home/t_steimle/ref/hs1/panel_cm_hs1.bed"],
 ]
 
 repeats_bed = "/home/t_steimle/ref/hs1/all_repeats_chm13_final.bed"

+ 30 - 53
src/collection/prom_run.rs

@@ -62,10 +62,7 @@ use std::{
 use anyhow::{bail, Context};
 use chrono::{DateTime, Utc};
 use log::{info, warn};
-use rayon::{
-    iter::{IntoParallelRefIterator, ParallelIterator},
-    ThreadPoolBuilder,
-};
+use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use rust_htslib::bam::{self, Read};
 use rustc_hash::FxHashSet;
 use serde::{Deserialize, Serialize};
@@ -212,7 +209,7 @@ impl PromRun {
     /// let run = PromRun::from_dir("/data/runs/my_run", &config)?;
     /// println!("Imported {} BAM files", run.bams.len());
     /// ```
-    pub fn from_dir(dir: impl AsRef<Path>, config: &Config) -> anyhow::Result<Self> {
+    pub fn from_dir(dir: impl AsRef<Path>) -> anyhow::Result<Self> {
         let dir = dir.as_ref().to_path_buf();
         if !dir.is_dir() {
             anyhow::bail!(
@@ -255,39 +252,29 @@ impl PromRun {
             .and_then(|path| File::open(path).ok())
             .and_then(|mut reader| parse_pore_activity_from_reader(&mut reader).ok());
 
-        // Build thread pool for parallel parsing
-        let pool = ThreadPoolBuilder::new()
-            .num_threads(config.threads.into())
-            .build()
-            .context("Failed to build Rayon thread pool")?;
-
         // Parse BAM files in parallel
-        let bams: Vec<PromBam> = pool.install(|| {
-            bam_paths
-                .par_iter()
-                .filter_map(|p| match PromBam::from_path(p) {
-                    Ok(bam) => Some(bam),
-                    Err(e) => {
-                        log::warn!("Failed to parse BAM {}: {}", p.display(), e);
-                        None
-                    }
-                })
-                .collect()
-        });
+        let bams: Vec<PromBam> = bam_paths
+            .par_iter()
+            .filter_map(|p| match PromBam::from_path(p) {
+                Ok(bam) => Some(bam),
+                Err(e) => {
+                    log::warn!("Failed to parse BAM {}: {}", p.display(), e);
+                    None
+                }
+            })
+            .collect();
 
         // Parse POD5 files in parallel
-        let pod5s: Vec<Pod5> = pool.install(|| {
-            pod5_paths
-                .par_iter()
-                .filter_map(|p| match Pod5::from_path(p) {
-                    Ok(pod5) => Some(pod5),
-                    Err(e) => {
-                        log::warn!("Failed to parse POD5 {}: {}", p.display(), e);
-                        None
-                    }
-                })
-                .collect()
-        });
+        let pod5s: Vec<Pod5> = pod5_paths
+            .par_iter()
+            .filter_map(|p| match Pod5::from_path(p) {
+                Ok(pod5) => Some(pod5),
+                Err(e) => {
+                    log::warn!("Failed to parse POD5 {}: {}", p.display(), e);
+                    None
+                }
+            })
+            .collect();
 
         let prom_run = Self {
             dir,
@@ -506,7 +493,7 @@ impl PromRun {
             return Ok(());
         }
 
-        let pass_bams = filter_pass_bams(&candidate_bams);
+        let pass_bams = filter_pass_bams(&candidate_bams, kit_type);
 
         if pass_bams.is_empty() {
             bail!("No BAM files found in bam_pass directories");
@@ -1231,26 +1218,16 @@ impl fmt::Display for PromBam {
 }
 
 /// Filters BAMs to only include those from bam_pass directories.
-fn filter_pass_bams<'a>(bams: &[&'a PromBam]) -> Vec<&'a PromBam> {
+fn filter_pass_bams<'a>(bams: &[&'a PromBam], kit_type: KitType) -> Vec<&'a PromBam> {
     bams.iter()
         .filter(|bam| {
-            let path_str = bam.path.to_string_lossy();
-            let is_fail = path_str.contains("bam_fail");
-
-            if is_fail {
-                info!("Skipping failed read BAM: {}", bam.path.display());
-                return false;
-            }
+            let p = bam.path.to_string_lossy();
+            if p.contains("bam_fail") { return false; }
 
-            let is_pass = path_str.contains("bam_pass");
-            if !is_pass {
-                warn!(
-                    "BAM path ambiguous (not in bam_pass or bam_fail), including: {}",
-                    bam.path.display()
-                );
+            match kit_type {
+                KitType::Multiplexed => p.contains("bam_pass"),
+                KitType::NonMultiplexed => p.contains("/bam/") || p.contains("bam_pass"),
             }
-
-            true
         })
         .copied()
         .collect()
@@ -1758,7 +1735,7 @@ mod tests {
         let config = Config::default();
 
         let dir = "/mnt/beegfs02/scratch/t_steimle/data/prom/20251121_001_01_CD/03/20251121_1531_P2I-00461-B_PBI56020_efa567ea";
-        let prom_run = PromRun::from_dir(dir, &config)?;
+        let prom_run = PromRun::from_dir(dir)?;
 
         let prom = PromRun::open(&prom_run.protocol_run_id, &config)?;
         info!("{prom}");

+ 1 - 1
src/helpers.rs

@@ -947,7 +947,7 @@ pub fn split_genome_into_n_regions(
         return Vec::new();
     }
 
-    let target_chunk_size: u64 = (total_bases + n_parts as u64 - 1) / n_parts as u64; // ceil
+    let target_chunk_size: u64 = total_bases.div_ceil(n_parts as u64); // ceil
 
     let mut regions = Vec::new();