|
|
@@ -62,10 +62,7 @@ use std::{
|
|
|
use anyhow::{bail, Context};
|
|
|
use chrono::{DateTime, Utc};
|
|
|
use log::{info, warn};
|
|
|
-use rayon::{
|
|
|
- iter::{IntoParallelRefIterator, ParallelIterator},
|
|
|
- ThreadPoolBuilder,
|
|
|
-};
|
|
|
+use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
|
|
use rust_htslib::bam::{self, Read};
|
|
|
use rustc_hash::FxHashSet;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
@@ -212,7 +209,7 @@ impl PromRun {
|
|
|
/// let run = PromRun::from_dir("/data/runs/my_run", &config)?;
|
|
|
/// println!("Imported {} BAM files", run.bams.len());
|
|
|
/// ```
|
|
|
- pub fn from_dir(dir: impl AsRef<Path>, config: &Config) -> anyhow::Result<Self> {
|
|
|
+ pub fn from_dir(dir: impl AsRef<Path>) -> anyhow::Result<Self> {
|
|
|
let dir = dir.as_ref().to_path_buf();
|
|
|
if !dir.is_dir() {
|
|
|
anyhow::bail!(
|
|
|
@@ -255,39 +252,29 @@ impl PromRun {
|
|
|
.and_then(|path| File::open(path).ok())
|
|
|
.and_then(|mut reader| parse_pore_activity_from_reader(&mut reader).ok());
|
|
|
|
|
|
- // Build thread pool for parallel parsing
|
|
|
- let pool = ThreadPoolBuilder::new()
|
|
|
- .num_threads(config.threads.into())
|
|
|
- .build()
|
|
|
- .context("Failed to build Rayon thread pool")?;
|
|
|
-
|
|
|
// Parse BAM files in parallel
|
|
|
- let bams: Vec<PromBam> = pool.install(|| {
|
|
|
- bam_paths
|
|
|
- .par_iter()
|
|
|
- .filter_map(|p| match PromBam::from_path(p) {
|
|
|
- Ok(bam) => Some(bam),
|
|
|
- Err(e) => {
|
|
|
- log::warn!("Failed to parse BAM {}: {}", p.display(), e);
|
|
|
- None
|
|
|
- }
|
|
|
- })
|
|
|
- .collect()
|
|
|
- });
|
|
|
+ let bams: Vec<PromBam> = bam_paths
|
|
|
+ .par_iter()
|
|
|
+ .filter_map(|p| match PromBam::from_path(p) {
|
|
|
+ Ok(bam) => Some(bam),
|
|
|
+ Err(e) => {
|
|
|
+ log::warn!("Failed to parse BAM {}: {}", p.display(), e);
|
|
|
+ None
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
|
|
|
// Parse POD5 files in parallel
|
|
|
- let pod5s: Vec<Pod5> = pool.install(|| {
|
|
|
- pod5_paths
|
|
|
- .par_iter()
|
|
|
- .filter_map(|p| match Pod5::from_path(p) {
|
|
|
- Ok(pod5) => Some(pod5),
|
|
|
- Err(e) => {
|
|
|
- log::warn!("Failed to parse POD5 {}: {}", p.display(), e);
|
|
|
- None
|
|
|
- }
|
|
|
- })
|
|
|
- .collect()
|
|
|
- });
|
|
|
+ let pod5s: Vec<Pod5> = pod5_paths
|
|
|
+ .par_iter()
|
|
|
+ .filter_map(|p| match Pod5::from_path(p) {
|
|
|
+ Ok(pod5) => Some(pod5),
|
|
|
+ Err(e) => {
|
|
|
+ log::warn!("Failed to parse POD5 {}: {}", p.display(), e);
|
|
|
+ None
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
|
|
|
let prom_run = Self {
|
|
|
dir,
|
|
|
@@ -506,7 +493,7 @@ impl PromRun {
|
|
|
return Ok(());
|
|
|
}
|
|
|
|
|
|
- let pass_bams = filter_pass_bams(&candidate_bams);
|
|
|
+ let pass_bams = filter_pass_bams(&candidate_bams, kit_type);
|
|
|
|
|
|
if pass_bams.is_empty() {
|
|
|
bail!("No BAM files found in bam_pass directories");
|
|
|
@@ -1231,26 +1218,16 @@ impl fmt::Display for PromBam {
|
|
|
}
|
|
|
|
|
|
/// Filters BAMs to pass-read files: drops `bam_fail` paths, then keeps `bam_pass` paths (and, for non-multiplexed kits, `/bam/` paths as well).
|
|
|
-fn filter_pass_bams<'a>(bams: &[&'a PromBam]) -> Vec<&'a PromBam> {
|
|
|
+fn filter_pass_bams<'a>(bams: &[&'a PromBam], kit_type: KitType) -> Vec<&'a PromBam> {
|
|
|
bams.iter()
|
|
|
.filter(|bam| {
|
|
|
- let path_str = bam.path.to_string_lossy();
|
|
|
- let is_fail = path_str.contains("bam_fail");
|
|
|
-
|
|
|
- if is_fail {
|
|
|
- info!("Skipping failed read BAM: {}", bam.path.display());
|
|
|
- return false;
|
|
|
- }
|
|
|
+ let p = bam.path.to_string_lossy();
|
|
|
+ if p.contains("bam_fail") { return false; }
|
|
|
|
|
|
- let is_pass = path_str.contains("bam_pass");
|
|
|
- if !is_pass {
|
|
|
- warn!(
|
|
|
- "BAM path ambiguous (not in bam_pass or bam_fail), including: {}",
|
|
|
- bam.path.display()
|
|
|
- );
|
|
|
+ match kit_type {
|
|
|
+ KitType::Multiplexed => p.contains("bam_pass"),
|
|
|
+ KitType::NonMultiplexed => p.contains("/bam/") || p.contains("bam_pass"),
|
|
|
}
|
|
|
-
|
|
|
- true
|
|
|
})
|
|
|
.copied()
|
|
|
.collect()
|
|
|
@@ -1758,7 +1735,7 @@ mod tests {
|
|
|
let config = Config::default();
|
|
|
|
|
|
let dir = "/mnt/beegfs02/scratch/t_steimle/data/prom/20251121_001_01_CD/03/20251121_1531_P2I-00461-B_PBI56020_efa567ea";
|
|
|
- let prom_run = PromRun::from_dir(dir, &config)?;
|
|
|
+ let prom_run = PromRun::from_dir(dir)?;
|
|
|
|
|
|
let prom = PromRun::open(&prom_run.protocol_run_id, &config)?;
|
|
|
info!("{prom}");
|