Thomas 1 month ago
parent
commit
fbb733ef00

+ 14 - 7
pandora-config.example.toml

@@ -51,9 +51,11 @@ mask_bed = "{result_dir}/{id}/diag/mask.bed"
 
 # Panels of interest: [ [name, bed_path], ... ]
 panels = [
-  ["Repeat",         "/home/t_steimle/ref/hs1/all_repeats_chm13_final.bed"],
+  ["Cytoband", "/home/t_steimle/ref/hs1/chm13v2.0_cytobands_allchrs.bed"],
 ]
 
+# Repeats BED file used for the Repeat annotation.
+repeats_bed = "/home/t_steimle/ref/hs1/all_repeats_chm13_final.bed"
+
 #######################################
 # Sample naming / BAM handling
 #######################################
@@ -106,7 +108,6 @@ somatic_pipe_threads = 150
 # {result_dir}, {id}
 somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"
 
-
 #######################################
 # Filtering / QC thresholds
 #######################################
@@ -132,7 +133,6 @@ min_high_quality_depth = 14
 # Minimum number of callers required to keep a variant.
 min_n_callers = 1
 
-
 #######################################
 # DeepVariant configuration
 #######################################
@@ -153,7 +153,6 @@ deepvariant_model_type = "ONT_R104"
 # Force DeepVariant recomputation.
 deepvariant_force = false
 
-
 #######################################
 # DeepSomatic configuration
 #######################################
@@ -174,7 +173,6 @@ deepsomatic_model_type = "ONT"
 # Force DeepSomatic recomputation.
 deepsomatic_force = false
 
-
 #######################################
 # ClairS configuration
 #######################################
@@ -195,7 +193,6 @@ clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
 # {result_dir}, {id}
 clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
 
-
 #######################################
 # Savana configuration
 #######################################
@@ -302,6 +299,17 @@ straglr_force = false
 
 marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"
 
+#######################################
+# Echtvar
+#######################################
+
+# Path to the echtvar binary.
+echtvar_bin = "/home/t_steimle/somatic_pipe_tools/echtvar"
+
+# Echtvar archives applied via repeated `-e` flags of `echtvar anno`.
+echtvar_sources = [
+  "/home/t_steimle/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
+  "/home/t_steimle/ref/hs1/CosmicCodingMuts.echtvar.zip",
+]
+
 #######################################
 # Bcftools configuration
 #######################################
@@ -348,7 +356,6 @@ modkit_summary_threads = 40
 # {result_dir}, {id}, {time}
 modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
 
-
 #######################################
 # Nanomonsv configuration
 #######################################

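For orientation, a minimal sketch of how the three new keys could deserialize, assuming `Config` derives `serde::Deserialize` and is loaded with the `toml` crate (the loading code is not part of this diff; the struct below is a hypothetical stand-in, not the real `Config`):

```rust
use serde::Deserialize;

// Hypothetical stand-in for the slice of Config touched by this commit.
#[derive(Debug, Deserialize)]
struct EchtvarKeys {
    repeats_bed: String,
    echtvar_bin: String,
    echtvar_sources: Vec<String>,
}

fn main() -> Result<(), toml::de::Error> {
    let cfg: EchtvarKeys = toml::from_str(
        r#"
repeats_bed = "/home/t_steimle/ref/hs1/all_repeats_chm13_final.bed"
echtvar_bin = "/home/t_steimle/somatic_pipe_tools/echtvar"
echtvar_sources = [
  "/home/t_steimle/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
  "/home/t_steimle/ref/hs1/CosmicCodingMuts.echtvar.zip",
]
"#,
    )?;
    assert_eq!(cfg.echtvar_sources.len(), 2);
    Ok(())
}
```
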
+ 5 - 1
src/annotation/cosmic.rs

@@ -27,7 +27,11 @@ impl FromStr for Cosmic {
     /// - The input must contain exactly three parts, separated by semicolons (`;`).
     /// - The third part must be of the form `CNT=<number>`, where `<number>` can be parsed as a `u64`.
     /// - If the first part contains the word `"MISSING"`, parsing will fail.
-    /// encode with echtvar: [{"field":"GENOME_SCREEN_SAMPLE_COUNT", "alias": "CNT"}]
+    ///
+    /// Generated with echtvar encode json:
+    /// ```json
+    /// [{"field":"GENOME_SCREEN_SAMPLE_COUNT", "alias": "CNT"}]
+    /// ```
     ///
     /// # Examples
     ///

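The parsing contract in the doc comment is easy to exercise in isolation. A standalone sketch of those three rules (the record layout below is invented for illustration; the real parser is the `FromStr` impl above):

```rust
// Doc-comment rules: exactly three ';'-separated parts, the third part
// "CNT=<u64>", and a first part containing "MISSING" must fail.
fn parse_cnt(s: &str) -> Option<u64> {
    let parts: Vec<&str> = s.split(';').collect();
    if parts.len() != 3 || parts[0].contains("MISSING") {
        return None;
    }
    parts[2].strip_prefix("CNT=")?.parse().ok()
}

fn main() {
    assert_eq!(parse_cnt("COSV99000000;x;CNT=42"), Some(42)); // hypothetical record
    assert_eq!(parse_cnt("MISSING;x;CNT=42"), None);
    assert_eq!(parse_cnt("a;b"), None);
}
```
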
+ 68 - 29
src/annotation/echtvar.rs

@@ -1,46 +1,85 @@
 use std::{
     io::{BufRead, BufReader},
+    path::{Path, PathBuf},
     process::{Command, Stdio},
 };
 
 use anyhow::{Context, Ok, Result};
 use log::warn;
+use uuid::Uuid;
+
+use crate::{
+    commands::{
+        CapturedOutput, Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner,
+        SlurmParams, SlurmRunner,
+    },
+    config::Config,
+    run,
+};
 
 use super::{cosmic::Cosmic, gnomad::GnomAD};
 
-// /data/tools/echtvar anno -e /data/ref/hs1/CosmicCodingMuts.echtvar.zip -e /data/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip BENGUIRAT_diag_clairs_PASSED.vcf.gz test.bcf
-pub fn run_echtvar(in_path: &str, out_path: &str) -> Result<()> {
-    let bin_dir = "/data/tools";
-    // let _ = Command::new("tabix").arg(in_path).spawn()?.wait()?;
+pub struct EchtvarJob {
+    pub input_vcf: PathBuf,
+    pub output_vcf: PathBuf,
+    pub config: Config,
+}
 
-    let annot_sources: Vec<&str> = [
-        "/data/ref/hs1/CosmicCodingMuts.echtvar.zip",
-        "/data/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
-    ]
-    .iter()
-    .flat_map(|e| vec!["-e", e])
-    .collect();
+impl JobCommand for EchtvarJob {
+    fn cmd(&self) -> String {
+        let sources = self
+            .config
+            .echtvar_sources
+            .iter()
+            .map(|e| format!("-e {e}"))
+            .collect::<Vec<String>>()
+            .join(" ");
+        format!(
+            "{echtvar_bin} anno {sources} {input_vcf} {output_vcf}",
+            echtvar_bin = self.config.echtvar_bin,
+            sources = sources,
+            input_vcf = self.input_vcf.display(),
+            output_vcf = self.output_vcf.display()
+        )
+    }
+}
+impl LocalRunner for EchtvarJob {}
+impl LocalBatchRunner for EchtvarJob {}
 
-    // info!("Running echtvar anno for {}", in_path);
-    let mut cmd = Command::new(format!("{}/echtvar", bin_dir))
-        .arg("anno")
-        .args(annot_sources)
-        .arg(in_path)
-        .arg(out_path)
-        .stderr(Stdio::piped())
-        .spawn()
-        .context("echtvar anno failed to start")?;
+impl SlurmRunner for EchtvarJob {
+    fn slurm_args(&self) -> Vec<String> {
+        SlurmParams {
+            job_name: Some(format!("echtvar_{}", Uuid::new_v4())),
+            cpus_per_task: Some(1),
+            mem: Some("20G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
 
-    let stderr = cmd.stderr.take().unwrap();
-    let reader = BufReader::new(stderr);
-    reader
-        .lines()
-        .map_while(Result::ok)
-        .filter(|line| line.contains("error"))
-        .for_each(|line| warn!("{}", line));
+impl SbatchRunner for EchtvarJob {
+    fn slurm_params(&self) -> SlurmParams {
+        SlurmParams {
+            job_name: Some(format!("echtvar_{}", Uuid::new_v4())),
+            cpus_per_task: Some(1),
+            mem: Some("20G".into()),
+            partition: Some("shortq".into()),
+            gres: None,
+        }
+    }
+}
 
-    cmd.wait()?;
-    Ok(())
+pub fn run_echtvar(
+    in_path: impl AsRef<Path>,
+    output_vcf: impl AsRef<Path>,
+    config: &Config,
+) -> Result<CapturedOutput> {
+    let mut job = EchtvarJob {
+        input_vcf: in_path.as_ref().into(),
+        output_vcf: output_vcf.as_ref().into(),
+        config: config.clone(),
+    };
+    run!(config, &mut job)
 }
 
 pub fn parse_echtvar_val(s: &str) -> Result<(Option<Cosmic>, Option<GnomAD>)> {

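`EchtvarJob::cmd()` renders a single shell line of the shape `<bin> anno -e <zip> ... <in> <out>`. A quick standalone check of that string assembly, using the paths from pandora-config.example.toml and invented input/output names:

```rust
fn main() {
    let echtvar_bin = "/home/t_steimle/somatic_pipe_tools/echtvar";
    let sources = [
        "/home/t_steimle/ref/hs1/gnomAD_4-2022_10-gnomad.echtvar.zip",
        "/home/t_steimle/ref/hs1/CosmicCodingMuts.echtvar.zip",
    ]
    .iter()
    .map(|e| format!("-e {e}"))
    .collect::<Vec<String>>()
    .join(" ");

    // Mirrors the format! call in EchtvarJob::cmd().
    let cmd = format!("{echtvar_bin} anno {sources} in.vcf.gz out.bcf");
    assert!(cmd.starts_with("/home/t_steimle/somatic_pipe_tools/echtvar anno -e "));
    println!("{cmd}");
}
```
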
+ 1 - 2
src/annotation/gnomad.rs

@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
 use std::str::FromStr;
 
 /// Generated with echtvar json:
-/// ```
+/// ```json
 ///[
 ///  { "field": "AC",     "alias": "gnomad_ac" },
 ///  { "field": "AN",     "alias": "gnomad_an" },
@@ -19,7 +19,6 @@ use std::str::FromStr;
 ///  { "field": "AF_asj", "alias": "gnomad_af_asj", "multiplier": 2000000 },
 ///  { "field": "AF_nfe", "alias": "gnomad_af_nfe", "multiplier": 2000000 }
 ///]
-///
 /// ```
 
 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Encode, Decode)]

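The multiplier entries matter when reading values back: echtvar stores float INFO fields as integers scaled by the configured multiplier, so decoding divides by it again. A small sketch under that assumption, with an invented encoded value:

```rust
fn main() {
    // Multiplier from the JSON above; `stored` is a hypothetical encoded value.
    let multiplier = 2_000_000.0_f64;
    let stored: u64 = 1_234;
    let af = stored as f64 / multiplier;
    assert!((af - 0.000617).abs() < 1e-12);
    println!("gnomad_af = {af}");
}
```
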
+ 3 - 3
src/annotation/mod.rs

@@ -90,7 +90,7 @@ pub enum Annotation {
     VNTR,
 
     /// RepeatMasker
-    RepeatMasker,
+    Repeat,
 }
 
 /// Denotes the biological sample type associated with a variant call.
@@ -217,7 +217,7 @@ impl fmt::Display for Annotation {
             VEP(_) => "VEP".into(),
             CpG => "CpG".into(),
             VNTR => "VNTR".into(),
-            RepeatMasker => "VNTR".into(),
+            Repeat => "Repeat".into(),
             TriNucleotides(bases) => format!(
                 "Trinucleotides({})",
                 bases.iter().map(|b| b.to_string()).collect::<String>(),
@@ -368,7 +368,7 @@ impl Annotations {
             for ann in anns.iter() {
                 match ann {
                     LowConstitDepth | LowEntropy | GnomAD(_) | VEP(_) | TriNucleotides(_)
-                    | ReplicationTiming(_) | HighDepth | CpG | VNTR | RepeatMasker | Panel(_)
+                    | ReplicationTiming(_) | HighDepth | CpG | VNTR | Repeat | Panel(_)
                     | LowMAPQ | HighConstitAlt => categorical.push(ann.to_string()),
                     Callers(caller, sample) => categorical.push(format!("{caller} {sample}")),
                     ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),

+ 120 - 105
src/collection/prom_run.rs

@@ -80,9 +80,7 @@ use crate::{
     commands::{
         dorado::DoradoAlign,
         modkit::ModkitSummary,
-        run_many_sbatch,
         samtools::{SamtoolsIndex, SamtoolsMergeMany, SamtoolsSort},
-        SlurmRunner,
     },
     config::Config,
     helpers::{get_genome_sizes, list_files_recursive, remove_bam_with_index, TempFileGuard},
@@ -312,50 +310,51 @@ impl PromRun {
         Ok(prom_run)
     }
 
-    /// Validates that all provided BAMs are unaligned.
+    /// Partitions BAMs into unaligned and already-aligned groups.
     ///
-    /// Returns an error if any BAM contains reference sequences in its header.
-    fn validate_bams_unaligned(
+    /// Returns (unaligned, already_aligned) BAM lists.
+    fn partition_bams_by_alignment<'a>(
         &self,
-        pass_bams: &[&PromBam],
-        pool: &rayon::ThreadPool,
-    ) -> anyhow::Result<()> {
-        let aligned_bams: Vec<PathBuf> = pool.install(|| {
-            pass_bams
-                .par_iter()
-                .filter_map(|bam| match bam::Reader::from_path(&bam.path) {
+        pass_bams: &[&'a PromBam],
+    ) -> (Vec<&'a PromBam>, Vec<&'a PromBam>) {
+        let results: Vec<_> = pass_bams
+            .par_iter()
+            .map(|&bam| {
+                let is_aligned = match bam::Reader::from_path(&bam.path) {
                     Ok(reader) => {
                         let header = bam::Header::from_template(reader.header());
                         match get_genome_sizes(&header) {
-                            Ok(sizes) if !sizes.is_empty() => Some(bam.path.clone()),
-                            Ok(_) => None,
+                            Ok(sizes) => !sizes.is_empty(),
                             Err(_) => {
                                 warn!(
                                     "Failed to parse header for {}, assuming unaligned",
                                     bam.path.display()
                                 );
-                                None
+                                false
                             }
                         }
                     }
                     Err(e) => {
                         warn!("Failed to open BAM {}: {}", bam.path.display(), e);
-                        None
+                        false
                     }
-                })
-                .collect()
-        });
+                };
+                (bam, is_aligned)
+            })
+            .collect();
 
-        if !aligned_bams.is_empty() {
-            bail!(
-                "Found {} BAM file(s) that appear to be already aligned. \
-                 This method only processes unaligned BAMs. First aligned BAM: {}",
-                aligned_bams.len(),
-                aligned_bams[0].display()
-            );
+        let mut unaligned = Vec::new();
+        let mut aligned = Vec::new();
+
+        for (bam, is_aligned) in results {
+            if is_aligned {
+                aligned.push(bam);
+            } else {
+                unaligned.push(bam);
+            }
         }
 
-        Ok(())
+        (unaligned, aligned)
     }
 
     /// Maps BAM files to cases based on kit type (multiplexed vs non-multiplexed).
@@ -528,68 +527,100 @@ impl PromRun {
 
         info!("Filtered to {} BAM files from bam_pass", pass_bams.len());
 
-        let pool = ThreadPoolBuilder::new()
-            .num_threads(config.threads.into())
-            .build()
-            .context("Failed to build thread pool")?;
+        info!("Step 1/5: Validating BAMs are unaligned");
+        let (unaligned_bams, aligned_bams) = self.partition_bams_by_alignment(&pass_bams);
 
-        info!("Step 1/6: Validating BAMs are unaligned");
-        self.validate_bams_unaligned(&pass_bams, &pool)?;
+        if !aligned_bams.is_empty() {
+            info!(
+                "Found {} already-aligned BAMs — will skip alignment for these",
+                aligned_bams.len()
+            );
+        }
+
+        if unaligned_bams.is_empty() && aligned_bams.is_empty() {
+            bail!("No valid BAM files to process");
+        }
 
         metrics.validation_duration = validation_start.elapsed();
         metrics.bams_processed = pass_bams.len();
         metrics.bytes_input = pass_bams.iter().map(|b| b.bam_size).sum();
 
-        info!("✅ All {} BAM files are unaligned", pass_bams.len());
+        info!(
+            "✅ {} unaligned, {} already aligned",
+            unaligned_bams.len(),
+            aligned_bams.len()
+        );
 
         // =====================================================================
         // Step 3: Map BAMs to cases
         // =====================================================================
 
-        info!("Step 2/6: Mapping BAM files to cases");
-        let (bam_to_case, unmatched) = self.map_bams_to_cases(&pass_bams, kit_type)?;
+        info!("Step 2/5: Mapping BAM files to cases");
+        // Map unaligned BAMs (need alignment)
+        let (unaligned_bam_to_case, unmatched_unaligned) =
+            self.map_bams_to_cases(&unaligned_bams, kit_type)?;
 
-        if bam_to_case.is_empty() {
-            bail!("No BAMs were successfully mapped to cases");
-        }
+        // Map aligned BAMs (skip alignment, go straight to sort/merge)
+        let (aligned_bam_to_case, unmatched_aligned) =
+            self.map_bams_to_cases(&aligned_bams, kit_type)?;
+
+        let total_mapped = unaligned_bam_to_case.len() + aligned_bam_to_case.len();
+        let total_unmatched = unmatched_unaligned.len() + unmatched_aligned.len();
 
-        if !unmatched.is_empty() {
-            warn!("{} BAMs could not be matched to cases", unmatched.len());
+        if total_mapped == 0 {
+            bail!("No BAMs were successfully mapped to cases");
         }
 
         info!(
             "✅ Mapped {} BAMs to cases ({} unmatched)",
-            bam_to_case.len(),
-            unmatched.len()
+            total_mapped, total_unmatched
         );
 
         // =====================================================================
         // Step 4: Prepare and run alignment
         // =====================================================================
 
-        info!("Step 3/6: Preparing alignment jobs");
         let align_start = Timer::start();
-        let (align_jobs, case_bam_map) = prepare_alignment_jobs(&bam_to_case, &tmp_dir, config)?;
+        let mut case_bam_map: HashMap<String, Vec<PathBuf>> = HashMap::new();
 
-        // Track temp files for cleanup
-        for bams in case_bam_map.values() {
-            temp_guard.track_many(bams.clone());
+        // Add already-aligned BAMs directly to the map (no alignment needed)
+        for (bam_path, case) in &aligned_bam_to_case {
+            let sample_type_dir = map_sample_type_to_dir(&case.sample_type, config);
+            let key = format!("{}_{}", case.case_id, sample_type_dir);
+            case_bam_map.entry(key).or_default().push(bam_path.clone());
         }
 
-        info!("Step 4/6: Running {} alignment jobs", align_jobs.len());
-        run_many!(config, align_jobs).context("Alignment batch failed")?;
+        if !unaligned_bam_to_case.is_empty() {
+            let (align_jobs, aligned_case_map) =
+                prepare_alignment_jobs(&unaligned_bam_to_case, &tmp_dir, config)?;
 
-        self.verify_outputs_exist(&case_bam_map, "alignment")?;
+            // Track temp files for cleanup
+            for bams in aligned_case_map.values() {
+                temp_guard.track_many(bams.clone());
+            }
 
-        metrics.alignment_duration = align_start.elapsed();
+            info!("Step 3/5: Running {} alignment jobs", align_jobs.len());
+            run_many!(config, align_jobs).context("Alignment batch failed")?;
+
+            self.verify_outputs_exist(&aligned_case_map, "alignment")?;
+
+            // Merge newly aligned BAMs into the case map
+            for (key, bams) in aligned_case_map {
+                case_bam_map.entry(key).or_default().extend(bams);
+            }
 
-        info!("✅ Alignments completed");
+            info!("✅ Alignments completed");
+        } else {
+            info!("Step 3/5: Skipping alignment — all BAMs already aligned");
+        }
+
+        metrics.alignment_duration = align_start.elapsed();
 
         // =====================================================================
         // Step 5: Sort and index chunks
         // =====================================================================
 
-        info!("Step 5/6: Sorting and indexing chunks");
+        info!("Step 4/5: Sorting and indexing chunks");
         let sort_start = Timer::start();
         let sorted_bam_map = sort_and_index_chunks(&case_bam_map, config)?;
 
@@ -607,7 +638,7 @@ impl PromRun {
         // Step 6: Merge and finalize per case
         // =====================================================================
 
-        info!("Step 6/6: Finalizing per-case BAMs");
+        info!("Step 5/5: Finalizing per-case BAMs");
         let finalize_start = Timer::start();
 
         let finalized = finalize_case_bams(&self.cases, &sorted_bam_map, &tmp_dir, config)?;
@@ -1175,8 +1206,7 @@ fn merge_chunk_bams(
 
     let mut merge_cmd =
         SamtoolsMergeMany::from_config(merged_path.clone(), chunk_bams.to_vec(), config);
-
-    SlurmRunner::exec(&mut merge_cmd).with_context(|| {
+    run!(config, &mut merge_cmd).with_context(|| {
         format!(
             "Failed to merge {} chunk BAMs for case {}",
             chunk_bams.len(),
@@ -1229,7 +1259,9 @@ fn atomic_replace(source: &Path, destination: &Path) -> anyhow::Result<()> {
         }
 
         // Remove backup and its orphaned index
-        remove_bam_with_index(&backup).ok();
+        if let Err(e) = remove_bam_with_index(&backup) {
+            log::warn!("Failed to clean up backup file {}: {}", backup.display(), e);
+        }
     } else {
         fs::rename(source, destination).with_context(|| {
             format!(
@@ -1244,12 +1276,9 @@ fn atomic_replace(source: &Path, destination: &Path) -> anyhow::Result<()> {
 }
 /// Indexes a BAM file using samtools.
 fn index_bam(bam: &Path, config: &Config) -> anyhow::Result<()> {
-    let mut index_cmd = SamtoolsIndex::from_config(config, &bam.to_string_lossy().to_string());
+    let mut index_cmd = SamtoolsIndex::from_config(config, bam.to_string_lossy().as_ref());
     run!(config, &mut index_cmd)?;
 
-    // SlurmRunner::run(&mut index_cmd)?;
-    // LocalRunner::run(&mut index_cmd)?;
-
     let index_path = PathBuf::from(format!("{}.bai", bam.display()));
     if !index_path.exists() {
         bail!("Index file not created: {}", index_path.display());
@@ -1294,7 +1323,13 @@ fn merge_into_existing_final(
     index_bam(destination, config)
         .with_context(|| format!("Failed to index final BAM: {}", destination.display()))?;
 
-    remove_bam_with_index(source).ok();
+    if let Err(e) = remove_bam_with_index(source) {
+        log::warn!(
+            "Failed to clean up merged source {}: {}",
+            source.display(),
+            e
+        );
+    }
 
     Ok(())
 }
@@ -1369,7 +1404,7 @@ fn sort_and_index_chunks(
     }
 
     info!("Submitting {} sort jobs", sort_jobs.len());
-    run_many_sbatch(sort_jobs).context("Sort batch failed")?;
+    run_many!(config, sort_jobs).context("Sort batch failed")?;
 
     // Verify sorted BAM exists.
     let missing: Vec<_> = original_to_sorted
@@ -1388,11 +1423,11 @@ fn sort_and_index_chunks(
     // Index every sorted BAM.
     let index_jobs: Vec<SamtoolsIndex> = original_to_sorted
         .values()
-        .map(|sorted| SamtoolsIndex::from_config(config, &sorted.to_string_lossy().as_ref()))
+        .map(|sorted| SamtoolsIndex::from_config(config, sorted.to_string_lossy().as_ref()))
         .collect();
 
     info!("Submitting {} index jobs", index_jobs.len());
-    run_many_sbatch(index_jobs).context("Index batch failed")?;
+    run_many!(config, index_jobs).context("Index batch failed")?;
 
     // Verify index exists for each BAM.
     let missing_indices: Vec<_> = original_to_sorted
@@ -1411,7 +1446,13 @@ fn sort_and_index_chunks(
 
     // Remove input BAMs.
     for original in original_to_sorted.keys() {
-        fs::remove_file(original).ok();
+        if let Err(e) = fs::remove_file(original) {
+            log::warn!(
+                "Failed to remove unsorted BAM {}: {}",
+                original.display(),
+                e
+            );
+        }
     }
 
     // Construct the new map(case_id) -> BAMs
@@ -1474,7 +1515,9 @@ fn finalize_case_bams(
 
         for chunk in chunk_bams {
             if chunk.exists() {
-                remove_bam_with_index(chunk).ok();
+                if let Err(e) = remove_bam_with_index(chunk) {
+                    log::warn!("Failed to clean up temp chunk {}: {}", chunk.display(), e);
+                }
             }
         }
 
@@ -1515,6 +1558,14 @@ fn normalize_barcode(barcode: &str) -> String {
     stripped.to_string()
 }
 
+lazy_static! {
+    static ref BARCODE_PATTERNS: [regex::Regex; 3] = [
+        regex::Regex::new(r"(?i)barcode(\d+)").unwrap(),
+        regex::Regex::new(r"(?i)nb(\d+)").unwrap(),
+        regex::Regex::new(r"(?i)bc(\d+)").unwrap(),
+    ];
+}
+
 /// Extract barcode number from various filename patterns.
 ///
 /// Recognizes:
@@ -1522,14 +1573,7 @@ fn normalize_barcode(barcode: &str) -> String {
 /// - `*_NB01_*`
 /// - `barcode01/`
 fn extract_and_normalize_barcode(text: &str) -> Option<String> {
-    // Pattern: barcode followed by digits
-    let patterns = [
-        regex::Regex::new(r"(?i)barcode(\d+)").ok()?,
-        regex::Regex::new(r"(?i)nb(\d+)").ok()?,
-        regex::Regex::new(r"(?i)bc(\d+)").ok()?,
-    ];
-
-    for pattern in &patterns {
+    for pattern in BARCODE_PATTERNS.iter() {
         if let Some(caps) = pattern.captures(text) {
             if let Some(num_str) = caps.get(1) {
                 if let Ok(num) = num_str.as_str().parse::<u32>() {
@@ -1616,35 +1660,6 @@ mod tests {
 
     use crate::helpers::test_init;
 
-    #[test]
-    fn prom_run_bam() -> anyhow::Result<()> {
-        test_init();
-
-        let bam_file = "/mnt/beegfs02/scratch/t_steimle/test_data/inputs/test_run_A/bam_pass/barcode02/PBI55810_pass_barcode02_22582b29_d02c5bb8_0.bam";
-        let bam = PromBam::from_path(bam_file)?;
-        info!("{bam}");
-
-        let bam_file = "/home/t_steimle/mnt/prom/20251121_001_01_CD/01/20251121_1531_P2I-00461-A_PBI52256_b1dd5673/bam_pass/PBI52256_pass_b1dd5673_414982db_0.bam";
-        let bam = PromBam::from_path(bam_file)?;
-        info!("{bam}");
-        Ok(())
-    }
-
-    #[test]
-    fn prom_run_import() -> anyhow::Result<()> {
-        test_init();
-        let config = Config::default();
-
-        let dir = "/home/t_steimle/beegfs02/prom_runs/20251107_OL_001_A-B/A/20251117_0915_P2I-00461-A_PBI55810_22582b29";
-        let prom_run = PromRun::from_dir(dir, &config)?;
-        info!("{prom_run}");
-
-        let dir = "/mnt/beegfs02/scratch/t_steimle/data/prom/20251121_001_01_CD/01/20251121_1531_P2I-00461-A_PBI52256_b1dd5673";
-        let prom_run = PromRun::from_dir(dir, &config)?;
-        info!("{prom_run}");
-        Ok(())
-    }
-
     #[test]
     fn prom_run_process() -> anyhow::Result<()> {
         test_init();

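The three barcode patterns moved into `lazy_static` can be exercised directly against the filename shapes from the doc comment. A standalone check assuming only the regex crate (`lazy_static` is omitted here, so the patterns recompile per call):

```rust
use regex::Regex;

fn extract_barcode_num(text: &str) -> Option<u32> {
    // Same three case-insensitive patterns as BARCODE_PATTERNS.
    for pat in [r"(?i)barcode(\d+)", r"(?i)nb(\d+)", r"(?i)bc(\d+)"] {
        if let Some(caps) = Regex::new(pat).unwrap().captures(text) {
            if let Ok(n) = caps[1].parse() {
                return Some(n);
            }
        }
    }
    None
}

fn main() {
    assert_eq!(extract_barcode_num("PBI_pass_barcode02_0.bam"), Some(2));
    assert_eq!(extract_barcode_num("run_NB01_chunk.bam"), Some(1));
    assert_eq!(extract_barcode_num("no_match_here.bam"), None);
}
```
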
+ 9 - 10
src/commands/mod.rs

@@ -158,7 +158,6 @@ pub trait LocalRunner: Command {
 
 use anyhow::Context;
 use log::info;
-use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
@@ -959,21 +958,21 @@ pub trait LocalBatchRunner: Command {
     }
 }
 
-pub fn run_many_local_batch<T>(jobs: Vec<T>, threads: usize) -> anyhow::Result<Vec<CapturedOutput>>
+pub fn run_many_local_batch<T>(jobs: Vec<T>) -> anyhow::Result<Vec<CapturedOutput>>
 where
     T: LocalBatchRunner + Send,
 {
-    // Set thread pool size based on max_concurrent
-    let pool = rayon::ThreadPoolBuilder::new()
-        .num_threads(threads)
-        .build()
-        .context("failed to build thread pool")?;
-
-    pool.install(|| {
-        jobs.into_par_iter()
-            .map(|mut job| LocalBatchRunner::run(&mut job))
-            .collect()
-    })
+    // Run the jobs sequentially in submission order.
+    jobs.into_iter()
+        .map(|mut job| LocalBatchRunner::run(&mut job))
+        .collect()
 }
 
 /// Macro to run multiple batch commands, either via SLURM (sbatch) or sequentially on the local host.
@@ -989,7 +988,7 @@ macro_rules! run_many {
         if $cfg.slurm_runner {
             $crate::commands::run_many_sbatch($jobs)
         } else {
-            $crate::commands::run_many_local_batch($jobs, $cfg.threads.into())
+            $crate::commands::run_many_local_batch($jobs)
         }
     }}
 }

+ 1 - 0
src/commands/samtools.rs

@@ -43,6 +43,7 @@ impl super::Command for SamtoolsIndex {
 }
 
 impl super::LocalRunner for SamtoolsIndex {}
+impl super::LocalBatchRunner for SamtoolsIndex {}
 
 impl super::SlurmRunner for SamtoolsIndex {
     fn slurm_args(&self) -> Vec<String> {

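This one-line impl is what lets SamtoolsIndex jobs flow through `run_many!` on the local path. A self-contained mock of the pattern (the repo's real trait signatures are not fully shown in this diff, so this is illustrative only):

```rust
use std::process::{Command as ProcCommand, Output};

trait Command {
    fn cmd(&self) -> String;
}

// Marker-style trait with a default runner, loosely mimicking LocalBatchRunner.
trait LocalBatchRunner: Command {
    fn run(&mut self) -> std::io::Result<Output> {
        ProcCommand::new("sh").arg("-c").arg(self.cmd()).output()
    }
}

struct EchoJob;
impl Command for EchoJob {
    fn cmd(&self) -> String {
        "echo indexed".into()
    }
}
impl LocalBatchRunner for EchoJob {}

fn main() -> std::io::Result<()> {
    let out = EchoJob.run()?;
    assert!(out.status.success());
    Ok(())
}
```
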
+ 8 - 0
src/config.rs

@@ -73,6 +73,9 @@ pub struct Config {
     /// Panels of interest (name, BED path).
     pub panels: Vec<(String, String)>,
 
+    /// Repeats BED file used for the Repeat annotation.
+    pub repeats_bed: String,
+
     // === Sample naming conventions ===
     /// Label used for the tumor sample in directory and file names (e.g. "diag").
     pub tumoral_name: String,
@@ -240,6 +243,11 @@ pub struct Config {
     // === MARLIN ===
     pub marlin_bed: String,
 
+    // === Echtvar ===
+    pub echtvar_bin: String,
+
+    pub echtvar_sources: Vec<String>,
+
     // === Bcftools configuration ===
     /// Path to Bcftools binary.
     pub bcftools_bin: String,

+ 6 - 6
src/pipes/somatic.rs

@@ -429,7 +429,7 @@ impl Run for SomaticPipe {
         variants_collections
             .iter()
             .try_for_each(|c| -> anyhow::Result<()> {
-                let ext_annot = ExternalAnnotation::init()?;
+                let ext_annot = ExternalAnnotation::init(&config)?;
                 ext_annot.annotate(&c.variants, &annotations)?;
                 Ok(())
             })?;
@@ -495,7 +495,7 @@ impl Run for SomaticPipe {
         variants_collections
             .iter()
             .try_for_each(|c| -> anyhow::Result<()> {
-                let ext_annot = ExternalAnnotation::init()?;
+                let ext_annot = ExternalAnnotation::init(&config)?;
                 ext_annot.annotate_vep(&c.variants, &annotations)?;
                 Ok(())
             })?;
@@ -526,20 +526,20 @@ impl Run for SomaticPipe {
         // Ensure sorted (should already be sorted)
         variants.sort();
 
-        let vntrs: Vec<GenomeRange> = read_bed("/data/ref/hs1/vntrs_chm13.bed")?
+        let vntrs: Vec<GenomeRange> = read_bed(&config.vntrs_bed)?
             .iter()
             .map(|r| r.range.clone())
             .collect();
         variants.annotate_with_ranges(&vntrs, Some(Annotation::VNTR), 0, Vec::new());
 
         let repeat_masker: Vec<GenomeRange> =
-            read_bed("/data/ref/hs1/chm13v2.0_RepeatMasker_4.1.2p1.2022Apr14.bed")?
+            read_bed(&config.repeats_bed)?
                 .iter()
                 .map(|r| r.range.clone())
                 .collect();
         variants.annotate_with_ranges(
             &repeat_masker,
-            Some(Annotation::RepeatMasker),
+            Some(Annotation::Repeat),
             0,
             Vec::new(),
         );
@@ -558,7 +558,7 @@ pub fn const_stats(id: String, config: Config) -> anyhow::Result<()> {
     let clairs_germline = ClairS::initialize(&id, &config.clone())?.germline(&annotations)?;
 
     info!("Annotation with Cosmic and GnomAD.");
-    let ext_annot = ExternalAnnotation::init()?;
+    let ext_annot = ExternalAnnotation::init(&config)?;
     ext_annot.annotate(&clairs_germline.variants, &annotations)?;
 
     let mut variants = Variants::default();

+ 11 - 4
src/variant/variant_collection.rs

@@ -2,7 +2,7 @@ use std::{
     collections::{HashMap, HashSet},
     fs::{self, File},
     io::{Read, Write},
     path::Path,
 };
 
 use anyhow::Context;
@@ -31,6 +31,7 @@ use crate::{
         bam::{counts_at, counts_ins_at},
         vcf::Vcf,
     },
+    config::Config,
     helpers::{
         app_storage_dir, detect_repetition, estimate_shannon_entropy, mean, temp_file_path,
         Hash128, Repeat,
@@ -1432,10 +1433,11 @@ use rusqlite::{params, Connection, Result as SqliteResult};
 
 pub struct ExternalAnnotation {
     pub conn: Connection,
+    pub config: Config,
 }
 
 impl ExternalAnnotation {
-    pub fn init() -> anyhow::Result<Self> {
+    pub fn init(config: &Config) -> anyhow::Result<Self> {
         let mut db_path = app_storage_dir()?;
         db_path.push("annotations_db.sqlite");
         info!("Opening DB: {}", db_path.display());
@@ -1452,7 +1454,10 @@ impl ExternalAnnotation {
             [],
         )?;
 
-        Ok(Self { conn })
+        Ok(Self {
+            conn,
+            config: config.clone(),
+        })
     }
 
     pub fn annotate(
@@ -1541,6 +1546,8 @@ impl ExternalAnnotation {
         let optimal_chunk_size = unfound.len().div_ceil(max_chunks as usize);
         let optimal_chunk_size = optimal_chunk_size.max(min_chunk_size);
 
+        let config = &self.config;
+
         let results = unfound
             .par_chunks(optimal_chunk_size)
             .flat_map(|chunk| -> anyhow::Result<Vec<_>> {
@@ -1563,7 +1570,7 @@ impl ExternalAnnotation {
                 }
 
                 // Run echtvar
-                run_echtvar(in_tmp.to_str().unwrap(), out_tmp.to_str().unwrap())?;
+                run_echtvar(&in_tmp, &out_tmp, config)?;
                 fs::remove_file(in_tmp)?;
 
                 // Parse echtvar output
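The chunking just above caps how many temporary VCFs are handed to echtvar in parallel while keeping each chunk from getting too small. The arithmetic in isolation (the `max_chunks` and `min_chunk_size` values here are invented):

```rust
fn main() {
    let (max_chunks, min_chunk_size) = (8_usize, 100_usize); // hypothetical limits
    let unfound = 1_000_usize;

    // Same expression as above: ceil-divide into at most max_chunks chunks,
    // then enforce the minimum chunk size.
    let optimal_chunk_size = unfound.div_ceil(max_chunks).max(min_chunk_size);
    assert_eq!(optimal_chunk_size, 125);
}
```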