Browse Source

callers straglr

Thomas 4 days ago
parent
commit
cab2f12c20

+ 33 - 1
src/callers/clairs.rs

@@ -821,7 +821,39 @@ fn merge_clairs_germline_parts(base: &ClairS, n_parts: usize) -> anyhow::Result<
 
 /// Runs ClairS in parallel chunks, then merges results.
 ///
-/// Execution mode (local vs Slurm) is determined by `config.slurm_runner`.
+/// Splits the genome into N equal-sized regions, runs ClairS on each region
+/// in parallel (local or Slurm based on `config.slurm_runner`), post-processes
+/// each part (concatenates SNV+indel, filters PASS), and merges both somatic
+/// and germline VCFs.
+///
+/// # Arguments
+///
+/// * `id` - Sample identifier
+/// * `config` - Global pipeline configuration
+/// * `n_parts` - Number of parallel chunks (typically 20-30 for whole-genome)
+///
+/// # Returns
+///
+/// `Ok(())` on success, or an error if any step fails.
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - `n_parts` is 0
+/// - Normal BAM file cannot be opened or is corrupted
+/// - BAM header is malformed
+/// - ClairS execution fails on any part
+/// - SNV+indel concatenation fails
+/// - PASS filtering fails
+/// - Somatic or germline VCF merging fails
+/// - Output directory cannot be created
+///
+/// # Example
+///
+/// ```ignore
+/// let config = Config::default();
+/// run_clairs_chunked("sample_001", &config, 30)?;
+/// ```
 pub fn run_clairs_chunked(id: &str, config: &Config, n_parts: usize) -> anyhow::Result<()> {
     anyhow::ensure!(n_parts > 0, "n_parts must be > 0");
 

+ 31 - 1
src/callers/deep_somatic.rs

@@ -541,7 +541,37 @@ fn merge_deepsomatic_parts(base: &DeepSomatic, n_parts: usize) -> anyhow::Result
 
 /// Runs DeepSomatic in parallel chunks, then merges results.
 ///
-/// Execution mode (local vs Slurm) is determined by `config.slurm_runner`.
+/// Splits the genome into N equal-sized regions, runs DeepSomatic on each region
+/// in parallel (local or Slurm based on `config.slurm_runner`), filters PASS variants,
+/// and concatenates the final VCF.
+///
+/// # Arguments
+///
+/// * `id` - Sample identifier
+/// * `config` - Global pipeline configuration
+/// * `n_parts` - Number of parallel chunks (typically 20-30 for whole-genome)
+///
+/// # Returns
+///
+/// `Ok(())` on success, or an error if any step fails.
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - `n_parts` is 0
+/// - Tumor or normal BAM file cannot be opened
+/// - BAM header is malformed
+/// - DeepSomatic execution fails on any part
+/// - PASS filtering fails
+/// - VCF merging fails
+/// - Output directory cannot be created
+///
+/// # Example
+///
+/// ```ignore
+/// let config = Config::default();
+/// run_deepsomatic_chunked("sample_001", &config, 30)?;
+/// ```
 pub fn run_deepsomatic_chunked(id: &str, config: &Config, n_parts: usize) -> anyhow::Result<()> {
     anyhow::ensure!(n_parts > 0, "n_parts must be > 0");
 

+ 34 - 1
src/callers/deep_variant.rs

@@ -739,7 +739,40 @@ fn merge_deepvariant_parts(base: &DeepVariant, n_parts: usize) -> anyhow::Result
 
 /// Runs DeepVariant in parallel chunks, then merges results.
 ///
-/// Execution mode (local vs Slurm) is determined by `config.slurm_runner`.
+/// Splits the genome into N equal-sized regions, runs DeepVariant on each region
+/// in parallel (local or Slurm based on `config.slurm_runner`), filters PASS variants,
+/// and concatenates the final VCF. Karyotype-aware calling ensures X/Y chromosomes
+/// are handled correctly based on sample sex.
+///
+/// # Arguments
+///
+/// * `id` - Sample identifier
+/// * `time_point` - Time point label ("norm" or "diag")
+/// * `config` - Global pipeline configuration
+/// * `n_parts` - Number of parallel chunks (typically 20-30 for whole-genome)
+///
+/// # Returns
+///
+/// `Ok(())` on success, or an error if any step fails.
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - `n_parts` is 0
+/// - `time_point` is invalid (not matching `config.normal_name` or `config.tumoral_name`)
+/// - BAM file cannot be opened or is corrupted
+/// - BAM header is malformed
+/// - Karyotype detection fails
+/// - DeepVariant execution fails on any part
+/// - PASS filtering fails
+/// - VCF merging fails
+///
+/// # Example
+///
+/// ```ignore
+/// let config = Config::default();
+/// run_deepvariant_chunked("sample_001", "norm", &config, 30)?;
+/// ```
 pub fn run_deepvariant_chunked(
     id: &str,
     time_point: &str,

+ 205 - 14
src/callers/mod.rs

@@ -1,13 +1,132 @@
-//! Variant caller integrations wired to the shared runner pattern (local/Slurm via `run!`).
-//! - ClairS — <https://github.com/HKU-BAL/ClairS>
-//! - DeepVariant — <https://github.com/google/deepvariant>
-//! - DeepSomatic — <https://github.com/google/deepsomatic>
-//! - Savana — <https://github.com/cortes-ciriano-lab/savana>
-//! - Severus — <https://github.com/genome-nexus/severus> (structural variants)
-//! - NanomonSV — <https://github.com/friend1ws/nanomonsv>
+//! # Variant Caller Integrations
+//!
+//! This module provides wrappers for multiple variant callers optimized for long-read
+//! sequencing data (ONT and PacBio). All callers are integrated with the shared runner
+//! pattern, allowing seamless execution in local or Slurm HPC environments via the `run!` macro.
+//!
+//! ## Overview
+//!
+//! The module includes seven production-grade variant callers, each specialized for different
+//! variant types and use cases:
+//!
+//! ### Small Variant Callers
+//!
+//! - **[ClairS]** - Deep learning-based somatic SNV/indel caller (paired tumor-normal)
+//!   - Haplotype-aware calling with LongPhase integration
+//!   - Dual output: somatic + germline variants
+//!   - Best for: Somatic SNV/indel detection in cancer samples
+//!   - [GitHub](https://github.com/HKU-BAL/ClairS)
+//!
+//! - **[DeepVariant]** - Deep learning-based germline variant caller (single-sample)
+//!   - Karyotype-aware for accurate X/Y chromosome calling
+//!   - Platform-agnostic models (ONT, PacBio, Illumina)
+//!   - Best for: Germline SNV/indel detection
+//!   - [GitHub](https://github.com/google/deepvariant)
+//!
+//! - **[DeepSomatic]** - Deep learning-based somatic variant caller (paired tumor-normal)
+//!   - Derived from DeepVariant architecture
+//!   - Optimized for somatic mutation detection
+//!   - Best for: Somatic SNV/indel detection
+//!   - [GitHub](https://github.com/google/deepsomatic)
+//!
+//! ### Structural Variant Callers
+//!
+//! - **[NanomonSV]** - Structural variant caller for paired and solo modes
+//!   - Detects deletions, insertions, duplications, inversions, translocations
+//!   - Supports tumor-normal paired analysis
+//!   - Best for: General SV detection in cancer samples
+//!   - [GitHub](https://github.com/friend1ws/nanomonsv)
+//!
+//! - **[Savana]** - Haplotype-aware SV and CNV caller (paired tumor-normal)
+//!   - Integrated copy number variation analysis
+//!   - Allele-specific CNV detection
+//!   - Requires phased germline variants and haplotagged BAMs
+//!   - Best for: Combined SV + CNV analysis with haplotype information
+//!   - [GitHub](https://github.com/cortes-ciriano-lab/savana)
+//!
+//! - **[Severus]** - VNTR and structural variant caller (paired and solo modes)
+//!   - Specialized in VNTR (Variable Number Tandem Repeat) detection
+//!   - High-precision breakpoint resolution
+//!   - Resolves complex overlapping SVs
+//!   - Best for: VNTR analysis and complex SV detection
+//!   - [GitHub](https://github.com/KolmogorovLab/Severus)
+//!
+//! ### STR Genotypers
+//!
+//! - **[Straglr]** - Short Tandem Repeat (STR) genotyper (paired and solo modes)
+//!   - Detects pathogenic repeat expansions in known disease loci
+//!   - Supports custom loci via BED file
+//!   - Provides allele-level genotyping with read support
+//!   - Best for: STR expansion detection in neurological and muscular diseases
+//!   - [GitHub](https://github.com/bcgsc/straglr)
+//!
+//! ## Execution Modes
+//!
+//! All callers support:
+//! - **Local execution** - Direct command execution for debugging/testing
+//! - **Slurm execution** - HPC job submission via `srun` or `sbatch`
+//! - **Chunked parallel execution** - Genome splitting for whole-genome analysis
+//!
+//! Execution mode is automatically selected based on `config.slurm_runner`.
+//!
+//! ## Typical Workflow
+//!
+//! 1. **Initialize** - Create caller instance with `Initialize::initialize()` or `InitializeSolo::initialize()`
+//! 2. **Check freshness** - Use `ShouldRun::should_run()` to avoid redundant work
+//! 3. **Execute** - Run caller with `Run::run()`
+//! 4. **Load variants** - Extract results with `Variants::variants()`
+//!
+//! ## Convenience Function
+//!
+//! The [`run_somatic_callers()`] function executes all somatic callers sequentially
+//! for a complete multi-caller analysis pipeline.
+//!
+//! ## Usage Examples
+//!
+//! ### Individual Caller
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::clairs::ClairS;
+//! use pandora_lib_promethion::config::Config;
+//! use pandora_lib_promethion::pipes::Initialize;
+//! use pandora_lib_promethion::runners::Run;
+//!
+//! let config = Config::default();
+//! let mut clairs = ClairS::initialize("sample_001", &config)?;
+//!
+//! if clairs.should_run() {
+//!     clairs.run()?;
+//! }
+//!
+//! let variants = clairs.variants(&annotations)?;
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ### Complete Multi-Caller Pipeline
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::run_somatic_callers;
+//! use pandora_lib_promethion::config::Config;
+//!
+//! let config = Config::default();
+//! run_somatic_callers("sample_001", &config)?;
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ## References
+//!
+//! Each caller module contains detailed documentation including:
+//! - Variant types detected
+//! - Requirements and dependencies
+//! - Output file formats and locations
+//! - Usage examples
+//! - Scientific publications
 
 use crate::{
-    callers::{clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV, savana::Savana, severus::Severus},
+    callers::{
+        clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::NanomonSV,
+        savana::Savana, severus::Severus,
+    },
     config::Config,
     pipes::{Initialize, InitializeSolo},
     runners::Run,
@@ -19,21 +138,93 @@ pub mod deep_variant;
 pub mod nanomonsv;
 pub mod savana;
 pub mod severus;
+pub mod straglr;
 
+/// Runs all somatic variant callers sequentially for comprehensive multi-caller analysis.
+///
+/// Executes the following callers in order:
+/// 1. **DeepVariant** (normal sample) - Germline SNV/indels
+/// 2. **DeepVariant** (tumor sample) - Germline SNV/indels
+/// 3. **ClairS** - Somatic SNV/indels (paired)
+/// 4. **Severus** - Somatic SVs and VNTRs (paired)
+/// 5. **Savana** - Somatic SVs and CNVs (paired, haplotype-aware)
+/// 6. **NanomonSV** - Somatic SVs (paired)
+/// 7. **DeepSomatic** - Somatic SNV/indels (paired)
+///
+/// Each caller automatically:
+/// - Checks if it needs to run based on output freshness
+/// - Skips execution if outputs are up-to-date
+/// - Handles prerequisite steps (e.g., phasing, haplotagging)
+/// - Filters results to PASS-only variants
+///
+/// # Arguments
+///
+/// * `id` - Sample identifier
+/// * `config` - Global pipeline configuration
+///
+/// # Returns
+///
+/// `Ok(())` if all callers complete successfully, or an error from the first failed caller.
+///
+/// # Errors
+///
+/// Returns an error if any caller fails. Common failure modes:
+/// - Missing or corrupted BAM files
+/// - Missing reference genome or annotation files
+/// - Insufficient disk space for outputs
+/// - Singularity/Docker image not found
+/// - Slurm job submission failures (if `config.slurm_runner = true`)
+/// - Individual caller-specific errors (see each caller's documentation)
+///
+/// # Performance Notes
+///
+/// This function runs callers **sequentially**, not in parallel. For parallel execution,
+/// invoke callers individually using separate processes or jobs.
+///
+/// Typical runtime for whole-genome sequencing (30x coverage):
+/// - DeepVariant: 2-4 hours (per sample, chunked)
+/// - ClairS: 4-6 hours (chunked)
+/// - Severus: 1-2 hours
+/// - Savana: 2-3 hours
+/// - NanomonSV: 1-2 hours
+/// - DeepSomatic: 3-5 hours (chunked)
+///
+/// Total: ~15-25 hours sequential execution
+///
+/// # Example
+///
+/// ```ignore
+/// use pandora_lib_promethion::callers::run_somatic_callers;
+/// use pandora_lib_promethion::config::Config;
+///
+/// let config = Config::default();
+/// run_somatic_callers("sample_001", &config)?;
+///
+/// println!("All somatic callers completed successfully!");
+/// # Ok::<(), anyhow::Error>(())
+/// ```
 pub fn run_somatic_callers(id: &str, config: &Config) -> anyhow::Result<()> {
-    // DeepVariant
+    // DeepVariant - germline variants for normal sample
     DeepVariant::initialize(id, &config.normal_name, config)?.run()?;
+
+    // DeepVariant - germline variants for tumor sample
     DeepVariant::initialize(id, &config.tumoral_name, config)?.run()?;
-    // ClairS
+
+    // ClairS - somatic SNV/indels with haplotype awareness
     ClairS::initialize(id, config)?.run()?;
-    // Severus
+
+    // Severus - structural variants and VNTRs
     Severus::initialize(id, config)?.run()?;
-    // Savana
+
+    // Savana - haplotype-aware SVs and CNVs
     Savana::initialize(id, config)?.run()?;
-    // Savana
+
+    // NanomonSV - structural variants (paired analysis)
     NanomonSV::initialize(id, config)?.run()?;
-    // DeepSomatic
+
+    // DeepSomatic - somatic SNV/indels
     DeepSomatic::initialize(id, config)?.run()?;
+
     Ok(())
 }
 

+ 22 - 4
src/callers/nanomonsv.rs

@@ -157,6 +157,12 @@ impl Initialize for NanomonSV {
     ///
     /// # Returns
     /// A fully prepared `NanomonSV` instance ready to run.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - `config.nanomonsv_force` is true and output directories cannot be removed
+    /// - Directory deletion fails due to permissions or I/O errors
     fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
         let id = id.to_string();
         info!("Initialize Nanomonsv for {id}.");
@@ -214,10 +220,22 @@ impl Run for NanomonSV {
     /// 4. Applies `bcftools` to filter the final result to PASS variants
     ///
     /// This function is idempotent and skips steps if the expected output already exists.
-    /// Runs the full NanomonSV pipeline including:
-    /// 1. Parsing diagnostic and MRD BAMs in parallel.
-    /// 2. Running NanomonSV `get` for both samples.
-    /// 3. Filtering final VCF to retain PASS variants only.
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` on success, or an error if any pipeline step fails.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - NanomonSV is already up-to-date (`should_run()` returns false)
+    /// - Output directories cannot be created
+    /// - Tumor or normal BAM files are missing or corrupted
+    /// - NanomonSV `parse` step fails for either sample
+    /// - NanomonSV `get` step fails
+    /// - PASS filtering via bcftools fails
+    /// - Log files cannot be written
+    /// - Reference genome or control panel files are missing
     fn run(&mut self) -> anyhow::Result<()> {
         if !self.should_run() {
             anyhow::bail!("NanomonSV is up-to-data.");

+ 6 - 0
src/callers/savana.rs

@@ -141,6 +141,12 @@ impl Initialize for Savana {
     /// # Returns
     ///
     /// A new `Savana` instance, or an error if cleanup fails.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - `config.savana_force` is true and output directory cannot be removed
+    /// - Directory deletion fails due to permissions or I/O errors
     fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
         info!("Initialize Savana for {id}.");
         let log_dir = format!("{}/{}/log/savana", config.result_dir, id);

+ 18 - 0
src/callers/severus.rs

@@ -134,6 +134,12 @@ impl Initialize for Severus {
     /// # Returns
     ///
     /// A `Severus` instance wrapped in `Ok`, or an error if setup fails
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - `config.severus_force` is true and output directory cannot be removed
+    /// - Directory deletion fails due to permissions or I/O errors
     fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
         info!("Initialize Severus for {id}.");
 
@@ -180,6 +186,18 @@ impl Run for Severus {
     /// # Returns
     ///
     /// `Ok(())` if everything runs successfully; otherwise, an error with context.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - Severus is already up-to-date (`should_run()` returns false)
+    /// - Phased germline VCF is missing and LongPhase fails to generate it
+    /// - Output directory cannot be created
+    /// - Tumor or normal BAM files are missing or corrupted
+    /// - VNTR BED file is missing or malformed
+    /// - Severus execution fails
+    /// - PASS filtering via bcftools fails
+    /// - Log files cannot be written
     fn run(&mut self) -> anyhow::Result<()> {
         if !self.should_run() {
             anyhow::bail!("Severus is up-to-date");

+ 867 - 0
src/callers/straglr.rs

@@ -0,0 +1,867 @@
+//! # Straglr Short Tandem Repeat Genotyper
+//!
+//! This module provides wrappers for [Straglr](https://github.com/bcgsc/straglr),
+//! a genotyper for short tandem repeats (STRs) optimized for long-read sequencing data.
+//!
+//! ## Overview
+//!
+//! Straglr detects and genotypes STR expansions from long-read data, including:
+//! - Pathogenic repeat expansions (Huntington's disease, SCAs, FXS, etc.)
+//! - Genome-wide STR profiling
+//! - De novo repeat expansion detection
+//! - Support for both known and novel STR loci
+//!
+//! ## Key Features
+//!
+//! - **Pathogenic repeat detection** - Identifies disease-causing STR expansions
+//! - **Long-read optimized** - Leverages full-length reads spanning repeat regions
+//! - **Locus annotation** - Uses BED file of known pathogenic loci
+//! - **VCF output** - Optional variant-style output for downstream analysis
+//! - **Solo and paired modes** - Single-sample or tumor-normal analysis
+//!
+//! ## Requirements
+//!
+//! Before running Straglr, ensure:
+//! - BAM file is indexed (`.bai` file present)
+//! - Reference genome is accessible
+//! - STR loci BED file is configured (`config.straglr_loci_bed`)
+//! - Python environment with Straglr is available
+//!
+//! ## Output Files
+//!
+//! Paired mode TSV outputs (one per sample, paths given by
+//! `config.straglr_normal_tsv(id)` / `config.straglr_tumor_tsv(id)`), e.g.:
+//! ```text
+//! {result_dir}/{id}/straglr/{id}_normal.tsv
+//! {result_dir}/{id}/straglr/{id}_tumor.tsv
+//! ```
+//!
+//! Solo mode TSV output:
+//! ```text
+//! {result_dir}/{id}/straglr_solo/{time_point}/{id}_{time_point}_straglr.tsv
+//! ```
+//!
+//! ## Usage
+//!
+//! ### Paired (Tumor-Normal) Mode
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::straglr::Straglr;
+//! use pandora_lib_promethion::config::Config;
+//! use pandora_lib_promethion::pipes::Initialize;
+//! use pandora_lib_promethion::runners::Run;
+//!
+//! let config = Config::default();
+//! let mut caller = Straglr::initialize("sample_001", &config)?;
+//!
+//! if caller.should_run() {
+//!     caller.run()?;
+//! }
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ### Solo Mode
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::straglr::StraglrSolo;
+//! use pandora_lib_promethion::pipes::InitializeSolo;
+//!
+//! let config = Config::default();
+//! let mut caller = StraglrSolo::initialize("sample_001", "norm", &config)?;
+//! caller.run()?;
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ### Chunked Parallel Mode (Genome-Wide)
+//!
+//! For whole-genome STR genotyping, use the chunked execution mode to parallelize:
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::straglr::run_straglr_chunked;
+//! use pandora_lib_promethion::config::Config;
+//!
+//! let config = Config::default();
+//!
+//! // Run genome-wide genotyping with 20 parallel jobs
+//! run_straglr_chunked("sample_001", "norm", &config, 20)?;
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ### Loading and Analyzing Results
+//!
+//! ```ignore
+//! use pandora_lib_promethion::callers::straglr::Straglr;
+//! use pandora_lib_promethion::pipes::Initialize;
+//!
+//! let config = Config::default();
+//! let caller = Straglr::initialize("sample_001", &config)?;
+//!
+//! // Load results from both samples
+//! let (normal, tumor) = caller.load_results()?;
+//!
+//! // Find pathogenic expansions (e.g., >40 repeats)
+//! for str_locus in &normal {
+//!     if str_locus.is_expanded(40) {
+//!         println!("Expanded repeat at {}: {} copies ({})",
+//!                  str_locus.location_string(),
+//!                  str_locus.max_copy_number().unwrap(),
+//!                  str_locus.repeat_unit);
+//!     }
+//! }
+//!
+//! // Find somatic STR changes
+//! let changes = caller.find_somatic_changes(2)?;
+//! for (location, normal, tumor, diff) in changes {
+//!     println!("{}: Normal={:?}, Tumor={:?}, Diff={}",
+//!              location, normal.copy_numbers, tumor.copy_numbers, diff);
+//! }
+//! # Ok::<(), anyhow::Error>(())
+//! ```
+//!
+//! ## References
+//!
+//! - [Straglr GitHub](https://github.com/bcgsc/straglr)
+//! - [Straglr Paper](https://doi.org/10.1186/s13059-021-02447-3)
+use crate::{
+    commands::{Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner},
+    config::Config,
+    helpers::{is_file_older, remove_dir_if_exists},
+    io::straglr::{read_straglr_tsv, StraglrRow},
+    pipes::{Initialize, InitializeSolo, ShouldRun, Version},
+    run, run_many,
+    runners::Run,
+};
+use anyhow::Context;
+use log::{debug, info};
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader, Write},
+    path::Path,
+};
+
+/// Straglr paired (tumor-normal) STR genotyper.
+///
+/// Executes Straglr for STR genotyping on both tumor and normal samples,
+/// enabling detection of somatic STR expansions or contractions.
+///
+/// # Fields
+///
+/// - `id` - Sample identifier (e.g., "34528")
+/// - `config` - Global pipeline configuration
+/// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/straglr")
+#[derive(Debug)]
+pub struct Straglr {
+    /// Sample identifier
+    pub id: String,
+    /// Global pipeline configuration
+    pub config: Config,
+    /// Directory for log file storage
+    pub log_dir: String,
+}
+
+impl Initialize for Straglr {
+    /// Initializes a new Straglr instance for a given sample ID and configuration.
+    ///
+    /// Creates the output log directory path and optionally cleans up previous output files
+    /// if `straglr_force` is set.
+    ///
+    /// # Arguments
+    ///
+    /// * `id` - The sample ID
+    /// * `config` - The execution configuration
+    ///
+    /// # Returns
+    ///
+    /// A `Straglr` instance wrapped in `Ok`, or an error if setup fails
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - `config.straglr_force` is true and output directory cannot be removed
+    /// - Directory deletion fails due to permissions or I/O errors
+    fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
+        info!("Initialize Straglr for {id}.");
+
+        let log_dir = format!("{}/{}/log/straglr", config.result_dir, id);
+        let straglr = Self {
+            id: id.to_string(),
+            config: config.clone(),
+            log_dir,
+        };
+
+        if straglr.config.straglr_force {
+            remove_dir_if_exists(&straglr.config.straglr_output_dir(id))?;
+        }
+
+        Ok(straglr)
+    }
+}
+
+impl ShouldRun for Straglr {
+    /// Determines whether Straglr should re-run based on whether the output TSV
+    /// is older than either the tumor or normal BAM file.
+    ///
+    /// # Returns
+    ///
+    /// `true` if Straglr needs to be re-run, otherwise `false`
+    fn should_run(&self) -> bool {
+        let normal_tsv = &self.config.straglr_normal_tsv(&self.id);
+        let tumor_tsv = &self.config.straglr_tumor_tsv(&self.id);
+
+        let result = is_file_older(normal_tsv, &self.config.normal_bam(&self.id), true)
+            .unwrap_or(true)
+            || is_file_older(normal_tsv, &self.config.tumoral_bam(&self.id), true).unwrap_or(true)
+            || is_file_older(tumor_tsv, &self.config.normal_bam(&self.id), true).unwrap_or(true)
+            || is_file_older(tumor_tsv, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
+
+        if result {
+            info!("Straglr should run for: {}.", self.id);
+        }
+        result
+    }
+}
+
+impl Run for Straglr {
+    /// Runs the Straglr STR genotyper on both normal and tumor BAM files.
+    ///
+    /// Executes Straglr separately for normal and tumor samples, producing
+    /// TSV files with STR genotypes for each.
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` if everything runs successfully; otherwise, an error with context.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - Straglr is already up-to-date (`should_run()` returns false)
+    /// - Output directory cannot be created
+    /// - Tumor or normal BAM files are missing or corrupted
+    /// - Reference genome is missing
+    /// - STR loci BED file is missing or malformed
+    /// - Straglr execution fails for either sample
+    /// - Log files cannot be written
+    fn run(&mut self) -> anyhow::Result<()> {
+        if !self.should_run() {
+            anyhow::bail!("Straglr is up-to-date");
+        }
+
+        info!("Running Straglr v{}", Straglr::version(&self.config)?);
+
+        let id = &self.id;
+        let output_dir = self.config.straglr_output_dir(id);
+        fs::create_dir_all(&output_dir).context("Failed to create Straglr output directory")?;
+
+        // Run on normal sample
+        let normal_tsv = self.config.straglr_normal_tsv(id);
+        if !Path::new(&normal_tsv).exists() {
+            info!("Running Straglr on normal sample: {}", id);
+            let mut job = StraglrJob {
+                conda_sh: self.config.conda_sh.clone(),
+                straglr_bin: self.config.straglr_bin.clone(),
+                bam: self.config.normal_bam(id),
+                reference: self.config.reference.clone(),
+                loci_bed: self.config.straglr_loci_bed.clone(),
+                output_prefix: format!("{}/{}_normal", output_dir, id),
+                min_support: self.config.straglr_min_support,
+                min_cluster_size: self.config.straglr_min_cluster_size,
+                genotype_in_size: self.config.straglr_genotype_in_size,
+            };
+
+            let output = run!(&self.config, &mut job)
+                .context("Error while running Straglr on normal sample")?;
+
+            let log_file = format!("{}/straglr_normal_", self.log_dir);
+            output
+                .save_to_file(&log_file)
+                .context(format!("Error while writing Straglr logs into {log_file}"))?;
+        } else {
+            debug!(
+                "Straglr normal TSV already exists for {}, skipping execution.",
+                self.id
+            );
+        }
+
+        // Run on tumor sample
+        let tumor_tsv = self.config.straglr_tumor_tsv(id);
+        if !Path::new(&tumor_tsv).exists() {
+            info!("Running Straglr on tumor sample: {}", id);
+            let mut job = StraglrJob {
+                conda_sh: self.config.conda_sh.clone(),
+                straglr_bin: self.config.straglr_bin.clone(),
+                bam: self.config.tumoral_bam(id),
+                reference: self.config.reference.clone(),
+                loci_bed: self.config.straglr_loci_bed.clone(),
+                output_prefix: format!("{}/{}_tumor", output_dir, id),
+                min_support: self.config.straglr_min_support,
+                min_cluster_size: self.config.straglr_min_cluster_size,
+                genotype_in_size: self.config.straglr_genotype_in_size,
+            };
+
+            let output = run!(&self.config, &mut job)
+                .context("Error while running Straglr on tumor sample")?;
+
+            let log_file = format!("{}/straglr_tumor_", self.log_dir);
+            output
+                .save_to_file(&log_file)
+                .context(format!("Error while writing Straglr logs into {log_file}"))?;
+        } else {
+            debug!(
+                "Straglr tumor TSV already exists for {}, skipping execution.",
+                self.id
+            );
+        }
+
+        Ok(())
+    }
+}
+
+impl Straglr {
+    /// Loads and parses the normal sample Straglr TSV results.
+    ///
+    /// # Returns
+    /// Vector of STR loci from the normal sample
+    ///
+    /// # Errors
+    /// Returns an error if the TSV file cannot be read or parsed.
+    pub fn load_normal_results(&self) -> anyhow::Result<Vec<StraglrRow>> {
+        let tsv_path = self.config.straglr_normal_tsv(&self.id);
+        read_straglr_tsv(&tsv_path)
+            .context(format!("Failed to read normal Straglr results from {}", tsv_path))
+    }
+
+    /// Loads and parses the tumor sample Straglr TSV results.
+    ///
+    /// # Returns
+    /// Vector of STR loci from the tumor sample
+    ///
+    /// # Errors
+    /// Returns an error if the TSV file cannot be read or parsed.
+    pub fn load_tumor_results(&self) -> anyhow::Result<Vec<StraglrRow>> {
+        let tsv_path = self.config.straglr_tumor_tsv(&self.id);
+        read_straglr_tsv(&tsv_path)
+            .context(format!("Failed to read tumor Straglr results from {}", tsv_path))
+    }
+
+    /// Loads both normal and tumor results as a tuple.
+    ///
+    /// # Returns
+    /// `(normal_results, tumor_results)` tuple
+    ///
+    /// # Errors
+    /// Returns an error if either TSV file cannot be read or parsed.
+    pub fn load_results(&self) -> anyhow::Result<(Vec<StraglrRow>, Vec<StraglrRow>)> {
+        Ok((self.load_normal_results()?, self.load_tumor_results()?))
+    }
+
+    /// Finds STR loci that differ between tumor and normal samples.
+    ///
+    /// Compares copy numbers at matching loci to identify somatic STR changes.
+    ///
+    /// # Arguments
+    /// * `min_difference` - Minimum copy number difference to report (default: 2)
+    ///
+    /// # Returns
+    /// Vector of tuples: `(locus_id, normal_row, tumor_row, copy_number_diff)`
+    ///
+    /// # Errors
+    /// Returns an error if results cannot be loaded.
+    pub fn find_somatic_changes(
+        &self,
+        min_difference: u32,
+    ) -> anyhow::Result<Vec<(String, StraglrRow, StraglrRow, i64)>> {
+        let (normal, tumor) = self.load_results()?;
+
+        let mut changes = Vec::new();
+
+        for normal_row in &normal {
+            let location = normal_row.location_string();
+
+            // Find matching locus in tumor
+            if let Some(tumor_row) = tumor.iter().find(|t| {
+                t.chrom == normal_row.chrom && t.start == normal_row.start && t.end == normal_row.end
+            }) {
+                // Compare max copy numbers
+                if let (Some(normal_cn), Some(tumor_cn)) =
+                    (normal_row.max_copy_number(), tumor_row.max_copy_number())
+                {
+                    let diff = tumor_cn as i64 - normal_cn as i64;
+                    if diff.abs() >= min_difference as i64 {
+                        changes.push((location, normal_row.clone(), tumor_row.clone(), diff));
+                    }
+                }
+            }
+        }
+
+        Ok(changes)
+    }
+}
+
/// Parameters for a single Straglr invocation (one BAM over one loci BED).
///
/// Instances are built either for a whole-sample run (`StraglrSolo`) or per
/// genome chunk (`run_straglr_chunked`); the fields are assembled into a
/// shell command by the `JobCommand` impl.
#[derive(Debug, Clone)]
struct StraglrJob {
    /// Path to the `conda.sh` initialization script sourced before activation.
    conda_sh: String,
    /// Path to the Straglr executable.
    straglr_bin: String,
    /// Input BAM file path.
    bam: String,
    /// Reference genome FASTA path.
    reference: String,
    /// BED file of STR loci passed to `--loci`.
    loci_bed: String,
    /// Output prefix passed to `--output`.
    output_prefix: String,
    /// Value for `--min_support`.
    min_support: u32,
    /// Value for `--min_cluster_size`.
    min_cluster_size: u32,
    /// When true, appends `--genotype_in_size` to the command.
    genotype_in_size: bool,
}
+
+impl JobCommand for StraglrJob {
+    fn cmd(&self) -> String {
+        let mut cmd = format!(
+            "source {conda_sh} && conda activate straglr_env && {straglr} {bam} {reference} --loci {loci} --min_support {min_sup} --min_cluster_size {min_clust} --output {output}",
+            conda_sh = self.conda_sh,
+            straglr = self.straglr_bin,
+            bam = self.bam,
+            reference = self.reference,
+            loci = self.loci_bed,
+            min_sup = self.min_support,
+            min_clust = self.min_cluster_size,
+            output = self.output_prefix
+        );
+
+        if self.genotype_in_size {
+            cmd.push_str(" --genotype_in_size");
+        }
+
+        cmd
+    }
+}
+
// Enable local single-job and local batch execution for StraglrJob
// (no method overrides needed).
impl LocalRunner for StraglrJob {}

impl LocalBatchRunner for StraglrJob {}
+
+impl SlurmRunner for StraglrJob {
+    fn slurm_args(&self) -> Vec<String> {
+        SlurmParams {
+            job_name: Some("straglr".into()),
+            partition: Some("shortq".into()),
+            cpus_per_task: Some(4),
+            mem: Some("16G".into()),
+            gres: None,
+        }
+        .to_args()
+    }
+}
+
+impl SbatchRunner for StraglrJob {
+    fn slurm_params(&self) -> SlurmParams {
+        SlurmParams {
+            job_name: Some("straglr".into()),
+            partition: Some("shortq".into()),
+            cpus_per_task: Some(4),
+            mem: Some("16G".into()),
+            gres: None,
+        }
+    }
+}
+
impl Version for Straglr {
    /// Retrieves the Straglr version by running `straglr --version`.
    ///
    /// # Errors
    /// Returns an error if command execution fails or version parsing fails.
    fn version(config: &Config) -> anyhow::Result<String> {
        // Minimal throwaway job type: same conda activation as StraglrJob,
        // but the command only queries the version.
        struct VersionJob {
            conda_sh: String,
            straglr_bin: String,
        }

        impl JobCommand for VersionJob {
            fn cmd(&self) -> String {
                format!(
                    "source {} && conda activate straglr_env && {} --version",
                    self.conda_sh, self.straglr_bin
                )
            }
        }

        impl LocalRunner for VersionJob {}

        impl SlurmRunner for VersionJob {
            fn slurm_args(&self) -> Vec<String> {
                // A version check is trivial: 1 CPU / 1G is plenty.
                SlurmParams {
                    job_name: Some("straglr_version".into()),
                    partition: Some("shortq".into()),
                    cpus_per_task: Some(1),
                    mem: Some("1G".into()),
                    gres: None,
                }
                .to_args()
            }
        }

        let mut version_job = VersionJob {
            conda_sh: config.conda_sh.clone(),
            straglr_bin: config.straglr_bin.clone(),
        };

        let out =
            run!(&config, &mut version_job).context("Error while running `straglr --version`")?;

        // Search both stdout and stderr for a version-looking line.
        let combined = format!("{}{}", out.stdout, out.stderr);
        let v = combined
            .lines()
            .find(|line| line.contains("straglr") || line.contains("version"))
            .map(|line| line.trim().to_string())
            .ok_or_else(|| anyhow::anyhow!("Could not parse straglr version from output"))?;

        Ok(v)
    }
}
+
/// Straglr solo (single-sample) STR genotyper.
///
/// Executes Straglr for STR genotyping on a single BAM file.
/// Useful for germline STR analysis or when no matched normal is available.
///
/// Construct via `InitializeSolo::initialize`, which also creates the log
/// directory.
///
/// # Fields
///
/// - `id` - Sample identifier (e.g., "34528")
/// - `time` - Time point label: typically `config.normal_name` ("norm") or `config.tumoral_name` ("diag")
/// - `config` - Global pipeline configuration
/// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/straglr_solo")
#[derive(Debug)]
pub struct StraglrSolo {
    /// Sample identifier
    pub id: String,
    /// Time point identifier (e.g., "norm" or "diag")
    pub time: String,
    /// Global pipeline configuration (owned copy)
    pub config: Config,
    /// Directory for log file storage
    pub log_dir: String,
}
+
+impl InitializeSolo for StraglrSolo {
+    /// Initializes Straglr solo analysis for a sample at a specific time point.
+    ///
+    /// Creates necessary log directory.
+    ///
+    /// # Errors
+    /// Returns an error if directory creation fails.
+    fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
+        let log_dir = format!("{}/{}/log/straglr_solo", config.result_dir, id);
+        if !Path::new(&log_dir).exists() {
+            fs::create_dir_all(&log_dir)
+                .context(format!("Failed to create {log_dir} directory"))?;
+        }
+
+        Ok(StraglrSolo {
+            id: id.to_string(),
+            time: time.to_string(),
+            config: config.clone(),
+            log_dir,
+        })
+    }
+}
+
impl Run for StraglrSolo {
    /// Runs the Straglr pipeline for a single sample.
    ///
    /// Skips execution when the output TSV already exists.
    ///
    /// # Errors
    /// Returns an error if Straglr execution or log writing fails.
    fn run(&mut self) -> anyhow::Result<()> {
        let id = &self.id;
        let time = &self.time;

        let output_tsv = &self.config.straglr_solo_tsv(id, time);

        if !Path::new(output_tsv).exists() {
            let output_dir = self.config.straglr_solo_output_dir(id, time);
            fs::create_dir_all(&output_dir)
                .context("Failed to create Straglr solo output directory")?;

            let mut job = StraglrJob {
                conda_sh: self.config.conda_sh.clone(),
                straglr_bin: self.config.straglr_bin.clone(),
                bam: self.config.solo_bam(id, time),
                reference: self.config.reference.clone(),
                loci_bed: self.config.straglr_loci_bed.clone(),
                // Straglr expands this into "{prefix}_straglr.tsv" outputs
                // (the same convention run_straglr_chunked relies on).
                output_prefix: format!("{}/{}_{}", output_dir, id, time),
                min_support: self.config.straglr_min_support,
                min_cluster_size: self.config.straglr_min_cluster_size,
                genotype_in_size: self.config.straglr_genotype_in_size,
            };

            // Runs locally or via Slurm depending on the config.
            let report =
                run!(&self.config, &mut job).context("Error while running straglr solo")?;

            // NOTE(review): trailing underscore suggests `save_to_file`
            // appends its own suffix to this prefix — confirm against the
            // report API.
            let log_file = format!("{}/straglr_", self.log_dir);
            report
                .save_to_file(&log_file)
                .context(format!("Error while writing logs into {log_file}"))?;
        } else {
            debug!("Straglr output TSV already exists.");
        }

        Ok(())
    }
}
+
+impl StraglrSolo {
+    /// Loads and parses the Straglr TSV results for this solo sample.
+    ///
+    /// # Returns
+    /// Vector of STR loci from the solo sample
+    ///
+    /// # Errors
+    /// Returns an error if the TSV file cannot be read or parsed.
+    pub fn load_results(&self) -> anyhow::Result<Vec<StraglrRow>> {
+        let tsv_path = self.config.straglr_solo_tsv(&self.id, &self.time);
+        read_straglr_tsv(&tsv_path)
+            .context(format!("Failed to read Straglr results from {}", tsv_path))
+    }
+
+    /// Filters results to show only expanded repeats above a threshold.
+    ///
+    /// # Arguments
+    /// * `min_copy_number` - Minimum copy number threshold
+    ///
+    /// # Returns
+    /// Vector of STR loci with copy numbers >= threshold
+    ///
+    /// # Errors
+    /// Returns an error if results cannot be loaded.
+    pub fn load_expanded_repeats(&self, min_copy_number: u32) -> anyhow::Result<Vec<StraglrRow>> {
+        let results = self.load_results()?;
+        Ok(results
+            .into_iter()
+            .filter(|row| row.is_expanded(min_copy_number))
+            .collect())
+    }
+}
+
/// Runs Straglr in parallel chunks for genome-wide STR genotyping.
///
/// Splits the genome into `n_parts` regions, creates temporary BED files for each region,
/// runs Straglr in parallel, and merges the results into a single TSV file.
///
/// This function is designed for whole-genome STR genotyping where processing the entire
/// genome at once would be too slow. By splitting into chunks, multiple Straglr instances
/// can run in parallel.
///
/// # Arguments
///
/// * `id` - Sample identifier
/// * `time_point` - Time point label (e.g., "norm", "diag")
/// * `config` - Global pipeline configuration
/// * `n_parts` - Number of parallel chunks (will be adjusted if genome is smaller)
///
/// # Returns
///
/// `Ok(())` if all chunks complete successfully and merge succeeds
///
/// # Errors
///
/// Returns an error if:
/// - `n_parts` is 0
/// - BAM file cannot be opened or has no header
/// - Temporary BED files cannot be created
/// - Any Straglr chunk fails to execute
/// - TSV merging fails
/// - Final output file cannot be written
///
/// # Implementation Details
///
/// 1. Reads BAM header to determine genome sizes
/// 2. Splits genome into approximately equal-sized regions
/// 3. Creates temporary BED file for each region in `{tmp_dir}/straglr_chunk_{id}_{time}_{i}.bed`
/// 4. Runs Straglr in parallel via `run_many!` macro
/// 5. Concatenates all output TSV files (skipping headers from parts 2+)
/// 6. Removes temporary BED and partial TSV files
///
/// # Example
///
/// ```ignore
/// use pandora_lib_promethion::callers::straglr::run_straglr_chunked;
/// use pandora_lib_promethion::config::Config;
///
/// let config = Config::default();
///
/// // Run genome-wide STR genotyping with 10 parallel jobs
/// run_straglr_chunked("sample_001", "norm", &config, 10)?;
/// # Ok::<(), anyhow::Error>(())
/// ```
pub fn run_straglr_chunked(
    id: &str,
    time_point: &str,
    config: &Config,
    n_parts: usize,
) -> anyhow::Result<()> {
    anyhow::ensure!(n_parts > 0, "n_parts must be > 0");

    info!(
        "Running Straglr in {} parallel chunks for {} {}",
        n_parts, id, time_point
    );

    // Get genome sizes from BAM header
    let bam_path = config.solo_bam(id, time_point);
    let reader = bam::Reader::from_path(&bam_path)
        .with_context(|| format!("Failed to open BAM: {}", bam_path))?;
    let header = bam::Header::from_template(reader.header());
    let genome_sizes = get_genome_sizes(&header)?;

    // Split genome into regions; the splitter may return fewer chunks than
    // requested, so the actual count is taken from its result.
    let region_chunks = split_genome_into_n_regions_exact(&genome_sizes, n_parts);
    let actual_n_parts = region_chunks.len();

    info!(
        "Split genome into {} chunks for Straglr processing",
        actual_n_parts
    );

    // Create output directory
    let output_dir = config.straglr_solo_output_dir(id, time_point);
    fs::create_dir_all(&output_dir)
        .context(format!("Failed to create output directory: {}", output_dir))?;

    // Create temporary BED files and jobs
    let mut jobs = Vec::with_capacity(actual_n_parts);
    let mut temp_bed_files = Vec::with_capacity(actual_n_parts);
    let mut temp_tsv_files = Vec::with_capacity(actual_n_parts);

    for (i, regions) in region_chunks.into_iter().enumerate() {
        let part_num = i + 1;

        // Create temporary BED file for this chunk
        let bed_path = format!(
            "{}/straglr_chunk_{}_{}_part{}.bed",
            config.tmp_dir, id, time_point, part_num
        );

        let mut bed_file = File::create(&bed_path)
            .context(format!("Failed to create temporary BED file: {}", bed_path))?;

        // Write regions to BED file (format: chr\tstart\tend)
        for region_str in &regions {
            // Parse region format: "chr1:1000-2000" -> "chr1\t1000\t2000".
            // Strings not matching the "chr:start-end" shape are silently
            // skipped (they should not occur for internally generated regions).
            if let Some((chr, range)) = region_str.split_once(':') {
                if let Some((start, end)) = range.split_once('-') {
                    writeln!(bed_file, "{}\t{}\t{}", chr, start, end)
                        .context("Failed to write to BED file")?;
                }
            }
        }
        bed_file.flush().context("Failed to flush BED file")?;

        temp_bed_files.push(bed_path.clone());

        // Create job for this chunk
        let output_prefix = format!("{}/{}_{}_part{}", output_dir, id, time_point, part_num);
        let output_tsv = format!("{}_straglr.tsv", output_prefix);
        temp_tsv_files.push(output_tsv.clone());

        let job = StraglrJob {
            conda_sh: config.conda_sh.clone(),
            straglr_bin: config.straglr_bin.clone(),
            bam: bam_path.clone(),
            reference: config.reference.clone(),
            loci_bed: bed_path, // Use the chunk BED file instead of global loci
            output_prefix,
            min_support: config.straglr_min_support,
            min_cluster_size: config.straglr_min_cluster_size,
            genotype_in_size: config.straglr_genotype_in_size,
        };

        jobs.push(job);
    }

    // Run all chunks in parallel (local or Slurm depending on the config)
    info!("Executing {} Straglr jobs in parallel", actual_n_parts);
    let outputs = run_many!(config, jobs)?;

    // Save logs
    let log_dir = format!("{}/{}/log/straglr_chunked", config.result_dir, id);
    fs::create_dir_all(&log_dir).context("Failed to create log directory")?;

    for (i, output) in outputs.iter().enumerate() {
        let log_file = format!("{}/straglr_part{}_", log_dir, i + 1);
        output
            .save_to_file(&log_file)
            .context(format!("Failed to save logs for part {}", i + 1))?;
    }

    // Merge TSV files
    info!("Merging {} TSV files", actual_n_parts);
    let final_tsv = config.straglr_solo_tsv(id, time_point);
    merge_tsv_files(&temp_tsv_files, &final_tsv)
        .context("Failed to merge Straglr TSV files")?;

    // Clean up temporary files. Best-effort: failures are logged but do not
    // fail the run, since the merged output is already written.
    info!("Cleaning up temporary files");
    for bed_file in &temp_bed_files {
        if let Err(e) = fs::remove_file(bed_file) {
            debug!("Failed to remove temporary BED file {}: {}", bed_file, e);
        }
    }
    for tsv_file in &temp_tsv_files {
        if let Err(e) = fs::remove_file(tsv_file) {
            debug!("Failed to remove temporary TSV file {}: {}", tsv_file, e);
        }
    }

    info!(
        "Straglr chunked execution completed for {} {} (merged into {})",
        id, time_point, final_tsv
    );

    Ok(())
}
+
+/// Merges multiple TSV files into a single output file.
+///
+/// Concatenates TSV files while preserving the header from the first file
+/// and skipping headers from subsequent files.
+///
+/// # Arguments
+///
+/// * `input_files` - Paths to input TSV files
+/// * `output_file` - Path to merged output TSV file
+///
+/// # Errors
+///
+/// Returns an error if any file cannot be read or the output cannot be written.
+fn merge_tsv_files(input_files: &[String], output_file: &str) -> anyhow::Result<()> {
+    let mut output = File::create(output_file)
+        .context(format!("Failed to create output file: {}", output_file))?;
+
+    let mut first_file = true;
+
+    for (i, input_path) in input_files.iter().enumerate() {
+        if !Path::new(input_path).exists() {
+            debug!("Skipping non-existent file: {}", input_path);
+            continue;
+        }
+
+        let content = fs::read_to_string(input_path)
+            .context(format!("Failed to read input file: {}", input_path))?;
+
+        if first_file {
+            // Write entire first file including header
+            output
+                .write_all(content.as_bytes())
+                .context("Failed to write to output file")?;
+            first_file = false;
+        } else {
+            // Skip header line(s) for subsequent files
+            for line in content.lines() {
+                if !line.starts_with('#') && !line.trim().is_empty() {
+                    writeln!(output, "{}", line).context("Failed to write line to output")?;
+                }
+            }
+        }
+
+        debug!("Merged part {} from {}", i + 1, input_path);
+    }
+
+    output.flush().context("Failed to flush output file")?;
+    Ok(())
+}

+ 74 - 0
src/config.rs

@@ -319,6 +319,35 @@ pub struct Config {
     /// Template for solo nanomonsv passed VCF (`{output_dir}`, `{id}`, `{time}`).
     pub nanomonsv_solo_passed_vcf: String,
 
+    // === Straglr configuration ===
+    /// Path to Straglr executable.
+    pub straglr_bin: String,
+
+    /// Path to STR loci BED file for Straglr.
+    pub straglr_loci_bed: String,
+
+    /// Minimum read support for STR genotyping.
+    pub straglr_min_support: u32,
+
+    /// Minimum cluster size for STR detection.
+    pub straglr_min_cluster_size: u32,
+
+    /// Whether to genotype in size mode.
+    pub straglr_genotype_in_size: bool,
+
+    /// Template for paired Straglr output directory.
+    ///
+    /// Placeholders: `{result_dir}`, `{id}`.
+    pub straglr_output_dir: String,
+
+    /// Template for solo Straglr output directory.
+    ///
+    /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
+    pub straglr_solo_output_dir: String,
+
+    /// Force Straglr recomputation.
+    pub straglr_force: bool,
+
     // === PromethION runs / metadata ===
     /// Directory containing metadata about PromethION runs.
     pub promethion_runs_metadata_dir: String,
@@ -790,6 +819,51 @@ impl Config {
         )
     }
 
+    /// Straglr paired output directory.
+    pub fn straglr_output_dir(&self, id: &str) -> String {
+        self.straglr_output_dir
+            .replace("{result_dir}", &self.result_dir)
+            .replace("{id}", id)
+    }
+
+    /// Straglr normal sample TSV output.
+    pub fn straglr_normal_tsv(&self, id: &str) -> String {
+        format!(
+            "{}/{}_{}_straglr.tsv",
+            self.straglr_output_dir(id),
+            id,
+            self.normal_name
+        )
+    }
+
+    /// Straglr tumor sample TSV output.
+    pub fn straglr_tumor_tsv(&self, id: &str) -> String {
+        format!(
+            "{}/{}_{}_straglr.tsv",
+            self.straglr_output_dir(id),
+            id,
+            self.tumoral_name
+        )
+    }
+
+    /// Straglr solo output directory.
+    pub fn straglr_solo_output_dir(&self, id: &str, time: &str) -> String {
+        self.straglr_solo_output_dir
+            .replace("{result_dir}", &self.result_dir)
+            .replace("{id}", id)
+            .replace("{time}", time)
+    }
+
+    /// Straglr solo TSV output.
+    pub fn straglr_solo_tsv(&self, id: &str, time: &str) -> String {
+        format!(
+            "{}/{}_{}_straglr.tsv",
+            self.straglr_solo_output_dir(id, time),
+            id,
+            time
+        )
+    }
+
     /// Alias for the constitutional germline VCF.
     pub fn constit_vcf(&self, id: &str) -> String {
         self.clairs_germline_passed_vcf(id)

+ 1 - 0
src/io/mod.rs

@@ -8,3 +8,4 @@ pub mod pod5_footer_generated;
 pub mod gff;
 pub mod bam;
 pub mod writers;
+pub mod straglr;

+ 284 - 0
src/io/straglr.rs

@@ -0,0 +1,284 @@
+//! Straglr TSV output parser
+//!
+//! Parses TSV files produced by Straglr STR genotyper.
+
+use anyhow::Context;
+use log::warn;
+use std::{
+    io::{BufRead, BufReader},
+    str::FromStr,
+};
+
+use super::readers::get_reader;
+
/// Represents a single STR locus genotyped by Straglr.
///
/// Each row corresponds to one STR locus with its genotype information,
/// read support, and confidence metrics.
#[derive(Debug, Clone)]
pub struct StraglrRow {
    /// Chromosome name (e.g., "chr4", "chrX")
    pub chrom: String,
    /// Start position (0-based)
    pub start: u64,
    /// End position (exclusive)
    pub end: u64,
    /// Repeat unit motif (e.g., "CAG", "GGCCCC")
    pub repeat_unit: String,
    /// Genotype as string (e.g., "12/13", "45/45", ".")
    pub genotype: String,
    /// Copy number for each allele (e.g., [12, 13]).
    /// May be empty when the copy-number column could not be parsed
    /// (e.g. "." for ungenotyped loci).
    pub copy_numbers: Vec<u32>,
    /// Allele lengths in base pairs (optional 8th TSV column)
    pub allele_lengths: Option<Vec<u32>>,
    /// Read support count
    pub support: u32,
    /// Confidence score (optional 9th TSV column)
    pub score: Option<f64>,
}
+
+impl FromStr for StraglrRow {
+    type Err = anyhow::Error;
+
+    /// Parses a single TSV line from Straglr output.
+    ///
+    /// Expected format (tab-separated):
+    /// ```text
+    /// #chrom  start   end repeat_unit genotype    copy_number support [optional: allele_length, score, ...]
+    /// ```
+    ///
+    /// # Errors
+    /// Returns an error if required fields are missing or cannot be parsed.
+    fn from_str(s: &str) -> anyhow::Result<Self> {
+        let fields: Vec<&str> = s.split('\t').collect();
+
+        if fields.len() < 7 {
+            anyhow::bail!(
+                "Invalid Straglr TSV line: expected at least 7 fields, got {}",
+                fields.len()
+            );
+        }
+
+        let chrom = fields[0].to_string();
+        let start: u64 = fields[1]
+            .parse()
+            .context(format!("Failed to parse start position: {}", fields[1]))?;
+        let end: u64 = fields[2]
+            .parse()
+            .context(format!("Failed to parse end position: {}", fields[2]))?;
+        let repeat_unit = fields[3].to_string();
+        let genotype = fields[4].to_string();
+
+        // Parse copy numbers (format: "12,13" or "45")
+        let copy_numbers: Vec<u32> = fields[5]
+            .split(',')
+            .filter_map(|s| s.parse().ok())
+            .collect();
+
+        let support: u32 = fields[6]
+            .parse()
+            .context(format!("Failed to parse support: {}", fields[6]))?;
+
+        // Optional fields
+        let allele_lengths = fields.get(7).and_then(|s| {
+            let lengths: Vec<u32> = s.split(',').filter_map(|v| v.parse().ok()).collect();
+            if lengths.is_empty() {
+                None
+            } else {
+                Some(lengths)
+            }
+        });
+
+        let score = fields.get(8).and_then(|s| s.parse().ok());
+
+        Ok(Self {
+            chrom,
+            start,
+            end,
+            repeat_unit,
+            genotype,
+            copy_numbers,
+            allele_lengths,
+            support,
+            score,
+        })
+    }
+}
+
+impl StraglrRow {
+    /// Returns the locus length in base pairs.
+    pub fn locus_length(&self) -> u64 {
+        self.end.saturating_sub(self.start)
+    }
+
+    /// Returns true if this locus has an expanded repeat (based on copy number threshold).
+    ///
+    /// # Arguments
+    /// * `threshold` - Minimum copy number to consider as expanded
+    pub fn is_expanded(&self, threshold: u32) -> bool {
+        self.copy_numbers.iter().any(|&cn| cn >= threshold)
+    }
+
+    /// Returns the maximum copy number across all alleles.
+    pub fn max_copy_number(&self) -> Option<u32> {
+        self.copy_numbers.iter().max().copied()
+    }
+
+    /// Returns true if the locus is homozygous (all alleles have same copy number).
+    pub fn is_homozygous(&self) -> bool {
+        if self.copy_numbers.len() <= 1 {
+            return true;
+        }
+        let first = self.copy_numbers[0];
+        self.copy_numbers.iter().all(|&cn| cn == first)
+    }
+
+    /// Returns the location as a string (e.g., "chr4:3074876-3074933").
+    pub fn location_string(&self) -> String {
+        format!("{}:{}-{}", self.chrom, self.start, self.end)
+    }
+}
+
+/// Reads and parses a Straglr TSV output file.
+///
+/// Skips header lines (starting with `#`) and empty lines.
+/// Logs warnings for malformed lines but continues parsing.
+///
+/// # Arguments
+/// * `path` - Path to the Straglr TSV file (can be gzipped)
+///
+/// # Returns
+/// `Ok(Vec<StraglrRow>)` with all successfully parsed STR loci
+///
+/// # Errors
+/// Returns an error if the file cannot be opened or read.
+///
+/// # Example
+/// ```ignore
+/// use pandora_lib_promethion::io::straglr::read_straglr_tsv;
+///
+/// let strs = read_straglr_tsv("/path/to/sample_straglr.tsv")?;
+/// for str_locus in strs {
+///     if str_locus.is_expanded(40) {
+///         println!("Expanded repeat at {}: {} copies",
+///                  str_locus.location_string(),
+///                  str_locus.max_copy_number().unwrap());
+///     }
+/// }
+/// # Ok::<(), anyhow::Error>(())
+/// ```
+pub fn read_straglr_tsv(path: &str) -> anyhow::Result<Vec<StraglrRow>> {
+    let reader = BufReader::new(get_reader(path)?);
+    let mut results = Vec::new();
+
+    for (line_num, line) in reader.lines().enumerate() {
+        match line {
+            Ok(line) => {
+                // Skip header lines and empty lines
+                if line.starts_with('#') || line.trim().is_empty() {
+                    continue;
+                }
+
+                match line.parse::<StraglrRow>() {
+                    Ok(row) => results.push(row),
+                    Err(e) => warn!("Failed to parse line {}: {} (error: {})", line_num + 1, line, e),
+                }
+            }
+            Err(e) => warn!("Failed to read line {}: {}", line_num + 1, e),
+        }
+    }
+
+    Ok(results)
+}
+
+/// Filters Straglr results to keep only loci with copy numbers above a threshold.
+///
+/// # Arguments
+/// * `rows` - Vector of Straglr STR loci
+/// * `min_copy_number` - Minimum copy number threshold
+///
+/// # Returns
+/// Filtered vector containing only expanded repeats
+pub fn filter_expanded(rows: Vec<StraglrRow>, min_copy_number: u32) -> Vec<StraglrRow> {
+    rows.into_iter()
+        .filter(|row| row.is_expanded(min_copy_number))
+        .collect()
+}
+
+/// Groups Straglr results by chromosome.
+///
+/// # Arguments
+/// * `rows` - Vector of Straglr STR loci
+///
+/// # Returns
+/// HashMap mapping chromosome names to vectors of STR loci
+pub fn group_by_chromosome(rows: Vec<StraglrRow>) -> std::collections::HashMap<String, Vec<StraglrRow>> {
+    use std::collections::HashMap;
+
+    let mut map: HashMap<String, Vec<StraglrRow>> = HashMap::new();
+    for row in rows {
+        map.entry(row.chrom.clone()).or_insert_with(Vec::new).push(row);
+    }
+    map
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Full 9-column line: both optional columns (allele lengths, score) set.
    #[test]
    fn test_parse_straglr_row() {
        let line = "chr4\t3074876\t3074933\tCAG\t19/20\t19,20\t45\t57,60\t0.95";
        let row: StraglrRow = line.parse().unwrap();

        assert_eq!(row.chrom, "chr4");
        assert_eq!(row.start, 3074876);
        assert_eq!(row.end, 3074933);
        assert_eq!(row.repeat_unit, "CAG");
        assert_eq!(row.genotype, "19/20");
        assert_eq!(row.copy_numbers, vec![19, 20]);
        assert_eq!(row.support, 45);
        assert_eq!(row.allele_lengths, Some(vec![57, 60]));
        assert_eq!(row.score, Some(0.95));
    }

    /// Accessor methods on a homozygous 50/50 locus.
    #[test]
    fn test_straglr_row_methods() {
        let row = StraglrRow {
            chrom: "chr4".to_string(),
            start: 100,
            end: 200,
            repeat_unit: "CAG".to_string(),
            genotype: "50/50".to_string(),
            copy_numbers: vec![50, 50],
            allele_lengths: None,
            support: 30,
            score: None,
        };

        assert_eq!(row.locus_length(), 100);
        assert!(row.is_expanded(40));
        assert!(!row.is_expanded(60));
        assert_eq!(row.max_copy_number(), Some(50));
        assert!(row.is_homozygous());
        assert_eq!(row.location_string(), "chr4:100-200");
    }

    /// Differing allele copy numbers must not be reported as homozygous.
    #[test]
    fn test_heterozygous_detection() {
        let row = StraglrRow {
            chrom: "chr4".to_string(),
            start: 100,
            end: 200,
            repeat_unit: "CAG".to_string(),
            genotype: "19/45".to_string(),
            copy_numbers: vec![19, 45],
            allele_lengths: None,
            support: 30,
            score: None,
        };

        assert!(!row.is_homozygous());
        assert_eq!(row.max_copy_number(), Some(45));
    }
}