|
|
@@ -0,0 +1,867 @@
|
|
|
+//! # Straglr Short Tandem Repeat Genotyper
|
|
|
+//!
|
|
|
+//! This module provides wrappers for [Straglr](https://github.com/bcgsc/straglr),
|
|
|
+//! a genotyper for short tandem repeats (STRs) optimized for long-read sequencing data.
|
|
|
+//!
|
|
|
+//! ## Overview
|
|
|
+//!
|
|
|
+//! Straglr detects and genotypes STR expansions from long-read data, including:
|
|
|
+//! - Pathogenic repeat expansions (Huntington's disease, SCAs, FXS, etc.)
|
|
|
+//! - Genome-wide STR profiling
|
|
|
+//! - De novo repeat expansion detection
|
|
|
+//! - Support for both known and novel STR loci
|
|
|
+//!
|
|
|
+//! ## Key Features
|
|
|
+//!
|
|
|
+//! - **Pathogenic repeat detection** - Identifies disease-causing STR expansions
|
|
|
+//! - **Long-read optimized** - Leverages full-length reads spanning repeat regions
|
|
|
+//! - **Locus annotation** - Uses BED file of known pathogenic loci
|
|
|
+//! - **VCF output** - Optional variant-style output for downstream analysis
|
|
|
+//! - **Solo and paired modes** - Single-sample or tumor-normal analysis
|
|
|
+//!
|
|
|
+//! ## Requirements
|
|
|
+//!
|
|
|
+//! Before running Straglr, ensure:
|
|
|
+//! - BAM file is indexed (`.bai` file present)
|
|
|
+//! - Reference genome is accessible
|
|
|
+//! - STR loci BED file is configured (`config.straglr_loci_bed`)
|
|
|
+//! - Python environment with Straglr is available
|
|
|
+//!
|
|
|
+//! ## Output Files
|
|
|
+//!
|
|
|
+//! Paired mode TSV output:
|
|
|
+//! ```text
|
|
|
+//! {result_dir}/{id}/straglr/{id}_straglr.tsv
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! Solo mode TSV output:
|
|
|
+//! ```text
|
|
|
+//! {result_dir}/{id}/straglr_solo/{time_point}/{id}_{time_point}_straglr.tsv
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## Usage
|
|
|
+//!
|
|
|
+//! ### Paired (Tumor-Normal) Mode
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::callers::straglr::Straglr;
|
|
|
+//! use pandora_lib_promethion::config::Config;
|
|
|
+//! use pandora_lib_promethion::pipes::Initialize;
|
|
|
+//! use pandora_lib_promethion::runners::Run;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let mut caller = Straglr::initialize("sample_001", &config)?;
|
|
|
+//!
|
|
|
+//! if caller.should_run() {
|
|
|
+//! caller.run()?;
|
|
|
+//! }
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ### Solo Mode
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::callers::straglr::StraglrSolo;
|
|
|
+//! use pandora_lib_promethion::pipes::InitializeSolo;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let mut caller = StraglrSolo::initialize("sample_001", "norm", &config)?;
|
|
|
+//! caller.run()?;
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ### Chunked Parallel Mode (Genome-Wide)
|
|
|
+//!
|
|
|
+//! For whole-genome STR genotyping, use the chunked execution mode to parallelize:
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::callers::straglr::run_straglr_chunked;
|
|
|
+//! use pandora_lib_promethion::config::Config;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//!
|
|
|
+//! // Run genome-wide genotyping with 20 parallel jobs
|
|
|
+//! run_straglr_chunked("sample_001", "norm", &config, 20)?;
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ### Loading and Analyzing Results
|
|
|
+//!
|
|
|
+//! ```ignore
|
|
|
+//! use pandora_lib_promethion::callers::straglr::Straglr;
|
|
|
+//! use pandora_lib_promethion::pipes::Initialize;
|
|
|
+//!
|
|
|
+//! let config = Config::default();
|
|
|
+//! let caller = Straglr::initialize("sample_001", &config)?;
|
|
|
+//!
|
|
|
+//! // Load results from both samples
|
|
|
+//! let (normal, tumor) = caller.load_results()?;
|
|
|
+//!
|
|
|
+//! // Find pathogenic expansions (e.g., >40 repeats)
|
|
|
+//! for str_locus in &normal {
|
|
|
+//! if str_locus.is_expanded(40) {
|
|
|
+//! println!("Expanded repeat at {}: {} copies ({})",
|
|
|
+//! str_locus.location_string(),
|
|
|
+//! str_locus.max_copy_number().unwrap(),
|
|
|
+//! str_locus.repeat_unit);
|
|
|
+//! }
|
|
|
+//! }
|
|
|
+//!
|
|
|
+//! // Find somatic STR changes
|
|
|
+//! let changes = caller.find_somatic_changes(2)?;
|
|
|
+//! for (location, normal, tumor, diff) in changes {
|
|
|
+//! println!("{}: Normal={:?}, Tumor={:?}, Diff={}",
|
|
|
+//! location, normal.copy_numbers, tumor.copy_numbers, diff);
|
|
|
+//! }
|
|
|
+//! # Ok::<(), anyhow::Error>(())
|
|
|
+//! ```
|
|
|
+//!
|
|
|
+//! ## References
|
|
|
+//!
|
|
|
+//! - [Straglr GitHub](https://github.com/bcgsc/straglr)
|
|
|
+//! - [Straglr Paper](https://doi.org/10.1186/s13059-021-02447-3)
|
|
|
+use crate::{
|
|
|
+ commands::{Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams, SlurmRunner},
|
|
|
+ config::Config,
|
|
|
+ helpers::{is_file_older, remove_dir_if_exists},
|
|
|
+ io::straglr::{read_straglr_tsv, StraglrRow},
|
|
|
+ pipes::{Initialize, InitializeSolo, ShouldRun, Version},
|
|
|
+ run, run_many,
|
|
|
+ runners::Run,
|
|
|
+};
|
|
|
+use anyhow::Context;
|
|
|
+use log::{debug, info};
|
|
|
+use std::{
|
|
|
+ fs::{self, File},
|
|
|
+ io::{BufRead, BufReader, Write},
|
|
|
+ path::Path,
|
|
|
+};
|
|
|
+
|
|
|
/// Straglr paired (tumor-normal) STR genotyper.
///
/// Executes Straglr for STR genotyping on both tumor and normal samples,
/// enabling detection of somatic STR expansions or contractions.
///
/// Construct via [`Initialize::initialize`]; outputs land in
/// `config.straglr_output_dir(id)` as one TSV per sample.
///
/// # Fields
///
/// - `id` - Sample identifier (e.g., "34528")
/// - `config` - Global pipeline configuration
/// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/straglr")
#[derive(Debug)]
pub struct Straglr {
    /// Sample identifier
    pub id: String,
    /// Global pipeline configuration (owned clone, so the caller's `Config` is not borrowed)
    pub config: Config,
    /// Directory for log file storage
    pub log_dir: String,
}
|
|
|
+
|
|
|
+impl Initialize for Straglr {
|
|
|
+ /// Initializes a new Straglr instance for a given sample ID and configuration.
|
|
|
+ ///
|
|
|
+ /// Creates the output log directory path and optionally cleans up previous output files
|
|
|
+ /// if `straglr_force` is set.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ ///
|
|
|
+ /// * `id` - The sample ID
|
|
|
+ /// * `config` - The execution configuration
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ ///
|
|
|
+ /// A `Straglr` instance wrapped in `Ok`, or an error if setup fails
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ ///
|
|
|
+ /// Returns an error if:
|
|
|
+ /// - `config.straglr_force` is true and output directory cannot be removed
|
|
|
+ /// - Directory deletion fails due to permissions or I/O errors
|
|
|
+ fn initialize(id: &str, config: &Config) -> anyhow::Result<Self> {
|
|
|
+ info!("Initialize Straglr for {id}.");
|
|
|
+
|
|
|
+ let log_dir = format!("{}/{}/log/straglr", config.result_dir, id);
|
|
|
+ let straglr = Self {
|
|
|
+ id: id.to_string(),
|
|
|
+ config: config.clone(),
|
|
|
+ log_dir,
|
|
|
+ };
|
|
|
+
|
|
|
+ if straglr.config.straglr_force {
|
|
|
+ remove_dir_if_exists(&straglr.config.straglr_output_dir(id))?;
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(straglr)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl ShouldRun for Straglr {
|
|
|
+ /// Determines whether Straglr should re-run based on whether the output TSV
|
|
|
+ /// is older than either the tumor or normal BAM file.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ ///
|
|
|
+ /// `true` if Straglr needs to be re-run, otherwise `false`
|
|
|
+ fn should_run(&self) -> bool {
|
|
|
+ let normal_tsv = &self.config.straglr_normal_tsv(&self.id);
|
|
|
+ let tumor_tsv = &self.config.straglr_tumor_tsv(&self.id);
|
|
|
+
|
|
|
+ let result = is_file_older(normal_tsv, &self.config.normal_bam(&self.id), true)
|
|
|
+ .unwrap_or(true)
|
|
|
+ || is_file_older(normal_tsv, &self.config.tumoral_bam(&self.id), true).unwrap_or(true)
|
|
|
+ || is_file_older(tumor_tsv, &self.config.normal_bam(&self.id), true).unwrap_or(true)
|
|
|
+ || is_file_older(tumor_tsv, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
|
|
|
+
|
|
|
+ if result {
|
|
|
+ info!("Straglr should run for: {}.", self.id);
|
|
|
+ }
|
|
|
+ result
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Run for Straglr {
|
|
|
+ /// Runs the Straglr STR genotyper on both normal and tumor BAM files.
|
|
|
+ ///
|
|
|
+ /// Executes Straglr separately for normal and tumor samples, producing
|
|
|
+ /// TSV files with STR genotypes for each.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ ///
|
|
|
+ /// `Ok(())` if everything runs successfully; otherwise, an error with context.
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ ///
|
|
|
+ /// Returns an error if:
|
|
|
+ /// - Straglr is already up-to-date (`should_run()` returns false)
|
|
|
+ /// - Output directory cannot be created
|
|
|
+ /// - Tumor or normal BAM files are missing or corrupted
|
|
|
+ /// - Reference genome is missing
|
|
|
+ /// - STR loci BED file is missing or malformed
|
|
|
+ /// - Straglr execution fails for either sample
|
|
|
+ /// - Log files cannot be written
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ if !self.should_run() {
|
|
|
+ anyhow::bail!("Straglr is up-to-date");
|
|
|
+ }
|
|
|
+
|
|
|
+ info!("Running Straglr v{}", Straglr::version(&self.config)?);
|
|
|
+
|
|
|
+ let id = &self.id;
|
|
|
+ let output_dir = self.config.straglr_output_dir(id);
|
|
|
+ fs::create_dir_all(&output_dir).context("Failed to create Straglr output directory")?;
|
|
|
+
|
|
|
+ // Run on normal sample
|
|
|
+ let normal_tsv = self.config.straglr_normal_tsv(id);
|
|
|
+ if !Path::new(&normal_tsv).exists() {
|
|
|
+ info!("Running Straglr on normal sample: {}", id);
|
|
|
+ let mut job = StraglrJob {
|
|
|
+ conda_sh: self.config.conda_sh.clone(),
|
|
|
+ straglr_bin: self.config.straglr_bin.clone(),
|
|
|
+ bam: self.config.normal_bam(id),
|
|
|
+ reference: self.config.reference.clone(),
|
|
|
+ loci_bed: self.config.straglr_loci_bed.clone(),
|
|
|
+ output_prefix: format!("{}/{}_normal", output_dir, id),
|
|
|
+ min_support: self.config.straglr_min_support,
|
|
|
+ min_cluster_size: self.config.straglr_min_cluster_size,
|
|
|
+ genotype_in_size: self.config.straglr_genotype_in_size,
|
|
|
+ };
|
|
|
+
|
|
|
+ let output = run!(&self.config, &mut job)
|
|
|
+ .context("Error while running Straglr on normal sample")?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/straglr_normal_", self.log_dir);
|
|
|
+ output
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context(format!("Error while writing Straglr logs into {log_file}"))?;
|
|
|
+ } else {
|
|
|
+ debug!(
|
|
|
+ "Straglr normal TSV already exists for {}, skipping execution.",
|
|
|
+ self.id
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ // Run on tumor sample
|
|
|
+ let tumor_tsv = self.config.straglr_tumor_tsv(id);
|
|
|
+ if !Path::new(&tumor_tsv).exists() {
|
|
|
+ info!("Running Straglr on tumor sample: {}", id);
|
|
|
+ let mut job = StraglrJob {
|
|
|
+ conda_sh: self.config.conda_sh.clone(),
|
|
|
+ straglr_bin: self.config.straglr_bin.clone(),
|
|
|
+ bam: self.config.tumoral_bam(id),
|
|
|
+ reference: self.config.reference.clone(),
|
|
|
+ loci_bed: self.config.straglr_loci_bed.clone(),
|
|
|
+ output_prefix: format!("{}/{}_tumor", output_dir, id),
|
|
|
+ min_support: self.config.straglr_min_support,
|
|
|
+ min_cluster_size: self.config.straglr_min_cluster_size,
|
|
|
+ genotype_in_size: self.config.straglr_genotype_in_size,
|
|
|
+ };
|
|
|
+
|
|
|
+ let output = run!(&self.config, &mut job)
|
|
|
+ .context("Error while running Straglr on tumor sample")?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/straglr_tumor_", self.log_dir);
|
|
|
+ output
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context(format!("Error while writing Straglr logs into {log_file}"))?;
|
|
|
+ } else {
|
|
|
+ debug!(
|
|
|
+ "Straglr tumor TSV already exists for {}, skipping execution.",
|
|
|
+ self.id
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Straglr {
|
|
|
+ /// Loads and parses the normal sample Straglr TSV results.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// Vector of STR loci from the normal sample
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if the TSV file cannot be read or parsed.
|
|
|
+ pub fn load_normal_results(&self) -> anyhow::Result<Vec<StraglrRow>> {
|
|
|
+ let tsv_path = self.config.straglr_normal_tsv(&self.id);
|
|
|
+ read_straglr_tsv(&tsv_path)
|
|
|
+ .context(format!("Failed to read normal Straglr results from {}", tsv_path))
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Loads and parses the tumor sample Straglr TSV results.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// Vector of STR loci from the tumor sample
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if the TSV file cannot be read or parsed.
|
|
|
+ pub fn load_tumor_results(&self) -> anyhow::Result<Vec<StraglrRow>> {
|
|
|
+ let tsv_path = self.config.straglr_tumor_tsv(&self.id);
|
|
|
+ read_straglr_tsv(&tsv_path)
|
|
|
+ .context(format!("Failed to read tumor Straglr results from {}", tsv_path))
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Loads both normal and tumor results as a tuple.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// `(normal_results, tumor_results)` tuple
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if either TSV file cannot be read or parsed.
|
|
|
+ pub fn load_results(&self) -> anyhow::Result<(Vec<StraglrRow>, Vec<StraglrRow>)> {
|
|
|
+ Ok((self.load_normal_results()?, self.load_tumor_results()?))
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Finds STR loci that differ between tumor and normal samples.
|
|
|
+ ///
|
|
|
+ /// Compares copy numbers at matching loci to identify somatic STR changes.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `min_difference` - Minimum copy number difference to report (default: 2)
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// Vector of tuples: `(locus_id, normal_row, tumor_row, copy_number_diff)`
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if results cannot be loaded.
|
|
|
+ pub fn find_somatic_changes(
|
|
|
+ &self,
|
|
|
+ min_difference: u32,
|
|
|
+ ) -> anyhow::Result<Vec<(String, StraglrRow, StraglrRow, i64)>> {
|
|
|
+ let (normal, tumor) = self.load_results()?;
|
|
|
+
|
|
|
+ let mut changes = Vec::new();
|
|
|
+
|
|
|
+ for normal_row in &normal {
|
|
|
+ let location = normal_row.location_string();
|
|
|
+
|
|
|
+ // Find matching locus in tumor
|
|
|
+ if let Some(tumor_row) = tumor.iter().find(|t| {
|
|
|
+ t.chrom == normal_row.chrom && t.start == normal_row.start && t.end == normal_row.end
|
|
|
+ }) {
|
|
|
+ // Compare max copy numbers
|
|
|
+ if let (Some(normal_cn), Some(tumor_cn)) =
|
|
|
+ (normal_row.max_copy_number(), tumor_row.max_copy_number())
|
|
|
+ {
|
|
|
+ let diff = tumor_cn as i64 - normal_cn as i64;
|
|
|
+ if diff.abs() >= min_difference as i64 {
|
|
|
+ changes.push((location, normal_row.clone(), tumor_row.clone(), diff));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(changes)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Parameters for a single Straglr invocation (one BAM against one loci BED).
///
/// Implements [`JobCommand`] to render the shell command line, plus the
/// local and SLURM runner traits so it can be dispatched through the
/// `run!` / `run_many!` macros.
#[derive(Debug, Clone)]
struct StraglrJob {
    // Path to conda.sh; sourced so `conda activate` works in a non-interactive shell.
    conda_sh: String,
    // Path to the Straglr executable (or entry-point script).
    straglr_bin: String,
    // Input BAM file to genotype.
    bam: String,
    // Reference genome path.
    reference: String,
    // BED file of STR loci to genotype (passed via `--loci`).
    loci_bed: String,
    // Value passed to `--output`; Straglr derives its output file names from it.
    output_prefix: String,
    // Value passed to `--min_support`.
    min_support: u32,
    // Value passed to `--min_cluster_size`.
    min_cluster_size: u32,
    // When true, `--genotype_in_size` is appended to the command line.
    genotype_in_size: bool,
}
|
|
|
+
|
|
|
+impl JobCommand for StraglrJob {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ let mut cmd = format!(
|
|
|
+ "source {conda_sh} && conda activate straglr_env && {straglr} {bam} {reference} --loci {loci} --min_support {min_sup} --min_cluster_size {min_clust} --output {output}",
|
|
|
+ conda_sh = self.conda_sh,
|
|
|
+ straglr = self.straglr_bin,
|
|
|
+ bam = self.bam,
|
|
|
+ reference = self.reference,
|
|
|
+ loci = self.loci_bed,
|
|
|
+ min_sup = self.min_support,
|
|
|
+ min_clust = self.min_cluster_size,
|
|
|
+ output = self.output_prefix
|
|
|
+ );
|
|
|
+
|
|
|
+ if self.genotype_in_size {
|
|
|
+ cmd.push_str(" --genotype_in_size");
|
|
|
+ }
|
|
|
+
|
|
|
+ cmd
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Allows a single Straglr job to run directly on the local machine.
impl LocalRunner for StraglrJob {}

/// Allows several Straglr jobs to run locally as a batch (used by `run_many!`).
impl LocalBatchRunner for StraglrJob {}
|
|
|
+
|
|
|
+impl SlurmRunner for StraglrJob {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some("straglr".into()),
|
|
|
+ partition: Some("shortq".into()),
|
|
|
+ cpus_per_task: Some(4),
|
|
|
+ mem: Some("16G".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
impl SbatchRunner for StraglrJob {
    /// SLURM resource requirements for `sbatch` submission of a Straglr job.
    ///
    /// 4 CPUs / 16G on the "shortq" partition; no GPU resources requested.
    fn slurm_params(&self) -> SlurmParams {
        SlurmParams {
            job_name: Some("straglr".into()),
            partition: Some("shortq".into()),
            cpus_per_task: Some(4),
            mem: Some("16G".into()),
            gres: None,
        }
    }
}
|
|
|
+
|
|
|
impl Version for Straglr {
    /// Retrieves the Straglr version by running `straglr --version`.
    ///
    /// # Errors
    /// Returns an error if command execution fails or version parsing fails.
    fn version(config: &Config) -> anyhow::Result<String> {
        // Local throwaway job type: same conda activation as StraglrJob, but
        // the command only asks for the version.
        struct VersionJob {
            conda_sh: String,
            straglr_bin: String,
        }

        impl JobCommand for VersionJob {
            fn cmd(&self) -> String {
                format!(
                    "source {} && conda activate straglr_env && {} --version",
                    self.conda_sh, self.straglr_bin
                )
            }
        }

        impl LocalRunner for VersionJob {}

        impl SlurmRunner for VersionJob {
            fn slurm_args(&self) -> Vec<String> {
                // Minimal resources: this only prints a version string.
                SlurmParams {
                    job_name: Some("straglr_version".into()),
                    partition: Some("shortq".into()),
                    cpus_per_task: Some(1),
                    mem: Some("1G".into()),
                    gres: None,
                }
                .to_args()
            }
        }

        let mut version_job = VersionJob {
            conda_sh: config.conda_sh.clone(),
            straglr_bin: config.straglr_bin.clone(),
        };

        let out =
            run!(&config, &mut version_job).context("Error while running `straglr --version`")?;

        // Search stdout and stderr together — presumably the tool may print
        // its version on either stream; TODO confirm for the installed build.
        let combined = format!("{}{}", out.stdout, out.stderr);
        let v = combined
            .lines()
            .find(|line| line.contains("straglr") || line.contains("version"))
            .map(|line| line.trim().to_string())
            .ok_or_else(|| anyhow::anyhow!("Could not parse straglr version from output"))?;

        Ok(v)
    }
}
|
|
|
+
|
|
|
/// Straglr solo (single-sample) STR genotyper.
///
/// Executes Straglr for STR genotyping on a single BAM file.
/// Useful for germline STR analysis or when no matched normal is available.
///
/// Construct via [`InitializeSolo::initialize`]; output lands in
/// `config.straglr_solo_output_dir(id, time)`.
///
/// # Fields
///
/// - `id` - Sample identifier (e.g., "34528")
/// - `time` - Time point label: typically `config.normal_name` ("norm") or `config.tumoral_name` ("diag")
/// - `config` - Global pipeline configuration
/// - `log_dir` - Directory for execution logs (e.g., "{result_dir}/{id}/log/straglr_solo")
#[derive(Debug)]
pub struct StraglrSolo {
    /// Sample identifier
    pub id: String,
    /// Time point identifier (e.g., "norm" or "diag")
    pub time: String,
    /// Global pipeline configuration (owned clone)
    pub config: Config,
    /// Directory for log file storage
    pub log_dir: String,
}
|
|
|
+
|
|
|
+impl InitializeSolo for StraglrSolo {
|
|
|
+ /// Initializes Straglr solo analysis for a sample at a specific time point.
|
|
|
+ ///
|
|
|
+ /// Creates necessary log directory.
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if directory creation fails.
|
|
|
+ fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
|
|
|
+ let log_dir = format!("{}/{}/log/straglr_solo", config.result_dir, id);
|
|
|
+ if !Path::new(&log_dir).exists() {
|
|
|
+ fs::create_dir_all(&log_dir)
|
|
|
+ .context(format!("Failed to create {log_dir} directory"))?;
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(StraglrSolo {
|
|
|
+ id: id.to_string(),
|
|
|
+ time: time.to_string(),
|
|
|
+ config: config.clone(),
|
|
|
+ log_dir,
|
|
|
+ })
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Run for StraglrSolo {
|
|
|
+ /// Runs the Straglr pipeline for a single sample.
|
|
|
+ ///
|
|
|
+ /// Skips if output file already exists.
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if Straglr execution or log writing fails.
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ let id = &self.id;
|
|
|
+ let time = &self.time;
|
|
|
+
|
|
|
+ let output_tsv = &self.config.straglr_solo_tsv(id, time);
|
|
|
+
|
|
|
+ if !Path::new(output_tsv).exists() {
|
|
|
+ let output_dir = self.config.straglr_solo_output_dir(id, time);
|
|
|
+ fs::create_dir_all(&output_dir)
|
|
|
+ .context("Failed to create Straglr solo output directory")?;
|
|
|
+
|
|
|
+ let mut job = StraglrJob {
|
|
|
+ conda_sh: self.config.conda_sh.clone(),
|
|
|
+ straglr_bin: self.config.straglr_bin.clone(),
|
|
|
+ bam: self.config.solo_bam(id, time),
|
|
|
+ reference: self.config.reference.clone(),
|
|
|
+ loci_bed: self.config.straglr_loci_bed.clone(),
|
|
|
+ output_prefix: format!("{}/{}_{}", output_dir, id, time),
|
|
|
+ min_support: self.config.straglr_min_support,
|
|
|
+ min_cluster_size: self.config.straglr_min_cluster_size,
|
|
|
+ genotype_in_size: self.config.straglr_genotype_in_size,
|
|
|
+ };
|
|
|
+
|
|
|
+ let report =
|
|
|
+ run!(&self.config, &mut job).context("Error while running straglr solo")?;
|
|
|
+
|
|
|
+ let log_file = format!("{}/straglr_", self.log_dir);
|
|
|
+ report
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context(format!("Error while writing logs into {log_file}"))?;
|
|
|
+ } else {
|
|
|
+ debug!("Straglr output TSV already exists.");
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl StraglrSolo {
|
|
|
+ /// Loads and parses the Straglr TSV results for this solo sample.
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// Vector of STR loci from the solo sample
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if the TSV file cannot be read or parsed.
|
|
|
+ pub fn load_results(&self) -> anyhow::Result<Vec<StraglrRow>> {
|
|
|
+ let tsv_path = self.config.straglr_solo_tsv(&self.id, &self.time);
|
|
|
+ read_straglr_tsv(&tsv_path)
|
|
|
+ .context(format!("Failed to read Straglr results from {}", tsv_path))
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Filters results to show only expanded repeats above a threshold.
|
|
|
+ ///
|
|
|
+ /// # Arguments
|
|
|
+ /// * `min_copy_number` - Minimum copy number threshold
|
|
|
+ ///
|
|
|
+ /// # Returns
|
|
|
+ /// Vector of STR loci with copy numbers >= threshold
|
|
|
+ ///
|
|
|
+ /// # Errors
|
|
|
+ /// Returns an error if results cannot be loaded.
|
|
|
+ pub fn load_expanded_repeats(&self, min_copy_number: u32) -> anyhow::Result<Vec<StraglrRow>> {
|
|
|
+ let results = self.load_results()?;
|
|
|
+ Ok(results
|
|
|
+ .into_iter()
|
|
|
+ .filter(|row| row.is_expanded(min_copy_number))
|
|
|
+ .collect())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/// Runs Straglr in parallel chunks for genome-wide STR genotyping.
|
|
|
+///
|
|
|
+/// Splits the genome into `n_parts` regions, creates temporary BED files for each region,
|
|
|
+/// runs Straglr in parallel, and merges the results into a single TSV file.
|
|
|
+///
|
|
|
+/// This function is designed for whole-genome STR genotyping where processing the entire
|
|
|
+/// genome at once would be too slow. By splitting into chunks, multiple Straglr instances
|
|
|
+/// can run in parallel.
|
|
|
+///
|
|
|
+/// # Arguments
|
|
|
+///
|
|
|
+/// * `id` - Sample identifier
|
|
|
+/// * `time_point` - Time point label (e.g., "norm", "diag")
|
|
|
+/// * `config` - Global pipeline configuration
|
|
|
+/// * `n_parts` - Number of parallel chunks (will be adjusted if genome is smaller)
|
|
|
+///
|
|
|
+/// # Returns
|
|
|
+///
|
|
|
+/// `Ok(())` if all chunks complete successfully and merge succeeds
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+///
|
|
|
+/// Returns an error if:
|
|
|
+/// - `n_parts` is 0
|
|
|
+/// - BAM file cannot be opened or has no header
|
|
|
+/// - Temporary BED files cannot be created
|
|
|
+/// - Any Straglr chunk fails to execute
|
|
|
+/// - TSV merging fails
|
|
|
+/// - Final output file cannot be written
|
|
|
+///
|
|
|
+/// # Implementation Details
|
|
|
+///
|
|
|
+/// 1. Reads BAM header to determine genome sizes
|
|
|
+/// 2. Splits genome into approximately equal-sized regions
|
|
|
+/// 3. Creates temporary BED file for each region in `{tmp_dir}/straglr_chunk_{id}_{time}_{i}.bed`
|
|
|
+/// 4. Runs Straglr in parallel via `run_many!` macro
|
|
|
+/// 5. Concatenates all output TSV files (skipping headers from parts 2+)
|
|
|
+/// 6. Removes temporary BED and partial TSV files
|
|
|
+///
|
|
|
+/// # Example
|
|
|
+///
|
|
|
+/// ```ignore
|
|
|
+/// use pandora_lib_promethion::callers::straglr::run_straglr_chunked;
|
|
|
+/// use pandora_lib_promethion::config::Config;
|
|
|
+///
|
|
|
+/// let config = Config::default();
|
|
|
+///
|
|
|
+/// // Run genome-wide STR genotyping with 10 parallel jobs
|
|
|
+/// run_straglr_chunked("sample_001", "norm", &config, 10)?;
|
|
|
+/// # Ok::<(), anyhow::Error>(())
|
|
|
+/// ```
|
|
|
+pub fn run_straglr_chunked(
|
|
|
+ id: &str,
|
|
|
+ time_point: &str,
|
|
|
+ config: &Config,
|
|
|
+ n_parts: usize,
|
|
|
+) -> anyhow::Result<()> {
|
|
|
+ anyhow::ensure!(n_parts > 0, "n_parts must be > 0");
|
|
|
+
|
|
|
+ info!(
|
|
|
+ "Running Straglr in {} parallel chunks for {} {}",
|
|
|
+ n_parts, id, time_point
|
|
|
+ );
|
|
|
+
|
|
|
+ // Get genome sizes from BAM header
|
|
|
+ let bam_path = config.solo_bam(id, time_point);
|
|
|
+ let reader = bam::Reader::from_path(&bam_path)
|
|
|
+ .with_context(|| format!("Failed to open BAM: {}", bam_path))?;
|
|
|
+ let header = bam::Header::from_template(reader.header());
|
|
|
+ let genome_sizes = get_genome_sizes(&header)?;
|
|
|
+
|
|
|
+ // Split genome into regions
|
|
|
+ let region_chunks = split_genome_into_n_regions_exact(&genome_sizes, n_parts);
|
|
|
+ let actual_n_parts = region_chunks.len();
|
|
|
+
|
|
|
+ info!(
|
|
|
+ "Split genome into {} chunks for Straglr processing",
|
|
|
+ actual_n_parts
|
|
|
+ );
|
|
|
+
|
|
|
+ // Create output directory
|
|
|
+ let output_dir = config.straglr_solo_output_dir(id, time_point);
|
|
|
+ fs::create_dir_all(&output_dir)
|
|
|
+ .context(format!("Failed to create output directory: {}", output_dir))?;
|
|
|
+
|
|
|
+ // Create temporary BED files and jobs
|
|
|
+ let mut jobs = Vec::with_capacity(actual_n_parts);
|
|
|
+ let mut temp_bed_files = Vec::with_capacity(actual_n_parts);
|
|
|
+ let mut temp_tsv_files = Vec::with_capacity(actual_n_parts);
|
|
|
+
|
|
|
+ for (i, regions) in region_chunks.into_iter().enumerate() {
|
|
|
+ let part_num = i + 1;
|
|
|
+
|
|
|
+ // Create temporary BED file for this chunk
|
|
|
+ let bed_path = format!(
|
|
|
+ "{}/straglr_chunk_{}_{}_part{}.bed",
|
|
|
+ config.tmp_dir, id, time_point, part_num
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut bed_file = File::create(&bed_path)
|
|
|
+ .context(format!("Failed to create temporary BED file: {}", bed_path))?;
|
|
|
+
|
|
|
+ // Write regions to BED file (format: chr\tstart\tend)
|
|
|
+ for region_str in ®ions {
|
|
|
+ // Parse region format: "chr1:1000-2000" -> "chr1\t1000\t2000"
|
|
|
+ if let Some((chr, range)) = region_str.split_once(':') {
|
|
|
+ if let Some((start, end)) = range.split_once('-') {
|
|
|
+ writeln!(bed_file, "{}\t{}\t{}", chr, start, end)
|
|
|
+ .context("Failed to write to BED file")?;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ bed_file.flush().context("Failed to flush BED file")?;
|
|
|
+
|
|
|
+ temp_bed_files.push(bed_path.clone());
|
|
|
+
|
|
|
+ // Create job for this chunk
|
|
|
+ let output_prefix = format!("{}/{}_{}_part{}", output_dir, id, time_point, part_num);
|
|
|
+ let output_tsv = format!("{}_straglr.tsv", output_prefix);
|
|
|
+ temp_tsv_files.push(output_tsv.clone());
|
|
|
+
|
|
|
+ let job = StraglrJob {
|
|
|
+ conda_sh: config.conda_sh.clone(),
|
|
|
+ straglr_bin: config.straglr_bin.clone(),
|
|
|
+ bam: bam_path.clone(),
|
|
|
+ reference: config.reference.clone(),
|
|
|
+ loci_bed: bed_path, // Use the chunk BED file instead of global loci
|
|
|
+ output_prefix,
|
|
|
+ min_support: config.straglr_min_support,
|
|
|
+ min_cluster_size: config.straglr_min_cluster_size,
|
|
|
+ genotype_in_size: config.straglr_genotype_in_size,
|
|
|
+ };
|
|
|
+
|
|
|
+ jobs.push(job);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Run all chunks in parallel
|
|
|
+ info!("Executing {} Straglr jobs in parallel", actual_n_parts);
|
|
|
+ let outputs = run_many!(config, jobs)?;
|
|
|
+
|
|
|
+ // Save logs
|
|
|
+ let log_dir = format!("{}/{}/log/straglr_chunked", config.result_dir, id);
|
|
|
+ fs::create_dir_all(&log_dir).context("Failed to create log directory")?;
|
|
|
+
|
|
|
+ for (i, output) in outputs.iter().enumerate() {
|
|
|
+ let log_file = format!("{}/straglr_part{}_", log_dir, i + 1);
|
|
|
+ output
|
|
|
+ .save_to_file(&log_file)
|
|
|
+ .context(format!("Failed to save logs for part {}", i + 1))?;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Merge TSV files
|
|
|
+ info!("Merging {} TSV files", actual_n_parts);
|
|
|
+ let final_tsv = config.straglr_solo_tsv(id, time_point);
|
|
|
+ merge_tsv_files(&temp_tsv_files, &final_tsv)
|
|
|
+ .context("Failed to merge Straglr TSV files")?;
|
|
|
+
|
|
|
+ // Clean up temporary files
|
|
|
+ info!("Cleaning up temporary files");
|
|
|
+ for bed_file in &temp_bed_files {
|
|
|
+ if let Err(e) = fs::remove_file(bed_file) {
|
|
|
+ debug!("Failed to remove temporary BED file {}: {}", bed_file, e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for tsv_file in &temp_tsv_files {
|
|
|
+ if let Err(e) = fs::remove_file(tsv_file) {
|
|
|
+ debug!("Failed to remove temporary TSV file {}: {}", tsv_file, e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ info!(
|
|
|
+ "Straglr chunked execution completed for {} {} (merged into {})",
|
|
|
+ id, time_point, final_tsv
|
|
|
+ );
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+}
|
|
|
+
|
|
|
+/// Merges multiple TSV files into a single output file.
|
|
|
+///
|
|
|
+/// Concatenates TSV files while preserving the header from the first file
|
|
|
+/// and skipping headers from subsequent files.
|
|
|
+///
|
|
|
+/// # Arguments
|
|
|
+///
|
|
|
+/// * `input_files` - Paths to input TSV files
|
|
|
+/// * `output_file` - Path to merged output TSV file
|
|
|
+///
|
|
|
+/// # Errors
|
|
|
+///
|
|
|
+/// Returns an error if any file cannot be read or the output cannot be written.
|
|
|
+fn merge_tsv_files(input_files: &[String], output_file: &str) -> anyhow::Result<()> {
|
|
|
+ let mut output = File::create(output_file)
|
|
|
+ .context(format!("Failed to create output file: {}", output_file))?;
|
|
|
+
|
|
|
+ let mut first_file = true;
|
|
|
+
|
|
|
+ for (i, input_path) in input_files.iter().enumerate() {
|
|
|
+ if !Path::new(input_path).exists() {
|
|
|
+ debug!("Skipping non-existent file: {}", input_path);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let content = fs::read_to_string(input_path)
|
|
|
+ .context(format!("Failed to read input file: {}", input_path))?;
|
|
|
+
|
|
|
+ if first_file {
|
|
|
+ // Write entire first file including header
|
|
|
+ output
|
|
|
+ .write_all(content.as_bytes())
|
|
|
+ .context("Failed to write to output file")?;
|
|
|
+ first_file = false;
|
|
|
+ } else {
|
|
|
+ // Skip header line(s) for subsequent files
|
|
|
+ for line in content.lines() {
|
|
|
+ if !line.starts_with('#') && !line.trim().is_empty() {
|
|
|
+ writeln!(output, "{}", line).context("Failed to write line to output")?;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ debug!("Merged part {} from {}", i + 1, input_path);
|
|
|
+ }
|
|
|
+
|
|
|
+ output.flush().context("Failed to flush output file")?;
|
|
|
+ Ok(())
|
|
|
+}
|