2 months ago · bf12d05d90
--- a/pandora-config.example.toml
+++ b/pandora-config.example.toml
@@ -515,6 +515,17 @@ wtdbg2_threads = 8
 
				 # Memory for SLURM. wtdbg2 is lightweight — 16G is ample for local assembly.
			
 
				 wtdbg2_slurm_mem = "16G"
			
 
				 
			
 
				+#######################################
			
 
				+# longcallD
			
 
				+#######################################
			
 
				+# Template for the longcallD output directory (solo and normal/tumor runs).
			
 
				+#
			
 
				+# Required placeholders: `{result_dir}`, `{id}`, `{time}`.
			
 
				+longcalld_output_dir = "{result_dir}/{id}/{time}/longcallD"
			
 
				+longcalld_bin = "/home/t_steimle/somatic_pipe_tools/longcallD-v0.0.10_x64-linux/longcallD"
			
 
				+longcalld_threads = 10
			
 
				+longcalld_slurm_mem = "40G"
			
 
				+
			
 
				 #######################################
			
 
				 # Marlin
			
 
				 #######################################
			
--- a/src/callers/deep_variant.rs
+++ b/src/callers/deep_variant.rs
@@ -387,22 +387,6 @@ impl JobCommand for DeepVariant {
 
				 impl LocalRunner for DeepVariant {}
			
 
				 impl LocalBatchRunner for DeepVariant {}
			
 
				 
			
 
				-impl SlurmRunner for DeepVariant {
			
 
				-    fn slurm_args(&self) -> Vec<String> {
			
 
				-        let batch_id = self.part_index.map(|i| format!("_{i}")).unwrap_or_default();
			
 
				-        SlurmParams {
			
 
				-            job_name: Some(format!(
			
 
				-                "deepvariant_{}_{}{}",
			
 
				-                self.id, self.time_point, batch_id
			
 
				-            )),
			
 
				-            cpus_per_task: Some(self.config.deepvariant_threads.into()),
			
 
				-            mem: Some("40G".into()),
			
 
				-            partition: Some("shortq".into()),
			
 
				-            gres: None,
			
 
				-        }
			
 
				-        .to_args()
			
 
				-    }
			
 
				-}
			
 
				 impl SbatchRunner for DeepVariant {
			
 
				     fn slurm_params(&self) -> SlurmParams {
			
 
				         let batch_id = self.part_index.map(|i| format!("_{i}")).unwrap_or_default();
			
@@ -419,6 +403,12 @@ impl SbatchRunner for DeepVariant {
 
				     }
			
 
				 }
			
 
				 
			
 
				+impl SlurmRunner for DeepVariant {
			
 
				+    fn slurm_args(&self) -> Vec<String> {
			
 
				+        self.slurm_params().to_args()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				 impl DeepVariant {
			
 
				     /// Returns the per-part output directory.
			
 
				     ///
			
@@ -823,7 +813,6 @@ pub fn run_deepvariant_chunked(
 
				         .collect::<Vec<String>>();
			
 
				 
			
 
				     let actual_n_parts = region_chunks.len();
			
 
				-
			
 
				     info!(
			
 
				         "Running DeepVariant in {} parallel parts for {} {}",
			
 
				         actual_n_parts, id, time_point
			
--- a/src/callers/longcallD.rs
+++ b/src/callers/longcallD.rs
@@ -0,0 +1,457 @@
 
				+use std::{
			
 
				+    fmt, fs,
			
 
				+    path::{Path, PathBuf},
			
 
				+};
			
 
				+
			
 
				+use anyhow::Context;
			
 
				+use log::info;
			
 
				+use rust_htslib::bam::{self, Read};
			
 
				+use uuid::Uuid;
			
 
				+
			
 
				+use crate::{
			
 
				+    commands::{
			
 
				+        bcftools::{BcftoolsConcat, BcftoolsKeepPass},
			
 
				+        samtools::{SamtoolsIndex, SamtoolsMergeMany},
			
 
				+        Command as JobCommand, LocalBatchRunner, LocalRunner, SbatchRunner, SlurmParams,
			
 
				+        SlurmRunner,
			
 
				+    },
			
 
				+    config::Config,
			
 
				+    helpers::{get_genome_sizes_in_header_order, split_ordered_genome_into_n_regions_exact},
			
 
				+    locker::SampleLock,
			
 
				+    pipes::{InitializeSolo, ShouldRun},
			
 
				+    run, run_many,
			
 
				+    runners::Run,
			
 
				+};
			
 
				+
			
 
				+#[derive(Debug, Clone)]
			
 
				+pub struct LongcallD {
			
 
				+    /// Sample identifier
			
 
				+    pub case_id: String,
			
 
				+    pub time_point: String,
			
 
				+    /// Global pipeline configuration
			
 
				+    pub bin: String,
			
 
				+    pub reference: String,
			
 
				+    pub threads: u8,
			
 
				+    pub slurm_mem: String,
			
 
				+    /// Directory for log file storage
			
 
				+    pub log_dir: String,
			
 
				+    pub output_dir: String,
			
 
				+    pub final_vcf: String,
			
 
				+    /// Optional genomic region restriction (format: "chr:start-end")
			
 
				+    pub regions: String,
			
 
				+    /// Optional part index for chunked parallel runs (1-indexed)
			
 
				+    pub part_index: Option<usize>,
			
 
				+    pub config: Config,
			
 
				+}
			
 
				+
			
 
				+impl fmt::Display for LongcallD {
			
 
				+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
			
 
				+        writeln!(f, "🧬 LongcallD Solo")?;
			
 
				+        writeln!(f, "   Case ID    : {}", self.case_id)?;
			
 
				+        writeln!(f, "   Time point : {}", self.time_point)?;
			
 
				+        writeln!(f, "   Regions    : {}", self.regions)?;
			
 
				+        writeln!(f, "   Log dir    : {}", self.log_dir)?;
			
 
				+        writeln!(
			
 
				+            f,
			
 
				+            "   Part       : {}",
			
 
				+            self.part_index
			
 
				+                .map_or("full".into(), |n| format!("part{n}"))
			
 
				+        )
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl InitializeSolo for LongcallD {
			
 
				+    fn initialize(id: &str, time: &str, config: &Config) -> anyhow::Result<Self> {
			
 
				+        let case_id = id.to_string();
			
 
				+        let time_point = time.to_string();
			
 
				+
			
 
				+        info!("Initializing LongcallD for {case_id} {time}");
			
 
				+        let log_dir = format!("{}/{}/log/longcallD", config.result_dir, &id);
			
 
				+
			
 
				+        let output_dir = config.longcalld_output_dir(&case_id, &time_point);
			
 
				+
			
 
				+        let final_vcf = config.longcalld_solo_passed_vcf(&case_id, &time_point);
			
 
				+
			
 
				+        Ok(Self {
			
 
				+            case_id,
			
 
				+            time_point,
			
 
				+            bin: config.longcalld_bin.clone(),
			
 
				+            reference: config.reference.clone(),
			
 
				+            threads: config.longcalld_threads,
			
 
				+            slurm_mem: config.longcalld_slurm_mem.clone(),
			
 
				+            log_dir,
			
 
				+            output_dir,
			
 
				+            final_vcf,
			
 
				+            regions: String::default(),
			
 
				+            part_index: None,
			
 
				+            config: config.clone(),
			
 
				+        })
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl ShouldRun for LongcallD {
			
 
				+    fn should_run(&self) -> bool {
			
 
				+        // should inspect outputs
			
 
				+        true
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl LongcallD {
			
 
				+    /// Returns the per-part output directory.
			
 
				+    ///
			
 
				+    /// For partitioned runs, creates an isolated subdirectory to prevent
			
 
				+    /// intermediate file collisions between parallel jobs.
			
 
				+    fn part_output_dir(&self) -> String {
			
 
				+        match self.part_index {
			
 
				+            Some(idx) => format!("{}/part{idx}", self.output_dir),
			
 
				+            None => self.output_dir.clone(),
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /// Returns the output VCF path for this run.
			
 
				+    ///
			
 
				+    /// For partitioned runs, the file is placed in a part-specific subdirectory.
			
 
				+    fn output_vcf_path(&self) -> String {
			
 
				+        let output_dir = self.part_output_dir();
			
 
				+
			
 
				+        format!(
			
 
				+            "{output_dir}/{}_{}_longcallD.vcf.gz",
			
 
				+            self.case_id, self.time_point
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				+    fn output_bam_path(&self) -> String {
			
 
				+        let output_dir = self.part_output_dir();
			
 
				+
			
 
				+        format!(
			
 
				+            "{output_dir}/{}_{}_longcallD.bam",
			
 
				+            self.case_id, self.time_point
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				+    /// Returns the PASS-filtered VCF path.
			
 
				+    ///
			
 
				+    /// - For partitioned runs: part-specific path for intermediate PASS VCF
			
 
				+    /// - For single runs or final output: canonical path from config
			
 
				+    ///
			
 
				+    /// The final merged PASS VCF always uses the canonical path regardless
			
 
				+    /// of whether it was produced by a single run or merged from parts.
			
 
				+    fn passed_vcf_path(&self) -> String {
			
 
				+        match self.part_index {
			
 
				+            Some(_) => {
			
 
				+                // Part runs: intermediate PASS VCF in part-specific directory
			
 
				+                self.output_vcf_path().replace(".vcf.gz", ".pass.vcf.gz")
			
 
				+            }
			
 
				+            None => {
			
 
				+                // Single run or final merged output: canonical path
			
 
				+                self.final_vcf.clone()
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /// Filters variants to keep only PASS entries.
			
 
				+    fn filter_pass(&self) -> anyhow::Result<()> {
			
 
				+        let output_vcf = self.output_vcf_path();
			
 
				+        let vcf_passed = self.passed_vcf_path();
			
 
				+
			
 
				+        info!(
			
 
				+            "Filtering PASS variants for {} {} (part: {:?})",
			
 
				+            self.case_id, self.time_point, self.part_index
			
 
				+        );
			
 
				+
			
 
				+        let mut cmd = BcftoolsKeepPass::from_config(&self.config, output_vcf, vcf_passed);
			
 
				+        let report = run!(&self.config, &mut cmd)?;
			
 
				+
			
 
				+        report
			
 
				+            .save_to_file(format!("{}/bcftools_pass_", self.log_dir))
			
 
				+            .context("Can't save bcftools PASS logs")?;
			
 
				+
			
 
				+        Ok(())
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl JobCommand for LongcallD {
			
 
				+    /// Initializes output and log directories for this run.
			
 
				+    ///
			
 
				+    /// # Errors
			
 
				+    ///
			
 
				+    /// Returns an error if directory creation fails.
			
 
				+    fn init(&mut self) -> anyhow::Result<()> {
			
 
				+        // Part-specific output dir
			
 
				+        let output_dir = self.part_output_dir();
			
 
				+
			
 
				+        fs::create_dir_all(&output_dir).context(format!("Failed to create dir: {output_dir}"))?;
			
 
				+
			
 
				+        // log dir
			
 
				+        fs::create_dir_all(&self.log_dir)
			
 
				+            .context(format!("Failed to create dir: {}", self.log_dir))?;
			
 
				+
			
 
				+        Ok(())
			
 
				+    }
			
 
				+
			
 
				+    //longcallD call -t16 ref.fa ont.bam --ont --refine-aln -b ont_phased_refined.bam > ont.vcf # output phased & refined ONT reads (BAM tag: HP & PS)
			
 
				+    /// Generates the longcallD command string for execution.
			
 
				+    fn cmd(&self) -> String {
			
 
				+        format!(
			
 
				+            "{bin} call \
			
 
				+            -t{threads} \
			
 
				+            {reference} {bam} {regions} --ont \
			
 
				+            --refine-aln \
			
 
				+            -b {out_bam} > {out_vcf}",
			
 
				+            bin = self.bin,
			
 
				+            threads = self.threads,
			
 
				+            reference = self.reference,
			
 
				+            regions = self.regions,
			
 
				+            bam = self.config.solo_bam(&self.case_id, &self.time_point),
			
 
				+            out_bam = self.output_bam_path(),
			
 
				+            out_vcf = self.output_vcf_path()
			
 
				+        )
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl Run for LongcallD {
			
 
				+    fn run(&mut self) -> anyhow::Result<()> {
			
 
				+        let lock_dir = format!("{}/locks", self.config.result_dir);
			
 
				+        let _lock = SampleLock::acquire(
			
 
				+            &lock_dir,
			
 
				+            &format!("{}-{}", self.case_id, self.time_point),
			
 
				+            "longcalld",
			
 
				+        )
			
 
				+        .with_context(|| format!("Cannot start longcallD chunked for {}", self.case_id))?;
			
 
				+
			
 
				+        if !self.should_run() {
			
 
				+            info!("longcallD is up-to-data.");
			
 
				+            return Ok(());
			
 
				+        }
			
 
				+
			
 
				+        if self.config.slurm_runner {
			
 
				+            run_chunked(&self.case_id, &self.time_point, &self.config, 30)
			
 
				+        } else {
			
 
				+            run!(&self.config, self)?;
			
 
				+            self.filter_pass()
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+fn merge_parts(base: &LongcallD, n_parts: usize) -> anyhow::Result<()> {
			
 
				+    let mut part_pass_paths: Vec<PathBuf> = Vec::with_capacity(n_parts);
			
 
				+
			
 
				+    for i in 1..=n_parts {
			
 
				+        let mut dv = base.clone();
			
 
				+        dv.part_index = Some(i);
			
 
				+        let part_pass = dv.passed_vcf_path();
			
 
				+
			
 
				+        anyhow::ensure!(
			
 
				+            Path::new(&part_pass).exists(),
			
 
				+            "Missing part {i} PASS VCF: {part_pass}"
			
 
				+        );
			
 
				+
			
 
				+        part_pass_paths.push(PathBuf::from(part_pass));
			
 
				+    }
			
 
				+
			
 
				+    let final_passed_vcf = base
			
 
				+        .config
			
 
				+        .longcalld_solo_passed_vcf(&base.case_id, &base.time_point);
			
 
				+    let rand = Uuid::new_v4();
			
 
				+    let final_tmp = format!("{final_passed_vcf}_{rand}.tmp");
			
 
				+    let final_tmp_csi = format!("{final_passed_vcf}_{rand}.tmp.csi");
			
 
				+
			
 
				+    if let Some(parent) = Path::new(&final_passed_vcf).parent() {
			
 
				+        fs::create_dir_all(parent)?;
			
 
				+    }
			
 
				+
			
 
				+    info!(
			
 
				+        "Concatenating {} part VCFs into {}",
			
 
				+        n_parts, final_passed_vcf
			
 
				+    );
			
 
				+
			
 
				+    let mut concat = BcftoolsConcat::from_config(&base.config, part_pass_paths, &final_tmp);
			
 
				+    run!(&base.config, &mut concat)
			
 
				+        .context("Failed to run bcftools concat for longcallD parts")?
			
 
				+        .save_to_file(format!("{}/bcftools_concat", base.log_dir))?;
			
 
				+
			
 
				+    fs::rename(&final_tmp, &final_passed_vcf)
			
 
				+        .context("Failed to rename merged longcallD PASS VCF")?;
			
 
				+
			
 
				+    fs::rename(&final_tmp_csi, format!("{final_passed_vcf}.csi"))
			
 
				+        .context("Failed to rename merged longcallD PASS VCF CSI")?;
			
 
				+
			
 
				+    info!(
			
 
				+        "Successfully merged {} parts into {}",
			
 
				+        n_parts, final_passed_vcf
			
 
				+    );
			
 
				+
			
 
				+    Ok(())
			
 
				+}
			
 
				+
			
 
				+fn run_chunked(
			
 
				+    case_id: &str,
			
 
				+    time_point: &str,
			
 
				+    config: &Config,
			
 
				+    n_parts: usize,
			
 
				+) -> anyhow::Result<()> {
			
 
				+    anyhow::ensure!(n_parts > 0, "n_parts must be > 0");
			
 
				+
			
 
				+    let base = LongcallD::initialize(case_id, time_point, config)?;
			
 
				+
			
 
				+    let bam_path = config.solo_bam(case_id, time_point);
			
 
				+    let reader = bam::Reader::from_path(&bam_path)
			
 
				+        .with_context(|| format!("Failed to open BAM: {bam_path}"))?;
			
 
				+    let genome_sizes = get_genome_sizes_in_header_order(reader.header())?;
			
 
				+
			
 
				+    let region_chunks = split_ordered_genome_into_n_regions_exact(&genome_sizes, n_parts)
			
 
				+        .into_iter()
			
 
				+        .map(|v| v.join(" "))
			
 
				+        .collect::<Vec<String>>();
			
 
				+
			
 
				+    let actual_n_parts = region_chunks.len();
			
 
				+    info!(
			
 
				+        "Running longcallD in {} parallel parts for {} {}",
			
 
				+        actual_n_parts, case_id, time_point
			
 
				+    );
			
 
				+
			
 
				+    let mut jobs = Vec::with_capacity(actual_n_parts);
			
 
				+    for (i, regions) in region_chunks.into_iter().enumerate() {
			
 
				+        let mut job = base.clone();
			
 
				+        job.regions = regions;
			
 
				+        job.part_index = Some(i + 1);
			
 
				+        job.log_dir = format!("{}/part{}", base.log_dir, i + 1);
			
 
				+        info!("Planned longcallD job:\n{job}");
			
 
				+        jobs.push(job);
			
 
				+    }
			
 
				+
			
 
				+    // Run all longcallD jobs
			
 
				+    let outputs = run_many!(config, jobs.clone())?;
			
 
				+    for output in outputs.iter() {
			
 
				+        output.save_to_file(format!("{}/longcallD_", base.log_dir))?;
			
 
				+    }
			
 
				+
			
 
				+    // Filter PASS variants for each part
			
 
				+    info!(
			
 
				+        "Filtering PASS variants for all {} parts in parallel",
			
 
				+        actual_n_parts
			
 
				+    );
			
 
				+    let filter_jobs: Vec<_> = jobs
			
 
				+        .iter()
			
 
				+        .map(|job| {
			
 
				+            BcftoolsKeepPass::from_config(&job.config, job.output_vcf_path(), job.passed_vcf_path())
			
 
				+        })
			
 
				+        .collect();
			
 
				+    run_many!(config, filter_jobs)?
			
 
				+        .iter()
			
 
				+        .enumerate()
			
 
				+        .try_for_each(|(i, output)| {
			
 
				+            output
			
 
				+                .save_to_file(format!("{}/bcftools_pass_part{}_", base.log_dir, i + 1))
			
 
				+                .map(|_| ())
			
 
				+        })?;
			
 
				+
			
 
				+    // Merge PASS VCFs
			
 
				+    merge_parts(&base, actual_n_parts)?;
			
 
				+
			
 
				+    // Index per-part BAMs before merging
			
 
				+    info!("Indexing {} part BAMs in parallel", actual_n_parts);
			
 
				+    let index_jobs: Vec<_> = jobs
			
 
				+        .iter()
			
 
				+        .map(|job| SamtoolsIndex::from_config(&job.config, &job.output_bam_path()))
			
 
				+        .collect();
			
 
				+    run_many!(config, index_jobs)?
			
 
				+        .iter()
			
 
				+        .enumerate()
			
 
				+        .try_for_each(|(i, output)| {
			
 
				+            output
			
 
				+                .save_to_file(format!("{}/samtools_index_part{}_", base.log_dir, i + 1))
			
 
				+                .map(|_| ())
			
 
				+        })?;
			
 
				+
			
 
				+    // Merge part BAMs into final BAM
			
 
				+    let final_bam = PathBuf::from(
			
 
				+        base.config
			
 
				+            .longcalld_solo_bam(&base.case_id, &base.time_point),
			
 
				+    );
			
 
				+    let part_bams: Vec<PathBuf> = jobs
			
 
				+        .iter()
			
 
				+        .map(|job| PathBuf::from(job.output_bam_path()))
			
 
				+        .collect();
			
 
				+
			
 
				+    info!(
			
 
				+        "Merging {} part BAMs into {}",
			
 
				+        actual_n_parts,
			
 
				+        final_bam.display()
			
 
				+    );
			
 
				+    run!(
			
 
				+        config,
			
 
				+        &mut SamtoolsMergeMany::from_config(final_bam.clone(), part_bams, config)
			
 
				+    )
			
 
				+    .context("Failed to merge longcallD part BAMs")?
			
 
				+    .save_to_file(format!("{}/samtools_merge_", base.log_dir))?;
			
 
				+
			
 
				+    // Index final merged BAM
			
 
				+    run!(
			
 
				+        config,
			
 
				+        &mut SamtoolsIndex::from_config(config, final_bam.to_str().unwrap())
			
 
				+    )
			
 
				+    .context("Failed to index merged longcallD BAM")?
			
 
				+    .save_to_file(format!("{}/samtools_index_final_", base.log_dir))?;
			
 
				+
			
 
				+    info!(
			
 
				+        "LongcallD completed for {} {}: {} parts merged",
			
 
				+        case_id, time_point, actual_n_parts
			
 
				+    );
			
 
				+
			
 
				+    Ok(())
			
 
				+}
			
 
				+
			
 
				+impl LocalRunner for LongcallD {}
			
 
				+impl LocalBatchRunner for LongcallD {}
			
 
				+
			
 
				+impl SbatchRunner for LongcallD {
			
 
				+    fn slurm_params(&self) -> SlurmParams {
			
 
				+        let batch_id = self.part_index.map(|i| format!("_{i}")).unwrap_or_default();
			
 
				+        SlurmParams {
			
 
				+            job_name: Some(format!(
			
 
				+                "longcallD_{}_{}{}",
			
 
				+                self.case_id, self.time_point, batch_id
			
 
				+            )),
			
 
				+            cpus_per_task: Some(self.threads.into()),
			
 
				+            mem: Some(self.slurm_mem.clone()),
			
 
				+            partition: Some("shortq".into()),
			
 
				+            gres: None,
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+impl SlurmRunner for LongcallD {
			
 
				+    fn slurm_args(&self) -> Vec<String> {
			
 
				+        self.slurm_params().to_args()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+#[cfg(test)]
			
 
				+mod tests {
			
 
				+    use crate::helpers::test_init;
			
 
				+
			
 
				+    use super::*;
			
 
				+
			
 
				+    // #[test]
			
 
				+    // fn deepvariant_version() -> anyhow::Result<()> {
			
 
				+    //     test_init();
			
 
				+    //     let vl = DeepVariant::version(&Config::default())?;
			
 
				+    //     info!("DeepVariant local version: {vl}");
			
 
				+    //     let vs = DeepVariant::version_slurm(&Config::default())?;
			
 
				+    //     info!("DeepVariant slurm version: {vs}");
			
 
				+    //     assert_eq!(vl, vs);
			
 
				+    //     Ok(())
			
 
				+    // }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn longcalld_run() -> anyhow::Result<()> {
			
 
				+        test_init();
			
 
				+        let config = Config::default();
			
 
				+
			
 
				+        for id in ["CML2518"] {
			
 
				+            crate::runners::Run::run(&mut LongcallD::initialize(id, "diag", &config)?)?;
			
 
				+        }
			
 
				+        Ok(())
			
 
				+    }
			
 
				+}
			
--- a/src/callers/mod.rs
+++ b/src/callers/mod.rs
@@ -157,6 +157,7 @@ pub mod nanomonsv;
 
				 pub mod savana;
			
 
				 pub mod severus;
			
 
				 pub mod straglr;
			
 
				+pub mod longcallD;
			
 
				 
			
 
				 /// Runs all somatic variant callers sequentially for comprehensive multi-caller analysis.
			
 
				 ///
			
--- a/src/config.rs
+++ b/src/config.rs
@@ -545,6 +545,15 @@ pub struct Config {
 
				     pub wtdbg2_bin: String,
			
 
				     pub wtdbg2_threads: u8,
			
 
				     pub wtdbg2_slurm_mem: String,
			
 
				+
			
 
				+    // === longcallD configuration ===
			
 
				+    /// Template for the longcallD output directory (solo and normal/tumor runs).
			
 
				+    ///
			
 
				+    /// Placeholders: `{result_dir}`, `{id}`, `{time}`.
			
 
				+    pub longcalld_output_dir: String,
			
 
				+    pub longcalld_bin: String,
			
 
				+    pub longcalld_threads: u8,
			
 
				+    pub longcalld_slurm_mem: String,
			
 
				 }
			
 
				 
			
 
				 #[derive(Debug, Clone, Serialize, Deserialize)]
			
@@ -1227,6 +1236,36 @@ impl Config {
 
				             .replace("{id}", id)
			
 
				             .replace("{time}", time)
			
 
				     }
			
 
				+
			
 
				+    /// longcallD output directory (`{result_dir}`, `{id}`, `{time}`).
			
 
				+    pub fn longcalld_output_dir(&self, id: &str, time: &str) -> String {
			
 
				+        self.longcalld_output_dir
			
 
				+            .replace("{result_dir}", &self.result_dir)
			
 
				+            .replace("{id}", id)
			
 
				+            .replace("{time}", time)
			
 
				+    }
			
 
				+
			
 
				+    /// longcallD solo *PASSED* VCF for `<id>, <time>`.
			
 
				+    pub fn longcalld_solo_passed_vcf(&self, id: &str, time: &str) -> String {
			
 
				+        format!(
			
 
				+            "{}/{}_{}_longcallD_PASSED.vcf.gz",
			
 
				+            self.longcalld_output_dir(id, time),
			
 
				+            id,
			
 
				+            time
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				+    pub fn longcalld_solo_bam(&self, id: &str, time: &str) -> String {
			
 
				+        format!(
			
 
				+            "{}/{}_{}_{}_{}_realn.bam",
			
 
				+            self.solo_dir(id, time),
			
 
				+            id,
			
 
				+            time,
			
 
				+            self.reference_name,
			
 
				+            self.haplotagged_bam_tag_name
			
 
				+        )
			
 
				+    }
			
 
				+
			
 
				 }
			
 
				 
			
 
				 impl Default for Config {
			
--- a/src/io/fasta.rs
+++ b/src/io/fasta.rs
@@ -154,3 +154,45 @@ pub fn split_fasta(fasta: &Path, out_dir: &Path) -> anyhow::Result<Vec<ContigFas
 
				 
			
 
				     Ok(contigs)
			
 
				 }
			
 
				+
			
 
				+/// Read a single-contig FASTA produced by split_fasta.
			
 
				+/// Skips the header line, uppercases, replaces non-ACGT with N.
			
 
				+/// Pure std — no noodles needed since we wrote the file ourselves.
			
 
				+pub fn read_single_contig_fasta(path: &Path) -> anyhow::Result<Vec<u8>> {
			
 
				+    use std::io::BufRead;
			
 
				+    let f = std::fs::File::open(path)?;
			
 
				+    let reader = std::io::BufReader::with_capacity(8 * 1024 * 1024, f);
			
 
				+    let mut seq = Vec::new();
			
 
				+    for line in reader.lines() {
			
 
				+        let line = line?;
			
 
				+        if line.starts_with('>') { continue; }
			
 
				+        let sanitized = line.bytes().map(|b| match b.to_ascii_uppercase() {
			
 
				+            b'A' | b'C' | b'G' | b'T' => b.to_ascii_uppercase(),
			
 
				+            _ => b'N',
			
 
				+        });
			
 
				+        seq.extend(sanitized);
			
 
				+    }
			
 
				+    anyhow::ensure!(!seq.is_empty(), "empty contig in {}", path.display());
			
 
				+    Ok(seq)
			
 
				+}
			
 
				+
			
 
				+pub struct FaiEntry {
			
 
				+    pub name: String,
			
 
				+    pub len:  usize,
			
 
				+}
			
 
				+
			
 
				+pub fn read_fai(path: &Path) -> anyhow::Result<Vec<FaiEntry>> {
			
 
				+    use std::io::BufRead;
			
 
				+    let f = std::fs::File::open(path)?;
			
 
				+    std::io::BufReader::new(f)
			
 
				+        .lines()
			
 
				+        .map(|l| {
			
 
				+            let l = l?;
			
 
				+            let mut cols = l.split('\t');
			
 
				+            let name = cols.next().context("fai: missing name")?.to_owned();
			
 
				+            let len  = cols.next().context("fai: missing len")?.parse::<usize>()
			
 
				+                           .context("fai: bad len")?;
			
 
				+            Ok(FaiEntry { name, len })
			
 
				+        })
			
 
				+        .collect()
			
 
				+}
			
--- a/src/io/vcf.rs
+++ b/src/io/vcf.rs
@@ -154,7 +154,7 @@ pub fn fetch_vcf(bgz_path: &str, region: &GenomeRange) -> anyhow::Result<Vec<Vcf
 
				             .parse()
			
 
				             .with_context(|| format!("failed to parse VCF line: {line}"))?;
			
 
				         if v.position.contig == region.contig && region.range.contains(&v.position.position) {
			
 
				-            for split in from_multiallelic(v) {
			
 
				+            for split in from_multiallelic(v)? {
			
 
				                 variants.push(split);
			
 
				             }
			
 
				         }
			
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -150,6 +150,7 @@ pub mod runners;
 
				 pub mod scan;
			
 
				 pub mod slurm_helpers;
			
 
				 pub mod variant;
			
 
				+pub mod uniqueness;
			
 
				 
			
 
				 #[macro_use]
			
 
				 extern crate lazy_static;
			
--- a/src/uniqueness/cache.rs
+++ b/src/uniqueness/cache.rs
@@ -0,0 +1,156 @@
 
				+use std::fs::{self, File};
			
 
				+use std::io::{self, BufReader, BufWriter, Read, Write};
			
 
				+use std::path::Path;
			
 
				+use std::time::SystemTime;
			
 
				+
			
 
				+/// Magic + version guard
			
 
				+const MAGIC: &[u8; 8] = b"UNIQIDX1";
			
 
				+const VERSION: u32 = 1;
			
 
				+
			
 
				+/// Identifies a cache entry unambiguously.
			
 
				+/// Derive from FASTA path + contig name + genome length + content hash.
			
 
				+pub struct CacheKey {
			
 
				+    pub contig: String,
			
 
				+    pub genome_len: usize,
			
 
				+    /// Blake3 hex hash of the raw genome bytes (first 16 hex chars sufficient)
			
 
				+    pub hash: String,
			
 
				+}
			
 
				+
			
 
				+impl CacheKey {
			
 
				+    /// Compute from raw genome bytes + contig name.
			
 
				+    pub fn from_genome(contig: &str, genome: &[u8]) -> Self {
			
 
				+        // Blake3 is fast (~3 GB/s), ideal for large genomes.
			
 
				+        // Use `blake3` crate or fall back to a simple FNV for no-dep builds.
			
 
				+        let hash = blake3_hex_short(genome);
			
 
				+        Self {
			
 
				+            contig: contig.to_owned(),
			
 
				+            genome_len: genome.len(),
			
 
				+            hash,
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /// Construit la clé depuis le .fai (noodles) — ne lit pas le FASTA.
			
 
				+    /// `fai_entry` contient déjà la longueur du contig.
			
 
				+    pub fn from_fai(contig: &str, fai_len: usize, fasta_path: &Path) -> io::Result<Self> {
			
 
				+        let meta = std::fs::metadata(fasta_path)?;
			
 
				+        let mtime = meta
			
 
				+            .modified()?
			
 
				+            .duration_since(SystemTime::UNIX_EPOCH)
			
 
				+            .map(|d| d.as_secs())
			
 
				+            .unwrap_or(0);
			
 
				+        let fsize = meta.len();
			
 
				+
			
 
				+        // Hash = mtime + fsize — suffisant pour invalider sur modification
			
 
				+        let hash = format!("{mtime:016x}{fsize:016x}");
			
 
				+
			
 
				+        Ok(Self {
			
 
				+            contig: contig.to_owned(),
			
 
				+            genome_len: fai_len,
			
 
				+            hash,
			
 
				+        })
			
 
				+    }
			
 
				+
			
 
				+    pub fn file_name(&self) -> String {
			
 
				+        format!(
			
 
				+            "{}__{}__{}.uniqidx",
			
 
				+            self.contig, self.genome_len, self.hash
			
 
				+        )
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+/// Binary format (little-endian):
			
 
				+///
			
 
				+/// [0..8]   magic   = b"UNIQIDX1"
			
 
				+/// [8..12]  version = 1u32
			
 
				+/// [12..20] n       = genome_len as u64
			
 
				+/// [20..]   min_unique_len: n × u64  (usize::MAX encoded as u64::MAX)
			
 
				+pub fn save_to_cache(path: &Path, min_unique_len: &[usize]) -> io::Result<()> {
			
 
				+    let tmp = path.with_extension("uniqidx.tmp");
			
 
				+    {
			
 
				+        let f = File::create(&tmp)?;
			
 
				+        let mut w = BufWriter::with_capacity(8 * 1024 * 1024, f);
			
 
				+
			
 
				+        w.write_all(MAGIC)?;
			
 
				+        w.write_all(&VERSION.to_le_bytes())?;
			
 
				+        w.write_all(&(min_unique_len.len() as u64).to_le_bytes())?;
			
 
				+
			
 
				+        // Bulk-write as u64 array — avoids per-element call overhead
			
 
				+        let buf: Vec<u8> = min_unique_len
			
 
				+            .iter()
			
 
				+            .flat_map(|&v| {
			
 
				+                let encoded: u64 = if v == usize::MAX { u64::MAX } else { v as u64 };
			
 
				+                encoded.to_le_bytes()
			
 
				+            })
			
 
				+            .collect();
			
 
				+        w.write_all(&buf)?;
			
 
				+        w.flush()?;
			
 
				+    }
			
 
				+    // Atomic rename: avoids corrupt cache on crash mid-write
			
 
				+    fs::rename(&tmp, path)?;
			
 
				+    Ok(())
			
 
				+}
			
 
				+
			
 
				+pub fn load_from_cache(path: &Path, expected_len: usize) -> io::Result<Vec<usize>> {
			
 
				+    let f = File::open(path)?;
			
 
				+    let mut r = BufReader::with_capacity(8 * 1024 * 1024, f);
			
 
				+
			
 
				+    // Magic
			
 
				+    let mut magic = [0u8; 8];
			
 
				+    r.read_exact(&mut magic)?;
			
 
				+    if &magic != MAGIC {
			
 
				+        return Err(io::Error::new(io::ErrorKind::InvalidData, "bad magic"));
			
 
				+    }
			
 
				+
			
 
				+    // Version
			
 
				+    let mut vbuf = [0u8; 4];
			
 
				+    r.read_exact(&mut vbuf)?;
			
 
				+    if u32::from_le_bytes(vbuf) != VERSION {
			
 
				+        return Err(io::Error::new(
			
 
				+            io::ErrorKind::InvalidData,
			
 
				+            "version mismatch",
			
 
				+        ));
			
 
				+    }
			
 
				+
			
 
				+    // Length
			
 
				+    let mut nbuf = [0u8; 8];
			
 
				+    r.read_exact(&mut nbuf)?;
			
 
				+    let n = u64::from_le_bytes(nbuf) as usize;
			
 
				+    if n != expected_len {
			
 
				+        return Err(io::Error::new(
			
 
				+            io::ErrorKind::InvalidData,
			
 
				+            format!("length mismatch: cache has {n}, genome is {expected_len}"),
			
 
				+        ));
			
 
				+    }
			
 
				+
			
 
				+    // Payload: read all at once then cast
			
 
				+    let byte_len = n * 8;
			
 
				+    let mut buf = vec![0u8; byte_len];
			
 
				+    r.read_exact(&mut buf)?;
			
 
				+
			
 
				+    let result: Vec<usize> = buf
			
 
				+        .chunks_exact(8)
			
 
				+        .map(|c| {
			
 
				+            let v = u64::from_le_bytes(c.try_into().unwrap());
			
 
				+            if v == u64::MAX {
			
 
				+                usize::MAX
			
 
				+            } else {
			
 
				+                v as usize
			
 
				+            }
			
 
				+        })
			
 
				+        .collect();
			
 
				+
			
 
				+    Ok(result)
			
 
				+}
			
 
				+
			
 
				+// ── Minimal Blake3-like hash (FNV-1a 64bit, no deps) ─────────────────────────
			
 
				+// Replace with `blake3` crate for collision resistance on production.
			
 
				+fn blake3_hex_short(data: &[u8]) -> String {
			
 
				+    const FNV_OFFSET: u64 = 14695981039346656037;
			
 
				+    const FNV_PRIME: u64 = 1099511628211;
			
 
				+    let mut h = FNV_OFFSET;
			
 
				+    for &b in data {
			
 
				+        h ^= b as u64;
			
 
				+        h = h.wrapping_mul(FNV_PRIME);
			
 
				+    }
			
 
				+    format!("{h:016x}")
			
 
				+}
			
--- a/src/uniqueness/genome_index.rs
+++ b/src/uniqueness/genome_index.rs
@@ -0,0 +1,132 @@
 
				+// src/uniqueness/genome_index.rs
			
 
				+
			
 
				+use std::collections::HashMap;
			
 
				+use std::path::Path;
			
 
				+use log::{info, warn};
			
 
				+
			
 
				+use anyhow::Context;
			
 
				+
			
 
				+use super::{AnchorBias, CacheKey, UniqueInterval, UniquenessIndex};
			
 
				+use crate::{
			
 
				+    io::fasta::{read_fai, read_single_contig_fasta, split_fasta, FaiEntry},
			
 
				+    uniqueness::cache::load_from_cache,
			
 
				+}; // ton module existant
			
 
				+
			
 
				+#[derive(Debug, Clone, PartialEq, Eq)]
			
 
				+pub struct GenomicPos {
			
 
				+    pub contig: String,
			
 
				+    pub pos: usize, // 0-based
			
 
				+}
			
 
				+
			
 
				+pub struct GenomeIndex {
			
 
				+    contigs: HashMap<String, UniquenessIndex>,
			
 
				+}
			
 
				+
			
 
				+impl GenomeIndex {
			
 
				+    /// Build or load from cache.
			
 
				+    ///
			
 
				+    /// Uses your existing `split_fasta` to stream contig by contig into
			
 
				+    /// a temp dir, builds/loads a UniquenessIndex per contig, then drops
			
 
				+    /// the split files.
			
 
				+    ///
			
 
				+    /// `filter`: if Some, only process these contig names.
			
 
				+    pub fn build(
			
 
				+        fasta_path: impl AsRef<Path>,
			
 
				+        cache_dir: impl AsRef<Path>,
			
 
				+        filter: Option<&[&str]>,
			
 
				+    ) -> anyhow::Result<Self> {
			
 
				+        let fasta_path = fasta_path.as_ref();
			
 
				+        let cache_dir = cache_dir.as_ref();
			
 
				+        std::fs::create_dir_all(cache_dir)?;
			
 
				+
			
 
				+        let fai_path = fasta_path.with_extension("fa.fai");
			
 
				+        let fai = read_fai(&fai_path)
			
 
				+            .with_context(|| format!("cannot read FAI {}", fai_path.display()))?;
			
 
				+
			
 
				+        // ── Premier passage : quels contigs ont besoin d'être (re)buildés ? ──
			
 
				+        let mut need_build: Vec<&FaiEntry> = Vec::new();
			
 
				+        let mut cached: HashMap<String, UniquenessIndex> = HashMap::new();
			
 
				+
			
 
				+        for entry in &fai {
			
 
				+            if let Some(allowed) = filter {
			
 
				+                if !allowed.contains(&entry.name.as_str()) {
			
 
				+                    continue;
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            let key = CacheKey::from_fai(&entry.name, entry.len, fasta_path)?;
			
 
				+            let cache_path = cache_dir.join(key.file_name());
			
 
				+
			
 
				+            if cache_path.exists() {
			
 
				+                match load_from_cache(&cache_path, entry.len) {
			
 
				+                    Ok(min_unique_len) => {
			
 
				+                        info!("[genome_index] cache hit  : {}", entry.name);
			
 
				+                        cached.insert(
			
 
				+                            entry.name.clone(),
			
 
				+                            UniquenessIndex::from_raw(min_unique_len),
			
 
				+                        );
			
 
				+                    }
			
 
				+                    Err(e) => {
			
 
				+                        warn!("[genome_index] cache miss : {} ({e})", entry.name);
			
 
				+                        need_build.push(entry);
			
 
				+                    }
			
 
				+                }
			
 
				+            } else {
			
 
				+                warn!("[genome_index] cache miss : {}", entry.name);
			
 
				+                need_build.push(entry);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        if need_build.is_empty() {
			
 
				+            info!("[genome_index] all contigs loaded from cache, FASTA not read.");
			
 
				+            return Ok(Self { contigs: cached });
			
 
				+        }
			
 
				+
			
 
				+        let need_names: Vec<&str> = need_build.iter().map(|e| e.name.as_str()).collect();
			
 
				+        info!(
			
 
				+            "[genome_index] building {} contig(s): {:?}",
			
 
				+            need_names.len(),
			
 
				+            need_names
			
 
				+        );
			
 
				+
			
 
				+        let split_dir = cache_dir.join("_split_tmp");
			
 
				+        let contig_fastas = split_fasta(fasta_path, &split_dir)?;
			
 
				+
			
 
				+        for cf in &contig_fastas {
			
 
				+            if !need_names.contains(&cf.name.as_str()) {
			
 
				+                continue;
			
 
				+            }
			
 
				+
			
 
				+            let entry = need_build.iter().find(|e| e.name == cf.name).unwrap();
			
 
				+            let seq = read_single_contig_fasta(&cf.fasta_path)?;
			
 
				+
			
 
				+            info!(
			
 
				+                "[genome_index] building SA : {} ({:.1} Mbp)",
			
 
				+                cf.name,
			
 
				+                seq.len() as f64 / 1e6
			
 
				+            );
			
 
				+
			
 
				+            let key = CacheKey::from_fai(&cf.name, entry.len, fasta_path)?;
			
 
				+            let idx = UniquenessIndex::build_with_cache(&seq, &key, cache_dir)
			
 
				+                .with_context(|| format!("build_with_cache failed for {}", cf.name))?;
			
 
				+            cached.insert(cf.name.clone(), idx);
			
 
				+        }
			
 
				+
			
 
				+        let _ = std::fs::remove_dir_all(&split_dir);
			
 
				+        Ok(Self { contigs: cached })
			
 
				+    }
			
 
				+
			
 
				+    pub fn query(&self, pos: &GenomicPos, bias: AnchorBias) -> Option<UniqueInterval> {
			
 
				+        self.contigs
			
 
				+            .get(&pos.contig)?
			
 
				+            .minimal_unique_interval_containing(pos.pos, bias)
			
 
				+    }
			
 
				+
			
 
				+    pub fn contig_names(&self) -> impl Iterator<Item = &str> {
			
 
				+        self.contigs.keys().map(String::as_str)
			
 
				+    }
			
 
				+
			
 
				+    pub fn contig_len(&self, contig: &str) -> Option<usize> {
			
 
				+        self.contigs.get(contig).map(|idx| idx.genome_len())
			
 
				+    }
			
 
				+}
			
--- a/src/uniqueness/mod.rs
+++ b/src/uniqueness/mod.rs
@@ -0,0 +1,583 @@
 
				+use std::fs;
			
 
				+use std::io::{self, BufReader, BufWriter, Read, Write};
			
 
				+use std::path::{Path, PathBuf};
			
 
				+use log::{info, warn};
			
 
				+
			
 
				+mod cache;
			
 
				+mod genome_index;
			
 
				+mod suffix;
			
 
				+
			
 
				+pub use cache::CacheKey;
			
 
				+
			
 
				+use cache::{load_from_cache, save_to_cache};
			
 
				+use suffix::{build_lcp_kasai, build_suffix_array};
			
 
				+
			
 
				+/// Pure computation: genome bytes → min_unique_len vector.
			
 
				+/// Separated so GenomeIndex can call it without going through build_with_cache.
			
 
				+fn build_min_unique_len(genome: &[u8]) -> Vec<usize> {
			
 
				+    let n   = genome.len();
			
 
				+    let sa  = build_suffix_array(genome);
			
 
				+    let lcp = build_lcp_kasai(genome, &sa);
			
 
				+
			
 
				+    let mut rank = vec![0usize; n];
			
 
				+    for (i, &s) in sa.iter().enumerate() { rank[s] = i; }
			
 
				+
			
 
				+    let mut mul = vec![usize::MAX; n];
			
 
				+    for p in 0..n {
			
 
				+        let r     = rank[p];
			
 
				+        let left  = if r > 0     { lcp[r]     } else { 0 };
			
 
				+        let right = if r + 1 < n { lcp[r + 1] } else { 0 };
			
 
				+        let k     = left.max(right) + 1;
			
 
				+        mul[p]    = if p + k <= n { k } else { usize::MAX };
			
 
				+    }
			
 
				+    mul
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Clone, PartialEq, Eq)]
			
 
				+pub struct UniqueInterval {
			
 
				+    pub start: usize,
			
 
				+    pub end: usize,
			
 
				+}
			
 
				+
			
 
				+impl UniqueInterval {
			
 
				+    pub fn len(&self) -> usize {
			
 
				+        self.end - self.start
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Clone, Copy)]
			
 
				+pub enum AnchorBias {
			
 
				+    Left,
			
 
				+    Right,
			
 
				+    Auto,
			
 
				+}
			
 
				+
			
 
				+pub struct UniquenessIndex {
			
 
				+    min_unique_len: Vec<usize>,
			
 
				+    genome_len: usize,
			
 
				+}
			
 
				+
			
 
				+impl UniquenessIndex {
			
 
				+    /// Build from genome bytes, using a disk cache keyed on `cache_key`.
			
 
				+    /// Cache lives in `cache_dir` (created if absent).
			
 
				+    /// If the cache file exists and is valid, loading is O(n) read — no recomputation.
			
 
				+    pub fn build_with_cache(genome: &[u8], key: &CacheKey, cache_dir: &Path) -> io::Result<Self> {
			
 
				+        let cache_path = cache_dir.join(key.file_name());
			
 
				+        if cache_path.exists() {
			
 
				+            match load_from_cache(&cache_path, genome.len()) {
			
 
				+                Ok(mul) => return Ok(Self::from_raw(mul)),
			
 
				+                Err(e)  => warn!("[cache] invalid ({e}), rebuilding..."),
			
 
				+            }
			
 
				+        }
			
 
				+        let mul = build_min_unique_len(genome);
			
 
				+        save_to_cache(&cache_path, &mul)?;
			
 
				+        Ok(Self::from_raw(mul))
			
 
				+    }
			
 
				+
			
 
				+    /// Construct directly from a precomputed `min_unique_len` vector.
			
 
				+    /// Used when loading from disk cache — skips SA/LCP computation entirely.
			
 
				+    pub fn from_raw(min_unique_len: Vec<usize>) -> Self {
			
 
				+        let genome_len = min_unique_len.len();
			
 
				+        Self {
			
 
				+            min_unique_len,
			
 
				+            genome_len,
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /// Build without cache (e.g. for tests or one-shot use).
			
 
				+    pub fn build(genome: &[u8]) -> Self {
			
 
				+        Self::from_raw(build_min_unique_len(genome))
			
 
				+    }
			
 
				+
			
 
				+    pub fn minimal_unique_interval_containing(
			
 
				+        &self,
			
 
				+        pos: usize,
			
 
				+        bias: AnchorBias,
			
 
				+    ) -> Option<UniqueInterval> {
			
 
				+        assert!(pos < self.genome_len);
			
 
				+
			
 
				+        match bias {
			
 
				+            AnchorBias::Left => {
			
 
				+                let k = self.min_unique_len[pos];
			
 
				+                if k == usize::MAX {
			
 
				+                    return None;
			
 
				+                }
			
 
				+                Some(UniqueInterval {
			
 
				+                    start: pos,
			
 
				+                    end: pos + k,
			
 
				+                })
			
 
				+            }
			
 
				+
			
 
				+            AnchorBias::Right => {
			
 
				+                for a in (0..=pos).rev() {
			
 
				+                    let k = self.min_unique_len[a];
			
 
				+                    if k == usize::MAX {
			
 
				+                        continue;
			
 
				+                    }
			
 
				+                    if a + k > pos {
			
 
				+                        return Some(UniqueInterval {
			
 
				+                            start: a,
			
 
				+                            end: a + k,
			
 
				+                        });
			
 
				+                    }
			
 
				+                    // a + k <= pos and a is decreasing: gap can only grow, bail
			
 
				+                    if pos - a >= self.genome_len {
			
 
				+                        break;
			
 
				+                    }
			
 
				+                }
			
 
				+                None
			
 
				+            }
			
 
				+
			
 
				+            AnchorBias::Auto => {
			
 
				+                let window = 500;
			
 
				+                let mut best: Option<UniqueInterval> = None;
			
 
				+                for a in pos.saturating_sub(window)..=pos {
			
 
				+                    let k = self.min_unique_len[a];
			
 
				+                    if k == usize::MAX || a + k <= pos {
			
 
				+                        continue;
			
 
				+                    }
			
 
				+                    let c = UniqueInterval {
			
 
				+                        start: a,
			
 
				+                        end: a + k,
			
 
				+                    };
			
 
				+                    best = Some(match best {
			
 
				+                        None => c,
			
 
				+                        Some(b) => {
			
 
				+                            if c.len() < b.len() {
			
 
				+                                c
			
 
				+                            } else {
			
 
				+                                b
			
 
				+                            }
			
 
				+                        }
			
 
				+                    });
			
 
				+                }
			
 
				+                best
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    pub fn genome_len(&self) -> usize {
			
 
				+        self.genome_len
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// tests/uniqueness.rs
			
 
				+
			
 
				+#[cfg(test)]
			
 
				+mod tests {
			
 
				+    use crate::uniqueness::genome_index::{GenomeIndex, GenomicPos};
			
 
				+
			
 
				+    use super::*;
			
 
				+
			
 
				+    fn toy_genome() -> &'static [u8] {
			
 
				+        // "banana" — well-known SA example, easy to verify by hand
			
 
				+        b"ACGTACGTNNACGT"
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_cache_roundtrip() {
			
 
				+        let genome = toy_genome();
			
 
				+        let dir = std::path::Path::new("/home/t_steimle/tmp/");
			
 
				+
			
 
				+        let key = CacheKey::from_genome("chr_test", genome);
			
 
				+
			
 
				+        let idx1 = UniquenessIndex::build_with_cache(genome, &key, dir).unwrap();
			
 
				+        // Second call must hit cache
			
 
				+        let idx2 = UniquenessIndex::build_with_cache(genome, &key, dir).unwrap();
			
 
				+
			
 
				+        assert_eq!(idx1.min_unique_len, idx2.min_unique_len);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn test_cache_invalidated_on_length_mismatch() {
			
 
				+        let genome1 = b"ACGTACGT".as_slice();
			
 
				+        let genome2 = b"ACGTACGTNN".as_slice();
			
 
				+        let dir = std::path::Path::new("/home/t_steimle/tmp/");
			
 
				+        let key = CacheKey::from_genome("chr_test", genome1);
			
 
				+
			
 
				+        UniquenessIndex::build_with_cache(genome1, &key, dir).unwrap();
			
 
				+        // genome2 has different len → load_from_cache returns Err → rebuild
			
 
				+        let key2 = CacheKey::from_genome("chr_test", genome2);
			
 
				+        // different hash → different file, no collision
			
 
				+        assert_ne!(key.file_name(), key2.file_name());
			
 
				+    }
			
 
				+
			
 
				+    // ── helpers ───────────────────────────────────────────────────────────────
			
 
				+
			
 
				+    /// Verify that a given interval is actually unique in the genome:
			
 
				+    /// the subsequence appears exactly once.
			
 
				+    fn assert_unique(genome: &[u8], iv: &UniqueInterval) {
			
 
				+        let needle = &genome[iv.start..iv.end];
			
 
				+        let count = genome
			
 
				+            .windows(needle.len())
			
 
				+            .filter(|w| *w == needle)
			
 
				+            .count();
			
 
				+        assert_eq!(
			
 
				+            count,
			
 
				+            1,
			
 
				+            "interval [{}, {}) = {:?} appears {count}x in genome — not unique",
			
 
				+            iv.start,
			
 
				+            iv.end,
			
 
				+            std::str::from_utf8(needle).unwrap_or("<binary>"),
			
 
				+        );
			
 
				+    }
			
 
				+
			
 
				+    /// Verify that no strictly shorter sub-interval anchored at the same start
			
 
				+    /// is also unique (minimality check for Left bias).
			
 
				+    fn assert_minimal_left(genome: &[u8], iv: &UniqueInterval) {
			
 
				+        if iv.len() <= 1 {
			
 
				+            return;
			
 
				+        }
			
 
				+        let shorter = UniqueInterval {
			
 
				+            start: iv.start,
			
 
				+            end: iv.end - 1,
			
 
				+        };
			
 
				+        let needle = &genome[shorter.start..shorter.end];
			
 
				+        let count = genome
			
 
				+            .windows(needle.len())
			
 
				+            .filter(|w| *w == needle)
			
 
				+            .count();
			
 
				+        assert!(
			
 
				+            count > 1,
			
 
				+            "interval [{}, {}) = {:?} is shorter and already unique — minimality violated",
			
 
				+            shorter.start,
			
 
				+            shorter.end,
			
 
				+            std::str::from_utf8(needle).unwrap_or("<binary>"),
			
 
				+        );
			
 
				+    }
			
 
				+
			
 
				+    /// Same for Right bias: no strictly shorter interval ending at the same end.
			
 
				+    fn assert_minimal_right(genome: &[u8], iv: &UniqueInterval) {
			
 
				+        if iv.len() <= 1 {
			
 
				+            return;
			
 
				+        }
			
 
				+        let shorter = UniqueInterval {
			
 
				+            start: iv.start + 1,
			
 
				+            end: iv.end,
			
 
				+        };
			
 
				+        let needle = &genome[shorter.start..shorter.end];
			
 
				+        let count = genome
			
 
				+            .windows(needle.len())
			
 
				+            .filter(|w| *w == needle)
			
 
				+            .count();
			
 
				+        assert!(
			
 
				+            count > 1,
			
 
				+            "interval [{}, {}) = {:?} is shorter and already unique — minimality violated",
			
 
				+            shorter.start,
			
 
				+            shorter.end,
			
 
				+            std::str::from_utf8(needle).unwrap_or("<binary>"),
			
 
				+        );
			
 
				+    }
			
 
				+
			
 
				+    /// Assert that no interval of the same length covering pos is shorter
			
 
				+    /// (Auto minimality: no other anchor yields a shorter unique window).
			
 
				+    fn assert_minimal_auto(genome: &[u8], iv: &UniqueInterval, pos: usize) {
			
 
				+        // Try all anchors in [pos - len + 1 .. pos] and verify none gives shorter
			
 
				+        let window = iv.len().saturating_sub(1);
			
 
				+        for a in pos.saturating_sub(window)..=pos {
			
 
				+            if a + 1 > genome.len() {
			
 
				+                break;
			
 
				+            }
			
 
				+            let max_end = (a + iv.len()).min(genome.len());
			
 
				+            for end in (a + 1)..max_end {
			
 
				+                let needle = &genome[a..end];
			
 
				+                let count = genome
			
 
				+                    .windows(needle.len())
			
 
				+                    .filter(|w| *w == needle)
			
 
				+                    .count();
			
 
				+                if count == 1 {
			
 
				+                    panic!(
			
 
				+                        "found shorter unique interval [{a}, {end}) len={} \
			
 
				+                         covering pos={pos}, but Auto returned [{}, {}) len={}",
			
 
				+                        end - a,
			
 
				+                        iv.start,
			
 
				+                        iv.end,
			
 
				+                        iv.len()
			
 
				+                    );
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // ── genome fixtures ───────────────────────────────────────────────────────
			
 
				+
			
 
				+    /// Simple genome with obvious unique regions.
			
 
				+    ///
			
 
				+    /// Layout (positions):
			
 
				+    ///  0123456789...
			
 
				+    ///  AAAA TTTT AAAA GCTAGCTA NNNN GCTAGCTA
			
 
				+    ///                 ^unique^       ^repeat^
			
 
				+    ///
			
 
				+    /// "GCTAGCTA" appears twice → not unique
			
 
				+    /// "TTTT"     appears once  → unique but short
			
 
				+    /// The N-run should return None for all biases.
			
 
				+    fn genome_simple() -> Vec<u8> {
			
 
				+        //0         1         2         3
			
 
				+        //0123456789012345678901234567890123456789
			
 
				+        b"AAAATTTTAAAAGCTAGCTANNNNNNNNNNNGCTAGCTA".to_vec()
			
 
				+    }
			
 
				+
			
 
				+    /// Genome where Left/Right/Auto produce verifiably different intervals.
			
 
				+    ///
			
 
				+    ///  ACGT ACGT TGCA TGCA AAAA
			
 
				+    ///  0    4    8    12   16
			
 
				+    ///
			
 
				+    /// "ACGTACGT" covers [0..8) — only one occurrence at pos 0
			
 
				+    /// "TGCATGCA" covers [8..16) — only one occurrence
			
 
				+    /// Repeated motifs force longer unique strings.
			
 
				+    fn genome_repeats() -> Vec<u8> {
			
 
				+        b"ACGTACGTTGCATGCAAAAACCCCCGGGGG".to_vec()
			
 
				+    }
			
 
				+
			
 
				+    /// Genome constructed so that Left, Right and Auto give three distinct
			
 
				+    /// intervals for a single query position.
			
 
				+    ///
			
 
				+    ///  ATCGATCG GGGG ATCGATCG TTTT ATCG CCCC
			
 
				+    ///  0        8    12       20   24   28
			
 
				+    ///
			
 
				+    /// pos=6 (inside first ATCGATCG repeat region):
			
 
				+    ///   Left  → must extend right to break the repeat
			
 
				+    ///   Right → extends left into earlier unique context
			
 
				+    ///   Auto  → picks whichever anchor gives shortest total span
			
 
				+    fn genome_biased() -> Vec<u8> {
			
 
				+        //0         1         2         3
			
 
				+        //012345678901234567890123456789012345
			
 
				+        b"ATCGATCGGGGGGGGATCGATCGTTTTTATCGCCCCCCCC".to_vec()
			
 
				+    }
			
 
				+
			
 
				+    // ── Left bias tests ───────────────────────────────────────────────────────
			
 
				+
			
 
				+    #[test]
			
 
				+    fn left_interval_starts_at_pos() {
			
 
				+        let g = genome_repeats();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        for pos in [0, 4, 8, 12] {
			
 
				+            if let Some(iv) = idx.minimal_unique_interval_containing(pos, AnchorBias::Left) {
			
 
				+                assert_eq!(iv.start, pos, "Left: interval must start at pos={pos}");
			
 
				+                assert!(iv.end > pos, "Left: interval must extend past pos");
			
 
				+                assert_unique(&g, &iv);
			
 
				+                assert_minimal_left(&g, &iv);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn left_interval_is_unique() {
			
 
				+        let g = genome_simple();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        // pos=4 is in the TTTT region — unique and short
			
 
				+        let iv = idx
			
 
				+            .minimal_unique_interval_containing(4, AnchorBias::Left)
			
 
				+            .expect("TTTT region should have a unique interval");
			
 
				+        assert_eq!(iv.start, 4);
			
 
				+        assert_unique(&g, &iv);
			
 
				+        assert_minimal_left(&g, &iv);
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn left_returns_none_for_n_run() {
			
 
				+        let g = genome_simple();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+        // N-run starts at pos 20 in genome_simple
			
 
				+        // Ns produce repeated k-mers → no unique interval anchored there
			
 
				+        let result = idx.minimal_unique_interval_containing(22, AnchorBias::Left);
			
 
				+        // We don't assert None strictly (an N-run touching unique flanks might
			
 
				+        // resolve), but if Some, it must be genuinely unique.
			
 
				+        if let Some(iv) = result {
			
 
				+            assert_unique(&g, &iv);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // ── Right bias tests ──────────────────────────────────────────────────────
			
 
				+
			
 
				+    #[test]
			
 
				+    fn right_interval_covers_pos() {
			
 
				+        let g = genome_repeats();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        for pos in [5, 10, 15, 20] {
			
 
				+            if pos >= g.len() {
			
 
				+                continue;
			
 
				+            }
			
 
				+            if let Some(iv) = idx.minimal_unique_interval_containing(pos, AnchorBias::Right) {
			
 
				+                assert!(iv.start <= pos, "Right: interval must start ≤ pos={pos}");
			
 
				+                assert!(iv.end > pos, "Right: interval must end   > pos={pos}");
			
 
				+                assert_unique(&g, &iv);
			
 
				+                assert_minimal_right(&g, &iv);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn right_interval_ends_as_early_as_possible() {
			
 
				+        let g = genome_biased();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+        let pos = 6;
			
 
				+
			
 
				+        let iv_right = idx
			
 
				+            .minimal_unique_interval_containing(pos, AnchorBias::Right)
			
 
				+            .expect("should find a unique interval with Right bias");
			
 
				+        let iv_left = idx
			
 
				+            .minimal_unique_interval_containing(pos, AnchorBias::Left)
			
 
				+            .expect("should find a unique interval with Left bias");
			
 
				+
			
 
				+        // Right-biased interval uses a left anchor → end is generally earlier
			
 
				+        // than Left-biased interval's end (which is anchored at pos and must
			
 
				+        // extend further right to become unique).
			
 
				+        // This is not guaranteed in all genomes, but holds for genome_biased.
			
 
				+        assert!(
			
 
				+            iv_right.end <= iv_left.end,
			
 
				+            "Right end={} should be ≤ Left end={} for this genome",
			
 
				+            iv_right.end,
			
 
				+            iv_left.end,
			
 
				+        );
			
 
				+        assert_unique(&g, &iv_right);
			
 
				+    }
			
 
				+
			
 
				+    // ── Auto bias tests ───────────────────────────────────────────────────────
			
 
				+
			
 
				+    #[test]
			
 
				+    fn auto_is_shortest_among_all_biases() {
			
 
				+        let g = genome_biased();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        for pos in 0..g.len() {
			
 
				+            let auto = idx.minimal_unique_interval_containing(pos, AnchorBias::Auto);
			
 
				+            let left = idx.minimal_unique_interval_containing(pos, AnchorBias::Left);
			
 
				+            let right = idx.minimal_unique_interval_containing(pos, AnchorBias::Right);
			
 
				+
			
 
				+            if let Some(ref a) = auto {
			
 
				+                assert_unique(&g, a);
			
 
				+                assert_minimal_auto(&g, a, pos);
			
 
				+
			
 
				+                // Auto must be ≤ Left in length
			
 
				+                if let Some(ref l) = left {
			
 
				+                    assert!(
			
 
				+                        a.len() <= l.len(),
			
 
				+                        "pos={pos}: Auto len={} > Left len={} — Auto must be minimal",
			
 
				+                        a.len(),
			
 
				+                        l.len()
			
 
				+                    );
			
 
				+                }
			
 
				+                // Auto must be ≤ Right in length
			
 
				+                if let Some(ref r) = right {
			
 
				+                    assert!(
			
 
				+                        a.len() <= r.len(),
			
 
				+                        "pos={pos}: Auto len={} > Right len={} — Auto must be minimal",
			
 
				+                        a.len(),
			
 
				+                        r.len()
			
 
				+                    );
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn auto_covers_pos() {
			
 
				+        let g = genome_repeats();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        for pos in 0..g.len() {
			
 
				+            if let Some(iv) = idx.minimal_unique_interval_containing(pos, AnchorBias::Auto) {
			
 
				+                assert!(
			
 
				+                    iv.start <= pos && iv.end > pos,
			
 
				+                    "Auto interval [{}, {}) must contain pos={pos}",
			
 
				+                    iv.start,
			
 
				+                    iv.end
			
 
				+                );
			
 
				+                assert_unique(&g, &iv);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // ── Cross-bias consistency ────────────────────────────────────────────────
			
 
				+
			
 
				+    #[test]
			
 
				+    fn all_biases_return_unique_intervals() {
			
 
				+        let g = genome_simple();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        for pos in 0..g.len() {
			
 
				+            for bias in [AnchorBias::Left, AnchorBias::Right, AnchorBias::Auto] {
			
 
				+                if let Some(iv) = idx.minimal_unique_interval_containing(pos, bias) {
			
 
				+                    assert!(iv.start <= pos, "start must be ≤ pos");
			
 
				+                    assert!(iv.end > pos, "end must be > pos");
			
 
				+                    assert!(iv.end <= g.len(), "end must be in bounds");
			
 
				+                    assert_unique(&g, &iv);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn left_start_constraint_holds_for_all_positions() {
			
 
				+        let g = genome_repeats();
			
 
				+        let idx = UniquenessIndex::build(&g);
			
 
				+
			
 
				+        for pos in 0..g.len() {
			
 
				+            if let Some(iv) = idx.minimal_unique_interval_containing(pos, AnchorBias::Left) {
			
 
				+                assert_eq!(
			
 
				+                    iv.start, pos,
			
 
				+                    "Left bias must anchor at pos={pos}, got start={}",
			
 
				+                    iv.start
			
 
				+                );
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // ── Cache integration ─────────────────────────────────────────────────────
			
 
				+
			
 
				+    #[test]
			
 
				+    fn cache_preserves_query_results() {
			
 
				+        let g = genome_biased();
			
 
				+        let dir = Path::new("/home/t_steimle/tmp/");
			
 
				+        let key = CacheKey::from_genome("test_contig", &g);
			
 
				+
			
 
				+        let idx_built = UniquenessIndex::build_with_cache(&g, &key, dir).unwrap();
			
 
				+        let idx_cached = UniquenessIndex::build_with_cache(&g, &key, dir).unwrap();
			
 
				+
			
 
				+        for pos in 0..g.len() {
			
 
				+            for bias in [AnchorBias::Left, AnchorBias::Right, AnchorBias::Auto] {
			
 
				+                assert_eq!(
			
 
				+                    idx_built.minimal_unique_interval_containing(pos, bias),
			
 
				+                    idx_cached.minimal_unique_interval_containing(pos, bias),
			
 
				+                    "cache mismatch at pos={pos} bias={bias:?}",
			
 
				+                );
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+        use crate::helpers::test_init;
			
 
				+
			
 
				+
			
 
				+    #[test]
			
 
				+    fn uniqueness_genome() -> anyhow::Result<()> {
			
 
				+        test_init();
			
 
				+        let index = GenomeIndex::build(
			
 
				+            "/home/t_steimle/ref/hs1/chm13v2.0.fa",
			
 
				+            "/home/t_steimle/ref/hs1/hs1_uniqueness",
			
 
				+            None, // None = tout indexer
			
 
				+        )?;
			
 
				+
			
 
				+        let pos = GenomicPos {
			
 
				+            contig: "chr1".into(),
			
 
				+            pos: 248_000_000,
			
 
				+        };
			
 
				+
			
 
				+        for bias in [AnchorBias::Left, AnchorBias::Right, AnchorBias::Auto] {
			
 
				+            match index.query(&pos, bias) {
			
 
				+                Some(iv) => println!(
			
 
				+                    "{bias:?}: {}:{}-{} ({}bp)",
			
 
				+                    pos.contig,
			
 
				+                    iv.start,
			
 
				+                    iv.end,
			
 
				+                    iv.len()
			
 
				+                ),
			
 
				+                None => println!("{bias:?}: non-unique"),
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        Ok(())
			
 
				+    }
			
 
				+}
			
--- a/src/uniqueness/suffix.rs
+++ b/src/uniqueness/suffix.rs
@@ -0,0 +1,75 @@
 
				+use rayon::prelude::*;
			
 
				+
			
 
				+pub fn build_suffix_array(genome: &[u8]) -> Vec<usize> {
			
 
				+    let n = genome.len();
			
 
				+    if n == 0 {
			
 
				+        return Vec::new();
			
 
				+    }
			
 
				+
			
 
				+    let mut sa: Vec<usize> = (0..n).collect();
			
 
				+    let mut rank: Vec<i64> = genome.iter().map(|&b| b as i64).collect();
			
 
				+    let mut tmp = vec![0i64; n];
			
 
				+    let mut gap = 1usize;
			
 
				+
			
 
				+    while gap < n {
			
 
				+        // ── Sort — main bottleneck, worth parallelising ───────────────────
			
 
				+        sa.par_sort_unstable_by(|&a, &b| {
			
 
				+            let second = |i: usize| if i + gap < n { rank[i + gap] } else { -1 };
			
 
				+            (rank[a], second(a)).cmp(&(rank[b], second(b)))
			
 
				+        });
			
 
				+
			
 
				+        // ── Compute key-change flags in parallel ──────────────────────────
			
 
				+        let differs: Vec<bool> = (1..n)
			
 
				+            .into_par_iter()
			
 
				+            .map(|i| {
			
 
				+                let second = |i: usize| if i + gap < n { rank[i + gap] } else { -1i64 };
			
 
				+                let prev = sa[i - 1];
			
 
				+                let cur = sa[i];
			
 
				+                (rank[prev], second(prev)) != (rank[cur], second(cur))
			
 
				+            })
			
 
				+            .collect();
			
 
				+
			
 
				+        // ── Prefix sum — sequential, O(n), cache-friendly ────────────────
			
 
				+        tmp[sa[0]] = 0;
			
 
				+        for i in 1..n {
			
 
				+            tmp[sa[i]] = tmp[sa[i - 1]] + if differs[i - 1] { 1 } else { 0 };
			
 
				+        }
			
 
				+
			
 
				+        rank.copy_from_slice(&tmp);
			
 
				+
			
 
				+        if rank[sa[n - 1]] == (n - 1) as i64 {
			
 
				+            break;
			
 
				+        }
			
 
				+        gap *= 2;
			
 
				+    }
			
 
				+
			
 
				+    sa
			
 
				+}
			
 
				+
			
 
				+pub fn build_lcp_kasai(genome: &[u8], sa: &[usize]) -> Vec<usize> {
			
 
				+    let n = genome.len();
			
 
				+    let mut rank = vec![0usize; n];
			
 
				+    for (i, &s) in sa.iter().enumerate() {
			
 
				+        rank[s] = i;
			
 
				+    }
			
 
				+
			
 
				+    let mut lcp = vec![0usize; n];
			
 
				+    let mut h = 0usize;
			
 
				+
			
 
				+    for p in 0..n {
			
 
				+        let r = rank[p];
			
 
				+        if r == 0 {
			
 
				+            h = 0;
			
 
				+            continue;
			
 
				+        }
			
 
				+        let prev = sa[r - 1];
			
 
				+        while p + h < n && prev + h < n && genome[p + h] == genome[prev + h] {
			
 
				+            h += 1;
			
 
				+        }
			
 
				+        lcp[r] = h;
			
 
				+        if h > 0 {
			
 
				+            h -= 1;
			
 
				+        }
			
 
				+    }
			
 
				+    lcp
			
 
				+}
			
--- a/src/variant/vcf_variant.rs
+++ b/src/variant/vcf_variant.rs
@@ -226,7 +226,11 @@ impl FromStr for VcfVariant {
 
				         hasher.update(reference.to_string().as_bytes()); // Reference string as bytes
			
 
				         hasher.update(alternative.to_string().as_bytes()); // Alternative string as bytes
			
 
				         let hash = hasher.finalize();
			
 
				-        let hash = Hash128::new(hash.as_bytes()[..16].try_into().unwrap());
			
 
				+        let hash = Hash128::new(
			
 
				+            hash.as_bytes()[..16]
			
 
				+                .try_into()
			
 
				+                .expect("blake3 digest is always ≥ 16 bytes"),
			
 
				+        );
			
 
				 
			
 
				         Ok(Self {
			
 
				             hash,
			
@@ -841,28 +845,30 @@ impl core::fmt::Display for BNDDesc {
 
				 
			
 
				 /// Split a multiallelic VcfVariant into N biallelic VcfVariants.
			
 
				 /// Rewrites A/R-number INFO fields per allele using header metadata.
			
 
				-pub fn from_multiallelic(variant: VcfVariant) -> Vec<VcfVariant> {
			
 
				+pub fn from_multiallelic(variant: VcfVariant) -> anyhow::Result<Vec<VcfVariant>> {
			
 
				     let alt_str = variant.alternative.to_string();
			
 
				     let alts: Vec<&str> = alt_str.split(',').collect();
			
 
				 
			
 
				     // Already biallelic
			
 
				     if alts.len() == 1 {
			
 
				-        return vec![variant];
			
 
				+        return Ok(vec![variant]);
			
 
				     }
			
 
				 
			
 
				     alts.iter()
			
 
				         .enumerate()
			
 
				-        .map(|(i, alt)| {
			
 
				+        .map(|(i, alt)| -> anyhow::Result<VcfVariant> {
			
 
				             let mut v = variant.clone();
			
 
				 
			
 
				             // Rewrite ALT
			
 
				-            v.alternative = alt.parse().expect("Failed to parse split ALT");
			
 
				+            v.alternative = alt
			
 
				+                .parse()
			
 
				+                .with_context(|| format!("failed to parse split ALT allele: {alt}"))?;
			
 
				 
			
 
				             // Rewrite INFO fields (A/R-number)
			
 
				             v.infos = rewrite_infos(&variant.infos, i);
			
 
				 
			
 
				             // Rewrite FORMAT/GT
			
 
				-            v.formats = rewrite_formats(&variant.formats, i);
			
 
				+            v.formats = rewrite_formats(&variant.formats, i)?;
			
 
				 
			
 
				             // Recompute hash (pos + ref + new alt)
			
 
				             let mut hasher = blake3::Hasher::new();
			
@@ -871,9 +877,13 @@ pub fn from_multiallelic(variant: VcfVariant) -> Vec<VcfVariant> {
 
				             hasher.update(v.reference.to_string().as_bytes());
			
 
				             hasher.update(v.alternative.to_string().as_bytes());
			
 
				             let hash = hasher.finalize();
			
 
				-            v.hash = Hash128::new(hash.as_bytes()[..16].try_into().unwrap());
			
 
				+            v.hash = Hash128::new(
			
 
				+                hash.as_bytes()[..16]
			
 
				+                    .try_into()
			
 
				+                    .expect("blake3 digest is always ≥ 16 bytes"),
			
 
				+            );
			
 
				 
			
 
				-            v
			
 
				+            Ok(v)
			
 
				         })
			
 
				         .collect()
			
 
				 }
			
@@ -908,9 +918,9 @@ fn rewrite_infos(infos: &Infos, allele_idx: usize) -> Infos {
 
				     Infos(rewritten)
			
 
				 }
			
 
				 
			
 
				-fn rewrite_formats(formats: &Formats, allele_idx: usize) -> Formats {
			
 
				+fn rewrite_formats(formats: &Formats, allele_idx: usize) -> anyhow::Result<Formats> {
			
 
				     if formats.0.is_empty() {
			
 
				-        return formats.clone();
			
 
				+        return Ok(formats.clone());
			
 
				     }
			
 
				     // Reconstruct a fake FORMAT+sample row to reuse existing FromStr
			
 
				     let (fmt_str, sample_str): (String, String) = formats.clone().into();
			
@@ -918,7 +928,7 @@ fn rewrite_formats(formats: &Formats, allele_idx: usize) -> Formats {
 
				     let rewritten_sample = rewrite_sample(&sample_str, &fmt_keys, allele_idx);
			
 
				     (fmt_str.as_str(), rewritten_sample.as_str())
			
 
				         .try_into()
			
 
				-        .expect("Failed to parse rewritten FORMAT")
			
 
				+        .context("failed to parse rewritten FORMAT field")
			
 
				 }
			
 
				 
			
 
				 /// Recode a sample's GT from multiallelic to biallelic representation.