Thomas
/
pandora_lib_promethion


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368
							//! # 🧬 Long-read Somatic Variant Calling and Analysis Framework
//!
//! This Rust library provides a modular, parallelizable framework for somatic variant calling, annotation, and interpretation from long-read sequencing data. It is designed to support full pipelines for research and clinical workflows across multiple variant callers and analysis stages.
//!
//! The library also serves as an extensible platform that developers can leverage to add custom features, integrate new tools, and tailor workflows to specific use cases.
//!
//! ## 🧩 Key Features
//!
//! - **POD5 Demultiplexing and Alignment**: End-to-end support for processing ONT POD5 files:
//!     - Barcode-aware demultiplexing using metadata CSVs
//!     - POD5 subsetting and organization by case
//!     - Integration with basecallers (e.g., Dorado) for read alignment
//! - **Pipeline Management**: Full orchestration of Dockerized execution pipelines for tools such as ClairS, Nanomonsv, DeepVariant, Savana, Modkit, and Severus.
//! - **Flexible Configuration**: Centralized configuration system (`Config`, `CollectionsConfig`) for all modules and pipelines.
//! - **Input Abstraction**: Unified handling of BAM, POD5, and VCF file collections across cohorts and directories.
//! - **Variant Processing**: Modular loading, filtering, statistical analysis, and annotation of somatic and germline variants.
//! - **Haplotype Phasing and Methylation**: Support for LongPhase-based phasing and Modkit methylation pileups with support for multi-threaded pileup and aggregation.
//! - **Parallel Execution**: Uses `rayon` for efficient multicore parallelization over large cohorts and tasks.
//!
//! ## 📚 Module Highlights
//!
//! - `callers`: Interfaces to variant calling tools (ClairS, DeepVariant, Nanomonsv, Savana, etc...)
//! - `runners`: Pipeline runners (e.g. `Somatic`, `SeverusSolo`, `LongphasePhase`) that manage end-to-end execution.
//! - `collection`: Organizes input data across BAMs, VCFs, and POD5 files with auto-detection of completed runs.
//! - `annotation`: VEP line parsing and high-level annotation aggregation.
//! - `pipes`: Composition modules for executing pipelines across callers and post-processing steps.
//! - `functions`: Custom logic for genome assembly, entropy estimation, and internal tooling.
//! - `positions`, `variant`, `helpers`: Utilities for SV modeling, variant filtering, position overlap logic, and helper methods.
//!
//! ## ⚡ Workflow Overview
//!
//! ### 1. 📦 From POD5 to BAM Alignment
//!
//! - **Demultiplexing**: POD5 files are subset and demuxed using barcodes (via CSV metadata).
//! - **Flowcell Case Management**: Each sample is identified by a [`collection::pod5::FlowCellCase`] containing its ID, time point, and POD5 directory.
//! - **Alignment**: The [`commands::dorado::Dorado`] module handles alignment of POD5 reads to reference genome, producing BAMs.
//!
//! ```rust
//! let case = FlowCellCase { id: "PATIENT1", time_point: "diag", barcode: "01", pod_dir: "...".into() };
//! Dorado::init(case, Config::default())?.run_pipe()?;
//! ```
//!
//! ### 2. 🧬 Variant Calling (BAM ➝ VCF)
//!
//! Using the aligned BAMs, multiple variant callers can be run in parallel. The [`callers`] and [`runners`] modules support:
//!
//! - **ClairS** – somatic small variant calling with LongPhase haplotagging  
//! - **Nanomonsv** – structural variants (SV)  
//! - **DeepVariant** – germline small variants  
//! - **Savana** – SVs and copy number variations (CNV)  
//! - **Modkit** – methylation pileups  
//! - **LongPhase** – phasing and modcalling
//!
//! All workflows can be triggered per-case or per-cohort using `Collections` or `Somatic` runners.
//!
//! ```rust
//! ClairS::initialize("PATIENT1", Config::default())?.run()?;
//! NanomonSV::initialize("PATIENT1", Config::default())?.run()?;
//! ```
//!
//! ### 3. 📈 Aggregation & Statistics (VCF ➝ JSON / Stats)
//!
//! After variant calling:
//!
//! - Annotate with VEP ([`annotation`] module)
//! - Load and filter with [`variant::variant_collection`]
//! - Compute variant and region-level stats (e.g., mutation rates, alteration categories, coding overlaps)
//!
//! ```rust
//! let variants = Variants::load_from_json("/path/to/somatic_variants.json.gz")?;
//! let stats = VariantsStats::new(&variants, "PATIENT1", &config)?;
//! stats.save_to_json("/output/path/stats.json.gz")?;
//! ```
//!
//! ### 4. 🧠 Intelligent Task Management (`collection` module)
//!
//! - Auto-discovers available samples, POD5s, BAMs, and VCFs
//! - Detects missing outputs and creates task lists
//! - Tasks are parallelizable using Rayon and can be run on-demand
//!
//! ```rust
//! let mut collections = Collections::new(CollectionsConfig::default())?;
//! collections.todo()?;      // Identify missing steps
//! collections.run()?;       // Run them automatically
//! ```
//!
//! ## 🔬 Testing
//!
//! Integration tests demonstrate the entire pipeline. Run with logging enabled:
//!
//! ```bash
//! export RUST_LOG=debug
//! cargo test -- --nocapture
//! ```
//!
//! ## 🧪 Example Use Cases
//!
//! - Full somatic variant calling pipeline on matched tumor/normal samples
//! - POD5-based pipeline from raw signal to variants
//! - Aggregation and annotation of SVs across a clinical cohort
//! - Methylation analysis using nanopore-specific tools
//! - Variant calling and analysis in large-scale longitudinal studies
//!
//! ## 🚀 Getting Started
//!
//! All workflows are initialized from `Config` and driven by the `Collections` structure:
//!
//! ```rust
//! let collections = Collections::new(CollectionsConfig::default())?;
//! collections.todo()?;
//! collections.run()?;
//! ```
//!
//! ## 🔗 References
//! 
//! **Basecalling and alignment**
//! - Dorado: <https://github.com/nanoporetech/dorado>
//!
//! **Variants Callers**
//! - ClairS: <https://github.com/HKU-BAL/ClairS>
//! - Nanomonsv: <https://github.com/friend1ws/nanomonsv>
//! - Savana: <https://github.com/cortes-ciriano-lab/savana>
//! - DeepVariant: <https://github.com/google/deepvariant>
//! - DeepSomatic: <https://github.com/google/deepsomatic>
//! - LongPhase: <https://github.com/PorubskyResearch/LongPhase>
//! - Modkit: <https://github.com/nanoporetech/modkit>
//!
//! **Variants annotation**
//! - VEP: <https://www.ensembl.org/info/docs/tools/vep/index.html>
//!
//! ---

use std::sync::{Arc, Mutex};

pub mod commands;
pub mod config;
pub mod modkit;
pub mod callers;
pub mod runners;
pub mod collection;
pub mod functions;
pub mod helpers;
pub mod variant;
pub mod io;
pub mod pipes;
pub mod positions;
pub mod annotation;
pub mod cases;
pub mod scan;
pub mod math;

#[macro_use]
extern crate lazy_static;

// Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
lazy_static! {
    static ref DOCKER_ID: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
}

#[cfg(test)]
mod tests {
    use std::{collections::HashMap, fs, path::Path};

    use annotation::{vep::{VepLine, VEP}, Annotations};
    use callers::{nanomonsv::nanomonsv_create_pon, savana::{Savana, SavanaReadCounts}, severus::{Severus, SeverusSolo}};
    use collection::{bam::{counts_at, counts_ins_at, nt_pileup, WGSBam, WGSBamStats}, Initialize, InitializeSolo, Version};
    use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
    use functions::assembler::{Assembler, AssemblerConfig};
    use helpers::estimate_shannon_entropy;
    use io::bed::read_bed;
    use itertools::Itertools;
    use log::{error, info, warn};
    use pipes::somatic::SomaticPipe;
    use positions::{overlaps_par, GenomePosition, GenomeRange};
    use rayon::prelude::*;
    use runners::Run;
    use variant::{variant::{Variants, VcfVariant}, variant_collection};

    use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
    use super::*;
    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{bed::bedrow_overlaps_par, dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, group_variants_by_bnd_rc, Variant}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};

    // export RUST_LOG="debug"
    fn init() {
        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
            .is_test(true)
            .try_init();
    }

    #[test]
    fn it_works() {
        let bam_path = "/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam";
        modkit::modkit(bam_path);
    }

    #[test]
    fn run_dorado() -> anyhow::Result<()> {
        let case = FlowCellCase { 
            id: "CONSIGNY".to_string(), 
            time_point: "mrd".to_string(), barcode: "07".to_string(), pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into() 
        };
        dorado::Dorado::init(case, Config::default())?.run_pipe()
    }

    #[test]
    fn pod5() -> anyhow::Result<()> {
        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
            .build();

        let coll = Pod5Collection::new(
            "/data/run_data",
            "/data/flow_cells.tsv",
            "/data/longreads_basic_pipe",
        )?;
        println!("{coll:#?}");
        // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
        Ok(())
    }

    #[test]
    fn bam() -> anyhow::Result<()> {
        init();
        let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");

        bam_collection
            .bams
            .iter()
            // .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
            .for_each(|b| println!("{b:#?}"));

        let u = bam_collection.get("PARACHINI", "mrd");
        println!("{u:#?}");

        Ok(())
    }

    #[test]
    fn vcf() -> anyhow::Result<()> {
        init();
        let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
        vcf_collection.sort_by_id();
        vcf_collection
            .vcfs
            .iter()
            .for_each(|v| v.println().unwrap());

        Ok(())
    }

    // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
    // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
    #[test]
    fn mux() -> anyhow::Result<()> {
        init();
        let result_dir = "/data/test_suite/results".to_string();
        let cases = vec![
            FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
            FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
        ];

        cases.iter().for_each(|c| {
            let dir = format!("{result_dir}/{}", c.id);
            if Path::new(&dir).exists() {
                fs::remove_dir_all(dir).unwrap();
            }
        });
        let config = Config { result_dir, ..Default::default()  };
        Dorado::from_mux(cases, config)
    }

    // #[test_log::test]
    // fn clairs() -> anyhow::Result<()> {
    //     let config = ClairSConfig {
    //         result_dir: "/data/test".to_string(),
    //         ..ClairSConfig::default()
    //     };
    //     ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
    // }

    #[test]
    fn nanomonsv() -> anyhow::Result<()> {
        init();
        let id = "HAMROUNE";
        NanomonSV::initialize(id, Config::default())?.run()
    }

    #[test]
    fn nanomddonsv_solo() -> anyhow::Result<()> {
        init();
        NanomonSVSolo::initialize("BRETON", "diag", Config::default())?.run()
    }

    // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
    #[test]
    fn todo_all() -> anyhow::Result<()> {
        init();
        // let config = CollectionsConfig::default();
        let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
        info!("Runing todo with config: {:#?}", config);
        let mut collections = Collections::new(config)?;
        collections.todo()?;
        collections.tasks.iter().for_each(|t| println!("{t}"));
        println!("{}", collections.tasks.len());
        Ok(())
    }

    // #[test]
    // fn todo_agg() -> anyhow::Result<()> {
    //     init();
    //     let config = CollectionsConfig::default();
    //     info!("Runing todo with config: {:#?}", config);
    //     let collections = Collections::new(config)?;
    //     let agg_tasks = collections.todo_variants_agg()?;
    //     println!("{:#?}", agg_tasks);
    //     println!("{}", agg_tasks.len());
    //     Ok(())
    // }

    // #[test]
    // fn run_agg() -> anyhow::Result<()> {
    //     init();
    //     let config = CollectionsConfig {
    //         id_black_list: vec!["MANCUSO".to_string(),"HAMROUNE".to_string()],
    //         ..Default::default()
    //     };
    //     info!("Runing todo with config: {:#?}", config);
    //     let mut collections = Collections::new(config)?;
    //     collections.tasks = collections.todo_variants_agg()?;
    //     collections.run()?;
    //     
    //     Ok(())
    // }

    // export RUST_LOG="debug"
    #[test]
    fn run_t() -> anyhow::Result<()> {
        init();
        // let config = CollectionsConfig::default();
        let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };

        run_tasks(config)
    }

    // #[test_log::test]
    // fn bcftools_pass() {
    //     let config = BcftoolsConfig::default();
    //     let id = "RICCO";
    //     let time = "diag";
    //     let caller = "DeepVariant";
    //
    //     Config::default();
    //
    //     // let (i, o) = 
    //     // let i = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag.nanomonsv.result.vcf");
    //     // let o = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
    //     bcftools_keep_pass(&i, &o, config).unwrap();
    // }

    #[test]
    fn bam_ok() -> anyhow::Result<()> {
        init();
        let  collections = Collections::new(
            CollectionsConfig::default()
        )?;
        let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
            (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
        }).collect();
        res.sort_by_key(|b| b.1.clone());
        res.sort_by_key(|b| b.0.clone());
        
        res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
        Ok(())
    }

    #[test]
    fn todo_assembler() -> anyhow::Result<()> {
        init();
        let collections = Collections::new(
            CollectionsConfig::default()
        )?;
        collections.todo_assembler()?;
        Ok(())
    }

    #[test]
    fn sv_pon() -> anyhow::Result<()> {
        init();
        nanomonsv_create_pon(&Config::default(), "/data/ref/hs1/nanomonsv_pon.vcf.gz")
    }

    #[test]
    fn todo_mod() -> anyhow::Result<()> {
        init();
        let collections = Collections::new(
            CollectionsConfig::default()
        )?;
        collections.todo_mod_pileup();
        Ok(())
    }

    #[test]
    fn todo_deepv() -> anyhow::Result<()> {
        init();
        let collections = Collections::new(
            CollectionsConfig::default()
        )?;
        let tasks = collections.todo_deepvariants();
        tasks.iter().for_each(|t| info!("{t}"));
        info!("n tasks {}", tasks.len());
        Ok(())
    }

    #[test]
    fn todo_clairs() -> anyhow::Result<()> {
        init();
        let collections = Collections::new(
            CollectionsConfig::default()
        )?;
        collections.todo_clairs().iter().for_each(|t| info!("{t}"));
        Ok(())
    }

    #[test]
    fn run_assemblers() -> anyhow::Result<()> {
        Assembler::new("CAMEL".to_string(), "diag".to_string(), AssemblerConfig::default()).run()
    }

    // #[test]
    // fn run_dmr_par() -> anyhow::Result<()> {
    //     init();
    //     let collections = Collections::new(
    //         CollectionsConfig::default()
    //     )?;
    //     let tasks = collections.todo_dmr_c_diag_mrd();
    //     tasks.iter().for_each(|t| info!("{t}"));
    //     let len = tasks.len();
    //     // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap();
    //     // pool.install(|| {
    //     //     tasks.par_iter().enumerate().for_each(|(i, t)| {
    //     //         let config = ModkitConfig {threads: 2,  ..Default::default() };
    //     //         if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); }
    //     //         println!("⚡ {i}/{len}");
    //     //     });
    //     // });
    //     Ok(())
    // }

    #[test]
    fn run_mod_par() -> anyhow::Result<()> {
        init();
        let collections = Collections::new(
            CollectionsConfig::default()
        )?;
        let tasks = collections.todo_mod_pileup();
        let len = tasks.len();

        tasks.par_iter().enumerate().for_each(|(i, t)| {
            let config = ModkitConfig {threads: 2,  ..Default::default() };
            if let collection::CollectionsTasks::ModPileup { bam, .. } = t { let _ = bed_methyl(bam.to_owned(), &config); }
            println!("⚡ {i}/{len}");
        });
        Ok(())
    }

    #[test]
    fn run_severus() -> anyhow::Result<()> {
        init();
        Severus::initialize("CAMEL", Config::default())?.run()
    }

    #[test]
    fn run_severus_solo() -> anyhow::Result<()> {
        init();
        SeverusSolo::initialize("CAMEL","diag", Config::default())?.run()
    }

    #[test]
    fn run_savana() -> anyhow::Result<()> {
        init();
         let  collections = Collections::new(
            CollectionsConfig::default()
        )?;
        for bam in collections.bam.by_id_completed(15.0, 10.0).iter() {
            let id = &bam.id;
            match ClairS::initialize(id, Config::default())?.run() {
                Ok(_) => match Savana::initialize(id, Config::default())?.run() {
                    Ok(_) => (),
                    Err(e) => error!("{e}"),
                },
                Err(e) => error!("{e}"),
            }
            ;
        }
        Ok(())
    }

    #[test]
    fn check_versions() -> anyhow::Result<()> {
        init();
        let config = Config::default();
        let v = Savana::version(&config)?;
        info!("Savanna version {v}");
        let v = Severus::version(&config)?;
        info!("Severus version {v}");
        Ok(())
    }

    #[test]
    fn run_multi_deepvariant() -> anyhow::Result<()> {
        init();
        let mut collections = Collections::new(
            CollectionsConfig::default()
        )?;
        collections.run_deepvariant()
    }

    #[test]
    fn run_deepvariant() -> anyhow::Result<()> {
        init();
        DeepVariant::initialize("HAMROUNE", "diag", Config::default())?.run()
    }

    #[test]
    fn run_clairs() -> anyhow::Result<()> {
        init();
        ClairS::initialize("ADJAGBA", Config::default())?.run()
    }

    #[test]
    fn run_longphase() -> anyhow::Result<()> {
        init();
        let id = "BECERRA";
        let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
        let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz");
        let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");

        LongphaseHap::new(id, &diag_bam, &vcf, LongphaseConfig::default()).run()?;
        LongphaseHap::new(id, &mrd_bam, &vcf, LongphaseConfig::default()).run()
    }

    #[test]
    fn run_longphase_modcall() -> anyhow::Result<()> {   
        init();
        let id = "ADJAGBA";
        let time = "diag";
        LongphaseModcallSolo::initialize(id, time, Config::default())?.run()
    }

    #[test]
    fn run_longphase_phase() -> anyhow::Result<()> {   
        init();
        let id = "ADJAGBA";
        LongphasePhase::initialize(id, Config::default())?.run()
    }

    #[test]
    fn variant_parse() -> anyhow::Result<()> {
        let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.66667:6,4,0";
        let variant: VcfVariant = row.parse()?;
        let var_string = variant.into_vcf_row();
        assert_eq!(row, &var_string);

        let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.";
        let variant: VcfVariant = row.parse()?;
        let var_string = variant.into_vcf_row();
        assert_eq!(row, &var_string);

        let row = "chr1\t2628434\t.\tC\tT\t17.973\tPASS\tH;FAU=0;FCU=7;FGU=0;FTU=7;RAU=0;RCU=2;RGU=0;RTU=2\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:17:18:0.5:0,9:0:11:0,0:0:9:0:9:0:11:0:0";
        let variant: VcfVariant = row.parse()?;
        let var_string = variant.into_vcf_row();
        assert_eq!(row, &var_string);

        let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/.:1:24:3,5:0.208333";
        let variant: VcfVariant = row.parse()?;
        let var_string = variant.into_vcf_row();
        assert_eq!(row, &var_string);

        let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/1:1:24:3,5:0.208333";
        let variant_b: VcfVariant = row.parse()?;
        assert_eq!(variant, variant_b);
        let row = "chr1\t475157\t.\tA\tG\t12.301\tPASS\tH;FAU=2;FCU=0;FGU=2;FTU=0;RAU=3;RCU=0;RGU=3;RTU=0\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:12:10:0.5:0,5:0.0769:13:0,1:5:0:5:0:12:0:1:0";
        let variant: VcfVariant = row.parse()?;
        let var_string = variant.into_vcf_row();
        assert_eq!(row, &var_string);

        let row = "chr1\t161417408\tr_10_0\tT\t[chr1:161417447[TTGGCAGGTTCC\t.\tPASS\tSVTYPE=BND;MATEID=r_10_1;SVINSLEN=11;SVINSSEQ=TTGGCAGGTTC\tTR:VR\t22:3\t12:0";
        let variant: VcfVariant = row.parse()?;
        println!("{variant:#?}");
        let u = variant.bnd_desc();
        println!("{u:#?}");

        // Severus mates are not in RC
        let vcf = "chr7\t27304522\tseverus_BND6747_1\tN\t[chr6:32688062[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_2;STRANDS=--;MAPQ=60;CLUSTERID=severus_2\tGT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
        let variant: VcfVariant = vcf.parse()?;
        let bnd_a = variant.bnd_desc()?;

        let vcf = "chr6\t32688062\tseverus_BND6747_2\tN\t[chr7:27304522[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_1;STRANDS=--;MAPQ=60;CLUSTERID=severus_2 GT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
        let variant: VcfVariant = vcf.parse()?;
        let bnd_b = variant.bnd_desc()?;
        assert_eq!(bnd_a, bnd_b.rc());

        println!("{bnd_a}\n{bnd_b}");

        // Savana here each mate are in RC 
        let vcf = "chr10\t102039096\tID_35957_2\tG\t]chr10:101973386]G\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_1;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=38,29;TUMOUR_DP_AT=44,21;TUMOUR_DP_AFTER=44,21;NORMAL_DP_BEFORE=13,15;NORMAL_DP_AT=13,15;NORMAL_DP_AFTER=13,15;TUMOUR_AF=0.159,0.333;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=20,16,8;NORMAL_TOTAL_HP_AT=6,7,0;TUMOUR_ALT_HP=0,1,6;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
        let variant: VcfVariant = vcf.parse()?;
        let bnd_a = variant.bnd_desc()?;

        let vcf = "chr10\t101973386\tID_35957_1\tA\tA[chr10:102039096[\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_2;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=29,38;TUMOUR_DP_AT=21,44;TUMOUR_DP_AFTER=21,44;NORMAL_DP_BEFORE=15,13;NORMAL_DP_AT=15,13;NORMAL_DP_AFTER=15,13;TUMOUR_AF=0.333,0.159;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=17,0,4;NORMAL_TOTAL_HP_AT=5,7,3;TUMOUR_ALT_HP=0,6,1;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
        let variant: VcfVariant = vcf.parse()?;
        let bnd_b = variant.bnd_desc()?;
        assert_eq!(bnd_a, bnd_b);

        println!("{bnd_a}\n{bnd_b}");

        // Deletions
        // Severus
        let vcf = "chr7\t143674704\tseverus_DEL7318\tN\t<DEL>\t60\tPASS≥\tPRECISE;SVTYPE=DEL;SVLEN=3642;END=143678346;STRANDS=+-;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:114:0.39:0.39,0,0:14:9";
        let variant: VcfVariant = vcf.parse()?;

        println!("{:?}", variant.infos);
        println!("{:?}", variant.formats);
        let del = variant.deletion_desc().unwrap();
        println!("{:?}", del);
        println!("{:?} {:?}", del.len(), variant.formats.n_alt_depth());

        assert_eq!("chr7:143674704_143678346_del", variant.deletion_desc().unwrap().to_string());

        println!("--\n");
        let vcf="chr7\t144003249\tr_106\tC\t<DEL>\t.\tPASS\tEND=144142733;SVTYPE=DEL;SVLEN=-139484;SVINSLEN=4;SVINSSEQ=GCCA\tTR:VR\t12:10\t51:0";
        let variant: VcfVariant = vcf.parse()?;

        println!("{:?}", variant.infos);
        println!("{:?}", variant.formats);
        let del = variant.deletion_desc().unwrap();
        println!("{:?}", del);
        println!("{:?} {:?}", del.len(), variant.formats.n_alt_depth());

        let path = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed";
        let r = read_bed(path)?;
        
        let deleted_genes = bedrow_overlaps_par(&r, &vec![&GenomeRange { contig: variant.position.contig, range: del.start..del.end }]).into_iter().filter_map(|e| {
            e.name
        }).collect::<Vec<String>>().join(", ");
        println!("{deleted_genes}");


        Ok(())
    }

    #[test]
    fn variant_load_deepvariant() -> anyhow::Result<()> {   
        init();
        let id = "ADJAGBA";
        let time = "diag";
        let dv = DeepVariant::initialize(id, time, Config::default())?;
        let annotations = Annotations::default();
        let variant_collection = dv.variants(&annotations)?;
        println!("Deepvariant for {id} {time}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
        Ok(())
    }

    #[test]
    fn variant_load_clairs() -> anyhow::Result<()> {   
        init();
        let id = "ADJAGBA";
        let clairs = ClairS::initialize(id, Config::default())?;
        let annotations = Annotations::default();
        let variant_collection = clairs.variants(&annotations)?;
        println!("ClairS for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
        Ok(())
    }

    #[test]
    fn variant_load_nanomonsv() -> anyhow::Result<()> {   
        init();
        let id = "ADJAGBA";
        let nanomonsv = NanomonSV::initialize(id, Config::default())?;
        let annotations = Annotations::default();
        let variant_collection = nanomonsv.variants(&annotations)?;
        println!("NanomonSV for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
        println!("{:?}", variant_collection.variants.first());
        Ok(())
    }

    #[test]
    fn variant_load_clairs_germline() -> anyhow::Result<()> {   
        init();
        let id = "ADJAGBA";
        let clairs = ClairS::initialize(id, Config::default())?;
        let annotations = Annotations::default();
        let germline_variant_collection = clairs.germline(&annotations)?;
        println!("ClairS for {id}: variants {} {}", germline_variant_collection.variants.len(), germline_variant_collection.vcf.n_variants);
        Ok(())
    }

    #[test]
    fn pipe_somatic() -> anyhow::Result<()> {   
        init();
        let collections = Collections::new(
            CollectionsConfig::default()
        )?;
        for (a, _) in collections.bam_pairs().iter() {
            if  ["AUBERT", "BAFFREAU", "BAILLEUL"].contains(&a.id.as_str()) {
                continue;
            }
            if let Err(e) = SomaticPipe::initialize(&a.id, Config::default()).map(|mut p| if p.should_run() {
                if let Err(e) = p.run() {
                    error!("{e}");
                }
            }) {
                error!("{e}");
            }
        }
        Ok(())
        // let id = "VILI";
        // SomaticPipe::initialize(id, Config::default())?.run()
    }

    #[test]
    fn overlaps() {
        init();
        let positions = vec![
            &GenomePosition { contig: 1, position: 100 },
            &GenomePosition { contig: 1, position: 150 },
            &GenomePosition { contig: 1, position: 200 },
            &GenomePosition { contig: 2, position: 150 },
        ];

        let ranges = vec![
            &GenomeRange { contig: 1, range: 50..150 },
            &GenomeRange { contig: 2, range: 100..200 },
        ];

        let parallel_overlapping_indices = overlaps_par(&positions, &ranges);
        assert_eq!(parallel_overlapping_indices, vec![0, 3])
    }

    #[test]
    fn bed_read() -> anyhow::Result<()> {
        init();
        let path = &Config::default().mask_bed("ADJAGBA");
        let r = read_bed(path)?;
        println!("{}", r.len());
        Ok(())
    }

    #[test]
    fn test_read_dict() -> anyhow::Result<()> {
        init();
        let genome = read_dict(&Config::default().dict_file)?;
        let genome_length: usize = genome.into_iter().map(|(_, len)| len as usize).sum();
        println!("{genome_length}");
        Ok(())
    }

    #[test]
    fn bases_at() -> anyhow::Result<()> {
        init();
        let id = "ADJAGBA";
        let c = Config::default();
        let chr = "chr3";
        let position = 62416039; // 1-based
        let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "diag"))?;
        let p = nt_pileup(&mut bam, chr, position - 1, false)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
        let mut counts = HashMap::new();

        for item in p.iter() {
            *counts.entry(item.as_str()).or_insert(0) += 1;
        }

        for (key, value) in &counts {
            println!("{}: {}", key, value);
        }

        assert_eq!(8, *counts.get("C").unwrap());
        assert_eq!(13, *counts.get("G").unwrap());
        assert_eq!(6, *counts.get("D").unwrap());

        let chr = "chr1";
        let position = 3220; // 1-based

        let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
        let p = counts_at(&mut bam, chr, position - 1)?;

        println!("{p:#?}");
        Ok(())
    }

    #[test]
    fn seq_at() -> anyhow::Result<()> {
        init();
        let c = Config::default();
        let chr = "chr3";
        let position = 716766;

        let mut fasta_reader  = noodles_fasta::indexed_reader::Builder::default().build_from_path(c.reference)?;
        let r = io::fasta::sequence_at(&mut fasta_reader, chr, position, 10)?;
        println!("{r} ({} {:.2})", r.len(), estimate_shannon_entropy(r.as_str()));
        Ok(())
    }

    #[test]
    fn ins_at() -> anyhow::Result<()> {
        init();
        let id = "ADJAGBA";
        let c = Config::default();
        let chr = "chr1";
        let position = 52232; // 1-based like in vcf
        let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
        // let p = ins_pileup(&mut bam, chr, position - 1, true)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
        let counts = counts_ins_at(&mut bam, chr, position -1)?;
        for (key, value) in &counts {
            println!("{}: {}", key, value);
        }
        
        Ok(())
    }

    #[test]
    fn vep_line() -> anyhow::Result<()> {
        init();

        let line = "chr2_1922358_-/T\tchr2:1922357-1922358\tT\tMYT1L\tNM_001303052.2\tTranscript\tintron_variant\t-\t-\t-\t-\t-\t-\tIMPACT=MODIFIER;STRAND=-1;SYMBOL=MYT1L;SOURCE=chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz;HGVSc=NM_001303052.2:c.1619-613dup";
        let vep_line: VepLine = line.parse()?;
        println!("{vep_line:#?}");

        let vep: VEP = VEP::try_from(&vep_line)?;
        println!("{vep:#?}");

        Ok(())
    }

    #[test]
    fn savana_cn() -> anyhow::Result<()> {
        init();
        let id = "CAMARA";
        // let s = SavanaCopyNumber::load_id(id, Config::default())?;
        let s = SavanaReadCounts::load_id(id, Config::default())?;
        println!("tumoral reads: {}", s.n_tumoral_reads());
        println!("normal reads: {}", s.n_normal_reads());
        println!("tumoral:\n{:#?}", s.norm_chr_counts());
        Ok(())
    }

    #[test]
    fn coverage() -> anyhow::Result<()> {
        init();
        let id = "CAMARA";
        let time = "diag";
        WGSBamStats::new(id, time, Config::default())?.print();
        Ok(())
    }

    #[test]
    fn load_bam() -> anyhow::Result<()> {
        init();
        let id = "ADJAGBA";
        let time = "diag";
        let bam_path = Config::default().solo_bam(id, time);
        WGSBam::new(Path::new(&bam_path).to_path_buf())?;
        Ok(())
    }


    #[test]
    fn tar() -> anyhow::Result<()> {
        init();
        scan_archive("/data/lto/20241030_CUNVI-DG-N20_SEBZA-DG-N21.tar")?;
        Ok(())
    }

    #[test]
    fn run_somatic() -> anyhow::Result<()> {
        init();
         let  collections = Collections::new(
            CollectionsConfig::default()
        )?;
        let bams = collections.bam.by_id_completed(15.0, 10.0);
        let n = bams.len();
        let mut config = Config::default();
        // config.somatic_scan_force = true;
        warn!("{n} cases");
        for (i, bam) in bams.iter().enumerate() {
            let id = &bam.id;
            warn!("{i}/{n} {id}");
            if id == "BANGA" {
                continue;
            }
            if id == "ARM" {
                continue;
            }


            match SomaticPipe::initialize(id, config.clone())?.run() {
                Ok(_) => (),
                Err(e) => error!("{id} {e}"),
            };
        }
        Ok(())
    }

    #[test]
    fn somatic_cases() -> anyhow::Result<()> {
        init();
        let id = "ACHITE";
        let config = Config { somatic_pipe_force: true, ..Default::default() };
        match SomaticPipe::initialize(id, config)?.run() {
            Ok(_) => (),
            Err(e) => error!("{id} {e}"),
        };
        Ok(())
    }
    
    #[test]
    fn load_variants() -> anyhow::Result<()> {
        init();
        let id = "ACHITE";
        let config = Config::default();
        let path = format!("{}/{id}/diag/{id}_somatic_variants.bit", config.result_dir);
        let variants = variant_collection::Variants::load_from_file(&path)?;
        println!("n variants {}", variants.data.len());

        let n_vep: usize = variants.data.iter().map(|v| v.vep().len()).sum();
        println!("VEP: {n_vep}");

        let translocations = variants.get_alteration_cat(AlterationCategory::TRL);
        println!("{} translocations", translocations.len());
        let threshold = 5;
        let res = group_variants_by_bnd_desc(&translocations, 5);
        
        let rres = group_variants_by_bnd_rc(&res, threshold);

        rres.iter().for_each(|group| {
            println!("{} {}", group.0.len(), group.1.len());
        });
       
        Ok(())
    }

    #[test]
    fn load_fc() -> anyhow::Result<()> {
        init();
        // FlowCells::load_archive_from_scan("/data/lto", "/data/archives.json")?;
        let r = FlowCells::load("/home/prom/mnt/store", 
            "/data/pandora_id_inputs.json", "/data/archives.json.gz")?;
        println!("{r:#?}");
        Ok(())
    }

    #[test]
    fn alt_cat() -> anyhow::Result<()> {
        let id = "ADJAGBA";
        let config = Config::default();
        let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
        let variants = variant_collection::Variants::load_from_json(&path)?;
        println!("n variants {}", variants.data.len());
        variants.data.iter()
            .filter(|v| v.alteration_category().contains(&AlterationCategory::TRL))
            .for_each(|v| {
            println!("{:?} {}", 
                v.vcf_variants.iter().map(|v| v.bnd_desc()).collect::<Vec<_>>(), 
                v.annotations.iter().filter(|a| matches!(a, Annotation::Callers(..))).map(|a| a.to_string()).collect::<Vec<String>>().join(";")
            )
        });
        Ok(())
    }

    #[test]
    fn variants_stats() -> anyhow::Result<()> {
        init();
        let config = Config::default();

        let all_variants_bit = find_files(&format!("{}/*/diag/*_somatic_variants.bit", config.result_dir))?;
        for v in all_variants_bit.into_iter() {
            let id = v.file_name().unwrap().to_str().unwrap().split("_somatic").next().unwrap();
            println!("{id}");

            let config = Config::default();
            let path = format!("{}/{id}/diag/{id}_somatic_variants.bit", config.result_dir);
            match variant_collection::Variants::load_from_file(&path) {
                Ok(mut variants) => {
                    let (mut high_depth_ranges, _) =
                    somatic_depth_quality_ranges(&id, &config)?;
                    high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));

                    let res = VariantsStats::new(&mut variants, id, &config, &high_depth_ranges)?.save_to_json(&format!(
                        "{}/{id}/diag/{id}_somatic_variants_stats.json.gz", config.result_dir));
                    if res.is_err() {
                        info!("{:#?}", res);
                    }
                },
                Err(e) => error!("{e}"),
            }
        }
        Ok(())

    }

    #[test]
    fn constit_stats() {
        init();
        let id = "ADJAGBA";
        let config = Config::default();

        let _ = const_stats(id.to_string(), config);
    }

    #[test]
    fn test_bnd() -> anyhow::Result<()> {
        init();
        let id = "COIFFET";
        let config = Config::default();

        let annotations = Annotations::default();
        let s = Savana::initialize(id, config)?.variants(&annotations)?;
        s.variants.iter().for_each(|e| {
            if let Ok(bnd) = e.bnd_desc() {
                println!("{}\t{}\t{}", e.position , e.reference, e.alternative);
                println!("{:#?}", bnd);
            }
        });
        Ok(())
    }

    #[test]
    fn parse_savana_seg() {
        init();
        let r = SavanaCN::parse_file("ADJAGBA", &config::Config::default()).unwrap().segments;
        println!("{} lines", r.len());
        println!("{:#?}", r.first().unwrap());
    }

    #[test]
    fn whole_scan() -> anyhow::Result<()> {
        init();
        let id = "CHENU";
        let mut config = Config::default();
        let u = config.solo_bam(id, "mrd");
        println!("{u}");

        config.somatic_scan_force = true;
        somatic_scan(id, &config)?;
        Ok(())
    }

    #[test]
    fn parse_gff() -> anyhow::Result<()> {
        init();
        let id = "ADJAGBA";
        let config = Config::default();
        let path = format!("{}/{id}/diag/somatic_variants.bit", config.result_dir);
        let exon_ranges = features_ranges("exon", &config)?;
        let exon_ranges = merge_overlapping_genome_ranges(&exon_ranges);
        
        let variants = variant_collection::Variants::load_from_file(&path)?;
        let full = variants_stats::somatic_rates(&variants.data, &exon_ranges, &config);
        info!("{full:#?}");

        // let restrained: Vec<variant_collection::Variant> = variants.data.iter().filter(|v| v.vcf_variants.len() >= 2)
        //     .cloned().collect();
        // let min_2 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
        // info!("{min_2:#?}");
        //
        // let restrained: Vec<variant_collection::Variant> = restrained.iter().filter(|v| v.vcf_variants.len() >= 3)
        //     .cloned().collect();
        // let min_3 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
        // info!("{min_3:#?}");

        // let mut high_depth_ranges = variants_stats::high_depth_somatic(id, &config)?;
        // high_depth_ranges.par_sort_by_key(|r| ( r.contig, r.range.start ));
        //
        // let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
        // let exons_high_depth = range_intersection_par(&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(), &exon_ranges_ref);
        //
        // let full = variants_stats::somatic_rates(&variants.data, &exons_high_depth, &config);
        // info!("{full:#?}");
        //


        // info!("n variants loaded: {}", variants.data.len());
        // 
        // let r = features_ranges("exon", &config::Config::default())?;
        // info!("n exon: {}", r.len());
        //
        // let merged = merge_overlapping_genome_ranges(&r);
        // info!("n merged exon: {}", merged.len());
        //
        // let ol = par_overlaps(&variants.data, &r);
        // info!("variants in exon {}", ol.len());
        //
        // let n_coding = ol.iter().filter_map(|i| variants.data[*i].best_vep().ok() ).filter_map(|bv| bv.impact()).filter(|impact| *impact <= VepImpact::MODERATE).count();
        // info!("coding variants {n_coding}");
        // 
        // let n_bases_m = merged.par_iter().map(|gr| gr.length()).sum::<u32>();
        // info!("{n_bases_m}nt");
        // 
        // let mega_base_m = n_bases_m as f64 / 10.0e6;
        //
        // let wgs_len = read_dict(&config.dict_file)?.iter().map(|(_, l)| *l).sum::<u32>();
        // info!("wgs len {wgs_len}");
        // let rate_wgs = variants.data.len() as f64 / (wgs_len as f64 / 10.0e6);
        // info!("somatic mutation rate {rate_wgs:.2}/mb");
        //
        // let n_exons_mb = ol.len() as f64 / mega_base_m;
        // info!("somatic mutation rate in the coding region {n_exons_mb:.2}/mb");
        // 
        // let n_exons_mb = n_coding as f64 / mega_base_m;
        // info!("somatic non synonymous mutation rate in the coding region {n_exons_mb:.2}/mb");

        Ok(())
    }

    fn gr(contig: u8, start: u32, end: u32) -> GenomeRange {
        GenomeRange {
            contig,
            range: start..end,
        }
    }

    #[test]
    fn test_both_empty() {
        let a: Vec<&GenomeRange> = vec![];
        let b: Vec<&GenomeRange> = vec![];
        let result = range_intersection_par(&a, &b);
        assert!(result.is_empty());
    }

    #[test]
    fn test_one_empty() {
        let a = [gr(1, 0, 100)];
        let b: Vec<&GenomeRange> = vec![];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b);
        sort_ranges(&mut result);
        assert!(result.is_empty());
    }

    #[test]
    fn test_single_range_no_overlap() {
        let a = [gr(1, 0, 100)];
        let b = [gr(1, 100, 200)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        sort_ranges(&mut result);
        assert!(result.is_empty());
    }

    #[test]
    fn test_single_range_full_overlap() {
        let a = [gr(1, 0, 100)];
        let b = [gr(1, 0, 100)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        let mut expected = [gr(1, 0, 100)];
        sort_ranges(&mut result);
        sort_ranges(&mut expected);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_different_contigs() {
        let a = [gr(1, 0, 100)];
        let b = [gr(2, 0, 100)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        sort_ranges(&mut result);
        assert!(result.is_empty());
    }

    #[test]
    fn test_touching_ranges() {
        let a = [gr(1, 0, 100)];
        let b = [gr(1, 100, 200)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        sort_ranges(&mut result);
        assert!(result.is_empty());
    }

    #[test]
    fn test_complete_subrange() {
        let a = [gr(1, 0, 200)];
        let b = [gr(1, 50, 150)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        let mut expected = [gr(1, 50, 150)];
        sort_ranges(&mut result);
        sort_ranges(&mut expected);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_multiple_overlaps_same_contig() {
        let a = [gr(1, 0, 50), gr(1, 75, 125)];
        let b = [gr(1, 25, 100), gr(1, 150, 200)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        let mut expected = [gr(1, 25, 50), gr(1, 75, 100)];
        sort_ranges(&mut result);
        sort_ranges(&mut expected);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_multiple_contigs() {
        let a = [gr(1, 0, 100), gr(2, 50, 150), gr(3, 200, 300)];
        let b = [gr(1, 50, 150), gr(2, 0, 100), gr(4, 0, 100)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        let mut expected = [gr(1, 50, 100), gr(2, 50, 100)];
        sort_ranges(&mut result);
        sort_ranges(&mut expected);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_adjacent_ranges() {
        let a = [gr(1, 0, 50), gr(1, 50, 100)];
        let b = [gr(1, 25, 75)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        let mut expected = [gr(1, 25, 50), gr(1, 50, 75)];
        sort_ranges(&mut result);
        sort_ranges(&mut expected);
        assert_eq!(result, expected);
    }

    #[test]
    fn test_minimal_overlap() {
        let a = [gr(1, 0, 100)];
        let b = [gr(1, 99, 200)];
        let a_refs: Vec<&GenomeRange> = a.iter().collect();
        let b_refs: Vec<&GenomeRange> = b.iter().collect();
        let mut result = range_intersection_par(&a_refs, &b_refs);
        let mut expected = [gr(1, 99, 100)];
        sort_ranges(&mut result);
        sort_ranges(&mut expected);
        assert_eq!(result, expected);
    }

    #[test]
    fn todo_scan() -> anyhow::Result<()> {
        init();
        let mut collections = Collections::new(
            CollectionsConfig::default()
        )?;
        collections.todo_bam_count(&Config::default())?;
        collections.tasks.iter().for_each(|t| info!("{t}"));

        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(100)
            .build()
            .unwrap();

        pool.install(move || {
            collections.tasks.into_iter().for_each(|t| {
                // info!("{t}");
                if let Err(e) = t.run() {
                    error!("{e}");
                }
            });
        });

        Ok(())
    }

    /// helper to build a forward‑strand BND (same contig) where
    ///   A = pos,  B = pos + 5
    fn fwd(contig: &str, pos: u32) -> BNDDesc {
        BNDDesc {
            a_contig: contig.into(),
            a_position: pos,
            a_sens: true,
            b_contig: contig.into(),
            b_position: pos + 5,
            b_sens: true,
            added_nt: String::new(),
        }
    }

    /// Build a six‑node *forward* chain relying **only** on `auto_connect()`
    /// (no manual edges) and assert the Hamiltonian path spans all nodes.
    #[test]
    fn hamiltonian_chain_auto() {
        // positions 10,15   20,25   …  60,65  satisfy B(u) ≤ A(v)
        let bnds: Vec<BNDDesc> = (1..=6).map(|i| fwd("chr1", i * 10)).collect();
        let g: BNDGraph<()> = bnds.to_bnd_graph(); // trait uses auto_connect()
        // ensure auto_connect produced 5 edges in a line
        assert_eq!(g.inner().edge_count(), 5);
        let path = g.hamiltonian_path().expect("chain should be Hamiltonian");
        assert_eq!(path.len(), 6);
    }

    /// Two disconnected "V" shapes -> auto_connect creates 2 edges in each
    /// component, no Hamiltonian path, components sorted by size.
    #[test]
    fn components_after_auto_connect() {
        // comp1: a->b<-c on reverse strand of chrX
        let a = BNDDesc { 
            a_contig: "chrX".into(), a_position: 300, a_sens: false,
            b_contig: "chrX".into(), b_position: 250, b_sens: false, 
            added_nt: String::new() 
        };
        let b = BNDDesc { 
            a_contig: "chrX".into(), a_position: 200, a_sens: false, 
            b_contig: "chrX".into(), b_position: 150, b_sens: false, 
            added_nt: String::new() 
        };
        let c = BNDDesc { 
            a_contig: "chrX".into(), a_position: 100, a_sens: false, 
            b_contig: "chrX".into(), b_position:  50, b_sens: false,
            added_nt: String::new() 
        };
        // comp2: three‑node forward chain on chrY
        let d = fwd("chrY", 10);
        let e = fwd("chrY", 20);
        let f = fwd("chrY", 30);
        let g: BNDGraph<()> = vec![a,b,c,d,e,f].to_bnd_graph();
        assert!(g.hamiltonian_path().is_none());
        let comps = g.components_by_size();
        comps.iter().for_each(|a| println!("{}", g.fmt_path(a) ));
        assert_eq!(comps.len(), 2);
        assert_eq!(comps[0].len(), 3);
        assert_eq!(comps[1].len(), 3);
    }

    #[test]
    fn bnd_connections() {
        // comp1: a->b<-c on reverse strand of chrX
        let a = BNDDesc { 
            a_contig: "chrX".into(), a_position: 300, a_sens: true,
            b_contig: "chr14".into(), b_position: 250, b_sens: false, 
            added_nt: String::new() 
        };
        let b = BNDDesc { 
            a_contig: "chr14".into(), a_position: 200, a_sens: false, 
            b_contig: "chrX".into(), b_position: 301, b_sens: true, 
            added_nt: String::new() 
        };
        let c = BNDDesc { 
            a_contig: "chrX".into(), a_position: 302, a_sens: true, 
            b_contig: "chrZ".into(), b_position:  50, b_sens: false,
            added_nt: String::new() 
        };
        // comp2: three‑node forward chain on chrY
        let d = fwd("chrY", 10);
        let e = fwd("chrY", 20);
        let f = fwd("chrY", 30);
        let g: BNDGraph<()> = vec![a,b,c,d,e,f].to_bnd_graph();
        assert!(g.hamiltonian_path().is_none());
        let comps = g.components_by_size();
        comps.iter().for_each(|a| println!("{}", g.fmt_path(a) ));
        assert_eq!(comps.len(), 2);
        assert_eq!(comps[0].len(), 3);
        assert_eq!(comps[1].len(), 3);
    }
}