use std::sync::{Arc, Mutex}; pub mod commands; pub mod config; pub mod modkit; pub mod callers; pub mod runners; pub mod collection; pub mod functions; pub mod helpers; pub mod variant; pub mod io; // pub mod vcf_reader; #[macro_use] extern crate lazy_static; // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed lazy_static! { static ref DOCKER_ID: Arc>> = Arc::new(Mutex::new(Vec::new())); } #[cfg(test)] mod tests { use std::{fs, path::Path}; use callers::{nanomonsv::nanomonsv_create_pon, savana::Savana, severus::{Severus, SeverusSolo}}; use collection::{Initialize, InitializeSolo, Version}; use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}}; use functions::assembler::{Assembler, AssemblerConfig}; use log::info; // use pandora_lib_assembler::assembler::AssembleConfig; use rayon::prelude::*; use runners::Run; use variant::variant::{Variant, Variants}; use self::{callers::deep_variant::DeepVariantConfig, collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config}; use super::*; use crate::{callers::{clairs::{ClairS, ClairSConfig}, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVConfig, NanomonSVSolo}}, collection::{bam::{self, BamType}, run_tasks, variants::VariantsCollection, vcf::VcfCollection, Collections, CollectionsConfig}, commands::{bcftools::{bcftools_keep_pass, BcftoolsConfig}, dorado::Dorado}}; // export RUST_LOG="debug" fn init() { let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) .is_test(true) .try_init(); } #[test] fn it_works() { let bam_path = "/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam"; modkit::modkit(bam_path); } #[test] fn run_dorado() -> anyhow::Result<()> { let case = FlowCellCase { id: "CONSIGNY".to_string(), time_point: "mrd".to_string(), barcode: "07".to_string(), pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into() }; dorado::Dorado::init(case, Config::default())?.run_pipe() } #[test] fn pod5() -> anyhow::Result<()> { let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")) .build(); let _ = Pod5Collection::new( "/data/run_data", "/data/flow_cells.tsv", "/data/longreads_basic_pipe", )?; // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?; Ok(()) } #[test_log::test] fn bam() -> anyhow::Result<()> { let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe"); bam_collection .bams .iter() .filter(|b| matches!(b.bam_type, BamType::Panel(_))) .for_each(|b| println!("{b:#?}")); let u = bam_collection.get("PARACHINI", "mrd"); println!("{u:#?}"); Ok(()) } #[test_log::test] fn vcf() -> anyhow::Result<()> { let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe"); vcf_collection.sort_by_id(); vcf_collection .vcfs .iter() .for_each(|v| v.println().unwrap()); Ok(()) } // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/ #[test_log::test] fn mux() -> anyhow::Result<()> { let result_dir = "/data/test_suite/results".to_string(); let cases = vec![ FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() }, FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() }, ]; cases.iter().for_each(|c| { let dir = format!("{result_dir}/{}", c.id); if Path::new(&dir).exists() { fs::remove_dir_all(dir).unwrap(); } }); let config = Config { result_dir, ..Default::default() }; Dorado::from_mux(cases, config) } #[test_log::test] fn clairs() -> anyhow::Result<()> { let config = ClairSConfig { result_dir: "/data/test".to_string(), ..ClairSConfig::default() }; ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run() } #[test_log::test] fn nanomonsv() -> anyhow::Result<()> { // let config = NanomonSVConfig { // result_dir: "/data/test".to_string(), // ..NanomonSVConfig::default() // }; // NanomonSV::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run() let bam = |id:&str, time_point: &str| format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam"); let id = "HAMROUNE"; NanomonSV::new(id, &bam(id, "diag"), &bam(id, "mrd"), NanomonSVConfig::default()).run() } #[test] fn nanomddonsv_solo() -> anyhow::Result<()> { init(); let id = "BRETON"; let time_point = "diag"; NanomonSVSolo::new(id, &format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam"), time_point, NanomonSVConfig::default()).run()?; // let time_point = "mrd"; // for id in ["MERY", "CAMARA", "FRANIATTE", "FERATI", "IQBAL", "COLLE", "JOLIVET", "BAFFREAU", "MANCUS", "BELARBI", "BENGUIRAT", "HENAUX", "MEDDAH"] { // // NanomonSVSolo::new(id, &format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam"), time_point, NanomonSVConfig::default()).run()?; // } Ok(()) } // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh & #[test] fn todo_all() -> anyhow::Result<()> { init(); // let config = CollectionsConfig::default(); let config = CollectionsConfig { pod_dir: "/data/store".to_string(), ..Default::default() }; info!("Runing todo with config: {:#?}", config); let mut collections = Collections::new(config)?; collections.todo()?; collections.tasks.iter().for_each(|t| println!("{t}")); println!("{}", collections.tasks.len()); Ok(()) } #[test] fn todo_agg() -> anyhow::Result<()> { init(); let config = CollectionsConfig::default(); info!("Runing todo with config: {:#?}", config); let collections = Collections::new(config)?; let agg_tasks = collections.todo_variants_agg()?; println!("{:#?}", agg_tasks); println!("{}", agg_tasks.len()); Ok(()) } #[test] fn run_agg() -> anyhow::Result<()> { init(); let config = CollectionsConfig { id_black_list: vec!["MANCUSO".to_string(),"HAMROUNE".to_string()], ..Default::default() }; info!("Runing todo with config: {:#?}", config); let mut collections = Collections::new(config)?; collections.tasks = collections.todo_variants_agg()?; collections.run()?; Ok(()) } // export RUST_LOG="debug" #[test_log::test] fn run_t() -> anyhow::Result<()> { // let config = CollectionsConfig::default(); let config = CollectionsConfig { pod_dir: "/data/store".to_string(), ..Default::default() }; run_tasks(config) } #[test_log::test] fn somatic() -> anyhow::Result<()> { let variants_collection = VariantsCollection::new("/data/longreads_basic_pipe")?; variants_collection.data.iter().for_each(|v| println!("{}\t{}", v.id, v.path.display())); Ok(()) } // #[test_log::test] // fn bcftools_pass() { // let config = BcftoolsConfig::default(); // let id = "RICCO"; // let time = "diag"; // let caller = "DeepVariant"; // // Config::default(); // // // let (i, o) = // // let i = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag.nanomonsv.result.vcf"); // // let o = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz"); // bcftools_keep_pass(&i, &o, config).unwrap(); // } #[test_log::test] fn bam_ok() -> anyhow::Result<()> { let collections = Collections::new( CollectionsConfig::default() )?; let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| { (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string()) }).collect(); res.sort_by_key(|b| b.1.clone()); res.sort_by_key(|b| b.0.clone()); res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}")); Ok(()) } #[test_log::test] fn todo_assembler() -> anyhow::Result<()> { let collections = Collections::new( CollectionsConfig::default() )?; collections.todo_assembler()?; Ok(()) } #[test] fn sv_pon() -> anyhow::Result<()> { init(); nanomonsv_create_pon(NanomonSVConfig::default(), "/data/ref/hs1/nanomonsv_pon.vcf.gz") } #[test] fn todo_mod() -> anyhow::Result<()> { init(); let collections = Collections::new( CollectionsConfig::default() )?; collections.todo_mod_pileup(); Ok(()) } #[test] fn todo_deepv() -> anyhow::Result<()> { init(); let collections = Collections::new( CollectionsConfig::default() )?; let tasks = collections.todo_deepvariants(); tasks.iter().for_each(|t| info!("{t}")); info!("n tasks {}", tasks.len()); Ok(()) } #[test] fn todo_clairs() -> anyhow::Result<()> { init(); let collections = Collections::new( CollectionsConfig::default() )?; collections.todo_clairs().iter().for_each(|t| info!("{t}")); Ok(()) } #[test] fn run_assemblers() -> anyhow::Result<()> { Assembler::new("CAMEL".to_string(), "diag".to_string(), AssemblerConfig::default()).run() } // #[test] // fn run_dmr_par() -> anyhow::Result<()> { // init(); // let collections = Collections::new( // CollectionsConfig::default() // )?; // let tasks = collections.todo_dmr_c_diag_mrd(); // tasks.iter().for_each(|t| info!("{t}")); // let len = tasks.len(); // // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap(); // // pool.install(|| { // // tasks.par_iter().enumerate().for_each(|(i, t)| { // // let config = ModkitConfig {threads: 2, ..Default::default() }; // // if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); } // // println!("⚡ {i}/{len}"); // // }); // // }); // Ok(()) // } #[test] fn run_mod_par() -> anyhow::Result<()> { init(); let collections = Collections::new( CollectionsConfig::default() )?; let tasks = collections.todo_mod_pileup(); let len = tasks.len(); tasks.par_iter().enumerate().for_each(|(i, t)| { let config = ModkitConfig {threads: 2, ..Default::default() }; if let collection::CollectionsTasks::ModPileup { bam, .. } = t { let _ = bed_methyl(bam.to_owned(), &config); } println!("⚡ {i}/{len}"); }); Ok(()) } #[test] fn run_severus() -> anyhow::Result<()> { init(); Severus::initialize("CAMEL", Config::default())?.run() } #[test] fn run_severus_solo() -> anyhow::Result<()> { init(); SeverusSolo::initialize("CAMEL","diag", Config::default())?.run() } #[test] fn run_savana() -> anyhow::Result<()> { init(); Savana::initialize("LEVASSEUR", Config::default())?.run() } #[test] fn check_versions() -> anyhow::Result<()> { init(); let config = Config::default(); let v = Savana::version(&config)?; info!("Savanna version {v}"); let v = Severus::version(&config)?; info!("Severus version {v}"); Ok(()) } #[test] fn run_multi_deepvariant() -> anyhow::Result<()> { init(); let mut collections = Collections::new( CollectionsConfig::default() )?; collections.run_deepvariant() } #[test] fn run_clairs() -> anyhow::Result<()> { init(); let id = "HAMROUNE"; let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam"); let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam"); ClairS::new(id, &diag_bam, &mrd_bam, ClairSConfig::default()).run() } #[test] fn run_deepvariant() -> anyhow::Result<()> { init(); DeepVariant::initialize("HAMROUNE", "diag", Config::default())?.run() } #[test] fn run_longphase() -> anyhow::Result<()> { init(); let id = "BECERRA"; let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam"); let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz"); let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam"); LongphaseHap::new(id, &diag_bam, &vcf, LongphaseConfig::default()).run()?; LongphaseHap::new(id, &mrd_bam, &vcf, LongphaseConfig::default()).run() } #[test] fn run_longphase_modcall() -> anyhow::Result<()> { init(); let id = "ADJAGBA"; let time = "diag"; LongphaseModcallSolo::initialize(id, time, Config::default())?.run() } #[test] fn run_longphase_phase() -> anyhow::Result<()> { init(); let id = "ADJAGBA"; LongphasePhase::initialize(id, Config::default())?.run() } #[test] fn variant_parse() -> anyhow::Result<()> { let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.666667:6,4,0"; let variant: Variant = row.parse()?; let var_string = variant.into_vcf_row(); assert_eq!(row, &var_string); let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t."; let variant: Variant = row.parse()?; let var_string = variant.into_vcf_row(); assert_eq!(row, &var_string); Ok(()) } #[test] fn variant_load_deepvariant() -> anyhow::Result<()> { init(); let id = "ADJAGBA"; let time = "diag"; let mut dv = DeepVariant::initialize(id, time, Config::default())?; dv.run()?; let variants = dv.variants()?; println!("Deepvariant for {id} {time}: variants {}", variants.len()); Ok(()) } }