lib.rs 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634
  1. //! # 🧬 Long-read Somatic Variant Calling and Analysis Framework
  2. //!
  3. //! This Rust library provides a modular, parallelizable framework for somatic variant calling, annotation, and interpretation from long-read sequencing data. It is designed to support full pipelines for research and clinical workflows across multiple variant callers and analysis stages.
  4. //!
  5. //! The library also serves as an extensible platform that developers can leverage to add custom features, integrate new tools, and tailor workflows to specific use cases.
  6. //!
  7. //! ## 🧩 Key Features
  8. //!
  9. //! - **POD5 Demultiplexing and Alignment**: End-to-end support for processing ONT POD5 files:
  10. //! - Barcode-aware demultiplexing using metadata CSVs
  11. //! - POD5 subsetting and organization by case
  12. //! - Integration with basecallers (e.g., Dorado) for read alignment
  13. //! - **Pipeline Management**: Full orchestration of Dockerized execution pipelines for tools such as ClairS, Nanomonsv, DeepVariant, Savana, Modkit, and Severus.
  14. //! - **Flexible Configuration**: Centralized configuration system (`Config`, `CollectionsConfig`) for all modules and pipelines.
  15. //! - **Input Abstraction**: Unified handling of BAM, POD5, and VCF file collections across cohorts and directories.
  16. //! - **Variant Processing**: Modular loading, filtering, statistical analysis, and annotation of somatic and germline variants.
  17. //! - **Haplotype Phasing and Methylation**: Support for LongPhase-based phasing and Modkit methylation pileups with support for multi-threaded pileup and aggregation.
  18. //! - **Parallel Execution**: Uses `rayon` for efficient multicore parallelization over large cohorts and tasks.
  19. //!
  20. //! ## 📚 Module Highlights
  21. //!
  22. //! - `callers`: Interfaces to variant calling tools (ClairS, DeepVariant, Nanomonsv, Savana, etc...)
  23. //! - `runners`: Pipeline runners (e.g. `Somatic`, `SeverusSolo`, `LongphasePhase`) that manage end-to-end execution.
  24. //! - `collection`: Organizes input data across BAMs, VCFs, and POD5 files with auto-detection of completed runs.
  25. //! - `annotation`: VEP line parsing and high-level annotation aggregation.
  26. //! - `pipes`: Composition modules for executing pipelines across callers and post-processing steps.
  27. //! - `functions`: Custom logic for genome assembly, entropy estimation, and internal tooling.
  28. //! - `positions`, `variant`, `helpers`: Utilities for SV modeling, variant filtering, position overlap logic, and helper methods.
  29. //!
  30. //! ## ⚡ Workflow Overview
  31. //!
  32. //! ### 1. 📦 From POD5 to BAM Alignment
  33. //!
  34. //! - **Demultiplexing**: POD5 files are subset and demuxed using barcodes (via CSV metadata).
  35. //! - **Flowcell Case Management**: Each sample is identified by a [`collection::pod5::FlowCellCase`] containing its ID, time point, and POD5 directory.
  36. //! - **Alignment**: The [`commands::dorado::Dorado`] module handles alignment of POD5 reads to reference genome, producing BAMs.
  37. //!
  38. //! ```rust
  39. //! let case = FlowCellCase { id: "PATIENT1", time_point: "diag", barcode: "01", pod_dir: "...".into() };
  40. //! Dorado::init(case, Config::default())?.run_pipe()?;
  41. //! ```
  42. //!
  43. //! ### 2. 🧬 Variant Calling (BAM ➝ VCF)
  44. //!
  45. //! Using the aligned BAMs, multiple variant callers can be run in parallel. The [`callers`] and [`runners`] modules support:
  46. //!
  47. //! - **ClairS** – somatic small variant calling with LongPhase haplotagging
  48. //! - **Nanomonsv** – structural variants (SV)
  49. //! - **DeepVariant** – germline small variants
  50. //! - **Savana** – SVs and copy number variations (CNV)
  51. //! - **Modkit** – methylation pileups
  52. //! - **LongPhase** – phasing and modcalling
  53. //!
  54. //! All workflows can be triggered per-case or per-cohort using `Collections` or `Somatic` runners.
  55. //!
  56. //! ```rust
  57. //! ClairS::initialize("PATIENT1", Config::default())?.run()?;
  58. //! NanomonSV::initialize("PATIENT1", Config::default())?.run()?;
  59. //! ```
  60. //!
  61. //! ### 3. 📈 Aggregation & Statistics (VCF ➝ JSON / Stats)
  62. //!
  63. //! After variant calling:
  64. //!
  65. //! - Annotate with VEP ([`annotation`] module)
  66. //! - Load and filter with [`variant::variant_collection`]
  67. //! - Compute variant and region-level stats (e.g., mutation rates, alteration categories, coding overlaps)
  68. //!
  69. //! ```rust
  70. //! let variants = Variants::load_from_json("/path/to/somatic_variants.json.gz")?;
  71. //! let stats = VariantsStats::new(&variants, "PATIENT1", &config)?;
  72. //! stats.save_to_json("/output/path/stats.json.gz")?;
  73. //! ```
  74. //!
  75. //! ### 4. 🧠 Intelligent Task Management (`collection` module)
  76. //!
  77. //! - Auto-discovers available samples, POD5s, BAMs, and VCFs
  78. //! - Detects missing outputs and creates task lists
  79. //! - Tasks are parallelizable using Rayon and can be run on-demand
  80. //!
  81. //! ```rust
  82. //! let mut collections = Collections::new(CollectionsConfig::default())?;
  83. //! collections.todo()?; // Identify missing steps
  84. //! collections.run()?; // Run them automatically
  85. //! ```
  86. //!
  87. //! ## 🔬 Testing
  88. //!
  89. //! Integration tests demonstrate the entire pipeline. Run with logging enabled:
  90. //!
  91. //! ```bash
  92. //! export RUST_LOG=debug
  93. //! cargo test -- --nocapture
  94. //! ```
  95. //!
  96. //! ## 🧪 Example Use Cases
  97. //!
  98. //! - Full somatic variant calling pipeline on matched tumor/normal samples
  99. //! - POD5-based pipeline from raw signal to variants
  100. //! - Aggregation and annotation of SVs across a clinical cohort
  101. //! - Methylation analysis using nanopore-specific tools
  102. //! - Variant calling and analysis in large-scale longitudinal studies
  103. //!
  104. //! ## 🚀 Getting Started
  105. //!
  106. //! All workflows are initialized from `Config` and driven by the `Collections` structure:
  107. //!
  108. //! ```rust
  109. //! let collections = Collections::new(CollectionsConfig::default())?;
  110. //! collections.todo()?;
  111. //! collections.run()?;
  112. //! ```
  113. //!
  114. //! ## 🔗 References
  115. //!
  116. //! **Basecalling and alignment**
  117. //! - Dorado: <https://github.com/nanoporetech/dorado>
  118. //!
  119. //! **Variants Callers**
  120. //! - ClairS: <https://github.com/HKU-BAL/ClairS>
  121. //! - Nanomonsv: <https://github.com/friend1ws/nanomonsv>
  122. //! - Savana: <https://github.com/cortes-ciriano-lab/savana>
  123. //! - DeepVariant: <https://github.com/google/deepvariant>
  124. //! - DeepSomatic: <https://github.com/google/deepsomatic>
  125. //! - LongPhase: <https://github.com/PorubskyResearch/LongPhase>
  126. //! - Modkit: <https://github.com/nanoporetech/modkit>
  127. //!
  128. //! **Variants annotation**
  129. //! - VEP: <https://www.ensembl.org/info/docs/tools/vep/index.html>
  130. //!
  131. //! ---
  132. use std::sync::{Arc, Mutex};
  133. pub mod commands;
  134. pub mod config;
  135. pub mod modkit;
  136. pub mod callers;
  137. pub mod runners;
  138. pub mod collection;
  139. pub mod functions;
  140. pub mod helpers;
  141. pub mod variant;
  142. pub mod io;
  143. pub mod pipes;
  144. pub mod positions;
  145. pub mod annotation;
  146. pub mod cases;
  147. pub mod scan;
  148. pub mod math;
  149. #[macro_use]
  150. extern crate lazy_static;
  151. // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
  152. lazy_static! {
  153. static ref DOCKER_ID: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
  154. }
  155. #[cfg(test)]
  156. mod tests {
  157. use std::{collections::HashMap, fs, path::Path};
  158. use annotation::{vep::{VepLine, VEP}, Annotations};
  159. use callers::{nanomonsv::nanomonsv_create_pon, savana::{Savana, SavanaReadCounts}, severus::{Severus, SeverusSolo}};
  160. use collection::{bam::{counts_at, counts_ins_at, nt_pileup, WGSBam, WGSBamStats}, Initialize, InitializeSolo, Version};
  161. use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
  162. use functions::assembler::{Assembler, AssemblerConfig};
  163. use helpers::estimate_shannon_entropy;
  164. use io::bed::read_bed;
  165. use itertools::Itertools;
  166. use log::{debug, error, info, warn};
  167. use pandora_lib_variants::variants::VariantCategory;
  168. use pipes::somatic::SomaticPipe;
  169. use positions::{overlaps_par, GenomePosition, GenomeRange};
  170. use rayon::prelude::*;
  171. use runners::Run;
  172. use variant::{variant::{Variants, VcfVariant}, variant_collection};
  173. use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
  174. use super::*;
  175. use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_somatic::DeepSomatic, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{bed::bedrow_overlaps_par, dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, group_variants_by_bnd_rc, Variant, VariantCollection}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
  176. // export RUST_LOG="debug"
  177. fn init() {
  178. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  179. .is_test(true)
  180. .try_init();
  181. }
  182. #[test]
  183. fn it_works() {
  184. let bam_path = "/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam";
  185. modkit::modkit(bam_path);
  186. }
  187. #[test]
  188. fn run_dorado() -> anyhow::Result<()> {
  189. let case = FlowCellCase {
  190. id: "CONSIGNY".to_string(),
  191. time_point: "mrd".to_string(), barcode: "07".to_string(), pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into()
  192. };
  193. dorado::Dorado::init(case, Config::default())?.run_pipe()
  194. }
  195. #[test]
  196. fn pod5() -> anyhow::Result<()> {
  197. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  198. .build();
  199. let coll = Pod5Collection::new(
  200. "/data/run_data",
  201. "/data/flow_cells.tsv",
  202. "/data/longreads_basic_pipe",
  203. )?;
  204. println!("{coll:#?}");
  205. // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
  206. Ok(())
  207. }
  208. #[test]
  209. fn bam() -> anyhow::Result<()> {
  210. init();
  211. let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
  212. bam_collection
  213. .bams
  214. .iter()
  215. // .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
  216. .for_each(|b| println!("{b:#?}"));
  217. let u = bam_collection.get("PARACHINI", "mrd");
  218. println!("{u:#?}");
  219. Ok(())
  220. }
  221. #[test]
  222. fn vcf() -> anyhow::Result<()> {
  223. init();
  224. let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
  225. vcf_collection.sort_by_id();
  226. vcf_collection
  227. .vcfs
  228. .iter()
  229. .for_each(|v| v.println().unwrap());
  230. Ok(())
  231. }
  232. // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
  233. // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
  234. #[test]
  235. fn mux() -> anyhow::Result<()> {
  236. init();
  237. let result_dir = "/data/test_suite/results".to_string();
  238. let cases = vec![
  239. FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  240. FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  241. ];
  242. cases.iter().for_each(|c| {
  243. let dir = format!("{result_dir}/{}", c.id);
  244. if Path::new(&dir).exists() {
  245. fs::remove_dir_all(dir).unwrap();
  246. }
  247. });
  248. let config = Config { result_dir, ..Default::default() };
  249. Dorado::from_mux(cases, config)
  250. }
  251. // #[test_log::test]
  252. // fn clairs() -> anyhow::Result<()> {
  253. // let config = ClairSConfig {
  254. // result_dir: "/data/test".to_string(),
  255. // ..ClairSConfig::default()
  256. // };
  257. // ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  258. // }
  259. #[test]
  260. fn nanomonsv() -> anyhow::Result<()> {
  261. init();
  262. let id = "HAMROUNE";
  263. NanomonSV::initialize(id, Config::default())?.run()
  264. }
  265. #[test]
  266. fn nanomonsv_solo() -> anyhow::Result<()> {
  267. init();
  268. NanomonSVSolo::initialize("GRAND", "diag", Config::default())?.run()
  269. }
  270. // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
  271. #[test]
  272. fn todo_all() -> anyhow::Result<()> {
  273. init();
  274. // let config = CollectionsConfig::default();
  275. let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
  276. info!("Runing todo with config: {:#?}", config);
  277. let mut collections = Collections::new(config)?;
  278. collections.todo()?;
  279. collections.tasks.iter().for_each(|t| println!("{t}"));
  280. println!("{}", collections.tasks.len());
  281. Ok(())
  282. }
  283. // #[test]
  284. // fn todo_agg() -> anyhow::Result<()> {
  285. // init();
  286. // let config = CollectionsConfig::default();
  287. // info!("Runing todo with config: {:#?}", config);
  288. // let collections = Collections::new(config)?;
  289. // let agg_tasks = collections.todo_variants_agg()?;
  290. // println!("{:#?}", agg_tasks);
  291. // println!("{}", agg_tasks.len());
  292. // Ok(())
  293. // }
  294. // #[test]
  295. // fn run_agg() -> anyhow::Result<()> {
  296. // init();
  297. // let config = CollectionsConfig {
  298. // id_black_list: vec!["MANCUSO".to_string(),"HAMROUNE".to_string()],
  299. // ..Default::default()
  300. // };
  301. // info!("Runing todo with config: {:#?}", config);
  302. // let mut collections = Collections::new(config)?;
  303. // collections.tasks = collections.todo_variants_agg()?;
  304. // collections.run()?;
  305. //
  306. // Ok(())
  307. // }
  308. // export RUST_LOG="debug"
  309. #[test]
  310. fn run_t() -> anyhow::Result<()> {
  311. init();
  312. // let config = CollectionsConfig::default();
  313. let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
  314. run_tasks(config)
  315. }
  316. // #[test_log::test]
  317. // fn bcftools_pass() {
  318. // let config = BcftoolsConfig::default();
  319. // let id = "RICCO";
  320. // let time = "diag";
  321. // let caller = "DeepVariant";
  322. //
  323. // Config::default();
  324. //
  325. // // let (i, o) =
  326. // // let i = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag.nanomonsv.result.vcf");
  327. // // let o = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
  328. // bcftools_keep_pass(&i, &o, config).unwrap();
  329. // }
  330. #[test]
  331. fn bam_ok() -> anyhow::Result<()> {
  332. init();
  333. let collections = Collections::new(
  334. CollectionsConfig::default()
  335. )?;
  336. let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
  337. (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
  338. }).collect();
  339. res.sort_by_key(|b| b.1.clone());
  340. res.sort_by_key(|b| b.0.clone());
  341. res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
  342. Ok(())
  343. }
  344. #[test]
  345. fn todo_assembler() -> anyhow::Result<()> {
  346. init();
  347. let collections = Collections::new(
  348. CollectionsConfig::default()
  349. )?;
  350. collections.todo_assembler()?;
  351. Ok(())
  352. }
  353. #[test]
  354. fn sv_pon() -> anyhow::Result<()> {
  355. init();
  356. nanomonsv_create_pon(&Config::default(), "/data/ref/hs1/nanomonsv_pon.vcf.gz")
  357. }
  358. #[test]
  359. fn todo_mod() -> anyhow::Result<()> {
  360. init();
  361. let collections = Collections::new(
  362. CollectionsConfig::default()
  363. )?;
  364. collections.todo_mod_pileup();
  365. Ok(())
  366. }
  367. #[test]
  368. fn todo_deepv() -> anyhow::Result<()> {
  369. init();
  370. let collections = Collections::new(
  371. CollectionsConfig::default()
  372. )?;
  373. let tasks = collections.todo_deepvariants();
  374. tasks.iter().for_each(|t| info!("{t}"));
  375. info!("n tasks {}", tasks.len());
  376. Ok(())
  377. }
  378. #[test]
  379. fn todo_clairs() -> anyhow::Result<()> {
  380. init();
  381. let collections = Collections::new(
  382. CollectionsConfig::default()
  383. )?;
  384. collections.todo_clairs().iter().for_each(|t| info!("{t}"));
  385. Ok(())
  386. }
  387. #[test]
  388. fn run_assemblers() -> anyhow::Result<()> {
  389. Assembler::new("CAMEL".to_string(), "diag".to_string(), AssemblerConfig::default()).run()
  390. }
  391. // #[test]
  392. // fn run_dmr_par() -> anyhow::Result<()> {
  393. // init();
  394. // let collections = Collections::new(
  395. // CollectionsConfig::default()
  396. // )?;
  397. // let tasks = collections.todo_dmr_c_diag_mrd();
  398. // tasks.iter().for_each(|t| info!("{t}"));
  399. // let len = tasks.len();
  400. // // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap();
  401. // // pool.install(|| {
  402. // // tasks.par_iter().enumerate().for_each(|(i, t)| {
  403. // // let config = ModkitConfig {threads: 2, ..Default::default() };
  404. // // if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); }
  405. // // println!("⚡ {i}/{len}");
  406. // // });
  407. // // });
  408. // Ok(())
  409. // }
  410. #[test]
  411. fn run_mod_par() -> anyhow::Result<()> {
  412. init();
  413. let collections = Collections::new(
  414. CollectionsConfig::default()
  415. )?;
  416. let tasks = collections.todo_mod_pileup();
  417. let len = tasks.len();
  418. tasks.par_iter().enumerate().for_each(|(i, t)| {
  419. let config = ModkitConfig {threads: 2, ..Default::default() };
  420. if let collection::CollectionsTasks::ModPileup { bam, .. } = t { let _ = bed_methyl(bam.to_owned(), &config); }
  421. println!("⚡ {i}/{len}");
  422. });
  423. Ok(())
  424. }
  425. #[test]
  426. fn run_severus() -> anyhow::Result<()> {
  427. init();
  428. Severus::initialize("BANGA", Config::default())?.run()
  429. }
  430. // #[test]
  431. // fn run_severus_solo() -> anyhow::Result<()> {
  432. // init();
  433. // SeverusSolo::initialize("CAMEL","diag", Config::default())?.run()
  434. // }
  435. #[test]
  436. fn run_savana() -> anyhow::Result<()> {
  437. init();
  438. let collections = Collections::new(
  439. CollectionsConfig::default()
  440. )?;
  441. for bam in collections.bam.by_id_completed(15.0, 10.0).iter() {
  442. let id = &bam.id;
  443. match ClairS::initialize(id, Config::default())?.run() {
  444. Ok(_) => match Savana::initialize(id, Config::default())?.run() {
  445. Ok(_) => (),
  446. Err(e) => error!("{e}"),
  447. },
  448. Err(e) => error!("{e}"),
  449. }
  450. ;
  451. }
  452. Ok(())
  453. }
  454. #[test]
  455. fn check_versions() -> anyhow::Result<()> {
  456. init();
  457. let config = Config::default();
  458. let v = Savana::version(&config)?;
  459. info!("Savanna version {v}");
  460. let v = Severus::version(&config)?;
  461. info!("Severus version {v}");
  462. Ok(())
  463. }
  464. #[test]
  465. fn run_multi_deepvariant() -> anyhow::Result<()> {
  466. init();
  467. let mut collections = Collections::new(
  468. CollectionsConfig::default()
  469. )?;
  470. collections.run_deepvariant()
  471. }
  472. #[test]
  473. fn run_deepvariant() -> anyhow::Result<()> {
  474. init();
  475. DeepVariant::initialize("HAMROUNE", "diag", Config::default())?.run()
  476. }
  477. #[test]
  478. fn run_clairs() -> anyhow::Result<()> {
  479. init();
  480. ClairS::initialize("ADJAGBA", Config::default())?.run()
  481. }
  482. #[test]
  483. fn run_longphase() -> anyhow::Result<()> {
  484. init();
  485. let id = "BECERRA";
  486. let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
  487. let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz");
  488. let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
  489. LongphaseHap::new(id, &diag_bam, &vcf, LongphaseConfig::default()).run()?;
  490. LongphaseHap::new(id, &mrd_bam, &vcf, LongphaseConfig::default()).run()
  491. }
  492. #[test]
  493. fn run_longphase_modcall() -> anyhow::Result<()> {
  494. init();
  495. let id = "ADJAGBA";
  496. let time = "diag";
  497. LongphaseModcallSolo::initialize(id, time, Config::default())?.run()
  498. }
  499. #[test]
  500. fn run_longphase_phase() -> anyhow::Result<()> {
  501. init();
  502. let id = "ADJAGBA";
  503. LongphasePhase::initialize(id, Config::default())?.run()
  504. }
  505. #[test]
  506. fn snv_parse() -> anyhow::Result<()> {
  507. init();
  508. // ClairS
  509. let row = "chr1\t10407\t.\tA\tG\t10.1\tPASS\tF\tGT:GQ:DP:AD:AF\t0/1:10:31:10,20:0.6452";
  510. let variant: VcfVariant = row.parse()?;
  511. let var_string = variant.into_vcf_row();
  512. assert_eq!(row, &var_string);
  513. let mut variant_col = VariantCollection {
  514. variants: vec![variant],
  515. vcf: collection::vcf::Vcf::new("/data/longreads_basic_pipe/ACHITE/diag/ClairS/ACHITE_diag_clairs_PASSED.vcf.gz".into())?,
  516. caller: Annotation::Callers(annotation::Caller::ClairS, annotation::Sample::Somatic)
  517. };
  518. let annotations = Annotations::default();
  519. variant_col.annotate_with_constit_bam(&annotations, "/data/longreads_basic_pipe/ACHITE/mrd/ACHITE_mrd_hs1.bam", 1)?;
  520. // DeepVariant
  521. let row = "chr1\t10407\t.\tA\tG\t7.4\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:5:9:1,8:0.888889:5,5,0";
  522. variant_col.variants.push(row.parse()?);
  523. let anns: Vec<Vec<Annotation>> = variant_col.variants.iter()
  524. .filter_map(|e| {
  525. annotations.store.get(&e.hash()).map(|v| v.value().to_owned())
  526. })
  527. .collect();
  528. assert_eq!(anns[0], anns[1]);
  529. Ok(())
  530. }
  531. #[test]
  532. fn deletion_parse() -> anyhow::Result<()> {
  533. init();
  534. // Clairs
  535. let row = "chr1\t16760\t.\tAaag\tA\t8.48\tPASS\tF\tGT:GQ:DP:AD:AF\t0/1:8:39:22,17:0.4359";
  536. let variant: VcfVariant = row.parse()?;
  537. let var_string = variant.into_vcf_row();
  538. // case are not keeped
  539. assert_eq!(&var_string, "chr1\t16760\t.\tAAAG\tA\t8.48\tPASS\tF\tGT:GQ:DP:AD:AF\t0/1:8:39:22,17:0.4359");
  540. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  541. let mut variant_col = VariantCollection {
  542. variants: vec![variant],
  543. vcf: collection::vcf::Vcf::new("/data/longreads_basic_pipe/ACHITE/diag/ClairS/ACHITE_diag_clairs_PASSED.vcf.gz".into())?,
  544. caller: Annotation::Callers(annotation::Caller::ClairS, annotation::Sample::Somatic)
  545. };
  546. let annotations = Annotations::default();
  547. // DeepVariant, VAF is not an f32 at it should be, sometimes too long removed the last number for eq
  548. let row = "chr12\t326911\t.\tGTGTA\tG\t4.7\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:5:8:5,3:0.37500:2,0,21";
  549. let variant: VcfVariant = row.parse()?;
  550. let var_string = variant.into_vcf_row();
  551. assert_eq!(row, &var_string);
  552. variant_col.variants.push(row.parse()?);
  553. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  554. // Severus, 0000 added to VAF and hVAF
  555. let row = "chr10\t108974982\tseverus_DEL885\tN\t<DEL>\t60\tPASS\tPRECISE;SVTYPE=DEL;SVLEN=474;END=108975456;STRANDS=+-;INSIDE_VNTR=TRUE;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:61:0.30000:0.30000,0.00000,0.00000:7:3";
  556. let variant: VcfVariant = row.parse()?;
  557. let var_string = variant.into_vcf_row();
  558. assert_eq!(row, &var_string);
  559. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  560. variant_col.variants.push(row.parse()?);
  561. // Nanomonsv dont parse last format remove: \t22:0 and nt putted in uppercase
  562. let row = "chr14\t5247080\td_106\tG\t<DEL>\t.\tPASS\tEND=5247255;SVTYPE=DEL;SVLEN=-175;SVINSLEN=39;SVINSSEQ=ATAACCCAGGTGATATAACACTTCTTTAGGCTCTGCCTA\tTR:VR\t18:3";
  563. let variant: VcfVariant = row.parse()?;
  564. let var_string = variant.into_vcf_row();
  565. assert_eq!(row, &var_string);
  566. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  567. variant_col.variants.push(row.parse()?);
  568. let row = "chr1\t20654667\t.\tTG\tT\t3.5\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:3:23:20,3:0.13044:0,0,16";
  569. let variant: VcfVariant = row.parse()?;
  570. let var_string = variant.into_vcf_row();
  571. assert_eq!(row, &var_string);
  572. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  573. variant_col.variants.push(row.parse()?);
  574. let row = "chr1\t2160094\t.\tTCTGACAGCCTGGAACAGCACCCACAACCGCAGGTGAGCATCTGACAGCCCGCAGCAGCACCCACACGCACAGGTGAGAATCTGACAGCCCGGAGCAGCACCCACACAGGCAGGGGAGCATCTGACATCCTGGAGCAGCACCGACAACCCCAGGTGAGCAACTGAGAGCCTGGAACAGCACCCACACCCCCAGGTGAGAATCTGACAGCCTGGAAGAGCACCCCACATCCCCGGGTGAGCATCTGACAGCCTGGAACAGCATCAACACCCCCAGGTGAGCATCCGATAGCCTGGAGCAGCACCCACACCCTCAGGTGAGCATCTGACAGCCTGGAACAGCAACCACACCCCCAGGTGAACATCTGACAGCCCGGAGCAGCACCCACACCCCCAGGTGAGCATCTGACAGCCTGGAACAGCACCCACACCCCCAGGAGAGCATCCGGTAGCCTGGAGCAGAACCCACACCCACAGGCGAGCATCTGACAGCCTGGGTTGGCACCCACACCCCCAGGTGAGCATCTGATGGTCTGGAGCAGCACCCACACCTACAGGAGAGCATCTGACAACCTGCAACAGAACCCAAACCCCCAGGTGAGCATCTGACAGACTGGAACAGCACCCTGCACCCCCAGGTGAGTATCTGACGGCCTGGAACAGAACACACAAGCCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGTGAGCATCAGACAGCCTGGAGTAGCACCCCACACCCCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACTCACCAGGTGAGCATGTGACATGCTGGAACAGCACCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCGGCACCCCACACCCCCAGGTGAGCATCGGACAGCCTGGAGCAGTACCCACACCCCCAGGTGAGCATCCGACAGCCTGGAACAGCACCCACACCCCCAGGTGAGCATCTGACAGACAGGAACAGCACCCACATGTCCAGGTGAGCCTCTGACAGACTGGAACAGCACGCGCACCCCCAGGTGAGCATCTGACAGGCTGGAACAGCACCCACACCCCCAGGAGAGCATCTTACAGCATGTAACAGCACCCACACACCCACGTGAGCATCTGACAGCCTGGAACAGCACCCTGCACCCCCAGGTGCGCACGTGACAGCCTGGAACAGCACCCACACCCCCAGGCGAGCATCGGACAGCCTGGAGCAGCACCCCACACCCGCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGTGAGCATGTGACAGCCTGGAACAGCACCCACACCCCCAGGCGAGCATCTGACAGCCTGGAGCAACACCCCACACCCCCAGGTGAGCATCGAACTGTCTGGAGCAGCACCCACAACCACAGGTGGGCATCGGAGAGAGTGGAGCAGCGCCCAGACCACCAGGCAAGCATCTGACAGCCTGGAGCAGTGCCCACACCCCCATGTGAGCATCTGACAGTATGGAGCAGCACCCACAGCCCAAGGTGAGCATCTGACAACCTGGAGCAGCACCCACACCCCCAGGCGAGCATCTGAACGCACAGAGCAGCACCCACACCCCCAGGCGAGCATCCGACAGCGTGGAGCAGCACCCACACTCCCAGGTGCGCGTGTGATGGTCTGGGGCAGCACCCACACACACAGGTGAGCCTGCGACAGCCTGGAGCAGCACCCACAGCCCCAGGTGAGCATCTGATGGTCTGGAGCAGCACCCACAACCACAGGTGAGCATCGGAGAGACTGGAGCAGCGCCGAAACCCCCAGGCGAACATTTGAGAGCCTGGAGCAGTGCCGACACCCCCAGGTGAGCATCTGACACCGTGGAGCAGCACCCACAGCCCAAGGTGAGCATCTGACAACCTGGAGCAGCACCCACAGCCCCAGGCGAGCATTTGAACGCACGGAGCAGGACCTACAGCCCCAGGCGAGCATCCGACAGCCTGGAGCAGCACCCACACACCCAGGTGAGCATCTGACAGCCTGGAGCAGCACCCACAACCCCAGGGGAGCATCTGACCGCATGGAATGGCATCCTCACCCGTAGGTGAACGTCCGACAGCCTGGAGCAGCACCCACACCCCCAGGTGAGCATCTGACAGCCTGGAACAGCACCTGCACCCCCGGGTGAGGATCAGATAGCCTGGAGCAGCACCCACACTCCAGGTGAGCATCTGACAGCCTGAAGCAGCACCCACACCAACAGGTGAGCATCTGACAGCCTGGAAGAGCACCCACAACCCCACGTGAGCATCTGACAGCCTGGAACAGGACCCTGCACCCCAAAGTGAGCATCTGACAACCTGGAGCAGGAACCACAACCCCAGGTGAGCATCTGATAGCCTGGAATAGCACCCACACACCCAGGTGAGCATCTGAGAGCCTGGAGCAGCACCCACACCCCCAGGTGAGCATCCGACAGCCTGGAACAGTACCCACACACCCAGGCGAGCATCTGACAGCCTGAAACAGCACACACACTCCCAGGTGAGCATCTCATATGCTGGAACAGCACCCACACCCCCAGGTGAGCATCTGACTGCCCGGAGCAGCACGCACACCCCCGGGTGAGCATCTGATAGCCTGGAACAGCACCCACACCCCCAGATGAGCATCCGACAGCCTGGAGCGGAGCCCACAGCCCCAGGCGAGCATCTGACAGCCTGGAACAGCACCCTGCATCCCCGGGTGAGGATCAGACAGCCTGGAGCAGCACCCACACTCCAGGTGAGCATCTGACAGCCTGAAGCAGCACCCACACCAACAGGTGAGCCTCTGACAGCCTGCAACAGCACCCACACCCCAAGGTGAGCATCTGACAGCCTGGAAGAGGACCCTGCAGCCCCAAGTGAGCATCTGACAACCTGAAGCAGCAACCACACTCCAAGGTGAGCATCCAACAGCCTGGAACAGCACCCACACACCCAGGTGAGCATCTGACAGCCTGGAGCAGCACCACACCCCCAGGTGAGCATCCGACAGCCTGGAACAGCACCTACAAACCCAGGAGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGCGAGCATCTGACAGCCCGGAGCAGCACGCAAACCCCCAGGTGAGCATCTGATACCCTGGAACAGCACCCACACACCCAGGTGAGCATCCGACAGCCTAGAGCTGAACCCACACCAACAGGAGAGCATCTGACAGCCTGGGTCGGCACCCACACCCCCAGGTGAGCATCTGACAGCCTGGAACAGCACCCTGCACCCTCAGGTGCGCACGTGACAGCCTGGAACAGCACCCACACACCCAGGCGAGCATCTGACGGCCTGGAACGGCACCCACACCCCGAGGCGAGCATGGGACAGCCTGGAGAGCAGCCACACCCTCAGATGAGCATCTGACAGCCTGGAACAGCACCCTGCACCCCCAGGTGAGCATCTGACAGCCTGGAACAGGACCCACGTCCCCAGGCGAGCAA\tT\t3.1\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:3:15:11,4:0.26667:0,0,22";
  575. let variant: VcfVariant = row.parse()?;
  576. let var_string = variant.into_vcf_row();
  577. assert_eq!(row, &var_string);
  578. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  579. variant_col.variants.push(row.parse()?);
  580. let row = "chr1\t27073397\t.\tTGGGG\tT\t28.9\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t./.:2:13:1,3:0.23077:24,2,26";
  581. let variant: VcfVariant = row.parse()?;
  582. let var_string = variant.into_vcf_row();
  583. assert_eq!(row, &var_string);
  584. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  585. variant_col.variants.push(row.parse()?);
  586. let row = "chrY\t639648\t.\tCTTTTTT\tC\t10.4\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:1:7:0,2:0.28571:4,0,2";
  587. let variant: VcfVariant = row.parse()?;
  588. let var_string = variant.into_vcf_row();
  589. assert_eq!(row, &var_string);
  590. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  591. variant_col.variants.push(row.parse()?);
  592. let row = "chr11\t31260259\tseverus_BND1077_1\tN\t]chr11:36214171]N\t60\tPASS\tPRECISE;SVTYPE=BND;SVLEN=4953912;MATE_ID=severus_BND1077_2;STRANDS=+-;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:68:0.67000:0.67000,0.00000,0.00000:4:8";
  593. let variant: VcfVariant = row.parse()?;
  594. let var_string = variant.into_vcf_row();
  595. assert_eq!(row, &var_string);
  596. assert_eq!(AlterationCategory::DEL, variant.alteration_category());
  597. println!("{:#?}", variant.deletion_desc());
  598. variant_col.variants.push(row.parse()?);
  599. variant_col.annotate_with_constit_bam(
  600. &annotations,
  601. "/data/longreads_basic_pipe/ACHITE/mrd/ACHITE_mrd_hs1.bam",
  602. 1
  603. )?;
  604. Ok(())
  605. }
  606. #[test]
  607. fn insertion_parse() -> anyhow::Result<()> {
  608. init();
  609. // Clairs
  610. let row = "chr1\t23005\t.\tT\tTCAC\t3.1\tPASS\tF\tGT:GQ:DP:AD:A\t0/1:3:13:9,4:0.3077";
  611. let variant: VcfVariant = row.parse()?;
  612. let var_string = variant.into_vcf_row();
  613. assert_eq!(&var_string, row);
  614. assert_eq!(AlterationCategory::INS, variant.alteration_category());
  615. let mut variant_col = VariantCollection {
  616. variants: vec![variant],
  617. vcf: collection::vcf::Vcf::new("/data/longreads_basic_pipe/ACHITE/diag/ClairS/ACHITE_diag_clairs_PASSED.vcf.gz".into())?,
  618. caller: Annotation::Callers(annotation::Caller::ClairS, annotation::Sample::Somatic)
  619. };
  620. let annotations = Annotations::default();
  621. // DeepVariant, VAF is not an f32 at it should be, sometimes too long removed the last number for eq
  622. let row = "chrX\t152732993\t.\tG\tGTTTTTTTTT\t18.9\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:3:6:0,5:0.50000:15,7,3";
  623. let variant: VcfVariant = row.parse()?;
  624. let var_string = variant.into_vcf_row();
  625. assert_eq!(row, &var_string);
  626. // variant_col.variants.push(row.parse()?);
  627. assert_eq!(AlterationCategory::INS, variant.alteration_category());
  628. // Severus, 0000 added to VAF and hVAF
  629. let row = "chr11\t26669932\tseverus_INS9403\tN\tT\t60\tPASS\tPRECISE;SVTYPE=INS;SVLEN=460;INSIDE_VNTR=TRUE;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:71:0.27000:0.27000,0.00000,0.00000:8:3";
  630. let variant: VcfVariant = row.parse()?;
  631. let var_string = variant.into_vcf_row();
  632. assert_eq!(row, &var_string);
  633. assert_eq!(AlterationCategory::INS, variant.alteration_category());
  634. // variant_col.variants.push(row.parse()?);
  635. // Nanomonsv dont parse last format remove: \t22:0 and nt putted in uppercase
  636. let row = "chr8\t87940084\td_333\tT\t<INS>\t.\tPASS\tEND=87940201;SVTYPE=INS;SVINSLEN=172;SVINSSEQ=TA\tTR:VR\t9:5";
  637. let variant: VcfVariant = row.parse()?;
  638. let var_string = variant.into_vcf_row();
  639. assert_eq!(row, &var_string);
  640. assert_eq!(AlterationCategory::INS, variant.alteration_category());
  641. // variant_col.variants.push(row.parse()?);
  642. //
  643. let row = "chr5\t36122736\t.\tT\tTGCTCCG\t3.9\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:4:28:11,16:0.57143:1,0,37";
  644. let variant: VcfVariant = row.parse()?;
  645. let var_string = variant.into_vcf_row();
  646. assert_eq!(row, &var_string);
  647. assert_eq!(AlterationCategory::INS, variant.alteration_category());
  648. variant_col.variants = Vec::new();
  649. variant_col.variants.push(row.parse()?);
  650. variant_col.annotate_with_constit_bam(
  651. &annotations,
  652. "/data/longreads_basic_pipe/PASSARD/mrd/PASSARD_mrd_hs1.bam",
  653. 1
  654. )?;
  655. println!("{variant_col:?}");
  656. println!("{annotations:?}");
  657. Ok(())
  658. }
  659. #[test]
  660. fn trl_parse() -> anyhow::Result<()> {
  661. init();
  662. let row = "chr2\t207968575\tID_16420_1\ta\ta]chr11:41497080]\t.\tPASS\tSVTYPE=BND;MATEID=ID_16420_2;TUMOUR_READ_SUPPORT=4;TUMOUR_ALN_SUPPORT=4;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=0;BP_NOTATION=++;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=4;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.433;ORIGIN_MAPQ_MEAN=56.25;ORIGIN_EVENT_SIZE_STD_DEV=0;ORIGIN_EVENT_SIZE_MEDIAN=0;ORIGIN_EVENT_SIZE_MEAN=0;END_STARTS_STD_DEV=17.754;END_MAPQ_MEAN=56.25;END_EVENT_SIZE_STD_DEV=0;END_EVENT_SIZE_MEDIAN=0;END_EVENT_SIZE_MEAN=0;TUMOUR_DP_BEFORE=8,11;TUMOUR_DP_AT=4,11;TUMOUR_DP_AFTER=4,11;NORMAL_DP_BEFORE=7,16;NORMAL_DP_AT=7,16;NORMAL_DP_AFTER=7,16;TUMOUR_AF=1,0.364;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=1,3,0;NORMAL_TOTAL_HP_AT=4,3,0;TUMOUR_ALT_HP=2,1,1;TUMOUR_PS=207946665;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
  663. let variant: VcfVariant = row.parse()?;
  664. let var_string = variant.into_vcf_row();
  665. let u = variant.n_alt_depth();
  666. println!("{u:?}");
  667. Ok(())
  668. }
  669. #[test]
  670. fn dup_parse() -> anyhow::Result<()> {
  671. init();
  672. let row = "chr9\t91352364\tID_14667_1\tC\t]chr9:98584394]C\t.\tPASS\tSVTYPE=BND;MATEID=ID_14667_2;TUMOUR_READ_SUPPORT=3;TUMOUR_ALN_SUPPORT=3;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=7232030;BP_NOTATION=-+;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=3;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.471;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=8.524;ORIGIN_EVENT_SIZE_MEDIAN=7232030;ORIGIN_EVENT_SIZE_MEAN=7232020;END_STARTS_STD_DEV=8.731;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=8.524;END_EVENT_SIZE_MEDIAN=7232030;END_EVENT_SIZE_MEAN=7232020;TUMOUR_DP_BEFORE=16,38;TUMOUR_DP_AT=20,33;TUMOUR_DP_AFTER=20,33;NORMAL_DP_BEFORE=26,22;NORMAL_DP_AT=26,22;NORMAL_DP_AFTER=26,22;TUMOUR_AF=0.15,0.091;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=7,12,1;NORMAL_TOTAL_HP_AT=13,13,0;TUMOUR_ALT_HP=0,2,1;TUMOUR_PS=90796185;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
  673. let variant: VcfVariant = row.parse()?;
  674. assert_eq!(AlterationCategory::DUP, variant.alteration_category());
  675. let row = "chr9\t98584394\tID_14667_2\tC\tC[chr9:91352364[\t.\tPASS\tSVTYPE=BND;MATEID=ID_14667_1;TUMOUR_READ_SUPPORT=3;TUMOUR_ALN_SUPPORT=3;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=7232030;BP_NOTATION=-+;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=3;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.471;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=8.524;ORIGIN_EVENT_SIZE_MEDIAN=7232030;ORIGIN_EVENT_SIZE_MEAN=7232020;END_STARTS_STD_DEV=8.731;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=8.524;END_EVENT_SIZE_MEDIAN=7232030;END_EVENT_SIZE_MEAN=7232020;TUMOUR_DP_BEFORE=38,16;TUMOUR_DP_AT=33,20;TUMOUR_DP_AFTER=33,20;NORMAL_DP_BEFORE=22,26;NORMAL_DP_AT=22,26;NORMAL_DP_AFTER=22,26;TUMOUR_AF=0.091,0.15;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=15,18,0;NORMAL_TOTAL_HP_AT=11,10,1;TUMOUR_ALT_HP=1,0,2;TUMOUR_PS=98176815;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
  676. let variant: VcfVariant = row.parse()?;
  677. assert_eq!(AlterationCategory::DUP, variant.alteration_category());
  678. let row = "chr1\t9218455\tr_0\tC\t<DUP>\t.\tPASS\tSVTYPE=DUP;SVLEN=12572;END=9231027\tTR:VR\t37:9";
  679. let variant: VcfVariant = row.parse()?;
  680. println!("{:#?}", variant.alteration_category());
  681. println!("{:#?}", variant.bnd_desc());
  682. let row = "chr1\t47110295\tseverus_BND178_1\tN\t]chr1:47192169]N\t60.00\tPASS\tPRECISE;SVTYPE=BND;SVLEN=81874;MATE_ID=severus_BND178_2;STRANDS=+-;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:66:0.38000:0.38000,0.00000,0.00000:8:5";
  683. let variant: VcfVariant = row.parse()?;
  684. println!("{:#?}", variant.alteration_category());
  685. println!("{:#?}", variant.bnd_desc());
  686. Ok(())
  687. }
  688. #[test]
  689. fn variant_parse() -> anyhow::Result<()> {
  690. let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.66667:6,4,0";
  691. let variant: VcfVariant = row.parse()?;
  692. let var_string = variant.into_vcf_row();
  693. assert_eq!(row, &var_string);
  694. let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.";
  695. let variant: VcfVariant = row.parse()?;
  696. let var_string = variant.into_vcf_row();
  697. assert_eq!(row, &var_string);
  698. let row = "chr1\t2628434\t.\tC\tT\t17.973\tPASS\tH;FAU=0;FCU=7;FGU=0;FTU=7;RAU=0;RCU=2;RGU=0;RTU=2\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:17:18:0.5:0,9:0:11:0,0:0:9:0:9:0:11:0:0";
  699. let variant: VcfVariant = row.parse()?;
  700. let var_string = variant.into_vcf_row();
  701. assert_eq!(row, &var_string);
  702. let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/.:1:24:3,5:0.208333";
  703. let variant: VcfVariant = row.parse()?;
  704. let var_string = variant.into_vcf_row();
  705. assert_eq!(row, &var_string);
  706. let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/1:1:24:3,5:0.208333";
  707. let variant_b: VcfVariant = row.parse()?;
  708. assert_eq!(variant, variant_b);
  709. let row = "chr1\t475157\t.\tA\tG\t12.301\tPASS\tH;FAU=2;FCU=0;FGU=2;FTU=0;RAU=3;RCU=0;RGU=3;RTU=0\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:12:10:0.5:0,5:0.0769:13:0,1:5:0:5:0:12:0:1:0";
  710. let variant: VcfVariant = row.parse()?;
  711. let var_string = variant.into_vcf_row();
  712. assert_eq!(row, &var_string);
  713. let row = "chr1\t161417408\tr_10_0\tT\t[chr1:161417447[TTGGCAGGTTCC\t.\tPASS\tSVTYPE=BND;MATEID=r_10_1;SVINSLEN=11;SVINSSEQ=TTGGCAGGTTC\tTR:VR\t22:3\t12:0";
  714. let variant: VcfVariant = row.parse()?;
  715. println!("{variant:#?}");
  716. let u = variant.bnd_desc();
  717. println!("{u:#?}");
  718. // Severus mates are not in RC
  719. let vcf = "chr7\t27304522\tseverus_BND6747_1\tN\t[chr6:32688062[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_2;STRANDS=--;MAPQ=60;CLUSTERID=severus_2\tGT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
  720. let variant: VcfVariant = vcf.parse()?;
  721. let bnd_a = variant.bnd_desc()?;
  722. let vcf = "chr6\t32688062\tseverus_BND6747_2\tN\t[chr7:27304522[N\t60\tPASS\tPRECISE;SVTYPE=BND;MATE_ID=severus_BND6747_1;STRANDS=--;MAPQ=60;CLUSTERID=severus_2 GT:VAF:hVAF:DR:DV\t0/1:0.29:0.29,0,0:12:5";
  723. let variant: VcfVariant = vcf.parse()?;
  724. let bnd_b = variant.bnd_desc()?;
  725. assert_eq!(bnd_a, bnd_b.rc());
  726. println!("{bnd_a}\n{bnd_b}");
  727. // Savana here each mate are in RC
  728. let vcf = "chr10\t102039096\tID_35957_2\tG\t]chr10:101973386]G\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_1;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=38,29;TUMOUR_DP_AT=44,21;TUMOUR_DP_AFTER=44,21;NORMAL_DP_BEFORE=13,15;NORMAL_DP_AT=13,15;NORMAL_DP_AFTER=13,15;TUMOUR_AF=0.159,0.333;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=20,16,8;NORMAL_TOTAL_HP_AT=6,7,0;TUMOUR_ALT_HP=0,1,6;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
  729. let variant: VcfVariant = vcf.parse()?;
  730. let bnd_a = variant.bnd_desc()?;
  731. let vcf = "chr10\t101973386\tID_35957_1\tA\tA[chr10:102039096[\t.\tPASS\tSVTYPE=BND;MATEID=ID_35957_2;TUMOUR_READ_SUPPORT=7;TUMOUR_ALN_SUPPORT=7;NORMAL_READ_SUPPORT=0;NORMAL_ALN_SUPPORT=0;SVLEN=65710;BP_NOTATION=+-;SOURCE=SUPPLEMENTARY;CLUSTERED_READS_TUMOUR=7;CLUSTERED_READS_NORMAL=0;ORIGIN_STARTS_STD_DEV=0.35;ORIGIN_MAPQ_MEAN=60;ORIGIN_EVENT_SIZE_STD_DEV=7.248;ORIGIN_EVENT_SIZE_MEDIAN=65710;ORIGIN_EVENT_SIZE_MEAN=65705.4;END_STARTS_STD_DEV=7.007;END_MAPQ_MEAN=60;END_EVENT_SIZE_STD_DEV=7.248;END_EVENT_SIZE_MEDIAN=65710;END_EVENT_SIZE_MEAN=65705.4;TUMOUR_DP_BEFORE=29,38;TUMOUR_DP_AT=21,44;TUMOUR_DP_AFTER=21,44;NORMAL_DP_BEFORE=15,13;NORMAL_DP_AT=15,13;NORMAL_DP_AFTER=15,13;TUMOUR_AF=0.333,0.159;NORMAL_AF=0,0;TUMOUR_TOTAL_HP_AT=17,0,4;NORMAL_TOTAL_HP_AT=5,7,3;TUMOUR_ALT_HP=0,6,1;TUMOUR_PS=101917152;NORMAL_ALT_HP=0,0,0;CLASS=PREDICTED_SOMATIC\tGT\t0/1";
  732. let variant: VcfVariant = vcf.parse()?;
  733. let bnd_b = variant.bnd_desc()?;
  734. assert_eq!(bnd_a, bnd_b);
  735. println!("{bnd_a}\n{bnd_b}");
  736. // Deletions
  737. // Severus
  738. let vcf = "chr7\t143674704\tseverus_DEL7318\tN\t<DEL>\t60\tPASS≥\tPRECISE;SVTYPE=DEL;SVLEN=3642;END=143678346;STRANDS=+-;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:114:0.39:0.39,0,0:14:9";
  739. let variant: VcfVariant = vcf.parse()?;
  740. println!("{:?}", variant.infos);
  741. println!("{:?}", variant.formats);
  742. let del = variant.deletion_desc().unwrap();
  743. println!("{:?}", del);
  744. println!("{:?} {:?}", del.len(), variant.formats.n_alt_depth());
  745. assert_eq!("chr7:143674704_143678346_del", variant.deletion_desc().unwrap().to_string());
  746. println!("--\n");
  747. let vcf="chr7\t144003249\tr_106\tC\t<DEL>\t.\tPASS\tEND=144142733;SVTYPE=DEL;SVLEN=-139484;SVINSLEN=4;SVINSSEQ=GCCA\tTR:VR\t12:10\t51:0";
  748. let variant: VcfVariant = vcf.parse()?;
  749. println!("{:?}", variant.infos);
  750. println!("{:?}", variant.formats);
  751. let del = variant.deletion_desc().unwrap();
  752. println!("{:?}", del);
  753. println!("{:?} {:?}", del.len(), variant.formats.n_alt_depth());
  754. let path = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_Genes.bed";
  755. let r = read_bed(path)?;
  756. let deleted_genes = bedrow_overlaps_par(&r, &vec![&GenomeRange { contig: variant.position.contig, range: del.start..del.end }]).into_iter().filter_map(|e| {
  757. e.name
  758. }).collect::<Vec<String>>().join(", ");
  759. println!("{deleted_genes}");
  760. Ok(())
  761. }
  762. #[test]
  763. fn variant_load_deepvariant() -> anyhow::Result<()> {
  764. init();
  765. let id = "ADJAGBA";
  766. let time = "diag";
  767. let dv = DeepVariant::initialize(id, time, Config::default())?;
  768. let annotations = Annotations::default();
  769. let variant_collection = dv.variants(&annotations)?;
  770. println!("Deepvariant for {id} {time}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  771. Ok(())
  772. }
  773. #[test]
  774. fn variant_load_clairs() -> anyhow::Result<()> {
  775. init();
  776. let id = "ELKLIFI";
  777. let clairs = NanomonSV::initialize(id, Config::default())?;
  778. let u = clairs.should_run();
  779. info!("should_run {u:?}");
  780. // let annotations = Annotations::default();
  781. // let variant_collection = clairs.variants(&annotations)?;
  782. // println!("ClairS for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  783. Ok(())
  784. }
  785. #[test]
  786. fn variant_load_nanomonsv() -> anyhow::Result<()> {
  787. init();
  788. let id = "ADJAGBA";
  789. let nanomonsv = NanomonSV::initialize(id, Config::default())?;
  790. let annotations = Annotations::default();
  791. let variant_collection = nanomonsv.variants(&annotations)?;
  792. println!("NanomonSV for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  793. println!("{:?}", variant_collection.variants.first());
  794. Ok(())
  795. }
  796. // #[test]
  797. // fn variant_load_clairs_germline() -> anyhow::Result<()> {
  798. // init();
  799. // let id = "ADJAGBA";
  800. // let clairs = ClairS::initialize(id, Config::default())?;
  801. // let annotations = Annotations::default();
  802. // let germline_variant_collection = clairs.germline(&annotations)?;
  803. // println!("ClairS for {id}: variants {} {}", germline_variant_collection.variants.len(), germline_variant_collection.vcf.n_variants);
  804. // Ok(())
  805. // }
  806. #[test]
  807. fn pipe_somatic() -> anyhow::Result<()> {
  808. init();
  809. let collections = Collections::new(
  810. CollectionsConfig::default()
  811. )?;
  812. let c = Config {
  813. somatic_pipe_force: true,
  814. ..Default::default()
  815. };
  816. for (a, _) in collections.bam_pairs().iter() {
  817. if ["AUBERT", "BAFFREAU", "BAILLEUL"].contains(&a.id.as_str()) {
  818. continue;
  819. }
  820. if let Err(e) = SomaticPipe::initialize(&a.id, c.clone()).map(|mut p| if p.should_run() {
  821. if let Err(e) = p.run() {
  822. error!("{e}");
  823. }
  824. }) {
  825. error!("{e}");
  826. }
  827. }
  828. Ok(())
  829. // let id = "VILI";
  830. // SomaticPipe::initialize(id, Config::default())?.run()
  831. }
  832. #[test]
  833. fn overlaps() {
  834. init();
  835. let positions = vec![
  836. &GenomePosition { contig: 1, position: 100 },
  837. &GenomePosition { contig: 1, position: 150 },
  838. &GenomePosition { contig: 1, position: 200 },
  839. &GenomePosition { contig: 2, position: 150 },
  840. ];
  841. let ranges = vec![
  842. &GenomeRange { contig: 1, range: 50..150 },
  843. &GenomeRange { contig: 2, range: 100..200 },
  844. ];
  845. let parallel_overlapping_indices = overlaps_par(&positions, &ranges);
  846. assert_eq!(parallel_overlapping_indices, vec![0, 3])
  847. }
  848. #[test]
  849. fn bed_read() -> anyhow::Result<()> {
  850. init();
  851. let path = &Config::default().mask_bed("ADJAGBA");
  852. let r = read_bed(path)?;
  853. println!("{}", r.len());
  854. Ok(())
  855. }
  856. #[test]
  857. fn test_read_dict() -> anyhow::Result<()> {
  858. init();
  859. let genome = read_dict(&Config::default().dict_file)?;
  860. let genome_length: usize = genome.into_iter().map(|(_, len)| len as usize).sum();
  861. println!("{genome_length}");
  862. Ok(())
  863. }
  864. #[test]
  865. fn bases_at() -> anyhow::Result<()> {
  866. init();
  867. let id = "ADJAGBA";
  868. let c = Config::default();
  869. let chr = "chr3";
  870. let position = 62416039; // 1-based
  871. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "diag"))?;
  872. let p = nt_pileup(&mut bam, chr, position - 1, false)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
  873. let mut counts = HashMap::new();
  874. for item in p.iter() {
  875. *counts.entry(item.as_str()).or_insert(0) += 1;
  876. }
  877. for (key, value) in &counts {
  878. println!("{}: {}", key, value);
  879. }
  880. assert_eq!(8, *counts.get("C").unwrap());
  881. assert_eq!(13, *counts.get("G").unwrap());
  882. assert_eq!(6, *counts.get("D").unwrap());
  883. let chr = "chr1";
  884. let position = 3220; // 1-based
  885. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  886. let p = counts_at(&mut bam, chr, position - 1)?;
  887. println!("{p:#?}");
  888. Ok(())
  889. }
  890. #[test]
  891. fn seq_at() -> anyhow::Result<()> {
  892. init();
  893. let c = Config::default();
  894. let chr = "chr1";
  895. let position = 16761;
  896. let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default().build_from_path(c.reference)?;
  897. let r = io::fasta::sequence_at(&mut fasta_reader, chr, position, 3)?;
  898. println!("{r} ({} {:.2})", r.len(), estimate_shannon_entropy(r.as_str()));
  899. Ok(())
  900. }
  901. #[test]
  902. fn ins_at() -> anyhow::Result<()> {
  903. init();
  904. let id = "PASSARD";
  905. let c = Config::default();
  906. let chr = "chr5";
  907. let position = 36122736; // 1-based like in vcf
  908. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  909. // let p = ins_pileup(&mut bam, chr, position - 1, true)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
  910. let counts = counts_ins_at(&mut bam, chr, position -1)?;
  911. println!("{counts:?}");
  912. for (key, value) in &counts {
  913. println!("{}: {}", key, value);
  914. }
  915. Ok(())
  916. }
  917. // #[test]
  918. // fn del_at() -> anyhow::Result<()> {
  919. // let id = "PASSARD";
  920. // let c = Config::default();
  921. //
  922. // let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  923. //
  924. // let pileup_start = crate::collection::bam::nt_pileup(
  925. // &mut bam,
  926. // "chr5",
  927. // 36122735,
  928. // false,
  929. // )?;
  930. //
  931. // let uu = crate::collection::bam::nt_pileup_new(
  932. // &mut bam,
  933. // "chr5",
  934. // 36122735,
  935. // false,
  936. // )?;
  937. //
  938. // let pileup_end = crate::collection::bam::nt_pileup_new(
  939. // &mut bam,
  940. // &var.position.contig(),
  941. // del_repr.end.saturating_sub(1),
  942. // false,
  943. // )?;
  944. //
  945. // println!("{pileup_start:?}");
  946. // println!("{uu:?}");
  947. // let depth = uu.len().max(pileup_end.len());
  948. //
  949. // Ok(())
  950. //
  951. // }
  952. #[test]
  953. fn vep_line() -> anyhow::Result<()> {
  954. init();
  955. let line = "chr2_1922358_-/T\tchr2:1922357-1922358\tT\tMYT1L\tNM_001303052.2\tTranscript\tintron_variant\t-\t-\t-\t-\t-\t-\tIMPACT=MODIFIER;STRAND=-1;SYMBOL=MYT1L;SOURCE=chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz;HGVSc=NM_001303052.2:c.1619-613dup";
  956. let vep_line: VepLine = line.parse()?;
  957. println!("{vep_line:#?}");
  958. let vep: VEP = VEP::try_from(&vep_line)?;
  959. println!("{vep:#?}");
  960. Ok(())
  961. }
  962. #[test]
  963. fn savana_cn() -> anyhow::Result<()> {
  964. init();
  965. let id = "CAMARA";
  966. // let s = SavanaCopyNumber::load_id(id, Config::default())?;
  967. let s = SavanaReadCounts::load_id(id, Config::default())?;
  968. println!("tumoral reads: {}", s.n_tumoral_reads());
  969. println!("normal reads: {}", s.n_normal_reads());
  970. println!("tumoral:\n{:#?}", s.norm_chr_counts());
  971. Ok(())
  972. }
  973. #[test]
  974. fn load_bam() -> anyhow::Result<()> {
  975. init();
  976. let id = "ADJAGBA";
  977. let time = "diag";
  978. let bam_path = Config::default().solo_bam(id, time);
  979. WGSBam::new(Path::new(&bam_path).to_path_buf())?;
  980. Ok(())
  981. }
  982. #[test]
  983. fn tar() -> anyhow::Result<()> {
  984. init();
  985. scan_archive("/data/lto/20241030_CUNVI-DG-N20_SEBZA-DG-N21.tar")?;
  986. Ok(())
  987. }
  988. #[test]
  989. fn run_somatic() -> anyhow::Result<()> {
  990. init();
  991. let collections = Collections::new(
  992. CollectionsConfig::default()
  993. )?;
  994. let bams = collections.bam.by_id_completed(15.0, 10.0);
  995. let n = bams.len();
  996. let mut config = Config::default();
  997. config.somatic_scan_force = true;
  998. warn!("{n} cases");
  999. for (i, bam) in bams.iter().enumerate() {
  1000. let id = &bam.id;
  1001. warn!("{i}/{n} {id}");
  1002. // if id == "BANGA" {
  1003. // continue;
  1004. // }
  1005. if id == "ARM" {
  1006. continue;
  1007. }
  1008. match SomaticPipe::initialize(id, config.clone())?.run() {
  1009. Ok(_) => (),
  1010. Err(e) => error!("{id} {e}"),
  1011. };
  1012. }
  1013. Ok(())
  1014. }
  1015. #[test]
  1016. fn somatic_cases() -> anyhow::Result<()> {
  1017. init();
  1018. let id = "PASSARD";
  1019. let config = Config { somatic_pipe_force: true, ..Default::default() };
  1020. match SomaticPipe::initialize(id, config)?.run() {
  1021. Ok(_) => (),
  1022. Err(e) => error!("{id} {e}"),
  1023. };
  1024. Ok(())
  1025. }
  1026. #[test]
  1027. fn load_variants() -> anyhow::Result<()> {
  1028. init();
  1029. let id = "ACHITE";
  1030. let config = Config::default();
  1031. let path = format!("{}/{id}/diag/{id}_somatic_variants.bit", config.result_dir);
  1032. let variants = variant_collection::Variants::load_from_file(&path)?;
  1033. println!("n variants {}", variants.data.len());
  1034. let n_vep: usize = variants.data.iter().map(|v| v.vep().len()).sum();
  1035. println!("VEP: {n_vep}");
  1036. let translocations = variants.get_alteration_cat(AlterationCategory::TRL);
  1037. println!("{} translocations", translocations.len());
  1038. let threshold = 5;
  1039. let res = group_variants_by_bnd_desc(&translocations, 5);
  1040. let rres = group_variants_by_bnd_rc(&res, threshold);
  1041. rres.iter().for_each(|group| {
  1042. println!("{} {}", group.0.len(), group.1.len());
  1043. });
  1044. Ok(())
  1045. }
  1046. #[test]
  1047. fn load_fc() -> anyhow::Result<()> {
  1048. init();
  1049. // FlowCells::load_archive_from_scan("/data/lto", "/data/archives.json")?;
  1050. let r = FlowCells::load("/home/prom/mnt/store",
  1051. "/data/pandora_id_inputs.json", "/data/archives.json.gz")?;
  1052. println!("{r:#?}");
  1053. Ok(())
  1054. }
  1055. #[test]
  1056. fn alt_cat() -> anyhow::Result<()> {
  1057. let id = "ADJAGBA";
  1058. let config = Config::default();
  1059. let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
  1060. let variants = variant_collection::Variants::load_from_json(&path)?;
  1061. println!("n variants {}", variants.data.len());
  1062. variants.data.iter()
  1063. .filter(|v| v.alteration_category().contains(&AlterationCategory::TRL))
  1064. .for_each(|v| {
  1065. println!("{:?} {}",
  1066. v.vcf_variants.iter().map(|v| v.bnd_desc()).collect::<Vec<_>>(),
  1067. v.annotations.iter().filter(|a| matches!(a, Annotation::Callers(..))).map(|a| a.to_string()).collect::<Vec<String>>().join(";")
  1068. )
  1069. });
  1070. Ok(())
  1071. }
  1072. #[test]
  1073. fn variants_stats() -> anyhow::Result<()> {
  1074. init();
  1075. let config = Config::default();
  1076. let all_variants_bit = find_files(&format!("{}/*/diag/*_somatic_variants.bit", config.result_dir))?;
  1077. for v in all_variants_bit.into_iter() {
  1078. let id = v.file_name().unwrap().to_str().unwrap().split("_somatic").next().unwrap();
  1079. println!("{id}");
  1080. let config = Config::default();
  1081. let path = format!("{}/{id}/diag/{id}_somatic_variants.bit", config.result_dir);
  1082. match variant_collection::Variants::load_from_file(&path) {
  1083. Ok(mut variants) => {
  1084. let (mut high_depth_ranges, _) =
  1085. somatic_depth_quality_ranges(&id, &config)?;
  1086. high_depth_ranges.par_sort_by_key(|r| (r.contig, r.range.start));
  1087. let res = VariantsStats::new(&mut variants, id, &config, &high_depth_ranges)?.save_to_json(&format!(
  1088. "{}/{id}/diag/{id}_somatic_variants_stats.json.gz", config.result_dir));
  1089. if res.is_err() {
  1090. info!("{:#?}", res);
  1091. }
  1092. },
  1093. Err(e) => error!("{e}"),
  1094. }
  1095. }
  1096. Ok(())
  1097. }
  1098. #[test]
  1099. fn constit_stats() {
  1100. init();
  1101. let id = "ADJAGBA";
  1102. let config = Config::default();
  1103. let _ = const_stats(id.to_string(), config);
  1104. }
  1105. #[test]
  1106. fn test_bnd() -> anyhow::Result<()> {
  1107. init();
  1108. let id = "COIFFET";
  1109. let config = Config::default();
  1110. let annotations = Annotations::default();
  1111. let s = Savana::initialize(id, config)?.variants(&annotations)?;
  1112. s.variants.iter().for_each(|e| {
  1113. if let Ok(bnd) = e.bnd_desc() {
  1114. println!("{}\t{}\t{}", e.position , e.reference, e.alternative);
  1115. println!("{:#?}", bnd);
  1116. }
  1117. });
  1118. Ok(())
  1119. }
  1120. #[test]
  1121. fn parse_savana_seg() {
  1122. init();
  1123. let r = SavanaCN::parse_file("ADJAGBA", &config::Config::default()).unwrap().segments;
  1124. println!("{} lines", r.len());
  1125. println!("{:#?}", r.first().unwrap());
  1126. }
  1127. #[test]
  1128. fn whole_scan() -> anyhow::Result<()> {
  1129. init();
  1130. let id = "CHENU";
  1131. let mut config = Config::default();
  1132. let u = config.solo_bam(id, "mrd");
  1133. println!("{u}");
  1134. config.somatic_scan_force = true;
  1135. somatic_scan(id, &config)?;
  1136. Ok(())
  1137. }
  1138. #[test]
  1139. fn parse_gff() -> anyhow::Result<()> {
  1140. init();
  1141. let id = "ADJAGBA";
  1142. let config = Config::default();
  1143. let path = format!("{}/{id}/diag/somatic_variants.bit", config.result_dir);
  1144. let exon_ranges = features_ranges("exon", &config)?;
  1145. let exon_ranges = merge_overlapping_genome_ranges(&exon_ranges);
  1146. let variants = variant_collection::Variants::load_from_file(&path)?;
  1147. let full = variants_stats::somatic_rates(&variants.data, &exon_ranges, &config);
  1148. info!("{full:#?}");
  1149. // let restrained: Vec<variant_collection::Variant> = variants.data.iter().filter(|v| v.vcf_variants.len() >= 2)
  1150. // .cloned().collect();
  1151. // let min_2 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
  1152. // info!("{min_2:#?}");
  1153. //
  1154. // let restrained: Vec<variant_collection::Variant> = restrained.iter().filter(|v| v.vcf_variants.len() >= 3)
  1155. // .cloned().collect();
  1156. // let min_3 = variants_stats::somatic_rates(&restrained, &exon_ranges, &config);
  1157. // info!("{min_3:#?}");
  1158. // let mut high_depth_ranges = variants_stats::high_depth_somatic(id, &config)?;
  1159. // high_depth_ranges.par_sort_by_key(|r| ( r.contig, r.range.start ));
  1160. //
  1161. // let exon_ranges_ref: Vec<&GenomeRange> = exon_ranges.iter().collect();
  1162. // let exons_high_depth = range_intersection_par(&high_depth_ranges.iter().collect::<Vec<&GenomeRange>>(), &exon_ranges_ref);
  1163. //
  1164. // let full = variants_stats::somatic_rates(&variants.data, &exons_high_depth, &config);
  1165. // info!("{full:#?}");
  1166. //
  1167. // info!("n variants loaded: {}", variants.data.len());
  1168. //
  1169. // let r = features_ranges("exon", &config::Config::default())?;
  1170. // info!("n exon: {}", r.len());
  1171. //
  1172. // let merged = merge_overlapping_genome_ranges(&r);
  1173. // info!("n merged exon: {}", merged.len());
  1174. //
  1175. // let ol = par_overlaps(&variants.data, &r);
  1176. // info!("variants in exon {}", ol.len());
  1177. //
  1178. // let n_coding = ol.iter().filter_map(|i| variants.data[*i].best_vep().ok() ).filter_map(|bv| bv.impact()).filter(|impact| *impact <= VepImpact::MODERATE).count();
  1179. // info!("coding variants {n_coding}");
  1180. //
  1181. // let n_bases_m = merged.par_iter().map(|gr| gr.length()).sum::<u32>();
  1182. // info!("{n_bases_m}nt");
  1183. //
  1184. // let mega_base_m = n_bases_m as f64 / 10.0e6;
  1185. //
  1186. // let wgs_len = read_dict(&config.dict_file)?.iter().map(|(_, l)| *l).sum::<u32>();
  1187. // info!("wgs len {wgs_len}");
  1188. // let rate_wgs = variants.data.len() as f64 / (wgs_len as f64 / 10.0e6);
  1189. // info!("somatic mutation rate {rate_wgs:.2}/mb");
  1190. //
  1191. // let n_exons_mb = ol.len() as f64 / mega_base_m;
  1192. // info!("somatic mutation rate in the coding region {n_exons_mb:.2}/mb");
  1193. //
  1194. // let n_exons_mb = n_coding as f64 / mega_base_m;
  1195. // info!("somatic non synonymous mutation rate in the coding region {n_exons_mb:.2}/mb");
  1196. Ok(())
  1197. }
  1198. fn gr(contig: u8, start: u32, end: u32) -> GenomeRange {
  1199. GenomeRange {
  1200. contig,
  1201. range: start..end,
  1202. }
  1203. }
  1204. #[test]
  1205. fn test_both_empty() {
  1206. let a: Vec<&GenomeRange> = vec![];
  1207. let b: Vec<&GenomeRange> = vec![];
  1208. let result = range_intersection_par(&a, &b);
  1209. assert!(result.is_empty());
  1210. }
  1211. #[test]
  1212. fn test_one_empty() {
  1213. let a = [gr(1, 0, 100)];
  1214. let b: Vec<&GenomeRange> = vec![];
  1215. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1216. let mut result = range_intersection_par(&a_refs, &b);
  1217. sort_ranges(&mut result);
  1218. assert!(result.is_empty());
  1219. }
  1220. #[test]
  1221. fn test_single_range_no_overlap() {
  1222. let a = [gr(1, 0, 100)];
  1223. let b = [gr(1, 100, 200)];
  1224. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1225. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1226. let mut result = range_intersection_par(&a_refs, &b_refs);
  1227. sort_ranges(&mut result);
  1228. assert!(result.is_empty());
  1229. }
  1230. #[test]
  1231. fn test_single_range_full_overlap() {
  1232. let a = [gr(1, 0, 100)];
  1233. let b = [gr(1, 0, 100)];
  1234. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1235. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1236. let mut result = range_intersection_par(&a_refs, &b_refs);
  1237. let mut expected = [gr(1, 0, 100)];
  1238. sort_ranges(&mut result);
  1239. sort_ranges(&mut expected);
  1240. assert_eq!(result, expected);
  1241. }
  1242. #[test]
  1243. fn test_different_contigs() {
  1244. let a = [gr(1, 0, 100)];
  1245. let b = [gr(2, 0, 100)];
  1246. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1247. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1248. let mut result = range_intersection_par(&a_refs, &b_refs);
  1249. sort_ranges(&mut result);
  1250. assert!(result.is_empty());
  1251. }
  1252. #[test]
  1253. fn test_touching_ranges() {
  1254. let a = [gr(1, 0, 100)];
  1255. let b = [gr(1, 100, 200)];
  1256. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1257. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1258. let mut result = range_intersection_par(&a_refs, &b_refs);
  1259. sort_ranges(&mut result);
  1260. assert!(result.is_empty());
  1261. }
  1262. #[test]
  1263. fn test_complete_subrange() {
  1264. let a = [gr(1, 0, 200)];
  1265. let b = [gr(1, 50, 150)];
  1266. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1267. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1268. let mut result = range_intersection_par(&a_refs, &b_refs);
  1269. let mut expected = [gr(1, 50, 150)];
  1270. sort_ranges(&mut result);
  1271. sort_ranges(&mut expected);
  1272. assert_eq!(result, expected);
  1273. }
  1274. #[test]
  1275. fn test_multiple_overlaps_same_contig() {
  1276. let a = [gr(1, 0, 50), gr(1, 75, 125)];
  1277. let b = [gr(1, 25, 100), gr(1, 150, 200)];
  1278. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1279. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1280. let mut result = range_intersection_par(&a_refs, &b_refs);
  1281. let mut expected = [gr(1, 25, 50), gr(1, 75, 100)];
  1282. sort_ranges(&mut result);
  1283. sort_ranges(&mut expected);
  1284. assert_eq!(result, expected);
  1285. }
  1286. #[test]
  1287. fn test_multiple_contigs() {
  1288. let a = [gr(1, 0, 100), gr(2, 50, 150), gr(3, 200, 300)];
  1289. let b = [gr(1, 50, 150), gr(2, 0, 100), gr(4, 0, 100)];
  1290. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1291. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1292. let mut result = range_intersection_par(&a_refs, &b_refs);
  1293. let mut expected = [gr(1, 50, 100), gr(2, 50, 100)];
  1294. sort_ranges(&mut result);
  1295. sort_ranges(&mut expected);
  1296. assert_eq!(result, expected);
  1297. }
  1298. #[test]
  1299. fn test_adjacent_ranges() {
  1300. let a = [gr(1, 0, 50), gr(1, 50, 100)];
  1301. let b = [gr(1, 25, 75)];
  1302. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1303. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1304. let mut result = range_intersection_par(&a_refs, &b_refs);
  1305. let mut expected = [gr(1, 25, 50), gr(1, 50, 75)];
  1306. sort_ranges(&mut result);
  1307. sort_ranges(&mut expected);
  1308. assert_eq!(result, expected);
  1309. }
  1310. #[test]
  1311. fn test_minimal_overlap() {
  1312. let a = [gr(1, 0, 100)];
  1313. let b = [gr(1, 99, 200)];
  1314. let a_refs: Vec<&GenomeRange> = a.iter().collect();
  1315. let b_refs: Vec<&GenomeRange> = b.iter().collect();
  1316. let mut result = range_intersection_par(&a_refs, &b_refs);
  1317. let mut expected = [gr(1, 99, 100)];
  1318. sort_ranges(&mut result);
  1319. sort_ranges(&mut expected);
  1320. assert_eq!(result, expected);
  1321. }
  1322. #[test]
  1323. fn todo_scan() -> anyhow::Result<()> {
  1324. init();
  1325. let mut collections = Collections::new(
  1326. CollectionsConfig::default()
  1327. )?;
  1328. collections.todo_bam_count(&Config::default())?;
  1329. collections.tasks.iter().for_each(|t| info!("{t}"));
  1330. let pool = rayon::ThreadPoolBuilder::new()
  1331. .num_threads(100)
  1332. .build()
  1333. .unwrap();
  1334. pool.install(move || {
  1335. collections.tasks.into_iter().for_each(|t| {
  1336. // info!("{t}");
  1337. if let Err(e) = t.run() {
  1338. error!("{e}");
  1339. }
  1340. });
  1341. });
  1342. Ok(())
  1343. }
  1344. /// helper to build a forward‑strand BND (same contig) where
  1345. /// A = pos, B = pos + 5
  1346. fn fwd(contig: &str, pos: u32) -> BNDDesc {
  1347. BNDDesc {
  1348. a_contig: contig.into(),
  1349. a_position: pos,
  1350. a_sens: true,
  1351. b_contig: contig.into(),
  1352. b_position: pos + 5,
  1353. b_sens: true,
  1354. added_nt: String::new(),
  1355. }
  1356. }
  1357. /// Build a six‑node *forward* chain relying **only** on `auto_connect()`
  1358. /// (no manual edges) and assert the Hamiltonian path spans all nodes.
  1359. #[test]
  1360. fn hamiltonian_chain_auto() {
  1361. // positions 10,15 20,25 … 60,65 satisfy B(u) ≤ A(v)
  1362. let bnds: Vec<BNDDesc> = (1..=6).map(|i| fwd("chr1", i * 10)).collect();
  1363. let g: BNDGraph<()> = bnds.to_bnd_graph(); // trait uses auto_connect()
  1364. // ensure auto_connect produced 5 edges in a line
  1365. assert_eq!(g.inner().edge_count(), 5);
  1366. let path = g.hamiltonian_path().expect("chain should be Hamiltonian");
  1367. assert_eq!(path.len(), 6);
  1368. }
  1369. /// Two disconnected "V" shapes -> auto_connect creates 2 edges in each
  1370. /// component, no Hamiltonian path, components sorted by size.
  1371. #[test]
  1372. fn components_after_auto_connect() {
  1373. // comp1: a->b<-c on reverse strand of chrX
  1374. let a = BNDDesc {
  1375. a_contig: "chrX".into(), a_position: 300, a_sens: false,
  1376. b_contig: "chrX".into(), b_position: 250, b_sens: false,
  1377. added_nt: String::new()
  1378. };
  1379. let b = BNDDesc {
  1380. a_contig: "chrX".into(), a_position: 200, a_sens: false,
  1381. b_contig: "chrX".into(), b_position: 150, b_sens: false,
  1382. added_nt: String::new()
  1383. };
  1384. let c = BNDDesc {
  1385. a_contig: "chrX".into(), a_position: 100, a_sens: false,
  1386. b_contig: "chrX".into(), b_position: 50, b_sens: false,
  1387. added_nt: String::new()
  1388. };
  1389. // comp2: three‑node forward chain on chrY
  1390. let d = fwd("chrY", 10);
  1391. let e = fwd("chrY", 20);
  1392. let f = fwd("chrY", 30);
  1393. let g: BNDGraph<()> = vec![a,b,c,d,e,f].to_bnd_graph();
  1394. assert!(g.hamiltonian_path().is_none());
  1395. let comps = g.components_by_size();
  1396. comps.iter().for_each(|a| println!("{}", g.fmt_path(a) ));
  1397. assert_eq!(comps.len(), 2);
  1398. assert_eq!(comps[0].len(), 3);
  1399. assert_eq!(comps[1].len(), 3);
  1400. }
  1401. #[test]
  1402. fn bnd_connections() {
  1403. // comp1: a->b<-c on reverse strand of chrX
  1404. let a = BNDDesc {
  1405. a_contig: "chrX".into(), a_position: 300, a_sens: true,
  1406. b_contig: "chr14".into(), b_position: 250, b_sens: false,
  1407. added_nt: String::new()
  1408. };
  1409. let b = BNDDesc {
  1410. a_contig: "chr14".into(), a_position: 200, a_sens: false,
  1411. b_contig: "chrX".into(), b_position: 301, b_sens: true,
  1412. added_nt: String::new()
  1413. };
  1414. let c = BNDDesc {
  1415. a_contig: "chrX".into(), a_position: 302, a_sens: true,
  1416. b_contig: "chrZ".into(), b_position: 50, b_sens: false,
  1417. added_nt: String::new()
  1418. };
  1419. // comp2: three‑node forward chain on chrY
  1420. let d = fwd("chrY", 10);
  1421. let e = fwd("chrY", 20);
  1422. let f = fwd("chrY", 30);
  1423. let g: BNDGraph<()> = vec![a,b,c,d,e,f].to_bnd_graph();
  1424. assert!(g.hamiltonian_path().is_none());
  1425. let comps = g.components_by_size();
  1426. comps.iter().for_each(|a| println!("{}", g.fmt_path(a) ));
  1427. assert_eq!(comps.len(), 2);
  1428. assert_eq!(comps[0].len(), 3);
  1429. assert_eq!(comps[1].len(), 3);
  1430. }
  1431. }