lib.rs 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731
  1. use std::sync::{Arc, Mutex};
  2. pub mod commands;
  3. pub mod config;
  4. pub mod modkit;
  5. pub mod callers;
  6. pub mod runners;
  7. pub mod collection;
  8. pub mod functions;
  9. pub mod helpers;
  10. pub mod variant;
  11. pub mod io;
  12. pub mod pipes;
  13. pub mod positions;
  14. pub mod annotation;
  15. pub mod cases;
  16. #[macro_use]
  17. extern crate lazy_static;
  18. // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
  19. lazy_static! {
  20. static ref DOCKER_ID: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
  21. }
  22. #[cfg(test)]
  23. mod tests {
  24. use std::{collections::HashMap, fs, path::Path};
  25. use annotation::{vep::{VepLine, VEP}, Annotations};
  26. use callers::{nanomonsv::nanomonsv_create_pon, savana::{Savana, SavanaReadCounts}, severus::{Severus, SeverusSolo}};
  27. use collection::{bam::{counts_at, counts_ins_at, nt_pileup, WGSBam, WGSBamStats}, Initialize, InitializeSolo, Version};
  28. use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
  29. use functions::assembler::{Assembler, AssemblerConfig};
  30. use helpers::estimate_shannon_entropy;
  31. use io::bed::read_bed;
  32. use log::{error, info, warn};
  33. use pipes::somatic::Somatic;
  34. use positions::{overlaps_par, GenomePosition, GenomeRange};
  35. use rayon::prelude::*;
  36. use runners::Run;
  37. use variant::{variant::{Variants, VcfVariant}, variant_collection};
  38. use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
  39. use super::*;
  40. use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, Callers}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado, variant::variant::AlterationCategory};
  41. // export RUST_LOG="debug"
  42. fn init() {
  43. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  44. .is_test(true)
  45. .try_init();
  46. }
  47. #[test]
  48. fn it_works() {
  49. let bam_path = "/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam";
  50. modkit::modkit(bam_path);
  51. }
  52. #[test]
  53. fn run_dorado() -> anyhow::Result<()> {
  54. let case = FlowCellCase {
  55. id: "CONSIGNY".to_string(), time_point: "mrd".to_string(), barcode: "07".to_string(), pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into() };
  56. dorado::Dorado::init(case, Config::default())?.run_pipe()
  57. }
  58. #[test]
  59. fn pod5() -> anyhow::Result<()> {
  60. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  61. .build();
  62. let coll = Pod5Collection::new(
  63. "/data/run_data",
  64. "/data/flow_cells.tsv",
  65. "/data/longreads_basic_pipe",
  66. )?;
  67. println!("{coll:#?}");
  68. // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
  69. Ok(())
  70. }
  71. #[test]
  72. fn bam() -> anyhow::Result<()> {
  73. init();
  74. let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
  75. bam_collection
  76. .bams
  77. .iter()
  78. // .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
  79. .for_each(|b| println!("{b:#?}"));
  80. let u = bam_collection.get("PARACHINI", "mrd");
  81. println!("{u:#?}");
  82. Ok(())
  83. }
  84. #[test]
  85. fn vcf() -> anyhow::Result<()> {
  86. init();
  87. let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
  88. vcf_collection.sort_by_id();
  89. vcf_collection
  90. .vcfs
  91. .iter()
  92. .for_each(|v| v.println().unwrap());
  93. Ok(())
  94. }
  95. // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
  96. // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
  97. #[test]
  98. fn mux() -> anyhow::Result<()> {
  99. init();
  100. let result_dir = "/data/test_suite/results".to_string();
  101. let cases = vec![
  102. FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  103. FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  104. ];
  105. cases.iter().for_each(|c| {
  106. let dir = format!("{result_dir}/{}", c.id);
  107. if Path::new(&dir).exists() {
  108. fs::remove_dir_all(dir).unwrap();
  109. }
  110. });
  111. let config = Config { result_dir, ..Default::default() };
  112. Dorado::from_mux(cases, config)
  113. }
  114. // #[test_log::test]
  115. // fn clairs() -> anyhow::Result<()> {
  116. // let config = ClairSConfig {
  117. // result_dir: "/data/test".to_string(),
  118. // ..ClairSConfig::default()
  119. // };
  120. // ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  121. // }
  122. #[test]
  123. fn nanomonsv() -> anyhow::Result<()> {
  124. init();
  125. let id = "HAMROUNE";
  126. NanomonSV::initialize(id, Config::default())?.run()
  127. }
  128. #[test]
  129. fn nanomddonsv_solo() -> anyhow::Result<()> {
  130. init();
  131. NanomonSVSolo::initialize("BRETON", "diag", Config::default())?.run()
  132. }
  133. // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
  134. #[test]
  135. fn todo_all() -> anyhow::Result<()> {
  136. init();
  137. // let config = CollectionsConfig::default();
  138. let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
  139. info!("Runing todo with config: {:#?}", config);
  140. let mut collections = Collections::new(config)?;
  141. collections.todo()?;
  142. collections.tasks.iter().for_each(|t| println!("{t}"));
  143. println!("{}", collections.tasks.len());
  144. Ok(())
  145. }
  146. #[test]
  147. fn todo_agg() -> anyhow::Result<()> {
  148. init();
  149. let config = CollectionsConfig::default();
  150. info!("Runing todo with config: {:#?}", config);
  151. let collections = Collections::new(config)?;
  152. let agg_tasks = collections.todo_variants_agg()?;
  153. println!("{:#?}", agg_tasks);
  154. println!("{}", agg_tasks.len());
  155. Ok(())
  156. }
  157. #[test]
  158. fn run_agg() -> anyhow::Result<()> {
  159. init();
  160. let config = CollectionsConfig {
  161. id_black_list: vec!["MANCUSO".to_string(),"HAMROUNE".to_string()],
  162. ..Default::default()
  163. };
  164. info!("Runing todo with config: {:#?}", config);
  165. let mut collections = Collections::new(config)?;
  166. collections.tasks = collections.todo_variants_agg()?;
  167. collections.run()?;
  168. Ok(())
  169. }
  170. // export RUST_LOG="debug"
  171. #[test]
  172. fn run_t() -> anyhow::Result<()> {
  173. init();
  174. // let config = CollectionsConfig::default();
  175. let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
  176. run_tasks(config)
  177. }
  178. // #[test_log::test]
  179. // fn bcftools_pass() {
  180. // let config = BcftoolsConfig::default();
  181. // let id = "RICCO";
  182. // let time = "diag";
  183. // let caller = "DeepVariant";
  184. //
  185. // Config::default();
  186. //
  187. // // let (i, o) =
  188. // // let i = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag.nanomonsv.result.vcf");
  189. // // let o = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
  190. // bcftools_keep_pass(&i, &o, config).unwrap();
  191. // }
  192. #[test]
  193. fn bam_ok() -> anyhow::Result<()> {
  194. init();
  195. let collections = Collections::new(
  196. CollectionsConfig::default()
  197. )?;
  198. let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
  199. (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
  200. }).collect();
  201. res.sort_by_key(|b| b.1.clone());
  202. res.sort_by_key(|b| b.0.clone());
  203. res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
  204. Ok(())
  205. }
  206. #[test]
  207. fn todo_assembler() -> anyhow::Result<()> {
  208. init();
  209. let collections = Collections::new(
  210. CollectionsConfig::default()
  211. )?;
  212. collections.todo_assembler()?;
  213. Ok(())
  214. }
  215. #[test]
  216. fn sv_pon() -> anyhow::Result<()> {
  217. init();
  218. nanomonsv_create_pon(&Config::default(), "/data/ref/hs1/nanomonsv_pon.vcf.gz")
  219. }
  220. #[test]
  221. fn todo_mod() -> anyhow::Result<()> {
  222. init();
  223. let collections = Collections::new(
  224. CollectionsConfig::default()
  225. )?;
  226. collections.todo_mod_pileup();
  227. Ok(())
  228. }
  229. #[test]
  230. fn todo_deepv() -> anyhow::Result<()> {
  231. init();
  232. let collections = Collections::new(
  233. CollectionsConfig::default()
  234. )?;
  235. let tasks = collections.todo_deepvariants();
  236. tasks.iter().for_each(|t| info!("{t}"));
  237. info!("n tasks {}", tasks.len());
  238. Ok(())
  239. }
  240. #[test]
  241. fn todo_clairs() -> anyhow::Result<()> {
  242. init();
  243. let collections = Collections::new(
  244. CollectionsConfig::default()
  245. )?;
  246. collections.todo_clairs().iter().for_each(|t| info!("{t}"));
  247. Ok(())
  248. }
  249. #[test]
  250. fn run_assemblers() -> anyhow::Result<()> {
  251. Assembler::new("CAMEL".to_string(), "diag".to_string(), AssemblerConfig::default()).run()
  252. }
  253. // #[test]
  254. // fn run_dmr_par() -> anyhow::Result<()> {
  255. // init();
  256. // let collections = Collections::new(
  257. // CollectionsConfig::default()
  258. // )?;
  259. // let tasks = collections.todo_dmr_c_diag_mrd();
  260. // tasks.iter().for_each(|t| info!("{t}"));
  261. // let len = tasks.len();
  262. // // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap();
  263. // // pool.install(|| {
  264. // // tasks.par_iter().enumerate().for_each(|(i, t)| {
  265. // // let config = ModkitConfig {threads: 2, ..Default::default() };
  266. // // if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); }
  267. // // println!("⚡ {i}/{len}");
  268. // // });
  269. // // });
  270. // Ok(())
  271. // }
  272. #[test]
  273. fn run_mod_par() -> anyhow::Result<()> {
  274. init();
  275. let collections = Collections::new(
  276. CollectionsConfig::default()
  277. )?;
  278. let tasks = collections.todo_mod_pileup();
  279. let len = tasks.len();
  280. tasks.par_iter().enumerate().for_each(|(i, t)| {
  281. let config = ModkitConfig {threads: 2, ..Default::default() };
  282. if let collection::CollectionsTasks::ModPileup { bam, .. } = t { let _ = bed_methyl(bam.to_owned(), &config); }
  283. println!("⚡ {i}/{len}");
  284. });
  285. Ok(())
  286. }
  287. #[test]
  288. fn run_severus() -> anyhow::Result<()> {
  289. init();
  290. Severus::initialize("CAMEL", Config::default())?.run()
  291. }
  292. #[test]
  293. fn run_severus_solo() -> anyhow::Result<()> {
  294. init();
  295. SeverusSolo::initialize("CAMEL","diag", Config::default())?.run()
  296. }
  297. #[test]
  298. fn run_savana() -> anyhow::Result<()> {
  299. init();
  300. let collections = Collections::new(
  301. CollectionsConfig::default()
  302. )?;
  303. for bam in collections.bam.by_id_completed(15.0, 10.0).iter() {
  304. let id = &bam.id;
  305. match ClairS::initialize(id, Config::default())?.run() {
  306. Ok(_) => match Savana::initialize(id, Config::default())?.run() {
  307. Ok(_) => (),
  308. Err(e) => error!("{e}"),
  309. },
  310. Err(e) => error!("{e}"),
  311. }
  312. ;
  313. }
  314. Ok(())
  315. }
  316. #[test]
  317. fn check_versions() -> anyhow::Result<()> {
  318. init();
  319. let config = Config::default();
  320. let v = Savana::version(&config)?;
  321. info!("Savanna version {v}");
  322. let v = Severus::version(&config)?;
  323. info!("Severus version {v}");
  324. Ok(())
  325. }
  326. #[test]
  327. fn run_multi_deepvariant() -> anyhow::Result<()> {
  328. init();
  329. let mut collections = Collections::new(
  330. CollectionsConfig::default()
  331. )?;
  332. collections.run_deepvariant()
  333. }
  334. #[test]
  335. fn run_deepvariant() -> anyhow::Result<()> {
  336. init();
  337. DeepVariant::initialize("HAMROUNE", "diag", Config::default())?.run()
  338. }
  339. #[test]
  340. fn run_clairs() -> anyhow::Result<()> {
  341. init();
  342. ClairS::initialize("ADJAGBA", Config::default())?.run()
  343. }
  344. #[test]
  345. fn run_longphase() -> anyhow::Result<()> {
  346. init();
  347. let id = "BECERRA";
  348. let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
  349. let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz");
  350. let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
  351. LongphaseHap::new(id, &diag_bam, &vcf, LongphaseConfig::default()).run()?;
  352. LongphaseHap::new(id, &mrd_bam, &vcf, LongphaseConfig::default()).run()
  353. }
  354. #[test]
  355. fn run_longphase_modcall() -> anyhow::Result<()> {
  356. init();
  357. let id = "ADJAGBA";
  358. let time = "diag";
  359. LongphaseModcallSolo::initialize(id, time, Config::default())?.run()
  360. }
  361. #[test]
  362. fn run_longphase_phase() -> anyhow::Result<()> {
  363. init();
  364. let id = "ADJAGBA";
  365. LongphasePhase::initialize(id, Config::default())?.run()
  366. }
  367. #[test]
  368. fn variant_parse() -> anyhow::Result<()> {
  369. let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.666667:6,4,0";
  370. let variant: VcfVariant = row.parse()?;
  371. let var_string = variant.into_vcf_row();
  372. assert_eq!(row, &var_string);
  373. let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.";
  374. let variant: VcfVariant = row.parse()?;
  375. let var_string = variant.into_vcf_row();
  376. assert_eq!(row, &var_string);
  377. let row = "chr1\t2628434\t.\tC\tT\t17.973\tPASS\tH;FAU=0;FCU=7;FGU=0;FTU=7;RAU=0;RCU=2;RGU=0;RTU=2\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:17:18:0.5:0,9:0:11:0,0:0:9:0:9:0:11:0:0";
  378. let variant: VcfVariant = row.parse()?;
  379. let var_string = variant.into_vcf_row();
  380. assert_eq!(row, &var_string);
  381. let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/.:1:24:3,5:0.208333";
  382. let variant: VcfVariant = row.parse()?;
  383. let var_string = variant.into_vcf_row();
  384. assert_eq!(row, &var_string);
  385. let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/1:1:24:3,5:0.208333";
  386. let variant_b: VcfVariant = row.parse()?;
  387. assert_eq!(variant, variant_b);
  388. let row = "chr1\t475157\t.\tA\tG\t12.301\tPASS\tH;FAU=2;FCU=0;FGU=2;FTU=0;RAU=3;RCU=0;RGU=3;RTU=0\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:12:10:0.5:0,5:0.0769:13:0,1:5:0:5:0:12:0:1:0";
  389. let variant: VcfVariant = row.parse()?;
  390. let var_string = variant.into_vcf_row();
  391. assert_eq!(row, &var_string);
  392. let row = "chr1\t161417408\tr_10_0\tT\t[chr1:161417447[TTGGCAGGTTCC\t.\tPASS\tSVTYPE=BND;MATEID=r_10_1;SVINSLEN=11;SVINSSEQ=TTGGCAGGTTC\tTR:VR\t22:3\t12:0";
  393. let variant: VcfVariant = row.parse()?;
  394. println!("{variant:#?}");
  395. Ok(())
  396. }
  397. #[test]
  398. fn variant_load_deepvariant() -> anyhow::Result<()> {
  399. init();
  400. let id = "ADJAGBA";
  401. let time = "diag";
  402. let dv = DeepVariant::initialize(id, time, Config::default())?;
  403. let annotations = Annotations::default();
  404. let variant_collection = dv.variants(&annotations)?;
  405. println!("Deepvariant for {id} {time}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  406. Ok(())
  407. }
  408. #[test]
  409. fn variant_load_clairs() -> anyhow::Result<()> {
  410. init();
  411. let id = "ADJAGBA";
  412. let clairs = ClairS::initialize(id, Config::default())?;
  413. let annotations = Annotations::default();
  414. let variant_collection = clairs.variants(&annotations)?;
  415. println!("ClairS for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  416. Ok(())
  417. }
  418. #[test]
  419. fn variant_load_nanomonsv() -> anyhow::Result<()> {
  420. init();
  421. let id = "ADJAGBA";
  422. let nanomonsv = NanomonSV::initialize(id, Config::default())?;
  423. let annotations = Annotations::default();
  424. let variant_collection = nanomonsv.variants(&annotations)?;
  425. println!("NanomonSV for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  426. println!("{:?}", variant_collection.variants.first());
  427. Ok(())
  428. }
  429. #[test]
  430. fn variant_load_clairs_germline() -> anyhow::Result<()> {
  431. init();
  432. let id = "ADJAGBA";
  433. let clairs = ClairS::initialize(id, Config::default())?;
  434. let annotations = Annotations::default();
  435. let germline_variant_collection = clairs.germline(&annotations)?;
  436. println!("ClairS for {id}: variants {} {}", germline_variant_collection.variants.len(), germline_variant_collection.vcf.n_variants);
  437. Ok(())
  438. }
  439. #[test]
  440. fn pipe_somatic() -> anyhow::Result<()> {
  441. init();
  442. let id = "ADJAGBA";
  443. Somatic::initialize(id, Config::default())?.run()
  444. }
  445. #[test]
  446. fn overlaps() {
  447. init();
  448. let positions = vec![
  449. &GenomePosition { contig: 1, position: 100 },
  450. &GenomePosition { contig: 1, position: 150 },
  451. &GenomePosition { contig: 1, position: 200 },
  452. &GenomePosition { contig: 2, position: 150 },
  453. ];
  454. let ranges = vec![
  455. &GenomeRange { contig: 1, range: 50..150 },
  456. &GenomeRange { contig: 2, range: 100..200 },
  457. ];
  458. let parallel_overlapping_indices = overlaps_par(&positions, &ranges);
  459. assert_eq!(parallel_overlapping_indices, vec![0, 3])
  460. }
  461. #[test]
  462. fn bed_read() -> anyhow::Result<()> {
  463. init();
  464. let path = &Config::default().mask_bed("ADJAGBA");
  465. let r = read_bed(path)?;
  466. println!("{}", r.len());
  467. Ok(())
  468. }
  469. #[test]
  470. fn bases_at() -> anyhow::Result<()> {
  471. init();
  472. let id = "ADJAGBA";
  473. let c = Config::default();
  474. let chr = "chr3";
  475. let position = 62416039; // 1-based
  476. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "diag"))?;
  477. let p = nt_pileup(&mut bam, chr, position - 1, false)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
  478. let mut counts = HashMap::new();
  479. for item in p.iter() {
  480. *counts.entry(item.as_str()).or_insert(0) += 1;
  481. }
  482. for (key, value) in &counts {
  483. println!("{}: {}", key, value);
  484. }
  485. assert_eq!(8, *counts.get("C").unwrap());
  486. assert_eq!(13, *counts.get("G").unwrap());
  487. assert_eq!(6, *counts.get("D").unwrap());
  488. let chr = "chr1";
  489. let position = 3220; // 1-based
  490. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  491. let p = counts_at(&mut bam, chr, position - 1)?;
  492. println!("{p:#?}");
  493. Ok(())
  494. }
  495. #[test]
  496. fn seq_at() -> anyhow::Result<()> {
  497. init();
  498. let c = Config::default();
  499. let chr = "chr3";
  500. let position = 716766;
  501. let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default().build_from_path(c.reference)?;
  502. let r = io::fasta::sequence_at(&mut fasta_reader, chr, position, 10)?;
  503. println!("{r} ({} {:.2})", r.len(), estimate_shannon_entropy(r.as_str()));
  504. Ok(())
  505. }
  506. #[test]
  507. fn ins_at() -> anyhow::Result<()> {
  508. init();
  509. let id = "ADJAGBA";
  510. let c = Config::default();
  511. let chr = "chr1";
  512. let position = 52232; // 1-based like in vcf
  513. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  514. // let p = ins_pileup(&mut bam, chr, position - 1, true)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
  515. let counts = counts_ins_at(&mut bam, chr, position -1)?;
  516. for (key, value) in &counts {
  517. println!("{}: {}", key, value);
  518. }
  519. Ok(())
  520. }
  521. #[test]
  522. fn vep_line() -> anyhow::Result<()> {
  523. init();
  524. let line = "chr2_1922358_-/T\tchr2:1922357-1922358\tT\tMYT1L\tNM_001303052.2\tTranscript\tintron_variant\t-\t-\t-\t-\t-\t-\tIMPACT=MODIFIER;STRAND=-1;SYMBOL=MYT1L;SOURCE=chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz;HGVSc=NM_001303052.2:c.1619-613dup";
  525. let vep_line: VepLine = line.parse()?;
  526. println!("{vep_line:#?}");
  527. let vep: VEP = VEP::try_from(&vep_line)?;
  528. println!("{vep:#?}");
  529. Ok(())
  530. }
  531. #[test]
  532. fn savana_cn() -> anyhow::Result<()> {
  533. init();
  534. let id = "CAMARA";
  535. // let s = SavanaCopyNumber::load_id(id, Config::default())?;
  536. let s = SavanaReadCounts::load_id(id, Config::default())?;
  537. println!("tumoral reads: {}", s.n_tumoral_reads());
  538. println!("normal reads: {}", s.n_normal_reads());
  539. println!("tumoral:\n{:#?}", s.norm_chr_counts());
  540. Ok(())
  541. }
  542. #[test]
  543. fn coverage() -> anyhow::Result<()> {
  544. init();
  545. let id = "CAMARA";
  546. let time = "diag";
  547. WGSBamStats::new(id, time, Config::default())?.print();
  548. Ok(())
  549. }
  550. #[test]
  551. fn load_bam() -> anyhow::Result<()> {
  552. init();
  553. let id = "ADJAGBA";
  554. let time = "diag";
  555. let bam_path = Config::default().solo_bam(id, time);
  556. WGSBam::new(Path::new(&bam_path).to_path_buf())?;
  557. Ok(())
  558. }
  559. #[test]
  560. fn tar() -> anyhow::Result<()> {
  561. init();
  562. scan_archive("/data/lto/20241030_CUNVI-DG-N20_SEBZA-DG-N21.tar")?;
  563. Ok(())
  564. }
  565. #[test]
  566. fn run_somatic() -> anyhow::Result<()> {
  567. init();
  568. let collections = Collections::new(
  569. CollectionsConfig::default()
  570. )?;
  571. let bams = collections.bam.by_id_completed(15.0, 10.0);
  572. let n = bams.len();
  573. warn!("{n} cases");
  574. for (i, bam) in bams.iter().enumerate() {
  575. let id = &bam.id;
  576. warn!("{i}/{n} {id}");
  577. if id == "BANGA" {
  578. continue;
  579. }
  580. match Somatic::initialize(id, Config::default())?.run() {
  581. Ok(_) => (),
  582. Err(e) => error!("{id} {e}"),
  583. };
  584. }
  585. Ok(())
  586. }
  587. #[test]
  588. fn run_somatic_case() -> anyhow::Result<()> {
  589. init();
  590. let id = "ADJAGBA";
  591. let mut config = Config::default();
  592. config.somatic_pipe_force = true;
  593. match Somatic::initialize(id, config)?.run() {
  594. Ok(_) => (),
  595. Err(e) => error!("{id} {e}"),
  596. };
  597. Ok(())
  598. }
  599. #[test]
  600. fn load_variants() -> anyhow::Result<()> {
  601. init();
  602. let id = "ADJAGBA";
  603. let config = Config::default();
  604. let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
  605. let variants = variant_collection::Variants::load_from_json(&path)?;
  606. println!("n variants {}", variants.data.len());
  607. let n_vep: usize = variants.data.iter().map(|v| v.vep().len()).sum();
  608. println!("VEP: {n_vep}");
  609. Ok(())
  610. }
  611. #[test]
  612. fn load_fc() -> anyhow::Result<()> {
  613. init();
  614. // FlowCells::load_archive_from_scan("/data/lto", "/data/archives.json")?;
  615. let r = FlowCells::load("/data/run_data", "/data/inputs_ids.json", "/data/archives.json")?;
  616. println!("{r:#?}");
  617. Ok(())
  618. }
  619. #[test]
  620. fn alt_cat() -> anyhow::Result<()> {
  621. let id = "ADJAGBA";
  622. let config = Config::default();
  623. let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
  624. let variants = variant_collection::Variants::load_from_json(&path)?;
  625. println!("n variants {}", variants.data.len());
  626. variants.data.iter()
  627. .filter(|v| v.alteration_category().contains(&AlterationCategory::TRL))
  628. .for_each(|v| {
  629. println!("{:?} {}",
  630. v.vcf_variants.iter().map(|v| v.bnd_desc()).collect::<Vec<_>>(),
  631. v.annotations.iter().filter(|a| matches!(a, Annotation::Callers(..))).map(|a| a.to_string()).collect::<Vec<String>>().join(";")
  632. )
  633. });
  634. Ok(())
  635. }
  636. }