lib.rs 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695
  1. use std::sync::{Arc, Mutex};
  2. pub mod commands;
  3. pub mod config;
  4. pub mod modkit;
  5. pub mod callers;
  6. pub mod runners;
  7. pub mod collection;
  8. pub mod functions;
  9. pub mod helpers;
  10. pub mod variant;
  11. pub mod io;
  12. pub mod pipes;
  13. pub mod positions;
  14. pub mod annotation;
  15. pub mod cases;
  16. #[macro_use]
  17. extern crate lazy_static;
  18. // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
  19. lazy_static! {
  20. static ref DOCKER_ID: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
  21. }
  22. #[cfg(test)]
  23. mod tests {
  24. use std::{collections::HashMap, fs, path::Path};
  25. use annotation::{vep::{VepLine, VEP}, Annotations};
  26. use callers::{nanomonsv::nanomonsv_create_pon, savana::{Savana, SavanaReadCounts}, severus::{Severus, SeverusSolo}};
  27. use collection::{bam::{counts_at, counts_ins_at, nt_pileup, WGSBam, WGSBamStats}, pod5::{Pod5, Pod5Config}, Initialize, InitializeSolo, Version};
  28. use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
  29. use functions::assembler::{Assembler, AssemblerConfig};
  30. use helpers::estimate_shannon_entropy;
  31. use io::bed::read_bed;
  32. use log::{error, info, warn};
  33. use pipes::somatic::Somatic;
  34. use positions::{overlaps_par, GenomePosition, GenomeRange};
  35. use rayon::prelude::*;
  36. use runners::Run;
  37. use variant::{variant::{Variants, VcfVariant}, variant_collection};
  38. use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
  39. use super::*;
  40. use crate::{callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}}, collection::{bam, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado};
  41. // export RUST_LOG="debug"
  42. fn init() {
  43. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  44. .is_test(true)
  45. .try_init();
  46. }
  47. #[test]
  48. fn it_works() {
  49. let bam_path = "/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam";
  50. modkit::modkit(bam_path);
  51. }
  52. #[test]
  53. fn run_dorado() -> anyhow::Result<()> {
  54. let case = FlowCellCase {
  55. id: "CONSIGNY".to_string(), time_point: "mrd".to_string(), barcode: "07".to_string(), pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into() };
  56. dorado::Dorado::init(case, Config::default())?.run_pipe()
  57. }
  58. #[test]
  59. fn pod5() -> anyhow::Result<()> {
  60. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  61. .build();
  62. let _ = Pod5Collection::new(
  63. "/data/run_data",
  64. "/data/flow_cells.tsv",
  65. "/data/longreads_basic_pipe",
  66. )?;
  67. // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
  68. Ok(())
  69. }
  70. #[test]
  71. fn bam() -> anyhow::Result<()> {
  72. init();
  73. let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
  74. bam_collection
  75. .bams
  76. .iter()
  77. // .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
  78. .for_each(|b| println!("{b:#?}"));
  79. let u = bam_collection.get("PARACHINI", "mrd");
  80. println!("{u:#?}");
  81. Ok(())
  82. }
  83. #[test]
  84. fn vcf() -> anyhow::Result<()> {
  85. init();
  86. let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
  87. vcf_collection.sort_by_id();
  88. vcf_collection
  89. .vcfs
  90. .iter()
  91. .for_each(|v| v.println().unwrap());
  92. Ok(())
  93. }
  94. // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
  95. // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
  96. #[test]
  97. fn mux() -> anyhow::Result<()> {
  98. init();
  99. let result_dir = "/data/test_suite/results".to_string();
  100. let cases = vec![
  101. FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  102. FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  103. ];
  104. cases.iter().for_each(|c| {
  105. let dir = format!("{result_dir}/{}", c.id);
  106. if Path::new(&dir).exists() {
  107. fs::remove_dir_all(dir).unwrap();
  108. }
  109. });
  110. let config = Config { result_dir, ..Default::default() };
  111. Dorado::from_mux(cases, config)
  112. }
  113. // #[test_log::test]
  114. // fn clairs() -> anyhow::Result<()> {
  115. // let config = ClairSConfig {
  116. // result_dir: "/data/test".to_string(),
  117. // ..ClairSConfig::default()
  118. // };
  119. // ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  120. // }
  121. #[test]
  122. fn nanomonsv() -> anyhow::Result<()> {
  123. init();
  124. let id = "HAMROUNE";
  125. NanomonSV::initialize(id, Config::default())?.run()
  126. }
  127. #[test]
  128. fn nanomddonsv_solo() -> anyhow::Result<()> {
  129. init();
  130. NanomonSVSolo::initialize("BRETON", "diag", Config::default())?.run()
  131. }
  132. // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
  133. #[test]
  134. fn todo_all() -> anyhow::Result<()> {
  135. init();
  136. // let config = CollectionsConfig::default();
  137. let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
  138. info!("Runing todo with config: {:#?}", config);
  139. let mut collections = Collections::new(config)?;
  140. collections.todo()?;
  141. collections.tasks.iter().for_each(|t| println!("{t}"));
  142. println!("{}", collections.tasks.len());
  143. Ok(())
  144. }
  145. #[test]
  146. fn todo_agg() -> anyhow::Result<()> {
  147. init();
  148. let config = CollectionsConfig::default();
  149. info!("Runing todo with config: {:#?}", config);
  150. let collections = Collections::new(config)?;
  151. let agg_tasks = collections.todo_variants_agg()?;
  152. println!("{:#?}", agg_tasks);
  153. println!("{}", agg_tasks.len());
  154. Ok(())
  155. }
  156. #[test]
  157. fn run_agg() -> anyhow::Result<()> {
  158. init();
  159. let config = CollectionsConfig {
  160. id_black_list: vec!["MANCUSO".to_string(),"HAMROUNE".to_string()],
  161. ..Default::default()
  162. };
  163. info!("Runing todo with config: {:#?}", config);
  164. let mut collections = Collections::new(config)?;
  165. collections.tasks = collections.todo_variants_agg()?;
  166. collections.run()?;
  167. Ok(())
  168. }
  169. // export RUST_LOG="debug"
  170. #[test]
  171. fn run_t() -> anyhow::Result<()> {
  172. init();
  173. // let config = CollectionsConfig::default();
  174. let config = CollectionsConfig { pod_dir: "/data/run_data".to_string(), ..Default::default() };
  175. run_tasks(config)
  176. }
  177. // #[test_log::test]
  178. // fn bcftools_pass() {
  179. // let config = BcftoolsConfig::default();
  180. // let id = "RICCO";
  181. // let time = "diag";
  182. // let caller = "DeepVariant";
  183. //
  184. // Config::default();
  185. //
  186. // // let (i, o) =
  187. // // let i = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag.nanomonsv.result.vcf");
  188. // // let o = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
  189. // bcftools_keep_pass(&i, &o, config).unwrap();
  190. // }
  191. #[test]
  192. fn bam_ok() -> anyhow::Result<()> {
  193. init();
  194. let collections = Collections::new(
  195. CollectionsConfig::default()
  196. )?;
  197. let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
  198. (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
  199. }).collect();
  200. res.sort_by_key(|b| b.1.clone());
  201. res.sort_by_key(|b| b.0.clone());
  202. res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
  203. Ok(())
  204. }
  205. #[test]
  206. fn todo_assembler() -> anyhow::Result<()> {
  207. init();
  208. let collections = Collections::new(
  209. CollectionsConfig::default()
  210. )?;
  211. collections.todo_assembler()?;
  212. Ok(())
  213. }
  214. #[test]
  215. fn sv_pon() -> anyhow::Result<()> {
  216. init();
  217. nanomonsv_create_pon(&Config::default(), "/data/ref/hs1/nanomonsv_pon.vcf.gz")
  218. }
  219. #[test]
  220. fn todo_mod() -> anyhow::Result<()> {
  221. init();
  222. let collections = Collections::new(
  223. CollectionsConfig::default()
  224. )?;
  225. collections.todo_mod_pileup();
  226. Ok(())
  227. }
  228. #[test]
  229. fn todo_deepv() -> anyhow::Result<()> {
  230. init();
  231. let collections = Collections::new(
  232. CollectionsConfig::default()
  233. )?;
  234. let tasks = collections.todo_deepvariants();
  235. tasks.iter().for_each(|t| info!("{t}"));
  236. info!("n tasks {}", tasks.len());
  237. Ok(())
  238. }
  239. #[test]
  240. fn todo_clairs() -> anyhow::Result<()> {
  241. init();
  242. let collections = Collections::new(
  243. CollectionsConfig::default()
  244. )?;
  245. collections.todo_clairs().iter().for_each(|t| info!("{t}"));
  246. Ok(())
  247. }
  248. #[test]
  249. fn run_assemblers() -> anyhow::Result<()> {
  250. Assembler::new("CAMEL".to_string(), "diag".to_string(), AssemblerConfig::default()).run()
  251. }
  252. // #[test]
  253. // fn run_dmr_par() -> anyhow::Result<()> {
  254. // init();
  255. // let collections = Collections::new(
  256. // CollectionsConfig::default()
  257. // )?;
  258. // let tasks = collections.todo_dmr_c_diag_mrd();
  259. // tasks.iter().for_each(|t| info!("{t}"));
  260. // let len = tasks.len();
  261. // // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap();
  262. // // pool.install(|| {
  263. // // tasks.par_iter().enumerate().for_each(|(i, t)| {
  264. // // let config = ModkitConfig {threads: 2, ..Default::default() };
  265. // // if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); }
  266. // // println!("⚡ {i}/{len}");
  267. // // });
  268. // // });
  269. // Ok(())
  270. // }
  271. #[test]
  272. fn run_mod_par() -> anyhow::Result<()> {
  273. init();
  274. let collections = Collections::new(
  275. CollectionsConfig::default()
  276. )?;
  277. let tasks = collections.todo_mod_pileup();
  278. let len = tasks.len();
  279. tasks.par_iter().enumerate().for_each(|(i, t)| {
  280. let config = ModkitConfig {threads: 2, ..Default::default() };
  281. if let collection::CollectionsTasks::ModPileup { bam, .. } = t { let _ = bed_methyl(bam.to_owned(), &config); }
  282. println!("⚡ {i}/{len}");
  283. });
  284. Ok(())
  285. }
  286. #[test]
  287. fn run_severus() -> anyhow::Result<()> {
  288. init();
  289. Severus::initialize("CAMEL", Config::default())?.run()
  290. }
  291. #[test]
  292. fn run_severus_solo() -> anyhow::Result<()> {
  293. init();
  294. SeverusSolo::initialize("CAMEL","diag", Config::default())?.run()
  295. }
  296. #[test]
  297. fn run_savana() -> anyhow::Result<()> {
  298. init();
  299. let collections = Collections::new(
  300. CollectionsConfig::default()
  301. )?;
  302. for bam in collections.bam.by_id_completed(15.0, 10.0).iter() {
  303. let id = &bam.id;
  304. match ClairS::initialize(id, Config::default())?.run() {
  305. Ok(_) => match Savana::initialize(id, Config::default())?.run() {
  306. Ok(_) => (),
  307. Err(e) => error!("{e}"),
  308. },
  309. Err(e) => error!("{e}"),
  310. }
  311. ;
  312. }
  313. Ok(())
  314. }
  315. #[test]
  316. fn check_versions() -> anyhow::Result<()> {
  317. init();
  318. let config = Config::default();
  319. let v = Savana::version(&config)?;
  320. info!("Savanna version {v}");
  321. let v = Severus::version(&config)?;
  322. info!("Severus version {v}");
  323. Ok(())
  324. }
  325. #[test]
  326. fn run_multi_deepvariant() -> anyhow::Result<()> {
  327. init();
  328. let mut collections = Collections::new(
  329. CollectionsConfig::default()
  330. )?;
  331. collections.run_deepvariant()
  332. }
  333. #[test]
  334. fn run_deepvariant() -> anyhow::Result<()> {
  335. init();
  336. DeepVariant::initialize("HAMROUNE", "diag", Config::default())?.run()
  337. }
  338. #[test]
  339. fn run_clairs() -> anyhow::Result<()> {
  340. init();
  341. ClairS::initialize("ADJAGBA", Config::default())?.run()
  342. }
  343. #[test]
  344. fn run_longphase() -> anyhow::Result<()> {
  345. init();
  346. let id = "BECERRA";
  347. let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
  348. let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz");
  349. let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
  350. LongphaseHap::new(id, &diag_bam, &vcf, LongphaseConfig::default()).run()?;
  351. LongphaseHap::new(id, &mrd_bam, &vcf, LongphaseConfig::default()).run()
  352. }
  353. #[test]
  354. fn run_longphase_modcall() -> anyhow::Result<()> {
  355. init();
  356. let id = "ADJAGBA";
  357. let time = "diag";
  358. LongphaseModcallSolo::initialize(id, time, Config::default())?.run()
  359. }
  360. #[test]
  361. fn run_longphase_phase() -> anyhow::Result<()> {
  362. init();
  363. let id = "ADJAGBA";
  364. LongphasePhase::initialize(id, Config::default())?.run()
  365. }
  366. #[test]
  367. fn variant_parse() -> anyhow::Result<()> {
  368. let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.666667:6,4,0";
  369. let variant: VcfVariant = row.parse()?;
  370. let var_string = variant.into_vcf_row();
  371. assert_eq!(row, &var_string);
  372. let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.";
  373. let variant: VcfVariant = row.parse()?;
  374. let var_string = variant.into_vcf_row();
  375. assert_eq!(row, &var_string);
  376. let row = "chr1\t2628434\t.\tC\tT\t17.973\tPASS\tH;FAU=0;FCU=7;FGU=0;FTU=7;RAU=0;RCU=2;RGU=0;RTU=2\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:17:18:0.5:0,9:0:11:0,0:0:9:0:9:0:11:0:0";
  377. let variant: VcfVariant = row.parse()?;
  378. let var_string = variant.into_vcf_row();
  379. assert_eq!(row, &var_string);
  380. let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/.:1:24:3,5:0.208333";
  381. let variant: VcfVariant = row.parse()?;
  382. let var_string = variant.into_vcf_row();
  383. assert_eq!(row, &var_string);
  384. let row = "chr1\t52232\t.\tC\tCT\t18\t.\t.\tGT:GQ:DP:AD:AF\t1/1:1:24:3,5:0.208333";
  385. let variant_b: VcfVariant = row.parse()?;
  386. assert_eq!(variant, variant_b);
  387. let row = "chr1\t475157\t.\tA\tG\t12.301\tPASS\tH;FAU=2;FCU=0;FGU=2;FTU=0;RAU=3;RCU=0;RGU=3;RTU=0\tGT:GQ:DP:AF:AD:NAF:NDP:NAD:AU:CU:GU:TU:NAU:NCU:NGU:NTU\t0/1:12:10:0.5:0,5:0.0769:13:0,1:5:0:5:0:12:0:1:0";
  388. let variant: VcfVariant = row.parse()?;
  389. let var_string = variant.into_vcf_row();
  390. assert_eq!(row, &var_string);
  391. Ok(())
  392. }
  393. #[test]
  394. fn variant_load_deepvariant() -> anyhow::Result<()> {
  395. init();
  396. let id = "ADJAGBA";
  397. let time = "diag";
  398. let dv = DeepVariant::initialize(id, time, Config::default())?;
  399. let annotations = Annotations::default();
  400. let variant_collection = dv.variants(&annotations)?;
  401. println!("Deepvariant for {id} {time}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  402. Ok(())
  403. }
  404. #[test]
  405. fn variant_load_clairs() -> anyhow::Result<()> {
  406. init();
  407. let id = "ADJAGBA";
  408. let clairs = ClairS::initialize(id, Config::default())?;
  409. let annotations = Annotations::default();
  410. let variant_collection = clairs.variants(&annotations)?;
  411. println!("ClairS for {id}: variants {} {}", variant_collection.variants.len(), variant_collection.vcf.n_variants);
  412. Ok(())
  413. }
  414. #[test]
  415. fn variant_load_clairs_germline() -> anyhow::Result<()> {
  416. init();
  417. let id = "ADJAGBA";
  418. let clairs = ClairS::initialize(id, Config::default())?;
  419. let annotations = Annotations::default();
  420. let germline_variant_collection = clairs.germline(&annotations)?;
  421. println!("ClairS for {id}: variants {} {}", germline_variant_collection.variants.len(), germline_variant_collection.vcf.n_variants);
  422. Ok(())
  423. }
  424. #[test]
  425. fn pipe_somatic() -> anyhow::Result<()> {
  426. init();
  427. let id = "ADJAGBA";
  428. Somatic::initialize(id, Config::default())?.run()
  429. }
  430. #[test]
  431. fn overlaps() {
  432. init();
  433. let positions = vec![
  434. &GenomePosition { contig: 1, position: 100 },
  435. &GenomePosition { contig: 1, position: 150 },
  436. &GenomePosition { contig: 1, position: 200 },
  437. &GenomePosition { contig: 2, position: 150 },
  438. ];
  439. let ranges = vec![
  440. &GenomeRange { contig: 1, range: 50..150 },
  441. &GenomeRange { contig: 2, range: 100..200 },
  442. ];
  443. let parallel_overlapping_indices = overlaps_par(&positions, &ranges);
  444. assert_eq!(parallel_overlapping_indices, vec![0, 3])
  445. }
  446. #[test]
  447. fn bed_read() -> anyhow::Result<()> {
  448. init();
  449. let path = &Config::default().mask_bed("ADJAGBA");
  450. let r = read_bed(path)?;
  451. println!("{}", r.len());
  452. Ok(())
  453. }
  454. #[test]
  455. fn bases_at() -> anyhow::Result<()> {
  456. init();
  457. let id = "ADJAGBA";
  458. let c = Config::default();
  459. let chr = "chr3";
  460. let position = 62416039; // 1-based
  461. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "diag"))?;
  462. let p = nt_pileup(&mut bam, chr, position - 1, false)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
  463. let mut counts = HashMap::new();
  464. for item in p.iter() {
  465. *counts.entry(item.as_str()).or_insert(0) += 1;
  466. }
  467. for (key, value) in &counts {
  468. println!("{}: {}", key, value);
  469. }
  470. assert_eq!(8, *counts.get("C").unwrap());
  471. assert_eq!(13, *counts.get("G").unwrap());
  472. assert_eq!(6, *counts.get("D").unwrap());
  473. let chr = "chr1";
  474. let position = 3220; // 1-based
  475. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  476. let p = counts_at(&mut bam, chr, position - 1)?;
  477. println!("{p:#?}");
  478. Ok(())
  479. }
  480. #[test]
  481. fn seq_at() -> anyhow::Result<()> {
  482. init();
  483. let c = Config::default();
  484. let chr = "chr3";
  485. let position = 716766;
  486. let mut fasta_reader = noodles_fasta::indexed_reader::Builder::default().build_from_path(c.reference)?;
  487. let r = pipes::somatic::sequence_at(&mut fasta_reader, chr, position, 10)?;
  488. println!("{r} ({} {:.2})", r.len(), estimate_shannon_entropy(r.as_str()));
  489. Ok(())
  490. }
  491. #[test]
  492. fn ins_at() -> anyhow::Result<()> {
  493. init();
  494. let id = "ADJAGBA";
  495. let c = Config::default();
  496. let chr = "chr1";
  497. let position = 52232; // 1-based like in vcf
  498. let mut bam = rust_htslib::bam::IndexedReader::from_path(c.solo_bam(id, "mrd"))?;
  499. // let p = ins_pileup(&mut bam, chr, position - 1, true)?.iter().map(|e| String::from_utf8(vec![*e]).unwrap()).collect::<Vec<_>>();
  500. let counts = counts_ins_at(&mut bam, chr, position -1)?;
  501. for (key, value) in &counts {
  502. println!("{}: {}", key, value);
  503. }
  504. Ok(())
  505. }
  506. #[test]
  507. fn vep_line() -> anyhow::Result<()> {
  508. init();
  509. let line = "chr2_1922358_-/T\tchr2:1922357-1922358\tT\tMYT1L\tNM_001303052.2\tTranscript\tintron_variant\t-\t-\t-\t-\t-\t-\tIMPACT=MODIFIER;STRAND=-1;SYMBOL=MYT1L;SOURCE=chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz;HGVSc=NM_001303052.2:c.1619-613dup";
  510. let vep_line: VepLine = line.parse()?;
  511. println!("{vep_line:#?}");
  512. let vep: VEP = VEP::try_from(&vep_line)?;
  513. println!("{vep:#?}");
  514. Ok(())
  515. }
  516. #[test]
  517. fn savana_cn() -> anyhow::Result<()> {
  518. init();
  519. let id = "CAMARA";
  520. // let s = SavanaCopyNumber::load_id(id, Config::default())?;
  521. let s = SavanaReadCounts::load_id(id, Config::default())?;
  522. println!("tumoral reads: {}", s.n_tumoral_reads());
  523. println!("normal reads: {}", s.n_normal_reads());
  524. println!("tumoral:\n{:#?}", s.norm_chr_counts());
  525. Ok(())
  526. }
  527. #[test]
  528. fn coverage() -> anyhow::Result<()> {
  529. init();
  530. let id = "CAMARA";
  531. let time = "diag";
  532. WGSBamStats::new(id, time, Config::default())?.print();
  533. Ok(())
  534. }
  535. #[test]
  536. fn load_bam() -> anyhow::Result<()> {
  537. init();
  538. let id = "ADJAGBA";
  539. let time = "diag";
  540. let bam_path = Config::default().solo_bam(id, time);
  541. WGSBam::new(Path::new(&bam_path).to_path_buf())?;
  542. Ok(())
  543. }
  544. #[test]
  545. fn tar() {
  546. init();
  547. let mut ar = tar::Archive::new(fs::File::open("/data/lto/20241030_CUNVI-DG-N20_SEBZA-DG-N21.tar").unwrap());
  548. for file in ar.entries().unwrap() {
  549. let file = file.unwrap();
  550. let p = String::from_utf8(file.path_bytes().into_owned()).unwrap();
  551. if p.contains("/pod5/") {
  552. let u = Pod5::from_path(&file.path().unwrap().into_owned(), &Pod5Config::default());
  553. println!("{p}\n{u:#?}");
  554. }
  555. }
  556. }
  557. #[test]
  558. fn run_somatic() -> anyhow::Result<()> {
  559. init();
  560. let collections = Collections::new(
  561. CollectionsConfig::default()
  562. )?;
  563. let bams = collections.bam.by_id_completed(15.0, 10.0);
  564. let n = bams.len();
  565. warn!("{n} cases");
  566. for (i, bam) in bams.iter().enumerate() {
  567. let id = &bam.id;
  568. warn!("{i}/{n} {id}");
  569. if id == "BANGA" {
  570. continue;
  571. }
  572. match Somatic::initialize(id, Config::default())?.run() {
  573. Ok(_) => (),
  574. Err(e) => error!("{id} {e}"),
  575. };
  576. }
  577. Ok(())
  578. }
  579. #[test]
  580. fn run_somatic_case() -> anyhow::Result<()> {
  581. init();
  582. let id = "ADJAGBA";
  583. let mut config = Config::default();
  584. config.somatic_pipe_force = true;
  585. match Somatic::initialize(id, config)?.run() {
  586. Ok(_) => (),
  587. Err(e) => error!("{id} {e}"),
  588. };
  589. Ok(())
  590. }
  591. #[test]
  592. fn load_variants() -> anyhow::Result<()> {
  593. init();
  594. let id = "ADJAGBA";
  595. let config = Config::default();
  596. let path = format!("{}/{id}/diag//somatic_variants.json.gz", config.result_dir);
  597. let variants = variant_collection::Variants::load_from_json(&path)?;
  598. println!("n variants {}", variants.data.len());
  599. let n_vep: usize = variants.data.iter().map(|v| v.vep().len()).sum();
  600. println!("VEP: {n_vep}");
  601. Ok(())
  602. }
  603. }