lib.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. use std::sync::{Arc, Mutex};
  2. pub mod commands;
  3. pub mod config;
  4. pub mod modkit;
  5. pub mod callers;
  6. pub mod runners;
  7. pub mod collection;
  8. pub mod functions;
  9. pub mod helpers;
  10. pub mod variant;
  11. pub mod io;
  12. // pub mod vcf_reader;
  13. #[macro_use]
  14. extern crate lazy_static;
  15. // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
  16. lazy_static! {
  17. static ref DOCKER_ID: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
  18. }
  19. #[cfg(test)]
  20. mod tests {
  21. use std::{fs, path::Path};
  22. use callers::{nanomonsv::nanomonsv_create_pon, savana::Savana, severus::{Severus, SeverusSolo}};
  23. use collection::{Initialize, InitializeSolo, Version};
  24. use commands::{longphase::{LongphaseConfig, LongphaseHap, LongphaseModcallSolo, LongphasePhase}, modkit::{bed_methyl, ModkitConfig}};
  25. use functions::assembler::{Assembler, AssemblerConfig};
  26. use log::info;
  27. // use pandora_lib_assembler::assembler::AssembleConfig;
  28. use rayon::prelude::*;
  29. use runners::Run;
  30. use variant::variant::{Variant, Variants};
  31. use self::{callers::deep_variant::DeepVariantConfig, collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
  32. use super::*;
  33. use crate::{callers::{clairs::{ClairS, ClairSConfig}, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVConfig, NanomonSVSolo}}, collection::{bam::{self, BamType}, run_tasks, variants::VariantsCollection, vcf::VcfCollection, Collections, CollectionsConfig}, commands::{bcftools::{bcftools_keep_pass, BcftoolsConfig}, dorado::Dorado}};
  34. // export RUST_LOG="debug"
  35. fn init() {
  36. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  37. .is_test(true)
  38. .try_init();
  39. }
  /// Calls `modkit::modkit` on one hard-coded BAM path.
  #[test]
  fn it_works() {
      modkit::modkit("/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam");
  }
  /// Initializes Dorado for a single flow-cell case with the default `Config`
  /// and runs its pipeline, returning the pipeline's result.
  #[test]
  fn run_dorado() -> anyhow::Result<()> {
      let pod_dir = "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07";
      let case = FlowCellCase {
          id: String::from("CONSIGNY"),
          time_point: String::from("mrd"),
          barcode: String::from("07"),
          pod_dir: pod_dir.into(),
      };
      let config = Config::default();
      dorado::Dorado::init(case, config)?.run_pipe()
  }
  /// Builds a `Pod5Collection` from the hard-coded run-data directory,
  /// flow-cell table and results directory, failing the test if construction
  /// returns an error.
  #[test]
  fn pod5() -> anyhow::Result<()> {
      // Use the shared helper so the logger is actually installed:
      // `Builder::build()` alone only constructs a logger value and never
      // registers it, so nothing would be logged.
      init();
      let _ = Pod5Collection::new(
          "/data/run_data",
          "/data/flow_cells.tsv",
          "/data/longreads_basic_pipe",
      )?;
      // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
      Ok(())
  }
  63. #[test_log::test]
  64. fn bam() -> anyhow::Result<()> {
  65. let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
  66. bam_collection
  67. .bams
  68. .iter()
  69. .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
  70. .for_each(|b| println!("{b:#?}"));
  71. let u = bam_collection.get("PARACHINI", "mrd");
  72. println!("{u:#?}");
  73. Ok(())
  74. }
  75. #[test_log::test]
  76. fn vcf() -> anyhow::Result<()> {
  77. let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
  78. vcf_collection.sort_by_id();
  79. vcf_collection
  80. .vcfs
  81. .iter()
  82. .for_each(|v| v.println().unwrap());
  83. Ok(())
  84. }
  85. // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
  86. // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
  87. #[test_log::test]
  88. fn mux() -> anyhow::Result<()> {
  89. let result_dir = "/data/test_suite/results".to_string();
  90. let cases = vec![
  91. FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  92. FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  93. ];
  94. cases.iter().for_each(|c| {
  95. let dir = format!("{result_dir}/{}", c.id);
  96. if Path::new(&dir).exists() {
  97. fs::remove_dir_all(dir).unwrap();
  98. }
  99. });
  100. let config = Config { result_dir, ..Default::default() };
  101. Dorado::from_mux(cases, config)
  102. }
  103. #[test_log::test]
  104. fn clairs() -> anyhow::Result<()> {
  105. let config = ClairSConfig {
  106. result_dir: "/data/test".to_string(),
  107. ..ClairSConfig::default()
  108. };
  109. ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  110. }
  111. #[test_log::test]
  112. fn nanomonsv() -> anyhow::Result<()> {
  113. // let config = NanomonSVConfig {
  114. // result_dir: "/data/test".to_string(),
  115. // ..NanomonSVConfig::default()
  116. // };
  117. // NanomonSV::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  118. let bam = |id:&str, time_point: &str| format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam");
  119. let id = "HAMROUNE";
  120. NanomonSV::new(id, &bam(id, "diag"), &bam(id, "mrd"), NanomonSVConfig::default()).run()
  121. }
  /// Runs `NanomonSVSolo` for case BRETON at the "diag" time point with the
  /// default configuration.
  // NOTE(review): the function name looks like a typo of `nanomonsv_solo`; it is
  // left unchanged so existing `cargo test` filters keep matching.
  #[test]
  fn nanomddonsv_solo() -> anyhow::Result<()> {
      init();
      let (id, time_point) = ("BRETON", "diag");
      let bam = format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam");
      NanomonSVSolo::new(id, &bam, time_point, NanomonSVConfig::default()).run()?;
      // Batch variant over several mrd cases, kept for reference:
      // let time_point = "mrd";
      // for id in ["MERY", "CAMARA", "FRANIATTE", "FERATI", "IQBAL", "COLLE", "JOLIVET", "BAFFREAU", "MANCUS", "BELARBI", "BENGUIRAT", "HENAUX", "MEDDAH"] {
      //
      // NanomonSVSolo::new(id, &format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam"), time_point, NanomonSVConfig::default()).run()?;
      // }
      Ok(())
  }
  135. // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
  /// Builds `Collections` with `pod_dir` set to `/data/store`, calls `todo()`
  /// and prints every resulting task followed by the task count.
  #[test]
  fn todo_all() -> anyhow::Result<()> {
      init();
      // Use `CollectionsConfig::default()` alone to keep the default `pod_dir`.
      let config = CollectionsConfig {
          pod_dir: String::from("/data/store"),
          ..CollectionsConfig::default()
      };
      info!("Runing todo with config: {:#?}", config);
      let mut collections = Collections::new(config)?;
      collections.todo()?;
      for task in collections.tasks.iter() {
          println!("{task}");
      }
      println!("{}", collections.tasks.len());
      Ok(())
  }
  /// Lists the variant-aggregation tasks for the default configuration:
  /// pretty-prints them, then prints how many there are.
  #[test]
  fn todo_agg() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      info!("Runing todo with config: {:#?}", config);
      let pending = Collections::new(config)?.todo_variants_agg()?;
      println!("{pending:#?}");
      println!("{}", pending.len());
      Ok(())
  }
  /// Runs the variant-aggregation tasks with ids MANCUSO and HAMROUNE placed
  /// on the configuration's `id_black_list`.
  #[test]
  fn run_agg() -> anyhow::Result<()> {
      init();
      let id_black_list: Vec<String> = ["MANCUSO", "HAMROUNE"]
          .iter()
          .map(|id| id.to_string())
          .collect();
      let config = CollectionsConfig {
          id_black_list,
          ..CollectionsConfig::default()
      };
      info!("Runing todo with config: {:#?}", config);
      let mut collections = Collections::new(config)?;
      // Replace the task list with just the aggregation tasks before running.
      let agg_tasks = collections.todo_variants_agg()?;
      collections.tasks = agg_tasks;
      collections.run()?;
      Ok(())
  }
  172. // export RUST_LOG="debug"
  173. #[test_log::test]
  174. fn run_t() -> anyhow::Result<()> {
  175. // let config = CollectionsConfig::default();
  176. let config = CollectionsConfig { pod_dir: "/data/store".to_string(), ..Default::default() };
  177. run_tasks(config)
  178. }
  179. #[test_log::test]
  180. fn somatic() -> anyhow::Result<()> {
  181. let variants_collection = VariantsCollection::new("/data/longreads_basic_pipe")?;
  182. variants_collection.data.iter().for_each(|v| println!("{}\t{}", v.id, v.path.display()));
  183. Ok(())
  184. }
  185. // #[test_log::test]
  186. // fn bcftools_pass() {
  187. // let config = BcftoolsConfig::default();
  188. // let id = "RICCO";
  189. // let time = "diag";
  190. // let caller = "DeepVariant";
  191. //
  192. // Config::default();
  193. //
  194. // // let (i, o) =
  195. // // let i = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag.nanomonsv.result.vcf");
  196. // // let o = format!("/data/longreads_basic_pipe/{id}/{time}/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
  197. // bcftools_keep_pass(&i, &o, config).unwrap();
  198. // }
  199. #[test_log::test]
  200. fn bam_ok() -> anyhow::Result<()> {
  201. let collections = Collections::new(
  202. CollectionsConfig::default()
  203. )?;
  204. let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
  205. (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
  206. }).collect();
  207. res.sort_by_key(|b| b.1.clone());
  208. res.sort_by_key(|b| b.0.clone());
  209. res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
  210. Ok(())
  211. }
  212. #[test_log::test]
  213. fn todo_assembler() -> anyhow::Result<()> {
  214. let collections = Collections::new(
  215. CollectionsConfig::default()
  216. )?;
  217. collections.todo_assembler()?;
  218. Ok(())
  219. }
  /// Calls `nanomonsv_create_pon` with the default NanomonSV configuration and
  /// the hard-coded `nanomonsv_pon.vcf.gz` path.
  #[test]
  fn sv_pon() -> anyhow::Result<()> {
      init();
      let pon_path = "/data/ref/hs1/nanomonsv_pon.vcf.gz";
      let config = NanomonSVConfig::default();
      nanomonsv_create_pon(config, pon_path)
  }
  /// Lists the pending mod-pileup tasks for the default configuration, logging
  /// each task and the total count.
  #[test]
  fn todo_mod() -> anyhow::Result<()> {
      init();
      let collections = Collections::new(CollectionsConfig::default())?;
      // Report the tasks instead of discarding them, so the test output shows
      // what would be run.
      let tasks = collections.todo_mod_pileup();
      tasks.iter().for_each(|t| info!("{t}"));
      info!("n tasks {}", tasks.len());
      Ok(())
  }
  /// Logs every task returned by `todo_deepvariants()` for the default
  /// configuration, followed by the task count.
  #[test]
  fn todo_deepv() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      let collections = Collections::new(config)?;
      let tasks = collections.todo_deepvariants();
      for task in tasks.iter() {
          info!("{task}");
      }
      info!("n tasks {}", tasks.len());
      Ok(())
  }
  /// Logs every task returned by `todo_clairs()` for the default configuration.
  #[test]
  fn todo_clairs() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      let collections = Collections::new(config)?;
      for task in collections.todo_clairs().iter() {
          info!("{task}");
      }
      Ok(())
  }
  /// Runs the assembler for case CAMEL at the "diag" time point with the
  /// default assembler configuration.
  #[test]
  fn run_assemblers() -> anyhow::Result<()> {
      let id = String::from("CAMEL");
      let time_point = String::from("diag");
      Assembler::new(id, time_point, AssemblerConfig::default()).run()
  }
  258. // #[test]
  259. // fn run_dmr_par() -> anyhow::Result<()> {
  260. // init();
  261. // let collections = Collections::new(
  262. // CollectionsConfig::default()
  263. // )?;
  264. // let tasks = collections.todo_dmr_c_diag_mrd();
  265. // tasks.iter().for_each(|t| info!("{t}"));
  266. // let len = tasks.len();
  267. // // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap();
  268. // // pool.install(|| {
  269. // // tasks.par_iter().enumerate().for_each(|(i, t)| {
  270. // // let config = ModkitConfig {threads: 2, ..Default::default() };
  271. // // if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); }
  272. // // println!("⚡ {i}/{len}");
  273. // // });
  274. // // });
  275. // Ok(())
  276. // }
  /// Runs `bed_methyl` in parallel (rayon) for every `ModPileup` task returned
  /// by `todo_mod_pileup()`, using 2 threads per call, and prints a progress
  /// line after each task.
  #[test]
  fn run_mod_par() -> anyhow::Result<()> {
      init();
      let collections = Collections::new(CollectionsConfig::default())?;
      let tasks = collections.todo_mod_pileup();
      let len = tasks.len();
      tasks.par_iter().enumerate().for_each(|(i, task)| {
          let config = ModkitConfig {
              threads: 2,
              ..Default::default()
          };
          match task {
              collection::CollectionsTasks::ModPileup { bam, .. } => {
                  // NOTE(review): the outcome of `bed_methyl` is discarded, so a
                  // failing pileup does not fail this test — confirm that is intended.
                  let _ = bed_methyl(bam.to_owned(), &config);
              }
              // Any other kind of task is skipped.
              _ => {}
          }
          println!("⚡ {i}/{len}");
      });
      Ok(())
  }
  /// Initializes Severus for case CAMEL with the default `Config` and runs it.
  #[test]
  fn run_severus() -> anyhow::Result<()> {
      init();
      let config = Config::default();
      Severus::initialize("CAMEL", config)?.run()
  }
  /// Initializes `SeverusSolo` for case CAMEL at the "diag" time point with the
  /// default `Config` and runs it.
  #[test]
  fn run_severus_solo() -> anyhow::Result<()> {
      init();
      let config = Config::default();
      SeverusSolo::initialize("CAMEL", "diag", config)?.run()
  }
  /// Initializes Savana for case LEVASSEUR with the default `Config` and runs it.
  #[test]
  fn run_savana() -> anyhow::Result<()> {
      init();
      let config = Config::default();
      Savana::initialize("LEVASSEUR", config)?.run()
  }
  /// Queries the Savana and Severus versions under the default `Config` and
  /// logs each one; fails if either query returns an error.
  #[test]
  fn check_versions() -> anyhow::Result<()> {
      init();
      let config = Config::default();
      let savana_version = Savana::version(&config)?;
      info!("Savanna version {savana_version}");
      let severus_version = Severus::version(&config)?;
      info!("Severus version {severus_version}");
      Ok(())
  }
  /// Calls `run_deepvariant()` on `Collections` built from the default
  /// configuration and returns its result.
  #[test]
  fn run_multi_deepvariant() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      Collections::new(config)?.run_deepvariant()
  }
  /// Runs ClairS for case HAMROUNE on its diag and mrd BAMs with the default
  /// ClairS configuration.
  #[test]
  fn run_clairs() -> anyhow::Result<()> {
      init();
      let id = "HAMROUNE";
      let (diag_bam, mrd_bam) = (
          format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam"),
          format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam"),
      );
      let config = ClairSConfig::default();
      ClairS::new(id, &diag_bam, &mrd_bam, config).run()
  }
  /// Initializes DeepVariant for case HAMROUNE at the "diag" time point with
  /// the default `Config` and runs it.
  #[test]
  fn run_deepvariant() -> anyhow::Result<()> {
      init();
      let config = Config::default();
      DeepVariant::initialize("HAMROUNE", "diag", config)?.run()
  }
  /// Runs `LongphaseHap` for case BECERRA, first on the diag BAM and then on
  /// the mrd BAM, both against the same ClairS germline VCF; stops at the
  /// first error.
  #[test]
  fn run_longphase() -> anyhow::Result<()> {
      init();
      let id = "BECERRA";
      let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz");
      let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
      let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");
      // Order matters only in that diag is processed before mrd, as before.
      for bam in [&diag_bam, &mrd_bam] {
          LongphaseHap::new(id, bam, &vcf, LongphaseConfig::default()).run()?;
      }
      Ok(())
  }
  /// Initializes `LongphaseModcallSolo` for case ADJAGBA at the "diag" time
  /// point with the default `Config` and runs it.
  #[test]
  fn run_longphase_modcall() -> anyhow::Result<()> {
      init();
      let (id, time) = ("ADJAGBA", "diag");
      let config = Config::default();
      LongphaseModcallSolo::initialize(id, time, config)?.run()
  }
  /// Initializes `LongphasePhase` for case ADJAGBA with the default `Config`
  /// and runs it.
  #[test]
  fn run_longphase_phase() -> anyhow::Result<()> {
      init();
      let config = Config::default();
      LongphasePhase::initialize("ADJAGBA", config)?.run()
  }
  /// Round-trip check: parsing a VCF row into a `Variant` and serializing it
  /// with `into_vcf_row()` must reproduce the input exactly. Covers a row with
  /// FORMAT/sample columns and a row that ends after the INFO column.
  #[test]
  fn variant_parse() -> anyhow::Result<()> {
      let rows = [
          "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.666667:6,4,0",
          "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.",
      ];
      for row in rows {
          let variant: Variant = row.parse()?;
          assert_eq!(row, &variant.into_vcf_row());
      }
      Ok(())
  }
  /// Runs DeepVariant for case ADJAGBA at the "diag" time point, then loads its
  /// variants and prints how many there are.
  #[test]
  fn variant_load_deepvariant() -> anyhow::Result<()> {
      init();
      let (id, time) = ("ADJAGBA", "diag");
      let mut dv = DeepVariant::initialize(id, time, Config::default())?;
      dv.run()?;
      let n_variants = dv.variants()?.len();
      println!("Deepvariant for {id} {time}: variants {}", n_variants);
      Ok(())
  }
  384. }