// lib.rs (14 KB)
  1. use std::sync::{Arc, Mutex};
  2. pub mod commands;
  3. pub mod config;
  4. pub mod modkit;
  5. pub mod callers;
  6. pub mod runners;
  7. pub mod collection;
  8. pub mod functions;
  9. pub mod helpers;
  10. #[macro_use]
  11. extern crate lazy_static;
  12. // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
  13. lazy_static! {
  14. static ref DOCKER_ID: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
  15. }
  16. #[cfg(test)]
  17. mod tests {
  18. use std::{fs, path::Path};
  19. use callers::{nanomonsv::nanomonsv_create_pon, savana::Savana, severus::Severus};
  20. use collection::{InitSomatic, Version};
  21. use commands::{longphase::{LongphaseHap, LongphaseConfig}, modkit::{bed_methyl, ModkitConfig}};
  22. use functions::assembler::{Assembler, AssemblerConfig};
  23. use log::info;
  24. // use pandora_lib_assembler::assembler::AssembleConfig;
  25. use rayon::prelude::*;
  26. use runners::Run;
  27. use self::{callers::deep_variant::DeepVariantConfig, collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
  28. use super::*;
  29. use crate::{callers::{clairs::{ClairS, ClairSConfig}, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVConfig, NanomonSVSolo}}, collection::{bam::{self, BamType}, run_tasks, variants::VariantsCollection, vcf::VcfCollection, Collections, CollectionsConfig}, commands::{bcftools::{bcftools_keep_pass, BcftoolsConfig}, dorado::Dorado}};
  30. // export RUST_LOG="debug"
  31. fn init() {
  32. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  33. .is_test(true)
  34. .try_init();
  35. }
  /// Calls `modkit::modkit` on the PARACHINI diag BAM.
  #[test]
  fn it_works() {
      modkit::modkit("/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam");
  }
  /// Initializes `Dorado` for the CONSIGNY mrd case (barcode 07) with the
  /// default `Config` and runs its pipe.
  #[test]
  fn run_dorado() -> anyhow::Result<()> {
      let case = FlowCellCase {
          id: String::from("CONSIGNY"),
          time_point: String::from("mrd"),
          barcode: String::from("07"),
          pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into(),
      };
      let config = Config::default();
      dorado::Dorado::init(case, config)?.run_pipe()
  }
  /// Builds a `Pod5Collection` from the run data directory, the flow cell
  /// table and the pipeline directory; the test fails if construction fails.
  #[test]
  fn pod5() -> anyhow::Result<()> {
      // Install the logger through the shared helper: `Builder::build()` alone
      // only constructs a `Logger` and never registers it, so nothing would be
      // logged.
      init();
      let _ = Pod5Collection::new(
          "/data/run_data",
          "/data/flow_cells.tsv",
          "/data/longreads_basic_pipe",
      )?;
      // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
      Ok(())
  }
  59. #[test_log::test]
  60. fn bam() -> anyhow::Result<()> {
  61. let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
  62. bam_collection
  63. .bams
  64. .iter()
  65. .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
  66. .for_each(|b| println!("{b:#?}"));
  67. let u = bam_collection.get("PARACHINI", "mrd");
  68. println!("{u:#?}");
  69. Ok(())
  70. }
  71. #[test_log::test]
  72. fn vcf() -> anyhow::Result<()> {
  73. let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
  74. vcf_collection.sort_by_id();
  75. vcf_collection
  76. .vcfs
  77. .iter()
  78. .for_each(|v| v.println().unwrap());
  79. Ok(())
  80. }
  81. // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
  82. // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
  83. #[test_log::test]
  84. fn mux() -> anyhow::Result<()> {
  85. let result_dir = "/data/test_suite/results".to_string();
  86. let cases = vec![
  87. FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  88. FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  89. ];
  90. cases.iter().for_each(|c| {
  91. let dir = format!("{result_dir}/{}", c.id);
  92. if Path::new(&dir).exists() {
  93. fs::remove_dir_all(dir).unwrap();
  94. }
  95. });
  96. let config = Config { result_dir, ..Default::default() };
  97. Dorado::from_mux(cases, config)
  98. }
  99. #[test_log::test]
  100. fn deep_variant() -> anyhow::Result<()> {
  101. // let config = DeepVariantConfig {
  102. // result_dir: "/data/test".to_string(),
  103. // ..DeepVariantConfig::default()
  104. // };
  105. // DeepVariant::new("test_a", "diag", "/data/test_data/subset.bam", config).run()
  106. let config = DeepVariantConfig {
  107. result_dir: "/data/test".to_string(),
  108. ..DeepVariantConfig::default()
  109. };
  110. DeepVariant::new("LEVASSEUR", "mrd", "/data/longreads_basic_pipe/LEVASSEUR/mrd/LEVASSEUR_mrd_hs1.bam", config).run()
  111. }
  112. #[test_log::test]
  113. fn clairs() -> anyhow::Result<()> {
  114. let config = ClairSConfig {
  115. result_dir: "/data/test".to_string(),
  116. ..ClairSConfig::default()
  117. };
  118. ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  119. }
  120. #[test_log::test]
  121. fn nanomonsv() -> anyhow::Result<()> {
  122. // let config = NanomonSVConfig {
  123. // result_dir: "/data/test".to_string(),
  124. // ..NanomonSVConfig::default()
  125. // };
  126. // NanomonSV::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  127. let bam = |id:&str, time_point: &str| format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam");
  128. NanomonSV::new("CAZIER", &bam("CAZIER", "diag"), &bam("CAZIER", "mrd"), NanomonSVConfig::default()).run()
  129. }
  /// Runs `NanomonSVSolo` on the BRETON diag BAM with the default config.
  #[test]
  fn nanomonsv_solo() -> anyhow::Result<()> {
      init();
      let (id, time_point) = ("BRETON", "diag");
      let bam_path =
          format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam");
      NanomonSVSolo::new(id, &bam_path, time_point, NanomonSVConfig::default()).run()?;

      // Batch variant over several mrd cases:
      // let time_point = "mrd";
      // for id in ["MERY", "CAMARA", "FRANIATTE", "FERATI", "IQBAL", "COLLE", "JOLIVET", "BAFFREAU", "MANCUS", "BELARBI", "BENGUIRAT", "HENAUX", "MEDDAH"] {
      //     NanomonSVSolo::new(id, &format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam"), time_point, NanomonSVConfig::default()).run()?;
      // }
      Ok(())
  }
  143. // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
  /// Builds `Collections` with `pod_dir` set to `/data/store`, runs `todo()`,
  /// then prints each resulting task followed by the task count.
  #[test]
  fn todo_all() -> anyhow::Result<()> {
      init();
      // let config = CollectionsConfig::default();
      let config = CollectionsConfig {
          pod_dir: String::from("/data/store"),
          ..Default::default()
      };
      info!("Runing todo with config: {config:#?}");

      let mut collections = Collections::new(config)?;
      collections.todo()?;
      for task in collections.tasks.iter() {
          println!("{task}");
      }
      let n_tasks = collections.tasks.len();
      println!("{n_tasks}");
      Ok(())
  }
  /// Prints the tasks returned by `todo_variants_agg` for the default
  /// configuration, followed by their count.
  #[test]
  fn todo_agg() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      info!("Runing todo with config: {config:#?}");

      let agg_tasks = Collections::new(config)?.todo_variants_agg()?;
      println!("{agg_tasks:#?}");
      let n_tasks = agg_tasks.len();
      println!("{n_tasks}");
      Ok(())
  }
  /// Replaces the collection's task list with the output of
  /// `todo_variants_agg` and runs it, with MANCUSO and HAMROUNE on the id
  /// black list.
  #[test]
  fn run_agg() -> anyhow::Result<()> {
      init();
      let id_black_list = ["MANCUSO", "HAMROUNE"]
          .iter()
          .map(|id| id.to_string())
          .collect();
      let config = CollectionsConfig { id_black_list, ..Default::default() };
      info!("Runing todo with config: {config:#?}");

      let mut collections = Collections::new(config)?;
      let agg_tasks = collections.todo_variants_agg()?;
      collections.tasks = agg_tasks;
      collections.run()?;
      Ok(())
  }
  180. // export RUST_LOG="debug"
  181. #[test_log::test]
  182. fn run_t() -> anyhow::Result<()> {
  183. // let config = CollectionsConfig::default();
  184. let config = CollectionsConfig { pod_dir: "/data/store".to_string(), ..Default::default() };
  185. run_tasks(config)
  186. }
  187. #[test_log::test]
  188. fn somatic() -> anyhow::Result<()> {
  189. let variants_collection = VariantsCollection::new("/data/longreads_basic_pipe")?;
  190. variants_collection.data.iter().for_each(|v| println!("{}\t{}", v.id, v.path.display()));
  191. Ok(())
  192. }
  193. #[test_log::test]
  194. fn bcftools_pass() {
  195. let config = BcftoolsConfig::default();
  196. let id = "RICCO";
  197. let i = format!("/data/longreads_basic_pipe/{id}/diag/nanomonsv/{id}_diag.nanomonsv.result.vcf");
  198. let o = format!("/data/longreads_basic_pipe/{id}/diag/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
  199. bcftools_keep_pass(&i, &o, config).unwrap();
  200. }
  201. #[test_log::test]
  202. fn bam_ok() -> anyhow::Result<()> {
  203. let collections = Collections::new(
  204. CollectionsConfig::default()
  205. )?;
  206. let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
  207. (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
  208. }).collect();
  209. res.sort_by_key(|b| b.1.clone());
  210. res.sort_by_key(|b| b.0.clone());
  211. res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
  212. Ok(())
  213. }
  214. #[test_log::test]
  215. fn todo_assembler() -> anyhow::Result<()> {
  216. let collections = Collections::new(
  217. CollectionsConfig::default()
  218. )?;
  219. collections.todo_assembler()?;
  220. Ok(())
  221. }
  /// Calls `nanomonsv_create_pon` with the default config, targeting
  /// `/data/ref/hs1/nanomonsv_pon.vcf.gz`.
  #[test]
  fn sv_pon() -> anyhow::Result<()> {
      init();
      let config = NanomonSVConfig::default();
      let pon_path = "/data/ref/hs1/nanomonsv_pon.vcf.gz";
      nanomonsv_create_pon(config, pon_path)
  }
  /// Calls `todo_mod_pileup` on the default collections; the returned value is
  /// not inspected.
  #[test]
  fn todo_mod() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      Collections::new(config)?.todo_mod_pileup();
      Ok(())
  }
  /// Logs every task returned by `todo_deepvariants`, then the task count.
  #[test]
  fn todo_deepv() -> anyhow::Result<()> {
      init();
      let collections = Collections::new(CollectionsConfig::default())?;
      let tasks = collections.todo_deepvariants();
      for task in tasks.iter() {
          info!("{task}");
      }
      let n_tasks = tasks.len();
      info!("n tasks {n_tasks}");
      Ok(())
  }
  /// Logs every task returned by `todo_clairs` for the default collections.
  #[test]
  fn todo_clairs() -> anyhow::Result<()> {
      init();
      let collections = Collections::new(CollectionsConfig::default())?;
      let tasks = collections.todo_clairs();
      for task in tasks.iter() {
          info!("{task}");
      }
      Ok(())
  }
  /// Runs the `Assembler` for CAMEL / diag with the default config.
  #[test]
  fn run_assemblers() -> anyhow::Result<()> {
      let id = String::from("CAMEL");
      let time_point = String::from("diag");
      let config = AssemblerConfig::default();
      Assembler::new(id, time_point, config).run()
  }
  260. // #[test]
  261. // fn run_dmr_par() -> anyhow::Result<()> {
  262. // init();
  263. // let collections = Collections::new(
  264. // CollectionsConfig::default()
  265. // )?;
  266. // let tasks = collections.todo_dmr_c_diag_mrd();
  267. // tasks.iter().for_each(|t| info!("{t}"));
  268. // let len = tasks.len();
  269. // // let pool = ThreadPoolBuilder::new().num_threads(10).build().unwrap();
  270. // // pool.install(|| {
  271. // // tasks.par_iter().enumerate().for_each(|(i, t)| {
  272. // // let config = ModkitConfig {threads: 2, ..Default::default() };
  273. // // if let collection::CollectionsTasks::DMRCDiagMrd { id, .. } = t { let _ = dmr_c_mrd_diag(id, &config); }
  274. // // println!("⚡ {i}/{len}");
  275. // // });
  276. // // });
  277. // Ok(())
  278. // }
  /// Runs `bed_methyl` in parallel (rayon `par_iter`) for every `ModPileup`
  /// task returned by `todo_mod_pileup`, using 2 threads per call.
  ///
  /// A failing call is logged and the remaining tasks still run; it does not
  /// fail the test.
  #[test]
  fn run_mod_par() -> anyhow::Result<()> {
      init();
      let collections = Collections::new(CollectionsConfig::default())?;
      let tasks = collections.todo_mod_pileup();
      let len = tasks.len();

      tasks.par_iter().enumerate().for_each(|(i, t)| {
          if let collection::CollectionsTasks::ModPileup { bam, .. } = t {
              let config = ModkitConfig { threads: 2, ..Default::default() };
              // Best effort: one failure must not stop the others, but it
              // should be visible in the log rather than silently dropped.
              if let Err(e) = bed_methyl(bam.to_owned(), &config) {
                  log::error!("bed_methyl failed for task {i}: {e:?}");
              }
          }
          // `i` is the task's index in the list, not a completion counter.
          println!("⚡ {i}/{len}");
      });
      Ok(())
  }
  /// Initializes `Severus` for ACHITE with the default `Config` and runs it.
  #[test]
  fn run_severus() -> anyhow::Result<()> {
      init();
      let id = "ACHITE";
      let config = Config::default();
      Severus::initialize(id, config)?.run()
  }
  /// Initializes `Savana` for BECERRA with the default `Config` and runs it.
  #[test]
  fn run_savana() -> anyhow::Result<()> {
      init();
      let id = "BECERRA";
      let config = Config::default();
      Savana::initialize(id, config)?.run()
  }
  /// Queries and logs the versions reported by `Savana` and `Severus` for the
  /// default `Config`.
  #[test]
  fn check_versions() -> anyhow::Result<()> {
      init();
      let config = Config::default();

      let savana_version = Savana::version(&config)?;
      info!("Savanna version {savana_version}");

      let severus_version = Severus::version(&config)?;
      info!("Severus version {severus_version}");

      Ok(())
  }
  /// Calls `run_deepvariant` on the default collections.
  #[test]
  fn run_multi_deepvariant() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      Collections::new(config)?.run_deepvariant()
  }
  /// Calls `run_clairs` on the default collections.
  #[test]
  fn run_clairs() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      Collections::new(config)?.run_clairs()
  }
  /// Runs `LongphaseHap` for CUNY on the diag BAM and then on the mrd BAM,
  /// both against the same ClairS germline VCF. Stops at the first failure.
  #[test]
  fn run_longphase() -> anyhow::Result<()> {
      init();
      let id = "CUNY";
      let diag_bam = format!("/data/longreads_basic_pipe/{id}/diag/{id}_diag_hs1.bam");
      let vcf = format!("/data/longreads_basic_pipe/{id}/diag/ClairS/clair3_normal_tumoral_germline_output.vcf.gz");
      let mrd_bam = format!("/data/longreads_basic_pipe/{id}/mrd/{id}_mrd_hs1.bam");

      for bam_path in [&diag_bam, &mrd_bam] {
          LongphaseHap::new(id, bam_path, &vcf, LongphaseConfig::default()).run()?;
      }
      Ok(())
  }
  340. }