lib.rs 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. use std::sync::{Arc, Mutex};
  2. pub mod commands;
  3. pub mod config;
  4. pub mod modkit;
  5. pub mod callers;
  6. pub mod runners;
  7. pub mod collection;
  8. pub mod functions;
  9. pub mod helpers;
  10. #[macro_use]
  11. extern crate lazy_static;
  12. // Define DOCKER_ID lock for handling Docker kill when ctrlc is pressed
  13. lazy_static! {
  14. static ref DOCKER_ID: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
  15. }
  16. #[cfg(test)]
  17. mod tests {
  18. use std::{fs, path::Path};
  19. use callers::nanomonsv::nanomonsv_create_pon;
  20. use commands::modkit::{bed_methyl, ModkitConfig};
  21. use log::info;
  22. use rayon::prelude::*;
  23. use self::{callers::deep_variant::DeepVariantConfig, collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
  24. use super::*;
  25. use crate::{callers::{clairs::{ClairS, ClairSConfig}, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVConfig, NanomonSVSolo}}, collection::{bam::{self, BamType}, run_tasks, variants::VariantsCollection, vcf::VcfCollection, Collections, CollectionsConfig}, commands::{bcftools::{bcftools_keep_pass, BcftoolsConfig}, dorado::Dorado}};
  26. fn init() {
  27. let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
  28. .is_test(true)
  29. .try_init();
  30. }
  31. // export RUST_LOG="debug"
  /// Calls `modkit::modkit` on one hard-coded BAM path.
  #[test]
  fn it_works() {
      modkit::modkit("/data/longreads_basic_pipe/PARACHINI/diag/PARACHINI_diag_hs1.bam");
  }
  /// Builds one `FlowCellCase` (id "CONSIGNY", time point "mrd", barcode "07"),
  /// initialises `Dorado` with the default `Config` and returns whatever
  /// `run_pipe` returns.
  #[test]
  fn run_dorado() -> anyhow::Result<()> {
      let pod_dir = "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07";
      let case = FlowCellCase {
          id: String::from("CONSIGNY"),
          time_point: String::from("mrd"),
          barcode: String::from("07"),
          pod_dir: pod_dir.into(),
      };
      let config = Config::default();
      Dorado::init(case, config)?.run_pipe()
  }
  /// Constructs a `Pod5Collection` from the run-data directory, the flow-cell
  /// table and the results directory; the test fails if construction errors.
  #[test]
  fn pod5() -> anyhow::Result<()> {
      // The previous code called `Builder::build()`, which only constructs a
      // logger value; it was dropped immediately, so nothing was ever installed.
      // Use the shared helper, which actually installs the logger via `try_init()`.
      init();
      let _ = Pod5Collection::new(
          "/data/run_data",
          "/data/flow_cells.tsv",
          "/data/longreads_basic_pipe",
      )?;
      Ok(())
  }
  55. #[test_log::test]
  56. fn bam() -> anyhow::Result<()> {
  57. let bam_collection = bam::load_bam_collection("/data/longreads_basic_pipe");
  58. bam_collection
  59. .bams
  60. .iter()
  61. .filter(|b| matches!(b.bam_type, BamType::Panel(_)))
  62. .for_each(|b| println!("{b:#?}"));
  63. let u = bam_collection.get("PARACHINI", "mrd");
  64. println!("{u:#?}");
  65. Ok(())
  66. }
  67. #[test_log::test]
  68. fn vcf() -> anyhow::Result<()> {
  69. let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
  70. vcf_collection.sort_by_id();
  71. vcf_collection
  72. .vcfs
  73. .iter()
  74. .for_each(|v| v.println().unwrap());
  75. Ok(())
  76. }
  77. // pod5 view -I /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 | head -5000 | awk '{if(NR==1){print "target,"$0}else{print "subset_1.pod5,"$0}}' > /tmp/subset_ids.csv
  78. // pod5 subset /data/run_data/20240903-CL/ARMEM-DG-N02_ASSJU-DG-N03/20240903_1428_1B_PAW47629_fc24c3cf/pod5/PAW47629_fc24c3cf_77b07847_0.pod5 --csv /tmp/subset_ids.csv -o /data/test_suite/pod5/muxed/
  79. #[test_log::test]
  80. fn mux() -> anyhow::Result<()> {
  81. let result_dir = "/data/test_suite/results".to_string();
  82. let cases = vec![
  83. FlowCellCase { id: "test_02".to_string(), time_point: "diag".to_string(), barcode: "02".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  84. FlowCellCase { id: "test_03".to_string(), time_point: "diag".to_string(), barcode: "03".to_string(), pod_dir: "/data/test_suite/pod5/muxed".into() },
  85. ];
  86. cases.iter().for_each(|c| {
  87. let dir = format!("{result_dir}/{}", c.id);
  88. if Path::new(&dir).exists() {
  89. fs::remove_dir_all(dir).unwrap();
  90. }
  91. });
  92. let config = Config { result_dir, ..Default::default() };
  93. Dorado::from_mux(cases, config)
  94. }
  95. #[test_log::test]
  96. fn deep_variant() -> anyhow::Result<()> {
  97. let config = DeepVariantConfig {
  98. result_dir: "/data/test".to_string(),
  99. ..DeepVariantConfig::default()
  100. };
  101. DeepVariant::new("test_a", "diag", "/data/test_data/subset.bam", config).run()
  102. }
  103. #[test_log::test]
  104. fn clairs() -> anyhow::Result<()> {
  105. let config = ClairSConfig {
  106. result_dir: "/data/test".to_string(),
  107. ..ClairSConfig::default()
  108. };
  109. ClairS::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  110. }
  111. #[test_log::test]
  112. fn nanomonsv() -> anyhow::Result<()> {
  113. let config = NanomonSVConfig {
  114. result_dir: "/data/test".to_string(),
  115. ..NanomonSVConfig::default()
  116. };
  117. NanomonSV::new("test_a", "/data/test_data/subset.bam", "/data/test_data/subset_mrd.bam", config).run()
  118. }
  /// Runs `NanomonSVSolo` with the default configuration on the "mrd" BAM of
  /// each listed id, in order, stopping at the first error.
  #[test]
  fn nanomonsv_solo() -> anyhow::Result<()> {
      init();

      let time_point = "mrd";
      let ids = [
          "MERY",
          "CAMARA",
          "FRANIATTE",
          "FERATI",
          "IQBAL",
          "COLLE",
          "JOLIVET",
          "BAFFREAU",
          "MANCUS",
          "BELARBI",
          "BENGUIRAT",
          "HENAUX",
          "MEDDAH",
      ];

      for id in ids {
          let bam_path = format!("/data/longreads_basic_pipe/{id}/{time_point}/{id}_{time_point}_hs1.bam");
          NanomonSVSolo::new(id, &bam_path, time_point, NanomonSVConfig::default()).run()?;
      }
      Ok(())
  }
  131. // cargo test run -- --nocapture; ~/run_scripts/notify_finish.sh &
  /// Builds `Collections` from the default configuration, calls `todo()` and
  /// prints the resulting `tasks` followed by their count.
  #[test]
  fn todo_all() -> anyhow::Result<()> {
      init();

      let config = CollectionsConfig::default();
      let mut collections = Collections::new(config)?;
      info!("Runing todo with config: {:#?}", collections);

      collections.todo()?;

      let tasks = &collections.tasks;
      println!("{:#?}", tasks);
      println!("{}", tasks.len());
      Ok(())
  }
  /// Builds `Collections` from the default configuration and prints the tasks
  /// returned by `todo_variants_agg()` followed by their count.
  #[test]
  fn todo_agg() -> anyhow::Result<()> {
      init();

      let config = CollectionsConfig::default();
      let collections = Collections::new(config)?;
      info!("Runing todo with config: {:#?}", collections);

      let agg_tasks = collections.todo_variants_agg()?;
      let count = agg_tasks.len();
      println!("{:#?}", agg_tasks);
      println!("{}", count);
      Ok(())
  }
  /// Builds `Collections` with two ids in `id_black_list`, replaces its tasks
  /// with the output of `todo_variants_agg()` and runs them.
  #[test]
  fn run_agg() -> anyhow::Result<()> {
      init();

      let id_black_list: Vec<String> = ["MANCUSO", "HAMROUNE"]
          .iter()
          .map(|id| id.to_string())
          .collect();
      let config = CollectionsConfig { id_black_list, ..Default::default() };

      let mut collections = Collections::new(config)?;
      info!("Runing todo with config: {:#?}", collections);

      let agg_tasks = collections.todo_variants_agg()?;
      collections.tasks = agg_tasks;
      collections.run()?;
      Ok(())
  }
  169. // export RUST_LOG="debug"
  170. #[test_log::test]
  171. fn run_t() -> anyhow::Result<()> {
  172. run_tasks(CollectionsConfig::default())
  173. }
  174. #[test_log::test]
  175. fn somatic() -> anyhow::Result<()> {
  176. let variants_collection = VariantsCollection::new("/data/longreads_basic_pipe")?;
  177. variants_collection.data.iter().for_each(|v| println!("{}\t{}", v.id, v.path.display()));
  178. Ok(())
  179. }
  180. #[test_log::test]
  181. fn bcftools_pass() {
  182. let config = BcftoolsConfig::default();
  183. let id = "RICCO";
  184. let i = format!("/data/longreads_basic_pipe/{id}/diag/nanomonsv/{id}_diag.nanomonsv.result.vcf");
  185. let o = format!("/data/longreads_basic_pipe/{id}/diag/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz");
  186. bcftools_keep_pass(&i, &o, config).unwrap();
  187. }
  188. #[test_log::test]
  189. fn bam_ok() -> anyhow::Result<()> {
  190. let collections = Collections::new(
  191. CollectionsConfig::default()
  192. )?;
  193. let mut res: Vec<_> = collections.bam.by_id_completed(15.0, 10.0).iter().map(|b| {
  194. (b.id.to_string(), b.time_point.to_string(), b.path.to_str().unwrap().to_string())
  195. }).collect();
  196. res.sort_by_key(|b| b.1.clone());
  197. res.sort_by_key(|b| b.0.clone());
  198. res.iter().for_each(|(id, tp, path)| println!("{id}\t{tp}\t{path}"));
  199. Ok(())
  200. }
  201. #[test_log::test]
  202. fn todo_assembler() -> anyhow::Result<()> {
  203. let collections = Collections::new(
  204. CollectionsConfig::default()
  205. )?;
  206. collections.todo_assembler()?;
  207. Ok(())
  208. }
  /// Calls `nanomonsv_create_pon` with the default `NanomonSVConfig` and a
  /// hard-coded output path, returning its result.
  #[test]
  fn sv_pon() -> anyhow::Result<()> {
      init();
      let pon_path = "/data/ref/hs1/nanomonsv_pon.vcf.gz";
      nanomonsv_create_pon(NanomonSVConfig::default(), pon_path)
  }
  /// Builds `Collections` from the default configuration and calls
  /// `todo_mod_pileup()`, discarding what it returns.
  #[test]
  fn todo_mod() -> anyhow::Result<()> {
      init();
      let config = CollectionsConfig::default();
      let collections = Collections::new(config)?;
      collections.todo_mod_pileup();
      Ok(())
  }
  /// Runs `bed_methyl` in parallel (rayon) over every `ModPileup` task returned
  /// by `todo_mod_pileup()`, using a `ModkitConfig` with `threads` set to 2, and
  /// prints a progress line per task.
  ///
  /// A failing task does not stop the others; its error is logged.
  #[test]
  fn run_mod_par() -> anyhow::Result<()> {
      init();

      let collections = Collections::new(CollectionsConfig::default())?;
      let tasks = collections.todo_mod_pileup();
      let len = tasks.len();

      tasks.par_iter().enumerate().for_each(|(i, t)| {
          let config = ModkitConfig { threads: 2, ..Default::default() };
          if let collection::CollectionsTasks::ModPileup { bam, .. } = t {
              // Still best effort, but a failure is reported instead of being
              // dropped with `let _ =`.
              if let Err(err) = bed_methyl(bam.to_owned(), &config) {
                  log::error!("bed_methyl failed for {bam:?}: {err:?}");
              }
          }
          println!("⚡ {i}/{len}");
      });
      Ok(())
  }
  238. }