//! `config.rs` — pipeline configuration: default settings and per-sample path builders.

  1. #[derive(Debug, Clone)]
  2. pub struct Config {
  3. pub pod_dir: String,
  4. pub result_dir: String,
  5. pub align: AlignConfig,
  6. pub reference: String,
  7. pub reference_name: String,
  8. pub dict_file: String,
  9. pub refseq_gff: String,
  10. pub docker_max_memory_go: u16,
  11. pub savana_bin: String,
  12. pub savana_threads: u8,
  13. pub tumoral_name: String,
  14. pub normal_name: String,
  15. pub haplotagged_bam_tag_name: String,
  16. pub count_dir_name: String,
  17. pub count_bin_size: u32,
  18. pub count_n_chunks: u32,
  19. pub savana_output_dir: String,
  20. pub savana_copy_number: String,
  21. pub savana_read_counts: String,
  22. pub germline_phased_vcf: String,
  23. pub savana_passed_vcf: String,
  24. pub conda_sh: String,
  25. pub savana_force: bool,
  26. pub deepvariant_output_dir: String,
  27. pub severus_bin: String,
  28. pub severus_force: bool,
  29. pub severus_threads: u8,
  30. pub vntrs_bed: String,
  31. pub severus_pon: String,
  32. pub severus_output_dir: String,
  33. pub severus_solo_output_dir: String,
  34. pub longphase_bin: String,
  35. pub longphase_threads: u8,
  36. pub longphase_modcall_vcf: String,
  37. pub modkit_bin: String,
  38. pub modkit_summary_threads: u8,
  39. pub modkit_summary_file: String,
  40. pub longphase_modcall_threads: u8,
  41. pub deepvariant_threads: u8,
  42. pub deepvariant_bin_version: String,
  43. pub deepvariant_model_type: String,
  44. pub deepvariant_force: bool,
  45. pub deepsomatic_output_dir: String,
  46. pub deepsomatic_threads: u8,
  47. pub deepsomatic_bin_version: String,
  48. pub deepsomatic_model_type: String,
  49. pub bam_min_mapq: u8,
  50. pub bam_n_threads: u8,
  51. pub db_cases_path: String,
  52. pub somatic_pipe_stats: String,
  53. pub clairs_threads: u8,
  54. pub clairs_force: bool,
  55. pub clairs_platform: String,
  56. pub clairs_output_dir: String,
  57. pub mask_bed: String,
  58. pub somatic_min_constit_depth: u16,
  59. pub somatic_max_alt_constit: u16,
  60. pub min_shannon_entropy: f64,
  61. pub nanomonsv_bin: String,
  62. pub nanomonsv_output_dir: String,
  63. pub nanomonsv_force: bool,
  64. pub nanomonsv_threads: u8,
  65. pub nanomonsv_passed_vcf: String,
  66. pub nanomonsv_solo_output_dir: String,
  67. pub nanomonsv_solo_passed_vcf: String,
  68. pub somatic_pipe_force: bool,
  69. pub min_high_quality_depth: u32,
  70. }
// Output file names that are imposed by the tools themselves and therefore
// cannot be changed through `Config`.
lazy_static! {
    // DeepVariant VCF file name; `{id}` and `{time}` are substituted by
    // `Config::deepvariant_output_vcf`.
    static ref DEEPVARIANT_OUTPUT_NAME: &'static str = "{id}_{time}_DeepVariant.vcf.gz";
    // First VCF returned by `Config::clairs_output_vcfs`.
    static ref CLAIRS_OUTPUT_NAME: &'static str = "output.vcf.gz";
    // Second VCF returned by `Config::clairs_output_vcfs` (indels).
    static ref CLAIRS_OUTPUT_INDELS_NAME: &'static str = "indel.vcf.gz";
    // Germline VCF of the normal sample inside the ClairS output directory.
    static ref CLAIRS_GERMLINE_NORMAL: &'static str = "clair3_normal_germline_output.vcf.gz";
    // Germline VCF of the tumor sample inside the ClairS output directory.
    static ref CLAIRS_GERMLINE_TUMOR: &'static str = "clair3_tumor_germline_output.vcf.gz";
}
  79. impl Default for Config {
  80. fn default() -> Self {
  81. Self {
  82. pod_dir: "/data/run_data".to_string(),
  83. align: Default::default(),
  84. // Reference genome
  85. reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  86. reference_name: "hs1".to_string(),
  87. dict_file: "/data/ref/hs1/chm13v2.0.dict".to_string(),
  88. refseq_gff: "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz".to_string(),
  89. docker_max_memory_go: 400,
  90. // File structure
  91. result_dir: "/data/longreads_basic_pipe".to_string(),
  92. tumoral_name: "diag".to_string(),
  93. normal_name: "mrd".to_string(),
  94. haplotagged_bam_tag_name: "HP".to_string(),
  95. count_dir_name: "counts".to_string(),
  96. count_bin_size: 1_000,
  97. count_n_chunks: 1_000,
  98. bam_min_mapq: 40,
  99. bam_n_threads: 150,
  100. db_cases_path: "/data/cases.sqlite".to_string(),
  101. //
  102. mask_bed: "{result_dir}/{id}/diag/mask.bed".to_string(),
  103. germline_phased_vcf: "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz
  104. "
  105. .to_string(),
  106. conda_sh: "/data/miniconda3/etc/profile.d/conda.sh".to_string(),
  107. somatic_pipe_stats: "{result_dir}/{id}/diag/somatic_pipe_stats"
  108. .to_string(),
  109. // DeepVariant
  110. deepvariant_output_dir: "{result_dir}/{id}/{time}/DeepVariant".to_string(),
  111. deepvariant_threads: 150,
  112. deepvariant_bin_version: "1.8.0".to_string(),
  113. deepvariant_model_type: "ONT_R104".to_string(),
  114. deepvariant_force: false,
  115. // DeepSomatic
  116. deepsomatic_output_dir: "{result_dir}/{id}/{time}/DeepSomatic".to_string(),
  117. deepsomatic_threads: 155,
  118. deepsomatic_bin_version: "1.8.0".to_string(),
  119. deepsomatic_model_type: "ONT".to_string(),
  120. // ClairS
  121. clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
  122. clairs_threads: 155,
  123. clairs_platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
  124. clairs_force: false,
  125. // Savana
  126. savana_bin: "/home/prom/.local/bin/savana".to_string(),
  127. savana_threads: 150,
  128. savana_output_dir: "{result_dir}/{id}/diag/savana".to_string(),
  129. savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf.gz".to_string(),
  130. savana_copy_number: "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv".to_string(),
  131. savana_read_counts: "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv".to_string(),
  132. savana_force: false,
  133. // Severus
  134. severus_bin: "/data/tools/Severus/severus.py".to_string(),
  135. severus_threads: 32,
  136. vntrs_bed: "/data/ref/hs1/vntrs_chm13.bed".to_string(),
  137. severus_pon: "/data/ref/hs1/PoN_1000G_chm13.tsv.gz".to_string(),
  138. severus_output_dir: "{result_dir}/{id}/diag/severus".to_string(),
  139. severus_solo_output_dir: "{result_dir}/{id}/{time}/severus".to_string(),
  140. severus_force: false,
  141. // Longphase
  142. longphase_bin: "/data/tools/longphase_linux-x64".to_string(),
  143. longphase_threads: 150,
  144. longphase_modcall_threads: 8, // ! out of memory
  145. longphase_modcall_vcf:
  146. "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz".to_string(),
  147. // modkit
  148. modkit_bin: "modkit".to_string(),
  149. modkit_summary_threads: 50,
  150. modkit_summary_file: "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  151. .to_string(),
  152. // Nanomonsv
  153. // tabix, bgzip, mafft in PATH
  154. // pip install pysam, parasail; pip install nanomonsv
  155. nanomonsv_bin: "/home/prom/.local/bin/nanomonsv".to_string(),
  156. nanomonsv_output_dir: "{result_dir}/{id}/{time}/nanomonsv".to_string(),
  157. nanomonsv_threads: 150,
  158. nanomonsv_force: false,
  159. nanomonsv_passed_vcf: "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz".to_string(),
  160. nanomonsv_solo_output_dir: "{result_dir}/{id}/{time}/nanomonsv-solo".to_string(),
  161. nanomonsv_solo_passed_vcf: "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  162. .to_string(),
  163. // Pipe
  164. somatic_pipe_force: true,
  165. somatic_min_constit_depth: 5,
  166. somatic_max_alt_constit: 1,
  167. min_shannon_entropy: 1.0,
  168. min_high_quality_depth: 14,
  169. }
  170. }
  171. }
  172. #[derive(Debug, Clone)]
  173. pub struct AlignConfig {
  174. pub dorado_bin: String,
  175. pub dorado_basecall_arg: String,
  176. pub ref_fa: String,
  177. pub ref_mmi: String,
  178. pub samtools_view_threads: u16,
  179. pub samtools_sort_threads: u16,
  180. }
  181. impl Default for AlignConfig {
  182. fn default() -> Self {
  183. Self {
  184. dorado_bin: "/data/tools/dorado-0.9.1-linux-x64/bin/dorado".to_string(),
  185. dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
  186. // to specify cuda devices (exclude the T1000)
  187. ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  188. ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
  189. samtools_view_threads: 20,
  190. samtools_sort_threads: 50,
  191. }
  192. }
  193. }
  194. impl Config {
  195. pub fn tumoral_dir(&self, id: &str) -> String {
  196. format!("{}/{}/{}", self.result_dir, id, self.tumoral_name)
  197. }
  198. pub fn normal_dir(&self, id: &str) -> String {
  199. format!("{}/{}/{}", self.result_dir, id, self.normal_name)
  200. }
  201. pub fn solo_dir(&self, id: &str, time: &str) -> String {
  202. format!("{}/{}/{}", self.result_dir, id, time)
  203. }
  204. pub fn solo_bam(&self, id: &str, time: &str) -> String {
  205. format!(
  206. "{}/{}_{}_{}.bam",
  207. self.solo_dir(id, time),
  208. id,
  209. time,
  210. self.reference_name,
  211. )
  212. }
  213. pub fn tumoral_bam(&self, id: &str) -> String {
  214. format!(
  215. "{}/{}_{}_{}.bam",
  216. self.tumoral_dir(id),
  217. id,
  218. self.tumoral_name,
  219. self.reference_name,
  220. )
  221. }
  222. pub fn normal_bam(&self, id: &str) -> String {
  223. format!(
  224. "{}/{}_{}_{}.bam",
  225. self.normal_dir(id),
  226. id,
  227. self.normal_name,
  228. self.reference_name,
  229. )
  230. }
  231. pub fn tumoral_haplotagged_bam(&self, id: &str) -> String {
  232. format!(
  233. "{}/{}_{}_{}_{}.bam",
  234. self.tumoral_dir(id),
  235. id,
  236. self.tumoral_name,
  237. self.reference_name,
  238. self.haplotagged_bam_tag_name
  239. )
  240. }
  241. pub fn normal_haplotagged_bam(&self, id: &str) -> String {
  242. format!(
  243. "{}/{}_{}_{}_{}.bam",
  244. self.normal_dir(id),
  245. id,
  246. self.normal_name,
  247. self.reference_name,
  248. self.haplotagged_bam_tag_name
  249. )
  250. }
  251. pub fn normal_dir_count(&self, id: &str) -> String {
  252. format!("{}/{}", self.normal_dir(id), self.count_dir_name)
  253. }
  254. pub fn tumoral_dir_count(&self, id: &str) -> String {
  255. format!("{}/{}", self.tumoral_dir(id), self.count_dir_name)
  256. }
  257. pub fn mask_bed(&self, id: &str) -> String {
  258. self.mask_bed
  259. .replace("{result_dir}", &self.result_dir)
  260. .replace("{id}", id)
  261. }
  262. pub fn germline_phased_vcf(&self, id: &str) -> String {
  263. self.germline_phased_vcf
  264. .replace("{result_dir}", &self.result_dir)
  265. .replace("{id}", id)
  266. }
  267. pub fn somatic_pipe_stats(&self, id: &str) -> String {
  268. self.somatic_pipe_stats
  269. .replace("{result_dir}", &self.result_dir)
  270. .replace("{id}", id)
  271. }
  272. // DeepVariant
  273. pub fn deepvariant_output_dir(&self, id: &str, time: &str) -> String {
  274. self.deepvariant_output_dir
  275. .replace("{result_dir}", &self.result_dir)
  276. .replace("{id}", id)
  277. .replace("{time}", time)
  278. }
  279. pub fn deepvariant_output_vcf(&self, id: &str, time: &str) -> String {
  280. format!(
  281. "{}/{}",
  282. self.deepvariant_output_dir(id, time),
  283. *DEEPVARIANT_OUTPUT_NAME
  284. )
  285. .replace("{id}", id)
  286. .replace("{time}", time)
  287. }
  288. // DeepSomatic
  289. pub fn deepsomatic_output_dir(&self, id: &str) -> String {
  290. self.deepsomatic_output_dir
  291. .replace("{result_dir}", &self.result_dir)
  292. .replace("{id}", id)
  293. .replace("{time}", &self.tumoral_name)
  294. }
  295. // ClairS
  296. pub fn clairs_output_dir(&self, id: &str) -> String {
  297. self.clairs_output_dir
  298. .replace("{result_dir}", &self.result_dir)
  299. .replace("{id}", id)
  300. }
  301. pub fn clairs_output_vcfs(&self, id: &str) -> (String, String) {
  302. let dir = self.clairs_output_dir(id);
  303. (
  304. format!("{dir}/{}", *CLAIRS_OUTPUT_NAME),
  305. format!("{dir}/{}", *CLAIRS_OUTPUT_INDELS_NAME),
  306. )
  307. }
  308. pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
  309. let dir = self.clairs_output_dir(id);
  310. format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)
  311. }
  312. pub fn clairs_germline_tumor_vcf(&self, id: &str) -> String {
  313. let dir = self.clairs_output_dir(id);
  314. format!("{dir}/{}", *CLAIRS_GERMLINE_TUMOR)
  315. }
  316. pub fn clairs_germline_passed_vcf(&self, id: &str) -> String {
  317. let dir = self.clairs_output_dir(id);
  318. format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
  319. }
  320. // Nanomonsv
  321. pub fn nanomonsv_output_dir(&self, id: &str, time: &str) -> String {
  322. self.nanomonsv_output_dir
  323. .replace("{result_dir}", &self.result_dir)
  324. .replace("{id}", id)
  325. .replace("{time}", time)
  326. }
  327. pub fn nanomonsv_passed_vcf(&self, id: &str) -> String {
  328. self.nanomonsv_passed_vcf
  329. .replace("{output_dir}", &self.nanomonsv_output_dir(id, "diag"))
  330. .replace("{id}", id)
  331. }
  332. // Nanomonsv solo
  333. pub fn nanomonsv_solo_output_dir(&self, id: &str, time: &str) -> String {
  334. self.nanomonsv_solo_output_dir
  335. .replace("{result_dir}", &self.result_dir)
  336. .replace("{id}", id)
  337. .replace("{time}", time)
  338. }
  339. pub fn nanomonsv_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  340. self.nanomonsv_solo_passed_vcf
  341. .replace("{output_dir}", &self.nanomonsv_solo_output_dir(id, time))
  342. .replace("{id}", id)
  343. .replace("{time}", time)
  344. }
  345. // Savana
  346. pub fn savana_output_dir(&self, id: &str) -> String {
  347. self.savana_output_dir
  348. .replace("{result_dir}", &self.result_dir)
  349. .replace("{id}", id)
  350. }
  351. pub fn savana_output_vcf(&self, id: &str) -> String {
  352. let output_dir = self.savana_output_dir(id);
  353. format!(
  354. "{output_dir}/{id}_{}_{}_{}.classified.somatic.vcf",
  355. self.tumoral_name, self.reference_name, self.haplotagged_bam_tag_name
  356. )
  357. }
  358. pub fn savana_passed_vcf(&self, id: &str) -> String {
  359. self.savana_passed_vcf
  360. .replace("{output_dir}", &self.savana_output_dir(id))
  361. .replace("{id}", id)
  362. }
  363. // {output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}
  364. pub fn savana_read_counts(&self, id: &str) -> String {
  365. self.savana_read_counts
  366. .replace("{output_dir}", &self.savana_output_dir(id))
  367. .replace("{id}", id)
  368. .replace("{reference_name}", &self.reference_name)
  369. .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
  370. }
  371. pub fn savana_copy_number(&self, id: &str) -> String {
  372. self.savana_copy_number
  373. .replace("{output_dir}", &self.savana_output_dir(id))
  374. .replace("{id}", id)
  375. .replace("{reference_name}", &self.reference_name)
  376. .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
  377. }
  378. // Severus
  379. pub fn severus_output_dir(&self, id: &str) -> String {
  380. self.severus_output_dir
  381. .replace("{result_dir}", &self.result_dir)
  382. .replace("{id}", id)
  383. }
  384. pub fn severus_output_vcf(&self, id: &str) -> String {
  385. let output_dir = self.severus_output_dir(id);
  386. format!("{output_dir}/somatic_SVs/severus_somatic.vcf")
  387. }
  388. pub fn severus_passed_vcf(&self, id: &str) -> String {
  389. format!(
  390. "{}/{}_diag_severus_PASSED.vcf.gz",
  391. &self.severus_output_dir(id),
  392. id
  393. )
  394. }
  395. // Severus solo
  396. pub fn severus_solo_output_dir(&self, id: &str, time: &str) -> String {
  397. self.severus_solo_output_dir
  398. .replace("{result_dir}", &self.result_dir)
  399. .replace("{id}", id)
  400. .replace("{time}", time)
  401. }
  402. pub fn severus_solo_output_vcf(&self, id: &str, time: &str) -> String {
  403. let output_dir = self.severus_solo_output_dir(id, time);
  404. format!("{output_dir}/all_SVs/severus_all.vcf")
  405. }
  406. pub fn severus_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  407. format!(
  408. "{}/{}_{}_severus-solo_PASSED.vcf.gz",
  409. &self.severus_solo_output_dir(id, time),
  410. id,
  411. time
  412. )
  413. }
  414. pub fn constit_vcf(&self, id: &str) -> String {
  415. self.clairs_germline_passed_vcf(id)
  416. // format!("{}/{}_variants_constit.vcf.gz", self.tumoral_dir(id), id)
  417. }
  418. pub fn constit_phased_vcf(&self, id: &str) -> String {
  419. format!(
  420. "{}/{}_variants_constit_phased.vcf.gz",
  421. self.tumoral_dir(id),
  422. id
  423. )
  424. }
  425. pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
  426. self.modkit_summary_file
  427. .replace("{result_dir}", &self.result_dir)
  428. .replace("{id}", id)
  429. .replace("{time}", time)
  430. }
  431. pub fn longphase_modcall_vcf(&self, id: &str, time: &str) -> String {
  432. self.longphase_modcall_vcf
  433. .replace("{result_dir}", &self.result_dir)
  434. .replace("{id}", id)
  435. .replace("{time}", time)
  436. }
  437. }