config.rs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. #[derive(Debug, Clone)]
  2. pub struct Config {
  3. pub pod_dir: String,
  4. pub result_dir: String,
  5. pub align: AlignConfig,
  6. pub reference: String,
  7. pub reference_name: String,
  8. pub dict_file: String,
  9. pub docker_max_memory_go: u16,
  10. pub savana_bin: String,
  11. pub savana_threads: u8,
  12. pub tumoral_name: String,
  13. pub normal_name: String,
  14. pub haplotagged_bam_tag_name: String,
  15. pub savana_output_dir: String,
  16. pub savana_copy_number: String,
  17. pub savana_read_counts: String,
  18. pub germline_phased_vcf: String,
  19. pub savana_passed_vcf: String,
  20. pub conda_sh: String,
  21. pub savana_force: bool,
  22. pub deepvariant_output_dir: String,
  23. pub severus_bin: String,
  24. pub severus_force: bool,
  25. pub severus_threads: u8,
  26. pub vntrs_bed: String,
  27. pub severus_pon: String,
  28. pub severus_output_dir: String,
  29. pub severus_solo_output_dir: String,
  30. pub longphase_bin: String,
  31. pub longphase_threads: u8,
  32. pub longphase_modcall_vcf: String,
  33. pub modkit_bin: String,
  34. pub modkit_summary_threads: u8,
  35. pub modkit_summary_file: String,
  36. pub longphase_modcall_threads: u8,
  37. pub deepvariant_threads: u8,
  38. pub deepvariant_bin_version: String,
  39. pub deepvariant_model_type: String,
  40. pub deepvariant_force: bool,
  41. pub deepsomatic_output_dir: String,
  42. pub deepsomatic_threads: u8,
  43. pub deepsomatic_bin_version: String,
  44. pub deepsomatic_model_type: String,
  45. pub bam_min_mapq: u8,
  46. pub bam_n_threads: u8,
  47. pub db_cases_path: String,
  48. pub somatic_pipe_stats: String,
  49. pub clairs_threads: u8,
  50. pub clairs_force: bool,
  51. pub clairs_platform: String,
  52. pub clairs_output_dir: String,
  53. pub mask_bed: String,
  54. pub solo_min_constit_depth: u16,
  55. pub solo_max_alt_constit: u16,
  56. pub min_shannon_entropy: f64,
  57. pub nanomonsv_bin: String,
  58. pub nanomonsv_output_dir: String,
  59. pub nanomonsv_force: bool,
  60. pub nanomonsv_threads: u8,
  61. pub nanomonsv_passed_vcf: String,
  62. pub nanomonsv_solo_output_dir: String,
  63. pub nanomonsv_solo_passed_vcf: String,
  64. pub somatic_pipe_force: bool,
  65. }
  66. // Here comes names that can't be changed from output of tools
  67. lazy_static! {
  68. static ref DEEPVARIANT_OUTPUT_NAME: &'static str = "{id}_{time}_DeepVariant.vcf.gz";
  69. static ref CLAIRS_OUTPUT_NAME: &'static str = "output.vcf.gz";
  70. static ref CLAIRS_OUTPUT_INDELS_NAME: &'static str = "indel.vcf.gz";
  71. static ref CLAIRS_GERMLINE_NORMAL: &'static str = "clair3_normal_germline_output.vcf.gz";
  72. static ref CLAIRS_GERMLINE_TUMOR: &'static str = "clair3_tumor_germline_output.vcf.gz";
  73. }
  74. impl Default for Config {
  75. fn default() -> Self {
  76. Self {
  77. pod_dir: "/data/run_data".to_string(),
  78. align: Default::default(),
  79. // Reference genome
  80. reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  81. reference_name: "hs1".to_string(),
  82. dict_file: "/data/ref/hs1/chm13v2.0.dict".to_string(),
  83. docker_max_memory_go: 400,
  84. // File structure
  85. result_dir: "/data/longreads_basic_pipe".to_string(),
  86. tumoral_name: "diag".to_string(),
  87. normal_name: "mrd".to_string(),
  88. haplotagged_bam_tag_name: "HP".to_string(),
  89. bam_min_mapq: 40,
  90. bam_n_threads: 150,
  91. db_cases_path: "/data/cases.sqlite".to_string(),
  92. //
  93. mask_bed: "{result_dir}/{id}/diag/mask.bed".to_string(),
  94. germline_phased_vcf: "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz
  95. "
  96. .to_string(),
  97. conda_sh: "/data/miniconda3/etc/profile.d/conda.sh".to_string(),
  98. somatic_pipe_stats: "{result_dir}/{id}/diag/somatic_pipe_stats"
  99. .to_string(),
  100. // DeepVariant
  101. deepvariant_output_dir: "{result_dir}/{id}/{time}/DeepVariant".to_string(),
  102. deepvariant_threads: 150,
  103. deepvariant_bin_version: "1.8.0".to_string(),
  104. deepvariant_model_type: "ONT_R104".to_string(),
  105. deepvariant_force: false,
  106. // DeepSomatic
  107. deepsomatic_output_dir: "{result_dir}/{id}/{time}/DeepSomatic".to_string(),
  108. deepsomatic_threads: 155,
  109. deepsomatic_bin_version: "1.8.0".to_string(),
  110. deepsomatic_model_type: "ONT".to_string(),
  111. // ClairS
  112. clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
  113. clairs_threads: 155,
  114. clairs_platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
  115. clairs_force: false,
  116. // Savana
  117. savana_bin: "savana".to_string(),
  118. savana_threads: 150,
  119. savana_output_dir: "{result_dir}/{id}/diag/savana".to_string(),
  120. savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf.gz".to_string(),
  121. savana_copy_number: "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv".to_string(),
  122. savana_read_counts: "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv".to_string(),
  123. savana_force: false,
  124. // Severus
  125. severus_bin: "/data/tools/Severus/severus.py".to_string(),
  126. severus_threads: 32,
  127. vntrs_bed: "/data/ref/hs1/vntrs_chm13.bed".to_string(),
  128. severus_pon: "/data/ref/hs1/PoN_1000G_chm13.tsv.gz".to_string(),
  129. severus_output_dir: "{result_dir}/{id}/diag/severus".to_string(),
  130. severus_solo_output_dir: "{result_dir}/{id}/{time}/severus".to_string(),
  131. severus_force: false,
  132. // Longphase
  133. longphase_bin: "/data/tools/longphase_linux-x64".to_string(),
  134. longphase_threads: 150,
  135. longphase_modcall_threads: 8, // ! out of memory
  136. longphase_modcall_vcf:
  137. "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz".to_string(),
  138. // modkit
  139. modkit_bin: "modkit".to_string(),
  140. modkit_summary_threads: 50,
  141. modkit_summary_file: "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  142. .to_string(),
  143. // Nanomonsv
  144. nanomonsv_bin: "nanomonsv".to_string(),
  145. nanomonsv_output_dir: "{result_dir}/{id}/{time}/nanomonsv".to_string(),
  146. nanomonsv_threads: 150,
  147. nanomonsv_force: false,
  148. nanomonsv_passed_vcf: "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz".to_string(),
  149. nanomonsv_solo_output_dir: "{result_dir}/{id}/{time}/nanomonsv-solo".to_string(),
  150. nanomonsv_solo_passed_vcf: "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  151. .to_string(),
  152. // Pipe
  153. somatic_pipe_force: true,
  154. solo_min_constit_depth: 5,
  155. solo_max_alt_constit: 1,
  156. min_shannon_entropy: 1.0,
  157. }
  158. }
  159. }
  /// Settings for the basecalling and alignment step.
  #[derive(Debug, Clone)]
  pub struct AlignConfig {
      /// Path to the dorado executable.
      pub dorado_bin: String,
      /// Argument string for dorado basecalling (device selection and models
      /// in the default value).
      pub dorado_basecall_arg: String,
      /// Path to the reference FASTA.
      pub ref_fa: String,
      /// Path to the reference `.mmi` index.
      pub ref_mmi: String,
      /// Thread count for `samtools view`.
      pub samtools_view_threads: u16,
      /// Thread count for `samtools sort`.
      pub samtools_sort_threads: u16,
  }
  169. impl Default for AlignConfig {
  170. fn default() -> Self {
  171. Self {
  172. dorado_bin: "/data/tools/dorado-0.9.1-linux-x64/bin/dorado".to_string(),
  173. dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
  174. // to specify cuda devices (exclude the T1000)
  175. ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  176. ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
  177. samtools_view_threads: 20,
  178. samtools_sort_threads: 50,
  179. }
  180. }
  181. }
  182. impl Config {
  183. pub fn tumoral_dir(&self, id: &str) -> String {
  184. format!("{}/{}/{}", self.result_dir, id, self.tumoral_name)
  185. }
  186. pub fn normal_dir(&self, id: &str) -> String {
  187. format!("{}/{}/{}", self.result_dir, id, self.normal_name)
  188. }
  189. pub fn solo_dir(&self, id: &str, time: &str) -> String {
  190. format!("{}/{}/{}", self.result_dir, id, time)
  191. }
  192. pub fn solo_bam(&self, id: &str, time: &str) -> String {
  193. format!(
  194. "{}/{}_{}_{}.bam",
  195. self.solo_dir(id, time),
  196. id,
  197. time,
  198. self.reference_name,
  199. )
  200. }
  201. pub fn tumoral_bam(&self, id: &str) -> String {
  202. format!(
  203. "{}/{}_{}_{}.bam",
  204. self.tumoral_dir(id),
  205. id,
  206. self.tumoral_name,
  207. self.reference_name,
  208. )
  209. }
  210. pub fn normal_bam(&self, id: &str) -> String {
  211. format!(
  212. "{}/{}_{}_{}.bam",
  213. self.normal_dir(id),
  214. id,
  215. self.normal_name,
  216. self.reference_name,
  217. )
  218. }
  219. pub fn tumoral_haplotagged_bam(&self, id: &str) -> String {
  220. format!(
  221. "{}/{}_{}_{}_{}.bam",
  222. self.tumoral_dir(id),
  223. id,
  224. self.tumoral_name,
  225. self.reference_name,
  226. self.haplotagged_bam_tag_name
  227. )
  228. }
  229. pub fn normal_haplotagged_bam(&self, id: &str) -> String {
  230. format!(
  231. "{}/{}_{}_{}_{}.bam",
  232. self.normal_dir(id),
  233. id,
  234. self.normal_name,
  235. self.reference_name,
  236. self.haplotagged_bam_tag_name
  237. )
  238. }
  239. pub fn mask_bed(&self, id: &str) -> String {
  240. self.mask_bed
  241. .replace("{result_dir}", &self.result_dir)
  242. .replace("{id}", id)
  243. }
  244. pub fn germline_phased_vcf(&self, id: &str) -> String {
  245. self.germline_phased_vcf
  246. .replace("{result_dir}", &self.result_dir)
  247. .replace("{id}", id)
  248. }
  249. pub fn somatic_pipe_stats(&self, id: &str) -> String {
  250. self.somatic_pipe_stats
  251. .replace("{result_dir}", &self.result_dir)
  252. .replace("{id}", id)
  253. }
  254. // DeepVariant
  255. pub fn deepvariant_output_dir(&self, id: &str, time: &str) -> String {
  256. self.deepvariant_output_dir
  257. .replace("{result_dir}", &self.result_dir)
  258. .replace("{id}", id)
  259. .replace("{time}", time)
  260. }
  261. pub fn deepvariant_output_vcf(&self, id: &str, time: &str) -> String {
  262. format!(
  263. "{}/{}",
  264. self.deepvariant_output_dir(id, time),
  265. *DEEPVARIANT_OUTPUT_NAME
  266. )
  267. .replace("{id}", id)
  268. .replace("{time}", time)
  269. }
  270. // DeepSomatic
  271. pub fn deepsomatic_output_dir(&self, id: &str) -> String {
  272. self.deepsomatic_output_dir
  273. .replace("{result_dir}", &self.result_dir)
  274. .replace("{id}", id)
  275. .replace("{time}", &self.tumoral_name)
  276. }
  277. // ClairS
  278. pub fn clairs_output_dir(&self, id: &str) -> String {
  279. self.clairs_output_dir
  280. .replace("{result_dir}", &self.result_dir)
  281. .replace("{id}", id)
  282. }
  283. pub fn clairs_output_vcfs(&self, id: &str) -> (String, String) {
  284. let dir = self.clairs_output_dir(id);
  285. (
  286. format!("{dir}/{}", *CLAIRS_OUTPUT_NAME),
  287. format!("{dir}/{}", *CLAIRS_OUTPUT_INDELS_NAME),
  288. )
  289. }
  290. pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
  291. let dir = self.clairs_output_dir(id);
  292. format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)
  293. }
  294. pub fn clairs_germline_tumor_vcf(&self, id: &str) -> String {
  295. let dir = self.clairs_output_dir(id);
  296. format!("{dir}/{}", *CLAIRS_GERMLINE_TUMOR)
  297. }
  298. pub fn clairs_germline_passed_vcf(&self, id: &str) -> String {
  299. let dir = self.clairs_output_dir(id);
  300. format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
  301. }
  302. // Nanomonsv
  303. pub fn nanomonsv_output_dir(&self, id: &str, time: &str) -> String {
  304. self.nanomonsv_output_dir
  305. .replace("{result_dir}", &self.result_dir)
  306. .replace("{id}", id)
  307. .replace("{time}", time)
  308. }
  309. pub fn nanomonsv_passed_vcf(&self, id: &str) -> String {
  310. self.nanomonsv_passed_vcf
  311. .replace("{output_dir}", &self.nanomonsv_output_dir(id, "diag"))
  312. .replace("{id}", id)
  313. }
  314. // Nanomonsv solo
  315. pub fn nanomonsv_solo_output_dir(&self, id: &str, time: &str) -> String {
  316. self.nanomonsv_solo_output_dir
  317. .replace("{result_dir}", &self.result_dir)
  318. .replace("{id}", id)
  319. .replace("{time}", time)
  320. }
  321. pub fn nanomonsv_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  322. self.nanomonsv_solo_passed_vcf
  323. .replace("{output_dir}", &self.nanomonsv_solo_output_dir(id, time))
  324. .replace("{id}", id)
  325. .replace("{time}", time)
  326. }
  327. // Savana
  328. pub fn savana_output_dir(&self, id: &str) -> String {
  329. self.savana_output_dir
  330. .replace("{result_dir}", &self.result_dir)
  331. .replace("{id}", id)
  332. }
  333. pub fn savana_output_vcf(&self, id: &str) -> String {
  334. let output_dir = self.savana_output_dir(id);
  335. format!(
  336. "{output_dir}/{id}_{}_{}_{}.classified.somatic.vcf",
  337. self.tumoral_name, self.reference_name, self.haplotagged_bam_tag_name
  338. )
  339. }
  340. pub fn savana_passed_vcf(&self, id: &str) -> String {
  341. self.savana_passed_vcf
  342. .replace("{output_dir}", &self.savana_output_dir(id))
  343. .replace("{id}", id)
  344. }
  345. // {output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}
  346. pub fn savana_read_counts(&self, id: &str) -> String {
  347. self.savana_read_counts
  348. .replace("{output_dir}", &self.savana_output_dir(id))
  349. .replace("{id}", id)
  350. .replace("{reference_name}", &self.reference_name)
  351. .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
  352. }
  353. pub fn savana_copy_number(&self, id: &str) -> String {
  354. self.savana_copy_number
  355. .replace("{output_dir}", &self.savana_output_dir(id))
  356. .replace("{id}", id)
  357. .replace("{reference_name}", &self.reference_name)
  358. .replace("{haplotagged_bam_tag_name}", &self.haplotagged_bam_tag_name)
  359. }
  360. // Severus
  361. pub fn severus_output_dir(&self, id: &str) -> String {
  362. self.severus_output_dir
  363. .replace("{result_dir}", &self.result_dir)
  364. .replace("{id}", id)
  365. }
  366. pub fn severus_output_vcf(&self, id: &str) -> String {
  367. let output_dir = self.severus_output_dir(id);
  368. format!("{output_dir}/somatic_SVs/severus_somatic.vcf")
  369. }
  370. pub fn severus_passed_vcf(&self, id: &str) -> String {
  371. format!(
  372. "{}/{}_diag_severus_PASSED.vcf.gz",
  373. &self.severus_output_dir(id),
  374. id
  375. )
  376. }
  377. // Severus solo
  378. pub fn severus_solo_output_dir(&self, id: &str, time: &str) -> String {
  379. self.severus_solo_output_dir
  380. .replace("{result_dir}", &self.result_dir)
  381. .replace("{id}", id)
  382. .replace("{time}", time)
  383. }
  384. pub fn severus_solo_output_vcf(&self, id: &str, time: &str) -> String {
  385. let output_dir = self.severus_solo_output_dir(id, time);
  386. format!("{output_dir}/all_SVs/severus_all.vcf")
  387. }
  388. pub fn severus_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  389. format!(
  390. "{}/{}_{}_severus-solo_PASSED.vcf.gz",
  391. &self.severus_solo_output_dir(id, time),
  392. id,
  393. time
  394. )
  395. }
  396. pub fn constit_vcf(&self, id: &str) -> String {
  397. self.clairs_germline_passed_vcf(id)
  398. // format!("{}/{}_variants_constit.vcf.gz", self.tumoral_dir(id), id)
  399. }
  400. pub fn constit_phased_vcf(&self, id: &str) -> String {
  401. format!(
  402. "{}/{}_variants_constit_phased.vcf.gz",
  403. self.tumoral_dir(id),
  404. id
  405. )
  406. }
  407. pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
  408. self.modkit_summary_file
  409. .replace("{result_dir}", &self.result_dir)
  410. .replace("{id}", id)
  411. .replace("{time}", time)
  412. }
  413. pub fn longphase_modcall_vcf(&self, id: &str, time: &str) -> String {
  414. self.longphase_modcall_vcf
  415. .replace("{result_dir}", &self.result_dir)
  416. .replace("{id}", id)
  417. .replace("{time}", time)
  418. }
  419. }