// config.rs
/// Pipeline configuration: tool executables, thread counts, force flags and
/// output path templates.
///
/// Fields described as *templates* contain placeholders (`{result_dir}`,
/// `{id}`, `{time}`, `{output_dir}`) that the same-named `Config` methods fill
/// in by plain string replacement.
///
/// NOTE(review): the `*_force` flags are not read in this file; presumably they
/// request a re-run even when a tool's output already exists — confirm with
/// the code that launches each tool.
#[derive(Debug, Clone)]
pub struct Config {
    /// NOTE(review): presumably the directory holding raw run data — confirm;
    /// it is not read in this file.
    pub pod_dir: String,
    /// Root of the results tree: substituted for `{result_dir}` in templates
    /// and used as the base of `tumoral_dir`, `normal_dir` and `solo_dir`.
    pub result_dir: String,
    /// Nested settings, see [`AlignConfig`].
    pub align: AlignConfig,
    /// Path to the reference genome.
    pub reference: String,
    /// Reference name embedded in BAM file names.
    pub reference_name: String,
    /// Savana executable (name or path).
    pub savana_bin: String,
    /// Thread count setting for Savana.
    pub savana_threads: u8,
    /// Name of the tumoral sample: sub-directory name and BAM file-name
    /// component; also the `{time}` value used by `deepsomatic_output_dir`.
    pub tumoral_name: String,
    /// Name of the normal sample: sub-directory name and BAM file-name component.
    pub normal_name: String,
    /// Suffix appended to haplotagged BAM file names.
    pub haplotagged_bam_tag_name: String,
    /// Template of the Savana output directory (`{result_dir}`, `{id}`).
    pub savana_output_dir: String,
    /// Template of the phased germline VCF path (`{result_dir}`, `{id}`).
    pub germline_phased_vcf: String,
    /// Template of the Savana PASSED VCF path (`{output_dir}`, `{id}`);
    /// `{output_dir}` is the expanded Savana output directory.
    pub savana_passed_vcf: String,
    /// Path to a `conda.sh` script. NOTE(review): presumably sourced before
    /// running conda-installed tools — confirm.
    pub conda_sh: String,
    /// Force flag for Savana (see the struct-level note).
    pub savana_force: bool,
    /// Template of the DeepVariant output directory
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub deepvariant_output_dir: String,
    /// Severus executable (name or path).
    pub severus_bin: String,
    /// Force flag for Severus (see the struct-level note).
    pub severus_force: bool,
    /// Thread count setting for Severus.
    pub severus_threads: u8,
    /// Path to a VNTR BED file.
    pub vntrs_bed: String,
    /// Path to the Severus PoN file. NOTE(review): presumably a panel of
    /// normals — confirm.
    pub severus_pon: String,
    /// Template of the Severus output directory (`{result_dir}`, `{id}`).
    pub severus_output_dir: String,
    /// Template of the Severus solo output directory
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub severus_solo_output_dir: String,
    /// Longphase executable (name or path).
    pub longphase_bin: String,
    /// Thread count setting for Longphase.
    pub longphase_threads: u8,
    /// Template of the Longphase modcall VCF path
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub longphase_modcall_vcf: String,
    /// Modkit executable (name or path).
    pub modkit_bin: String,
    /// Thread count setting for the modkit summary.
    pub modkit_summary_threads: u8,
    /// Template of the modkit summary file path
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub modkit_summary_file: String,
    /// Thread count setting for Longphase modcall.
    pub longphase_modcall_threads: u8,
    /// Thread count setting for DeepVariant.
    pub deepvariant_threads: u8,
    /// DeepVariant version string.
    pub deepvariant_bin_version: String,
    /// DeepVariant model type.
    pub deepvariant_model_type: String,
    /// Force flag for DeepVariant (see the struct-level note).
    pub deepvariant_force: bool,
    /// Template of the DeepSomatic output directory
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub deepsomatic_output_dir: String,
    /// Thread count setting for DeepSomatic.
    pub deepsomatic_threads: u8,
    /// DeepSomatic version string.
    pub deepsomatic_bin_version: String,
    /// DeepSomatic model type.
    pub deepsomatic_model_type: String,
    /// Thread count setting for ClairS.
    pub clairs_threads: u8,
    /// Force flag for ClairS (see the struct-level note).
    pub clairs_force: bool,
    /// ClairS platform identifier.
    pub clairs_platform: String,
    /// Template of the ClairS output directory (`{result_dir}`, `{id}`).
    pub clairs_output_dir: String,
    /// Template of the mask BED path (`{result_dir}`, `{id}`).
    pub mask_bed: String,
    /// NOTE(review): threshold not read in this file; presumably a minimum
    /// constitutional depth used when filtering solo calls — confirm.
    pub solo_min_constit_depth: u16,
    /// NOTE(review): threshold not read in this file; presumably a maximum
    /// count of alternate observations in the constitutional sample — confirm.
    pub solo_max_alt_constit: u16,
    /// NOTE(review): threshold not read in this file; presumably a minimum
    /// Shannon entropy used as a filter — confirm.
    pub min_shannon_entropy: f64,
    /// Nanomonsv executable (name or path).
    pub nanomonsv_bin: String,
    /// Template of the Nanomonsv output directory
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub nanomonsv_output_dir: String,
    /// Force flag for Nanomonsv (see the struct-level note).
    pub nanomonsv_force: bool,
    /// Thread count setting for Nanomonsv.
    pub nanomonsv_threads: u8,
    /// Template of the Nanomonsv PASSED VCF path (`{output_dir}`, `{id}`).
    pub nanomonsv_passed_vcf: String,
    /// Template of the Nanomonsv solo output directory
    /// (`{result_dir}`, `{id}`, `{time}`).
    pub nanomonsv_solo_output_dir: String,
    /// Template of the Nanomonsv solo PASSED VCF path
    /// (`{output_dir}`, `{id}`, `{time}`).
    pub nanomonsv_solo_passed_vcf: String,
}
// Output file names that are dictated by the external tools themselves and
// therefore cannot be changed through the configuration.
lazy_static! {
    // DeepVariant VCF file name; still contains the `{id}` and `{time}`
    // placeholders, which `Config::deepvariant_output_vcf` substitutes.
    static ref DEEPVARIANT_OUTPUT_NAME: &'static str = "{id}_{time}_DeepVariant.vcf.gz";
    // First VCF returned by `Config::clairs_output_vcfs`.
    static ref CLAIRS_OUTPUT_NAME: &'static str = "output.vcf.gz";
    // Second VCF returned by `Config::clairs_output_vcfs` (indels).
    static ref CLAIRS_OUTPUT_INDELS_NAME: &'static str = "indel.vcf.gz";
    // Germline VCF for the normal sample, located in the ClairS output directory.
    static ref CLAIRS_GERMLINE_NORMAL: &'static str = "clair3_normal_germline_output.vcf.gz";
    // Germline VCF for the tumor sample, located in the ClairS output directory.
    static ref CLAIRS_GERMLINE_TUMOR: &'static str = "clair3_tumor_germline_output.vcf.gz";
}
  65. impl Default for Config {
  66. fn default() -> Self {
  67. Self {
  68. pod_dir: "/data/run_data".to_string(),
  69. align: Default::default(),
  70. // Reference genome
  71. reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  72. reference_name: "hs1".to_string(),
  73. // File structure
  74. result_dir: "/data/longreads_basic_pipe".to_string(),
  75. tumoral_name: "diag".to_string(),
  76. normal_name: "mrd".to_string(),
  77. haplotagged_bam_tag_name: "HP".to_string(),
  78. //
  79. mask_bed: "{result_dir}/{id}/diag/mask.bed".to_string(),
  80. germline_phased_vcf: "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz
  81. "
  82. .to_string(),
  83. conda_sh: "/data/miniconda3/etc/profile.d/conda.sh".to_string(),
  84. // DeepVariant
  85. deepvariant_output_dir: "{result_dir}/{id}/{time}/DeepVariant".to_string(),
  86. deepvariant_threads: 155,
  87. deepvariant_bin_version: "1.8.0".to_string(),
  88. deepvariant_model_type: "ONT_R104".to_string(),
  89. deepvariant_force: false,
  90. // DeepSomatic
  91. deepsomatic_output_dir: "{result_dir}/{id}/{time}/DeepSomatic".to_string(),
  92. deepsomatic_threads: 155,
  93. deepsomatic_bin_version: "1.8.0".to_string(),
  94. deepsomatic_model_type: "ONT".to_string(),
  95. // ClairS
  96. clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
  97. clairs_threads: 155,
  98. clairs_platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
  99. clairs_force: false,
  100. // Savana
  101. savana_bin: "savana".to_string(),
  102. savana_threads: 150,
  103. savana_output_dir: "{result_dir}/{id}/diag/savana".to_string(),
  104. savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf".to_string(),
  105. savana_force: false,
  106. // Severus
  107. severus_bin: "/data/tools/Severus/severus.py".to_string(),
  108. severus_threads: 32,
  109. vntrs_bed: "/data/ref/hs1/vntrs_chm13.bed".to_string(),
  110. severus_pon: "/data/ref/hs1/PoN_1000G_chm13.tsv.gz".to_string(),
  111. severus_output_dir: "{result_dir}/{id}/diag/severus".to_string(),
  112. severus_solo_output_dir: "{result_dir}/{id}/{time}/severus".to_string(),
  113. severus_force: false,
  114. // Longphase
  115. longphase_bin: "/data/tools/longphase_linux-x64".to_string(),
  116. longphase_threads: 150,
  117. longphase_modcall_threads: 8, // ! out of memory
  118. longphase_modcall_vcf:
  119. "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz".to_string(),
  120. // modkit
  121. modkit_bin: "modkit".to_string(),
  122. modkit_summary_threads: 50,
  123. modkit_summary_file: "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  124. .to_string(),
  125. // Nanomonsv
  126. nanomonsv_bin: "nanomonsv".to_string(),
  127. nanomonsv_output_dir: "{result_dir}/{id}/{time}/nanomonsv".to_string(),
  128. nanomonsv_threads: 150,
  129. nanomonsv_force: false,
  130. nanomonsv_passed_vcf: "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz".to_string(),
  131. nanomonsv_solo_output_dir: "{result_dir}/{id}/{time}/nanomonsv-solo".to_string(),
  132. nanomonsv_solo_passed_vcf: "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
  133. .to_string(),
  134. // Pipe
  135. solo_min_constit_depth: 5,
  136. solo_max_alt_constit: 1,
  137. min_shannon_entropy: 1.0,
  138. }
  139. }
  140. }
  141. #[derive(Debug, Clone)]
  142. pub struct AlignConfig {
  143. pub dorado_bin: String,
  144. pub dorado_basecall_arg: String,
  145. pub ref_fa: String,
  146. pub ref_mmi: String,
  147. pub samtools_view_threads: u16,
  148. pub samtools_sort_threads: u16,
  149. }
  150. impl Default for AlignConfig {
  151. fn default() -> Self {
  152. Self {
  153. dorado_bin: "/data/tools/dorado-0.9.0-linux-x64/bin/dorado".to_string(),
  154. dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
  155. // to specify cuda devices (exclude the T1000)
  156. ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  157. ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
  158. samtools_view_threads: 20,
  159. samtools_sort_threads: 50,
  160. }
  161. }
  162. }
  163. impl Config {
  164. pub fn tumoral_dir(&self, id: &str) -> String {
  165. format!("{}/{}/{}", self.result_dir, id, self.tumoral_name)
  166. }
  167. pub fn normal_dir(&self, id: &str) -> String {
  168. format!("{}/{}/{}", self.result_dir, id, self.normal_name)
  169. }
  170. pub fn solo_dir(&self, id: &str, time: &str) -> String {
  171. format!("{}/{}/{}", self.result_dir, id, time)
  172. }
  173. pub fn solo_bam(&self, id: &str, time: &str) -> String {
  174. format!(
  175. "{}/{}_{}_{}.bam",
  176. self.solo_dir(id, time),
  177. id,
  178. time,
  179. self.reference_name,
  180. )
  181. }
  182. pub fn tumoral_bam(&self, id: &str) -> String {
  183. format!(
  184. "{}/{}_{}_{}.bam",
  185. self.tumoral_dir(id),
  186. id,
  187. self.tumoral_name,
  188. self.reference_name,
  189. )
  190. }
  191. pub fn normal_bam(&self, id: &str) -> String {
  192. format!(
  193. "{}/{}_{}_{}.bam",
  194. self.normal_dir(id),
  195. id,
  196. self.normal_name,
  197. self.reference_name,
  198. )
  199. }
  200. pub fn tumoral_haplotagged_bam(&self, id: &str) -> String {
  201. format!(
  202. "{}/{}_{}_{}_{}.bam",
  203. self.tumoral_dir(id),
  204. id,
  205. self.tumoral_name,
  206. self.reference_name,
  207. self.haplotagged_bam_tag_name
  208. )
  209. }
  210. pub fn normal_haplotagged_bam(&self, id: &str) -> String {
  211. format!(
  212. "{}/{}_{}_{}_{}.bam",
  213. self.normal_dir(id),
  214. id,
  215. self.normal_name,
  216. self.reference_name,
  217. self.haplotagged_bam_tag_name
  218. )
  219. }
  220. pub fn mask_bed(&self, id: &str) -> String {
  221. self.mask_bed
  222. .replace("{result_dir}", &self.result_dir)
  223. .replace("{id}", id)
  224. }
  225. pub fn germline_phased_vcf(&self, id: &str) -> String {
  226. self.germline_phased_vcf
  227. .replace("{result_dir}", &self.result_dir)
  228. .replace("{id}", id)
  229. }
  230. // DeepVariant
  231. pub fn deepvariant_output_dir(&self, id: &str, time: &str) -> String {
  232. self.deepvariant_output_dir
  233. .replace("{result_dir}", &self.result_dir)
  234. .replace("{id}", id)
  235. .replace("{time}", time)
  236. }
  237. pub fn deepvariant_output_vcf(&self, id: &str, time: &str) -> String {
  238. format!(
  239. "{}/{}",
  240. self.deepvariant_output_dir(id, time),
  241. *DEEPVARIANT_OUTPUT_NAME
  242. )
  243. .replace("{id}", id)
  244. .replace("{time}", time)
  245. }
  246. // DeepSomatic
  247. pub fn deepsomatic_output_dir(&self, id: &str) -> String {
  248. self.deepsomatic_output_dir
  249. .replace("{result_dir}", &self.result_dir)
  250. .replace("{id}", id)
  251. .replace("{time}", &self.tumoral_name)
  252. }
  253. // ClairS
  254. pub fn clairs_output_dir(&self, id: &str) -> String {
  255. self.clairs_output_dir
  256. .replace("{result_dir}", &self.result_dir)
  257. .replace("{id}", id)
  258. }
  259. pub fn clairs_output_vcfs(&self, id: &str) -> (String, String) {
  260. let dir = self.clairs_output_dir(id);
  261. (
  262. format!("{dir}/{}", *CLAIRS_OUTPUT_NAME),
  263. format!("{dir}/{}", *CLAIRS_OUTPUT_INDELS_NAME),
  264. )
  265. }
  266. pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
  267. let dir = self.clairs_output_dir(id);
  268. format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)
  269. }
  270. pub fn clairs_germline_tumor_vcf(&self, id: &str) -> String {
  271. let dir = self.clairs_output_dir(id);
  272. format!("{dir}/{}", *CLAIRS_GERMLINE_TUMOR)
  273. }
  274. pub fn clairs_germline_passed_vcf(&self, id: &str) -> String {
  275. let dir = self.clairs_output_dir(id);
  276. format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
  277. }
  278. // Nanomonsv
  279. pub fn nanomonsv_output_dir(&self, id: &str, time: &str) -> String {
  280. self.nanomonsv_output_dir
  281. .replace("{result_dir}", &self.result_dir)
  282. .replace("{id}", id)
  283. .replace("{time}", time)
  284. }
  285. pub fn nanomonsv_passed_vcf(&self, id: &str) -> String {
  286. self.nanomonsv_passed_vcf
  287. .replace("{output_dir}", &self.nanomonsv_output_dir(id, "diag"))
  288. .replace("{id}", id)
  289. }
  290. // Nanomonsv solo
  291. pub fn nanomonsv_solo_output_dir(&self, id: &str, time: &str) -> String {
  292. self.nanomonsv_solo_output_dir
  293. .replace("{result_dir}", &self.result_dir)
  294. .replace("{id}", id)
  295. .replace("{time}", time)
  296. }
  297. pub fn nanomonsv_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  298. self.nanomonsv_solo_passed_vcf
  299. .replace("{output_dir}", &self.nanomonsv_solo_output_dir(id, time))
  300. .replace("{id}", id)
  301. .replace("{time}", time)
  302. }
  303. // Savana
  304. pub fn savana_output_dir(&self, id: &str) -> String {
  305. self.savana_output_dir
  306. .replace("{result_dir}", &self.result_dir)
  307. .replace("{id}", id)
  308. }
  309. pub fn savana_output_vcf(&self, id: &str) -> String {
  310. let output_dir = self.savana_output_dir(id);
  311. format!("{output_dir}/{id}_diag_hs1_hp.classified.somatic.vcf")
  312. }
  313. pub fn savana_passed_vcf(&self, id: &str) -> String {
  314. self.savana_passed_vcf
  315. .replace("{output_dir}", &self.savana_output_dir(id))
  316. .replace("{id}", id)
  317. }
  318. // Severus
  319. pub fn severus_output_dir(&self, id: &str) -> String {
  320. self.severus_output_dir
  321. .replace("{result_dir}", &self.result_dir)
  322. .replace("{id}", id)
  323. }
  324. pub fn severus_output_vcf(&self, id: &str) -> String {
  325. let output_dir = self.severus_output_dir(id);
  326. format!("{output_dir}/somatic_SVs/severus_somatic.vcf")
  327. }
  328. pub fn severus_passed_vcf(&self, id: &str) -> String {
  329. format!(
  330. "{}/{}_diag_severus_PASSED.vcf.gz",
  331. &self.severus_output_dir(id),
  332. id
  333. )
  334. }
  335. // Severus solo
  336. pub fn severus_solo_output_dir(&self, id: &str, time: &str) -> String {
  337. self.severus_solo_output_dir
  338. .replace("{result_dir}", &self.result_dir)
  339. .replace("{id}", id)
  340. .replace("{time}", time)
  341. }
  342. pub fn severus_solo_output_vcf(&self, id: &str, time: &str) -> String {
  343. let output_dir = self.severus_solo_output_dir(id, time);
  344. format!("{output_dir}/all_SVs/severus_all.vcf")
  345. }
  346. pub fn severus_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  347. format!(
  348. "{}/{}_{}_severus-solo_PASSED.vcf.gz",
  349. &self.severus_solo_output_dir(id, time),
  350. id,
  351. time
  352. )
  353. }
  354. pub fn constit_vcf(&self, id: &str) -> String {
  355. self.clairs_germline_passed_vcf(id)
  356. // format!("{}/{}_variants_constit.vcf.gz", self.tumoral_dir(id), id)
  357. }
  358. pub fn constit_phased_vcf(&self, id: &str) -> String {
  359. format!(
  360. "{}/{}_variants_constit_phased.vcf.gz",
  361. self.tumoral_dir(id),
  362. id
  363. )
  364. }
  365. pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
  366. self.modkit_summary_file
  367. .replace("{result_dir}", &self.result_dir)
  368. .replace("{id}", id)
  369. .replace("{time}", time)
  370. }
  371. pub fn longphase_modcall_vcf(&self, id: &str, time: &str) -> String {
  372. self.longphase_modcall_vcf
  373. .replace("{result_dir}", &self.result_dir)
  374. .replace("{id}", id)
  375. .replace("{time}", time)
  376. }
  377. }