config.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. #[derive(Debug, Clone)]
  2. pub struct Config {
  3. pub pod_dir: String,
  4. pub result_dir: String,
  5. pub align: AlignConfig,
  6. pub reference: String,
  7. pub reference_name: String,
  8. pub savana_bin: String,
  9. pub savana_threads: u8,
  10. pub tumoral_name: String,
  11. pub normal_name: String,
  12. pub haplotagged_bam_tag_name: String,
  13. pub savana_output_dir: String,
  14. pub germline_phased_vcf: String,
  15. pub savana_passed_vcf: String,
  16. pub conda_sh: String,
  17. pub savana_force: bool,
  18. pub deepvariant_output_dir: String,
  19. pub severus_bin: String,
  20. pub severus_force: bool,
  21. pub severus_threads: u8,
  22. pub vntrs_bed: String,
  23. pub severus_pon: String,
  24. pub severus_output_dir: String,
  25. pub severus_solo_output_dir: String,
  26. pub longphase_bin: String,
  27. pub longphase_threads: u8,
  28. pub longphase_modcall_vcf: String,
  29. pub modkit_bin: String,
  30. pub modkit_summary_threads: u8,
  31. pub modkit_summary_file: String,
  32. pub longphase_modcall_threads: u8,
  33. pub deepvariant_threads: u8,
  34. pub deepvariant_bin_version: String,
  35. pub deepvariant_model_type: String,
  36. pub deepvariant_force: bool,
  37. pub clairs_threads: u8,
  38. pub clairs_force: bool,
  39. pub clairs_platform: String,
  40. pub clairs_output_dir: String,
  41. pub mask_bed: String,
  42. }
  43. // Here comes names that can't be changed from output of tools
  44. lazy_static! {
  45. static ref DEEPVARIANT_OUTPUT_NAME: &'static str = "{id}_{time}_DeepVariant.vcf.gz";
  46. static ref CLAIRS_OUTPUT_NAME: &'static str = "output.vcf.gz";
  47. static ref CLAIRS_OUTPUT_INDELS_NAME: &'static str = "indel.vcf.gz";
  48. static ref CLAIRS_GERMLINE_NORMAL: &'static str = "clair3_normal_germline_output.vcf.gz";
  49. static ref CLAIRS_GERMLINE_TUMOR: &'static str = "clair3_tumor_germline_output.vcf.gz";
  50. }
  51. impl Default for Config {
  52. fn default() -> Self {
  53. Self {
  54. pod_dir: "/data/run_data".to_string(),
  55. align: Default::default(),
  56. // Reference genome
  57. reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  58. reference_name: "hs1".to_string(),
  59. // File structure
  60. result_dir: "/data/longreads_basic_pipe".to_string(),
  61. tumoral_name: "diag".to_string(),
  62. normal_name: "mrd".to_string(),
  63. haplotagged_bam_tag_name: "hp".to_string(),
  64. //
  65. mask_bed: "{result_dir}/{id}/diag/mask.bed".to_string(),
  66. germline_phased_vcf:
  67. "{result_dir}/{id}/diag/ClairS/clair3_normal_tumoral_germline_output_PS.vcf"
  68. .to_string(),
  69. conda_sh: "/data/miniconda3/etc/profile.d/conda.sh".to_string(),
  70. // DeepVariant
  71. deepvariant_output_dir: "{result_dir}/{id}/{time}/DeepVariant".to_string(),
  72. deepvariant_threads: 155,
  73. deepvariant_bin_version: "1.8.0".to_string(),
  74. deepvariant_model_type: "ONT_R104".to_string(),
  75. deepvariant_force: false,
  76. // ClairS
  77. clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
  78. clairs_threads: 155,
  79. clairs_platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
  80. clairs_force: false,
  81. // Savana
  82. savana_bin: "savana".to_string(),
  83. savana_threads: 150,
  84. savana_output_dir: "{result_dir}/{id}/diag/savana".to_string(),
  85. savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf".to_string(),
  86. savana_force: false,
  87. // Severus
  88. severus_bin: "/data/tools/Severus/severus.py".to_string(),
  89. severus_threads: 32,
  90. vntrs_bed: "/data/ref/hs1/vntrs_chm13.bed".to_string(),
  91. severus_pon: "/data/ref/hs1/PoN_1000G_chm13.tsv.gz".to_string(),
  92. severus_output_dir: "{result_dir}/{id}/diag/severus".to_string(),
  93. severus_solo_output_dir: "{result_dir}/{id}/{time}/severus".to_string(),
  94. severus_force: false,
  95. // Longphase
  96. longphase_bin: "/data/tools/longphase_linux-x64".to_string(),
  97. longphase_threads: 150,
  98. longphase_modcall_threads: 8, // ! out of memory
  99. longphase_modcall_vcf:
  100. "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz".to_string(),
  101. // modkit
  102. modkit_bin: "modkit".to_string(),
  103. modkit_summary_threads: 50,
  104. modkit_summary_file: "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
  105. .to_string(),
  106. }
  107. }
  108. }
  /// Settings of the dorado and samtools tools.
  ///
  /// NOTE: a `dorado_sequencing_kit: String` field exists only as a disabled
  /// (commented-out) entry and is not part of the struct.
  #[derive(Debug, Clone)]
  pub struct AlignConfig {
      /// Path of the dorado executable.
      pub dorado_bin: String,
      /// Arguments for dorado basecalling, kept as one string.
      pub dorado_basecall_arg: String,
      /// Path of the reference FASTA.
      pub ref_fa: String,
      /// Path of the reference `.mmi` index.
      pub ref_mmi: String,
      /// Thread count for `samtools view`.
      pub samtools_view_threads: u16,
      /// Thread count for `samtools sort`.
      pub samtools_sort_threads: u16,
  }
  119. impl Default for AlignConfig {
  120. fn default() -> Self {
  121. Self {
  122. dorado_bin: "/data/tools/dorado-0.9.0-linux-x64/bin/dorado".to_string(),
  123. dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
  124. // dorado_sequencing_kit: "SQK-NBD114-24".to_string(),
  125. // to specify cuda devices (exclude the T1000)
  126. ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
  127. ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
  128. samtools_view_threads: 20,
  129. samtools_sort_threads: 50,
  130. }
  131. }
  132. }
  133. impl Config {
  134. pub fn tumoral_dir(&self, id: &str) -> String {
  135. format!("{}/{}/{}", self.result_dir, id, self.tumoral_name)
  136. }
  137. pub fn normal_dir(&self, id: &str) -> String {
  138. format!("{}/{}/{}", self.result_dir, id, self.normal_name)
  139. }
  140. pub fn solo_dir(&self, id: &str, time: &str) -> String {
  141. format!("{}/{}/{}", self.result_dir, id, time)
  142. }
  143. pub fn solo_bam(&self, id: &str, time: &str) -> String {
  144. format!(
  145. "{}/{}_{}_{}.bam",
  146. self.solo_dir(id, time),
  147. id,
  148. time,
  149. self.reference_name,
  150. )
  151. }
  152. pub fn tumoral_bam(&self, id: &str) -> String {
  153. format!(
  154. "{}/{}_{}_{}.bam",
  155. self.tumoral_dir(id),
  156. id,
  157. self.tumoral_name,
  158. self.reference_name,
  159. )
  160. }
  161. pub fn normal_bam(&self, id: &str) -> String {
  162. format!(
  163. "{}/{}_{}_{}.bam",
  164. self.normal_dir(id),
  165. id,
  166. self.normal_name,
  167. self.reference_name,
  168. )
  169. }
  170. pub fn tumoral_haplotagged_bam(&self, id: &str) -> String {
  171. format!(
  172. "{}/{}_{}_{}_{}.bam",
  173. self.tumoral_dir(id),
  174. id,
  175. self.tumoral_name,
  176. self.reference_name,
  177. self.haplotagged_bam_tag_name
  178. )
  179. }
  180. pub fn normal_haplotagged_bam(&self, id: &str) -> String {
  181. format!(
  182. "{}/{}_{}_{}_{}.bam",
  183. self.normal_dir(id),
  184. id,
  185. self.normal_name,
  186. self.reference_name,
  187. self.haplotagged_bam_tag_name
  188. )
  189. }
  190. pub fn mask_bed(&self, id: &str) -> String {
  191. self.mask_bed
  192. .replace("{result_dir}", &self.result_dir)
  193. .replace("{id}", id)
  194. }
  195. pub fn germline_phased_vcf(&self, id: &str) -> String {
  196. self.germline_phased_vcf
  197. .replace("{result_dir}", &self.result_dir)
  198. .replace("{id}", id)
  199. }
  200. // DeepVariant
  201. pub fn deepvariant_output_dir(&self, id: &str, time: &str) -> String {
  202. self.deepvariant_output_dir
  203. .replace("{result_dir}", &self.result_dir)
  204. .replace("{id}", id)
  205. .replace("{time}", time)
  206. }
  207. pub fn deepvariant_output_vcf(&self, id: &str, time: &str) -> String {
  208. format!(
  209. "{}/{}",
  210. self.deepvariant_output_dir(id, time),
  211. *DEEPVARIANT_OUTPUT_NAME
  212. )
  213. .replace("{id}", id)
  214. .replace("{time}", time)
  215. }
  216. // ClairS
  217. pub fn clairs_output_dir(&self, id: &str) -> String {
  218. self.clairs_output_dir
  219. .replace("{result_dir}", &self.result_dir)
  220. .replace("{id}", id)
  221. }
  222. pub fn clairs_output_vcfs(&self, id: &str) -> (String, String) {
  223. let dir = self.clairs_output_dir(id);
  224. (format!("{dir}/{}", *CLAIRS_OUTPUT_NAME), format!("{dir}/{}", *CLAIRS_OUTPUT_INDELS_NAME))
  225. }
  226. pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
  227. let dir = self.clairs_output_dir(id);
  228. format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)
  229. }
  230. pub fn clairs_germline_tumor_vcf(&self, id: &str) -> String {
  231. let dir = self.clairs_output_dir(id);
  232. format!("{dir}/{}", *CLAIRS_GERMLINE_TUMOR)
  233. }
  234. pub fn clairs_germline_passed_vcf(&self, id: &str) -> String {
  235. let dir = self.clairs_output_dir(id);
  236. format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
  237. }
  238. // Savana
  239. pub fn savana_output_dir(&self, id: &str) -> String {
  240. self.savana_output_dir
  241. .replace("{result_dir}", &self.result_dir)
  242. .replace("{id}", id)
  243. }
  244. pub fn savana_output_vcf(&self, id: &str) -> String {
  245. let output_dir = self.savana_output_dir(id);
  246. format!("{output_dir}/{id}_diag_hs1_hp.classified.somatic.vcf")
  247. }
  248. pub fn savana_passed_vcf(&self, id: &str) -> String {
  249. self.savana_passed_vcf
  250. .replace("{output_dir}", &self.savana_output_dir(id))
  251. .replace("{id}", id)
  252. }
  253. // Severus
  254. pub fn severus_output_dir(&self, id: &str) -> String {
  255. self.severus_output_dir
  256. .replace("{result_dir}", &self.result_dir)
  257. .replace("{id}", id)
  258. }
  259. pub fn severus_output_vcf(&self, id: &str) -> String {
  260. let output_dir = self.severus_output_dir(id);
  261. format!("{output_dir}/somatic_SVs/severus_somatic.vcf")
  262. }
  263. pub fn severus_passed_vcf(&self, id: &str) -> String {
  264. format!(
  265. "{}/{}_diag_severus_PASSED.vcf.gz",
  266. &self.severus_output_dir(id),
  267. id
  268. )
  269. }
  270. // Severus solo
  271. pub fn severus_solo_output_dir(&self, id: &str, time: &str) -> String {
  272. self.severus_solo_output_dir
  273. .replace("{result_dir}", &self.result_dir)
  274. .replace("{id}", id)
  275. .replace("{time}", time)
  276. }
  277. pub fn severus_solo_output_vcf(&self, id: &str, time: &str) -> String {
  278. let output_dir = self.severus_solo_output_dir(id, time);
  279. format!("{output_dir}/all_SVs/severus_all.vcf")
  280. }
  281. pub fn severus_solo_passed_vcf(&self, id: &str, time: &str) -> String {
  282. format!(
  283. "{}/{}_{}_severus-solo_PASSED.vcf.gz",
  284. &self.severus_solo_output_dir(id, time),
  285. id,
  286. time
  287. )
  288. }
  289. pub fn constit_vcf(&self, id: &str) -> String {
  290. format!("{}/{}_variants_constit.vcf.gz", self.tumoral_dir(id), id)
  291. }
  292. pub fn constit_phased_vcf(&self, id: &str) -> String {
  293. format!("{}/{}_variants_constit_PS.vcf.gz", self.tumoral_dir(id), id)
  294. }
  295. pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
  296. self.modkit_summary_file
  297. .replace("{result_dir}", &self.result_dir)
  298. .replace("{id}", id)
  299. .replace("{time}", time)
  300. }
  301. pub fn longphase_modcall_vcf(&self, id: &str, time: &str) -> String {
  302. self.longphase_modcall_vcf
  303. .replace("{result_dir}", &self.result_dir)
  304. .replace("{id}", id)
  305. .replace("{time}", time)
  306. }
  307. }