| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433 |
- #[derive(Debug, Clone)]
- pub struct Config {
- pub pod_dir: String,
- pub result_dir: String,
- pub align: AlignConfig,
- pub reference: String,
- pub reference_name: String,
- pub savana_bin: String,
- pub savana_threads: u8,
- pub tumoral_name: String,
- pub normal_name: String,
- pub haplotagged_bam_tag_name: String,
- pub savana_output_dir: String,
- pub germline_phased_vcf: String,
- pub savana_passed_vcf: String,
- pub conda_sh: String,
- pub savana_force: bool,
- pub deepvariant_output_dir: String,
- pub severus_bin: String,
- pub severus_force: bool,
- pub severus_threads: u8,
- pub vntrs_bed: String,
- pub severus_pon: String,
- pub severus_output_dir: String,
- pub severus_solo_output_dir: String,
- pub longphase_bin: String,
- pub longphase_threads: u8,
- pub longphase_modcall_vcf: String,
- pub modkit_bin: String,
- pub modkit_summary_threads: u8,
- pub modkit_summary_file: String,
- pub longphase_modcall_threads: u8,
- pub deepvariant_threads: u8,
- pub deepvariant_bin_version: String,
- pub deepvariant_model_type: String,
- pub deepvariant_force: bool,
- pub deepsomatic_output_dir: String,
- pub deepsomatic_threads: u8,
- pub deepsomatic_bin_version: String,
- pub deepsomatic_model_type: String,
- pub clairs_threads: u8,
- pub clairs_force: bool,
- pub clairs_platform: String,
- pub clairs_output_dir: String,
- pub mask_bed: String,
- pub solo_min_constit_depth: u16,
- pub solo_max_alt_constit: u16,
- pub min_shannon_entropy: f64,
- pub nanomonsv_bin: String,
- pub nanomonsv_output_dir: String,
- pub nanomonsv_force: bool,
- pub nanomonsv_threads: u8,
- pub nanomonsv_passed_vcf: String,
- pub nanomonsv_solo_output_dir: String,
- pub nanomonsv_solo_passed_vcf: String,
- }
- // Here comes names that can't be changed from output of tools
- lazy_static! {
- static ref DEEPVARIANT_OUTPUT_NAME: &'static str = "{id}_{time}_DeepVariant.vcf.gz";
- static ref CLAIRS_OUTPUT_NAME: &'static str = "output.vcf.gz";
- static ref CLAIRS_OUTPUT_INDELS_NAME: &'static str = "indel.vcf.gz";
- static ref CLAIRS_GERMLINE_NORMAL: &'static str = "clair3_normal_germline_output.vcf.gz";
- static ref CLAIRS_GERMLINE_TUMOR: &'static str = "clair3_tumor_germline_output.vcf.gz";
- }
- impl Default for Config {
- fn default() -> Self {
- Self {
- pod_dir: "/data/run_data".to_string(),
- align: Default::default(),
- // Reference genome
- reference: "/data/ref/hs1/chm13v2.0.fa".to_string(),
- reference_name: "hs1".to_string(),
- // File structure
- result_dir: "/data/longreads_basic_pipe".to_string(),
- tumoral_name: "diag".to_string(),
- normal_name: "mrd".to_string(),
- haplotagged_bam_tag_name: "HP".to_string(),
- //
- mask_bed: "{result_dir}/{id}/diag/mask.bed".to_string(),
- germline_phased_vcf: "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz
- "
- .to_string(),
- conda_sh: "/data/miniconda3/etc/profile.d/conda.sh".to_string(),
- // DeepVariant
- deepvariant_output_dir: "{result_dir}/{id}/{time}/DeepVariant".to_string(),
- deepvariant_threads: 155,
- deepvariant_bin_version: "1.8.0".to_string(),
- deepvariant_model_type: "ONT_R104".to_string(),
- deepvariant_force: false,
- // DeepSomatic
- deepsomatic_output_dir: "{result_dir}/{id}/{time}/DeepSomatic".to_string(),
- deepsomatic_threads: 155,
- deepsomatic_bin_version: "1.8.0".to_string(),
- deepsomatic_model_type: "ONT".to_string(),
- // ClairS
- clairs_output_dir: "{result_dir}/{id}/diag/ClairS".to_string(),
- clairs_threads: 155,
- clairs_platform: "ont_r10_dorado_sup_5khz_ssrs".to_string(),
- clairs_force: false,
- // Savana
- savana_bin: "savana".to_string(),
- savana_threads: 150,
- savana_output_dir: "{result_dir}/{id}/diag/savana".to_string(),
- savana_passed_vcf: "{output_dir}/{id}_diag_savana_PASSED.vcf".to_string(),
- savana_force: false,
- // Severus
- severus_bin: "/data/tools/Severus/severus.py".to_string(),
- severus_threads: 32,
- vntrs_bed: "/data/ref/hs1/vntrs_chm13.bed".to_string(),
- severus_pon: "/data/ref/hs1/PoN_1000G_chm13.tsv.gz".to_string(),
- severus_output_dir: "{result_dir}/{id}/diag/severus".to_string(),
- severus_solo_output_dir: "{result_dir}/{id}/{time}/severus".to_string(),
- severus_force: false,
- // Longphase
- longphase_bin: "/data/tools/longphase_linux-x64".to_string(),
- longphase_threads: 150,
- longphase_modcall_threads: 8, // ! out of memory
- longphase_modcall_vcf:
- "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz".to_string(),
- // modkit
- modkit_bin: "modkit".to_string(),
- modkit_summary_threads: 50,
- modkit_summary_file: "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
- .to_string(),
- // Nanomonsv
- nanomonsv_bin: "nanomonsv".to_string(),
- nanomonsv_output_dir: "{result_dir}/{id}/{time}/nanomonsv".to_string(),
- nanomonsv_threads: 150,
- nanomonsv_force: false,
- nanomonsv_passed_vcf: "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz".to_string(),
- nanomonsv_solo_output_dir: "{result_dir}/{id}/{time}/nanomonsv-solo".to_string(),
- nanomonsv_solo_passed_vcf: "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
- .to_string(),
- // Pipe
- solo_min_constit_depth: 5,
- solo_max_alt_constit: 1,
- min_shannon_entropy: 1.0,
- }
- }
- }
/// Settings for the basecalling / alignment step.
#[derive(Debug, Clone)]
pub struct AlignConfig {
    /// Path to the dorado executable.
    pub dorado_bin: String,
    /// Argument string for dorado basecalling (the default value carries a
    /// device selection and model names).
    pub dorado_basecall_arg: String,
    /// Reference FASTA path.
    pub ref_fa: String,
    /// Reference `.mmi` path — presumably a minimap2 index; confirm against
    /// the caller.
    pub ref_mmi: String,
    /// Thread count for `samtools view` (as the name suggests; usage is not
    /// visible in this file).
    pub samtools_view_threads: u16,
    /// Thread count for `samtools sort` (as the name suggests; usage is not
    /// visible in this file).
    pub samtools_sort_threads: u16,
}
- impl Default for AlignConfig {
- fn default() -> Self {
- Self {
- dorado_bin: "/data/tools/dorado-0.9.0-linux-x64/bin/dorado".to_string(),
- dorado_basecall_arg: "-x 'cuda:0,1,2,3' sup,5mC_5hmC".to_string(), // since v0.8.0 need
- // to specify cuda devices (exclude the T1000)
- ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
- ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
- samtools_view_threads: 20,
- samtools_sort_threads: 50,
- }
- }
- }
- impl Config {
- pub fn tumoral_dir(&self, id: &str) -> String {
- format!("{}/{}/{}", self.result_dir, id, self.tumoral_name)
- }
- pub fn normal_dir(&self, id: &str) -> String {
- format!("{}/{}/{}", self.result_dir, id, self.normal_name)
- }
- pub fn solo_dir(&self, id: &str, time: &str) -> String {
- format!("{}/{}/{}", self.result_dir, id, time)
- }
- pub fn solo_bam(&self, id: &str, time: &str) -> String {
- format!(
- "{}/{}_{}_{}.bam",
- self.solo_dir(id, time),
- id,
- time,
- self.reference_name,
- )
- }
- pub fn tumoral_bam(&self, id: &str) -> String {
- format!(
- "{}/{}_{}_{}.bam",
- self.tumoral_dir(id),
- id,
- self.tumoral_name,
- self.reference_name,
- )
- }
- pub fn normal_bam(&self, id: &str) -> String {
- format!(
- "{}/{}_{}_{}.bam",
- self.normal_dir(id),
- id,
- self.normal_name,
- self.reference_name,
- )
- }
- pub fn tumoral_haplotagged_bam(&self, id: &str) -> String {
- format!(
- "{}/{}_{}_{}_{}.bam",
- self.tumoral_dir(id),
- id,
- self.tumoral_name,
- self.reference_name,
- self.haplotagged_bam_tag_name
- )
- }
- pub fn normal_haplotagged_bam(&self, id: &str) -> String {
- format!(
- "{}/{}_{}_{}_{}.bam",
- self.normal_dir(id),
- id,
- self.normal_name,
- self.reference_name,
- self.haplotagged_bam_tag_name
- )
- }
- pub fn mask_bed(&self, id: &str) -> String {
- self.mask_bed
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- }
- pub fn germline_phased_vcf(&self, id: &str) -> String {
- self.germline_phased_vcf
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- }
- // DeepVariant
- pub fn deepvariant_output_dir(&self, id: &str, time: &str) -> String {
- self.deepvariant_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", time)
- }
- pub fn deepvariant_output_vcf(&self, id: &str, time: &str) -> String {
- format!(
- "{}/{}",
- self.deepvariant_output_dir(id, time),
- *DEEPVARIANT_OUTPUT_NAME
- )
- .replace("{id}", id)
- .replace("{time}", time)
- }
- // DeepSomatic
- pub fn deepsomatic_output_dir(&self, id: &str) -> String {
- self.deepsomatic_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", &self.tumoral_name)
- }
- // ClairS
- pub fn clairs_output_dir(&self, id: &str) -> String {
- self.clairs_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- }
- pub fn clairs_output_vcfs(&self, id: &str) -> (String, String) {
- let dir = self.clairs_output_dir(id);
- (
- format!("{dir}/{}", *CLAIRS_OUTPUT_NAME),
- format!("{dir}/{}", *CLAIRS_OUTPUT_INDELS_NAME),
- )
- }
- pub fn clairs_germline_normal_vcf(&self, id: &str) -> String {
- let dir = self.clairs_output_dir(id);
- format!("{dir}/{}", *CLAIRS_GERMLINE_NORMAL)
- }
- pub fn clairs_germline_tumor_vcf(&self, id: &str) -> String {
- let dir = self.clairs_output_dir(id);
- format!("{dir}/{}", *CLAIRS_GERMLINE_TUMOR)
- }
- pub fn clairs_germline_passed_vcf(&self, id: &str) -> String {
- let dir = self.clairs_output_dir(id);
- format!("{dir}/{id}_diag_clair3-germline_PASSED.vcf.gz")
- }
- // Nanomonsv
- pub fn nanomonsv_output_dir(&self, id: &str, time: &str) -> String {
- self.nanomonsv_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", time)
- }
- pub fn nanomonsv_passed_vcf(&self, id: &str) -> String {
- self.nanomonsv_passed_vcf
- .replace("{output_dir}", &self.nanomonsv_output_dir(id, "diag"))
- .replace("{id}", id)
- }
- // Nanomonsv solo
- pub fn nanomonsv_solo_output_dir(&self, id: &str, time: &str) -> String {
- self.nanomonsv_solo_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", time)
- }
- pub fn nanomonsv_solo_passed_vcf(&self, id: &str, time: &str) -> String {
- self.nanomonsv_solo_passed_vcf
- .replace("{output_dir}", &self.nanomonsv_solo_output_dir(id, time))
- .replace("{id}", id)
- .replace("{time}", time)
- }
- // Savana
- pub fn savana_output_dir(&self, id: &str) -> String {
- self.savana_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- }
- pub fn savana_output_vcf(&self, id: &str) -> String {
- let output_dir = self.savana_output_dir(id);
- format!("{output_dir}/{id}_diag_hs1_hp.classified.somatic.vcf")
- }
- pub fn savana_passed_vcf(&self, id: &str) -> String {
- self.savana_passed_vcf
- .replace("{output_dir}", &self.savana_output_dir(id))
- .replace("{id}", id)
- }
- // Severus
- pub fn severus_output_dir(&self, id: &str) -> String {
- self.severus_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- }
- pub fn severus_output_vcf(&self, id: &str) -> String {
- let output_dir = self.severus_output_dir(id);
- format!("{output_dir}/somatic_SVs/severus_somatic.vcf")
- }
- pub fn severus_passed_vcf(&self, id: &str) -> String {
- format!(
- "{}/{}_diag_severus_PASSED.vcf.gz",
- &self.severus_output_dir(id),
- id
- )
- }
- // Severus solo
- pub fn severus_solo_output_dir(&self, id: &str, time: &str) -> String {
- self.severus_solo_output_dir
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", time)
- }
- pub fn severus_solo_output_vcf(&self, id: &str, time: &str) -> String {
- let output_dir = self.severus_solo_output_dir(id, time);
- format!("{output_dir}/all_SVs/severus_all.vcf")
- }
- pub fn severus_solo_passed_vcf(&self, id: &str, time: &str) -> String {
- format!(
- "{}/{}_{}_severus-solo_PASSED.vcf.gz",
- &self.severus_solo_output_dir(id, time),
- id,
- time
- )
- }
- pub fn constit_vcf(&self, id: &str) -> String {
- self.clairs_germline_passed_vcf(id)
- // format!("{}/{}_variants_constit.vcf.gz", self.tumoral_dir(id), id)
- }
- pub fn constit_phased_vcf(&self, id: &str) -> String {
- format!(
- "{}/{}_variants_constit_phased.vcf.gz",
- self.tumoral_dir(id),
- id
- )
- }
- pub fn modkit_summary_file(&self, id: &str, time: &str) -> String {
- self.modkit_summary_file
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", time)
- }
- pub fn longphase_modcall_vcf(&self, id: &str, time: &str) -> String {
- self.longphase_modcall_vcf
- .replace("{result_dir}", &self.result_dir)
- .replace("{id}", id)
- .replace("{time}", time)
- }
- }
|