# Pandora configuration template

# Section: General filesystem layout / I/O

# Location of the POD / run description files.
pod_dir = "/data/run_data"

# Root directory under which every result is written.
result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"

# Scratch directory for temporary files.
tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"

# Whether Slurm is used as the runner.
slurm_runner = true

# Directory holding the run cache.
run_cache_dir = "/home/t_steimle/data/prom_runs"

# Number of software threads.
threads = 5

# Command used to invoke Singularity.
singularity_bin = "module load singularity-ce && singularity"

# Temporary directory used while unarchiving input data.
unarchive_tmp_dir = "/data/unarchived"

# Memory ceiling for dockerized tools, in GiB.
docker_max_memory_go = 400

# SQLite database listing the processed cases.
db_cases_path = "/data/cases.sqlite"

# Conda activation script.
conda_sh = "/mnt/beegfs02/software/recherche/miniconda/25.1.1/etc/profile.d/conda.sh"

# Section: Reference genome & annotations

# Reference FASTA used throughout the pipeline.
reference = "/home/t_steimle/ref/hs1/chm13v2.0.fa"

# Short name of the reference, used in filenames.
reference_name = "hs1"

# BED file of the pseudoautosomal regions (PARs).
pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"

# Sequence dictionary (.dict) of the reference.
dict_file = "/data/ref/hs1/chm13v2.0.dict"

# RefSeq annotation in GFF3 format (sorted/indexed).
refseq_gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"

# Template for the mask BED file (low-quality / filtered regions).
#   {result_dir} -> global result directory
#   {id}         -> case identifier
mask_bed = "{result_dir}/{id}/diag/mask.bed"

# BED file of early-replicating regions.
early_bed = "/data/ref/hs1/replication_early_25_hs1.bed"

# BED file of late-replicating regions.
late_bed = "/data/ref/hs1/replication_late_75_hs1.bed"

# BED file of CpG coordinates.
# NOTE(review): the path repeats the `hs1/` directory segment - confirm this is intended.
cpg_bed = "/data/ref/hs1/hs1/hs1_CpG.bed"

# Panels of interest, one [name, bed_path] pair per entry.
panels = [
    ["OncoT", "/data/ref/hs1/V1_V2_V3_V4_V5_intersect_targets_hs1_uniq.bed"],
    ["variable_chips", "/data/ref/hs1/top_1500_sd_pos.bed"],
]

# Section: Sample naming / BAM handling

# Label of the tumor sample (appears in paths & filenames).
tumoral_name = "diag"

# Label of the normal sample.
normal_name = "norm"

# Name of the BAM tag carried by haplotagged reads.
haplotagged_bam_tag_name = "HP"

# Reads below this MAPQ are dropped during BAM filtering.
bam_min_mapq = 40

# Thread count for BAM-level operations (view/sort/index...).
bam_n_threads = 150

# How many reads are sampled to estimate BAM composition.
bam_composition_sample_size = 20_000

# Section: Coverage counting / somatic-scan

# Directory name (under each sample dir) in which counts are stored.
count_dir_name = "counts"

# Bin size of the count files, in bp.
count_bin_size = 1_000

# Number of chunks the contigs are split into for counting.
count_n_chunks = 1_000

# Redo the counting even when its outputs already exist.
somatic_scan_force = false

# Section: Somatic pipeline global settings

# Redo the entire somatic pipeline.
somatic_pipe_force = true

# Default thread count for heavy tools.
somatic_pipe_threads = 150

# Template for the somatic pipeline statistics directory.
# Placeholders: {result_dir}, {id}
somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"

# Section: Filtering / QC thresholds

# Minimum depth in the constitutional sample for a site to be evaluable.
somatic_min_constit_depth = 5

# Maximum ALT count allowed in the constitutional sample for a somatic call.
somatic_max_alt_constit = 1

# Length (bp) of the sequence window used for entropy around variants.
entropy_seq_len = 10

# Lower bound on Shannon entropy.
min_shannon_entropy = 1.0

# Maximum depth still considered "low quality".
max_depth_low_quality = 20

# Minimum depth considered "high quality".
min_high_quality_depth = 14

# A variant is kept only if at least this many callers report it.
min_n_callers = 1

# Section: DeepVariant configuration

# Template for the DeepVariant output directory.
# Placeholders: {result_dir}, {id}, {time}
deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"

# Thread count for DeepVariant.
deepvariant_threads = 20

# Path to the DeepVariant Singularity image.
deepvariant_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepvariant_latest.sif"

# DeepVariant model type (e.g. "ONT_R104").
deepvariant_model_type = "ONT_R104"

# Redo DeepVariant even if results exist.
deepvariant_force = false

# Section: DeepSomatic configuration

# Template for the DeepSomatic output directory.
# Placeholders: {result_dir}, {id}, {time}
deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"

# Thread count for DeepSomatic.
deepsomatic_threads = 20

# Path to the DeepSomatic Singularity image.
deepsomatic_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepsomatic_latest.sif"

# DeepSomatic model type.
deepsomatic_model_type = "ONT"

# Redo DeepSomatic even if results exist.
deepsomatic_force = false

# Section: ClairS configuration

# Thread count for ClairS.
clairs_threads = 40

# Path to the ClairS image (a .sif file).
clairs_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/clairs_latest.sif"

# Redo ClairS even if results exist.
clairs_force = false

# ClairS platform preset.
clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"

# Template for the ClairS output directory.
# Placeholders of the ClairS output directory template: {result_dir}, {id}
clairs_output_dir = "{result_dir}/{id}/diag/ClairS"

# Section: Savana configuration

# Savana binary (name or full path).
savana_bin = "/home/t_steimle/.conda/envs/savana_env/bin/savana"

# Thread count for Savana.
savana_threads = 40

# Template for the Savana output directory.
# Placeholders: {result_dir}, {id}
savana_output_dir = "{result_dir}/{id}/diag/savana"

# Savana copy-number output file.
# Placeholders: {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"

# Savana raw read counts file (same placeholders as `savana_copy_number`).
savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"

# Savana PASSED VCF.
# Placeholders: {output_dir}, {id}
savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"

# Redo Savana even if results exist.
savana_force = false

# Template for the constitutional phased VCF.
# Placeholders: {result_dir}, {id}
germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"

# Section: Severus configuration

# Path to the Severus script. The value must not carry surrounding
# whitespace: it is a filesystem path, and a leading space would make it
# point at a non-existent file.
severus_bin = "/home/t_steimle/somatic_pipe_tools/Severus/severus.py"

# Redo Severus even if results exist.
severus_force = false

# Thread count for Severus.
severus_threads = 32

# VNTRs BED file given to Severus.
vntrs_bed = "/home/t_steimle/ref/hs1/vntrs_chm13.bed"

# Severus panel of normals.
severus_pon = "/home/t_steimle/ref/hs1/PoN_1000G_chm13.tsv.gz"

# Output directory for paired Severus runs.
# Placeholders: {result_dir}, {id}
severus_output_dir = "{result_dir}/{id}/diag/severus"

# Output directory for solo Severus runs.
# Placeholders: {result_dir}, {id}, {time}
severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"

# Section: Straglr configuration

# Path to the Straglr executable.
straglr_bin = "/home/t_steimle/.conda/envs/straglr_env/bin/straglr.py"

# BED file of STR loci for Straglr.
#
# RepeatMasker Simple_repeat
straglr_loci_bed = "/home/t_steimle/ref/hs1/simple_repeat_ucsc_hs1.bed"

# Smallest allele size difference (bp) reported as a change between
# normal and tumoral.
straglr_min_size_diff = 4

# Minimum read support an allele needs to be considered for a change
# between normal and tumoral.
straglr_min_support_diff = 2

# Minimum read support for STR genotyping.
straglr_min_support = 2

# Minimum cluster size for STR detection.
straglr_min_cluster_size = 2

# Whether genotyping is done in size mode.
straglr_genotype_in_size = true

# Template for the paired Straglr output directory.
#
# Placeholders: `{result_dir}`, `{id}`.
straglr_output_dir = "{result_dir}/{id}/diag/straglr"

# Template for the solo Straglr output directory.
#
# Placeholders: `{result_dir}`, `{id}`, `{time}`.
straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"

# Redo Straglr even if results exist.
straglr_force = false

# Section: Marlin

# BED file for Marlin (presumably the probe set, judging by the file name - TODO confirm).
marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"

# Section: Bcftools configuration

# Path to the bcftools binary.
bcftools_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/bcftools"

# Thread count for bcftools.
bcftools_threads = 30

# Section: Longphase configuration

# Path to the longphase binary.
longphase_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/longphase_linux-x64"

# Thread count for longphase.
longphase_threads = 20

# Thread count for the longphase modcall step.
# Kept low to limit memory usage.
longphase_modcall_threads = 6

# Redo longphase (haplotagging/phasing) even if results exist.
longphase_force = false

# Template for the longphase modcall VCF.
# Placeholders of the longphase modcall VCF template: {result_dir}, {id}, {time}
longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"

# Section: Modkit configuration

# Path to the modkit binary.
modkit_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/modkit_latest/modkit"

# Thread count for `modkit summary`.
modkit_summary_threads = 40

# Template for the modkit summary file.
# Placeholders: {result_dir}, {id}, {time}
modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"

# Section: Nanomonsv configuration

# Path to the nanomonsv binary.
nanomonsv_bin = "/home/t_steimle/.conda/envs/nanomonsv_env/bin/nanomonsv"

# Template for the paired nanomonsv output directory.
# Placeholders: {result_dir}, {id}, {time}
nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"

# Redo nanomonsv even if results exist.
nanomonsv_force = false

# Thread count for nanomonsv.
nanomonsv_threads = 40

# Template for the paired nanomonsv PASSED VCF.
# Placeholders: {output_dir}, {id}
nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"

# Template for the solo nanomonsv output directory.
# Placeholders: {result_dir}, {id}, {time}
nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"

# Template for the solo nanomonsv PASSED VCF.
# Placeholders: {output_dir}, {id}, {time}
nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"

# Simple repeat BED file for nanomonsv.
# https://github.com/friend1ws/nanomonsv
# Warning: the matching TBI index must exist.
nanomonsv_simple_repeat_bed = "/home/t_steimle/ref/hs1/human_chm13v2.0_simpleRepeat.bed.gz"

# Section: PromethION metadata

# Directory holding the PromethION run metadata.
promethion_runs_metadata_dir = "/data/promethion-runs-metadata"

# JSON file mapping flowcell IDs / runs for Pandora.
promethion_runs_input = "/data/pandora-flowcell-id.json"

# Section: Alignment / basecalling (Dorado)

[align]
# Path to the Dorado binary.
dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"

# Arguments passed to Dorado basecalling (device, model, modifications...).
dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"

# Whether Dorado should re-align after demultiplexing.
dorado_should_realign = false

# Thread count for the Dorado aligner.
dorado_aligner_threads = 10

# Reference FASTA used for alignment.
ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"

# Minimap2 index used for alignment (left empty in this template).
ref_mmi = ""

# Path to the samtools binary.
samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"

# Thread count for `samtools view`.
samtools_view_threads = 10

# Thread count for `samtools sort`.
samtools_sort_threads = 20

# Thread count for `samtools merge`.
samtools_merge_threads = 40

# Thread count for `samtools split`.
samtools_split_threads = 20