| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- # Pandora configuration template
- #######################################
- # General filesystem layout / I/O
- #######################################
- # Directory where POD / run description files are located.
- pod_dir = "/data/run_data"
- # Root directory where all results will be written.
- result_dir = "/mnt/beegfs02/scratch/t_steimle/data/wgs"
- # Temporary directory.
- tmp_dir = "/mnt/beegfs02/scratch/t_steimle/tmp"
- # Whether to use Slurm as the job runner.
- slurm_runner = true
- # Run cache directory.
- run_cache_dir = "/home/t_steimle/data/prom_runs"
- # Software threads
- threads = 5
- # Command used to invoke Singularity (here it loads the environment module first).
- singularity_bin = "module load singularity-ce && singularity"
- # Temporary directory used when unarchiving input data.
- unarchive_tmp_dir = "/data/unarchived"
- # Maximum memory available for dockerized tools, in GiB.
- docker_max_memory_go = 400
- # Path to the SQLite database of processed cases.
- db_cases_path = "/data/cases.sqlite"
- # Path to the conda activation script.
- conda_sh = "/mnt/beegfs02/software/recherche/miniconda/25.1.1/etc/profile.d/conda.sh"
- #######################################
- # Reference genome & annotations
- #######################################
- # Reference FASTA used throughout the pipeline.
- reference = "/home/t_steimle/ref/hs1/chm13v2.0.fa"
- # Short reference name used in filenames.
- reference_name = "hs1"
- # Pseudoautosomal regions (PARs) BED file.
- pseudoautosomal_regions_bed = "/home/t_steimle/ref/hs1/chm13v2.0_PAR.bed"
- # Sequence dictionary (.dict) for the reference.
- dict_file = "/data/ref/hs1/chm13v2.0.dict"
- # RefSeq GFF3 annotation (sorted/indexed).
- refseq_gff = "/data/ref/hs1/chm13v2.0_RefSeq_Liftoff_v5.1_sorted.gff3.gz"
- # Template for mask BED file (low-quality / filtered regions).
- # {result_dir} -> global result directory
- # {id} -> case identifier
- mask_bed = "{result_dir}/{id}/diag/mask.bed"
- # BED file with early-replicating regions.
- early_bed = "/data/ref/hs1/replication_early_25_hs1.bed"
- # BED file with late-replicating regions.
- late_bed = "/data/ref/hs1/replication_late_75_hs1.bed"
- # BED file with CpG coordinates.
- cpg_bed = "/data/ref/hs1/hs1/hs1_CpG.bed"
- # Panels of interest: [ [name, bed_path], ... ]
- panels = [
- ["OncoT", "/data/ref/hs1/V1_V2_V3_V4_V5_intersect_targets_hs1_uniq.bed"],
- ["variable_chips","/data/ref/hs1/top_1500_sd_pos.bed"],
- ]
- #######################################
- # Sample naming / BAM handling
- #######################################
- # Tumor sample label (used in paths & filenames).
- tumoral_name = "diag"
- # Normal sample label.
- normal_name = "norm"
- # BAM tag name used for haplotagged reads.
- haplotagged_bam_tag_name = "HP"
- # Minimum MAPQ for reads kept during BAM filtering.
- bam_min_mapq = 40
- # Threads for BAM-level operations (view/sort/index…).
- bam_n_threads = 150
- # Number of reads sampled for BAM composition estimation.
- bam_composition_sample_size = 20000
- #######################################
- # Coverage counting / somatic-scan
- #######################################
- # Name of directory (under each sample dir) where counts are stored.
- count_dir_name = "counts"
- # Bin size (bp) for count files.
- count_bin_size = 1000
- # Number of chunks used to split contigs for counting.
- count_n_chunks = 1000
- # Force recomputation of counting even if outputs exist.
- somatic_scan_force = false
- #######################################
- # Somatic pipeline global settings
- #######################################
- # Force recomputation of the entire somatic pipeline.
- somatic_pipe_force = true
- # Default thread count for heavy tools.
- somatic_pipe_threads = 150
- # Template for somatic pipeline statistics directory.
- # {result_dir}, {id}
- somatic_pipe_stats = "{result_dir}/{id}/diag/somatic_pipe_stats"
- #######################################
- # Filtering / QC thresholds
- #######################################
- # Minimum depth in constitutional sample to consider site evaluable.
- somatic_min_constit_depth = 5
- # Maximum allowed ALT count in constitutional sample for a somatic call.
- somatic_max_alt_constit = 1
- # Window size (bp) for sequence entropy around variants.
- entropy_seq_len = 10
- # Minimum Shannon entropy threshold.
- min_shannon_entropy = 1.0
- # Max depth considered "low quality".
- max_depth_low_quality = 20
- # Min depth considered "high quality".
- min_high_quality_depth = 14
- # Minimum number of callers required to keep a variant.
- min_n_callers = 1
- #######################################
- # DeepVariant configuration
- #######################################
- # DeepVariant output directory template.
- # {result_dir}, {id}, {time}
- deepvariant_output_dir = "{result_dir}/{id}/{time}/DeepVariant"
- # Threads for DeepVariant.
- deepvariant_threads = 20
- # DeepVariant singularity image path
- deepvariant_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepvariant_latest.sif"
- # DeepVariant model type (e.g. ONT_R104).
- deepvariant_model_type = "ONT_R104"
- # Force DeepVariant recomputation.
- deepvariant_force = false
- #######################################
- # DeepSomatic configuration
- #######################################
- # DeepSomatic output directory template.
- # {result_dir}, {id}, {time}
- deepsomatic_output_dir = "{result_dir}/{id}/{time}/DeepSomatic"
- # Threads for DeepSomatic.
- deepsomatic_threads = 20
- # DeepSomatic singularity image path
- deepsomatic_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/deepsomatic_latest.sif"
- # DeepSomatic model type.
- deepsomatic_model_type = "ONT"
- # Force DeepSomatic recomputation.
- deepsomatic_force = false
- #######################################
- # ClairS configuration
- #######################################
- # Threads for ClairS.
- clairs_threads = 40
- # ClairS singularity image path.
- clairs_image = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/clairs_latest.sif"
- # Force ClairS recomputation.
- clairs_force = false
- # Platform preset for ClairS.
- clairs_platform = "ont_r10_dorado_sup_5khz_ssrs"
- # ClairS output directory template.
- # {result_dir}, {id}
- clairs_output_dir = "{result_dir}/{id}/diag/ClairS"
- #######################################
- # Savana configuration
- #######################################
- # Savana binary (name or full path).
- savana_bin = "/home/t_steimle/.conda/envs/savana_env/bin/savana"
- # Threads for Savana.
- savana_threads = 40
- # Savana output directory template.
- # {result_dir}, {id}
- savana_output_dir = "{result_dir}/{id}/diag/savana"
- # Savana copy-number output file.
- # {output_dir}, {id}, {reference_name}, {haplotagged_bam_tag_name}
- savana_copy_number = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_segmented_absolute_copy_number.tsv"
- # Savana raw read counts file.
- savana_read_counts = "{output_dir}/{id}_diag_{reference_name}_{haplotagged_bam_tag_name}_raw_read_counts.tsv"
- # Savana passed VCF.
- savana_passed_vcf = "{output_dir}/{id}_diag_savana_PASSED.vcf.gz"
- # Force Savana recomputation.
- savana_force = false
- # Constitutional phased VCF template.
- # {result_dir}, {id}
- germline_phased_vcf = "{result_dir}/{id}/diag/{id}_variants_constit_phased.vcf.gz"
- #######################################
- # Severus configuration
- #######################################
- # Path to Severus script.
- severus_bin = " /home/t_steimle/somatic_pipe_tools/Severus/severus.py"
- # Force Severus recomputation.
- severus_force = false
- # Threads for Severus.
- severus_threads = 32
- # VNTRs BED for Severus.
- vntrs_bed = "/home/t_steimle/ref/hs1/vntrs_chm13.bed"
- # Path of the Severus panel of normals.
- severus_pon = "/home/t_steimle/ref/hs1/PoN_1000G_chm13.tsv.gz"
- # Paired Severus output directory.
- # {result_dir}, {id}
- severus_output_dir = "{result_dir}/{id}/diag/severus"
- # Solo Severus output directory.
- # {result_dir}, {id}, {time}
- severus_solo_output_dir = "{result_dir}/{id}/{time}/severus"
- #######################################
- # Straglr configuration
- #######################################
- # Path to Straglr executable.
- straglr_bin = "/home/t_steimle/.conda/envs/straglr_env/bin/straglr.py"
- # Path to STR loci BED file for Straglr.
- #
- # RepeatMasker Simple_repeat
- straglr_loci_bed = "/home/t_steimle/ref/hs1/simple_repeat_ucsc_hs1.bed"
- # Minimum allele size difference in bp to report as changed between normal and tumoral
- straglr_min_size_diff = 4
- # Minimum read support required for an allele to be considered for
- # change between normal and tumoral
- straglr_min_support_diff = 2
- # Minimum read support for STR genotyping.
- straglr_min_support = 2
- # Minimum cluster size for STR detection.
- straglr_min_cluster_size = 2
- # Whether to genotype in size mode.
- straglr_genotype_in_size = true
- # Template for paired Straglr output directory.
- #
- # Placeholders: `{result_dir}`, `{id}`.
- straglr_output_dir = "{result_dir}/{id}/diag/straglr"
- # Template for solo Straglr output directory.
- #
- # Placeholders: `{result_dir}`, `{id}`, `{time}`.
- straglr_solo_output_dir = "{result_dir}/{id}/{time}/straglr"
- # Force Straglr recomputation.
- straglr_force = false
- #######################################
- # Marlin
- #######################################
- marlin_bed = "/home/t_steimle/ref/hs1/marlin_v1.probes_t2t.bed"
- #######################################
- # Bcftools configuration
- #######################################
- # Path to bcftools binary.
- bcftools_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/bcftools"
- # Threads for bcftools.
- bcftools_threads = 30
- #######################################
- # Longphase configuration
- #######################################
- # Path to longphase binary.
- longphase_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/longphase_linux-x64"
- # Threads for longphase.
- longphase_threads = 20
- # Threads for longphase modcall step.
- # limit memory usage here
- longphase_modcall_threads = 6
- # Force longphase recomputation (haplotagging/phasing).
- longphase_force = false
- # Longphase modcall VCF template.
- # {result_dir}, {id}, {time}
- longphase_modcall_vcf = "{result_dir}/{id}/{time}/5mC_5hmC/{id}_{time}_5mC_5hmC_modcall.vcf.gz"
- #######################################
- # Modkit configuration
- #######################################
- # Path to modkit binary.
- modkit_bin = "/mnt/beegfs02/scratch/t_steimle/somatic_pipe_tools/modkit_latest/modkit"
- # Threads for `modkit summary`.
- modkit_summary_threads = 40
- # Modkit summary file template.
- # {result_dir}, {id}, {time}
- modkit_summary_file = "{result_dir}/{id}/{time}/{id}_{time}_5mC_5hmC_summary.txt"
- #######################################
- # Nanomonsv configuration
- #######################################
- # Path to nanomonsv binary.
- nanomonsv_bin = "/home/t_steimle/.conda/envs/nanomonsv_env/bin/nanomonsv"
- # Paired nanomonsv output directory template.
- # {result_dir}, {id}, {time}
- nanomonsv_output_dir = "{result_dir}/{id}/{time}/nanomonsv"
- # Force nanomonsv recomputation.
- nanomonsv_force = false
- # Threads for nanomonsv.
- nanomonsv_threads = 40
- # Paired nanomonsv PASSED VCF template.
- # {output_dir}, {id}
- nanomonsv_passed_vcf = "{output_dir}/{id}_diag_nanomonsv_PASSED.vcf.gz"
- # Solo nanomonsv output directory template.
- # {result_dir}, {id}, {time}
- nanomonsv_solo_output_dir = "{result_dir}/{id}/{time}/nanomonsv-solo"
- # Solo nanomonsv PASSED VCF template.
- # {output_dir}, {id}, {time}
- nanomonsv_solo_passed_vcf = "{output_dir}/{id}_{time}_nanomonsv-solo_PASSED.vcf.gz"
- # Path to simple repeat BED file for nanomonsv.
- # https://github.com/friend1ws/nanomonsv
- # Warning: a TBI index must exist for this file.
- nanomonsv_simple_repeat_bed = "/home/t_steimle/ref/hs1/human_chm13v2.0_simpleRepeat.bed.gz"
- #######################################
- # PromethION metadata
- #######################################
- # Directory containing PromethION run metadata.
- promethion_runs_metadata_dir = "/data/promethion-runs-metadata"
- # JSON file mapping flowcell IDs / runs for Pandora.
- promethion_runs_input = "/data/pandora-flowcell-id.json"
- #######################################
- # Alignment / basecalling (Dorado)
- #######################################
- [align]
- # Path to Dorado binary.
- dorado_bin = "/mnt/beegfs02/scratch/t_steimle/tools/dorado-latest-linux-x64/bin/dorado"
- # Dorado basecalling arguments (device, model, modifications…).
- dorado_basecall_arg = "-x 'cuda:all' sup,5mC_5hmC"
- # Whether Dorado should re-align after demultiplexing.
- dorado_should_realign = false
- # Number of threads for the Dorado aligner.
- dorado_aligner_threads = 10
- # Reference FASTA used for alignment.
- ref_fa = "/mnt/beegfs02/scratch/t_steimle/ref/hs1/chm13v2.0.fa"
- # Minimap2 index used for alignment.
- ref_mmi = ""
- # Path to samtools binary.
- samtools_bin = "/mnt/beegfs02/scratch/t_steimle/tools/samtools"
- # Threads for `samtools view`.
- samtools_view_threads = 10
- # Threads for `samtools sort`.
- samtools_sort_threads = 20
- # Threads for `samtools merge`.
- samtools_merge_threads = 40
- # Threads for `samtools split`.
- samtools_split_threads = 20
|