|
|
@@ -2,6 +2,7 @@ use anyhow::Context;
|
|
|
use log::{debug, info};
|
|
|
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
|
|
use regex::Regex;
|
|
|
+use rust_htslib::bam::{self, Read};
|
|
|
use std::{
|
|
|
fmt, fs,
|
|
|
path::{Path, PathBuf},
|
|
|
@@ -15,10 +16,13 @@ use crate::{
|
|
|
vcf::Vcf,
|
|
|
},
|
|
|
commands::{
|
|
|
- SlurmRunner, bcftools::{BcftoolsConcat, BcftoolsKeepPass}, run_many_sbatch
|
|
|
+ bcftools::{BcftoolsConcat, BcftoolsKeepPass},
|
|
|
+ run_many_sbatch, SlurmRunner,
|
|
|
},
|
|
|
config::Config,
|
|
|
- helpers::{is_file_older, remove_dir_if_exists},
|
|
|
+ helpers::{
|
|
|
+ get_genome_sizes, is_file_older, remove_dir_if_exists, split_genome_into_n_regions_exact,
|
|
|
+ },
|
|
|
io::vcf::read_vcf,
|
|
|
pipes::{InitializeSolo, ShouldRun, Version},
|
|
|
runners::Run,
|
|
|
@@ -236,14 +240,15 @@ impl JobCommand for DeepVariant {
|
|
|
|
|
|
format!(
|
|
|
"{singularity_bin} exec --nv \
|
|
|
- --bind /data:/data \
|
|
|
+ --bind /mnt:/mnt \
|
|
|
+ --bind /home/t_steimle:/home/t_steimle \
|
|
|
--bind {output_dir}:/output \
|
|
|
{image} \
|
|
|
/opt/deepvariant/bin/run_deepvariant \
|
|
|
--model_type={model_type} \
|
|
|
--ref={reference} \
|
|
|
--reads={bam} \
|
|
|
- --regions='{regions}' \
|
|
|
+ --regions=\"{regions}\" \
|
|
|
{haploid_flag} \
|
|
|
--par_regions_bed={par_bed} \
|
|
|
--output_vcf={output_vcf} \
|
|
|
@@ -690,40 +695,6 @@ fn parse_deepvariant_version(output: &str) -> anyhow::Result<String> {
|
|
|
.to_string())
|
|
|
}
|
|
|
|
|
|
-/// Splits a slice of regions into `n` approximately equal chunks.
|
|
|
-///
|
|
|
-/// Each chunk is joined with commas into a single string suitable for
|
|
|
-/// DeepVariant's `--regions` argument.
|
|
|
-///
|
|
|
-/// # Arguments
|
|
|
-///
|
|
|
-/// * `all_regions` - Slice of region strings (e.g., `["chr1", "chr2", ...]`)
|
|
|
-/// * `n` - Target number of chunks
|
|
|
-///
|
|
|
-/// # Returns
|
|
|
-///
|
|
|
-/// A vector of comma-separated region strings. Length may be less than `n`
|
|
|
-/// if there are fewer regions than requested chunks.
|
|
|
-///
|
|
|
-/// # Examples
|
|
|
-///
|
|
|
-/// ```
|
|
|
-/// let regions = ["chr1", "chr2", "chr3", "chr4", "chr5"];
|
|
|
-/// let chunks = split_regions_into_n(&regions, 2);
|
|
|
-/// assert_eq!(chunks, vec!["chr1,chr2,chr3", "chr4,chr5"]);
|
|
|
-/// ```
|
|
|
-fn split_regions_into_n(all_regions: &[&str], n: usize) -> Vec<String> {
|
|
|
- let len = all_regions.len();
|
|
|
- if n == 0 || len == 0 {
|
|
|
- return Vec::new();
|
|
|
- }
|
|
|
- let chunk_size = len.div_ceil(n); // ceil
|
|
|
- all_regions
|
|
|
- .chunks(chunk_size)
|
|
|
- .map(|chunk| chunk.join(","))
|
|
|
- .collect()
|
|
|
-}
|
|
|
-
|
|
|
/// Run DeepVariant in 4 sbatch jobs (by regions), then merge their PASS VCFs
|
|
|
/// into the final deepvariant_solo_passed_vcf().
|
|
|
pub fn run_deepvariant_quartered_sbatch_with_merge(
|
|
|
@@ -731,6 +702,7 @@ pub fn run_deepvariant_quartered_sbatch_with_merge(
|
|
|
time_point: &str,
|
|
|
config: &Config,
|
|
|
) -> anyhow::Result<Vec<CapturedOutput>> {
|
|
|
+ let n_parts = 4;
|
|
|
let base = DeepVariant::initialize(id, time_point, config)?;
|
|
|
|
|
|
// if already up-to-date, nothing to do
|
|
|
@@ -739,9 +711,20 @@ pub fn run_deepvariant_quartered_sbatch_with_merge(
|
|
|
return Ok(Vec::new());
|
|
|
}
|
|
|
|
|
|
+ let bam_path = config.solo_bam(id, time_point);
|
|
|
+
|
|
|
// 1) split regions into 4 chunks
|
|
|
- let all_regions: Vec<&str> = base.regions.split(',').collect();
|
|
|
- let region_chunks = split_regions_into_n(&all_regions, 4);
|
|
|
+ let reader = bam::Reader::from_path(&bam_path)
|
|
|
+ .with_context(|| format!("Failed to open BAM: {bam_path}"))?;
|
|
|
+ let header = bam::Header::from_template(reader.header());
|
|
|
+ let genome_sizes = get_genome_sizes(&header)?;
|
|
|
+
|
|
|
+ // Split genome into regions
|
|
|
+ let region_chunks = split_genome_into_n_regions_exact(&genome_sizes, n_parts)
|
|
|
+ .into_iter()
|
|
|
+ .map(|v| v.join(" "))
|
|
|
+ .collect::<Vec<String>>();
|
|
|
+
|
|
|
let n_parts = region_chunks.len();
|
|
|
|
|
|
// Build parallel jobs
|