|
|
@@ -8,16 +8,16 @@ use rayon::{
|
|
|
iter::{IntoParallelIterator, ParallelIterator},
|
|
|
slice::ParallelSliceMut,
|
|
|
};
|
|
|
-use rust_htslib::bam::IndexedReader;
|
|
|
+use rust_htslib::bam::{self, IndexedReader, Read};
|
|
|
|
|
|
-use crate::helpers::is_file_older;
|
|
|
+use crate::helpers::{bam_contigs, get_genome_sizes, is_file_older};
|
|
|
use crate::io::writers::get_gz_writer;
|
|
|
use crate::math::filter_outliers_modified_z_score_with_indices;
|
|
|
|
|
|
use crate::pipes::{Initialize, ShouldRun};
|
|
|
use crate::runners::Run;
|
|
|
use crate::variant::vcf_variant::Label;
|
|
|
-use crate::{config::Config, io::dict::read_dict, scan::bin::Bin};
|
|
|
+use crate::{config::Config, scan::bin::Bin};
|
|
|
|
|
|
/// Represents a count of reads in a genomic bin, including various metrics and outlier information.
|
|
|
#[derive(Debug)]
|
|
|
@@ -287,15 +287,27 @@ impl fmt::Display for BinOutlier {
|
|
|
/// - A `Bin` object cannot be created for a specific region.
|
|
|
/// - Any I/O operation (e.g., writing results) fails.
|
|
|
pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
- let bin_size = config.count_bin_size;
|
|
|
- let chunk_n_bin = config.count_n_chunks;
|
|
|
+ let bin_size = config.count_bin_size ;
|
|
|
+ let chunk_n_bin = config.count_n_chunks ;
|
|
|
let bam_path = &config.solo_bam(id, time_point);
|
|
|
let out_dir = config.somatic_scan_solo_output_dir(id, time_point);
|
|
|
|
|
|
info!("Starting whole genome scan for {bam_path}, with bin size of {bin_size} nt and by chunks of {chunk_n_bin} bins.");
|
|
|
fs::create_dir_all(&out_dir)?;
|
|
|
|
|
|
- for (contig, length) in read_dict(&config.dict_file)? {
|
|
|
+ let reader = bam::Reader::from_path(bam_path)
|
|
|
+ .with_context(|| format!("Failed to open BAM: {bam_path}"))?;
|
|
|
+ let header = bam::Header::from_template(reader.header());
|
|
|
+ let contigs: Vec<(String, u32)> = get_genome_sizes(&header)?
|
|
|
+ .into_iter()
|
|
|
+ .map(|(ctg, len)| {
|
|
|
+ u32::try_from(len)
|
|
|
+ .map(|l| (ctg.clone(), l))
|
|
|
+ .with_context(|| format!("Contig {ctg} length {len} exceeds u32::MAX"))
|
|
|
+ })
|
|
|
+ .collect::<anyhow::Result<_>>()?;
|
|
|
+
|
|
|
+ for (contig, length) in contigs {
|
|
|
let out_file = config.somatic_scan_solo_count_file(id, time_point, &contig);
|
|
|
// let out_file = format!("{out_dir}/{contig}_count.tsv.gz");
|
|
|
|
|
|
@@ -544,33 +556,34 @@ impl ShouldRun for SomaticScan {
|
|
|
/// Determines whether SomaticScan should re-run by checking whether
|
|
|
/// any of the count output files are outdated or missing relative to the BAMs.
|
|
|
fn should_run(&self) -> bool {
|
|
|
- let mrd_bam_path = &self.config.normal_bam(&self.id);
|
|
|
- let diag_bam_path = &self.config.tumoral_bam(&self.id);
|
|
|
-
|
|
|
- match read_dict(&self.config.dict_file) {
|
|
|
- Ok(dict) => {
|
|
|
- for (contig, _) in dict {
|
|
|
- let diag_count_file = self
|
|
|
- .config
|
|
|
- .somatic_scan_tumoral_count_file(&self.id, &contig);
|
|
|
- if is_file_older(&diag_count_file, diag_bam_path, true).unwrap_or(true) {
|
|
|
- return true;
|
|
|
- }
|
|
|
- let mrd_count_file = self
|
|
|
- .config
|
|
|
- .somatic_scan_normal_count_file(&self.id, &contig);
|
|
|
- if is_file_older(&mrd_count_file, mrd_bam_path, true).unwrap_or(true) {
|
|
|
- return true;
|
|
|
- }
|
|
|
- }
|
|
|
- false
|
|
|
- }
|
|
|
+ let mrd_bam_path = self.config.normal_bam(&self.id);
|
|
|
+ let diag_bam_path = self.config.tumoral_bam(&self.id);
|
|
|
+
|
|
|
+ let contigs = match bam_contigs(&diag_bam_path) {
|
|
|
+ Ok(c) => c,
|
|
|
Err(e) => {
|
|
|
- error!("Failed to read dict file: {}\n{e}", self.config.dict_file);
|
|
|
- // Don't run if dict is unreadable
|
|
|
- false
|
|
|
+ error!("Failed to read BAM header: {e}");
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ for contig in contigs {
|
|
|
+ let diag_count_file = self
|
|
|
+ .config
|
|
|
+ .somatic_scan_tumoral_count_file(&self.id, &contig);
|
|
|
+ if is_file_older(&diag_count_file, &diag_bam_path, true).unwrap_or(true) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ let mrd_count_file = self
|
|
|
+ .config
|
|
|
+ .somatic_scan_normal_count_file(&self.id, &contig);
|
|
|
+ if is_file_older(&mrd_count_file, &mrd_bam_path, true).unwrap_or(true) {
|
|
|
+ return true;
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ false
|
|
|
}
|
|
|
}
|
|
|
|