|
@@ -106,8 +106,17 @@ impl From<&Bin> for BinCount {
|
|
|
/// A new `BinCount` instance populated with data from the `Bin`.
|
|
/// A new `BinCount` instance populated with data from the `Bin`.
|
|
|
fn from(bin: &Bin) -> Self {
|
|
fn from(bin: &Bin) -> Self {
|
|
|
let n_reads = bin.n_reads();
|
|
let n_reads = bin.n_reads();
|
|
|
- let n_reads_float = n_reads as f64;
|
|
|
|
|
let (n_sa, n_start, n_end) = bin.count_reads_sa_start_end();
|
|
let (n_sa, n_start, n_end) = bin.count_reads_sa_start_end();
|
|
|
|
|
+ let (ratio_sa, ratio_start, ratio_end) = if n_reads == 0 {
|
|
|
|
|
+ (f64::NAN, f64::NAN, f64::NAN)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ let n_reads_float = n_reads as f64;
|
|
|
|
|
+ (
|
|
|
|
|
+ n_sa as f64 / n_reads_float,
|
|
|
|
|
+ n_start as f64 / n_reads_float,
|
|
|
|
|
+ n_end as f64 / n_reads_float,
|
|
|
|
|
+ )
|
|
|
|
|
+ };
|
|
|
|
|
|
|
|
Self {
|
|
Self {
|
|
|
contig: bin.contig.clone(),
|
|
contig: bin.contig.clone(),
|
|
@@ -115,9 +124,9 @@ impl From<&Bin> for BinCount {
|
|
|
end: bin.end,
|
|
end: bin.end,
|
|
|
n_reads: n_reads as u32,
|
|
n_reads: n_reads as u32,
|
|
|
coverage: bin.mean_coverage_from_depths(),
|
|
coverage: bin.mean_coverage_from_depths(),
|
|
|
- ratio_sa: n_sa as f64 / n_reads_float,
|
|
|
|
|
- ratio_start: n_start as f64 / n_reads_float,
|
|
|
|
|
- ratio_end: n_end as f64 / n_reads_float,
|
|
|
|
|
|
|
+ ratio_sa,
|
|
|
|
|
+ ratio_start,
|
|
|
|
|
+ ratio_end,
|
|
|
outlier: None,
|
|
outlier: None,
|
|
|
depths: bin.depths.clone(),
|
|
depths: bin.depths.clone(),
|
|
|
low_qualities: bin.low_qualities.clone(),
|
|
low_qualities: bin.low_qualities.clone(),
|
|
@@ -345,6 +354,10 @@ fn build_bin_defs_grid(
|
|
|
bin_size: u32,
|
|
bin_size: u32,
|
|
|
chunk_n_bin: u32,
|
|
chunk_n_bin: u32,
|
|
|
) -> (Vec<BinDef>, Vec<Option<usize>>) {
|
|
) -> (Vec<BinDef>, Vec<Option<usize>>) {
|
|
|
|
|
+ if bin_size == 0 || chunk_n_bin == 0 {
|
|
|
|
|
+ return (Vec::new(), Vec::new());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
let n_bin = contig_len / bin_size; // keep your bitwise logic
|
|
let n_bin = contig_len / bin_size; // keep your bitwise logic
|
|
|
let n_chunks = n_bin.div_ceil(chunk_n_bin); // keep your bitwise logic
|
|
let n_chunks = n_bin.div_ceil(chunk_n_bin); // keep your bitwise logic
|
|
|
|
|
|
|
@@ -477,6 +490,9 @@ pub fn scan_contig(
|
|
|
chunk_n_bin: u32,
|
|
chunk_n_bin: u32,
|
|
|
min_mapq: u8,
|
|
min_mapq: u8,
|
|
|
) -> anyhow::Result<Vec<BinCount>> {
|
|
) -> anyhow::Result<Vec<BinCount>> {
|
|
|
|
|
+ anyhow::ensure!(bin_size > 0, "bin_size must be > 0");
|
|
|
|
|
+ anyhow::ensure!(chunk_n_bin > 0, "chunk_n_bin must be > 0");
|
|
|
|
|
+
|
|
|
// Build bin definitions EXACTLY like the old code (bitwise-identical ranges),
|
|
// Build bin definitions EXACTLY like the old code (bitwise-identical ranges),
|
|
|
// plus a safe mapping from grid-bin index -> defs index.
|
|
// plus a safe mapping from grid-bin index -> defs index.
|
|
|
let (defs, grid_to_def) = build_bin_defs_grid(contig_len, bin_size, chunk_n_bin);
|
|
let (defs, grid_to_def) = build_bin_defs_grid(contig_len, bin_size, chunk_n_bin);
|
|
@@ -742,7 +758,7 @@ pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Re
|
|
|
.with_context(|| format!("failed to create output dir: {out_parent:?}"))?;
|
|
.with_context(|| format!("failed to create output dir: {out_parent:?}"))?;
|
|
|
|
|
|
|
|
let tmp_path = out_parent.join(format!(
|
|
let tmp_path = out_parent.join(format!(
|
|
|
- ".{}.{}.{}.tmp",
|
|
|
|
|
|
|
+ ".{}.{}.{}.tmp.gz",
|
|
|
contig,
|
|
contig,
|
|
|
std::process::id(),
|
|
std::process::id(),
|
|
|
std::thread::current().name().unwrap_or("rayon")
|
|
std::thread::current().name().unwrap_or("rayon")
|
|
@@ -792,20 +808,13 @@ pub fn par_whole_scan(id: &str, time_point: &str, config: &Config) -> anyhow::Re
|
|
|
/// method to identify outliers and marks them in the `outlier` field of each `BinCount`.
|
|
/// method to identify outliers and marks them in the `outlier` field of each `BinCount`.
|
|
|
///
|
|
///
|
|
|
/// ## Workflow:
|
|
/// ## Workflow:
|
|
|
-/// 1. Wraps the input slice in a `Mutex` for thread-safe access.
|
|
|
|
|
-/// 2. Defines helper functions to extract specific ratios from `BinCount` objects.
|
|
|
|
|
-/// 3. Iterates over three outlier types: SA, Start, and End.
|
|
|
|
|
-/// 4. For each type:
|
|
|
|
|
-/// a. Filters and collects non-NaN ratios along with their indices.
|
|
|
|
|
-/// b. Identifies outliers using the `filter_outliers_modified_z_score_with_indices` function.
|
|
|
|
|
-/// c. Marks identified outliers in the original `BinCount` objects.
|
|
|
|
|
|
|
+/// 1. Materializes `(index, [ratio_sa, ratio_start, ratio_end])` for each bin.
|
|
|
|
|
+/// 2. For each outlier type, filters non-NaN ratios and runs modified Z-score detection.
|
|
|
|
|
+/// 3. Marks outlier labels back into `bin_counts` in place.
|
|
|
///
|
|
///
|
|
|
/// ## Parallelization:
|
|
/// ## Parallelization:
|
|
|
-/// - Uses `par_iter()` for parallel processing of `BinCount` objects.
|
|
|
|
|
-/// - Applies outlier marking in parallel using `par_iter().for_each()`.
|
|
|
|
|
-///
|
|
|
|
|
-/// ## Thread Safety:
|
|
|
|
|
-/// - Uses `Mutex` to ensure thread-safe access to the shared `bin_counts` slice.
|
|
|
|
|
|
|
+/// - Uses Rayon `par_iter()` for ratio extraction/filtering.
|
|
|
|
|
+/// - Final outlier labels are applied sequentially to the mutable slice.
|
|
|
///
|
|
///
|
|
|
/// ## Outlier Types:
|
|
/// ## Outlier Types:
|
|
|
/// - `BinOutlier::SA`: Outliers in supplementary alignment ratio.
|
|
/// - `BinOutlier::SA`: Outliers in supplementary alignment ratio.
|
|
@@ -902,7 +911,7 @@ pub fn somatic_scan(id: &str, config: &Config) -> anyhow::Result<()> {
|
|
|
/// This struct encapsulates:
|
|
/// This struct encapsulates:
|
|
|
/// - Initialization and conditional cleanup of prior outputs
|
|
/// - Initialization and conditional cleanup of prior outputs
|
|
|
/// - Logic for checking whether a re-run is necessary (based on BAM and output timestamps)
|
|
/// - Logic for checking whether a re-run is necessary (based on BAM and output timestamps)
|
|
|
-/// - Coordinated parallel scanning of both normal and tumoral inputs
|
|
|
|
|
|
|
+/// - Coordinated scanning of both normal and tumoral inputs
|
|
|
#[derive(Debug)]
|
|
#[derive(Debug)]
|
|
|
pub struct SomaticScan {
|
|
pub struct SomaticScan {
|
|
|
id: String,
|
|
id: String,
|
|
@@ -934,8 +943,14 @@ impl Initialize for SomaticScan {
|
|
|
|
|
|
|
|
// Force re-run: clean up previous output directories for both normal and tumoral scans
|
|
// Force re-run: clean up previous output directories for both normal and tumoral scans
|
|
|
if somatic_scan.config.somatic_scan_force {
|
|
if somatic_scan.config.somatic_scan_force {
|
|
|
- fs::remove_dir_all(somatic_scan.config.somatic_scan_normal_output_dir(id))?;
|
|
|
|
|
- fs::remove_dir_all(somatic_scan.config.somatic_scan_tumoral_output_dir(id))?;
|
|
|
|
|
|
|
+ let normal_out = somatic_scan.config.somatic_scan_normal_output_dir(id);
|
|
|
|
|
+ let tumoral_out = somatic_scan.config.somatic_scan_tumoral_output_dir(id);
|
|
|
|
|
+ if Path::new(&normal_out).exists() {
|
|
|
|
|
+ fs::remove_dir_all(&normal_out)?;
|
|
|
|
|
+ }
|
|
|
|
|
+ if Path::new(&tumoral_out).exists() {
|
|
|
|
|
+ fs::remove_dir_all(&tumoral_out)?;
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
Ok(somatic_scan)
|
|
Ok(somatic_scan)
|
|
@@ -978,7 +993,7 @@ impl ShouldRun for SomaticScan {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl Run for SomaticScan {
|
|
impl Run for SomaticScan {
|
|
|
- /// Executes the full scan pipeline in parallel for first normal then tumoral samples.
|
|
|
|
|
|
|
+ /// Executes the full scan pipeline sequentially: normal then tumoral.
|
|
|
///
|
|
///
|
|
|
/// # Returns
|
|
/// # Returns
|
|
|
/// An error if the underlying scan function (`par_whole_scan`) fails for either sample.
|
|
/// An error if the underlying scan function (`par_whole_scan`) fails for either sample.
|