|
|
@@ -2,7 +2,6 @@ use std::{
|
|
|
collections::HashMap,
|
|
|
fmt,
|
|
|
fs::{self, metadata},
|
|
|
- os::unix::fs::MetadataExt,
|
|
|
path::{Path, PathBuf},
|
|
|
thread,
|
|
|
time::SystemTime,
|
|
|
@@ -27,9 +26,8 @@ use crate::{
|
|
|
functions::{
|
|
|
assembler::{Assembler, AssemblerConfig},
|
|
|
variants::{RunVariantsAgg, VariantsConfig},
|
|
|
- whole_scan::{WholeScan, WholeScanConfig},
|
|
|
},
|
|
|
- runners::Run,
|
|
|
+ runners::Run, scan::scan::{par_whole_scan, par_whole_scan_local},
|
|
|
};
|
|
|
|
|
|
pub mod bam;
|
|
|
@@ -138,37 +136,6 @@ impl Collections {
|
|
|
.into_values()
|
|
|
.for_each(|data| tasks.push(CollectionsTasks::DemuxAlign(data)));
|
|
|
|
|
|
- // Whole scan
|
|
|
- for bam in self
|
|
|
- .bam
|
|
|
- .by_id_completed(self.config.min_diag_cov, self.config.min_mrd_cov)
|
|
|
- {
|
|
|
- let config = WholeScanConfig::default();
|
|
|
- let scan_dir = format!(
|
|
|
- "{}/{}/{}/{}",
|
|
|
- &config.result_dir, bam.id, bam.time_point, config.scan_dir
|
|
|
- );
|
|
|
- if PathBuf::from(&scan_dir).exists() {
|
|
|
- let dir_mod: DateTime<Utc> = fs::metadata(&scan_dir)?.modified()?.into();
|
|
|
- if bam.modified > dir_mod {
|
|
|
- fs::remove_dir_all(&scan_dir)?;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if !PathBuf::from(&scan_dir).exists() {
|
|
|
- tasks.push(CollectionsTasks::WholeScan {
|
|
|
- id: bam.id,
|
|
|
- time_point: bam.time_point,
|
|
|
- bam: bam
|
|
|
- .path
|
|
|
- .to_str()
|
|
|
- .context("Cant convert path to string")?
|
|
|
- .to_string(),
|
|
|
- config: WholeScanConfig::default(),
|
|
|
- });
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
// de novo
|
|
|
// tasks.extend(self.todo_assembler()?);
|
|
|
|
|
|
@@ -313,6 +280,38 @@ impl Collections {
|
|
|
self.tasks = hs.into_values().collect();
|
|
|
}
|
|
|
|
|
|
+ pub fn todo_bam_count(&mut self, config: &Config) -> anyhow::Result<()> {
|
|
|
+ // Whole scan
|
|
|
+ for wgs_bam in self
|
|
|
+ .bam
|
|
|
+ .by_id_completed(self.config.min_diag_cov, self.config.min_mrd_cov)
|
|
|
+ {
|
|
|
+ let id = wgs_bam.id.as_str();
|
|
|
+
|
|
|
+ let count_dir = match wgs_bam.time_point.as_str() {
|
|
|
+ "diag" => config.tumoral_dir_count(id),
|
|
|
+ "mrd" => config.normal_dir_count(id),
|
|
|
+ _ => anyhow::bail!("Unknown bam time point {}", wgs_bam.time_point),
|
|
|
+ };
|
|
|
+
|
|
|
+ if PathBuf::from(&count_dir).exists() {
|
|
|
+ let dir_mod: DateTime<Utc> = fs::metadata(&count_dir)?.modified()?.into();
|
|
|
+ if wgs_bam.modified > dir_mod {
|
|
|
+ fs::remove_dir_all(&count_dir)?;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if !PathBuf::from(&count_dir).exists() {
|
|
|
+ self.tasks.push(CollectionsTasks::CountBam {
|
|
|
+ bam_path: wgs_bam.path.to_string_lossy().to_string(),
|
|
|
+ count_dir,
|
|
|
+ config: config.clone(),
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+
|
|
|
// No pair needed
|
|
|
pub fn todo_assembler(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
let mut tasks = Vec::new();
|
|
|
@@ -546,47 +545,47 @@ impl Collections {
|
|
|
/// * `anyhow::Result<Vec<CollectionsTasks>>` - A Result containing a vector of `CollectionsTasks::Variants`
|
|
|
/// if successful, or an error if file metadata cannot be accessed.
|
|
|
///
|
|
|
- pub fn todo_variants_agg(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
- let mut tasks = Vec::new();
|
|
|
- let config = VariantsConfig::default();
|
|
|
- let vcfs_ids = self.vcf.group_by_id();
|
|
|
- for pair in &self.bam_pairs() {
|
|
|
- if self.config.id_black_list.contains(&pair.0.id) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- let const_path = format!(
|
|
|
- "{}/{}/diag/{}_constit.bytes.gz",
|
|
|
- &config.result_dir, &pair.0.id, &pair.0.id
|
|
|
- );
|
|
|
- let constit = Path::new(&const_path);
|
|
|
-
|
|
|
- if constit.exists() {
|
|
|
- let vcfs: Vec<_> = vcfs_ids.iter().filter(|(id, _)| id == &pair.0.id).collect();
|
|
|
- if let Some((_, vcfs)) = vcfs.first() {
|
|
|
- let mtime = constit
|
|
|
- .metadata()
|
|
|
- .context(format!("Can't access file metadata {const_path}."))?
|
|
|
- .mtime();
|
|
|
- let n_new = vcfs
|
|
|
- .iter()
|
|
|
- .filter(|vcf| mtime < vcf.file_metadata.mtime())
|
|
|
- .count();
|
|
|
- if n_new > 0 {
|
|
|
- tasks.push(CollectionsTasks::SomaticVariants {
|
|
|
- id: pair.0.id.clone(),
|
|
|
- config: config.clone(),
|
|
|
- });
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- tasks.push(CollectionsTasks::SomaticVariants {
|
|
|
- id: pair.0.id.clone(),
|
|
|
- config: config.clone(),
|
|
|
- });
|
|
|
- }
|
|
|
- }
|
|
|
- Ok(tasks)
|
|
|
- }
|
|
|
+ // pub fn todo_variants_agg(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
+ // let mut tasks = Vec::new();
|
|
|
+ // let config = VariantsConfig::default();
|
|
|
+ // let vcfs_ids = self.vcf.group_by_id();
|
|
|
+ // for pair in &self.bam_pairs() {
|
|
|
+ // if self.config.id_black_list.contains(&pair.0.id) {
|
|
|
+ // continue;
|
|
|
+ // }
|
|
|
+ // let const_path = format!(
|
|
|
+ // "{}/{}/diag/{}_constit.bytes.gz",
|
|
|
+ // &config.result_dir, &pair.0.id, &pair.0.id
|
|
|
+ // );
|
|
|
+ // let constit = Path::new(&const_path);
|
|
|
+ //
|
|
|
+ // if constit.exists() {
|
|
|
+ // let vcfs: Vec<_> = vcfs_ids.iter().filter(|(id, _)| id == &pair.0.id).collect();
|
|
|
+ // if let Some((_, vcfs)) = vcfs.first() {
|
|
|
+ // let mtime = constit
|
|
|
+ // .metadata()
|
|
|
+ // .context(format!("Can't access file metadata {const_path}."))?
|
|
|
+ // .mtime();
|
|
|
+ // let n_new = vcfs
|
|
|
+ // .iter()
|
|
|
+ // .filter(|vcf| mtime < vcf.file_metadata.mtime())
|
|
|
+ // .count();
|
|
|
+ // if n_new > 0 {
|
|
|
+ // tasks.push(CollectionsTasks::SomaticVariants {
|
|
|
+ // id: pair.0.id.clone(),
|
|
|
+ // config: config.clone(),
|
|
|
+ // });
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // } else {
|
|
|
+ // tasks.push(CollectionsTasks::SomaticVariants {
|
|
|
+ // id: pair.0.id.clone(),
|
|
|
+ // config: config.clone(),
|
|
|
+ // });
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // Ok(tasks)
|
|
|
+ // }
|
|
|
|
|
|
/// Runs all tasks in the collection.
|
|
|
///
|
|
|
@@ -710,11 +709,10 @@ impl Collections {
|
|
|
pub enum CollectionsTasks {
|
|
|
Align(FlowCellCase),
|
|
|
DemuxAlign(Vec<FlowCellCase>),
|
|
|
- WholeScan {
|
|
|
- id: String,
|
|
|
- time_point: String,
|
|
|
- bam: String,
|
|
|
- config: WholeScanConfig,
|
|
|
+ CountBam {
|
|
|
+ bam_path: String,
|
|
|
+ count_dir: String,
|
|
|
+ config: Config,
|
|
|
},
|
|
|
Assemble {
|
|
|
id: String,
|
|
|
@@ -767,12 +765,11 @@ impl CollectionsTasks {
|
|
|
CollectionsTasks::NanomonSV { id, .. } => {
|
|
|
NanomonSV::initialize(&id, Config::default())?.run()
|
|
|
}
|
|
|
- CollectionsTasks::WholeScan {
|
|
|
- id,
|
|
|
- time_point,
|
|
|
- bam,
|
|
|
+ CollectionsTasks::CountBam {
|
|
|
+ bam_path,
|
|
|
+ count_dir,
|
|
|
config,
|
|
|
- } => WholeScan::new(id, time_point, bam, config)?.run(),
|
|
|
+ } => par_whole_scan(&count_dir, &bam_path, &config),
|
|
|
CollectionsTasks::SomaticVariants { id, config } => {
|
|
|
RunVariantsAgg::new(id, config).run()
|
|
|
}
|
|
|
@@ -791,7 +788,7 @@ impl CollectionsTasks {
|
|
|
CollectionsTasks::DemuxAlign(_) => 1,
|
|
|
CollectionsTasks::ModPileup { .. } => 2,
|
|
|
CollectionsTasks::DMRCDiagMrd { .. } => 3,
|
|
|
- CollectionsTasks::WholeScan { .. } => 4,
|
|
|
+ CollectionsTasks::CountBam { .. } => 4,
|
|
|
CollectionsTasks::Assemble { .. } => 5,
|
|
|
CollectionsTasks::DeepVariant { .. } => 6,
|
|
|
CollectionsTasks::ClairS { .. } => 7,
|
|
|
@@ -851,12 +848,11 @@ impl fmt::Display for CollectionsTasks {
|
|
|
NanomonSV { id } => {
|
|
|
write!(f, "NanomonSV calling task for {id}")
|
|
|
}
|
|
|
- WholeScan {
|
|
|
- id,
|
|
|
- bam,
|
|
|
- time_point,
|
|
|
+ CountBam {
|
|
|
+ bam_path,
|
|
|
+ count_dir,
|
|
|
..
|
|
|
- } => write!(f, "Whole scan for {} {}, bam: {}", id, time_point, bam),
|
|
|
+ } => write!(f, "Whole bam count for bam: {bam_path} into {count_dir}"),
|
|
|
SomaticVariants { id, .. } => write!(f, "Variants aggregation for {}", id),
|
|
|
Assemble { id, time_point, .. } => {
|
|
|
write!(f, "De novo assemblage for {} {}", id, time_point)
|