|
|
@@ -1,12 +1,15 @@
|
|
|
use std::{
|
|
|
collections::HashMap,
|
|
|
- fmt, fs,
|
|
|
+ fmt,
|
|
|
+ fs::{self, metadata},
|
|
|
path::{Path, PathBuf},
|
|
|
+ time::SystemTime,
|
|
|
};
|
|
|
|
|
|
use anyhow::Context;
|
|
|
+use chrono::{DateTime, Utc};
|
|
|
+use glob::glob;
|
|
|
use log::{info, warn};
|
|
|
-use pandora_lib_scan::par_whole_scan;
|
|
|
|
|
|
use self::{bam::BamCollection, pod5::Pod5Collection, vcf::VcfCollection};
|
|
|
use crate::{
|
|
|
@@ -18,7 +21,11 @@ use crate::{
|
|
|
collection::pod5::FlowCellCase,
|
|
|
commands::dorado::Dorado as BasecallAlign,
|
|
|
config::Config,
|
|
|
- functions::whole_scan::{WholeScan, WholeScanConfig},
|
|
|
+ functions::{
|
|
|
+ assembler::{Assembler, AssemblerConfig},
|
|
|
+ variants::{Variants, VariantsConfig},
|
|
|
+ whole_scan::{WholeScan, WholeScanConfig},
|
|
|
+ },
|
|
|
};
|
|
|
|
|
|
pub mod bam;
|
|
|
@@ -131,8 +138,8 @@ impl Collections {
|
|
|
&config.result_dir, bam.id, bam.time_point, config.scan_dir
|
|
|
);
|
|
|
if PathBuf::from(&scan_dir).exists() {
|
|
|
- let dir_mod = fs::metadata(&scan_dir)?.modified()?;
|
|
|
- if bam.file_metadata.modified()? > dir_mod {
|
|
|
+ let dir_mod: DateTime<Utc> = fs::metadata(&scan_dir)?.modified()?.into();
|
|
|
+ if bam.modified > dir_mod {
|
|
|
fs::remove_dir_all(&scan_dir)?;
|
|
|
}
|
|
|
}
|
|
|
@@ -148,11 +155,6 @@ impl Collections {
|
|
|
.to_string(),
|
|
|
config: WholeScanConfig::default(),
|
|
|
});
|
|
|
- // par_whole_scan(
|
|
|
- // "/data/ref/hs1/chm13v2.0.dict",
|
|
|
- // bam.path.to_str().context("Cant convert path to string")?,
|
|
|
- // &scan_dir,
|
|
|
- // )?;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -163,21 +165,16 @@ impl Collections {
|
|
|
self.bam.get(id, "diag").first(),
|
|
|
self.bam.get(id, "mrd").first(),
|
|
|
) {
|
|
|
- let diag_modified = diag
|
|
|
- .file_metadata
|
|
|
- .modified()
|
|
|
- .expect("Can't read Bam modified time.");
|
|
|
- let mrd_modified = mrd
|
|
|
- .file_metadata
|
|
|
- .modified()
|
|
|
- .expect("Can't read Bam modified time.");
|
|
|
+ let diag_modified = diag.modified;
|
|
|
+ let mrd_modified = mrd.modified;
|
|
|
let mut rm_paths: Vec<&Path> = vcfs
|
|
|
.iter()
|
|
|
.flat_map(|vcf| {
|
|
|
- let vcf_mod = vcf
|
|
|
+ let vcf_mod: DateTime<Utc> = vcf
|
|
|
.file_metadata
|
|
|
.modified()
|
|
|
- .expect("Can't read VCF modified time.");
|
|
|
+ .expect("Can't read VCF modified time.")
|
|
|
+ .into();
|
|
|
|
|
|
// For somatic caller erase if one bam (diag or mrd) is more recent.
|
|
|
if vcf.caller != "DeepVariant" {
|
|
|
@@ -263,6 +260,13 @@ impl Collections {
|
|
|
});
|
|
|
|
|
|
// Variants aggregation
|
|
|
+ // info!("Looking for variants aggregation tasks...");
|
|
|
+ // self.bam.bams.iter().filter(|b| b.time_point == "diag" ).for_each(|bam| {
|
|
|
+ // let id = bam.id;
|
|
|
+ // });
|
|
|
+
|
|
|
+ // de novo
|
|
|
+ tasks.extend(self.todo_assembler()?);
|
|
|
|
|
|
// Tasks sorting and dedup
|
|
|
let mut hs = HashMap::new();
|
|
|
@@ -273,6 +277,62 @@ impl Collections {
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
+ pub fn tasks_dedup(&mut self) {
|
|
|
+ let mut hs = HashMap::new();
|
|
|
+ self.tasks.clone().into_iter().for_each(|t| {
|
|
|
+ hs.insert(t.to_string(), t);
|
|
|
+ });
|
|
|
+ self.tasks = hs.into_values().collect();
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn todo_assembler(&mut self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
+ let mut tasks = Vec::new();
|
|
|
+ let config = AssemblerConfig::default();
|
|
|
+ for b in &self.bam.bams {
|
|
|
+ let assemblies_dir = format!(
|
|
|
+ "{}/{}/{}/{}",
|
|
|
+ config.result_dir, b.id, b.time_point, config.output_dir_name
|
|
|
+ );
|
|
|
+
|
|
|
+ if !Path::new(&assemblies_dir).exists() {
|
|
|
+ tasks.push(CollectionsTasks::Assemble {
|
|
|
+ id: b.id.clone(),
|
|
|
+ time_point: b.time_point.clone(),
|
|
|
+ config: config.clone(),
|
|
|
+ });
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let pattern = format!("{assemblies_dir}/*/*.bam");
|
|
|
+ let mut mtimes: Vec<SystemTime> = glob(&pattern)?
|
|
|
+ .filter_map(|entry| entry.ok())
|
|
|
+ .filter_map(|path| metadata(path).ok()?.modified().ok())
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ if mtimes.is_empty() {
|
|
|
+ tasks.push(CollectionsTasks::Assemble {
|
|
|
+ id: b.id.clone(),
|
|
|
+ time_point: b.time_point.clone(),
|
|
|
+ config: config.clone(),
|
|
|
+ });
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ mtimes.sort_unstable();
|
|
|
+ mtimes.dedup();
|
|
|
+ let max_mtime: DateTime<Utc> =
|
|
|
+ mtimes.last().context("No modified time")?.to_owned().into();
|
|
|
+ if b.modified > max_mtime {
|
|
|
+ tasks.push(CollectionsTasks::Assemble {
|
|
|
+ id: b.id.clone(),
|
|
|
+ time_point: b.time_point.clone(),
|
|
|
+ config: config.clone(),
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(tasks)
|
|
|
+ }
|
|
|
+
|
|
|
pub fn run(&mut self) -> anyhow::Result<()> {
|
|
|
// self.tasks.reverse();
|
|
|
if self.tasks.is_empty() {
|
|
|
@@ -297,7 +357,7 @@ impl Collections {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug)]
|
|
|
+#[derive(Debug, Clone)]
|
|
|
pub enum CollectionsTasks {
|
|
|
Align(FlowCellCase),
|
|
|
DemuxAlign(Vec<FlowCellCase>),
|
|
|
@@ -325,6 +385,15 @@ pub enum CollectionsTasks {
|
|
|
bam: String,
|
|
|
config: WholeScanConfig,
|
|
|
},
|
|
|
+ Variants {
|
|
|
+ id: String,
|
|
|
+ config: VariantsConfig,
|
|
|
+ },
|
|
|
+ Assemble {
|
|
|
+ id: String,
|
|
|
+ time_point: String,
|
|
|
+ config: AssemblerConfig,
|
|
|
+ },
|
|
|
}
|
|
|
|
|
|
impl CollectionsTasks {
|
|
|
@@ -368,6 +437,16 @@ impl CollectionsTasks {
|
|
|
} => {
|
|
|
WholeScan::new(id, time_point, bam, config)?.run()?;
|
|
|
}
|
|
|
+ CollectionsTasks::Variants { id, config } => {
|
|
|
+ Variants::new(id, config).run()?;
|
|
|
+ }
|
|
|
+ CollectionsTasks::Assemble {
|
|
|
+ id,
|
|
|
+ time_point,
|
|
|
+ config,
|
|
|
+ } => {
|
|
|
+ Assembler::new(id, time_point, config).run()?;
|
|
|
+ }
|
|
|
}
|
|
|
Ok(())
|
|
|
}
|
|
|
@@ -418,6 +497,10 @@ impl fmt::Display for CollectionsTasks {
|
|
|
)
|
|
|
}
|
|
|
WholeScan { id, bam, .. } => write!(f, "Whole scan for id: {}, bam: {}", id, bam),
|
|
|
+ Variants { id, .. } => write!(f, "Variants aggregation for id: {}", id),
|
|
|
+ Assemble { id, time_point, .. } => {
|
|
|
+ write!(f, "Assembly for id: {}, time point: {}", id, time_point)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|