|
|
@@ -2,6 +2,7 @@ use std::{
|
|
|
collections::HashMap,
|
|
|
fmt,
|
|
|
fs::{self, metadata},
|
|
|
+ os::unix::fs::MetadataExt,
|
|
|
path::{Path, PathBuf},
|
|
|
time::SystemTime,
|
|
|
};
|
|
|
@@ -268,12 +269,16 @@ impl Collections {
|
|
|
// de novo
|
|
|
tasks.extend(self.todo_assembler()?);
|
|
|
|
|
|
- // Tasks sorting and dedup
|
|
|
+ // Tasks sorting
|
|
|
+ tasks.sort_by_key(|task| task.get_order());
|
|
|
+
|
|
|
+ // Tasks dedup
|
|
|
let mut hs = HashMap::new();
|
|
|
tasks.into_iter().for_each(|t| {
|
|
|
hs.insert(t.to_string(), t);
|
|
|
});
|
|
|
self.tasks = hs.into_values().collect();
|
|
|
+
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
@@ -285,7 +290,8 @@ impl Collections {
|
|
|
self.tasks = hs.into_values().collect();
|
|
|
}
|
|
|
|
|
|
- pub fn todo_assembler(&mut self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
+ // No pair needed
|
|
|
+ pub fn todo_assembler(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
let mut tasks = Vec::new();
|
|
|
let config = AssemblerConfig::default();
|
|
|
for b in &self.bam.bams {
|
|
|
@@ -333,6 +339,64 @@ impl Collections {
|
|
|
Ok(tasks)
|
|
|
}
|
|
|
|
|
|
+ pub fn bam_pairs(&self) -> Vec<(bam::Bam, bam::Bam)> {
|
|
|
+ let mut ids: Vec<String> = self.bam.bams.iter().map(|b| b.id.clone()).collect();
|
|
|
+ ids.sort();
|
|
|
+ ids.dedup();
|
|
|
+
|
|
|
+ ids.iter()
|
|
|
+ .filter_map(|id| {
|
|
|
+ match (
|
|
|
+ self.bam.get(id, "diag").first(),
|
|
|
+ self.bam.get(id, "mrd").first(),
|
|
|
+ ) {
|
|
|
+ (Some(&diag), Some(&mrd)) => Some((diag.clone(), mrd.clone())),
|
|
|
+ _ => None,
|
|
|
+ }
|
|
|
+ })
|
|
|
+ .collect()
|
|
|
+ }
|
|
|
+
|
|
|
+ // UNTESTED
|
|
|
+ pub fn todo_variants_agg(&self) -> anyhow::Result<Vec<CollectionsTasks>> {
|
|
|
+ let mut tasks = Vec::new();
|
|
|
+ let config = VariantsConfig::default();
|
|
|
+ let vcfs_ids = self.vcf.group_by_id();
|
|
|
+ for pair in &self.bam_pairs() {
|
|
|
+ let const_path = format!(
|
|
|
+ "{}/{}/diag/{}_constit.bytes.gz",
|
|
|
+ &config.result_dir, &pair.0.id, &pair.0.id
|
|
|
+ );
|
|
|
+ let constit = Path::new(&const_path);
|
|
|
+
|
|
|
+ if constit.exists() {
|
|
|
+ let vcfs: Vec<_> = vcfs_ids.iter().filter(|(id, _)| id == &pair.0.id).collect();
|
|
|
+ if let Some((_, vcfs)) = vcfs.first() {
|
|
|
+ let mtime = constit
|
|
|
+ .metadata()
|
|
|
+ .context(format!("Can't access file metadata {const_path}."))?
|
|
|
+ .mtime();
|
|
|
+ let n_new = vcfs
|
|
|
+ .iter()
|
|
|
+ .filter(|vcf| mtime < vcf.file_metadata.mtime())
|
|
|
+ .count();
|
|
|
+ if n_new > 0 {
|
|
|
+ tasks.push(CollectionsTasks::Variants {
|
|
|
+ id: pair.0.id.clone(),
|
|
|
+ config: config.clone(),
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ tasks.push(CollectionsTasks::Variants {
|
|
|
+ id: pair.0.id.clone(),
|
|
|
+ config: config.clone(),
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Ok(tasks)
|
|
|
+ }
|
|
|
+
|
|
|
pub fn run(&mut self) -> anyhow::Result<()> {
|
|
|
// self.tasks.reverse();
|
|
|
if self.tasks.is_empty() {
|
|
|
@@ -344,8 +408,6 @@ impl Collections {
|
|
|
}
|
|
|
} else {
|
|
|
let n_tasks = self.tasks.len();
|
|
|
- let mut tasks = self.tasks.clone();
|
|
|
- tasks.sort_by_key(|task| task.get_order());
|
|
|
warn!("{n_tasks} tasks to run");
|
|
|
let mut i = 1;
|
|
|
while let Some(task) = self.tasks.pop() {
|