|
@@ -1,4 +1,7 @@
|
|
|
-use std::path::PathBuf;
|
|
|
|
|
|
|
+use std::{
|
|
|
|
|
+ fs,
|
|
|
|
|
+ path::{Path, PathBuf},
|
|
|
|
|
+};
|
|
|
|
|
|
|
|
use hashbrown::HashMap;
|
|
use hashbrown::HashMap;
|
|
|
use log::{info, warn};
|
|
use log::{info, warn};
|
|
@@ -17,9 +20,29 @@ use crate::{
|
|
|
|
|
|
|
|
pub mod bam;
|
|
pub mod bam;
|
|
|
pub mod pod5;
|
|
pub mod pod5;
|
|
|
-pub mod vcf;
|
|
|
|
|
pub mod somatic_variants;
|
|
pub mod somatic_variants;
|
|
|
|
|
+pub mod vcf;
|
|
|
|
|
+
|
|
|
|
|
+#[derive(Debug)]
|
|
|
|
|
+pub struct CollectionsConfig {
|
|
|
|
|
+ pub pod_dir: String,
|
|
|
|
|
+ pub corrected_fc_path: String,
|
|
|
|
|
+ pub result_dir: String,
|
|
|
|
|
+ pub min_diag_cov: f32,
|
|
|
|
|
+ pub min_mrd_cov: f32,
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
|
|
+impl Default for CollectionsConfig {
|
|
|
|
|
+ fn default() -> Self {
|
|
|
|
|
+ Self {
|
|
|
|
|
+ pod_dir: "/data/run_data".to_string(),
|
|
|
|
|
+ corrected_fc_path: "/data/flow_cells.tsv".to_string(),
|
|
|
|
|
+ result_dir: "/data/longreads_basic_pipe".to_string(),
|
|
|
|
|
+ min_diag_cov: 15.0,
|
|
|
|
|
+ min_mrd_cov: 10.0,
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
#[derive(Debug)]
|
|
#[derive(Debug)]
|
|
|
pub struct Collections {
|
|
pub struct Collections {
|
|
|
pub pod5: Pod5Collection,
|
|
pub pod5: Pod5Collection,
|
|
@@ -42,7 +65,7 @@ impl Collections {
|
|
|
})
|
|
})
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- pub fn todo(&mut self) {
|
|
|
|
|
|
|
+ pub fn todo(&mut self, min_diag_cov: f32, min_mrd_cov: f32) {
|
|
|
info!("Looking for base calling tasks...");
|
|
info!("Looking for base calling tasks...");
|
|
|
let mut to_demux = Vec::new();
|
|
let mut to_demux = Vec::new();
|
|
|
|
|
|
|
@@ -83,101 +106,118 @@ impl Collections {
|
|
|
.into_values()
|
|
.into_values()
|
|
|
.for_each(|data| self.tasks.push(CollectionsTasks::DemuxAlign(data)));
|
|
.for_each(|data| self.tasks.push(CollectionsTasks::DemuxAlign(data)));
|
|
|
|
|
|
|
|
|
|
+ // Remove VCF anterior to BAM
|
|
|
|
|
+ let vcf_by_id = self.vcf.group_by_id();
|
|
|
|
|
+ vcf_by_id.iter().for_each(|(id, vcfs)| {
|
|
|
|
|
+ if let (Some(diag), Some(mrd)) = (
|
|
|
|
|
+ self.bam.get(id, "diag").first(),
|
|
|
|
|
+ self.bam.get(id, "mrd").first(),
|
|
|
|
|
+ ) {
|
|
|
|
|
+ let diag_modified = diag
|
|
|
|
|
+ .file_metadata
|
|
|
|
|
+ .modified()
|
|
|
|
|
+ .expect("Can't read Bam modified time.");
|
|
|
|
|
+ let mrd_modified = mrd
|
|
|
|
|
+ .file_metadata
|
|
|
|
|
+ .modified()
|
|
|
|
|
+ .expect("Can't read Bam modified time.");
|
|
|
|
|
+ let mut rm_paths: Vec<&Path> = vcfs
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .flat_map(|vcf| {
|
|
|
|
|
+ let vcf_mod = vcf
|
|
|
|
|
+ .file_metadata
|
|
|
|
|
+ .modified()
|
|
|
|
|
+ .expect("Can't read VCF modified time.");
|
|
|
|
|
+
|
|
|
|
|
+ // For somatic caller erase if one bam (diag or mrd) is more recent.
|
|
|
|
|
+ if vcf.caller != "DeepVariant" {
|
|
|
|
|
+ if vcf_mod < diag_modified || vcf_mod < mrd_modified {
|
|
|
|
|
+ vec![vcf.path.parent().unwrap()]
|
|
|
|
|
+ } else {
|
|
|
|
|
+ vec![]
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if (vcf.time_point == "diag" && vcf_mod < diag_modified)
|
|
|
|
|
+ || (vcf.time_point == "mrd" && vcf_mod < mrd_modified)
|
|
|
|
|
+ {
|
|
|
|
|
+ vec![vcf.path.parent().unwrap()]
|
|
|
|
|
+ } else {
|
|
|
|
|
+ vec![]
|
|
|
|
|
+ }
|
|
|
|
|
+ })
|
|
|
|
|
+ .collect();
|
|
|
|
|
+ rm_paths.sort();
|
|
|
|
|
+ rm_paths.dedup();
|
|
|
|
|
+ println!("{rm_paths:#?}");
|
|
|
|
|
+ rm_paths
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .for_each(|p| fs::remove_dir_all(p).expect("Can't erase caller dir."))
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
// Variant calling
|
|
// Variant calling
|
|
|
info!("Looking for variant calling tasks...");
|
|
info!("Looking for variant calling tasks...");
|
|
|
- let mut looked_ids = Vec::new();
|
|
|
|
|
- for (id, vcfs) in self.vcf.group_by_id() {
|
|
|
|
|
|
|
+ self.bam.bams.iter().map(|b| b.id.clone()).for_each(|id| {
|
|
|
if let (Some(diag), Some(mrd)) = (
|
|
if let (Some(diag), Some(mrd)) = (
|
|
|
self.bam.get(&id, "diag").first(),
|
|
self.bam.get(&id, "diag").first(),
|
|
|
self.bam.get(&id, "mrd").first(),
|
|
self.bam.get(&id, "mrd").first(),
|
|
|
) {
|
|
) {
|
|
|
- let caller_time: Vec<(&str, &str)> = vcfs
|
|
|
|
|
- .iter()
|
|
|
|
|
- .map(|vcf| (vcf.caller.as_str(), vcf.time_point.as_str()))
|
|
|
|
|
- .collect();
|
|
|
|
|
|
|
+ if let (Some(diag_cramino), Some(mrd_cramino)) = (&diag.cramino, &mrd.cramino) {
|
|
|
|
|
+ if diag_cramino.mean_coverage >= min_diag_cov.into()
|
|
|
|
|
+ && mrd_cramino.mean_coverage >= min_mrd_cov.into()
|
|
|
|
|
+ {
|
|
|
|
|
+ let caller_time: Vec<(&str, &str)> = vcf_by_id
|
|
|
|
|
+ .iter()
|
|
|
|
|
+ .filter(|(i, _)| *i == id)
|
|
|
|
|
+ .flat_map(|(_, vcfs)| {
|
|
|
|
|
+ vcfs.iter()
|
|
|
|
|
+ .map(|vcf| (vcf.caller.as_str(), vcf.time_point.as_str()))
|
|
|
|
|
+ })
|
|
|
|
|
+ .collect();
|
|
|
|
|
|
|
|
- if !caller_time.contains(&("clairs", "diag"))
|
|
|
|
|
- || !caller_time.contains(&("clairs_indel", "diag"))
|
|
|
|
|
- {
|
|
|
|
|
- self.tasks.push(CollectionsTasks::ClairS {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
- mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: ClairSConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
- }
|
|
|
|
|
- if !caller_time.contains(&("DeepVariant", "diag")) {
|
|
|
|
|
- self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- time_point: "diag".to_string(),
|
|
|
|
|
- bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: DeepVariantConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
- }
|
|
|
|
|
- if !caller_time.contains(&("DeepVariant", "mrd")) {
|
|
|
|
|
- self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- time_point: "mrd".to_string(),
|
|
|
|
|
- bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: DeepVariantConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
- }
|
|
|
|
|
- if !caller_time.contains(&("nanomonsv", "diag")) {
|
|
|
|
|
- self.tasks.push(CollectionsTasks::NanomonSV {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
- mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: NanomonSVConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
|
|
+ if !caller_time.contains(&("clairs", "diag"))
|
|
|
|
|
+ || !caller_time.contains(&("clairs_indel", "diag"))
|
|
|
|
|
+ {
|
|
|
|
|
+ self.tasks.push(CollectionsTasks::ClairS {
|
|
|
|
|
+ id: id.to_string(),
|
|
|
|
|
+ diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
+ mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
+ config: ClairSConfig::default(),
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ if !caller_time.contains(&("DeepVariant", "diag")) {
|
|
|
|
|
+ self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
|
|
+ id: id.to_string(),
|
|
|
|
|
+ time_point: "diag".to_string(),
|
|
|
|
|
+ bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
+ config: DeepVariantConfig::default(),
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ if !caller_time.contains(&("DeepVariant", "mrd")) {
|
|
|
|
|
+ self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
|
|
+ id: id.to_string(),
|
|
|
|
|
+ time_point: "mrd".to_string(),
|
|
|
|
|
+ bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
+ config: DeepVariantConfig::default(),
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ if !caller_time.contains(&("nanomonsv", "diag")) {
|
|
|
|
|
+ self.tasks.push(CollectionsTasks::NanomonSV {
|
|
|
|
|
+ id: id.to_string(),
|
|
|
|
|
+ diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
+ mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
+ config: NanomonSVConfig::default(),
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
- looked_ids.push(id.clone());
|
|
|
|
|
}
|
|
}
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // ids without any vcf
|
|
|
|
|
- self.bam
|
|
|
|
|
- .bams
|
|
|
|
|
- .iter()
|
|
|
|
|
- .map(|b| b.id.clone())
|
|
|
|
|
- .filter(|id| !looked_ids.contains(id))
|
|
|
|
|
- .for_each(|id| {
|
|
|
|
|
- if let (Some(diag), Some(mrd)) = (
|
|
|
|
|
- self.bam.get(&id, "diag").first(),
|
|
|
|
|
- self.bam.get(&id, "mrd").first(),
|
|
|
|
|
- ) {
|
|
|
|
|
- self.tasks.push(CollectionsTasks::ClairS {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
- mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: ClairSConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
-
|
|
|
|
|
- self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- time_point: "diag".to_string(),
|
|
|
|
|
- bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: DeepVariantConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
- self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- time_point: "mrd".to_string(),
|
|
|
|
|
- bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: DeepVariantConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
- self.tasks.push(CollectionsTasks::NanomonSV {
|
|
|
|
|
- id: id.to_string(),
|
|
|
|
|
- diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
|
|
- mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
|
|
- config: NanomonSVConfig::default(),
|
|
|
|
|
- });
|
|
|
|
|
- }
|
|
|
|
|
- });
|
|
|
|
|
|
|
+ });
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pub fn run(&mut self) -> anyhow::Result<()> {
|
|
pub fn run(&mut self) -> anyhow::Result<()> {
|
|
|
- self.tasks.reverse();
|
|
|
|
|
|
|
+ // self.tasks.reverse();
|
|
|
if self.tasks.is_empty() {
|
|
if self.tasks.is_empty() {
|
|
|
- self.todo();
|
|
|
|
|
|
|
+ self.todo(15.0, 10.0);
|
|
|
if self.tasks.is_empty() {
|
|
if self.tasks.is_empty() {
|
|
|
return Ok(());
|
|
return Ok(());
|
|
|
} else {
|
|
} else {
|
|
@@ -259,3 +299,31 @@ impl CollectionsTasks {
|
|
|
Ok(())
|
|
Ok(())
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+pub fn run_tasks(config: CollectionsConfig) -> anyhow::Result<()> {
|
|
|
|
|
+ let mut last_n = Vec::new();
|
|
|
|
|
+ loop {
|
|
|
|
|
+ let mut collection = Collections::new(
|
|
|
|
|
+ &config.pod_dir,
|
|
|
|
|
+ &config.corrected_fc_path,
|
|
|
|
|
+ &config.result_dir,
|
|
|
|
|
+ )?;
|
|
|
|
|
+ collection.todo(config.min_diag_cov, config.min_mrd_cov);
|
|
|
|
|
+ if collection.tasks.is_empty() {
|
|
|
|
|
+ warn!("All results are update");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ let n_tasks = collection.tasks.len();
|
|
|
|
|
+ warn!("{n_tasks} tasks to run");
|
|
|
|
|
+ if last_n.len() > 2
|
|
|
|
|
+ && last_n[last_n.len() - 1] == n_tasks
|
|
|
|
|
+ && last_n[last_n.len() - 2] == n_tasks
|
|
|
|
|
+ {
|
|
|
|
|
+ warn!("Tasks stalled");
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ last_n.push(n_tasks);
|
|
|
|
|
+ collection.run()?;
|
|
|
|
|
+ }
|
|
|
|
|
+ Ok(())
|
|
|
|
|
+}
|