|
|
@@ -1,9 +1,5 @@
|
|
|
-use std::{
|
|
|
- fs,
|
|
|
- path::{Path, PathBuf},
|
|
|
-};
|
|
|
+use std::{collections::HashMap, fmt, fs, path::Path};
|
|
|
|
|
|
-use hashbrown::HashMap;
|
|
|
use log::{info, warn};
|
|
|
|
|
|
use self::{bam::BamCollection, pod5::Pod5Collection, vcf::VcfCollection};
|
|
|
@@ -13,14 +9,14 @@ use crate::{
|
|
|
deep_variant::{DeepVariant, DeepVariantConfig},
|
|
|
nanomonsv::{NanomonSV, NanomonSVConfig},
|
|
|
},
|
|
|
- collection::pod5::{FlowCellCase, Pod5Type},
|
|
|
- commands::dorado::Dorado,
|
|
|
+ collection::pod5::FlowCellCase,
|
|
|
+ commands::dorado::Dorado as BasecallAlign,
|
|
|
config::Config,
|
|
|
};
|
|
|
|
|
|
pub mod bam;
|
|
|
pub mod pod5;
|
|
|
-pub mod somatic_variants;
|
|
|
+pub mod variants;
|
|
|
pub mod vcf;
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
@@ -67,44 +63,45 @@ impl Collections {
|
|
|
|
|
|
pub fn todo(&mut self, min_diag_cov: f32, min_mrd_cov: f32) {
|
|
|
info!("Looking for base calling tasks...");
|
|
|
- let mut to_demux = Vec::new();
|
|
|
-
|
|
|
- for run in self.pod5.runs.iter() {
|
|
|
- for fc in run.flowcells.iter() {
|
|
|
- let acq_id = fc.pod5_info.acquisition_id.clone();
|
|
|
- for case in fc.cases.iter() {
|
|
|
- let bams_ids: Vec<String> = self
|
|
|
- .bam
|
|
|
- .get(&case.id, &case.time_point)
|
|
|
- .iter()
|
|
|
- .flat_map(|b| {
|
|
|
- b.composition
|
|
|
- .iter()
|
|
|
- .map(|c| c.0.clone())
|
|
|
- .collect::<Vec<String>>()
|
|
|
- })
|
|
|
- .filter(|id| *id == acq_id)
|
|
|
- .collect();
|
|
|
- if bams_ids.is_empty() {
|
|
|
- match fc.pod5_type {
|
|
|
- Pod5Type::Raw => to_demux.push(case.clone()),
|
|
|
- Pod5Type::Demuxed => {
|
|
|
- self.tasks.push(CollectionsTasks::Align(case.clone()))
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Group for muxed and push task with all cases
|
|
|
- let mut grouped: HashMap<PathBuf, Vec<FlowCellCase>> = HashMap::new();
|
|
|
- for case in to_demux {
|
|
|
- grouped.entry(case.pod_dir.clone()).or_default().push(case);
|
|
|
- }
|
|
|
- grouped
|
|
|
- .into_values()
|
|
|
- .for_each(|data| self.tasks.push(CollectionsTasks::DemuxAlign(data)));
|
|
|
+ let mut tasks = Vec::new();
|
|
|
+ // let mut to_demux = Vec::new();
|
|
|
+ //
|
|
|
+ // for run in self.pod5.runs.iter() {
|
|
|
+ // for fc in run.flowcells.iter() {
|
|
|
+ // let acq_id = fc.pod5_info.acquisition_id.clone();
|
|
|
+ // for case in fc.cases.iter() {
|
|
|
+ // let bams_ids: Vec<String> = self
|
|
|
+ // .bam
|
|
|
+ // .get(&case.id, &case.time_point)
|
|
|
+ // .iter()
|
|
|
+ // .flat_map(|b| {
|
|
|
+ // b.composition
|
|
|
+ // .iter()
|
|
|
+ // .map(|c| c.0.clone())
|
|
|
+ // .collect::<Vec<String>>()
|
|
|
+ // })
|
|
|
+ // .filter(|id| *id == acq_id)
|
|
|
+ // .collect();
|
|
|
+ // if bams_ids.is_empty() {
|
|
|
+ // match fc.pod5_type {
|
|
|
+ // Pod5Type::Raw => to_demux.push(case.clone()),
|
|
|
+ // Pod5Type::Demuxed => {
|
|
|
+ // tasks.push(CollectionsTasks::Align(case.clone()))
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ //
|
|
|
+ // // Group for muxed and push task with all cases
|
|
|
+ // let mut grouped: HashMap<PathBuf, Vec<FlowCellCase>> = HashMap::new();
|
|
|
+ // for case in to_demux {
|
|
|
+ // grouped.entry(case.pod_dir.clone()).or_default().push(case);
|
|
|
+ // }
|
|
|
+ // grouped
|
|
|
+ // .into_values()
|
|
|
+ // .for_each(|data| tasks.push(CollectionsTasks::DemuxAlign(data)));
|
|
|
|
|
|
// Remove VCF anterior to BAM
|
|
|
let vcf_by_id = self.vcf.group_by_id();
|
|
|
@@ -176,7 +173,7 @@ impl Collections {
|
|
|
if !caller_time.contains(&("clairs", "diag"))
|
|
|
|| !caller_time.contains(&("clairs_indel", "diag"))
|
|
|
{
|
|
|
- self.tasks.push(CollectionsTasks::ClairS {
|
|
|
+ tasks.push(CollectionsTasks::ClairS {
|
|
|
id: id.to_string(),
|
|
|
diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
@@ -184,7 +181,7 @@ impl Collections {
|
|
|
});
|
|
|
}
|
|
|
if !caller_time.contains(&("DeepVariant", "diag")) {
|
|
|
- self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
+ tasks.push(CollectionsTasks::DeepVariant {
|
|
|
id: id.to_string(),
|
|
|
time_point: "diag".to_string(),
|
|
|
bam: diag.path.to_str().unwrap().to_string(),
|
|
|
@@ -192,7 +189,7 @@ impl Collections {
|
|
|
});
|
|
|
}
|
|
|
if !caller_time.contains(&("DeepVariant", "mrd")) {
|
|
|
- self.tasks.push(CollectionsTasks::DeepVariant {
|
|
|
+ tasks.push(CollectionsTasks::DeepVariant {
|
|
|
id: id.to_string(),
|
|
|
time_point: "mrd".to_string(),
|
|
|
bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
@@ -200,7 +197,7 @@ impl Collections {
|
|
|
});
|
|
|
}
|
|
|
if !caller_time.contains(&("nanomonsv", "diag")) {
|
|
|
- self.tasks.push(CollectionsTasks::NanomonSV {
|
|
|
+ tasks.push(CollectionsTasks::NanomonSV {
|
|
|
id: id.to_string(),
|
|
|
diag_bam: diag.path.to_str().unwrap().to_string(),
|
|
|
mrd_bam: mrd.path.to_str().unwrap().to_string(),
|
|
|
@@ -211,6 +208,12 @@ impl Collections {
|
|
|
}
|
|
|
}
|
|
|
});
|
|
|
+ let mut hs = HashMap::new();
|
|
|
+ tasks.into_iter().for_each(|t| {
|
|
|
+ hs.insert(t.to_string(), t);
|
|
|
+ });
|
|
|
+
|
|
|
+ self.tasks = hs.into_values().collect();
|
|
|
}
|
|
|
|
|
|
pub fn run(&mut self) -> anyhow::Result<()> {
|
|
|
@@ -265,10 +268,10 @@ impl CollectionsTasks {
|
|
|
pub fn run(self) -> anyhow::Result<()> {
|
|
|
match self {
|
|
|
CollectionsTasks::Align(case) => {
|
|
|
- Dorado::init(case.clone(), Config::default())?.run_pipe()?;
|
|
|
+ BasecallAlign::init(case.clone(), Config::default())?.run_pipe()?;
|
|
|
}
|
|
|
CollectionsTasks::DemuxAlign(cases) => {
|
|
|
- Dorado::from_mux(cases, Config::default())?;
|
|
|
+ BasecallAlign::from_mux(cases, Config::default())?;
|
|
|
}
|
|
|
CollectionsTasks::DeepVariant {
|
|
|
id,
|
|
|
@@ -299,6 +302,54 @@ impl CollectionsTasks {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+// Implement Display for CollectionsTasks
|
|
|
+impl fmt::Display for CollectionsTasks {
|
|
|
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
+ use CollectionsTasks::*;
|
|
|
+
|
|
|
+ match self {
|
|
|
+ Align(case) => write!(f, "Align task with: {:#?}", case),
|
|
|
+ DemuxAlign(cases) => write!(f, "DemuxAlign task with: {:#?}", cases),
|
|
|
+ DeepVariant {
|
|
|
+ id,
|
|
|
+ time_point,
|
|
|
+ bam,
|
|
|
+ ..
|
|
|
+ } => {
|
|
|
+ write!(
|
|
|
+ f,
|
|
|
+ "DeepVariant task with id: {}, time_point: {}, bam: {}",
|
|
|
+ id, time_point, bam
|
|
|
+ )
|
|
|
+ }
|
|
|
+ ClairS {
|
|
|
+ id,
|
|
|
+ diag_bam,
|
|
|
+ mrd_bam,
|
|
|
+ ..
|
|
|
+ } => {
|
|
|
+ write!(
|
|
|
+ f,
|
|
|
+ "ClairS task with id: {}, diag_bam: {}, mrd_bam: {}",
|
|
|
+ id, diag_bam, mrd_bam
|
|
|
+ )
|
|
|
+ }
|
|
|
+ NanomonSV {
|
|
|
+ id,
|
|
|
+ diag_bam,
|
|
|
+ mrd_bam,
|
|
|
+ ..
|
|
|
+ } => {
|
|
|
+ write!(
|
|
|
+ f,
|
|
|
+ "NanomonSV task with id: {}, diag_bam: {}, mrd_bam: {}",
|
|
|
+ id, diag_bam, mrd_bam
|
|
|
+ )
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
pub fn run_tasks(config: CollectionsConfig) -> anyhow::Result<()> {
|
|
|
let mut last_n = Vec::new();
|
|
|
loop {
|
|
|
@@ -318,7 +369,7 @@ pub fn run_tasks(config: CollectionsConfig) -> anyhow::Result<()> {
|
|
|
&& last_n[last_n.len() - 1] == n_tasks
|
|
|
&& last_n[last_n.len() - 2] == n_tasks
|
|
|
{
|
|
|
- warn!("Tasks stalled");
|
|
|
+ warn!("Tasks don't progress");
|
|
|
break;
|
|
|
}
|
|
|
last_n.push(n_tasks);
|