|
|
@@ -2,7 +2,7 @@ use anyhow::{anyhow, Context, Result};
|
|
|
use chrono::{DateTime, Utc};
|
|
|
use csv::ReaderBuilder;
|
|
|
use glob::glob;
|
|
|
-use log::{info, warn};
|
|
|
+use log::warn;
|
|
|
use pandora_lib_pod5::Pod5Info;
|
|
|
use serde::Deserialize;
|
|
|
use std::{
|
|
|
@@ -24,7 +24,7 @@ pub struct Pod5 {
|
|
|
pub file_metadata: Metadata,
|
|
|
}
|
|
|
|
|
|
-#[derive(Debug, Clone)]
|
|
|
+#[derive(Debug, Clone, PartialEq)]
|
|
|
pub enum Pod5Type {
|
|
|
Raw,
|
|
|
Demuxed,
|
|
|
@@ -144,6 +144,8 @@ pub struct FlowCell {
|
|
|
pub struct Runs {
|
|
|
pub importation_date: DateTime<Utc>,
|
|
|
pub runs: Vec<Run>,
|
|
|
+ pub bam_dir: String,
|
|
|
+ pub pod5_dir: String,
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
@@ -151,11 +153,48 @@ pub struct FlowCellCase {
|
|
|
pub id: String,
|
|
|
pub time_point: String,
|
|
|
pub barcode: String,
|
|
|
+ pub basecalled: Option<bool>,
|
|
|
+}
|
|
|
+
|
|
|
+impl FlowCellCase {
+    /// Reports whether this case has already been basecalled for `acquisition_id`.
+    ///
+    /// The case counts as basecalled when the file
+    /// `{bam_dir}/{id}/{time_point}/{id}_{time_point}_hs1.bam` (time point lowercased)
+    /// exists and at least one entry returned by `pandora_lib_pileup::bam_compo` has a
+    /// name whose prefix, up to its first `_`, equals `acquisition_id`. Entry names
+    /// without any `_` are ignored.
+    ///
+    /// An answer already stored in `self.basecalled` is returned without touching the
+    /// filesystem. Only a positive result is stored; a negative one leaves the field
+    /// unchanged, so a later call inspects the BAM again.
+    ///
+    /// A BAM that exists but cannot be inspected is logged with `warn!` and reported as
+    /// not basecalled instead of panicking.
+    pub fn basecalled(&mut self, bam_dir: &str, acquisition_id: String) -> bool {
+        if let Some(known) = self.basecalled {
+            return known;
+        }
+
+        let tp = self.time_point.to_lowercase();
+        let bam = format!("{bam_dir}/{id}/{tp}/{id}_{tp}_hs1.bam", id = self.id);
+        if !PathBuf::from(&bam).exists() {
+            return false;
+        }
+
+        // The second argument is forwarded unchanged; its meaning is defined by `bam_compo`.
+        let compo = match pandora_lib_pileup::bam_compo(bam.as_str(), 20000) {
+            std::result::Result::Ok(compo) => compo,
+            std::result::Result::Err(e) => {
+                // One unreadable BAM must not abort the caller.
+                warn!("Cannot inspect {bam}: {e:?}");
+                return false;
+            }
+        };
+
+        let has_id = compo.iter().any(|(rg, _)| {
+            rg.split_once('_')
+                .map_or(false, |(fc_id, _)| fc_id == acquisition_id)
+        });
+
+        if has_id {
+            self.basecalled = Some(true);
+        }
+        has_id
+    }
}
|
|
|
|
|
|
impl Runs {
|
|
|
- pub fn import_dir(dir: &str, corrected_fc_path: &str) -> Result<Self> {
|
|
|
- let pod5 = list_pod_files(dir)?;
|
|
|
+ pub fn import_dir(pod5_dir: &str, corrected_fc_path: &str, bam_dir: &str) -> Result<Self> {
|
|
|
+ let pod5 = list_pod_files(pod5_dir)?;
|
|
|
println!("N pod5 {}", pod5.len());
|
|
|
|
|
|
let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
|
|
|
@@ -198,10 +237,15 @@ impl Runs {
|
|
|
|
|
|
let cases: Vec<FlowCellCase> = sel
|
|
|
.iter()
|
|
|
- .map(|e| FlowCellCase {
|
|
|
- id: e.id.clone(),
|
|
|
- time_point: e.time_point.clone(),
|
|
|
- barcode: e.barcode_number.clone(),
|
|
|
+ .map(|e| {
|
|
|
+ let mut c = FlowCellCase {
|
|
|
+ id: e.id.clone(),
|
|
|
+ time_point: e.time_point.clone(),
|
|
|
+ barcode: e.barcode_number.clone(),
|
|
|
+ basecalled: None,
|
|
|
+ };
|
|
|
+ c.basecalled(bam_dir, pod5_info.acquisition_id.clone());
|
|
|
+ c
|
|
|
})
|
|
|
.collect();
|
|
|
|
|
|
@@ -235,6 +279,8 @@ impl Runs {
|
|
|
Ok(Self {
|
|
|
importation_date: Utc::now(),
|
|
|
runs,
|
|
|
+ bam_dir: bam_dir.to_string(),
|
|
|
+ pod5_dir: pod5_dir.to_string(),
|
|
|
})
|
|
|
}
|
|
|
|
|
|
@@ -267,38 +313,14 @@ impl Runs {
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- pub fn check_local(&self, dir: &str) -> anyhow::Result<()> {
|
|
|
+ pub fn check_local(&self) -> anyhow::Result<()> {
|
|
|
let mut res = Vec::new();
|
|
|
for run in self.runs.iter() {
|
|
|
for fc in run.flowcells.iter() {
|
|
|
for c in fc.cases.iter() {
|
|
|
- let bases_called = if let std::result::Result::Ok(p) =
|
|
|
- PathBuf::from_str(&format!(
|
|
|
- "{dir}/{}/{}/{}_{}_hs1.bam",
|
|
|
- c.id,
|
|
|
- c.time_point.to_lowercase(),
|
|
|
- c.id,
|
|
|
- c.time_point.to_lowercase()
|
|
|
- )) {
|
|
|
- if p.exists() {
|
|
|
- let has_id = pandora_lib_pileup::bam_compo(p.to_str().unwrap(), 20000).unwrap()
|
|
|
- .iter()
|
|
|
- .flat_map(|(rg, _)| {
|
|
|
- if let Some(index) = rg.find('_') {
|
|
|
- let fc_id: &str = &rg[..index];
|
|
|
- vec![fc_id.to_string()]
|
|
|
- } else {
|
|
|
- vec![]
|
|
|
- }
|
|
|
- })
|
|
|
- .filter(|i| *i == fc.pod5_info.acquisition_id)
|
|
|
- .count()
|
|
|
- > 0;
|
|
|
- if has_id {
|
|
|
- "✅".to_string()
|
|
|
- } else {
|
|
|
- "❌".to_string()
|
|
|
- }
|
|
|
+ let bases_called = if let Some(b) = c.basecalled {
|
|
|
+ if b {
|
|
|
+ "✅".to_string()
|
|
|
} else {
|
|
|
"❌".to_string()
|
|
|
}
|
|
|
@@ -325,6 +347,117 @@ impl Runs {
|
|
|
println!("{}", res.join("\n"));
|
|
|
Ok(())
|
|
|
}
|
|
|
+
|
|
|
+    /// Prints one record for every flowcell whose cases are all basecalled.
+    ///
+    /// A flowcell qualifies when it has at least one case and every case carries
+    /// `basecalled == Some(true)`. Each record holds three tab-separated fields:
+    /// `run_name/flowcell_name`, the acquisition id, and the pretty-printed (`{:#?}`)
+    /// list of cases.
+    pub fn fc_done(&self) {
+        let flowcells = self
+            .runs
+            .iter()
+            .flat_map(|run| run.flowcells.iter().map(move |fc| (run, fc)));
+
+        for (run, fc) in flowcells {
+            let all_called = fc.cases.iter().all(|c| c.basecalled == Some(true));
+            if fc.cases.is_empty() || !all_called {
+                continue;
+            }
+
+            println!(
+                "{}/{}\t{}\t{:#?}",
+                run.run_name, fc.flowcell_name, fc.pod5_info.acquisition_id, fc.cases
+            );
+        }
+    }
|
|
|
+
|
|
|
+    /// Prints the shell command needed for every case that is not yet basecalled.
+    ///
+    /// A case is pending when its `basecalled` flag is anything other than `Some(true)`.
+    /// For each flowcell with at least one pending case:
+    ///
+    /// * `Pod5Type::Raw` with only some cases pending: prints
+    ///   `No solution for: <run>/<flowcell>`.
+    /// * `Pod5Type::Raw` with every case pending: prints a single
+    ///   `from_mux.sh <pod5_dir>/<run>/<flowcell>` command followed by a
+    ///   `<barcode> <id> <time_point>` triple per case (barcode with `NB` removed,
+    ///   time point lowercased).
+    /// * `Pod5Type::Demuxed`: prints one command per pending case, `complete_bam.sh`
+    ///   when `{bam_dir}/{id}/{tp}/{id}_{tp}_hs1.bam` already exists and `dorado.sh`
+    ///   otherwise, with the case id, the lowercased time point and the directory of
+    ///   the first pod5 file whose path contains `barcode<barcode>`.
+    ///
+    /// A demuxed case with no matching pod5 file is logged with `warn!` and skipped.
+    pub fn todo(&self) {
+        for run in self.runs.iter() {
+            for fc in run.flowcells.iter() {
+                let to_call: Vec<_> = fc
+                    .cases
+                    .iter()
+                    .filter(|c| c.basecalled != Some(true))
+                    .collect();
+
+                if to_call.is_empty() {
+                    continue;
+                }
+
+                match fc.pod5_type {
+                    // Raw pod5 with only some cases pending: no command is emitted,
+                    // just this notice.
+                    Pod5Type::Raw if to_call.len() != fc.cases.len() => {
+                        println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
+                    }
+                    Pod5Type::Raw => {
+                        let cases: Vec<String> = to_call
+                            .iter()
+                            .map(|c| {
+                                let bc = c.barcode.replace("NB", "");
+                                let tp = c.time_point.to_lowercase();
+                                [bc, c.id.to_string(), tp].join(" ")
+                            })
+                            .collect();
+                        println!(
+                            "from_mux.sh {}/{}/{} {}",
+                            self.pod5_dir,
+                            run.run_name,
+                            fc.flowcell_name,
+                            cases.join(" ")
+                        );
+                    }
+                    Pod5Type::Demuxed => {
+                        for c in &to_call {
+                            let bc = c.barcode.replace("NB", "");
+                            let tp = c.time_point.to_lowercase();
+
+                            // Use the lowercased time point, exactly as
+                            // `FlowCellCase::basecalled` does when it locates the BAM.
+                            let bam = format!(
+                                "{}/{}/{tp}/{}_{tp}_hs1.bam",
+                                self.bam_dir, c.id, c.id
+                            );
+                            let script = if PathBuf::from(bam).exists() {
+                                "complete_bam.sh"
+                            } else {
+                                "dorado.sh"
+                            };
+
+                            let needle = format!("barcode{bc}");
+                            let pod = match fc.pod5.iter().find(|p| p.path.contains(&needle)) {
+                                Some(pod) => pod,
+                                None => {
+                                    warn!(
+                                        "No pod5 file matching {needle} in {}/{}: skipping case {}",
+                                        run.run_name, fc.flowcell_name, c.id
+                                    );
+                                    continue;
+                                }
+                            };
+
+                            // Drop the file name to keep only the containing directory.
+                            let mut pod_dir = PathBuf::from(pod.path.to_string());
+                            pod_dir.pop();
+
+                            println!(
+                                "{} {} {} {}",
+                                script,
+                                c.id,
+                                tp,
+                                pod_dir.to_string_lossy()
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    }
|
|
|
}
|
|
|
|
|
|
#[derive(Debug, Deserialize, Clone)]
|