|
|
@@ -2,7 +2,7 @@ use anyhow::{anyhow, Context, Result};
|
|
|
use chrono::{DateTime, Utc};
|
|
|
use csv::ReaderBuilder;
|
|
|
use glob::glob;
|
|
|
-use log::warn;
|
|
|
+use log::{info, warn};
|
|
|
use pandora_lib_pod5::Pod5Info;
|
|
|
use serde::Deserialize;
|
|
|
use std::{
|
|
|
@@ -17,7 +17,7 @@ use std::{
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
pub struct Pod5 {
|
|
|
- pub path: String,
|
|
|
+ pub path: PathBuf,
|
|
|
pub pod5_type: Pod5Type,
|
|
|
pub run_name: String,
|
|
|
pub flowcell_name: String,
|
|
|
@@ -89,7 +89,7 @@ impl Pod5 {
|
|
|
.to_string();
|
|
|
|
|
|
Ok(Self {
|
|
|
- path: s.to_string(),
|
|
|
+ path: path.to_path_buf(),
|
|
|
pod5_type,
|
|
|
run_name,
|
|
|
flowcell_name,
|
|
|
@@ -146,8 +146,24 @@ pub struct FlowCell {
|
|
|
pub pod5: Vec<Pod5>,
|
|
|
}
|
|
|
|
|
|
+// impl FlowCell {
|
|
|
+// pub fn cases_pod5_dir(&self) -> Vec<PathBuf> {
|
|
|
+// match self.pod5_type {
|
|
|
+// Pod5Type::Raw => {
|
|
|
+// let p = self.pod5.first().unwrap();
|
|
|
+// vec![p.path.parent().unwrap().to_path_buf()]
|
|
|
+// },
|
|
|
+// Pod5Type::Demuxed => {
|
|
|
+// self.cases.iter().map(|c| {
|
|
|
+// let str_barcode = format!("barcode{}", c.barcode);
|
|
|
+// })
|
|
|
+// },
|
|
|
+// }
|
|
|
+// }
|
|
|
+// }
|
|
|
+
|
|
|
#[derive(Debug)]
|
|
|
-pub struct Runs {
|
|
|
+pub struct Pod5Collection {
|
|
|
pub importation_date: DateTime<Utc>,
|
|
|
pub runs: Vec<Run>,
|
|
|
pub bam_dir: String,
|
|
|
@@ -159,49 +175,50 @@ pub struct FlowCellCase {
|
|
|
pub id: String,
|
|
|
pub time_point: String,
|
|
|
pub barcode: String,
|
|
|
- pub basecalled: Option<bool>,
|
|
|
+ pub pod_dir: PathBuf,
|
|
|
+ // pub basecalled: Option<bool>,
|
|
|
}
|
|
|
|
|
|
impl FlowCellCase {
|
|
|
- pub fn basecalled(&mut self, bam_dir: &str, acquisition_id: String) -> bool {
|
|
|
- if let Some(b) = self.basecalled {
|
|
|
- return b;
|
|
|
- } else if let std::result::Result::Ok(p) = PathBuf::from_str(&format!(
|
|
|
- "{bam_dir}/{}/{}/{}_{}_hs1.bam",
|
|
|
- self.id,
|
|
|
- self.time_point.to_lowercase(),
|
|
|
- self.id,
|
|
|
- self.time_point.to_lowercase()
|
|
|
- )) {
|
|
|
- if p.exists() {
|
|
|
- let has_id = pandora_lib_pileup::bam_compo(p.to_str().unwrap(), 20000)
|
|
|
- .unwrap()
|
|
|
- .iter()
|
|
|
- .flat_map(|(rg, _)| {
|
|
|
- if let Some(index) = rg.find('_') {
|
|
|
- let fc_id: &str = &rg[..index];
|
|
|
- vec![fc_id.to_string()]
|
|
|
- } else {
|
|
|
- vec![]
|
|
|
- }
|
|
|
- })
|
|
|
- .filter(|i| *i == acquisition_id)
|
|
|
- .count()
|
|
|
- > 0;
|
|
|
- if has_id {
|
|
|
- self.basecalled = Some(true);
|
|
|
- return true;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- false
|
|
|
- }
|
|
|
+ // pub fn basecalled(&mut self, bam_dir: &str, acquisition_id: String) -> bool {
|
|
|
+ // if let Some(b) = self.basecalled {
|
|
|
+ // return b;
|
|
|
+ // } else if let std::result::Result::Ok(p) = PathBuf::from_str(&format!(
|
|
|
+ // "{bam_dir}/{}/{}/{}_{}_hs1.bam",
|
|
|
+ // self.id,
|
|
|
+ // self.time_point.to_lowercase(),
|
|
|
+ // self.id,
|
|
|
+ // self.time_point.to_lowercase()
|
|
|
+ // )) {
|
|
|
+ // if p.exists() {
|
|
|
+ // let has_id = pandora_lib_pileup::bam_compo(p.to_str().unwrap(), 20000)
|
|
|
+ // .unwrap()
|
|
|
+ // .iter()
|
|
|
+ // .flat_map(|(rg, _)| {
|
|
|
+ // if let Some(index) = rg.find('_') {
|
|
|
+ // let fc_id: &str = &rg[..index];
|
|
|
+ // vec![fc_id.to_string()]
|
|
|
+ // } else {
|
|
|
+ // vec![]
|
|
|
+ // }
|
|
|
+ // })
|
|
|
+ // .filter(|i| *i == acquisition_id)
|
|
|
+ // .count()
|
|
|
+ // > 0;
|
|
|
+ // if has_id {
|
|
|
+ // self.basecalled = Some(true);
|
|
|
+ // return true;
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // false
|
|
|
+ // }
|
|
|
}
|
|
|
|
|
|
-impl Runs {
|
|
|
+impl Pod5Collection {
|
|
|
pub fn import_dir(pod5_dir: &str, corrected_fc_path: &str, bam_dir: &str) -> Result<Self> {
|
|
|
let pod5 = list_pod_files(pod5_dir)?;
|
|
|
- println!("N pod5 {}", pod5.len());
|
|
|
+ info!("n pod5 {}", pod5.len());
|
|
|
|
|
|
let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
|
|
|
for pod in pod5 {
|
|
|
@@ -214,7 +231,7 @@ impl Runs {
|
|
|
.into_values()
|
|
|
.map(|v| {
|
|
|
let first = &v[0];
|
|
|
- let pod5_info = Pod5Info::from_pod5(&first.path);
|
|
|
+ let pod5_info = Pod5Info::from_pod5(first.path.to_str().unwrap());
|
|
|
let flowcell_name = first.flowcell_name.clone();
|
|
|
|
|
|
let sel: Vec<FCLine> = corrected_fc
|
|
|
@@ -244,14 +261,23 @@ impl Runs {
|
|
|
let cases: Vec<FlowCellCase> = sel
|
|
|
.iter()
|
|
|
.map(|e| {
|
|
|
- let mut c = FlowCellCase {
|
|
|
+ let pod_dir = match first.pod5_type {
|
|
|
+ Pod5Type::Raw => {
|
|
|
+ first.path.parent().unwrap().to_path_buf()
|
|
|
+ }
|
|
|
+ Pod5Type::Demuxed => {
|
|
|
+ let mut bc_dir = first.path.parent().unwrap().parent().unwrap().to_path_buf();
|
|
|
+ bc_dir.push(format!("barcode{}", e.barcode_number.replace("NB", "")));
|
|
|
+ bc_dir
|
|
|
+ },
|
|
|
+ };
|
|
|
+
|
|
|
+ FlowCellCase {
|
|
|
id: e.id.clone(),
|
|
|
time_point: e.time_point.clone(),
|
|
|
barcode: e.barcode_number.clone(),
|
|
|
- basecalled: None,
|
|
|
- };
|
|
|
- c.basecalled(bam_dir, pod5_info.acquisition_id.clone());
|
|
|
- c
|
|
|
+ pod_dir,
|
|
|
+ }
|
|
|
})
|
|
|
.collect();
|
|
|
|
|
|
@@ -319,152 +345,152 @@ impl Runs {
|
|
|
});
|
|
|
}
|
|
|
|
|
|
- pub fn check_local(&self) -> anyhow::Result<()> {
|
|
|
- let mut res = Vec::new();
|
|
|
- for run in self.runs.iter() {
|
|
|
- for fc in run.flowcells.iter() {
|
|
|
- for c in fc.cases.iter() {
|
|
|
- let bases_called = if let Some(b) = c.basecalled {
|
|
|
- if b {
|
|
|
- "✅".to_string()
|
|
|
- } else {
|
|
|
- "❌".to_string()
|
|
|
- }
|
|
|
- } else {
|
|
|
- "❌".to_string()
|
|
|
- };
|
|
|
-
|
|
|
- let s = [
|
|
|
- c.id.to_string(),
|
|
|
- c.time_point.to_string(),
|
|
|
- c.barcode.to_string(),
|
|
|
- run.run_name.clone(),
|
|
|
- fc.flowcell_name.to_string(),
|
|
|
- fc.pod5_type.to_string(),
|
|
|
- fc.pod5_info.acquisition_id.clone(),
|
|
|
- bases_called,
|
|
|
- ]
|
|
|
- .join("\t");
|
|
|
- res.push(s);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- res.sort();
|
|
|
- println!("{}", res.join("\n"));
|
|
|
- Ok(())
|
|
|
- }
|
|
|
-
|
|
|
- pub fn fc_done(&self) {
|
|
|
- for run in self.runs.iter() {
|
|
|
- for fc in run.flowcells.iter() {
|
|
|
- let n_called = fc
|
|
|
- .cases
|
|
|
- .iter()
|
|
|
- .filter(|c| if let Some(b) = c.basecalled { b } else { false })
|
|
|
- .count();
|
|
|
- if n_called != 0 && n_called == fc.cases.len() {
|
|
|
- let s = [
|
|
|
- format!("{}/{}", run.run_name, fc.flowcell_name),
|
|
|
- fc.pod5_info.acquisition_id.to_string(),
|
|
|
- format!("{:#?}", fc.cases),
|
|
|
- ]
|
|
|
- .join("\t");
|
|
|
- println!("{s}");
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- pub fn todo(&self) {
|
|
|
- let run_dir = &self.pod5_dir;
|
|
|
- for run in self.runs.iter() {
|
|
|
- for fc in run.flowcells.iter() {
|
|
|
- let to_call: Vec<_> = fc
|
|
|
- .cases
|
|
|
- .iter()
|
|
|
- .filter(|c| if let Some(b) = c.basecalled { !b } else { true })
|
|
|
- .collect();
|
|
|
-
|
|
|
- if !to_call.is_empty() {
|
|
|
- if fc.pod5_type == Pod5Type::Raw && to_call.len() != fc.cases.len() {
|
|
|
- println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
|
|
|
- } else {
|
|
|
- match fc.pod5_type {
|
|
|
- Pod5Type::Raw => {
|
|
|
- let cases: Vec<String> = to_call
|
|
|
- .iter()
|
|
|
- .map(|c| {
|
|
|
- let bc = c.barcode.replace("NB", "");
|
|
|
- let tp = c.time_point.to_lowercase();
|
|
|
- [bc, c.id.to_string(), tp].join(" ")
|
|
|
- })
|
|
|
- .collect();
|
|
|
- println!(
|
|
|
- "from_mux.sh {}/{}/{} {}",
|
|
|
- run_dir,
|
|
|
- run.run_name,
|
|
|
- fc.flowcell_name,
|
|
|
- cases.join(" ")
|
|
|
- );
|
|
|
- }
|
|
|
- Pod5Type::Demuxed => to_call.iter().for_each(|c| {
|
|
|
- let bc = c.barcode.replace("NB", "");
|
|
|
- let tp = c.time_point.to_lowercase();
|
|
|
- let bam = format!(
|
|
|
- "{}/{}/{}/{}_{}_hs1.bam",
|
|
|
- self.bam_dir, c.id, c.time_point, c.id, c.time_point
|
|
|
- );
|
|
|
- if PathBuf::from(bam).exists() {
|
|
|
- let pod_dir: Vec<String> = fc
|
|
|
- .pod5
|
|
|
- .iter()
|
|
|
- .filter(|p| {
|
|
|
- p.path.contains(&format!("barcode{}", bc.clone()))
|
|
|
- })
|
|
|
- .take(1)
|
|
|
- .map(|p| p.path.to_string())
|
|
|
- .collect();
|
|
|
-
|
|
|
- let pod_dir = pod_dir.first().unwrap();
|
|
|
- let mut pod_dir = PathBuf::from(pod_dir);
|
|
|
- pod_dir.pop();
|
|
|
-
|
|
|
- // TODO sheduler
|
|
|
- println!(
|
|
|
- "complete_bam.sh {} {} {}",
|
|
|
- c.id,
|
|
|
- tp,
|
|
|
- pod_dir.to_string_lossy()
|
|
|
- )
|
|
|
- } else {
|
|
|
- let pod_dir: Vec<String> = fc
|
|
|
- .pod5
|
|
|
- .iter()
|
|
|
- .filter(|p| {
|
|
|
- p.path.contains(&format!("barcode{}", bc.clone()))
|
|
|
- })
|
|
|
- .take(1)
|
|
|
- .map(|p| p.path.to_string())
|
|
|
- .collect();
|
|
|
-
|
|
|
- let pod_dir = pod_dir.first().unwrap();
|
|
|
- let mut pod_dir = PathBuf::from(pod_dir);
|
|
|
- pod_dir.pop();
|
|
|
-
|
|
|
- println!(
|
|
|
- "dorado.sh {} {} {}",
|
|
|
- c.id,
|
|
|
- tp,
|
|
|
- pod_dir.to_string_lossy()
|
|
|
- )
|
|
|
- }
|
|
|
- }),
|
|
|
- };
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ // pub fn check_local(&self) -> anyhow::Result<()> {
|
|
|
+ // let mut res = Vec::new();
|
|
|
+ // for run in self.runs.iter() {
|
|
|
+ // for fc in run.flowcells.iter() {
|
|
|
+ // for c in fc.cases.iter() {
|
|
|
+ // let bases_called = if let Some(b) = c.basecalled {
|
|
|
+ // if b {
|
|
|
+ // "✅".to_string()
|
|
|
+ // } else {
|
|
|
+ // "❌".to_string()
|
|
|
+ // }
|
|
|
+ // } else {
|
|
|
+ // "❌".to_string()
|
|
|
+ // };
|
|
|
+ //
|
|
|
+ // let s = [
|
|
|
+ // c.id.to_string(),
|
|
|
+ // c.time_point.to_string(),
|
|
|
+ // c.barcode.to_string(),
|
|
|
+ // run.run_name.clone(),
|
|
|
+ // fc.flowcell_name.to_string(),
|
|
|
+ // fc.pod5_type.to_string(),
|
|
|
+ // fc.pod5_info.acquisition_id.clone(),
|
|
|
+ // bases_called,
|
|
|
+ // ]
|
|
|
+ // .join("\t");
|
|
|
+ // res.push(s);
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // res.sort();
|
|
|
+ // println!("{}", res.join("\n"));
|
|
|
+ // Ok(())
|
|
|
+ // }
|
|
|
+
|
|
|
+ // pub fn fc_done(&self) {
|
|
|
+ // for run in self.runs.iter() {
|
|
|
+ // for fc in run.flowcells.iter() {
|
|
|
+ // let n_called = fc
|
|
|
+ // .cases
|
|
|
+ // .iter()
|
|
|
+ // .filter(|c| if let Some(b) = c.basecalled { b } else { false })
|
|
|
+ // .count();
|
|
|
+ // if n_called != 0 && n_called == fc.cases.len() {
|
|
|
+ // let s = [
|
|
|
+ // format!("{}/{}", run.run_name, fc.flowcell_name),
|
|
|
+ // fc.pod5_info.acquisition_id.to_string(),
|
|
|
+ // format!("{:#?}", fc.cases),
|
|
|
+ // ]
|
|
|
+ // .join("\t");
|
|
|
+ // println!("{s}");
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+
|
|
|
+ // pub fn todo(&self) {
|
|
|
+ // let run_dir = &self.pod5_dir;
|
|
|
+ // for run in self.runs.iter() {
|
|
|
+ // for fc in run.flowcells.iter() {
|
|
|
+ // let to_call: Vec<_> = fc
|
|
|
+ // .cases
|
|
|
+ // .iter()
|
|
|
+ // .filter(|c| if let Some(b) = c.basecalled { !b } else { true })
|
|
|
+ // .collect();
|
|
|
+ //
|
|
|
+ // if !to_call.is_empty() {
|
|
|
+ // if fc.pod5_type == Pod5Type::Raw && to_call.len() != fc.cases.len() {
|
|
|
+ // println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
|
|
|
+ // } else {
|
|
|
+ // match fc.pod5_type {
|
|
|
+ // Pod5Type::Raw => {
|
|
|
+ // let cases: Vec<String> = to_call
|
|
|
+ // .iter()
|
|
|
+ // .map(|c| {
|
|
|
+ // let bc = c.barcode.replace("NB", "");
|
|
|
+ // let tp = c.time_point.to_lowercase();
|
|
|
+ // [bc, c.id.to_string(), tp].join(" ")
|
|
|
+ // })
|
|
|
+ // .collect();
|
|
|
+ // println!(
|
|
|
+ // "from_mux.sh {}/{}/{} {}",
|
|
|
+ // run_dir,
|
|
|
+ // run.run_name,
|
|
|
+ // fc.flowcell_name,
|
|
|
+ // cases.join(" ")
|
|
|
+ // );
|
|
|
+ // }
|
|
|
+ // Pod5Type::Demuxed => to_call.iter().for_each(|c| {
|
|
|
+ // let bc = c.barcode.replace("NB", "");
|
|
|
+ // let tp = c.time_point.to_lowercase();
|
|
|
+ // let bam = format!(
|
|
|
+ // "{}/{}/{}/{}_{}_hs1.bam",
|
|
|
+ // self.bam_dir, c.id, c.time_point, c.id, c.time_point
|
|
|
+ // );
|
|
|
+ // if PathBuf::from(bam).exists() {
|
|
|
+ // let pod_dir: Vec<String> = fc
|
|
|
+ // .pod5
|
|
|
+ // .iter()
|
|
|
+ // .filter(|p| {
|
|
|
+ // p.path.contains(&format!("barcode{}", bc.clone()))
|
|
|
+ // })
|
|
|
+ // .take(1)
|
|
|
+ // .map(|p| p.path.to_string())
|
|
|
+ // .collect();
|
|
|
+ //
|
|
|
+ // let pod_dir = pod_dir.first().unwrap();
|
|
|
+ // let mut pod_dir = PathBuf::from(pod_dir);
|
|
|
+ // pod_dir.pop();
|
|
|
+ //
|
|
|
+ // // TODO scheduler
|
|
|
+ // println!(
|
|
|
+ // "complete_bam.sh {} {} {}",
|
|
|
+ // c.id,
|
|
|
+ // tp,
|
|
|
+ // pod_dir.to_string_lossy()
|
|
|
+ // )
|
|
|
+ // } else {
|
|
|
+ // let pod_dir: Vec<String> = fc
|
|
|
+ // .pod5
|
|
|
+ // .iter()
|
|
|
+ // .filter(|p| {
|
|
|
+ // p.path.contains(&format!("barcode{}", bc.clone()))
|
|
|
+ // })
|
|
|
+ // .take(1)
|
|
|
+ // .map(|p| p.path.to_string())
|
|
|
+ // .collect();
|
|
|
+ //
|
|
|
+ // let pod_dir = pod_dir.first().unwrap();
|
|
|
+ // let mut pod_dir = PathBuf::from(pod_dir);
|
|
|
+ // pod_dir.pop();
|
|
|
+ //
|
|
|
+ // println!(
|
|
|
+ // "dorado.sh {} {} {}",
|
|
|
+ // c.id,
|
|
|
+ // tp,
|
|
|
+ // pod_dir.to_string_lossy()
|
|
|
+ // )
|
|
|
+ // }
|
|
|
+ // }),
|
|
|
+ // };
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
+ // }
|
|
|
|
|
|
pub fn ids(&self) -> Vec<String> {
|
|
|
let mut ids: Vec<String> = self
|
|
|
@@ -510,7 +536,12 @@ pub fn load_flowcells_corrected_names(file_path: &str) -> anyhow::Result<Vec<FCL
|
|
|
|
|
|
let mut records = Vec::new();
|
|
|
for result in rdr.deserialize() {
|
|
|
- let record: FCLine = result?;
|
|
|
+ let mut record: FCLine = result?;
|
|
|
+
|
|
|
+ // formatting
|
|
|
+ record.time_point = record.time_point.to_lowercase();
|
|
|
+ record.id = record.id.to_uppercase();
|
|
|
+
|
|
|
records.push(record);
|
|
|
}
|
|
|
|