Thomas преди 1 година
родител
ревизия
62d7b32549
променени са 2 файла, в които са добавени 206 реда и са изтрити 38 реда
  1. 37 2
      src/lib.rs
  2. 169 36
      src/pod5.rs

+ 37 - 2
src/lib.rs

@@ -30,8 +30,43 @@ mod tests {
         let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
             .build();
 
-        let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
-        runs.check_local("/data/longreads_basic_pipe")?;
+        let runs = Runs::import_dir(
+            "/data/run_data",
+            "/data/flow_cells.tsv",
+            "/data/longreads_basic_pipe",
+        )?;
+        // let runs = Runs::import_dir("/home/prom/store/banana-pool/run_data", "/data/flow_cells.tsv")?;
+        runs.check_local()?;
+        Ok(())
+    }
+
+    /// Runs `Runs::todo` against the local data set.
+    ///
+    /// Depends on the `/data/...` paths below existing on the machine that runs the test.
+    #[test]
+    fn todo() -> anyhow::Result<()> {
+        // `build()` only constructs a logger; `try_init()` installs it so `log` output shows up.
+        // The result is ignored because another test may already have installed the logger.
+        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+            .is_test(true)
+            .try_init();
+
+        let runs = Runs::import_dir(
+            "/data/run_data",
+            "/data/flow_cells.tsv",
+            "/data/longreads_basic_pipe",
+        )?;
+        runs.todo();
+        Ok(())
+    }
+
+    /// Runs `Runs::fc_done` against the local data set.
+    ///
+    /// Depends on the `/data/...` paths below existing on the machine that runs the test.
+    #[test]
+    fn done() -> anyhow::Result<()> {
+        // `build()` only constructs a logger; `try_init()` installs it so `log` output shows up.
+        // The result is ignored because another test may already have installed the logger.
+        let _ = env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+            .is_test(true)
+            .try_init();
+
+        let runs = Runs::import_dir(
+            "/data/run_data",
+            "/data/flow_cells.tsv",
+            "/data/longreads_basic_pipe",
+        )?;
+        runs.fc_done();
         Ok(())
     }
 }

+ 169 - 36
src/pod5.rs

@@ -2,7 +2,7 @@ use anyhow::{anyhow, Context, Result};
 use chrono::{DateTime, Utc};
 use csv::ReaderBuilder;
 use glob::glob;
-use log::{info, warn};
+use log::warn;
 use pandora_lib_pod5::Pod5Info;
 use serde::Deserialize;
 use std::{
@@ -24,7 +24,7 @@ pub struct Pod5 {
     pub file_metadata: Metadata,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
 pub enum Pod5Type {
     Raw,
     Demuxed,
@@ -144,6 +144,8 @@ pub struct FlowCell {
 pub struct Runs { // result of `Runs::import_dir`
     pub importation_date: DateTime<Utc>, // set to `Utc::now()` when `import_dir` builds the value
     pub runs: Vec<Run>,
+    pub bam_dir: String, // copy of the `bam_dir` argument given to `import_dir`
+    pub pod5_dir: String, // copy of the `pod5_dir` argument given to `import_dir`
 }
 
 #[derive(Debug, Clone)]
@@ -151,11 +153,48 @@ pub struct FlowCellCase {
     pub id: String,
     pub time_point: String,
     pub barcode: String,
+    pub basecalled: Option<bool>,
+}
+
+impl FlowCellCase {
+    /// Reports whether this case is already basecalled for the given acquisition.
+    ///
+    /// A cached answer in `self.basecalled` is returned unchanged. Otherwise the BAM expected
+    /// at `{bam_dir}/{id}/{time_point}/{id}_{time_point}_hs1.bam` (time point lower-cased) is
+    /// inspected: the case counts as basecalled when, for one of the names returned by
+    /// `bam_compo` (the `rg` values, presumably read groups), the text before the first `_`
+    /// equals `acquisition_id`. Only a positive answer is cached.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the BAM path is not valid UTF-8 or if `bam_compo` fails.
+    pub fn basecalled(&mut self, bam_dir: &str, acquisition_id: String) -> bool {
+        if let Some(known) = self.basecalled {
+            return known;
+        }
+
+        let tp = self.time_point.to_lowercase();
+        let bam = PathBuf::from(format!(
+            "{bam_dir}/{}/{tp}/{}_{tp}_hs1.bam",
+            self.id, self.id
+        ));
+        if !bam.exists() {
+            return false;
+        }
+
+        let found = pandora_lib_pileup::bam_compo(bam.to_str().unwrap(), 20000)
+            .unwrap()
+            .iter()
+            .any(|(rg, _)| {
+                // Names without an underscore carry no flow-cell prefix and never match.
+                rg.find('_')
+                    .map_or(false, |cut| rg[..cut] == *acquisition_id)
+            });
+
+        if found {
+            self.basecalled = Some(true);
+        }
+        found
+    }
 }
 
 impl Runs {
-    pub fn import_dir(dir: &str, corrected_fc_path: &str) -> Result<Self> {
-        let pod5 = list_pod_files(dir)?;
+    pub fn import_dir(pod5_dir: &str, corrected_fc_path: &str, bam_dir: &str) -> Result<Self> {
+        let pod5 = list_pod_files(pod5_dir)?;
         println!("N pod5 {}", pod5.len());
 
         let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
@@ -198,10 +237,15 @@ impl Runs {
 
                 let cases: Vec<FlowCellCase> = sel
                     .iter()
-                    .map(|e| FlowCellCase {
-                        id: e.id.clone(),
-                        time_point: e.time_point.clone(),
-                        barcode: e.barcode_number.clone(),
+                    .map(|e| {
+                        let mut c = FlowCellCase {
+                            id: e.id.clone(),
+                            time_point: e.time_point.clone(),
+                            barcode: e.barcode_number.clone(),
+                            basecalled: None,
+                        };
+                        c.basecalled(bam_dir, pod5_info.acquisition_id.clone());
+                        c
                     })
                     .collect();
 
@@ -235,6 +279,8 @@ impl Runs {
         Ok(Self {
             importation_date: Utc::now(),
             runs,
+            bam_dir: bam_dir.to_string(),
+            pod5_dir: pod5_dir.to_string(),
         })
     }
 
@@ -267,38 +313,14 @@ impl Runs {
         });
     }
 
-    pub fn check_local(&self, dir: &str) -> anyhow::Result<()> {
+    pub fn check_local(&self) -> anyhow::Result<()> {
         let mut res = Vec::new();
         for run in self.runs.iter() {
             for fc in run.flowcells.iter() {
                 for c in fc.cases.iter() {
-                    let bases_called = if let std::result::Result::Ok(p) =
-                        PathBuf::from_str(&format!(
-                            "{dir}/{}/{}/{}_{}_hs1.bam",
-                            c.id,
-                            c.time_point.to_lowercase(),
-                            c.id,
-                            c.time_point.to_lowercase()
-                        )) {
-                        if p.exists() {
-                            let has_id = pandora_lib_pileup::bam_compo(p.to_str().unwrap(), 20000).unwrap()
-                                .iter()
-                                .flat_map(|(rg, _)| {
-                                    if let Some(index) = rg.find('_') {
-                                        let fc_id: &str = &rg[..index];
-                                        vec![fc_id.to_string()]
-                                    } else {
-                                        vec![]
-                                    }
-                                })
-                                .filter(|i| *i == fc.pod5_info.acquisition_id)
-                                .count()
-                            > 0;
-                            if has_id {
-                                "✅".to_string()
-                            } else {
-                                "❌".to_string()
-                            }
+                    let bases_called = if let Some(b) = c.basecalled {
+                        if b {
+                            "✅".to_string()
                         } else {
                             "❌".to_string()
                         }
@@ -325,6 +347,117 @@ impl Runs {
         println!("{}", res.join("\n"));
         Ok(())
     }
+
+    /// Prints one tab-separated line (run/flow cell, acquisition id, debug dump of the cases)
+    /// for every flow cell that has at least one case and whose cases are all basecalled.
+    pub fn fc_done(&self) {
+        let flowcells = self
+            .runs
+            .iter()
+            .flat_map(|run| run.flowcells.iter().map(move |fc| (run, fc)));
+
+        for (run, fc) in flowcells {
+            // A case with no recorded status (`None`) does not count as basecalled.
+            let all_called =
+                !fc.cases.is_empty() && fc.cases.iter().all(|c| c.basecalled == Some(true));
+            if !all_called {
+                continue;
+            }
+
+            let location = format!("{}/{}", run.run_name, fc.flowcell_name);
+            let acquisition = fc.pod5_info.acquisition_id.to_string();
+            let cases = format!("{:#?}", fc.cases);
+            println!("{}", [location, acquisition, cases].join("\t"));
+        }
+    }
+
+    /// Prints, for every flow cell that still has cases to basecall, the shell command to run.
+    ///
+    /// * Raw flow cells: a single `from_mux.sh` line listing every case, or
+    ///   `No solution for: …` when only some of the cases are missing.
+    /// * Demuxed flow cells: one line per missing case, `complete_bam.sh` when the case BAM
+    ///   already exists under `self.bam_dir` and `dorado.sh` otherwise. The pod5 directory is
+    ///   the parent of the first pod5 path containing `barcode<NN>`.
+    ///
+    /// Cases for which no matching pod5 path is found are reported with `warn!` and skipped.
+    pub fn todo(&self) {
+        let run_dir = &self.pod5_dir;
+        for run in self.runs.iter() {
+            for fc in run.flowcells.iter() {
+                // A case with no recorded status (`None`) is treated as not basecalled.
+                let to_call: Vec<_> = fc
+                    .cases
+                    .iter()
+                    .filter(|c| c.basecalled != Some(true))
+                    .collect();
+                if to_call.is_empty() {
+                    continue;
+                }
+
+                match fc.pod5_type {
+                    Pod5Type::Raw if to_call.len() != fc.cases.len() => {
+                        println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
+                    }
+                    Pod5Type::Raw => {
+                        let cases: Vec<String> = to_call
+                            .iter()
+                            .map(|c| {
+                                format!(
+                                    "{} {} {}",
+                                    c.barcode.replace("NB", ""),
+                                    c.id,
+                                    c.time_point.to_lowercase()
+                                )
+                            })
+                            .collect();
+                        println!(
+                            "from_mux.sh {}/{}/{} {}",
+                            run_dir,
+                            run.run_name,
+                            fc.flowcell_name,
+                            cases.join(" ")
+                        );
+                    }
+                    Pod5Type::Demuxed => {
+                        for c in to_call.iter() {
+                            let bc = c.barcode.replace("NB", "");
+                            let tp = c.time_point.to_lowercase();
+                            // Same lower-cased layout as `FlowCellCase::basecalled`, so an
+                            // existing BAM is found whatever the case of `time_point`.
+                            let bam = PathBuf::from(format!(
+                                "{}/{}/{}/{}_{}_hs1.bam",
+                                self.bam_dir, c.id, tp, c.id, tp
+                            ));
+
+                            let needle = format!("barcode{bc}");
+                            let first_pod = fc.pod5.iter().find(|p| p.path.contains(&needle));
+                            let mut pod_dir = if let Some(pod) = first_pod {
+                                PathBuf::from(pod.path.to_string())
+                            } else {
+                                warn!(
+                                    "No pod5 file found for {} in {}/{}",
+                                    needle, run.run_name, fc.flowcell_name
+                                );
+                                continue;
+                            };
+                            // Drop the file name to keep only the barcode directory.
+                            pod_dir.pop();
+
+                            let script = if bam.exists() {
+                                "complete_bam.sh"
+                            } else {
+                                "dorado.sh"
+                            };
+                            println!("{} {} {} {}", script, c.id, tp, pod_dir.to_string_lossy());
+                        }
+                    }
+                }
+            }
+        }
+    }
 }
 
 #[derive(Debug, Deserialize, Clone)]