Browse Source

read_blastn_bed

Thomas cách đây 1 năm
mục cha
commit
e716e03ff7
1 tập tin đã thay đổi với 41 bổ sung và 37 xóa
  1. 41 37
      src/lib.rs

+ 41 - 37
src/lib.rs

@@ -12,20 +12,18 @@ use nom::AsBytes;
 use pandora_lib_blastn::BlastResult;
 use pandora_lib_igv::{BamTrack, BedTrack, Track};
 use petgraph::{
-    algo::dijkstra,
-    data::{Build, DataMap},
     dot::Dot,
     graph::NodeIndex,
     stable_graph::StableUnGraph,
-    visit::{EdgeRef, IntoNeighbors, NodeIndexable},
 };
 use regex::Regex;
 use rust_htslib::bam::{Read, Reader, Record};
 use std::{
     collections::{HashMap, HashSet, VecDeque},
     fs::{self, File},
-    io::{BufRead, BufReader, Cursor, Write},
+    io::{BufRead, BufReader, Write},
     path::{Path, PathBuf},
+    str::FromStr,
 };
 use uuid::Uuid;
 
@@ -615,13 +613,17 @@ pub fn dedup_dir(dir: &str) -> anyhow::Result<()> {
                 if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
                     if re.is_match(file_name) {
                         if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
-                            let mut bed_blast: Vec<String> = read_tsv_file(path.to_str().unwrap())?
-                                .iter()
-                                .map(|r| format!("{}{}", r.name, r.strand))
-                                .collect();
+                            let mut bed_blast: Vec<String> =
+                                read_blastn_bed(path.to_str().unwrap())?
+                                    .iter()
+                                    .map(|r| format!("{}{}", r.name, r.strand))
+                                    .collect();
                             bed_blast.sort();
                             let key = bed_blast.join("|");
-                            bed_hm.entry(key).or_default().push(input_id.to_owned().replace("_flye", ""));
+                            bed_hm
+                                .entry(key)
+                                .or_default()
+                                .push(input_id.to_owned().replace("_flye", ""));
                         }
                     }
                 }
@@ -643,12 +645,10 @@ pub fn dedup_dir(dir: &str) -> anyhow::Result<()> {
         dir_flye(dir, false)?;
     }
 
-
-
     Ok(())
 }
 
-pub struct BedRow {
+pub struct BlastnBedRow {
     pub contig: String,
     pub start: u32,
     pub end: u32,
@@ -657,30 +657,20 @@ pub struct BedRow {
     pub strand: String,
 }
 
-// Function to read a TSV file and return a Vec of Row structs
-fn read_tsv_file(file_path: &str) -> anyhow::Result<Vec<BedRow>> {
-    // Open the file
-    let file = File::open(file_path)?;
-    let reader = BufReader::new(file);
-
-    // Create a vector to store the rows
-    let mut rows = Vec::new();
-
-    // Iterate over each line in the file
-    for line in reader.lines() {
-        // Unwrap the line, skipping any that cause errors
-        let line = line?;
+impl FromStr for BlastnBedRow {
+    type Err = anyhow::Error;
 
-        // Split the line by tabs
+    fn from_str(line: &str) -> anyhow::Result<Self> {
         let fields: Vec<&str> = line.split('\t').collect();
 
-        // Ensure the line has the correct number of fields
         if fields.len() != 6 {
-            continue; // Skip lines with incorrect number of fields
+            return Err(anyhow!(
+                "Error while parsing bed row, number of fields doesn't match {}",
+                line
+            ));
         }
 
-        // Parse the fields and create a Row struct
-        let row = BedRow {
+        let row = BlastnBedRow {
             contig: fields[0].to_string(),
             start: fields[1].parse()?,
             end: fields[2].parse()?,
@@ -689,12 +679,19 @@ fn read_tsv_file(file_path: &str) -> anyhow::Result<Vec<BedRow>> {
             strand: fields[5].to_string(),
         };
 
-        // Add the row to the vector
-        rows.push(row);
+        Ok(row)
     }
+}
+
+fn read_blastn_bed(file_path: &str) -> anyhow::Result<Vec<BlastnBedRow>> {
+    let file = File::open(file_path)?;
+    let reader = BufReader::new(file);
 
-    // Return the vector of rows
-    Ok(rows)
+    reader
+        .lines()
+        .map_while(Result::ok)
+        .map(|s| BlastnBedRow::from_str(&s))
+        .collect()
 }
 
 pub fn igv_link(dir: &str, contig_id: &str) -> anyhow::Result<String> {
@@ -740,7 +737,11 @@ fn merge_bam_files(input_bam_paths: Vec<String>, output_bam_path: &str) -> anyho
     let header = rust_htslib::bam::Header::from_template(bam1.header());
 
     // Create a new BAM writer with the header from the first BAM file
-    let mut output_bam = rust_htslib::bam::Writer::from_path(output_bam_path, &header, rust_htslib::bam::Format::Bam)?;
+    let mut output_bam = rust_htslib::bam::Writer::from_path(
+        output_bam_path,
+        &header,
+        rust_htslib::bam::Format::Bam,
+    )?;
 
     // Write records from the first BAM file to the output BAM file
     for result in bam1.records() {
@@ -838,7 +839,11 @@ mod tests {
     #[test]
     fn tmp() {
         init();
-        dir_flye("/data/tmp/scan_ca67d4bc-a18e-40ab-9e0a-af90116ca20b/reads/chr9", true).unwrap();
+        dir_flye(
+            "/data/tmp/scan_ca67d4bc-a18e-40ab-9e0a-af90116ca20b/reads/chr9",
+            true,
+        )
+        .unwrap();
     }
 
     #[test]
@@ -846,5 +851,4 @@ mod tests {
         init();
         dedup_dir("/data/tmp/scan_7ed2f43c-d16d-4dcc-bdb4-fb619d082991").unwrap();
     }
-
 }