|
|
@@ -12,20 +12,18 @@ use nom::AsBytes;
|
|
|
use pandora_lib_blastn::BlastResult;
|
|
|
use pandora_lib_igv::{BamTrack, BedTrack, Track};
|
|
|
use petgraph::{
|
|
|
- algo::dijkstra,
|
|
|
- data::{Build, DataMap},
|
|
|
dot::Dot,
|
|
|
graph::NodeIndex,
|
|
|
stable_graph::StableUnGraph,
|
|
|
- visit::{EdgeRef, IntoNeighbors, NodeIndexable},
|
|
|
};
|
|
|
use regex::Regex;
|
|
|
use rust_htslib::bam::{Read, Reader, Record};
|
|
|
use std::{
|
|
|
collections::{HashMap, HashSet, VecDeque},
|
|
|
fs::{self, File},
|
|
|
- io::{BufRead, BufReader, Cursor, Write},
|
|
|
+ io::{BufRead, BufReader, Write},
|
|
|
path::{Path, PathBuf},
|
|
|
+ str::FromStr,
|
|
|
};
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
@@ -615,13 +613,17 @@ pub fn dedup_dir(dir: &str) -> anyhow::Result<()> {
|
|
|
if let Some(file_name) = path.file_name().and_then(|name| name.to_str()) {
|
|
|
if re.is_match(file_name) {
|
|
|
if let Some(input_id) = path.file_stem().and_then(|n| n.to_str()) {
|
|
|
- let mut bed_blast: Vec<String> = read_tsv_file(path.to_str().unwrap())?
|
|
|
- .iter()
|
|
|
- .map(|r| format!("{}{}", r.name, r.strand))
|
|
|
- .collect();
|
|
|
+ let mut bed_blast: Vec<String> =
|
|
|
+ read_blastn_bed(path.to_str().unwrap())?
|
|
|
+ .iter()
|
|
|
+ .map(|r| format!("{}{}", r.name, r.strand))
|
|
|
+ .collect();
|
|
|
bed_blast.sort();
|
|
|
let key = bed_blast.join("|");
|
|
|
- bed_hm.entry(key).or_default().push(input_id.to_owned().replace("_flye", ""));
|
|
|
+ bed_hm
|
|
|
+ .entry(key)
|
|
|
+ .or_default()
|
|
|
+ .push(input_id.to_owned().replace("_flye", ""));
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -643,12 +645,10 @@ pub fn dedup_dir(dir: &str) -> anyhow::Result<()> {
|
|
|
dir_flye(dir, false)?;
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
Ok(())
|
|
|
}
|
|
|
|
|
|
-pub struct BedRow {
|
|
|
+pub struct BlastnBedRow {
|
|
|
pub contig: String,
|
|
|
pub start: u32,
|
|
|
pub end: u32,
|
|
|
@@ -657,30 +657,20 @@ pub struct BedRow {
|
|
|
pub strand: String,
|
|
|
}
|
|
|
|
|
|
-// Function to read a TSV file and return a Vec of Row structs
|
|
|
-fn read_tsv_file(file_path: &str) -> anyhow::Result<Vec<BedRow>> {
|
|
|
- // Open the file
|
|
|
- let file = File::open(file_path)?;
|
|
|
- let reader = BufReader::new(file);
|
|
|
-
|
|
|
- // Create a vector to store the rows
|
|
|
- let mut rows = Vec::new();
|
|
|
-
|
|
|
- // Iterate over each line in the file
|
|
|
- for line in reader.lines() {
|
|
|
- // Unwrap the line, skipping any that cause errors
|
|
|
- let line = line?;
|
|
|
+impl FromStr for BlastnBedRow {
|
|
|
+ type Err = anyhow::Error;
|
|
|
|
|
|
- // Split the line by tabs
|
|
|
+ fn from_str(line: &str) -> anyhow::Result<Self> {
|
|
|
let fields: Vec<&str> = line.split('\t').collect();
|
|
|
|
|
|
- // Ensure the line has the correct number of fields
|
|
|
if fields.len() != 6 {
|
|
|
- continue; // Skip lines with incorrect number of fields
|
|
|
+ return Err(anyhow!(
|
|
|
+ "Error while parsing bed row, number of fields doesn't match {}",
|
|
|
+ line
|
|
|
+ ));
|
|
|
}
|
|
|
|
|
|
- // Parse the fields and create a Row struct
|
|
|
- let row = BedRow {
|
|
|
+ let row = BlastnBedRow {
|
|
|
contig: fields[0].to_string(),
|
|
|
start: fields[1].parse()?,
|
|
|
end: fields[2].parse()?,
|
|
|
@@ -689,12 +679,19 @@ fn read_tsv_file(file_path: &str) -> anyhow::Result<Vec<BedRow>> {
|
|
|
strand: fields[5].to_string(),
|
|
|
};
|
|
|
|
|
|
- // Add the row to the vector
|
|
|
- rows.push(row);
|
|
|
+ Ok(row)
|
|
|
}
|
|
|
+}
|
|
|
+
|
|
|
+fn read_blastn_bed(file_path: &str) -> anyhow::Result<Vec<BlastnBedRow>> {
|
|
|
+ let file = File::open(file_path)?;
|
|
|
+ let reader = BufReader::new(file);
|
|
|
|
|
|
- // Return the vector of rows
|
|
|
- Ok(rows)
|
|
|
+ reader
|
|
|
+ .lines()
|
|
|
+ .map_while(Result::ok)
|
|
|
+ .map(|s| BlastnBedRow::from_str(&s))
|
|
|
+ .collect()
|
|
|
}
|
|
|
|
|
|
pub fn igv_link(dir: &str, contig_id: &str) -> anyhow::Result<String> {
|
|
|
@@ -740,7 +737,11 @@ fn merge_bam_files(input_bam_paths: Vec<String>, output_bam_path: &str) -> anyho
|
|
|
let header = rust_htslib::bam::Header::from_template(bam1.header());
|
|
|
|
|
|
// Create a new BAM writer with the header from the first BAM file
|
|
|
- let mut output_bam = rust_htslib::bam::Writer::from_path(output_bam_path, &header, rust_htslib::bam::Format::Bam)?;
|
|
|
+ let mut output_bam = rust_htslib::bam::Writer::from_path(
|
|
|
+ output_bam_path,
|
|
|
+ &header,
|
|
|
+ rust_htslib::bam::Format::Bam,
|
|
|
+ )?;
|
|
|
|
|
|
// Write records from the first BAM file to the output BAM file
|
|
|
for result in bam1.records() {
|
|
|
@@ -838,7 +839,11 @@ mod tests {
|
|
|
#[test]
|
|
|
fn tmp() {
|
|
|
init();
|
|
|
- dir_flye("/data/tmp/scan_ca67d4bc-a18e-40ab-9e0a-af90116ca20b/reads/chr9", true).unwrap();
|
|
|
+ dir_flye(
|
|
|
+ "/data/tmp/scan_ca67d4bc-a18e-40ab-9e0a-af90116ca20b/reads/chr9",
|
|
|
+ true,
|
|
|
+ )
|
|
|
+ .unwrap();
|
|
|
}
|
|
|
|
|
|
#[test]
|
|
|
@@ -846,5 +851,4 @@ mod tests {
|
|
|
init();
|
|
|
dedup_dir("/data/tmp/scan_7ed2f43c-d16d-4dcc-bdb4-fb619d082991").unwrap();
|
|
|
}
|
|
|
-
|
|
|
}
|