|
|
@@ -7,12 +7,111 @@ use std::{
|
|
|
process::{Command, Stdio},
|
|
|
};
|
|
|
|
|
|
-use anyhow::{Context, Ok};
|
|
|
+use anyhow::{anyhow, Context, Ok};
|
|
|
use config::Config;
|
|
|
use log::info;
|
|
|
use minimap2::{Mapping, Strand};
|
|
|
use serde::Serialize;
|
|
|
|
|
|
+pub struct Blast {
|
|
|
+ config: Config,
|
|
|
+ fa_path: String,
|
|
|
+ results: Option<Vec<BlastResult>>,
|
|
|
+}
|
|
|
+
|
|
|
+impl Blast {
|
|
|
+ pub fn init(fa_path: &str) -> anyhow::Result<Self> {
|
|
|
+ Ok(Blast {
|
|
|
+ config: Config::get()?,
|
|
|
+ fa_path: fa_path.to_string(),
|
|
|
+ results: None,
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn run(mut self) -> anyhow::Result<Self> {
|
|
|
+ info!("Running blastn in {}", self.fa_path);
|
|
|
+
|
|
|
+ let blastn_bin = self.config.blastn_bin.clone();
|
|
|
+ let blast_db = self.config.blast_db.clone();
|
|
|
+
|
|
|
+ let file_contents =
|
|
|
+ fs::read_to_string(self.fa_path.clone()).context("Failed to read the file")?;
|
|
|
+
|
|
|
+ let mut cmd = Command::new(blastn_bin)
|
|
|
+ .arg("-db")
|
|
|
+ .arg(blast_db)
|
|
|
+ .arg("-outfmt")
|
|
|
+ .arg("6")
|
|
|
+ .arg("-num_threads")
|
|
|
+ .arg(self.config.num_threads.to_string())
|
|
|
+ .stdin(Stdio::piped())
|
|
|
+ .stdout(Stdio::piped())
|
|
|
+ .spawn()
|
|
|
+ .expect("Blastn failed to start");
|
|
|
+
|
|
|
+ {
|
|
|
+ let stdin = cmd.stdin.as_mut().expect("Failed to open stdin");
|
|
|
+ stdin
|
|
|
+ .write_all(file_contents.as_bytes())
|
|
|
+ .expect("Failed to write to stdin");
|
|
|
+ }
|
|
|
+
|
|
|
+ let output = cmd.wait_with_output().expect("Failed to read stdout");
|
|
|
+
|
|
|
+ self.results = Some(parse_blastn_touput(
|
|
|
+ String::from_utf8_lossy(&output.stdout).to_string(),
|
|
|
+ ));
|
|
|
+
|
|
|
+ Ok(self)
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn keep_best(mut self) -> Self {
|
|
|
+ if let Some(results) = self.results {
|
|
|
+ let mut results: Vec<BlastPile> = results
|
|
|
+ .iter()
|
|
|
+ .take(self.config.max_results)
|
|
|
+ .map(|v| BlastPile::new(v))
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ allocate_y_positions(&mut results);
|
|
|
+
|
|
|
+ let results: Vec<BlastResult> = results
|
|
|
+ .into_iter()
|
|
|
+ .filter(|r| r.y == 0)
|
|
|
+ .map(|r| r.data)
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ self.results = Some(results);
|
|
|
+ }
|
|
|
+
|
|
|
+ self
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn to_mapping(self) -> Option<Vec<Mapping>> {
|
|
|
+ if let Some(results) = self.results {
|
|
|
+ Some(results.iter().map(Mapping::from).collect())
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn to_json(self) -> anyhow::Result<String> {
|
|
|
+ if let Some(results) = self.results {
|
|
|
+ let json = serde_json::to_string(&results)?;
|
|
|
+ Ok(json)
|
|
|
+ } else {
|
|
|
+ Err(anyhow!("No results"))
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn save_file(self, file_path: &str) -> anyhow::Result<()> {
|
|
|
+ let data = self.to_json()?;
|
|
|
+ let mut file = File::create(file_path)?;
|
|
|
+ file.write_all(data.as_bytes())?;
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
// cat /home/prom/Documents/Programmes/desc_seq_lib/data_test/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta | blastn -db hs1_simple_chr.fa -outfmt 6
|
|
|
pub fn run_blastn(query_fa_path: &str) -> anyhow::Result<String> {
|
|
|
info!("Running blastn in {query_fa_path}");
|
|
|
@@ -108,16 +207,6 @@ impl TryFrom<String> for BlastResult {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-pub fn blastn(fa_path: &str, output_json: &str) -> anyhow::Result<()> {
|
|
|
- let res = run_blastn(fa_path)?;
|
|
|
- let mut res = parse_blastn_touput(res);
|
|
|
- res.sort_by(|a, b| b.bit_score.total_cmp(&a.bit_score));
|
|
|
- let json = serde_json::to_string(&res)?;
|
|
|
- let mut file = File::create(output_json)?;
|
|
|
- file.write_all(json.as_bytes())?;
|
|
|
- Ok(())
|
|
|
-}
|
|
|
-
|
|
|
#[derive(Debug)]
|
|
|
struct BlastPile {
|
|
|
x: usize,
|
|
|
@@ -200,6 +289,27 @@ pub fn blast(fa_path: &str) -> anyhow::Result<Vec<Mapping>> {
|
|
|
Ok(best_blastn(fa_path)?.iter().map(Mapping::from).collect())
|
|
|
}
|
|
|
|
|
|
+fn evalue_to_mapq(evalue: f64) -> u32 {
|
|
|
+ if evalue <= 0.0 {
|
|
|
+ return 60; // Max MAPQ for highly significant hits.
|
|
|
+ }
|
|
|
+
|
|
|
+ // Convert e-value to a logarithmic scale and then to a MAPQ score.
|
|
|
+ let mapq = -10.0 * evalue.log10();
|
|
|
+ println!("{mapq}");
|
|
|
+
|
|
|
+ // Clamp the value between 0 and 60, which is a typical range for MAPQ scores.
|
|
|
+ let clamped_mapq = if mapq > 60.0 {
|
|
|
+ 60.0
|
|
|
+ } else if mapq < 0.0 {
|
|
|
+ 0.0
|
|
|
+ } else {
|
|
|
+ mapq
|
|
|
+ };
|
|
|
+
|
|
|
+ clamped_mapq as u32
|
|
|
+}
|
|
|
+
|
|
|
impl From<&BlastResult> for Mapping {
|
|
|
fn from(br: &BlastResult) -> Self {
|
|
|
let query_len = NonZeroI32::new(br.q_end as i32 - br.q_start as i32 + 1);
|
|
|
@@ -208,7 +318,7 @@ impl From<&BlastResult> for Mapping {
|
|
|
} else {
|
|
|
(Strand::Reverse, br.s_end as i32, br.s_start as i32)
|
|
|
};
|
|
|
- let mapq = (br.bit_score.min(60.0) as u32).min(60);
|
|
|
+ let mapq = evalue_to_mapq(br.e_value);
|
|
|
|
|
|
Mapping {
|
|
|
query_name: Some(br.query_id.clone()),
|
|
|
@@ -241,7 +351,11 @@ mod tests {
|
|
|
|
|
|
let fa = "test_data/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta";
|
|
|
|
|
|
- let res = best_blastn(fa)?;
|
|
|
+ let res = Blast::init(fa)?
|
|
|
+ .run()?
|
|
|
+ .keep_best()
|
|
|
+ .results
|
|
|
+ .context("No results")?;
|
|
|
println!("{res:#?}");
|
|
|
|
|
|
assert_eq!(res[0].q_end, 2838);
|
|
|
@@ -251,10 +365,14 @@ mod tests {
|
|
|
}
|
|
|
|
|
|
#[test]
|
|
|
- fn blast() -> anyhow::Result<()> {
|
|
|
+ fn mapping() -> anyhow::Result<()> {
|
|
|
let fa = "test_data/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta";
|
|
|
|
|
|
- let res = crate::blast(fa)?;
|
|
|
+ let res = Blast::init(fa)?
|
|
|
+ .run()?
|
|
|
+ .keep_best()
|
|
|
+ .to_mapping()
|
|
|
+ .context("No results")?;
|
|
|
println!("{res:#?}");
|
|
|
Ok(())
|
|
|
}
|