Thomas 1 year ago
parent
commit
008179ce65
2 changed files with 139 additions and 18 deletions
  1. 6 3
      src/config.rs
  2. 133 15
      src/lib.rs

+ 6 - 3
src/config.rs

@@ -1,7 +1,7 @@
 use std::path::PathBuf;
 
-use serde::{Serialize, Deserialize};
 use anyhow::Result;
+use serde::{Deserialize, Serialize};
 
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct Config {
@@ -27,6 +27,9 @@ impl Config {
         Ok(confy::load("pandora_lib_blastn", None)?)
     }
     pub fn path() -> Result<PathBuf> {
-        Ok(confy::get_configuration_file_path("pandora_lib_blastn", None)?)
-    } 
+        Ok(confy::get_configuration_file_path(
+            "pandora_lib_blastn",
+            None,
+        )?)
+    }
 }

+ 133 - 15
src/lib.rs

@@ -7,12 +7,111 @@ use std::{
     process::{Command, Stdio},
 };
 
-use anyhow::{Context, Ok};
+use anyhow::{anyhow, Context, Ok};
 use config::Config;
 use log::info;
 use minimap2::{Mapping, Strand};
 use serde::Serialize;
 
+pub struct Blast {
+    config: Config,
+    fa_path: String,
+    results: Option<Vec<BlastResult>>,
+}
+
+impl Blast {
+    pub fn init(fa_path: &str) -> anyhow::Result<Self> {
+        Ok(Blast {
+            config: Config::get()?,
+            fa_path: fa_path.to_string(),
+            results: None,
+        })
+    }
+
+    pub fn run(mut self) -> anyhow::Result<Self> {
+        info!("Running blastn in {}", self.fa_path);
+
+        let blastn_bin = self.config.blastn_bin.clone();
+        let blast_db = self.config.blast_db.clone();
+
+        let file_contents =
+            fs::read_to_string(self.fa_path.clone()).context("Failed to read the file")?;
+
+        let mut cmd = Command::new(blastn_bin)
+            .arg("-db")
+            .arg(blast_db)
+            .arg("-outfmt")
+            .arg("6")
+            .arg("-num_threads")
+            .arg(self.config.num_threads.to_string())
+            .stdin(Stdio::piped())
+            .stdout(Stdio::piped())
+            .spawn()
+            .expect("Blastn failed to start");
+
+        {
+            let stdin = cmd.stdin.as_mut().expect("Failed to open stdin");
+            stdin
+                .write_all(file_contents.as_bytes())
+                .expect("Failed to write to stdin");
+        }
+
+        let output = cmd.wait_with_output().expect("Failed to read stdout");
+
+        self.results = Some(parse_blastn_touput(
+            String::from_utf8_lossy(&output.stdout).to_string(),
+        ));
+
+        Ok(self)
+    }
+
+    pub fn keep_best(mut self) -> Self {
+        if let Some(results) = self.results {
+            let mut results: Vec<BlastPile> = results
+                .iter()
+                .take(self.config.max_results)
+                .map(|v| BlastPile::new(v))
+                .collect();
+
+            allocate_y_positions(&mut results);
+
+            let results: Vec<BlastResult> = results
+                .into_iter()
+                .filter(|r| r.y == 0)
+                .map(|r| r.data)
+                .collect();
+
+            self.results = Some(results);
+        }
+
+        self
+    }
+
+    pub fn to_mapping(self) -> Option<Vec<Mapping>> {
+        if let Some(results) = self.results {
+            Some(results.iter().map(Mapping::from).collect())
+        } else {
+            None
+        }
+    }
+
+    pub fn to_json(self) -> anyhow::Result<String> {
+        if let Some(results) = self.results {
+            let json = serde_json::to_string(&results)?;
+            Ok(json)
+        } else {
+            Err(anyhow!("No results"))
+        }
+    }
+
+    pub fn save_file(self, file_path: &str) -> anyhow::Result<()> {
+        let data = self.to_json()?;
+        let mut file = File::create(file_path)?;
+        file.write_all(data.as_bytes())?;
+        Ok(())
+    }
+}
+
 // cat /home/prom/Documents/Programmes/desc_seq_lib/data_test/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta | blastn -db hs1_simple_chr.fa -outfmt 6
 pub fn run_blastn(query_fa_path: &str) -> anyhow::Result<String> {
     info!("Running blastn in {query_fa_path}");
@@ -108,16 +207,6 @@ impl TryFrom<String> for BlastResult {
     }
 }
 
-pub fn blastn(fa_path: &str, output_json: &str) -> anyhow::Result<()> {
-    let res = run_blastn(fa_path)?;
-    let mut res = parse_blastn_touput(res);
-    res.sort_by(|a, b| b.bit_score.total_cmp(&a.bit_score));
-    let json = serde_json::to_string(&res)?;
-    let mut file = File::create(output_json)?;
-    file.write_all(json.as_bytes())?;
-    Ok(())
-}
-
 #[derive(Debug)]
 struct BlastPile {
     x: usize,
@@ -200,6 +289,27 @@ pub fn blast(fa_path: &str) -> anyhow::Result<Vec<Mapping>> {
     Ok(best_blastn(fa_path)?.iter().map(Mapping::from).collect())
 }
 
+fn evalue_to_mapq(evalue: f64) -> u32 {
+    if evalue <= 0.0 {
+        return 60; // Max MAPQ for highly significant hits.
+    }
+
+    // Convert e-value to a logarithmic scale and then to a MAPQ score.
+    let mapq = -10.0 * evalue.log10();
+    println!("{mapq}");
+
+    // Clamp the value between 0 and 60, which is a typical range for MAPQ scores.
+    let clamped_mapq = if mapq > 60.0 {
+        60.0
+    } else if mapq < 0.0 {
+        0.0
+    } else {
+        mapq
+    };
+
+    clamped_mapq as u32
+}
+
 impl From<&BlastResult> for Mapping {
     fn from(br: &BlastResult) -> Self {
         let query_len = NonZeroI32::new(br.q_end as i32 - br.q_start as i32 + 1);
@@ -208,7 +318,7 @@ impl From<&BlastResult> for Mapping {
         } else {
             (Strand::Reverse, br.s_end as i32, br.s_start as i32)
         };
-        let mapq = (br.bit_score.min(60.0) as u32).min(60);
+        let mapq = evalue_to_mapq(br.e_value);
 
         Mapping {
             query_name: Some(br.query_id.clone()),
@@ -241,7 +351,11 @@ mod tests {
 
         let fa = "test_data/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta";
 
-        let res = best_blastn(fa)?;
+        let res = Blast::init(fa)?
+            .run()?
+            .keep_best()
+            .results
+            .context("No results")?;
         println!("{res:#?}");
 
         assert_eq!(res[0].q_end, 2838);
@@ -251,10 +365,14 @@ mod tests {
     }
 
     #[test]
-    fn blast() -> anyhow::Result<()> {
+    fn mapping() -> anyhow::Result<()> {
         let fa = "test_data/419b7353-bc8c-4ffe-8ad6-0bbfac8c0cfa.fasta";
 
-        let res = crate::blast(fa)?;
+        let res = Blast::init(fa)?
+            .run()?
+            .keep_best()
+            .to_mapping()
+            .context("No results")?;
         println!("{res:#?}");
         Ok(())
     }