Browse Source

first commit

Thomas 1 year ago
commit
5982bbcfb2
4 changed files with 216 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 16 0
      Cargo.lock
  3. 7 0
      Cargo.toml
  4. 192 0
      src/lib.rs

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+/target

+ 16 - 0
Cargo.lock

@@ -0,0 +1,16 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
+[[package]]
+name = "lib_chain"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+]

+ 7 - 0
Cargo.toml

@@ -0,0 +1,7 @@
+[package]
+name = "lib_chain"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow = "1.0.86"

+ 192 - 0
src/lib.rs

@@ -0,0 +1,192 @@
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::{BufRead, BufReader, Result};
+
+use anyhow::Context;
+
+#[derive(Debug)]
+struct ChainRecord {
+    score: i32,
+    chr1: String,
+    start1: i32,
+    strand1: char,
+    size1: i32,
+    chr2: String,
+    start2: i32,
+    strand2: char,
+    size2: i32,
+    chain_id: i32,
+    block_sizes: Vec<i32>,
+    q_starts: Vec<i32>,
+    t_starts: Vec<i32>,
+}
+
+impl ChainRecord {
+    // Convert query position to target position
+    fn query_to_target(&self, query_pos: i32) -> Option<(String, i32)> {
+        if query_pos < self.start1 || query_pos >= self.start1 + self.size1 {
+            return None; // Query position is outside the block range
+        }
+
+        let mut q_pos = self.start1;
+        let mut t_pos = self.start2;
+
+        for i in 0..self.block_sizes.len() {
+            let block_size = self.block_sizes[i];
+            let q_start = self.q_starts[i];
+            let t_start = self.t_starts[i];
+
+            if query_pos >= q_start && query_pos < q_start + block_size {
+                let offset = query_pos - q_start;
+                t_pos += t_start + offset;
+                return Some((self.chr2.clone(), t_pos));
+            }
+
+            q_pos += block_size;
+            t_pos += block_size;
+        }
+
+        None
+    }
+}
+
+fn parse_chain_file(filename: &str) -> anyhow::Result<HashMap<String, Vec<ChainRecord>>> {
+    let file = File::open(filename).context("Failed to open chain file")?;
+    let reader = BufReader::new(file);
+
+    let mut records: HashMap<String, Vec<ChainRecord>> = HashMap::new();
+    let mut current_record: Option<ChainRecord> = None;
+
+    for line in reader.lines() {
+        let line = line.context("Failed to read line from chain file")?;
+
+        if line.starts_with("chain") {
+            if let Some(record) = current_record.take() {
+                records.entry(record.chr1.clone()).or_default().push(record);
+            }
+            let fields: Vec<&str> = line.split_whitespace().collect();
+            if fields.len() != 13 {
+                continue; // Invalid line format
+            }
+
+            let score = fields[1].parse::<i32>().context("Failed to parse score")?;
+            let chr1 = fields[2].to_string();
+            let start1 = fields[3].parse::<i32>().context("Failed to parse start1")?;
+            let strand1 = fields[4].chars().next().unwrap();
+            let size1 = fields[5].parse::<i32>().context("Failed to parse size1")?;
+            let chr2 = fields[7].to_string();
+            let start2 = fields[8].parse::<i32>().context("Failed to parse start2")?;
+            let strand2 = fields[9].chars().next().unwrap();
+            let size2 = fields[10].parse::<i32>().context("Failed to parse size2")?;
+            let chain_id = fields[12]
+                .parse::<i32>()
+                .context("Failed to parse chain_id")?;
+            let block_sizes = Vec::new();
+            let q_starts = Vec::new();
+            let t_starts = Vec::new();
+
+            current_record = Some(ChainRecord {
+                score,
+                chr1,
+                start1,
+                strand1,
+                size1,
+                chr2,
+                start2,
+                strand2,
+                size2,
+                chain_id,
+                block_sizes,
+                q_starts,
+                t_starts,
+            });
+        } else if line.is_empty() {
+            if let Some(record) = current_record.take() {
+                records.entry(record.chr1.clone()).or_default().push(record)
+            }
+        } else if let Some(ref mut record) = current_record {
+            let fields: Vec<i32> = line
+                .split_whitespace()
+                .map(|s| s.parse::<i32>().context("Failed to parse block field"))
+                .collect::<anyhow::Result<Vec<i32>>>()?;
+
+            if fields.len() == 3 {
+                record.block_sizes.push(fields[0]);
+                record.q_starts.push(fields[1]);
+                record.t_starts.push(fields[2]);
+            }
+        }
+    }
+
+    Ok(records)
+}
+
+// Find the ChainRecord corresponding to the given chromosome and query position
+fn find_chain_record<'a>(
+    records: &'a HashMap<String, Vec<ChainRecord>>,
+    chr: &str,
+    pos: i32,
+) -> Option<&'a ChainRecord> {
+    if let Some(chain_records) = records.get(chr) {
+        for record in chain_records {
+            if pos >= record.start1 && pos < record.start1 + record.size1 {
+                return Some(record);
+            }
+        }
+    }
+    None
+}
+
+pub struct LiftOver {
+    chain_file: String,
+    records: HashMap<String, Vec<ChainRecord>>,
+}
+
+impl LiftOver {
+    pub fn init(chain_file: &str) -> anyhow::Result<Self> {
+        Ok(LiftOver {
+            chain_file: chain_file.to_string(),
+            records: parse_chain_file(chain_file)?,
+        })
+    }
+
+    pub fn lift(&self, query_chr: &str, query_pos: i32) -> (String, i32) {
+        if let Some(record) = find_chain_record(&self.records, query_chr, query_pos) {
+            if let Some((target_chr, target_pos)) = record.query_to_target(query_pos) {
+                return (target_chr, target_pos)
+            }
+        }
+        (query_pos.to_string(), query_pos)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn it_works() -> anyhow::Result<()> {
+        let records = parse_chain_file("/data/ref/hs1/chm13v2-hg19.chain")?;
+
+        let query_chr = "chr1";
+        let query_pos = 12345;
+
+        if let Some(record) = find_chain_record(&records, query_chr, query_pos) {
+            if let Some((target_chr, target_pos)) = record.query_to_target(query_pos) {
+                println!(
+                "Query position {} on chromosome {} corresponds to target position {} on chromosome {}",
+                query_pos, query_chr, target_pos, target_chr
+            );
+            } else {
+                println!(
+                    "Query position {} on chromosome {} is outside the alignment blocks",
+                    query_pos, query_chr
+                );
+            }
+        } else {
+            println!("{}\t{}", query_chr, query_pos);
+        }
+
+        Ok(())
+    }
+}