|
|
@@ -0,0 +1,192 @@
|
|
|
+use std::collections::HashMap;
|
|
|
+use std::fs::File;
|
|
|
+use std::io::{BufRead, BufReader, Result};
|
|
|
+
|
|
|
+use anyhow::Context;
|
|
|
+
|
|
|
+#[derive(Debug)]
|
|
|
+struct ChainRecord {
|
|
|
+ score: i32,
|
|
|
+ chr1: String,
|
|
|
+ start1: i32,
|
|
|
+ strand1: char,
|
|
|
+ size1: i32,
|
|
|
+ chr2: String,
|
|
|
+ start2: i32,
|
|
|
+ strand2: char,
|
|
|
+ size2: i32,
|
|
|
+ chain_id: i32,
|
|
|
+ block_sizes: Vec<i32>,
|
|
|
+ q_starts: Vec<i32>,
|
|
|
+ t_starts: Vec<i32>,
|
|
|
+}
|
|
|
+
|
|
|
+impl ChainRecord {
|
|
|
+ // Convert query position to target position
|
|
|
+ fn query_to_target(&self, query_pos: i32) -> Option<(String, i32)> {
|
|
|
+ if query_pos < self.start1 || query_pos >= self.start1 + self.size1 {
|
|
|
+ return None; // Query position is outside the block range
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut q_pos = self.start1;
|
|
|
+ let mut t_pos = self.start2;
|
|
|
+
|
|
|
+ for i in 0..self.block_sizes.len() {
|
|
|
+ let block_size = self.block_sizes[i];
|
|
|
+ let q_start = self.q_starts[i];
|
|
|
+ let t_start = self.t_starts[i];
|
|
|
+
|
|
|
+ if query_pos >= q_start && query_pos < q_start + block_size {
|
|
|
+ let offset = query_pos - q_start;
|
|
|
+ t_pos += t_start + offset;
|
|
|
+ return Some((self.chr2.clone(), t_pos));
|
|
|
+ }
|
|
|
+
|
|
|
+ q_pos += block_size;
|
|
|
+ t_pos += block_size;
|
|
|
+ }
|
|
|
+
|
|
|
+ None
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn parse_chain_file(filename: &str) -> anyhow::Result<HashMap<String, Vec<ChainRecord>>> {
|
|
|
+ let file = File::open(filename).context("Failed to open chain file")?;
|
|
|
+ let reader = BufReader::new(file);
|
|
|
+
|
|
|
+ let mut records: HashMap<String, Vec<ChainRecord>> = HashMap::new();
|
|
|
+ let mut current_record: Option<ChainRecord> = None;
|
|
|
+
|
|
|
+ for line in reader.lines() {
|
|
|
+ let line = line.context("Failed to read line from chain file")?;
|
|
|
+
|
|
|
+ if line.starts_with("chain") {
|
|
|
+ if let Some(record) = current_record.take() {
|
|
|
+ records.entry(record.chr1.clone()).or_default().push(record);
|
|
|
+ }
|
|
|
+ let fields: Vec<&str> = line.split_whitespace().collect();
|
|
|
+ if fields.len() != 13 {
|
|
|
+ continue; // Invalid line format
|
|
|
+ }
|
|
|
+
|
|
|
+ let score = fields[1].parse::<i32>().context("Failed to parse score")?;
|
|
|
+ let chr1 = fields[2].to_string();
|
|
|
+ let start1 = fields[3].parse::<i32>().context("Failed to parse start1")?;
|
|
|
+ let strand1 = fields[4].chars().next().unwrap();
|
|
|
+ let size1 = fields[5].parse::<i32>().context("Failed to parse size1")?;
|
|
|
+ let chr2 = fields[7].to_string();
|
|
|
+ let start2 = fields[8].parse::<i32>().context("Failed to parse start2")?;
|
|
|
+ let strand2 = fields[9].chars().next().unwrap();
|
|
|
+ let size2 = fields[10].parse::<i32>().context("Failed to parse size2")?;
|
|
|
+ let chain_id = fields[12]
|
|
|
+ .parse::<i32>()
|
|
|
+ .context("Failed to parse chain_id")?;
|
|
|
+ let block_sizes = Vec::new();
|
|
|
+ let q_starts = Vec::new();
|
|
|
+ let t_starts = Vec::new();
|
|
|
+
|
|
|
+ current_record = Some(ChainRecord {
|
|
|
+ score,
|
|
|
+ chr1,
|
|
|
+ start1,
|
|
|
+ strand1,
|
|
|
+ size1,
|
|
|
+ chr2,
|
|
|
+ start2,
|
|
|
+ strand2,
|
|
|
+ size2,
|
|
|
+ chain_id,
|
|
|
+ block_sizes,
|
|
|
+ q_starts,
|
|
|
+ t_starts,
|
|
|
+ });
|
|
|
+ } else if line.is_empty() {
|
|
|
+ if let Some(record) = current_record.take() {
|
|
|
+ records.entry(record.chr1.clone()).or_default().push(record)
|
|
|
+ }
|
|
|
+ } else if let Some(ref mut record) = current_record {
|
|
|
+ let fields: Vec<i32> = line
|
|
|
+ .split_whitespace()
|
|
|
+ .map(|s| s.parse::<i32>().context("Failed to parse block field"))
|
|
|
+ .collect::<anyhow::Result<Vec<i32>>>()?;
|
|
|
+
|
|
|
+ if fields.len() == 3 {
|
|
|
+ record.block_sizes.push(fields[0]);
|
|
|
+ record.q_starts.push(fields[1]);
|
|
|
+ record.t_starts.push(fields[2]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(records)
|
|
|
+}
|
|
|
+
|
|
|
+// Find the ChainRecord corresponding to the given chromosome and query position
|
|
|
+fn find_chain_record<'a>(
|
|
|
+ records: &'a HashMap<String, Vec<ChainRecord>>,
|
|
|
+ chr: &str,
|
|
|
+ pos: i32,
|
|
|
+) -> Option<&'a ChainRecord> {
|
|
|
+ if let Some(chain_records) = records.get(chr) {
|
|
|
+ for record in chain_records {
|
|
|
+ if pos >= record.start1 && pos < record.start1 + record.size1 {
|
|
|
+ return Some(record);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ None
|
|
|
+}
|
|
|
+
|
|
|
+pub struct LiftOver {
|
|
|
+ chain_file: String,
|
|
|
+ records: HashMap<String, Vec<ChainRecord>>,
|
|
|
+}
|
|
|
+
|
|
|
+impl LiftOver {
|
|
|
+ pub fn init(chain_file: &str) -> anyhow::Result<Self> {
|
|
|
+ Ok(LiftOver {
|
|
|
+ chain_file: chain_file.to_string(),
|
|
|
+ records: parse_chain_file(chain_file)?,
|
|
|
+ })
|
|
|
+ }
|
|
|
+
|
|
|
+ pub fn lift(&self, query_chr: &str, query_pos: i32) -> (String, i32) {
|
|
|
+ if let Some(record) = find_chain_record(&self.records, query_chr, query_pos) {
|
|
|
+ if let Some((target_chr, target_pos)) = record.query_to_target(query_pos) {
|
|
|
+ return (target_chr, target_pos)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ (query_pos.to_string(), query_pos)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod tests {
|
|
|
+ use super::*;
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn it_works() -> anyhow::Result<()> {
|
|
|
+ let records = parse_chain_file("/data/ref/hs1/chm13v2-hg19.chain")?;
|
|
|
+
|
|
|
+ let query_chr = "chr1";
|
|
|
+ let query_pos = 12345;
|
|
|
+
|
|
|
+ if let Some(record) = find_chain_record(&records, query_chr, query_pos) {
|
|
|
+ if let Some((target_chr, target_pos)) = record.query_to_target(query_pos) {
|
|
|
+ println!(
|
|
|
+ "Query position {} on chromosome {} corresponds to target position {} on chromosome {}",
|
|
|
+ query_pos, query_chr, target_pos, target_chr
|
|
|
+ );
|
|
|
+ } else {
|
|
|
+ println!(
|
|
|
+ "Query position {} on chromosome {} is outside the alignment blocks",
|
|
|
+ query_pos, query_chr
|
|
|
+ );
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ println!("{}\t{}", query_chr, query_pos);
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|