use std::{ fs::File, io::{BufReader, Read, Write}, path::Path, }; use anyhow::Context; use log::debug; use noodles_bgzf as bgzf; use crate::io::writers::{finalize_bgzf_file, get_gz_writer}; pub type BGZFReader = bgzf::io::Reader; pub type BGZFWriter = bgzf::io::Writer; pub fn get_reader(path: &str) -> anyhow::Result> { debug!("Reading: {path}"); let file_type = Path::new(path) .extension() .and_then(|s| s.to_str()) .with_context(|| format!("can't parse extension from {path}"))?; anyhow::ensure!( matches!(file_type, "gz" | "vcf" | "bed" | "tsv" | "json" | "chain"), "unknown file type: {file_type}" ); let raw_reader = File::open(path).with_context(|| format!("failed to open {path}"))?; match file_type { "gz" => { let reader = BGZFReader::new(raw_reader); Ok(Box::new(BufReader::new(reader))) } "vcf" | "bed" | "tsv" | "json" | "chain" => Ok(Box::new(BufReader::new(raw_reader))), _ => unreachable!(), } } pub fn get_gz_reader(path: &str) -> anyhow::Result> { debug!("Reading: {path}"); let file_type = Path::new(path) .extension() .and_then(|s| s.to_str()) .with_context(|| format!("can't parse extension from {path}"))?; let path = if file_type != "gz" { compress_to_bgzip(path)? } else { path.to_string() }; let file = File::open(&path).with_context(|| format!("failed to open BGZF file: {path}"))?; Ok(BGZFReader::new(file)) } pub fn compress_to_bgzip(input_path: &str) -> anyhow::Result { let output_path = format!("{input_path}.gz"); if Path::new(&output_path).exists() { return Ok(output_path); } debug!("Compressing {input_path}"); let input_file = File::open(input_path) .with_context(|| format!("failed to open input file: {input_path}"))?; let mut reader = BufReader::new(input_file); let mut writer = get_gz_writer(&output_path, false)?; let mut buffer = [0u8; 8192]; loop { let n = reader .read(&mut buffer) .with_context(|| format!("failed reading input file: {input_path}"))?; if n == 0 { break; } writer .write_all(&buffer[..n]) .with_context(|| format!("failed writing BGZF file: {output_path}"))?; } finalize_bgzf_file(writer, &output_path)?; Ok(output_path) }