| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- use std::{
- fs::File,
- io::{BufReader, Read, Write},
- path::Path,
- };
- use anyhow::Context;
- use bgzip::{BGZFReader, BGZFWriter, Compression};
- use log::debug;
- pub fn get_reader(path: &str) -> anyhow::Result<Box<dyn std::io::Read>> {
- debug!("Reading: {path}");
- let file_type = *path
- .split(".")
- .collect::<Vec<&str>>()
- .last()
- .context(format!("Can't parse {path}"))?;
- assert!(file_type == "gz" || file_type == "vcf" || file_type == "bed" || file_type == "tsv");
- let raw_reader: Box<dyn std::io::Read> = Box::new(File::open(path)?);
- match file_type {
- "gz" => {
- let reader = Box::new(BGZFReader::new(raw_reader)?);
- Ok(Box::new(BufReader::new(reader)))
- }
- "vcf" | "bed" | "tsv" => Ok(Box::new(BufReader::new(raw_reader))),
- t => {
- panic!("unknown file type: {}", t)
- }
- }
- }
- pub fn get_gz_reader(path: &str) -> anyhow::Result<BGZFReader<File>> {
- debug!("Reading: {path}");
- let file_type = *path
- .split(".")
- .collect::<Vec<&str>>()
- .last()
- .context("Can't parse {path}.")?;
- let path = if file_type != "gz" {
- compress_to_bgzip(path)?
- } else {
- path.to_string()
- };
- let reader = File::open(path)?;
- Ok(BGZFReader::new(reader)?)
- }
- pub fn compress_to_bgzip(input_path: &str) -> anyhow::Result<String> {
- let output_path = format!("{}.gz", input_path);
- if Path::new(&output_path).exists() {
- return Ok(output_path);
- }
- debug!("Compressing {input_path}");
- let input_file = File::open(input_path)?;
- let mut reader = BufReader::new(input_file);
- let output_file = File::create(&output_path)?;
- let mut writer = BGZFWriter::new(output_file, Compression::default());
- let mut buffer = [0; 8192];
- loop {
- let bytes_read = reader.read(&mut buffer)?;
- if bytes_read == 0 {
- break;
- }
- writer.write_all(&buffer[..bytes_read])?;
- }
- writer.close()?;
- Ok(output_path)
- }
|