| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- use std::{
- fs::File,
- io::{BufReader, Read, Write},
- path::Path,
- };
- use anyhow::Context;
- use log::debug;
- use noodles_bgzf as bgzf;
- use crate::io::writers::{finalize_bgzf_file, get_gz_writer};
/// Alias for `noodles_bgzf::io::Reader`, the BGZF reader type returned by this module.
- pub type BGZFReader<R> = bgzf::io::Reader<R>;
/// Alias for `noodles_bgzf::io::Writer`.
- pub type BGZFWriter<W> = bgzf::io::Writer<W>;
- pub fn get_reader(path: &str) -> anyhow::Result<Box<dyn Read>> {
- debug!("Reading: {path}");
- let file_type = Path::new(path)
- .extension()
- .and_then(|s| s.to_str())
- .with_context(|| format!("can't parse extension from {path}"))?;
- anyhow::ensure!(
- matches!(file_type, "gz" | "vcf" | "bed" | "tsv" | "json" | "chain"),
- "unknown file type: {file_type}"
- );
- let raw_reader = File::open(path).with_context(|| format!("failed to open {path}"))?;
- match file_type {
- "gz" => {
- let reader = BGZFReader::new(raw_reader);
- Ok(Box::new(BufReader::new(reader)))
- }
- "vcf" | "bed" | "tsv" | "json" | "chain" => Ok(Box::new(BufReader::new(raw_reader))),
- _ => unreachable!(),
- }
- }
- pub fn get_gz_reader(path: &str) -> anyhow::Result<BGZFReader<File>> {
- debug!("Reading: {path}");
- let file_type = Path::new(path)
- .extension()
- .and_then(|s| s.to_str())
- .with_context(|| format!("can't parse extension from {path}"))?;
- let path = if file_type != "gz" {
- compress_to_bgzip(path)?
- } else {
- path.to_string()
- };
- let file = File::open(&path).with_context(|| format!("failed to open BGZF file: {path}"))?;
- Ok(BGZFReader::new(file))
- }
- pub fn compress_to_bgzip(input_path: &str) -> anyhow::Result<String> {
- let output_path = format!("{input_path}.gz");
- if Path::new(&output_path).exists() {
- return Ok(output_path);
- }
- debug!("Compressing {input_path}");
- let input_file = File::open(input_path)
- .with_context(|| format!("failed to open input file: {input_path}"))?;
- let mut reader = BufReader::new(input_file);
- let mut writer = get_gz_writer(&output_path, false)?;
- let mut buffer = [0u8; 8192];
- loop {
- let n = reader
- .read(&mut buffer)
- .with_context(|| format!("failed reading input file: {input_path}"))?;
- if n == 0 {
- break;
- }
- writer
- .write_all(&buffer[..n])
- .with_context(|| format!("failed writing BGZF file: {output_path}"))?;
- }
- finalize_bgzf_file(writer, &output_path)?;
- Ok(output_path)
- }
|