readers.rs 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. use std::{
  2. fs::File,
  3. io::{BufReader, Read, Write},
  4. path::Path,
  5. };
  6. use anyhow::Context;
  7. use log::debug;
  8. use noodles_bgzf as bgzf;
  9. use crate::io::writers::{finalize_bgzf_file, get_gz_writer};
/// BGZF reader over an underlying byte source `R` (alias for `noodles_bgzf::io::Reader<R>`).
pub type BGZFReader<R> = bgzf::io::Reader<R>;
/// BGZF writer over an underlying byte sink `W` (alias for `noodles_bgzf::io::Writer<W>`).
pub type BGZFWriter<W> = bgzf::io::Writer<W>;
  12. pub fn get_reader(path: &str) -> anyhow::Result<Box<dyn Read>> {
  13. debug!("Reading: {path}");
  14. let file_type = Path::new(path)
  15. .extension()
  16. .and_then(|s| s.to_str())
  17. .with_context(|| format!("can't parse extension from {path}"))?;
  18. anyhow::ensure!(
  19. matches!(file_type, "gz" | "vcf" | "bed" | "tsv" | "json" | "chain"),
  20. "unknown file type: {file_type}"
  21. );
  22. let raw_reader = File::open(path).with_context(|| format!("failed to open {path}"))?;
  23. match file_type {
  24. "gz" => {
  25. let reader = BGZFReader::new(raw_reader);
  26. Ok(Box::new(BufReader::new(reader)))
  27. }
  28. "vcf" | "bed" | "tsv" | "json" | "chain" => Ok(Box::new(BufReader::new(raw_reader))),
  29. _ => unreachable!(),
  30. }
  31. }
  32. pub fn get_gz_reader(path: &str) -> anyhow::Result<BGZFReader<File>> {
  33. debug!("Reading: {path}");
  34. let file_type = Path::new(path)
  35. .extension()
  36. .and_then(|s| s.to_str())
  37. .with_context(|| format!("can't parse extension from {path}"))?;
  38. let path = if file_type != "gz" {
  39. compress_to_bgzip(path)?
  40. } else {
  41. path.to_string()
  42. };
  43. let file = File::open(&path).with_context(|| format!("failed to open BGZF file: {path}"))?;
  44. Ok(BGZFReader::new(file))
  45. }
  46. pub fn compress_to_bgzip(input_path: &str) -> anyhow::Result<String> {
  47. let output_path = format!("{input_path}.gz");
  48. if Path::new(&output_path).exists() {
  49. return Ok(output_path);
  50. }
  51. debug!("Compressing {input_path}");
  52. let input_file = File::open(input_path)
  53. .with_context(|| format!("failed to open input file: {input_path}"))?;
  54. let mut reader = BufReader::new(input_file);
  55. let mut writer = get_gz_writer(&output_path, false)?;
  56. let mut buffer = [0u8; 8192];
  57. loop {
  58. let n = reader
  59. .read(&mut buffer)
  60. .with_context(|| format!("failed reading input file: {input_path}"))?;
  61. if n == 0 {
  62. break;
  63. }
  64. writer
  65. .write_all(&buffer[..n])
  66. .with_context(|| format!("failed writing BGZF file: {output_path}"))?;
  67. }
  68. finalize_bgzf_file(writer, &output_path)?;
  69. Ok(output_path)
  70. }