readers.rs 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. use std::{
  2. fs::File,
  3. io::{BufReader, Read, Write},
  4. path::Path,
  5. };
  6. use anyhow::Context;
  7. use bgzip::{BGZFReader, BGZFWriter, Compression};
  8. use log::debug;
  9. pub fn get_reader(path: &str) -> anyhow::Result<Box<dyn std::io::Read>> {
  10. debug!("Reading: {path}");
  11. let file_type = *path
  12. .split(".")
  13. .collect::<Vec<&str>>()
  14. .last()
  15. .context(format!("Can't parse {path}"))?;
  16. assert!(file_type == "gz" || file_type == "vcf" || file_type == "bed" || file_type == "tsv");
  17. let raw_reader: Box<dyn std::io::Read> = Box::new(File::open(path)?);
  18. match file_type {
  19. "gz" => {
  20. let reader = Box::new(BGZFReader::new(raw_reader)?);
  21. Ok(Box::new(BufReader::new(reader)))
  22. }
  23. "vcf" | "bed" | "tsv" => Ok(Box::new(BufReader::new(raw_reader))),
  24. t => {
  25. panic!("unknown file type: {}", t)
  26. }
  27. }
  28. }
  29. pub fn get_gz_reader(path: &str) -> anyhow::Result<BGZFReader<File>> {
  30. debug!("Reading: {path}");
  31. let file_type = *path
  32. .split(".")
  33. .collect::<Vec<&str>>()
  34. .last()
  35. .context("Can't parse {path}.")?;
  36. let path = if file_type != "gz" {
  37. compress_to_bgzip(path)?
  38. } else {
  39. path.to_string()
  40. };
  41. let reader = File::open(path)?;
  42. Ok(BGZFReader::new(reader)?)
  43. }
  44. pub fn compress_to_bgzip(input_path: &str) -> anyhow::Result<String> {
  45. let output_path = format!("{}.gz", input_path);
  46. if Path::new(&output_path).exists() {
  47. return Ok(output_path);
  48. }
  49. debug!("Compressing {input_path}");
  50. let input_file = File::open(input_path)?;
  51. let mut reader = BufReader::new(input_file);
  52. let output_file = File::create(&output_path)?;
  53. let mut writer = BGZFWriter::new(output_file, Compression::default());
  54. let mut buffer = [0; 8192];
  55. loop {
  56. let bytes_read = reader.read(&mut buffer)?;
  57. if bytes_read == 0 {
  58. break;
  59. }
  60. writer.write_all(&buffer[..bytes_read])?;
  61. }
  62. writer.close()?;
  63. Ok(output_path)
  64. }