vcf.rs 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. use anyhow::{anyhow, Context};
  2. use chrono::{DateTime, Utc};
  3. use csi::binning_index::ReferenceSequence;
  4. use glob::glob;
  5. use log::warn;
  6. use std::{fs::Metadata, os::unix::fs::MetadataExt, path::PathBuf};
  7. use noodles_csi as csi;
  8. use num_format::{Locale, ToFormattedString};
  9. #[derive(Debug)]
  10. pub struct Vcf {
  11. pub id: String,
  12. pub caller: String,
  13. pub time_point: String,
  14. pub path: PathBuf,
  15. pub file_metadata: Metadata,
  16. pub n_variants: u64,
  17. }
  18. impl Vcf {
  19. pub fn new(path: PathBuf) -> anyhow::Result<Self> {
  20. let stem = path
  21. .file_stem()
  22. .context("Can't parse stem")?
  23. .to_string_lossy()
  24. .to_string();
  25. let stem_splt: Vec<&str> = stem.split('_').collect();
  26. let id = stem_splt[0].to_string();
  27. let time_point = stem_splt[1].to_string();
  28. let caller = stem_splt[2..stem_splt.len() - 1].join("_");
  29. if !PathBuf::from(format!("{}.csi", path.display())).exists() {
  30. return Err(anyhow!("No csi for {}", path.display()));
  31. }
  32. let n_variants = n_variants(path.to_str().context("Can't convert path to str")?)?;
  33. let file_metadata = path.metadata()?;
  34. Ok(Self {
  35. id,
  36. caller,
  37. time_point,
  38. path,
  39. file_metadata,
  40. n_variants,
  41. })
  42. }
  43. pub fn modified(&self) -> anyhow::Result<DateTime<Utc>> {
  44. Ok(self.file_metadata.modified().unwrap().into())
  45. }
  46. pub fn size(&self) -> u64 {
  47. self.file_metadata.size()
  48. }
  49. pub fn tsv(&self) -> anyhow::Result<String> {
  50. Ok([
  51. self.id.clone(),
  52. self.time_point.clone(),
  53. self.caller.clone(),
  54. self.n_variants.to_string(),
  55. self.modified()?.to_string(),
  56. self.size().to_string(),
  57. self.path.display().to_string(),
  58. ]
  59. .join("\t"))
  60. }
  61. pub fn println(&self) -> anyhow::Result<()> {
  62. let formated_n_variants = self.n_variants.to_formatted_string(&Locale::en);
  63. let formated_modified = self.modified()?.naive_local().to_string();
  64. let formated_size = format!("{:#}", byte_unit::Byte::from_u64(self.size()));
  65. println!(
  66. "{}",
  67. [
  68. self.id.to_string(),
  69. self.time_point.to_string(),
  70. self.caller.to_string(),
  71. formated_n_variants,
  72. formated_modified,
  73. formated_size,
  74. self.path.display().to_string()
  75. ]
  76. .join("\t")
  77. );
  78. Ok(())
  79. }
  80. }
  81. #[derive(Debug)]
  82. pub struct VcfCollection {
  83. pub vcfs: Vec<Vcf>,
  84. }
  85. impl VcfCollection {
  86. // pub fn print_tsv(&self) {
  87. // for vcf in self.vcfs.iter() {}
  88. // }
  89. pub fn sort_by_id(&mut self) {
  90. self.vcfs.sort_by_key(|v| v.id.clone());
  91. }
  92. }
  93. pub fn load_vcf_collection(result_dir: &str) -> VcfCollection {
  94. let mut vcfs = Vec::new();
  95. let pattern = format!("{}/*/*/*/*_PASSED.vcf.gz", result_dir);
  96. for entry in glob(&pattern).expect("Failed to read glob pattern") {
  97. match entry {
  98. Ok(path) => match Vcf::new(path) {
  99. Ok(vcf) => vcfs.push(vcf),
  100. Err(e) => warn!("{e}"),
  101. },
  102. Err(e) => warn!("Error: {:?}", e),
  103. }
  104. }
  105. VcfCollection { vcfs }
  106. }
  107. pub fn n_variants(path: &str) -> anyhow::Result<u64> {
  108. let csi_src = format!("{path}.csi");
  109. let index = csi::read(csi_src)?;
  110. let mut n = 0;
  111. for reference_sequence in index.reference_sequences() {
  112. if let Some(metadata) = reference_sequence.metadata() {
  113. n += metadata.mapped_record_count()
  114. }
  115. }
  116. Ok(n)
  117. }