Browse Source

demux + merge

Thomas 1 year ago
parent
commit
39730822d3
4 changed files with 341 additions and 159 deletions
  1. 220 48
      src/commands/dorado.rs
  2. 39 0
      src/config.rs
  3. 67 91
      src/lib.rs
  4. 15 20
      src/vcf.rs

+ 220 - 48
src/commands/dorado.rs

@@ -9,24 +9,26 @@ use duct::cmd;
 use log::{info, warn};
 use uuid::Uuid;
 
+use crate::{config::Config, pod5::FlowCellCase};
+
 pub trait Run {
     fn run(self) -> anyhow::Result<()>;
 }
 
 #[derive(Debug, Clone)]
-pub struct DoradoConfig {
+pub struct DoradoParams {
     pub ref_fa: String,
     pub ref_mmi: String,
     pub name: String,
     pub time: String,
     pub pod_dir: String,
-    pub dorado_threads: u16,
     pub samtools_view_threads: u16,
     pub samtools_sort_threads: u16,
 }
 
 pub struct Dorado {
-    config: DoradoConfig,
+    config: Config,
+    case: FlowCellCase,
     case_dir: String,
     time_dir: String,
     bam: String,
@@ -37,11 +39,11 @@ pub struct Dorado {
 }
 
 impl Dorado {
-    pub fn init(config: DoradoConfig) -> anyhow::Result<Self> {
+    pub fn init(case: FlowCellCase, config: Config) -> anyhow::Result<Self> {
         let data_dir = "/data/longreads_basic_pipe";
-        let case_dir = format!("{}/{}", data_dir, config.name);
-        let time_dir = format!("{}/{}", case_dir, config.time);
-        let bam = format!("{}/{}_{}_hs1.bam", time_dir, config.name, config.time);
+        let case_dir = format!("{}/{}", data_dir, case.id);
+        let time_dir = format!("{}/{}", case_dir, case.time_point);
+        let bam = format!("{}/{}_{}_hs1.bam", time_dir, case.id, case.time_point);
 
         Ok(Self {
             config,
@@ -52,11 +54,20 @@ impl Dorado {
             case_dir,
             time_dir,
             bam,
+            case,
         })
     }
     fn create_reference_mmi(&self) -> anyhow::Result<()> {
-        if !std::path::Path::new(&self.config.ref_mmi).exists() {
-            cmd!("minimap2", "-x", "map-ont", "-d", &self.config.ref_mmi, &self.config.ref_fa).run()?;
+        if !std::path::Path::new(&self.config.align.ref_mmi).exists() {
+            cmd!(
+                "minimap2",
+                "-x",
+                "map-ont",
+                "-d",
+                &self.config.align.ref_mmi,
+                &self.config.align.ref_fa
+            )
+            .run()?;
         }
         Ok(())
     }
@@ -72,11 +83,11 @@ impl Dorado {
     }
 
     fn basecall_align(&mut self, dorado_bin: &str) -> anyhow::Result<()> {
-        let pod_dir = &self.config.pod_dir;
-        let ref_mmi = &self.config.ref_mmi;
+        let pod_dir = &self.case.pod_dir.display();
+        let ref_mmi = &self.config.align.ref_mmi;
         let bam = &self.bam;
-        let samtools_view_threads = self.config.samtools_view_threads;
-        let samtools_sort_threads = self.config.samtools_sort_threads;
+        let samtools_view_threads = self.config.align.samtools_view_threads;
+        let samtools_sort_threads = self.config.align.samtools_sort_threads;
 
         let dorado = format!(
             "{dorado_bin} basecaller sup,5mC_5hmC {pod_dir} --trim all --reference {ref_mmi}"
@@ -85,6 +96,7 @@ impl Dorado {
         let samtools_sort = format!("samtools sort -@ {samtools_sort_threads} /dev/stdin -o {bam}");
         let pipe = format!("{dorado} | {samtools_view} | {samtools_sort}");
         info!("Running: {pipe}");
+
         let pipe_cmd = cmd!("bash", "-c", pipe);
         let mut reader = pipe_cmd.stdout_capture().reader()?;
 
@@ -117,45 +129,53 @@ impl Dorado {
         Ok(())
     }
 
+    /// Indexes `self.bam` by running `samtools index`.
+    ///
+    /// The `-@` thread count is taken from `config.align.samtools_view_threads`.
+    ///
+    /// # Errors
+    /// Returns the error reported by the `samtools` invocation.
+    pub fn index(&self) -> anyhow::Result<()> {
+        let threads = self.config.align.samtools_view_threads.to_string();
+        // Only built to show the equivalent command line in the log.
+        let shown_args = format!("index -@ {threads} {}", &self.bam);
+        info!("Running  samtools {shown_args}");
+
+        cmd!("samtools", "index", "-@", &threads, &self.bam)
+            .run()
+            .map(|_output| ())
+            .map_err(anyhow::Error::from)
+    }
+
     fn run_cramino(&self) -> anyhow::Result<()> {
         let cramino_out = format!(
             "{}/{}_{}_hs1_cramino.txt",
-            self.time_dir, self.config.name, self.config.time
+            self.time_dir, self.case.id, self.case.time_point
         );
-        if !Path::new(&cramino_out).exists() {
-            info!("Quality control of BAM: {}", self.bam);
-            let output = duct::cmd!(
-                "cramino",
-                "-t",
-                "150",
-                "--hist",
-                "--checksum",
-                "--karyotype",
-                &self.bam
-            )
-            .stdout_capture()
-            .unchecked()
-            .run()?;
+        // if !Path::new(&cramino_out).exists() {
+        info!("Quality control of BAM: {}", self.bam);
+        let output = duct::cmd!(
+            "cramino",
+            "-t",
+            "150",
+            "--hist",
+            "--checksum",
+            "--karyotype",
+            &self.bam
+        )
+        .stdout_capture()
+        .unchecked()
+        .run()?;
 
-            fs::write(&cramino_out, output.stdout)?;
-        }
+        fs::write(cramino_out, output.stdout)?;
+        // }
         Ok(())
     }
 
     fn run_modkit(&self) -> anyhow::Result<()> {
         let mod_summary = format!(
             "{}/{}_{}_5mC_5hmC_summary.txt",
-            self.time_dir, self.config.name, self.config.time
+            self.time_dir, self.case.id, self.case.time_point
         );
-        if !Path::new(&mod_summary).exists() {
-            info!("Generating base modification summary for BAM: {}", self.bam);
-            let output = cmd!("modkit", "summary", "-t", "50", &self.bam)
-                .stdout_capture()
-                .unchecked()
-                .run()?;
-
-            fs::write(&mod_summary, output.stdout)?;
-        }
+        // if !Path::new(&mod_summary).exists() {
+        info!("Generating base modification summary for BAM: {}", self.bam);
+        let output = cmd!("modkit", "summary", "-t", "50", &self.bam)
+            .stdout_capture()
+            .unchecked()
+            .run()?;
+
+        fs::write(mod_summary, output.stdout)?;
+        // }
         Ok(())
     }
 
@@ -163,7 +183,7 @@ impl Dorado {
         let bam = &self.bam;
         let fastq = format!(
             "{}/{}/{}/{}_{}.fastq.gz",
-            self.case_dir, self.config.name, self.config.time, self.config.name, self.config.time
+            self.case_dir, self.case.id, self.case.time_point, self.case.id, self.case.time_point
         );
         if !std::path::Path::new(&fastq).exists() {
             let samtools = format!("samtools fastq -@ 150 {bam}");
@@ -179,21 +199,170 @@ impl Dorado {
+    /// Merges the reads of `bam` into the existing case BAM (`self.bam`).
+    ///
+    /// The current `self.bam` and its `.bai` index are first renamed to UUID-named files in
+    /// the same directory, then `samtools merge` writes a new `self.bam` from `bam` plus the
+    /// renamed original. The renamed files are deleted afterwards and the result is indexed.
+    ///
+    /// # Errors
+    /// Returns the first error from a rename, removal or `samtools` invocation. If the merge
+    /// itself fails, the original BAM stays under its temporary UUID name.
+    ///
+    /// # Panics
+    /// Panics if `self.bam` has no parent directory or no file name.
     fn merge_bam(&self, bam: &Path) -> anyhow::Result<()> {
         let into = PathBuf::from(&self.bam);
         let dir = into.parent().unwrap();
+
         let original_file = into.file_name().unwrap().to_string_lossy().to_string();
         let original_i = dir.join(format!("{original_file}.bai"));
+        // The index is renamed below, so make sure it exists first.
+        if !original_i.exists() {
+            self.index()?;
+        }
+
         let tmp_original_file = format!("{}.bam", Uuid::new_v4());
         let tmp_original = dir.join(tmp_original_file.clone());
         let tmp_original_i = dir.join(format!("{tmp_original_file}.bai"));
-        fs::rename(bam, tmp_original)?;
-        fs::rename(original_i, tmp_original_i)?;
 
-        cmd!("samtools merge -@ 160 -h {bam} {into} {bam} {tmp_original}").run()?;
+        // Move the existing BAM (not the incoming one) aside so `into` can be rewritten.
+        info!("Moving {} to {}", &into.display(), &tmp_original.display());
+        fs::rename(&into, &tmp_original)?;
+        info!(
+            "Moving {} to {}",
+            &original_i.display(),
+            &tmp_original_i.display()
+        );
+        fs::rename(original_i, tmp_original_i.clone())?;
+
+        // This string is only logged; the command actually run is built by `cmd!` below.
+        let cmd = format!(
+            "samtools merge -@ 160 -h {} {} {} {}",
+            bam.display(),
+            into.display(),
+            bam.display(),
+            tmp_original.display()
+        );
+        info!("Running {cmd}");
+        // Arguments after `-h <bam>`: output file `into`, then the two inputs.
+        cmd!(
+            "samtools",
+            "merge",
+            "-@",
+            "160",
+            "-h",
+            bam,
+            into,
+            bam,
+            tmp_original.clone()
+        )
+        .run()?;
+        // The merged file replaces the original: drop the temporary copies and re-index.
+        fs::remove_file(tmp_original)?;
+        fs::remove_file(tmp_original_i)?;
+        self.index()?;
         Ok(())
     }
-}
 
-impl Run for Dorado {
-    fn run(mut self) -> anyhow::Result<()> {
+    /// Basecalls a multiplexed run, demultiplexes it by barcode and builds one BAM per case.
+    ///
+    /// Steps, all performed inside a hidden temporary directory under `config.result_dir`:
+    /// 1. `dorado basecaller` piped into `samtools view` produces `muxed.bam`.
+    /// 2. The sequencing kit is read from the first `.pod5` file found in the pod5 directory.
+    /// 3. `dorado demux` splits the reads per barcode into `demuxed/`.
+    /// 4. For every case: `dorado trim`, then `dorado aligner | samtools view | samtools sort`.
+    ///    The aligned BAM is merged into the case BAM when one already exists, otherwise it
+    ///    is moved in place and indexed. `run_cramino` and `run_modkit` are then called.
+    /// 5. The temporary directory and everything left in it is removed.
+    ///
+    /// Only the `pod_dir` of the first case is read, so all `cases` are expected to share it.
+    ///
+    /// # Errors
+    /// Fails when `cases` is empty, when no `.pod5` file is found, or when any file-system
+    /// operation or external command fails. The temporary directory is left behind on error.
+    pub fn from_mux(cases: Vec<FlowCellCase>, config: Config) -> anyhow::Result<()> {
+        // Report an empty input as an error instead of panicking on `cases[0]`.
+        let muxed_pod_dir = &cases
+            .first()
+            .ok_or_else(|| anyhow::anyhow!("No case given to demultiplex"))?
+            .pod_dir;
+
+        // Creating a temporary directory
+        let tmp_dir = format!("{}/.{}", config.result_dir, Uuid::new_v4());
+        info!("Creating tmp dir {tmp_dir}");
+        fs::create_dir(&tmp_dir)?;
+
+        // Dorado base calling and align into a temporary bam file
+        let muxed_bam = format!("{tmp_dir}/muxed.bam");
+        let dorado_bin = &config.align.dorado_bin;
+        let dorado_arg = &config.align.dorado_basecall_arg;
+        let pod_dir = muxed_pod_dir.display();
+        let ref_mmi = &config.align.ref_mmi;
+        let samtools_view_threads = config.align.samtools_view_threads;
+
+        let dorado = format!(
+            "{dorado_bin} basecaller {dorado_arg} {pod_dir} --trim all --reference {ref_mmi}"
+        );
+        let samtools_view =
+            format!("samtools view -h -@ {samtools_view_threads} -b -o {muxed_bam}");
+        let pipe = format!("{dorado} | {samtools_view}");
+        info!("Running: {pipe}");
+        let pipe_cmd = cmd!("bash", "-c", pipe);
+        pipe_cmd.run()?;
+
+        info!("Basecalling ✅");
+
+        // Demux the temporary bam file
+        // Get the sequencing kit from the first pod5 file. Entries without an extension are
+        // skipped rather than unwrapped, and a missing pod5 file is an error, not a panic.
+        let pod_path = fs::read_dir(muxed_pod_dir)?
+            .filter_map(|entry| entry.ok())
+            .map(|entry| entry.path())
+            .find(|path| path.extension().map_or(false, |ext| ext == "pod5"))
+            .ok_or_else(|| {
+                anyhow::anyhow!("No pod5 file found in {}", muxed_pod_dir.display())
+            })?;
+        let pod_path_str = pod_path
+            .to_str()
+            .ok_or_else(|| anyhow::anyhow!("Non UTF-8 pod5 path: {}", pod_path.display()))?;
+        let sequencing_kit = pandora_lib_pod5::Pod5Info::from_pod5(pod_path_str)
+            .sequencing_kit
+            .to_uppercase();
+
+        let tmp_demux_dir = format!("{tmp_dir}/demuxed");
+        fs::create_dir(&tmp_demux_dir)?;
+
+        info!("Demux from {sequencing_kit} into {tmp_demux_dir}");
+
+        duct::cmd!(
+            &config.align.dorado_bin,
+            "demux",
+            "--output-dir",
+            &tmp_demux_dir,
+            "--kit-name",
+            &sequencing_kit,
+            &tmp_dir,
+        )
+        .run()?;
+        info!("Demux ✅");
+
+        for case in cases.iter() {
+            let bam = format!(
+                "{tmp_demux_dir}/{sequencing_kit}_barcode{}.bam",
+                case.barcode
+            );
+
+            // Trim
+            let trimmed_bam = format!(
+                "{tmp_demux_dir}/{sequencing_kit}_barcode{}_trimmed.bam",
+                case.barcode
+            );
+            let pipe = format!(
+                "{} trim {bam} | samtools view -h -@ {} -b /dev/stdin -o {trimmed_bam}",
+                config.align.dorado_bin, &config.align.samtools_view_threads
+            );
+            cmd!("bash", "-c", pipe).run()?;
+
+            // Align
+            let aligned_bam = format!(
+                "{tmp_demux_dir}/{sequencing_kit}_barcode{}_aligned.bam",
+                case.barcode
+            );
+            let dorado = format!(
+                "{} aligner --threads 160 {} {trimmed_bam}",
+                config.align.dorado_bin, config.align.ref_fa,
+            );
+            let samtools_view = format!(
+                "samtools view -h -@ {} -b /dev/stdin",
+                &config.align.samtools_view_threads
+            );
+            let samtools_sort = format!(
+                "samtools sort -@ {} /dev/stdin -o {aligned_bam}",
+                &config.align.samtools_sort_threads
+            );
+            let pipe = format!("{dorado} | {samtools_view} | {samtools_sort}");
+            info!("Running {pipe}");
+
+            cmd!("bash", "-c", pipe).run()?;
+
+            let d = Dorado::init(case.clone(), config.clone())?;
+            d.create_directories()?;
+
+            if Path::new(&d.bam).exists() {
+                info!("merge");
+                d.merge_bam(Path::new(&aligned_bam))?;
+            } else {
+                // Log the file that is actually moved (the aligned BAM).
+                info!("Moving from {} to {}", aligned_bam, d.bam);
+                fs::rename(&aligned_bam, &d.bam)?;
+                d.index()?;
+            }
+
+            d.run_cramino()?;
+            d.run_modkit()?;
+        }
+        // The temp dir still holds muxed.bam and demuxed/; `remove_dir` only removes an
+        // empty directory and would always fail here.
+        fs::remove_dir_all(&tmp_dir)?;
+
+        Ok(())
+    }
+
+    pub fn run_pipe(&mut self) -> anyhow::Result<()> {
         let start_time = std::time::SystemTime::now();
         self.start_time = start_time;
 
@@ -204,7 +373,10 @@ impl Run for Dorado {
         self.create_reference_mmi()?;
         self.create_directories()?;
 
-        info!("Reading {} pod5 from: {}", self.config.time, self.config.pod_dir);
+        info!(
+            "Reading {} pod5 from: {}",
+            self.case.time_point, self.config.pod_dir
+        );
         let bam_path = std::path::Path::new(&self.bam);
 
         if !bam_path.exists() {

+ 39 - 0
src/config.rs

@@ -0,0 +1,39 @@
+/// Settings handed to the pipeline commands (`Dorado::init`, `Dorado::from_mux`).
+#[derive(Debug, Clone)]
+pub struct Config {
+    /// Presumably the root directory holding the pod5 run data (defaults to
+    /// `/data/run_data`) — TODO confirm against callers.
+    pub pod_dir: String,
+    /// Results directory; `Dorado::from_mux` creates its temporary working directory in it.
+    pub result_dir: String,
+    /// Basecalling and alignment settings.
+    pub align: AlignConfig,
+}
+
+impl Default for Config {
+    /// Built-in configuration: data under `/data`, default alignment settings.
+    fn default() -> Self {
+        let pod_dir = String::from("/data/run_data");
+        let result_dir = String::from("/data/longreads_basic_pipe");
+        Config {
+            pod_dir,
+            result_dir,
+            align: AlignConfig::default(),
+        }
+    }
+}
+
+/// Settings for the basecalling / alignment commands.
+#[derive(Debug, Clone)]
+pub struct AlignConfig {
+    /// Path of the `dorado` executable.
+    pub dorado_bin: String,
+    /// Argument placed right after `dorado basecaller` (defaults to `sup,5mC_5hmC`).
+    pub dorado_basecall_arg: String,
+    /// Reference FASTA, given to `minimap2 -d` and to `dorado aligner`.
+    pub ref_fa: String,
+    /// minimap2 index (`.mmi`) of the reference, given to `dorado basecaller --reference`.
+    pub ref_mmi: String,
+    /// `-@` value for `samtools view` (also used for `samtools index`).
+    pub samtools_view_threads: u16,
+    /// `-@` value for `samtools sort`.
+    pub samtools_sort_threads: u16,
+}
+
+impl Default for AlignConfig {
+    /// Built-in alignment settings: dorado 0.7.2, CHM13 v2.0 reference, 20 view threads
+    /// and 30 sort threads.
+    fn default() -> Self {
+        let dorado_bin = String::from("/data/tools/dorado-0.7.2-linux-x64/bin/dorado");
+        let dorado_basecall_arg = String::from("sup,5mC_5hmC");
+        let ref_fa = String::from("/data/ref/hs1/chm13v2.0.fa");
+        let ref_mmi = String::from("/data/ref/chm13v2.0.mmi");
+        AlignConfig {
+            dorado_bin,
+            dorado_basecall_arg,
+            ref_fa,
+            ref_mmi,
+            samtools_view_threads: 20,
+            samtools_sort_threads: 30,
+        }
+    }
+}

+ 67 - 91
src/lib.rs

@@ -1,13 +1,15 @@
-use std::path::PathBuf;
+use std::{collections::HashMap, path::PathBuf};
 
 use bam::BamCollection;
-use commands::dorado::{Dorado, DoradoConfig, Run};
-use log::{info, warn};
-use pod5::{Pod5Collection, Pod5Type};
-use vcf::{load_vcf_collection, VcfCollection};
+use commands::dorado::{Dorado, Run};
+use config::Config;
+use log::info;
+use pod5::{FlowCellCase, Pod5Collection, Pod5Type};
+use vcf::VcfCollection;
 
 pub mod bam;
 pub mod commands;
+pub mod config;
 pub mod modkit;
 pub mod pod5;
 mod vcf;
@@ -22,15 +24,10 @@ pub struct Collections {
 
 impl Collections {
     pub fn new(pod_dir: &str, corrected_fc_path: &str, result_dir: &str) -> anyhow::Result<Self> {
-        // let pod5 = Pod5Collection::import_dir(
-        //     "/data/run_data",
-        //     "/data/flow_cells.tsv",
-        //     "/data/longreads_basic_pipe",
-        // )?;
         let pod5 = Pod5Collection::import_dir(pod_dir, corrected_fc_path, result_dir)?;
-
         let bam = BamCollection::new(result_dir);
-        let vcf = load_vcf_collection(result_dir);
+        let vcf = VcfCollection::new(result_dir);
+
         Ok(Self {
             pod5,
             bam,
@@ -41,102 +38,79 @@ impl Collections {
 
     pub fn todo(&mut self) {
         info!("Looking for base calling tasks...");
+        let mut to_demux = Vec::new();
 
         // let bams_acquisitions_ids = self.bam.by_acquisition_id();
         for run in self.pod5.runs.iter() {
             for fc in run.flowcells.iter() {
                 let acq_id = fc.pod5_info.acquisition_id.clone();
                 for case in fc.cases.iter() {
-                    let bams = self.bam.get(&case.id, &case.time_point);
-                    if bams.is_empty() {
-                        self.tasks.push(CollectionsTasks::CreateBam {
-                            id: case.id.clone(),
-                            time_point: case.time_point.clone(),
-                            pod5_type: fc.pod5_type.clone(),
-                            pod5_dir: case.pod_dir.clone(),
+                    let bams_ids: Vec<String> = self
+                        .bam
+                        .get(&case.id, &case.time_point)
+                        .iter()
+                        .flat_map(|b| {
+                            b.composition
+                                .iter()
+                                .map(|c| c.0.clone())
+                                .collect::<Vec<String>>()
                         })
-                    } else {
-                        let acq_ids: Vec<String> = bams
-                            .iter()
-                            .flat_map(|b| {
-                                b.composition.iter().map(|(acq_id, _)| acq_id.to_string())
-                            })
-                            .collect();
-
-                        if !acq_ids.contains(&acq_id) {
-                            self.tasks.push(CollectionsTasks::CompleteBam {
-                                id: case.id.clone(),
-                                time_point: case.time_point.clone(),
-                                pod5_type: fc.pod5_type.clone(),
-                                pod5_dir: case.pod_dir.clone(),
-                            });
+                        .filter(|id| *id == acq_id)
+                        .collect();
+                    if bams_ids.is_empty() {
+                        match fc.pod5_type {
+                            Pod5Type::Raw => to_demux.push(case.clone()),
+                            Pod5Type::Demuxed => {
+                                self.tasks.push(CollectionsTasks::Align(case.clone()))
+                            }
                         }
                     }
                 }
             }
         }
+
+        let mut grouped: HashMap<PathBuf, Vec<FlowCellCase>> = HashMap::new();
+        for case in to_demux {
+            grouped
+                .entry(case.pod_dir.clone())
+                .or_default()
+                .push(case);
+        }
+        grouped
+            .into_values()
+            .for_each(|data| self.tasks.push(CollectionsTasks::DemuxAlign(data)));
     }
 
+    /// Runs every pending task, collecting them first with `todo` when the queue is empty.
+    ///
+    /// `todo` is called at most once: when it finds nothing to do the method returns
+    /// `Ok(())` instead of calling itself again on a queue that is still empty.
+    ///
+    /// # Errors
+    /// Stops at, and returns, the first task error.
-    pub fn run(&mut self) {
+    pub fn run(&mut self) -> anyhow::Result<()> {
         if self.tasks.is_empty() {
             self.todo();
-            self.run();
-        } else {
-            while let Some(task) = self.tasks.pop() {
-                task.run();
-            }
+        }
+        while let Some(task) = self.tasks.pop() {
+            task.run()?;
         }
+        Ok(())
     }
 }
 
 #[derive(Debug)]
 pub enum CollectionsTasks {
-    CreateBam {
-        id: String,
-        time_point: String,
-        pod5_type: Pod5Type,
-        pod5_dir: PathBuf,
-    },
-    CompleteBam {
-        id: String,
-        time_point: String,
-        pod5_type: Pod5Type,
-        pod5_dir: PathBuf,
-    },
+    Align(FlowCellCase),
+    DemuxAlign(Vec<FlowCellCase>),
 }
 
-impl CollectionsTasks {
-    pub fn run(&self) {
+impl Run for CollectionsTasks {
+    fn run(self) -> anyhow::Result<()> {
         match self {
-            CollectionsTasks::CreateBam {
-                id,
-                time_point,
-                pod5_type,
-                pod5_dir,
-            } => {
-                if *pod5_type != Pod5Type::Raw {
-                    let d = Dorado::init(DoradoConfig {
-                        ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
-                        ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
-                        name: id.to_string(),
-                        time: time_point.to_string(),
-                        pod_dir: pod5_dir.display().to_string(),
-                        dorado_threads: 150,
-                        samtools_view_threads: 20,
-                        samtools_sort_threads: 30,
-                    })
-                    .unwrap();
-                    d.run().unwrap();
-                }
+            CollectionsTasks::Align(case) => {
+                Dorado::init(case.clone(), Config::default())?.run_pipe()?;
             }
-
-            CollectionsTasks::CompleteBam {
-                id: _,
-                time_point: _,
-                pod5_type: _,
-                pod5_dir: _,
-            } => warn!("TODO"),
+            CollectionsTasks::DemuxAlign(cases) => {
+                Dorado::from_mux(cases, Config::default())?;
+            },
         }
+        Ok(())
     }
 }
 
@@ -155,17 +129,9 @@ mod tests {
 
     #[test]
     fn run_dorado() -> anyhow::Result<()> {
-        let d = dorado::Dorado::init(dorado::DoradoConfig {
-            ref_fa: "/data/ref/hs1/chm13v2.0.fa".to_string(),
-            ref_mmi: "/data/ref/chm13v2.0.mmi".to_string(),
-            name: "CONSIGNY".to_string(),
-            time: "mrd".to_string(),
-            pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".to_string(),
-            dorado_threads: 150,
-            samtools_view_threads: 20,
-            samtools_sort_threads: 30,
-        })?;
-        d.run()
+        let case = FlowCellCase { 
+            id: "CONSIGNY".to_string(), time_point: "mrd".to_string(), barcode: "07".to_string(), pod_dir: "/data/run_data/20240326-CL/CONSIGNY-MRD-NB07_RICCO-DIAG-NB08/20240326_1355_1E_PAU78333_bc25da25/pod5_pass/barcode07".into() };
+        dorado::Dorado::init(case, Config::default())?.run_pipe()
     }
 
     #[test]
@@ -200,7 +166,7 @@ mod tests {
 
     #[test_log::test]
     fn vcf() -> anyhow::Result<()> {
-        let mut vcf_collection = vcf::load_vcf_collection("/data/longreads_basic_pipe");
+        let mut vcf_collection = VcfCollection::new("/data/longreads_basic_pipe");
         vcf_collection.sort_by_id();
 
         vcf_collection
@@ -219,7 +185,17 @@ mod tests {
             "/data/longreads_basic_pipe",
         )?;
         // collections.vcf.print_tsv();
-        collections.run();
+        collections.run()?;
         Ok(())
     }
+
+    /// Runs the demux pipeline on the `/data/test_d` run for barcodes 04 and 05.
+    #[test_log::test]
+    fn mux() -> anyhow::Result<()> {
+        let cases = ["04", "05"]
+            .iter()
+            .map(|barcode| FlowCellCase {
+                id: format!("test_{barcode}"),
+                time_point: "diag".to_string(),
+                barcode: barcode.to_string(),
+                pod_dir: "/data/test_d".into(),
+            })
+            .collect();
+        Dorado::from_mux(cases, Config::default())
+    }
 }

+ 15 - 20
src/vcf.rs

@@ -95,32 +95,27 @@ pub struct VcfCollection {
 }
 
 impl VcfCollection {
-    // pub fn print_tsv(&self) {
-    //     for vcf in self.vcfs.iter() {}
-    // }
+    /// Collects every VCF matching `<result_dir>/*/*/*/*_PASSED.vcf.gz`.
+    ///
+    /// Paths that cannot be read or parsed are logged with `warn!` and skipped.
+    ///
+    /// # Panics
+    /// Panics if the glob pattern built from `result_dir` is invalid.
+    pub fn new(result_dir: &str) -> Self {
+        let pattern = format!("{}/*/*/*/*_PASSED.vcf.gz", result_dir);
+        let vcfs = glob(&pattern)
+            .expect("Failed to read glob pattern")
+            .filter_map(|entry| match entry {
+                Ok(path) => Vcf::new(path).map_err(|e| warn!("{e}")).ok(),
+                Err(e) => {
+                    warn!("Error: {:?}", e);
+                    None
+                }
+            })
+            .collect();
 
+        VcfCollection { vcfs }
+    }
     pub fn sort_by_id(&mut self) {
         self.vcfs.sort_by_key(|v| v.id.clone());
     }
 }
 
-pub fn load_vcf_collection(result_dir: &str) -> VcfCollection {
-    let mut vcfs = Vec::new();
-    let pattern = format!("{}/*/*/*/*_PASSED.vcf.gz", result_dir);
-
-    for entry in glob(&pattern).expect("Failed to read glob pattern") {
-        match entry {
-            Ok(path) => match Vcf::new(path) {
-                Ok(vcf) => vcfs.push(vcf),
-                Err(e) => warn!("{e}"),
-            },
-            Err(e) => warn!("Error: {:?}", e),
-        }
-    }
-
-    VcfCollection { vcfs }
-}
-
 pub fn n_variants(path: &str) -> anyhow::Result<u64> {
     let csi_src = format!("{path}.csi");
     let index = csi::read(csi_src)?;