Browse Source

clone Variants

Thomas 10 months ago
parent
commit
e7aea9ea63
7 changed files with 31 additions and 690 deletions
  1. 0 566
      :a
  2. 0 1
      src/collection/mod.rs
  3. 0 40
      src/collection/pod5.rs
  4. 0 64
      src/collection/variants.rs
  5. 18 1
      src/lib.rs
  6. 2 16
      src/variant/variant.rs
  7. 11 2
      src/variant/variant_collection.rs

+ 0 - 566
:a

@@ -1,566 +0,0 @@
-use anyhow::{anyhow, Context, Result};
-use chrono::{DateTime, Utc};
-use csv::ReaderBuilder;
-use glob::glob;
-use hashbrown::HashMap;
-use log::{info, warn};
-use rayon::prelude::*;
-use serde::{Deserialize, Serialize};
-use std::{
-    fmt::Display,
-    fs::{self, File, Metadata},
-    io::{self, BufRead},
-    os::unix::fs::MetadataExt,
-    path::PathBuf,
-};
-
-use crate::io::pod5_infos::Pod5Info;
-
-#[derive(Debug, Clone)]
-pub struct Pod5 {
-    pub path: PathBuf,
-    pub pod5_type: Pod5Type,
-    pub run_name: String,
-    pub flowcell_name: String,
-    pub file_metadata: Metadata,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub enum Pod5Type {
-    Raw,
-    Demuxed,
-}
-
-impl Display for Pod5Type {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let s = match self {
-            Pod5Type::Raw => "raw",
-            Pod5Type::Demuxed => "demuxed",
-        };
-        f.write_str(s)
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct Pod5Config {
-    pub base_dir: String,
-    pub type_raw: String,
-    pub type_demuxed: String,
-    pub run_dir_n: u8,
-    pub flowcell_dir_n: u8,
-}
-
-impl Default for Pod5Config {
-    fn default() -> Self {
-        Self {
-            base_dir: "/data/run_data".to_string(),
-            type_raw: "/pod5/".to_string(),
-            type_demuxed: "/pod5_pass/".to_string(),
-            run_dir_n: 0,
-            flowcell_dir_n: 1,
-        }
-    }
-}
-
-impl Pod5 {
-    pub fn from_path(path: &PathBuf, config: &Pod5Config) -> Result<Self> {
-        let s = path
-            .to_str()
-            .context("Can't convert PathBuf to str {path:?}")?;
-        let pod5_type = if s.contains(&config.type_raw) {
-            Pod5Type::Raw
-        } else if s.contains(&config.type_demuxed) {
-            Pod5Type::Demuxed
-        } else {
-            return Err(anyhow!("Can't find the pod5 type {s}"));
-        };
-
-        let file_metadata = fs::metadata(path)?;
-
-        let sr = s.replace(&config.base_dir, "");
-        let components: Vec<&str> = sr.split('/').filter(|c| !c.is_empty()).collect();
-
-        let run_name = components
-            .get(config.run_dir_n as usize)
-            .context("Can't get run_name")?
-            .to_string();
-        let flowcell_name = components
-            .get(config.flowcell_dir_n as usize)
-            .context("Can't get flowcell_name")?
-            .to_string();
-
-        Ok(Self {
-            path: path.to_path_buf(),
-            pod5_type,
-            run_name,
-            flowcell_name,
-            file_metadata,
-        })
-    }
-}
-
-pub fn list_pod_files(dir: &str) -> Result<Vec<Pod5>> {
-    let pattern = format!("{}/**/*.pod5", dir);
-    let mut pod_files = Vec::new();
-
-    let conf = Pod5Config {
-        base_dir: if dir.ends_with('/') {
-            dir.to_string()
-        } else {
-            format!("{dir}/")
-        },
-        ..Pod5Config::default()
-    };
-
-    for entry in glob(&pattern).expect("Failed to read glob pattern") {
-        match entry {
-            Ok(path) => {
-                let p = path.to_str().context("Can't parse path to string {path}")?;
-                if p.contains("/pod5_fail/") || p.contains("/pod5_skip/") {
-                    continue;
-                }
-                match Pod5::from_path(&path, &conf) {
-                    Ok(pod5) => pod_files.push(pod5),
-                    Err(e) => warn!("{e}"),
-                }
-            }
-            Err(e) => warn!("Error: {:?}", e),
-        }
-    }
-    Ok(pod_files)
-}
-
-#[derive(Debug)]
-pub struct Run {
-    pub run_name: String,
-    pub flowcells: Vec<FlowCell>,
-}
-
-#[derive(Debug, Clone)]
-pub struct FlowCell {
-    pub flowcell_name: String,
-    pub corrected_name: String,
-    pub cases: Vec<FlowCellCase>,
-    pub run_name: String,
-    pub pod5_type: Pod5Type,
-    pub pod5_info: Pod5Info,
-    pub pod5: Vec<Pod5>,
-}
-
-// impl FlowCell {
-//     pub fn cases_pod5_dir(&self) -> Vec<PathBuf> {
-//         match self.pod5_type {
-//             Pod5Type::Raw => {
-//                 let p = self.pod5.first().unwrap();
-//                 vec![p.path.parent().unwrap().to_path_buf()]
-//             },
-//             Pod5Type::Demuxed => {
-//                 self.cases.iter().map(|c| {
-//                     let str_barcode = format!("barcode{}", c.barcode);
-//                 })
-//             },
-//         }
-//     }
-// }
-
-#[derive(Debug, Default)]
-pub struct Pod5Collection {
-    pub importation_date: DateTime<Utc>,
-    pub runs: Vec<Run>,
-    pub bam_dir: String,
-    pub pod5_dir: String,
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct FlowCellCase {
-    pub id: String,
-    pub time_point: String,
-    pub barcode: String,
-    pub pod_dir: PathBuf,
-    // pub basecalled: Option<bool>,
-}
-
-impl Pod5Collection {
-    pub fn new(pod5_dir: &str, corrected_fc_path: &str, bam_dir: &str) -> Result<Self> {
-        let pod5 = list_pod_files(pod5_dir)?;
-        info!("n pod5 {}", pod5.len());
-
-        let mut fc: HashMap<String, Vec<Pod5>> = HashMap::new();
-        for pod in pod5 {
-            let k = format!("{}-{}", pod.run_name, pod.flowcell_name);
-            fc.entry(k).or_default().push(pod);
-        }
-
-        let corrected_fc = load_flowcells_corrected_names(corrected_fc_path)?;
-        let flow_cells: Vec<FlowCell> = fc
-            .par_values()
-            .map(|v| {
-                let first = &v[0];
-                let pod5_info = Pod5Info::from_pod5(first.path.to_str().unwrap());
-                let flowcell_name = first.flowcell_name.clone();
-
-                let sel: Vec<FCLine> = corrected_fc
-                    .iter()
-                    .filter(|e| e.flow_cell == flowcell_name)
-                    .cloned()
-                    .collect();
-
-                let mut corrected_name: Vec<String> = sel
-                    .clone()
-                    .into_iter()
-                    .map(|e| e.ref_flow_cell)
-                    .filter(|e| !e.is_empty())
-                    .collect();
-                corrected_name.dedup();
-
-                if corrected_name.len() > 1 {
-                    panic!("Multiple corrected flow_cells for {v:?}");
-                }
-
-                let corrected_name = if !corrected_name.is_empty() {
-                    corrected_name.first().unwrap().to_string()
-                } else {
-                    "".to_string()
-                };
-
-                let cases: Vec<FlowCellCase> = sel
-                    .iter()
-                    .map(|e| {
-                        let pod_dir = match first.pod5_type {
-                            Pod5Type::Raw => first.path.parent().unwrap().to_path_buf(),
-                            Pod5Type::Demuxed => {
-                                let mut bc_dir =
-                                    first.path.parent().unwrap().parent().unwrap().to_path_buf();
-                                bc_dir
-                                    .push(format!("barcode{}", e.barcode_number.replace("NB", "")));
-                                bc_dir
-                            }
-                        };
-
-                        FlowCellCase {
-                            id: e.id.clone(),
-                            time_point: e.time_point.clone(),
-                            barcode: e.barcode_number.clone(),
-                            pod_dir,
-                        }
-                    })
-                    .collect();
-
-                FlowCell {
-                    flowcell_name,
-                    corrected_name,
-                    cases,
-                    run_name: first.run_name.clone(),
-                    pod5_type: first.pod5_type.clone(),
-                    pod5_info,
-                    pod5: v.to_vec(),
-                }
-            })
-            .collect();
-
-        let mut runs = HashMap::new();
-        for fc in flow_cells {
-            runs.entry(fc.run_name.clone())
-                .or_insert_with(Vec::new)
-                .push(fc);
-        }
-
-        let runs: Vec<Run> = runs
-            .into_values()
-            .map(|v| Run {
-                run_name: v[0].run_name.clone(),
-                flowcells: v.to_vec(),
-            })
-            .collect();
-
-        Ok(Self {
-            importation_date: Utc::now(),
-            runs,
-            bam_dir: bam_dir.to_string(),
-            pod5_dir: pod5_dir.to_string(),
-        })
-    }
-
-    pub fn print_info(&self) {
-        self.runs.iter().for_each(|run| {
-            run.flowcells.iter().for_each(|fc| {
-                let total_size: u64 = fc.pod5.iter().map(|p| p.file_metadata.size()).sum();
-                let n_files = fc.pod5.len();
-                let dates: Vec<DateTime<Utc>> = fc
-                    .pod5
-                    .iter()
-                    .map(|p| p.file_metadata.modified().unwrap().into())
-                    .collect();
-                let from = dates.iter().min().unwrap();
-                let to = dates.iter().max().unwrap();
-                let s = [
-                    run.run_name.clone(),
-                    from.to_string(),
-                    to.to_string(),
-                    n_files.to_string(),
-                    total_size.to_string(),
-                    fc.flowcell_name.to_string(),
-                    fc.pod5_type.to_string(),
-                    fc.pod5_info.acquisition_id.clone(),
-                    format!("{:?}", fc.cases),
-                ]
-                .join("\t");
-                println!("{s}");
-            });
-        });
-    }
-
-    // pub fn check_local(&self) -> anyhow::Result<()> {
-    //     let mut res = Vec::new();
-    //     for run in self.runs.iter() {
-    //         for fc in run.flowcells.iter() {
-    //             for c in fc.cases.iter() {
-    //                 let bases_called = if let Some(b) = c.basecalled {
-    //                     if b {
-    //                         "✅".to_string()
-    //                     } else {
-    //                         "❌".to_string()
-    //                     }
-    //                 } else {
-    //                     "❌".to_string()
-    //                 };
-    //
-    //                 let s = [
-    //                     c.id.to_string(),
-    //                     c.time_point.to_string(),
-    //                     c.barcode.to_string(),
-    //                     run.run_name.clone(),
-    //                     fc.flowcell_name.to_string(),
-    //                     fc.pod5_type.to_string(),
-    //                     fc.pod5_info.acquisition_id.clone(),
-    //                     bases_called,
-    //                 ]
-    //                 .join("\t");
-    //                 res.push(s);
-    //             }
-    //         }
-    //     }
-    //     res.sort();
-    //     println!("{}", res.join("\n"));
-    //     Ok(())
-    // }
-
-    // pub fn fc_done(&self) {
-    //     for run in self.runs.iter() {
-    //         for fc in run.flowcells.iter() {
-    //             let n_called = fc
-    //                 .cases
-    //                 .iter()
-    //                 .filter(|c| if let Some(b) = c.basecalled { b } else { false })
-    //                 .count();
-    //             if n_called != 0 && n_called == fc.cases.len() {
-    //                 let s = [
-    //                     format!("{}/{}", run.run_name, fc.flowcell_name),
-    //                     fc.pod5_info.acquisition_id.to_string(),
-    //                     format!("{:#?}", fc.cases),
-    //                 ]
-    //                 .join("\t");
-    //                 println!("{s}");
-    //             }
-    //         }
-    //     }
-    // }
-
-    // pub fn todo(&self) {
-    //     let run_dir = &self.pod5_dir;
-    //     for run in self.runs.iter() {
-    //         for fc in run.flowcells.iter() {
-    //             let to_call: Vec<_> = fc
-    //                 .cases
-    //                 .iter()
-    //                 .filter(|c| if let Some(b) = c.basecalled { !b } else { true })
-    //                 .collect();
-    //
-    //             if !to_call.is_empty() {
-    //                 if fc.pod5_type == Pod5Type::Raw && to_call.len() != fc.cases.len() {
-    //                     println!("No solution for: {}/{}", run.run_name, fc.flowcell_name);
-    //                 } else {
-    //                     match fc.pod5_type {
-    //                         Pod5Type::Raw => {
-    //                             let cases: Vec<String> = to_call
-    //                                 .iter()
-    //                                 .map(|c| {
-    //                                     let bc = c.barcode.replace("NB", "");
-    //                                     let tp = c.time_point.to_lowercase();
-    //                                     [bc, c.id.to_string(), tp].join(" ")
-    //                                 })
-    //                                 .collect();
-    //                             println!(
-    //                                 "from_mux.sh {}/{}/{} {}",
-    //                                 run_dir,
-    //                                 run.run_name,
-    //                                 fc.flowcell_name,
-    //                                 cases.join(" ")
-    //                             );
-    //                         }
-    //                         Pod5Type::Demuxed => to_call.iter().for_each(|c| {
-    //                             let bc = c.barcode.replace("NB", "");
-    //                             let tp = c.time_point.to_lowercase();
-    //                             let bam = format!(
-    //                                 "{}/{}/{}/{}_{}_hs1.bam",
-    //                                 self.bam_dir, c.id, c.time_point, c.id, c.time_point
-    //                             );
-    //                             if PathBuf::from(bam).exists() {
-    //                                 let pod_dir: Vec<String> = fc
-    //                                     .pod5
-    //                                     .iter()
-    //                                     .filter(|p| {
-    //                                         p.path.contains(&format!("barcode{}", bc.clone()))
-    //                                     })
-    //                                     .take(1)
-    //                                     .map(|p| p.path.to_string())
-    //                                     .collect();
-    //
-    //                                 let pod_dir = pod_dir.first().unwrap();
-    //                                 let mut pod_dir = PathBuf::from(pod_dir);
-    //                                 pod_dir.pop();
-    //
-    //                                 // TODO scheduler
-    //                                 println!(
-    //                                     "complete_bam.sh {} {} {}",
-    //                                     c.id,
-    //                                     tp,
-    //                                     pod_dir.to_string_lossy()
-    //                                 )
-    //                             } else {
-    //                                 let pod_dir: Vec<String> = fc
-    //                                     .pod5
-    //                                     .iter()
-    //                                     .filter(|p| {
-    //                                         p.path.contains(&format!("barcode{}", bc.clone()))
-    //                                     })
-    //                                     .take(1)
-    //                                     .map(|p| p.path.to_string())
-    //                                     .collect();
-    //
-    //                                 let pod_dir = pod_dir.first().unwrap();
-    //                                 let mut pod_dir = PathBuf::from(pod_dir);
-    //                                 pod_dir.pop();
-    //
-    //                                 println!(
-    //                                     "dorado.sh {} {} {}",
-    //                                     c.id,
-    //                                     tp,
-    //                                     pod_dir.to_string_lossy()
-    //                                 )
-    //                             }
-    //                         }),
-    //                     };
-    //                 }
-    //             }
-    //         }
-    //     }
-    // }
-
-    pub fn ids(&self) -> Vec<String> {
-        let mut ids: Vec<String> = self
-            .runs
-            .iter()
-            .flat_map(|r| {
-                r.flowcells
-                    .iter()
-                    .flat_map(|f| {
-                        f.cases
-                            .iter()
-                            .map(|c| c.id.clone())
-                            .collect::<Vec<String>>()
-                    })
-                    .collect::<Vec<String>>()
-            })
-            .collect();
-        ids.sort();
-        ids.dedup();
-        ids
-    }
-}
-
-#[derive(Debug, Deserialize, Clone)]
-pub struct FCLine {
-    pub id: String,
-    pub time_point: String,
-    pub barcode_number: String,
-    pub flow_cell: String,
-    pub run: String,
-    pub path: String,
-    pub ref_flow_cell: String,
-}
-
-pub fn load_flowcells_corrected_names(file_path: &str) -> anyhow::Result<Vec<FCLine>> {
-    let file = File::open(file_path)?;
-
-    let mut rdr = ReaderBuilder::new()
-        .delimiter(b'\t')
-        .has_headers(true)
-        .from_reader(file);
-
-    let mut records = Vec::new();
-    for result in rdr.deserialize() {
-        let mut record: FCLine = result?;
-
-        // formatting
-        record.time_point = record.time_point.to_lowercase();
-        record.id = record.id.to_uppercase();
-
-        records.push(record);
-    }
-
-    Ok(records)
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-struct MinKnowSampleSheet {
-    pub protocol_run_id: String,
-    pub position_id: String,
-    pub flow_cell_id: String,
-    pub sample_id: String,
-    pub experiment_id: String,
-    pub flow_cell_product_code: String,
-    pub kit: String,
-}
-
-impl TryFrom<&str> for MinKnowSampleSheet {
-    type Error = anyhow::Error;
-
-    fn try_from(value: &str) -> anyhow::Result<Self> {
-        let cells: Vec<&str> = value.split(",").collect();
-        if cells.len() != 7 {
-            return Err(anyhow::anyhow!(
-                "Number of cells not equal to definition. {value}"
-            ));
-        }
-
-        Ok(Self {
-            protocol_run_id: cells[0].to_string(),
-            position_id: cells[1].to_string(),
-            flow_cell_id: cells[2].to_string(),
-            sample_id: cells[3].to_string(),
-            experiment_id: cells[4].to_string(),
-            flow_cell_product_code: cells[5].to_string(),
-            kit: cells[6].to_string(),
-        })
-    }
-}
-
-impl MinKnowSampleSheet {
-    pub fn from_path(path: &str) -> anyhow::Result<Self> {
-        let file = File::open(path).map_err(|e| format!("Can't open file: {path}\n{e}"))?;
-        let reader = io::BufReader::new(file);
-        for (i, line) in reader.lines().enumerate() {
-            let line = line.map_err(|e| format!("Error parsing line: {line:?}\n\t{e}"))?;
-            if i == 0 && line != "protocol_run_id,position_id,flow_cell_id,sample_id,experiment_id,flow_cell_product_code,kit" {
-                return Err(anyhow::anyhow!("File header doesnt correspond to MinKnwo sample sheet: {line}"));
-            } else if i == 1 {
-                return Ok(line.as_str().try_into()?);
-            } else {
-                return Err(anyhow::anyhow!("Wrong MinKnow sample sheet format."));
-            }
-        }
-                        return Err(anyhow::anyhow!("Wrong MinKnow sample sheet format."));
-
-    }
-}

+ 0 - 1
src/collection/mod.rs

@@ -35,7 +35,6 @@ use crate::{
 pub mod bam;
 pub mod modbases;
 pub mod pod5;
-pub mod variants;
 pub mod vcf;
 
 #[derive(Debug, Clone)]

+ 0 - 40
src/collection/pod5.rs

@@ -740,46 +740,6 @@ impl FlowCellExperiment {
     }
 }
 
-// #[derive(Debug, Serialize, Deserialize, Clone)]
-// pub struct FlowCellArchived {
-//     pub archive_id: String,
-//     pub last_seen: DateTime<Utc>,
-//     pub run: MinKnowSampleSheet,
-// }
-
-// impl FlowCellArchived {
-//     pub fn from_tar(tar_path: &str) -> Result<Vec<(String, u64, u64)>> {
-//         // Open the tar file safely with context for errors
-//         let file = File::open(tar_path)
-//             .with_context(|| format!("Failed to open tar file at path: {}", tar_path))?;
-//
-//         let mut archive = tar::Archive::new(file);
-//         let mut result = Vec::new();
-//
-//         // Iterate through the entries in the archive
-//         for entry in archive.entries_with_seek()? {
-//             let file = entry.context("Failed to read an entry from the tar archive")?;
-//
-//             // Extract file properties safely
-//             let size = file.size();
-//             let modified = file
-//                 .header()
-//                 .mtime()
-//                 .context("Failed to get modification time")?;
-//             let path = file
-//                 .path()
-//                 .context("Failed to get file path from tar entry")?
-//                 .to_string_lossy()
-//                 .into_owned();
-//
-//             println!("{path}");
-//             result.push((path, size, modified));
-//         }
-//
-//         Ok(result)
-//     }
-// }
-
 pub fn scan_archive(
     tar_path: &str,
 ) -> anyhow::Result<(MinKnowSampleSheet, Vec<(String, u64, DateTime<Utc>)>)> {

+ 0 - 64
src/collection/variants.rs

@@ -1,64 +0,0 @@
-// use std::{fs::Metadata, path::PathBuf};
-
-// use anyhow::Context;
-// use glob::glob;
-// use log::warn;
-// use rayon::prelude::*;
-
-// pub struct VariantsCollection {
-//     pub data: Vec<VariantsCase>,
-// }
-//
-// #[derive(Debug)]
-// pub struct VariantsCase {
-//     pub path: PathBuf,
-//     pub id: String,
-//     pub file_metadata: Metadata,
-// }
-//
-// impl VariantsCase {
-//     pub fn new(path: PathBuf) -> anyhow::Result<Self> {
-//         let id = path
-//             .ancestors()
-//             .nth(2)
-//             .and_then(|p| p.file_name())
-//             .and_then(|name| name.to_str())
-//             .map(String::from)
-//             .context(format!(
-//                 "Invalid path structure: unable to extract ID for {}",
-//                 path.display()
-//             ))?;
-//
-//         let file_metadata = path.metadata().context(format!(
-//             "Failed to read file metadata for {}",
-//             path.display()
-//         ))?;
-//
-//         Ok(VariantsCase {
-//             path,
-//             id,
-//             file_metadata,
-//         })
-//     }
-// }
-
-// impl VariantsCollection {
-//     pub fn new(result_dir: &str) -> anyhow::Result<Self> {
-//         let pattern = format!("{}/*/*/*_variants.bytes.gz", result_dir);
-//         let data = glob(&pattern)
-//             .expect("Failed to read glob pattern")
-//             .par_bridge()
-//             .filter_map(|entry| {
-//                 match entry {
-//                     Ok(path) => match VariantsCase::new(path) {
-//                         Ok(vc) => return Some(vc),
-//                         Err(err) => warn!("{err}"),
-//                     },
-//                     Err(e) => warn!("Error: {:?}", e),
-//                 }
-//                 None
-//             })
-//             .collect();
-//         Ok(VariantsCollection { data })
-//     }
-// }

+ 18 - 1
src/lib.rs

@@ -43,7 +43,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, Callers}, collection::{bam, pod5::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig}, commands::dorado::Dorado, variant::variant::AlterationCategory};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -692,4 +692,21 @@ mod tests {
         println!("{r:#?}");
         Ok(())
     }
+
+    #[test]
+    fn alt_cat() -> anyhow::Result<()> {
+        let id = "ADJAGBA";
+        let config = Config::default();
+        let path = format!("{}/{id}/diag/somatic_variants.json.gz", config.result_dir);
+        let variants = variant_collection::Variants::load_from_json(&path)?;
+        println!("n variants {}", variants.data.len());
+        variants.data.iter()
+            .filter(|v| v.alteration_category().contains(&AlterationCategory::BND))
+            .for_each(|v| {
+            println!("{}", [v.position.contig(), (v.position.position + 1).to_string(), v.reference.to_string(), v.alternative.to_string(), v.annotations.iter().filter(|a| matches!(a, Annotation::Callers(..)))
+                .map(|a| a.to_string()).collect::<Vec<String>>().join(";")].join("\t"))
+        });
+        Ok(())
+
+    }
 }

+ 2 - 16
src/variant/variant.rs

@@ -193,26 +193,12 @@ impl VcfVariant {
             _ => match self.svtype() {
                 Some(sv_type) => AlterationCategory::from(sv_type),
                 None => AlterationCategory::Other,
-            }, // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotides(_)) => {
-               //     AlterationCategory::Rep
-               // }
-               // (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Unstructured(_)) => {
-               //     AlterationCategory::Other
-               // }
-               // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotide(_)) => {
-               //     AlterationCategory::Other
-               // }
-               // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Nucleotides(_)) => {
-               //     AlterationCategory::Other
-               // }
-               // (ReferenceAlternative::Unstructured(_), ReferenceAlternative::Unstructured(_)) => {
-               //     AlterationCategory::Other
-               // }
+            },
         }
     }
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
 pub enum AlterationCategory {
     SNV,
     DEL,

+ 11 - 2
src/variant/variant_collection.rs

@@ -190,7 +190,7 @@ impl VariantCollection {
     }
 }
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct Variant {
     pub hash: Hash128,
     pub position: GenomePosition,
@@ -221,9 +221,18 @@ impl Variant {
             })
             .collect()
     }
+
+    pub fn alteration_category(&self) -> Vec<AlterationCategory> {
+        self.vcf_variants
+            .iter()
+            .map(|v| v.alteration_category())
+            .collect::<HashSet<_>>()
+            .into_iter()
+            .collect()
+    }
 }
 
-#[derive(Debug, Default, Serialize, Deserialize)]
+#[derive(Debug, Default, Serialize, Deserialize, Clone)]
 pub struct Variants {
     pub data: Vec<Variant>,
 }