Thomas 1 жил өмнө
parent
commit
426513200b
3 өөрчлөгдсөн 1359 нэмэгдсэн , 15 устгасан
  1. 1201 0
      <
  2. 144 0
      gg.txt
  3. 14 15
      src/lib.rs

+ 1201 - 0
<

@@ -0,0 +1,1201 @@
+pub mod breakpoint;
+pub mod genomic_graph;
+// mod phase;
+
+use anyhow::{anyhow, Ok, Result};
+use fasta::record::Sequence;
+use log::info;
+use minimap2::{Aligner, Mapping};
+use noodles_fasta as fasta;
+use num_format::{CustomFormat, Grouping, ToFormattedString, WriteFormatted};
+use petgraph::{dot::Dot, prelude::*};
+use rust_htslib::bam::{self, Record};
+use std::{
+    collections::HashMap,
+    fmt,
+    fs::{self, File},
+    io::{BufReader, BufWriter, Write},
+    path::PathBuf,
+    process::{Command, Stdio},
+};
+use uuid::Uuid;
+
+/// Collection of contigs, bucketed under string keys.
+///
+/// Keys are produced by `Genome::add_contig`: a reference target name, a
+/// `"{left}-{right}"` pair when the outer alignments hit different targets, or
+/// the literal `"Ambigous"` bucket.
+#[derive(Debug, Clone)]
+pub struct Genome {
+    /// Contigs grouped by bucket key.
+    pub chromosomes: HashMap<String, Chromosome>,
+}
+
+/// The contigs stored under one key of `Genome::chromosomes`.
+#[derive(Debug, Clone)]
+pub struct Chromosome {
+    /// Contigs in the order they were pushed by `Genome::add_contig`.
+    contigs: Vec<Contig>,
+}
+
+/// A contig sequence together with its alignments and their classification.
+#[derive(Debug, Clone)]
+pub struct Contig {
+    /// Contig identifier; used as FASTA record name and in output file names.
+    pub id: String,
+    // Alignments of the contig sequence on the reference.
+    pub mappings: Vec<Mapping>,
+    // Reads aligned on the reference that support this contig, when available.
+    pub supporting_records: Option<Vec<Record>>,
+    /// Contig sequence as text.
+    pub sequence: String,
+    /// Classification of `mappings`, as computed by `get_ref_pos`.
+    pub contig_ref: ContigRef,
+}
+
+/// How the alignments of a contig are arranged along the contig (query) axis,
+/// as classified by `get_ref_pos`.
+///
+/// A "group" below is a run of alignments that `group_mappings` put together
+/// because they overlap on the query.
+#[derive(Debug, Clone)]
+pub enum ContigRef {
+    /// Exactly one alignment.
+    Unique(Mapping),
+    /// Two groups holding one alignment each, ordered by `query_start`.
+    Chimeric((Mapping, Mapping)),
+    /// Single first and last alignments with exactly one group in between.
+    ChimericTriple((Mapping, Mapping, Mapping)),
+    /// Single first and last alignments with several groups in between.
+    ChimericMultiple((Mapping, Vec<Mapping>, Mapping)),
+    /// Several candidate alignments on the left, a single one on the right.
+    LeftAmbiguity((Vec<Mapping>, Mapping)),
+    /// A single alignment on the left, several candidates on the right.
+    RightAmbiguity((Mapping, Vec<Mapping>)),
+    /// Neither end resolves to a single alignment.
+    Ambigous(Vec<Mapping>),
+}
+
+impl fmt::Display for ContigRef {
+    /// Renders every alignment with `mapping_to_string`.
+    ///
+    /// Consecutive segments are joined with `<->`; alternatives inside an
+    /// ambiguous group are joined with `//` (see `mappings_to_string`).
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        let repr = match self {
+            ContigRef::Unique(m) => mapping_to_string(m),
+            ContigRef::Chimeric((a, b)) => {
+                format!("{}<->{}", mapping_to_string(a), mapping_to_string(b))
+            }
+            ContigRef::ChimericTriple((a, b, c)) => format!(
+                "{}<->{}<->{}",
+                mapping_to_string(a),
+                mapping_to_string(b),
+                mapping_to_string(c)
+            ),
+            ContigRef::ChimericMultiple((a, v, b)) => format!(
+                "{}<->{}<->{}",
+                mapping_to_string(a),
+                mappings_to_string(v),
+                mapping_to_string(b)
+            ),
+            ContigRef::LeftAmbiguity((v, b)) => {
+                format!("{}<->{}", mappings_to_string(v), mapping_to_string(b))
+            }
+            ContigRef::RightAmbiguity((a, v)) => {
+                format!("{}<->{}", mapping_to_string(a), mappings_to_string(v))
+            }
+            ContigRef::Ambigous(v) => mappings_to_string(v),
+        };
+        // Hand formatter errors back to the caller instead of panicking.
+        fmt.write_str(&repr)
+    }
+}
+
+impl ContigRef {
+    /// Reference name of `m`, or `"UNKNOWN"` when the mapping carries none.
+    fn target_label(m: &Mapping) -> String {
+        m.target_name
+            .clone()
+            .unwrap_or_else(|| "UNKNOWN".to_string())
+    }
+
+    /// Ordered alignments whose junctions are breakpoints.
+    ///
+    /// Only `Chimeric` and `ChimericTriple` yield a value; every other variant
+    /// returns `None`.
+    pub fn breakpoints(&self) -> Option<Vec<Mapping>> {
+        match self {
+            ContigRef::Chimeric((a, b)) => Some(vec![a.clone(), b.clone()]),
+            ContigRef::ChimericTriple((a, b, c)) => Some(vec![a.clone(), b.clone(), c.clone()]),
+            ContigRef::Unique(_)
+            | ContigRef::ChimericMultiple(_)
+            | ContigRef::LeftAmbiguity(_)
+            | ContigRef::RightAmbiguity(_)
+            | ContigRef::Ambigous(_) => None,
+        }
+    }
+
+    /// One printable string per junction between consecutive breakpoint segments.
+    ///
+    /// Each entry has the shape
+    /// `{left target}:{left end}{strand}▌▐{strand}{right target}:{right start}`,
+    /// where positions use `_` as thousands separator and the strand marker is
+    /// `►` for forward and `◄` for reverse alignments.
+    ///
+    /// Returns `None` when [`ContigRef::breakpoints`] has nothing to report.
+    pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
+        let bp_left = "▌";
+        let bp_right = "▐";
+        let format = CustomFormat::builder()
+            .grouping(Grouping::Standard)
+            .minus_sign("-")
+            .separator("_")
+            .build()
+            .expect("static number format is valid");
+        let strand_sign = |m: &Mapping| match m.strand {
+            minimap2::Strand::Forward => "►",
+            minimap2::Strand::Reverse => "◄",
+        };
+
+        let res: Vec<String> = self
+            .breakpoints()?
+            .windows(2)
+            .map(|pair| {
+                let (lhs, rhs) = (&pair[0], &pair[1]);
+                format!(
+                    "{}:{}{}{bp_left}{bp_right}{}{}:{}",
+                    Self::target_label(lhs),
+                    lhs.target_end.to_formatted_string(&format),
+                    strand_sign(lhs),
+                    strand_sign(rhs),
+                    Self::target_label(rhs),
+                    rhs.target_start.to_formatted_string(&format),
+                )
+            })
+            .collect();
+
+        if res.is_empty() {
+            None
+        } else {
+            Some(res)
+        }
+    }
+
+    /// Compact description of the reference segments covered by the contig.
+    ///
+    /// * `Unique` gives `{target}:{start}_{end}`.
+    /// * `Chimeric`, `ChimericTriple` and `Ambigous` give
+    ///   `[{target}:{start}_{end}{_rev?};...]`, segments ordered by `query_start`
+    ///   and reverse-strand segments suffixed with `_rev`.
+    /// * Every other variant gives `None`.
+    pub fn desc(&self) -> Option<String> {
+        // Works on borrowed mappings: only a handful of fields are read.
+        let to_desc = |mut segments: Vec<&Mapping>| -> String {
+            segments.sort_by_key(|m| m.query_start);
+            let parts: Vec<String> = segments
+                .into_iter()
+                .map(|m| {
+                    let strand = match m.strand {
+                        minimap2::Strand::Forward => "",
+                        minimap2::Strand::Reverse => "_rev",
+                    };
+                    format!(
+                        "{}:{}_{}{}",
+                        Self::target_label(m),
+                        m.target_start,
+                        m.target_end,
+                        strand
+                    )
+                })
+                .collect();
+            format!("[{}]", parts.join(";"))
+        };
+
+        match self {
+            ContigRef::Unique(a) => Some(format!(
+                "{}:{}_{}",
+                Self::target_label(a),
+                a.target_start,
+                a.target_end
+            )),
+            ContigRef::Chimeric((a, b)) => Some(to_desc(vec![a, b])),
+            ContigRef::ChimericTriple((a, b, c)) => Some(to_desc(vec![a, b, c])),
+            ContigRef::Ambigous(v) => Some(to_desc(v.iter().collect())),
+            ContigRef::ChimericMultiple(_)
+            | ContigRef::LeftAmbiguity(_)
+            | ContigRef::RightAmbiguity(_) => None,
+        }
+    }
+
+    /// HGVS-like label for the rearrangement, when one can be derived.
+    ///
+    /// * `Chimeric`, same target: `{chr}:{a.target_end}_{b.target_start}`.
+    /// * `Chimeric`, different targets:
+    ///   `{a_chr}:{a.target_end}delins[{b_chr}:{b.target_end}]`.
+    /// * `ChimericTriple` whose outer segments share a target:
+    ///   `{chr}:{a_end}_{c_start}ins[{b_chr}:{b_start}_{b_end}]`; otherwise `None`.
+    /// * Every other variant: `None`.
+    pub fn hgvs(&self) -> Option<String> {
+        match self {
+            ContigRef::Chimeric((a, b)) => {
+                let (a_chr, b_chr) = (Self::target_label(a), Self::target_label(b));
+                if a.target_name == b.target_name {
+                    Some(format!("{a_chr}:{}_{}", a.target_end, b.target_start))
+                } else {
+                    Some(format!(
+                        "{a_chr}:{}delins[{b_chr}:{}]",
+                        a.target_end, b.target_end
+                    ))
+                }
+            }
+            ContigRef::ChimericTriple((a, b, c)) => {
+                let mut segments = [a, b, c];
+                segments.sort_by_key(|m| m.query_start);
+                let [a, b, c] = segments;
+
+                // When two consecutive segments overlap on the contig, the shared
+                // bases are attributed to the earlier segment, so the later
+                // segment's start is shifted by the overlap length. The overlap is
+                // measured against the later segment's `query_start`; the previous
+                // test against `query_end` almost never detected it.
+                // TODO add inserted nt (gap between the two segments on the contig)
+                let shifted_start = |prev: &Mapping, next: &Mapping| {
+                    next.target_start + (prev.query_end - next.query_start).max(0)
+                };
+                let inserted_start = shifted_start(a, b);
+                let resume_start = shifted_start(b, c);
+
+                let (a_chr, b_chr, c_chr) = (
+                    Self::target_label(a),
+                    Self::target_label(b),
+                    Self::target_label(c),
+                );
+                if a_chr == c_chr {
+                    Some(format!(
+                        "{}:{}_{}ins[{}:{}_{}]",
+                        a_chr, a.target_end, resume_start, b_chr, inserted_start, b.target_end
+                    ))
+                } else {
+                    None
+                }
+            }
+            ContigRef::Unique(_)
+            | ContigRef::ChimericMultiple(_)
+            | ContigRef::LeftAmbiguity(_)
+            | ContigRef::RightAmbiguity(_)
+            | ContigRef::Ambigous(_) => None,
+        }
+    }
+}
+
+/// Formats one alignment as `{target}:{tstart}-{tend}({query}:{qstart}-{qend})`.
+///
+/// A missing target or query name is printed as `UNKNOWN`.
+pub fn mapping_to_string(mapping: &Mapping) -> String {
+    const FALLBACK: &str = "UNKNOWN";
+    let target = mapping.target_name.as_deref().unwrap_or(FALLBACK);
+    let query = mapping.query_name.as_deref().unwrap_or(FALLBACK);
+    format!(
+        "{}:{}-{}({}:{}-{})",
+        target,
+        mapping.target_start,
+        mapping.target_end,
+        query,
+        mapping.query_start,
+        mapping.query_end
+    )
+}
+
+/// Formats each alignment with `mapping_to_string` and joins them with `//`.
+fn mappings_to_string(mappings: &Vec<Mapping>) -> String {
+    let mut rendered: Vec<String> = Vec::with_capacity(mappings.len());
+    for mapping in mappings {
+        rendered.push(mapping_to_string(mapping));
+    }
+    rendered.join("//")
+}
+
+/// Classifies the alignments of one contig into a [`ContigRef`].
+///
+/// The alignments are sorted by `query_start` and grouped with
+/// `group_mappings`. The first and last groups are the contig ends; whether
+/// each holds one alignment or several decides the variant:
+///
+/// | first  | last   | groups in between | result                                   |
+/// |--------|--------|-------------------|------------------------------------------|
+/// | single | single | 0                 | `Chimeric`                               |
+/// | single | single | 1                 | `ChimericTriple` (first alignment of the middle group) |
+/// | single | single | >1                | `ChimericMultiple`                       |
+/// | single | many   | any               | `RightAmbiguity` (middle + last on the right) |
+/// | many   | single | any               | `LeftAmbiguity` (first + middle on the left)  |
+/// | many   | many   | any               | `Ambigous`                               |
+///
+/// A single alignment gives `Unique`; zero or one group gives `Ambigous`.
+///
+/// # Errors
+/// Returns an error when grouping fails, or when the first group does not
+/// start strictly before the last one on the contig (this used to be an
+/// `assert!` and aborted the whole run).
+pub fn get_ref_pos(mappings: Vec<Mapping>) -> Result<ContigRef> {
+    let mut mappings = mappings;
+    mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
+
+    if mappings.len() == 1 {
+        return Ok(ContigRef::Unique(mappings.remove(0)));
+    }
+
+    let mut grouped: Vec<Vec<Mapping>> = group_mappings(&mut mappings)?;
+    if grouped.len() < 2 {
+        return Ok(ContigRef::Ambigous(grouped.into_iter().flatten().collect()));
+    }
+
+    // At least two groups: peel off both ends, what remains is the middle.
+    let mut first = grouped.remove(0);
+    let mut last = grouped
+        .pop()
+        .ok_or_else(|| anyhow!("alignment groups unexpectedly empty"))?;
+    // `group_mappings` never produces an empty group, so indexing is safe.
+    if first[0].query_start >= last[0].query_start {
+        return Err(anyhow!(
+            "inconsistent alignment groups: first group starts at {} but last group starts at {}",
+            first[0].query_start,
+            last[0].query_start
+        ));
+    }
+
+    let n_middle_groups = grouped.len();
+    let mut middle: Vec<Mapping> = grouped.into_iter().flatten().collect();
+
+    let contig_ref = match (first.len(), last.len()) {
+        (1, 1) => {
+            let (left, right) = (first.remove(0), last.remove(0));
+            match n_middle_groups {
+                0 => ContigRef::Chimeric((left, right)),
+                // NOTE(review): only the first alignment of the middle group is
+                // kept, any alternative in that group is discarded.
+                1 => ContigRef::ChimericTriple((left, middle.remove(0), right)),
+                _ => ContigRef::ChimericMultiple((left, middle, right)),
+            }
+        }
+        (1, _) => {
+            middle.append(&mut last);
+            ContigRef::RightAmbiguity((first.remove(0), middle))
+        }
+        (_, 1) => {
+            first.append(&mut middle);
+            ContigRef::LeftAmbiguity((first, last.remove(0)))
+        }
+        _ => {
+            first.append(&mut middle);
+            first.append(&mut last);
+            ContigRef::Ambigous(first)
+        }
+    };
+
+    Ok(contig_ref)
+}
+
+impl Genome {
+    /// Creates a genome with no contigs.
+    pub fn new() -> Self {
+        Genome {
+            chromosomes: HashMap::new(),
+        }
+    }
+
+    /// Iterates over `(bucket key, chromosome)` pairs in arbitrary order.
+    pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Chromosome> {
+        self.chromosomes.iter()
+    }
+
+    /// Iterates over every contig of every bucket.
+    pub fn contigs(&self) -> impl Iterator<Item = &Contig> {
+        self.chromosomes.values().flat_map(|chr| chr.iter())
+    }
+
+    /// Classifies a contig from its alignments and stores it in the matching bucket.
+    ///
+    /// Bucket key:
+    /// * `Unique`: the target name;
+    /// * `Chimeric` / `ChimericMultiple`: the target name when both outer
+    ///   alignments share it, `"{left}-{right}"` otherwise;
+    /// * anything else: `"Ambigous"`.
+    ///
+    /// The bucket is created on demand for every category. Previously a
+    /// `Unique` contig was silently dropped unless its bucket already existed,
+    /// which made the result depend on insertion order.
+    ///
+    /// # Errors
+    /// Fails when `get_ref_pos` fails, or when an alignment needed to build the
+    /// bucket key has no target name.
+    pub fn add_contig(
+        &mut self,
+        id: String,
+        mappings: Vec<Mapping>,
+        supporting_records: Option<Vec<Record>>,
+        sequence: String,
+    ) -> Result<()> {
+        let mut mappings = mappings;
+        mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
+        let contig_ref = get_ref_pos(mappings.clone())?;
+
+        let key = {
+            let target = |m: &Mapping| -> Result<String> {
+                m.target_name
+                    .clone()
+                    .ok_or_else(|| anyhow!("contig {}: alignment without a target name", id))
+            };
+            match &contig_ref {
+                ContigRef::Unique(m) => target(m)?,
+                ContigRef::Chimeric((left, right))
+                | ContigRef::ChimericMultiple((left, _, right)) => {
+                    let (left, right) = (target(left)?, target(right)?);
+                    if left == right {
+                        left
+                    } else {
+                        format!("{}-{}", left, right)
+                    }
+                }
+                ContigRef::ChimericTriple(_)
+                | ContigRef::LeftAmbiguity(_)
+                | ContigRef::RightAmbiguity(_)
+                | ContigRef::Ambigous(_) => "Ambigous".to_string(),
+            }
+        };
+
+        let new_contig = Contig {
+            id,
+            mappings,
+            supporting_records,
+            sequence,
+            contig_ref,
+        };
+        self.chromosomes
+            .entry(key)
+            .or_insert_with(|| Chromosome {
+                contigs: Vec::new(),
+            })
+            .contigs
+            .push(new_contig);
+
+        Ok(())
+    }
+
+    /// Aligns `sequence` with `aligner` and adds the result as a contig without
+    /// supporting reads.
+    ///
+    /// # Errors
+    /// Fails when `sequence` is not valid UTF-8, when `aligner` fails, or when
+    /// [`Genome::add_contig`] fails.
+    pub fn add_contig_from_seq(
+        &mut self,
+        name: String,
+        sequence: &[u8],
+        aligner: impl Fn(String) -> Result<Vec<Mapping>>,
+    ) -> Result<()> {
+        // Decode once; the aligner takes ownership of its own copy.
+        let sequence = String::from_utf8(sequence.to_vec())?;
+        let mappings = aligner(sequence.clone())?;
+        self.add_contig(name, mappings, None, sequence)?;
+        Ok(())
+    }
+
+    /// Writes every contig to `{dir}/{contig id}.fasta`.
+    pub fn write_contigs_sequences(&self, dir: &str) {
+        for contig in self.contigs() {
+            contig.write_fasta(&format!("{dir}/{}.fasta", contig.id))
+        }
+    }
+
+    /// Builds a genome from every `*_flye.fa` file found under `dir`.
+    ///
+    /// Each FASTA record is aligned through the alignment service at
+    /// `http://localhost:4444/align`.
+    ///
+    /// # Errors
+    /// Fails when a path is not valid UTF-8, when a FASTA file cannot be read,
+    /// or when adding a contig fails.
+    pub fn from_contigs_sequences(dir: &str) -> Result<Self> {
+        let aligner_url = "http://localhost:4444/align";
+        let aligner = aligner_client::dist_align(aligner_url.to_string());
+        let mut genome = Self::new();
+        for path in get_contigs_fa_paths(dir)? {
+            let path_str = path
+                .to_str()
+                .ok_or_else(|| anyhow!("non UTF-8 path: {}", path.display()))?;
+            for (name, sequence) in read_fasta(path_str)? {
+                genome.add_contig_from_seq(name, sequence.as_ref(), &aligner)?;
+            }
+        }
+        Ok(genome)
+    }
+
+    /// Logs the number of contigs of each bucket at `info` level.
+    pub fn stats(&self) {
+        for (k, v) in self.chromosomes.iter() {
+            info!("{}:{}", k, v.contigs.len());
+        }
+    }
+
+    /// Returns a copy of the contigs stored under `chromosome`, if that bucket exists.
+    pub fn chromosome(&self, chromosome: &str) -> Option<Vec<Contig>> {
+        self.chromosomes
+            .get(chromosome)
+            .map(|chr| chr.contigs.clone())
+    }
+}
+
+impl Chromosome {
+    /// Iterates over the contigs of this bucket, in storage order.
+    pub fn iter(&self) -> std::slice::Iter<'_, Contig> {
+        self.contigs.iter()
+    }
+}
+
+impl Contig {
+    /// Writes what is needed to inspect the contig in a genome browser.
+    ///
+    /// Creates `{dir_path}/{name}/`, where `name` is the HGVS-like label when
+    /// available and the contig id otherwise, containing:
+    /// * `contig.fa` and its `samtools faidx` index;
+    /// * `reads.fa`, the supporting reads;
+    /// * `{id}.bam`, the reads aligned on the contig (see `write_bam`);
+    /// * `contig.bed`, one row per segment (`Chimeric` / `ChimericTriple` only),
+    ///   in contig coordinates and labelled with the reference interval.
+    ///
+    /// # Errors
+    /// Fails when the contig has no supporting reads, when a read name or
+    /// sequence is not valid UTF-8, when a segment has no target name, or when
+    /// the directory, BAM or BED cannot be written.
+    pub fn to_igv(&self, dir_path: &str) -> Result<()> {
+        // Borrow the records: they are only read, never modified.
+        let supporting_records = self
+            .supporting_records
+            .as_ref()
+            .ok_or_else(|| anyhow!("no reads"))?;
+        let contig_name = self.hgvs().unwrap_or_else(|| self.id.clone());
+        let contig_dir = format!("{dir_path}/{contig_name}");
+        fs::create_dir_all(&contig_dir)?;
+
+        let fasta_path = format!("{contig_dir}/contig.fa");
+        self.write_fasta(&fasta_path);
+        write_fai(&fasta_path);
+
+        let reads_path = format!("{contig_dir}/reads.fa");
+        let reads = supporting_records
+            .iter()
+            .map(|r| {
+                Ok((
+                    String::from_utf8(r.qname().to_vec())?,
+                    String::from_utf8(r.seq().as_bytes())?,
+                ))
+            })
+            .collect::<Result<Vec<(String, String)>>>()?;
+        write_fasta(&reads_path, &reads);
+
+        let bam_path = format!("{contig_dir}/{}.bam", self.id);
+        write_bam(&fasta_path, &reads_path, &bam_path)?;
+
+        let segments: Vec<&Mapping> = match &self.contig_ref {
+            ContigRef::Chimeric((a, b)) => vec![a, b],
+            ContigRef::ChimericTriple((a, b, c)) => vec![a, b, c],
+            _ => Vec::new(),
+        };
+        if !segments.is_empty() {
+            let bed_path = format!("{contig_dir}/contig.bed");
+            let rows = segments
+                .into_iter()
+                .map(|m| {
+                    let target = m.target_name.clone().ok_or_else(|| {
+                        anyhow!("contig {}: alignment without a target name", self.id)
+                    })?;
+                    Ok((
+                        self.id.clone(),
+                        m.query_start,
+                        m.query_end,
+                        format!("{}:{}-{}", target, m.target_start, m.target_end),
+                    ))
+                })
+                .collect::<Result<Vec<(String, i32, i32, String)>>>()?;
+            write_bed(&bed_path, &rows)?;
+        }
+
+        Ok(())
+    }
+
+    // NOTE: an in-process BAM writer (minimap2 `Aligner` + htslib
+    // `mapping_to_record`) was attempted here and abandoned because the records
+    // it produced had a CIGAR length different from the sequence length. The BAM
+    // is produced by the external pipeline in `write_bam` instead.
+
+    /// HGVS-like label of the contig, see `ContigRef::hgvs`.
+    pub fn hgvs(&self) -> Option<String> {
+        self.contig_ref.hgvs()
+    }
+
+    /// Compact description of the covered reference segments, see `ContigRef::desc`.
+    pub fn desc(&self) -> Option<String> {
+        self.contig_ref.desc()
+    }
+
+    /// Printable breakpoints of the contig, see `ContigRef::breakpoints_repr`.
+    pub fn breakpoints_repr(&self) -> Option<Vec<String>> {
+        self.contig_ref.breakpoints_repr()
+    }
+
+    /// Writes the contig as a single-record FASTA file at `fasta_path`.
+    pub fn write_fasta(&self, fasta_path: &str) {
+        write_fasta(fasta_path, &vec![(self.id.clone(), self.sequence.clone())]);
+    }
+}
+
+/// Groups alignments that overlap on the query.
+///
+/// `mappings` is sorted in place by `query_start`. Each alignment then joins
+/// the most recent group when at least one member of that group ends more than
+/// 30 positions after the alignment starts; otherwise it opens a new group.
+fn group_mappings(mappings: &mut [Mapping]) -> Result<Vec<Vec<Mapping>>> {
+    mappings.sort_by(|a, b| a.query_start.cmp(&b.query_start));
+
+    let mut groups: Vec<Vec<Mapping>> = Vec::new();
+    for current in mappings.iter() {
+        // Overlap of more than 30 with any member of the latest group.
+        let overlapping = groups.last_mut().filter(|group| {
+            group
+                .iter()
+                .any(|member| member.query_end - current.query_start > 30)
+        });
+        match overlapping {
+            Some(group) => group.push(current.clone()),
+            None => groups.push(vec![current.clone()]),
+        }
+    }
+
+    Ok(groups)
+}
+
+/// Writes `d` as a tab-separated, four-column BED file at `path`.
+///
+/// Each tuple is `(chromosome, start, end, name)` and becomes one line.
+///
+/// # Errors
+/// Fails when the file cannot be created or written. The buffer is flushed
+/// explicitly so that a late write error is reported rather than lost on drop.
+pub fn write_bed(path: &str, d: &[(String, i32, i32, String)]) -> Result<()> {
+    let mut writer = BufWriter::new(File::create(path)?);
+    for (chr, start, end, value) in d {
+        writeln!(writer, "{chr}\t{start}\t{end}\t{value}")?;
+    }
+    writer.flush()?;
+    Ok(())
+}
+
+// unique
+/// Writes the records of `d` as a FASTQ file at `fastq_path`.
+///
+/// Every record produces the four FASTQ lines `@name`, sequence, `+` and
+/// quality. `Record::qual` returns raw Phred scores without ASCII offset, so 33
+/// is added to each value; the quality line is newline-terminated so that the
+/// next record starts on its own line.
+///
+/// # Errors
+/// Fails when the file cannot be created or written.
+pub fn write_fastq(fastq_path: &str, d: &Vec<Record>) -> Result<()> {
+    let mut writer = BufWriter::new(File::create(fastq_path)?);
+    for record in d {
+        // The name is written byte for byte: no UTF-8 round trip, no panic.
+        writer.write_all(b"@")?;
+        writer.write_all(record.qname())?;
+        writer.write_all(b"\n")?;
+        writer.write_all(&record.seq().as_bytes())?;
+        writer.write_all(b"\n+\n")?;
+        let qual: Vec<u8> = record
+            .qual()
+            .iter()
+            .map(|q| q.saturating_add(33))
+            .collect();
+        writer.write_all(&qual)?;
+        writer.write_all(b"\n")?;
+    }
+    writer.flush()?;
+    Ok(())
+}
+
+/// Writes `(name, sequence)` pairs as a FASTA file at `fasta_path`.
+///
+/// Records with an empty sequence are skipped, and only the first record is
+/// kept for any given name.
+///
+/// # Panics
+/// Panics when the file cannot be created or a record cannot be written.
+pub fn write_fasta(fasta_path: &str, d: &Vec<(String, String)>) {
+    let file = File::create(fasta_path).unwrap();
+    let mut writer = fasta::writer::Builder::default().build_with_writer(file);
+    // Hash set of borrowed names: constant-time duplicate check, no clones.
+    let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
+    for (name, sequence) in d {
+        // Empty sequences are skipped before the name is marked as seen.
+        if sequence.is_empty() || !seen.insert(name.as_str()) {
+            continue;
+        }
+        let record = fasta::Record::new(
+            fasta::record::Definition::new(name.as_str(), None),
+            fasta::record::Sequence::from(sequence.as_bytes().to_vec()),
+        );
+        writer.write_record(&record).unwrap();
+    }
+}
+
+/// Runs `samtools faidx <path>` and blocks until the process ends.
+///
+/// # Panics
+/// Panics when `samtools` cannot be started or when waiting on it fails. The
+/// exit status is not inspected.
+pub fn write_fai(path: &str) {
+    let mut command = Command::new("samtools");
+    command.args(["faidx", path]);
+    let mut child = command.spawn().expect("Samtools faidx failed to start");
+    child.wait().unwrap();
+}
+
+/// Aligns `reads_path` on `ref_path` and writes a coordinate-sorted BAM at `bam_path`.
+///
+/// Runs the pipeline
+/// `minimap2 -t 128 -ax map-ont -R <read group> | sambamba view | sambamba sort`,
+/// with a fresh UUID as read-group id and sample name.
+///
+/// The children's stderr is discarded rather than piped: nothing ever read
+/// those pipes, so a chatty child could block once the pipe buffer filled up.
+/// All three children are waited on and their exit statuses checked.
+///
+/// # Errors
+/// Fails when a tool cannot be started, when waiting on it fails, or when it
+/// exits with a non-zero status.
+pub fn write_bam(ref_path: &str, reads_path: &str, bam_path: &str) -> Result<()> {
+    let rg_id = uuid::Uuid::new_v4();
+
+    let mut mm2 = Command::new("minimap2")
+        .args(["-t", "128", "-ax", "map-ont", "-R"])
+        .arg(format!(
+            "@RG\\tPL:ONTASM_PROM\\tID:ONTASM_{rg_id}\\tSM:{rg_id}\\tLB:ONTASM_NB_PROM"
+        ))
+        .arg(ref_path)
+        .arg(reads_path)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::null())
+        .spawn()
+        .map_err(|e| anyhow!("Minimap2 failed to start: {}", e))?;
+    let mm2_out = mm2
+        .stdout
+        .take()
+        .ok_or_else(|| anyhow!("minimap2 stdout was not captured"))?;
+
+    let mut view = Command::new("sambamba")
+        .args(["view", "-h", "-S", "-t", "20", "--format=bam", "/dev/stdin"])
+        .stdin(Stdio::from(mm2_out))
+        .stdout(Stdio::piped())
+        .stderr(Stdio::null())
+        .spawn()
+        .map_err(|e| anyhow!("Sambamba view failed to start: {}", e))?;
+    let view_out = view
+        .stdout
+        .take()
+        .ok_or_else(|| anyhow!("sambamba view stdout was not captured"))?;
+
+    let mut sort = Command::new("sambamba")
+        .args(["sort", "-t", "20", "/dev/stdin", "-o"])
+        .arg(bam_path)
+        .stdin(Stdio::from(view_out))
+        .stderr(Stdio::null())
+        .spawn()
+        .map_err(|e| anyhow!("Sambamba sort failed to start: {}", e))?;
+
+    // Wait from the end of the pipeline backwards, then reap every child.
+    let sort_status = sort.wait()?;
+    let view_status = view.wait()?;
+    let mm2_status = mm2.wait()?;
+    for (tool, status) in [
+        ("minimap2", mm2_status),
+        ("sambamba view", view_status),
+        ("sambamba sort", sort_status),
+    ] {
+        if !status.success() {
+            return Err(anyhow!("{} failed: {}", tool, status));
+        }
+    }
+
+    Ok(())
+}
+
+/// Reads every record of the FASTA file at `path` as `(name, sequence)` pairs.
+///
+/// # Errors
+/// Fails when the file cannot be opened, when a record cannot be parsed, or
+/// when a record name is not valid UTF-8.
+pub fn read_fasta(path: &str) -> Result<Vec<(String, Sequence)>> {
+    let file = File::open(path)?;
+    let mut reader = fasta::Reader::new(BufReader::new(file));
+
+    let parsed: Result<Vec<(String, Sequence)>> = reader
+        .records()
+        .map(|entry| {
+            let entry = entry?;
+            let name = String::from_utf8(entry.name().to_vec())?;
+            Ok((name, entry.sequence().to_owned()))
+        })
+        .collect();
+    parsed
+}
+
+// fn get_ext_paths(dir: &str, ext: &str) -> Result<Vec<PathBuf>> {
+//     let paths = std::fs::read_dir(dir)?
+//         // Filter out all those directory entries which couldn't be read
+//         .filter_map(|res| res.ok())
+//         // Map the directory entries to paths
+//         .map(|dir_entry| dir_entry.path())
+//         // Filter out all paths with extensions other than `ext`
+//         .filter_map(|path| {
+//             if path.extension().map_or(false, |xt| xt == ext) {
+//                 Some(path)
+//             } else {
+//                 None
+//             }
+//         })
+//         .collect::<Vec<_>>();
+//     Ok(paths)
+// }
+
+/// Recursively lists the files matching `*_flye.fa` under `dir`.
+///
+/// Entries that cannot be read while walking the directory tree are skipped.
+///
+/// # Errors
+/// Returns an error when the glob pattern built from `dir` is invalid (instead of
+/// panicking).
+fn get_contigs_fa_paths(dir: &str) -> Result<Vec<PathBuf>> {
+    let pattern = format!("{}/**/*_flye.fa", dir);
+    let fa_paths: Vec<PathBuf> = glob::glob(&pattern)
+        .map_err(|e| anyhow!("Failed to read glob pattern {pattern:?}: {e}"))?
+        // Best effort: unreadable entries are ignored rather than aborting the scan.
+        .filter_map(|entry| entry.ok())
+        .collect();
+
+    Ok(fa_paths)
+}
+
+/// Renders `graph` as a Graphviz DOT string with the edges of one path labelled and
+/// coloured.
+///
+/// * `graph` - graph to render; it is cloned, never modified.
+/// * `way` - path given as consecutive node indices; each consecutive pair must be
+///   joined by an edge.
+/// * `value` - label appended (comma separated) to every edge of `way`.
+/// * `color` - colour attribute added to the labelled edges.
+/// * `erase` - when `true`, all existing edge labels are cleared first, and edges left
+///   without label as well as nodes left without neighbour are removed.
+///
+/// The colour is added by text substitution on the DOT output, so every edge whose
+/// final label equals the label of an edge of `way` receives it.
+///
+/// # Panics
+/// Panics if two consecutive nodes of `way` are not connected in `graph`.
+pub fn dot_graph(
+    graph: &StableGraph<String, String>,
+    way: &[NodeIndex],
+    value: &str,
+    color: &str,
+    erase: bool,
+) -> String {
+    let mut g = graph.clone();
+
+    // erase labels
+    if erase {
+        g.edge_weights_mut().for_each(|e| e.clear());
+    }
+
+    let mut labels = Vec::new();
+    for window in way.windows(2) {
+        let edge_id = g
+            .find_edge(window[0], window[1])
+            .expect("consecutive nodes of `way` must be joined by an edge");
+        let edge = g
+            .edge_weight_mut(edge_id)
+            .expect("edge returned by `find_edge` exists");
+        let label = if edge.is_empty() {
+            value.to_string()
+        } else {
+            // Pieces are trimmed: labels are joined with ", ", so splitting on ','
+            // alone would add one more leading space on every new pass over an edge.
+            let mut parts: Vec<&str> = edge
+                .split(',')
+                .map(str::trim)
+                .filter(|p| !p.is_empty())
+                .collect();
+            parts.push(value);
+            parts.join(", ")
+        };
+        labels.push(label.clone());
+        *edge = label;
+    }
+
+    if erase {
+        // Keep only what belongs to the path.
+        g.retain_edges(|g, i| !g.edge_weight(i).unwrap().is_empty());
+        g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
+    }
+
+    let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
+
+    // Longest labels first.
+    labels.sort_by_key(|b| std::cmp::Reverse(b.len()));
+    for label in labels {
+        let label_str = format!("label = \"{label}\"");
+        dot = dot.replace(
+            &format!("{label_str} ]\n"),
+            &format!("{label_str}, color = \"{color}\" ]\n"),
+        );
+    }
+    dot
+}
+
+/// Renders `graph` as a Graphviz DOT string with several paths labelled and coloured.
+/// Intended for non overlapping ways.
+///
+/// * `graph` - graph to render; it is cloned, never modified.
+/// * `ways` - paths given as consecutive node indices.
+/// * `values` - label appended to the edges of the way at the same position.
+/// * `colors` - colour applied to the edges of the way at the same position.
+/// * `erase` - when `true`, all existing edge labels are cleared first.
+///
+/// `ways`, `values` and `colors` are zipped together: extra items of the longer
+/// collections are ignored. Edges with an empty label and nodes without neighbour are
+/// always removed from the output.
+/// NOTE(review): `dot_graph` only prunes when `erase` is set; confirm that pruning
+/// unconditionally is intended here.
+///
+/// # Panics
+/// Panics if two consecutive nodes of a way are not connected in `graph`.
+pub fn dot_graph_biall(
+    graph: &StableGraph<String, String>,
+    ways: &[Vec<NodeIndex>],
+    values: Vec<&str>,
+    colors: Vec<&str>,
+    erase: bool,
+) -> String {
+    let mut g = graph.clone();
+    // erase labels
+    if erase {
+        g.edge_weights_mut().for_each(|e| e.clear());
+    }
+
+    let mut labels = Vec::new();
+    for ((way, value), color) in ways.iter().zip(values).zip(colors) {
+        for window in way.windows(2) {
+            let edge_id = g
+                .find_edge(window[0], window[1])
+                .expect("consecutive nodes of a way must be joined by an edge");
+            let edge = g
+                .edge_weight_mut(edge_id)
+                .expect("edge returned by `find_edge` exists");
+            let label = if edge.is_empty() {
+                value.to_string()
+            } else {
+                // Pieces are trimmed: labels are joined with ", ", so splitting on ','
+                // alone would add one more leading space on every new pass over an edge.
+                let mut parts: Vec<&str> = edge
+                    .split(',')
+                    .map(str::trim)
+                    .filter(|p| !p.is_empty())
+                    .collect();
+                parts.push(value);
+                parts.join(", ")
+            };
+            labels.push((label.clone(), color));
+            *edge = label;
+        }
+    }
+
+    g.retain_edges(|g, i| !g.edge_weight(i).unwrap().is_empty());
+    g.retain_nodes(|g, n| g.neighbors_undirected(n).count() > 0);
+
+    let mut dot = Dot::new(&g).to_string().replace("\\\"", "");
+
+    // Longest labels first.
+    labels.sort_by_key(|(label, _)| std::cmp::Reverse(label.len()));
+    for (label, color) in labels {
+        let label_str = format!("label = \"{label}\"");
+        dot = dot.replace(
+            &format!("{label_str} ]\n"),
+            &format!("{label_str}, color = \"{color}\" ]\n"),
+        );
+    }
+    dot
+}
+
+#[cfg(test)]
+mod tests {
+    use env_logger::Env;
+
+    use super::*;
+    use crate::{
+        genomic_graph::GenomicGraph,
+        // phase::{variants_phasing, write_phases_bed},
+    };
+
+    /// Installs `env_logger` in test mode with `info` as default filter.
+    ///
+    /// The result of `try_init` is discarded so that every test can call this helper.
+    fn init() {
+        let _ = env_logger::Builder::from_env(Env::default().default_filter_or("info"))
+            .is_test(true)
+            .try_init();
+    }
+
+    /// Returns the four ordered (start, end) combinations of two values:
+    /// `(a, a)`, `(a, b)`, `(b, a)`, `(b, b)`.
+    fn pairs<T: Copy>(a: T, b: T) -> [(T, T); 4] {
+        [(a, a), (a, b), (b, a), (b, b)]
+    }
+
+    /// Adds every sequence of a test FASTA, and its reverse complement, to a genome
+    /// through the aligner client, then prints the description of each contig.
+    ///
+    /// Reads `./data_test/contig_2.fa` and uses the aligner URL
+    /// `http://localhost:4444/align`.
+    #[test]
+    fn it_works() -> Result<()> {
+        let _ = env_logger::builder().is_test(true).try_init();
+        let contig_fa = "./data_test/contig_2.fa";
+        let aligner_url = "http://localhost:4444/align";
+
+        let mut genome = Genome::new();
+        let aligner = aligner_client::dist_align(aligner_url.to_string());
+
+        let sequences = read_fasta(contig_fa)?;
+        for (name, seq) in sequences {
+            genome.add_contig_from_seq(name.clone(), &seq.as_ref().to_vec(), &aligner)?;
+            // Reverse complement of the same sequence.
+            let mut seqc: Vec<u8> = seq.complement().map(|e| e.unwrap()).collect();
+            seqc.reverse();
+            genome.add_contig_from_seq(format!("{name}_rev"), &seqc, &aligner)?;
+            println!("Sending");
+        }
+        genome.iter().for_each(|(_, c)| {
+            c.iter().for_each(|cont| {
+                println!("{}", cont.contig_ref.desc().unwrap());
+            });
+        });
+
+        Ok(())
+    }
+
+    /// Builds the genomic graph of one case and prints the DOT graph and the distinct
+    /// ways found between the requested chromosome ends.
+    ///
+    /// Reads contigs from a hard-coded directory under `/data/longreads_basic_pipe`.
+    #[test]
+    fn test_graph() -> Result<()> {
+        init();
+
+        let case = "SALICETTO";
+        let chrom = vec!["chr10"];
+        info!("This record will be captured by `cargo test`");
+
+        let dir = format!("/data/longreads_basic_pipe/{case}/diag/asm_contis");
+
+        // Load from fasta in dir.
+        let genome = Genome::from_contigs_sequences(&dir)?;
+        genome.stats();
+        let mut genomic_graph = GenomicGraph::from_genome(&genome);
+
+        // (orientation, chromosome, position) of the start and of the end of each search.
+        let mut endpoints = Vec::new();
+        if chrom.len() > 1 {
+            // Every combination of position, orientation and chromosome for both ends.
+            for (start_pos, end_pos) in pairs(0, i32::MAX) {
+                for (start_sens, end_sens) in pairs(true, false) {
+                    for (start_chr, end_chr) in pairs(chrom[0], chrom[1]) {
+                        endpoints.push((
+                            (start_sens, start_chr, start_pos),
+                            (end_sens, end_chr, end_pos),
+                        ));
+                    }
+                }
+            }
+        } else {
+            endpoints.push(((true, chrom[0], 0), (true, chrom[0], i32::MAX)));
+        }
+
+        let mut all_ways = Vec::new();
+        for (start, end) in endpoints {
+            let (oriented_graph, _integrated_graph, ways) = genomic_graph.ways(start, end);
+
+            let dot = oriented_graph.dot_graph();
+            println!("dot\n{dot}");
+
+            for way in ways.iter() {
+                let s = way
+                    .iter()
+                    .map(|(_, _, _, s)| s.to_string())
+                    .collect::<Vec<String>>()
+                    .join("");
+                all_ways.push(s);
+            }
+        }
+
+        // `dedup` only removes consecutive duplicates: sort first.
+        all_ways.sort();
+        all_ways.dedup();
+        all_ways
+            .iter()
+            .enumerate()
+            .for_each(|(i, s)| println!("{i}.\t{s}"));
+
+        Ok(())
+    }
+
+    /// Loads all contigs found under a hard-coded reads directory and prints the sorted,
+    /// de-duplicated HGVS descriptions of the contigs that have one.
+    #[test]
+    fn dir() {
+        init();
+        let id = "ROBIN";
+        info!("This record will be captured by `cargo test`");
+
+        let dir = format!("/data/longreads_basic_pipe/{id}/diag/scan/reads",);
+
+        // Load from fasta in dir.
+        let genome = Genome::from_contigs_sequences(&dir).unwrap();
+        genome.stats();
+        let mut res: Vec<String> = genome
+            .iter()
+            .flat_map(|(_s, chrom)| chrom.iter().filter_map(|c| c.hgvs()))
+            .collect();
+        res.sort();
+        res.dedup();
+        res.iter().for_each(|s| println!("{s}"));
+    }
+}

+ 144 - 0
gg.txt

@@ -0,0 +1,144 @@
+chr10:107109268delins[chr7:145519601] C
+chr10:26736222-26730664 C
+chr10:26739674-26735600 C
+chr10:27351549-26649493ins[chr10:26595398-26596849] C
+chr10:27352298-26595398 C
+chr10:45546831-45543496ins[chr10:45543496-45543596] C
+chr10:76379455delins[chr5:3312011] C
+
+chr11:118605063(ARCN1)delins[chr1:13776484(KAZN)] P
+chr11:118606511(ARCN1)delins[chr1:13775079(KAZN)] P
+
+chr11:22377207(ANOS)delins[chr4:146266192]  ~
+
+chr11:36723850-34067196 C
+
+chr13:100403067-109924898 ~
+
+chr13:45308135delins[chr1:35019801] C
+chr13:45680578-45685715 C
+chr13:64000762delins[chr10:60518083]  C
+chr14:100716155-100743032 C
+chr14:14905132-14897796ins[chr5:109844072-109846684]  C
+chr14:15484954-15474142ins[chrX:46917380-46917586]  C
+chr14:15484964-15474375ins[chrX:46917380-46917586]  C
+chr14:16506565-16518730 ~
+chr14:29157989-29170340 C
+chr14:29342536delins[chr19:9618039] C
+chr14:99355868-99346742ins[chr10:100720207-100724415] C
+chr14:99355910-99346742ins[chr5:142528037-142530165] C
+chr16:19895845-19863982 C
+chr16:38605562-38974738 A
+chr16:54421945-54411130ins[chr16:54414638-54415069] C
+chr16:94154618-29408429ins[chr9:35311091-35311192]  C
+chr16:94157052-29408429ins[chr9:35311091-35311192]  C
+chr16:94588094delins[chr6:169583755]  C
+chr17:30428706-30430886 C
+chr17:30430906-30431114 C
+chr17:58995435-59003319 C
+chr17:59003319-58996578 C
+chr17:8315295-8323741 C
+chr17:8324218-8315295 C
+chr17:8325125-8315295 C
+chr18:12366854delins[chr2:6679752]  C
+chr18:77037624-77032094ins[chrX:75624792-75627956]  C
+chr19:19861162-19866379 C
+chr19:44471462-44465447ins[chr14:28900368-28903524] A
+
+chr1:13776286(KAZN)delins[chr11:118606383(ARCN1)] P
+chr1:13776286(KAZN)delins[chr11:118606511(ARCN1)] P
+chr1:13777790(KAZN)delins[chr11:118606511(ARCN1)] P
+chr1:13782214(KAZN)delins[chr11:118606511(ARCN1)] P
+
+chr1:145169225-16408562 C
+chr1:196089493-196197388  C
+chr1:196090579-196197388  C
+chr1:196091057-196197388  C
+chr1:196093260-196197388  C
+chr1:196099126-196205964  C
+chr1:196099126-196207494  C
+chr1:196099126-196209875  C
+chr1:196099126-196212224  C
+chr1:196099126-196212244  C
+chr1:229048238-229064575  C
+chr1:28202794-28194693ins[chr12:11416321-11417598]  ~
+chr1:9770600delins[chr17:57890164]  ~
+chr20:10853949-10847560ins[chr20:10851413-10850795] C
+chr20:10856140-10848441ins[chr20:10850795-10851413] C
+chr20:10856142-10846846ins[chr20:10850795-10851413] C
+chr20:10856998-10846846ins[chr20:10850795-10851413] C
+chr20:25860456-26014478 C
+chr20:5795219delins[chr1:101136458] C
+chr22:17444201-17444202 C
+chr22:29131548delins[chr8:126658945]  C
+chr22:9604488delins[chr4:193480318] C
+chr4:120713772delins[chr18:77032160]  C
+chr5:102878603-102865889  C
+chr5:138214046-138200018
+chr5:152618771-152615722ins[chr11:95443426-95449448]
+chr5:154806312-154801831
+chr5:43339817delins[chr1:151051544]
+chr5:43340553delins[chr1:151051544]
+chr5:59208947-59191394
+chr5:59208963-59191394
+chr5:682879-785947
+chr5:684420-676950
+chr5:684813-676950
+chr5:686462-676957
+chr5:688932-682499
+chr5:756938-675752ins[chr5:784226-787012]
+chr6:105218336-105210596
+chr6:132904104delins[chr7:145494437]
+chr6:17278753-17279768
+chr6:31098970-31168710ins[chr1:161809392-161810849]
+chr6:31135571-31117778
+chr6:31135627-31117816
+chr6:32364490-32365469
+chr6:32364494-32365469
+chr6:32365848-32362678
+chr6:80012684-79988817
+chr7:144145216-143960804
+chr7:145557450delins[chr6:132908592]  C
+chr7:78286802-77801344  C
+chr7:78297405-77805796  C
+chr7:92836875-92836877ins[chr18:45149792-45149889]  C
+chr8:112192452-112185771ins[chr8:112191484-112192351] C
+chr8:115759392-115740540  ~
+chr8:115760010-115740540  ~
+chr8:29498283delins[chr17:38409177] C
+chr8:40021941-39999137  C
+chr8:40022005-39999137  C
+chr8:73128638-73161617  C
+chr8:73128638-73161650  C
+chr8:74604931-74605324  C
+chr8:74604931-74607038  C
+chr8:74604931-74609359  C
+chr8:74612406-74598416  C
+chr8:74612406-74601138  C
+chr8:84094806delins[chr22:17441875] C
+
+chr9:143065873(ABL1)delins[chr11:118602724(ARCN1)]  P
+chr9:143066898delins[chr11:118602724]
+chr9:143068241delins[chr11:118602724]
+chr9:143071450delins[chr11:118602724]
+chr9:143072905delins[chr11:118602724]
+
+chr9:144741377-144735173ins[chr7:149022627-149025601] C
+chr9:145176418-145164688  C
+chr9:145779321-144454159  C
+
+chr9:149540006(NRARP)delins[chr11:116499036(???)] P
+
+chr9:149543056-145338031
+chr9:149543056-145340121
+chr9:149543056-145340132
+chr9:149543088-145338031
+chr9:149543088-145340132
+chr9:23385004-23366669
+chr9:6393993-6392485
+chr9:92813733-92813737
+chrM:13697-13697
+chrM:5700-6869ins[chrM:0-16569]
+chrM:7885-12544
+chrM:8723-8723
+chrX:5800881delins[chr4:102821553]

+ 14 - 15
src/lib.rs

@@ -903,7 +903,7 @@ pub fn read_fasta(path: &str) -> Result<Vec<(String, Sequence)>> {
 // }
 
 fn get_contigs_fa_paths(dir: &str) -> Result<Vec<PathBuf>> {
-    let pattern = format!("{}/**/*_flye.fa", dir);
+    let pattern = format!("{}/**/*.fa", dir);
     let fa_paths: Vec<PathBuf> = glob::glob(&pattern)
         .expect("Failed to read glob pattern")
         .filter_map(Result::ok)
@@ -1178,24 +1178,23 @@ mod tests {
     fn dir() {
         init();
 
-        let id = "ROBIN";
-        let chrom = ["chr9"];
-        info!("This record will be captured by `cargo test`");
-
-        let dir = format!(
-            "/data/longreads_basic_pipe/{id}/diag/scan/reads/{}",
-            chrom[0]
-        );
+        // let dir = format!("/data/longreads_basic_pipe/{id}/diag/scan/reads",);
+        let dir = "/data/wtdbg2".to_string();
 
         // Load from fasta in dir.
         let genome = Genome::from_contigs_sequences(&dir).unwrap();
         genome.stats();
-        genome.iter().for_each(|(_s, chrom)| {
-            chrom
-                .iter()
-                .filter_map(|c| c.hgvs())
-                .for_each(|c| println!("{c}"))
-        });
+        let mut res: Vec<String> = genome
+            .iter()
+            .flat_map(|(_s, chrom)| {
+                chrom.iter().filter_map(|c| c.hgvs())
+                // .map(|c| println!("{c}"))
+            })
+            .collect();
+        res.sort();
+        res.dedup();
+        res.iter().for_each(|s| println!("{s}"));
+
         // println!("{genome:#?}");
     }
 }