|
@@ -2,18 +2,43 @@ use anyhow::{Context, Ok, Result};
|
|
|
use serde::{Deserialize, Serialize};
|
|
use serde::{Deserialize, Serialize};
|
|
|
use std::str::FromStr;
|
|
use std::str::FromStr;
|
|
|
|
|
|
|
|
|
|
+/// Represents a simplified version of an NCBI GFF (General Feature Format) record.
|
|
|
|
|
+///
|
|
|
|
|
+/// This struct encapsulates key attributes from a GFF record, focusing on
|
|
|
|
|
+/// feature type and various annotations.
|
|
|
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
|
|
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
|
|
|
pub struct NCBIGFF {
|
|
pub struct NCBIGFF {
|
|
|
|
|
+ /// The type of the feature (e.g., "gene", "exon", "CDS")
|
|
|
pub feature: String,
|
|
pub feature: String,
|
|
|
|
|
+ /// The name of the feature, if available
|
|
|
pub name: Option<String>,
|
|
pub name: Option<String>,
|
|
|
|
|
+ /// The standard name of the feature, if available
|
|
|
pub standard_name: Option<String>,
|
|
pub standard_name: Option<String>,
|
|
|
|
|
+ /// The function of the feature, if specified
|
|
|
pub function: Option<String>,
|
|
pub function: Option<String>,
|
|
|
|
|
+ /// The experiment associated with the feature, if any
|
|
|
pub experiment: Option<String>,
|
|
pub experiment: Option<String>,
|
|
|
|
|
+ /// Any additional notes about the feature
|
|
|
pub note: Option<String>,
|
|
pub note: Option<String>,
|
|
|
|
|
+ /// The regulatory class of the feature, if applicable
|
|
|
pub regulatory_class: Option<String>,
|
|
pub regulatory_class: Option<String>,
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
impl From<noodles_gff::RecordBuf> for NCBIGFF {
|
|
impl From<noodles_gff::RecordBuf> for NCBIGFF {
|
|
|
|
|
+ /// Converts a noodles_gff::RecordBuf into an NCBIGFF struct.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// This implementation extracts relevant information from a GFF record
|
|
|
|
|
+ /// and populates the NCBIGFF struct fields.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Arguments
|
|
|
|
|
+ /// * `r` - A noodles_gff::RecordBuf representing a GFF record
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Returns
|
|
|
|
|
+ /// An NCBIGFF struct populated with data from the input RecordBuf
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Note
|
|
|
|
|
+ /// This conversion handles both string and array attributes, joining array
|
|
|
|
|
+ /// values with a space if necessary.
|
|
|
fn from(r: noodles_gff::RecordBuf) -> Self {
|
|
fn from(r: noodles_gff::RecordBuf) -> Self {
|
|
|
let attr = r.attributes();
|
|
let attr = r.attributes();
|
|
|
|
|
|
|
@@ -36,15 +61,54 @@ impl From<noodles_gff::RecordBuf> for NCBIGFF {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Represents an NCBI (National Center for Biotechnology Information) accession number.
///
/// This struct holds the three components of an accession separately: its
/// prefix, its numeric part, and its version.
#[derive(Debug, Clone)]
pub struct NCBIAcc {
    /// The prefix of the accession (e.g., "NM", "NR", "XM").
    pub prefix: String,
    /// The numeric part of the accession.
    pub number: u64,
    /// The version number of the accession.
    ///
    /// Stored as an `f32`; the examples documented in this file use whole
    /// values such as `5.0`, `1.0` and `0.0`.
    pub version: f32,
}
|
|
|
|
|
|
|
|
impl FromStr for NCBIAcc {
|
|
impl FromStr for NCBIAcc {
|
|
|
type Err = anyhow::Error;
|
|
type Err = anyhow::Error;
|
|
|
|
|
+ /// Parses a string into an NCBIAcc struct.
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// This method handles various formats of NCBI accessions, including:
|
|
|
|
|
+ /// - Standard format: "PREFIX_NUMBER.VERSION" (e.g., "NM_001234.5")
|
|
|
|
|
+ /// - Unassigned transcripts: "unassigned_transcript_NUMBER_VERSION"
|
|
|
|
|
+ /// - Accessions without versions
|
|
|
|
|
+ /// - Accessions without numbers (treated as having max u64 number and version 0.0)
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Arguments
|
|
|
|
|
+ /// * `s` - A string slice representing the NCBI accession
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Returns
|
|
|
|
|
+ /// * `Ok(NCBIAcc)` if parsing is successful
|
|
|
|
|
+ /// * `Err(anyhow::Error)` if parsing fails
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// # Examples
|
|
|
|
|
+ /// ```
|
|
|
|
|
+ /// let acc1 = NCBIAcc::from_str("NM_001234.5").unwrap();
|
|
|
|
|
+ /// assert_eq!(acc1.prefix, "NM");
|
|
|
|
|
+ /// assert_eq!(acc1.number, 1234);
|
|
|
|
|
+ /// assert_eq!(acc1.version, 5.0);
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// let acc2 = NCBIAcc::from_str("unassigned_transcript_56789_1").unwrap();
|
|
|
|
|
+ /// assert_eq!(acc2.prefix, "unassigned_transcript");
|
|
|
|
|
+ /// assert_eq!(acc2.number, 56789);
|
|
|
|
|
+ /// assert_eq!(acc2.version, 1.0);
|
|
|
|
|
+ ///
|
|
|
|
|
+ /// let acc3 = NCBIAcc::from_str("XR_123456").unwrap();
|
|
|
|
|
+ /// assert_eq!(acc3.prefix, "XR_123456");
|
|
|
|
|
+ /// assert_eq!(acc3.number, u64::MAX);
|
|
|
|
|
+ /// assert_eq!(acc3.version, 0.0);
|
|
|
|
|
+ /// ```
|
|
|
fn from_str(s: &str) -> Result<Self> {
|
|
fn from_str(s: &str) -> Result<Self> {
|
|
|
if s.contains("unassigned_transcript_") {
|
|
if s.contains("unassigned_transcript_") {
|
|
|
let s = s.replace("unassigned_transcript_", "");
|
|
let s = s.replace("unassigned_transcript_", "");
|