Thomas 1 year ago
parent
commit
d361ec6c1d
5 changed files with 297 additions and 226 deletions
  1. 196 195
      Cargo.lock
  2. 64 0
      src/annotation/ncbi.rs
  3. 2 2
      src/annotation/vep.rs
  4. 3 29
      src/commands/dorado.rs
  5. 32 0
      src/runners.rs

File diff suppressed because it is too large
+ 196 - 195
Cargo.lock


+ 64 - 0
src/annotation/ncbi.rs

@@ -2,18 +2,43 @@ use anyhow::{Context, Ok, Result};
 use serde::{Deserialize, Serialize};
 use std::str::FromStr;
 
+/// Represents a simplified version of an NCBI GFF (General Feature Format) record.
+///
+/// This struct encapsulates key attributes from a GFF record, focusing on
+/// feature type and various annotations.
 #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
 pub struct NCBIGFF {
+    /// The type of the feature (e.g., "gene", "exon", "CDS")
     pub feature: String,
+    /// The name of the feature, if available
     pub name: Option<String>,
+    /// The standard name of the feature, if available
     pub standard_name: Option<String>,
+    /// The function of the feature, if specified
     pub function: Option<String>,
+    /// The experiment associated with the feature, if any
     pub experiment: Option<String>,
+    /// Any additional notes about the feature
     pub note: Option<String>,
+    /// The regulatory class of the feature, if applicable
     pub regulatory_class: Option<String>,
 }
 
 impl From<noodles_gff::RecordBuf> for NCBIGFF {
+    /// Converts a noodles_gff::RecordBuf into an NCBIGFF struct.
+    ///
+    /// This implementation extracts relevant information from a GFF record
+    /// and populates the NCBIGFF struct fields.
+    ///
+    /// # Arguments
+    /// * `r` - A noodles_gff::RecordBuf representing a GFF record
+    ///
+    /// # Returns
+    /// An NCBIGFF struct populated with data from the input RecordBuf
+    ///
+    /// # Note
+    /// This conversion handles both string and array attributes, joining array
+    /// values with a space if necessary.
     fn from(r: noodles_gff::RecordBuf) -> Self {
         let attr = r.attributes();
 
@@ -36,15 +61,54 @@ impl From<noodles_gff::RecordBuf> for NCBIGFF {
     }
 }
 
+/// Represents an NCBI (National Center for Biotechnology Information) accession number.
+///
+/// This struct encapsulates the components of an NCBI accession, including its prefix,
+/// number, and version.
 #[derive(Debug, Clone)]
 pub struct NCBIAcc {
+    /// The prefix of the accession (e.g., "NM", "NR", "XM")
     pub prefix: String,
+    /// The numeric part of the accession
     pub number: u64,
+    /// The version number of the accession
     pub version: f32,
 }
 
 impl FromStr for NCBIAcc {
     type Err = anyhow::Error;
+    /// Parses a string into an NCBIAcc struct.
+    ///
+    /// This method handles various formats of NCBI accessions, including:
+    /// - Standard format: "PREFIX_NUMBER.VERSION" (e.g., "NM_001234.5")
+    /// - Unassigned transcripts: "unassigned_transcript_NUMBER_VERSION"
+    /// - Accessions without versions
+    /// - Accessions without numbers (treated as having max u64 number and version 0.0)
+    ///
+    /// # Arguments
+    /// * `s` - A string slice representing the NCBI accession
+    ///
+    /// # Returns
+    /// * `Ok(NCBIAcc)` if parsing is successful
+    /// * `Err(anyhow::Error)` if parsing fails
+    ///
+    /// # Examples
+    /// ```
+    /// let acc1 = NCBIAcc::from_str("NM_001234.5").unwrap();
+    /// assert_eq!(acc1.prefix, "NM");
+    /// assert_eq!(acc1.number, 1234);
+    /// assert_eq!(acc1.version, 5.0);
+    ///
+    /// let acc2 = NCBIAcc::from_str("unassigned_transcript_56789_1").unwrap();
+    /// assert_eq!(acc2.prefix, "unassigned_transcript");
+    /// assert_eq!(acc2.number, 56789);
+    /// assert_eq!(acc2.version, 1.0);
+    ///
+    /// let acc3 = NCBIAcc::from_str("XR_123456").unwrap();
+    /// assert_eq!(acc3.prefix, "XR_123456");
+    /// assert_eq!(acc3.number, u64::MAX);
+    /// assert_eq!(acc3.version, 0.0);
+    /// ```
     fn from_str(s: &str) -> Result<Self> {
         if s.contains("unassigned_transcript_") {
             let s = s.replace("unassigned_transcript_", "");

+ 2 - 2
src/annotation/vep.rs

@@ -133,7 +133,7 @@ pub struct VEP {
 /// potential impact on gene function.
 ///
 /// For more information, see:
-/// https://ensembl.org/info/genome/variation/prediction/predicted_data.html
+/// <https://ensembl.org/info/genome/variation/prediction/predicted_data.html>
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum VepConsequence {
     /// Complete destruction of a transcript
@@ -477,7 +477,7 @@ impl TryFrom<&VepLine> for VEP {
     ///
     /// # Behavior
     /// - Converts "-" strings to None for optional fields
-    /// - Parses the consequence field into a Vec<VepConsequence>
+    /// - Parses the consequence field into a `Vec<VepConsequence>`
     /// - Parses the extra field into a VEPExtra struct
     ///
     /// # Example

+ 3 - 29
src/commands/dorado.rs

@@ -258,6 +258,7 @@ impl Dorado {
         fs::remove_file(tmp_original)?;
         fs::remove_file(tmp_original_i)?;
         fs::remove_file(bam)?;
+
         self.index()?;
         Ok(())
     }
@@ -303,7 +304,6 @@ impl Dorado {
         info!("Basecalling ✅");
 
         // Demux the temporary bam file
-
         let tmp_demux_dir = format!("{tmp_dir}/demuxed");
         fs::create_dir(&tmp_demux_dir)?;
 
@@ -315,41 +315,15 @@ impl Dorado {
         info!("Running: {pipe}");
         let pipe_cmd = cmd!("bash", "-c", pipe);
         pipe_cmd.run()?;
-        //
-        //
-        // duct::cmd!(
-        //     &config.align.dorado_bin,
-        //     "demux",
-        //     "--output-dir",
-        //     &tmp_demux_dir,
-        //     "--kit-name",
-        //     &sequencing_kit,
-        //     &tmp_dir,
-        // )
-        // .run()?;
+
         info!("Demux ✅");
-        //
         for case in cases.iter() {
             let barcode = case.barcode.replace("NB", "");
             let bam = find_unique_file(
                 &tmp_demux_dir,
                 &format!("{sequencing_kit}_barcode{}.bam", barcode),
             )?;
-            //
-            //     // Trim
-            //     let trimmed_bam = format!(
-            //         "{tmp_demux_dir}/{sequencing_kit}_barcode{}_trimmed.bam",
-            //         barcode
-            //     );
-            //     let pipe = format!(
-            //         "{} trim --sequencing-kit {sequencing_kit} {bam} | samtools view -h -@ {} -b /dev/stdin -o {trimmed_bam}",
-            //         config.align.dorado_bin, &config.align.samtools_view_threads
-            //     );
-            //
-            //     info!("Running: {pipe}");
-            //     cmd!("bash", "-c", pipe).run()?;
-            //     info!("Trim ✅");
-            //
+
             // Align
             let aligned_bam = format!(
                 "{tmp_demux_dir}/{sequencing_kit}_barcode{}_aligned.bam",

+ 32 - 0
src/runners.rs

@@ -189,16 +189,31 @@ impl Log for DockerRun {
     }
 }
 
+/// Represents a command to be run, with facilities for execution, monitoring, and logging.
 pub struct CommandRun {
+    /// The binary or command to be executed
     pub bin: String,
+    /// The arguments to be passed to the command
     pub args: Vec<String>,
+    /// The child process, if the command has been started
     pub child: Option<Child>,
+    /// Sender for the command's output channel
     pub tx: mpsc::Sender<(String, String)>,
+    /// Receiver for the command's output channel
     pub rx: mpsc::Receiver<(String, String)>,
+    /// Accumulated log of the command's execution
     pub log: String,
 }
 
 impl CommandRun {
+    /// Creates a new CommandRun instance.
+    ///
+    /// # Arguments
+    /// * `bin` - The binary or command to be executed
+    /// * `args` - The arguments to be passed to the command
+    ///
+    /// # Returns
+    /// A new CommandRun instance
     pub fn new(bin: &str, args: &[&str]) -> Self {
         let (tx, rx) = mpsc::channel();
 
@@ -214,6 +229,13 @@ impl CommandRun {
 }
 
 impl Run for CommandRun {
+    /// Runs the command.
+    ///
+    /// This method spawns the child process and sets up threads to capture
+    /// its stdout and stderr output.
+    ///
+    /// # Returns
+    /// `Ok(())` if the command was successfully started, otherwise an error
     fn run(&mut self) -> anyhow::Result<()> {
         let info = format!("Running command: {} {}", &self.bin, &self.args.join(" "));
         info!("{info}");
@@ -256,6 +278,12 @@ impl Run for CommandRun {
 }
 
 impl Wait for CommandRun {
+    /// Waits for the command to complete.
+    ///
+    /// This method monitors the command's output and waits for the process to exit.
+    ///
+    /// # Returns
+    /// `Ok(())` if the command completed successfully, otherwise an error
     fn wait(&mut self) -> anyhow::Result<()> {
         if let Some(child) = &mut self.child {
             loop {
@@ -287,6 +315,10 @@ impl Wait for CommandRun {
 }
 
 impl Log for CommandRun {
+    /// Returns the accumulated log of the command's execution.
+    ///
+    /// # Returns
+    /// A string containing the command's execution log
     fn log(&self) -> String {
         self.log.clone()
     }

Some files were not shown because too many files changed in this diff