Thomas 1 year ago
parent
commit
9896eb24f4
1 changed files with 121 additions and 147 deletions
  1. 121 147
      src/lib.rs

+ 121 - 147
src/lib.rs

@@ -3,9 +3,7 @@ use std::{
     io::{Read, Seek, SeekFrom},
 };
 
-use arrow::array::{
-    ArrayRef, Int16Array, StringArray, TimestampMillisecondArray, UInt16Array,
-};
+use arrow::array::{ArrayRef, Int16Array, StringArray, TimestampMillisecondArray, UInt16Array};
 use arrow::{array::RecordBatch, ipc::reader::FileReader};
 use chrono::TimeZone;
 use chrono::{DateTime, Utc};
@@ -74,171 +72,147 @@ impl Pod5Info {
         let mut system_name = String::new();
         let mut system_type = String::new();
 
-        // Update variables with actual values
-        // acquisition_id = "acquisition_123".to_string();
-        // acquisition_start_time = Utc::now();
-        // adc_max = 32767;
-        // adc_min = -32768;
-        // experiment_name = "Experiment XYZ".to_string();
-        // flow_cell_id = "FC123456".to_string();
-        // flow_cell_product_code = "FCPROD123".to_string();
-        // protocol_name = "Protocol ABC".to_string();
-        // protocol_run_id = "protocol_run_456".to_string();
-        // protocol_start_time = Utc::now();
-        // sample_id = "sample_789".to_string();
-        // sample_rate = 44100;
-        // sequencing_kit = "SEQKIT123".to_string();
-        // sequencer_position = "Position A1".to_string();
-        // sequencer_position_type = "Type B".to_string();
-        // software = "Software v1.0".to_string();
-        // system_name = "System Name".to_string();
-        // system_type = "System Type".to_string();
-        //
         if let Some(contents) = footer.contents() {
             for content in contents.iter() {
-                match content.content_type() {
-                    ContentType::RunInfoTable => {
-                        // println!("{content:#?}");
-                        let batch = read_arrow_table(
-                            file_path,
-                            content.offset() as u64,
-                            content.length() as u64,
-                        )
-                        .unwrap();
-                        let schema = batch[0].schema();
-                        for column in 0..batch[0].num_columns() {
-                            let array: ArrayRef = batch[0].column(column).clone();
-
-                            // Print column name and values
-                            let column_name = schema.field(column).name().to_string();
-                            // println!("Column: {}", column_name);
+                if let ContentType::RunInfoTable = content.content_type() {
+                    // println!("{content:#?}");
+                    let batch = read_arrow_table(
+                        file_path,
+                        content.offset() as u64,
+                        content.length() as u64,
+                    )
+                    .unwrap();
+                    let schema = batch[0].schema();
+                    for column in 0..batch[0].num_columns() {
+                        let array: ArrayRef = batch[0].column(column).clone();
 
-                            // Match the type of the array to extract values
-                            match array.data_type() {
-                                arrow::datatypes::DataType::Int16 => {
-                                    let int_array =
-                                        array.as_any().downcast_ref::<Int16Array>().unwrap();
-                                    for i in 0..int_array.len() {
-                                        // println!("{}: i16,", column_name);
-                                        match column_name.as_str() {
-                                            "adc_max" => adc_max = int_array.value(i),
-                                            "adc_min" => adc_min = int_array.value(i),
-                                            _ => (),
-                                        }
+                        // Print column name and values
+                        let column_name = schema.field(column).name().to_string();
+                        // println!("Column: {}", column_name);
 
-                                        // println!("{}", int_array.value(i));
+                        // Match the type of the array to extract values
+                        match array.data_type() {
+                            arrow::datatypes::DataType::Int16 => {
+                                let int_array =
+                                    array.as_any().downcast_ref::<Int16Array>().unwrap();
+                                for i in 0..int_array.len() {
+                                    // println!("{}: i16,", column_name);
+                                    match column_name.as_str() {
+                                        "adc_max" => adc_max = int_array.value(i),
+                                        "adc_min" => adc_min = int_array.value(i),
+                                        _ => (),
                                     }
+
+                                    // println!("{}", int_array.value(i));
                                 }
-                                arrow::datatypes::DataType::UInt16 => {
-                                    let int_array =
-                                        array.as_any().downcast_ref::<UInt16Array>().unwrap();
-                                    for i in 0..int_array.len() {
-                                        // println!("{}: u16,", column_name);
-                                        match column_name.as_str() {
-                                            "sample_rate" => sample_rate = int_array.value(i),
-                                            _ => (),
-                                        }
+                            }
+                            arrow::datatypes::DataType::UInt16 => {
+                                let int_array =
+                                    array.as_any().downcast_ref::<UInt16Array>().unwrap();
+                                for i in 0..int_array.len() {
+                                    // println!("{}: u16,", column_name);
+                                    if let "sample_rate" = column_name.as_str() {
+                                        sample_rate = int_array.value(i)
                                     }
                                 }
+                            }
 
-                                // arrow::datatypes::DataType::Int32 => {
-                                //     let int_array =
-                                //         array.as_any().downcast_ref::<Int32Array>().unwrap();
-                                //     for i in 0..int_array.len() {
-                                //         println!("{}: i32,", column_name);
-                                //
-                                //         // println!("{}", int_array.value(i));
-                                //     }
-                                // }
-                                // arrow::datatypes::DataType::UInt32 => {
-                                //     let int_array =
-                                //         array.as_any().downcast_ref::<UInt32Array>().unwrap();
-                                //     for i in 0..int_array.len() {
-                                //         println!("{}: u32,", column_name);
-                                //
-                                //         // println!("{}", int_array.value(i));
-                                //     }
-                                // }
-                                // arrow::datatypes::DataType::Float64 => {
-                                //     let float_array =
-                                //         array.as_any().downcast_ref::<Float64Array>().unwrap();
-                                //     for i in 0..float_array.len() {
-                                //         println!("{}: f64,", column_name);
-                                //
-                                //         // println!("{}", float_array.value(i));
-                                //     }
-                                // }
-                                arrow::datatypes::DataType::Utf8 => {
-                                    let string_array =
-                                        array.as_any().downcast_ref::<StringArray>().unwrap();
-                                    let string_array: Vec<String> = string_array
-                                        .iter()
-                                        .flat_map(|v| match v {
-                                            Some(v) => vec![v.to_string()],
-                                            None => vec![],
-                                        })
-                                        .collect();
+                            // arrow::datatypes::DataType::Int32 => {
+                            //     let int_array =
+                            //         array.as_any().downcast_ref::<Int32Array>().unwrap();
+                            //     for i in 0..int_array.len() {
+                            //         println!("{}: i32,", column_name);
+                            //
+                            //         // println!("{}", int_array.value(i));
+                            //     }
+                            // }
+                            // arrow::datatypes::DataType::UInt32 => {
+                            //     let int_array =
+                            //         array.as_any().downcast_ref::<UInt32Array>().unwrap();
+                            //     for i in 0..int_array.len() {
+                            //         println!("{}: u32,", column_name);
+                            //
+                            //         // println!("{}", int_array.value(i));
+                            //     }
+                            // }
+                            // arrow::datatypes::DataType::Float64 => {
+                            //     let float_array =
+                            //         array.as_any().downcast_ref::<Float64Array>().unwrap();
+                            //     for i in 0..float_array.len() {
+                            //         println!("{}: f64,", column_name);
+                            //
+                            //         // println!("{}", float_array.value(i));
+                            //     }
+                            // }
+                            arrow::datatypes::DataType::Utf8 => {
+                                let string_array =
+                                    array.as_any().downcast_ref::<StringArray>().unwrap();
+                                let string_array: Vec<String> = string_array
+                                    .iter()
+                                    .flat_map(|v| match v {
+                                        Some(v) => vec![v.to_string()],
+                                        None => vec![],
+                                    })
+                                    .collect();
 
-                                    let value = string_array.join(" ");
+                                let value = string_array.join(" ");
 
-                                    match column_name.as_str() {
-                                        "acquisition_id" => acquisition_id = value,
-                                        "experiment_name" => experiment_name = value,
-                                        "flow_cell_id" => flow_cell_id = value,
-                                        "flow_cell_product_code" => flow_cell_product_code = value,
-                                        "protocol_name" => protocol_name = value,
-                                        "protocol_run_id" => protocol_run_id = value,
-                                        "sample_id" => sample_id = value,
-                                        "sequencing_kit" => sequencing_kit = value,
-                                        "sequencer_position" => sequencer_position = value,
-                                        "sequencer_position_type" => {
-                                            sequencer_position_type = value
-                                        }
-                                        "software" => software = value,
-                                        "system_name" => system_name = value,
-                                        "system_type" => system_type = value,
-                                        _ => (),
+                                match column_name.as_str() {
+                                    "acquisition_id" => acquisition_id = value,
+                                    "experiment_name" => experiment_name = value,
+                                    "flow_cell_id" => flow_cell_id = value,
+                                    "flow_cell_product_code" => flow_cell_product_code = value,
+                                    "protocol_name" => protocol_name = value,
+                                    "protocol_run_id" => protocol_run_id = value,
+                                    "sample_id" => sample_id = value,
+                                    "sequencing_kit" => sequencing_kit = value,
+                                    "sequencer_position" => sequencer_position = value,
+                                    "sequencer_position_type" => {
+                                        sequencer_position_type = value
                                     }
-                                    // println!("{}: String,", column_name);
-                                    // println!("{}", string_array.join(" "));
+                                    "software" => software = value,
+                                    "system_name" => system_name = value,
+                                    "system_type" => system_type = value,
+                                    _ => (),
                                 }
-                                arrow::datatypes::DataType::Timestamp(
-                                    arrow::datatypes::TimeUnit::Millisecond,
-                                    Some(timezone),
-                                ) => {
-                                    if &timezone.to_string() == "UTC" {
-                                        let timestamp_array = array
-                                            .as_any()
-                                            .downcast_ref::<TimestampMillisecondArray>()
-                                            .unwrap();
-                                        for i in 0..timestamp_array.len() {
-                                            let timestamp = timestamp_array.value(i);
-                                            let datetime: DateTime<Utc> =
-                                                Utc.timestamp_millis_opt(timestamp).unwrap();
-                                            // println!("{}: DateTime<Utc>,", column_name);
+                                // println!("{}: String,", column_name);
+                                // println!("{}", string_array.join(" "));
+                            }
+                            arrow::datatypes::DataType::Timestamp(
+                                arrow::datatypes::TimeUnit::Millisecond,
+                                Some(timezone),
+                            ) => {
+                                if &timezone.to_string() == "UTC" {
+                                    let timestamp_array = array
+                                        .as_any()
+                                        .downcast_ref::<TimestampMillisecondArray>()
+                                        .unwrap();
+                                    for i in 0..timestamp_array.len() {
+                                        let timestamp = timestamp_array.value(i);
+                                        let datetime: DateTime<Utc> =
+                                            Utc.timestamp_millis_opt(timestamp).unwrap();
+                                        // println!("{}: DateTime<Utc>,", column_name);
 
-                                            match column_name.as_str() {
-                                                "acquisition_start_time" => {
-                                                    acquisition_start_time = datetime
-                                                }
-                                                "protocol_start_time" => {
-                                                    protocol_start_time = datetime
-                                                }
-                                                _ => (),
+                                        match column_name.as_str() {
+                                            "acquisition_start_time" => {
+                                                acquisition_start_time = datetime
                                             }
-
-                                            // println!("{}", datetime.to_rfc3339());
+                                            "protocol_start_time" => {
+                                                protocol_start_time = datetime
+                                            }
+                                            _ => (),
                                         }
+
+                                        // println!("{}", datetime.to_rfc3339());
                                     }
                                 }
+                            }
 
-                                _ => {
-                                    // println!("Unsupported data type: {:?}", array.data_type());
-                                }
+                            _ => {
+                                // println!("Unsupported data type: {:?}", array.data_type());
                             }
                         }
                     }
-                    _ => (),
                 }
             }
         }
@@ -298,7 +272,7 @@ mod tests {
 
     #[test]
     fn it_works() {
-        let file_path = "/data/run_data/20240620-CL/SAU-MRD-NB12_AUB-DIAG-NB13_PAR-DIAG-NB14/20240620_1532_1D_PAW61519_8071e9f4/pod5/PAW61519_8071e9f4_709b3710_0.pod5";
+        let file_path = "/data/run_data/20240326-CL/RICCO-MRD-NB03_MOREAU-DIAG-NB04/20240326_1355_1C_PAU37401_75b4c39d/pod5_pass/barcode03/PAU37401_pass_barcode03_75b4c39d_6af082bf_0.pod5";
         let r = Pod5Info::from_pod5(file_path);
         println!("{r:#?}");
     }