|
|
@@ -18,7 +18,13 @@ use anyhow::Context;
|
|
|
use itertools::Itertools;
|
|
|
use log::info;
|
|
|
use rayon::prelude::*;
|
|
|
-use std::{fs, io::BufRead, path::Path, str::FromStr};
|
|
|
+use serde::{Deserialize, Serialize};
|
|
|
+use std::{
|
|
|
+ fs::{self, File},
|
|
|
+ io::{self, BufRead},
|
|
|
+ path::Path,
|
|
|
+ str::FromStr,
|
|
|
+};
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
pub struct Savana {
|
|
|
@@ -423,3 +429,97 @@ impl SavanaReadCounts {
|
|
|
.collect()
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+#[derive(Debug, Serialize, Deserialize)]
|
|
|
+pub struct SavanaCN {
|
|
|
+ pub segments: Vec<CNSegment>,
|
|
|
+}
|
|
|
+
|
|
|
+#[derive(Debug, Serialize, Deserialize)]
|
|
|
+pub struct CNSegment {
|
|
|
+ pub chromosome: String,
|
|
|
+ pub start: u64,
|
|
|
+ pub end: u64,
|
|
|
+ pub segment_id: String,
|
|
|
+ pub bin_count: u32,
|
|
|
+ pub sum_of_bin_lengths: u64,
|
|
|
+ pub weight: f64,
|
|
|
+ pub copy_number: f64,
|
|
|
+ pub minor_allele_copy_number: Option<f64>,
|
|
|
+ pub mean_baf: Option<f64>,
|
|
|
+ pub no_het_snps: u32,
|
|
|
+}
|
|
|
+
|
|
|
+impl SavanaCN {
|
|
|
+ pub fn parse_file<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
|
|
|
+ let file = File::open(&path).context("Failed to open the file")?;
|
|
|
+ let reader = io::BufReader::new(file);
|
|
|
+
|
|
|
+ let mut segments = Vec::new();
|
|
|
+
|
|
|
+ for (index, line) in reader.lines().enumerate() {
|
|
|
+ let line = line.context("Failed to read line from file")?;
|
|
|
+
|
|
|
+ // Skip header line
|
|
|
+ if index == 0 {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let parts: Vec<&str> = line.split('\t').collect();
|
|
|
+ if parts.len() != 11 {
|
|
|
+ return Err(anyhow::anyhow!(
|
|
|
+ "Invalid number of columns at line {} (expected 11, got {})",
|
|
|
+ index + 1,
|
|
|
+ parts.len()
|
|
|
+ ));
|
|
|
+ }
|
|
|
+
|
|
|
+ // Helper closure for parsing with context
|
|
|
+ let parse_field = |field: &str, name: &str, line_num: usize| -> anyhow::Result<f64> {
|
|
|
+ field
|
|
|
+ .parse()
|
|
|
+ .context(format!("Failed to parse {name} at line {line_num}"))
|
|
|
+ };
|
|
|
+
|
|
|
+ let parse_field_u32 = |field: &str, name: &str, line_num: usize| -> anyhow::Result<u32> {
|
|
|
+ field
|
|
|
+ .parse()
|
|
|
+ .context(format!("Failed to parse {name} at line {line_num}"))
|
|
|
+ };
|
|
|
+
|
|
|
+ let line_num = index + 1;
|
|
|
+
|
|
|
+ let segment = CNSegment {
|
|
|
+ chromosome: parts[0].to_string(),
|
|
|
+ start: parse_field_u32(parts[1], "start", line_num)? as u64,
|
|
|
+ end: parse_field_u32(parts[2], "end", line_num)? as u64,
|
|
|
+ segment_id: parts[3].to_string(),
|
|
|
+ bin_count: parse_field_u32(parts[4], "bin_count", line_num)?,
|
|
|
+ sum_of_bin_lengths: parse_field_u32(parts[5], "sum_of_bin_lengths", line_num)?
|
|
|
+ as u64,
|
|
|
+ weight: parse_field(parts[6], "weight", line_num)?,
|
|
|
+ copy_number: parse_field(parts[7], "copy_number", line_num)?,
|
|
|
+ minor_allele_copy_number: if parts[8].is_empty() {
|
|
|
+ None
|
|
|
+ } else {
|
|
|
+ Some(parse_field(parts[8], "minor_allele_copy_number", line_num)?)
|
|
|
+ },
|
|
|
+ mean_baf: if parts[9].is_empty() {
|
|
|
+ None
|
|
|
+ } else {
|
|
|
+ Some(parse_field(parts[9], "mean_baf", line_num)?)
|
|
|
+ },
|
|
|
+ no_het_snps: if parts[10].is_empty() {
|
|
|
+ 0
|
|
|
+ } else {
|
|
|
+ parse_field_u32(parts[10], "no_het_snps", line_num)?
|
|
|
+ },
|
|
|
+ };
|
|
|
+
|
|
|
+ segments.push(segment);
|
|
|
+ }
|
|
|
+
|
|
|
+ Ok(Self { segments })
|
|
|
+ }
|
|
|
+
|
|
|
+}
|