|
|
@@ -354,6 +354,7 @@ impl fmt::Display for Infos {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+#[allow(non_camel_case_types)]
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
|
pub enum Info {
|
|
|
Empty,
|
|
|
@@ -369,9 +370,42 @@ pub enum Info {
|
|
|
RGU(u32),
|
|
|
RTU(u32),
|
|
|
SVTYPE(SVType),
|
|
|
+ MATEID(String),
|
|
|
+ NORMAL_READ_SUPPORT(u32),
|
|
|
+ TUMOUR_READ_SUPPORT(u32),
|
|
|
+ NORMAL_ALN_SUPPORT(u32),
|
|
|
+ TUMOUR_ALN_SUPPORT(u32),
|
|
|
SVLEN(i32),
|
|
|
+ TUMOUR_DP_BEFORE(Vec<u32>),
|
|
|
+ TUMOUR_DP_AT(Vec<u32>),
|
|
|
+ TUMOUR_DP_AFTER(Vec<u32>),
|
|
|
+ NORMAL_DP_BEFORE(Vec<u32>),
|
|
|
+ NORMAL_DP_AT(Vec<u32>),
|
|
|
+ NORMAL_DP_AFTER(Vec<u32>),
|
|
|
+ TUMOUR_AF(Vec<f32>),
|
|
|
+ NORMAL_AF(Vec<f32>),
|
|
|
+ BP_NOTATION(String),
|
|
|
+ SOURCE(String),
|
|
|
+ CLUSTERED_READS_TUMOUR(u32),
|
|
|
+ CLUSTERED_READS_NORMAL(u32),
|
|
|
+ TUMOUR_ALT_HP(Vec<u32>),
|
|
|
+ TUMOUR_PS(Vec<String>),
|
|
|
+ NORMAL_ALT_HP(Vec<u32>),
|
|
|
+ NORMAL_PS(Vec<String>),
|
|
|
+ TUMOUR_TOTAL_HP_AT(Vec<u32>),
|
|
|
+ NORMAL_TOTAL_HP_AT(Vec<u32>),
|
|
|
+ ORIGIN_STARTS_STD_DEV(f32),
|
|
|
+ ORIGIN_MAPQ_MEAN(f32),
|
|
|
+ ORIGIN_EVENT_SIZE_STD_DEV(f32),
|
|
|
+ ORIGIN_EVENT_SIZE_MEDIAN(f32),
|
|
|
+ ORIGIN_EVENT_SIZE_MEAN(f32),
|
|
|
+ END_STARTS_STD_DEV(f32),
|
|
|
+ END_MAPQ_MEAN(f32),
|
|
|
+ END_EVENT_SIZE_STD_DEV(f32),
|
|
|
+ END_EVENT_SIZE_MEDIAN(f32),
|
|
|
+ END_EVENT_SIZE_MEAN(f32),
|
|
|
+ CLASS(String),
|
|
|
END(u32),
|
|
|
- MATEID(String),
|
|
|
SVINSLEN(u32),
|
|
|
SVINSSEQ(String),
|
|
|
}
|
|
|
@@ -380,70 +414,63 @@ impl FromStr for Info {
|
|
|
type Err = anyhow::Error;
|
|
|
|
|
|
fn from_str(s: &str) -> anyhow::Result<Self> {
|
|
|
- if s.contains("=") {
|
|
|
+ if s.contains('=') {
|
|
|
let (key, value) = s
|
|
|
.split_once('=')
|
|
|
- .context(format!("Can't split with `=` {s}"))?;
|
|
|
+ .context(format!("Can't split with `=` in string: {s}"))?;
|
|
|
+
|
|
|
Ok(match key {
|
|
|
- "FAU" => Info::FAU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "FCU" => Info::FCU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "FGU" => Info::FGU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "FTU" => Info::FTU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "RAU" => Info::RAU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "RCU" => Info::RCU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "RGU" => Info::RGU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "RTU" => Info::RTU(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
+ "FAU" => Info::FAU(parse_value(value, key)?),
|
|
|
+ "FCU" => Info::FCU(parse_value(value, key)?),
|
|
|
+ "FGU" => Info::FGU(parse_value(value, key)?),
|
|
|
+ "FTU" => Info::FTU(parse_value(value, key)?),
|
|
|
+ "RAU" => Info::RAU(parse_value(value, key)?),
|
|
|
+ "RCU" => Info::RCU(parse_value(value, key)?),
|
|
|
+ "RGU" => Info::RGU(parse_value(value, key)?),
|
|
|
+ "RTU" => Info::RTU(parse_value(value, key)?),
|
|
|
+ "SVLEN" => Info::SVLEN(parse_value(value, key)?),
|
|
|
+ "END" => Info::END(parse_value(value, key)?),
|
|
|
+ "SVINSLEN" => Info::SVINSLEN(parse_value(value, key)?),
|
|
|
"SVTYPE" => Info::SVTYPE(value.parse()?),
|
|
|
- "SVLEN" => Info::SVLEN(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
- "END" => Info::END(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
"MATEID" => Info::MATEID(value.to_string()),
|
|
|
- "SVINSLEN" => Info::SVINSLEN(
|
|
|
- value
|
|
|
- .parse()
|
|
|
- .context(format!("Can't parse into u32: {value}"))?,
|
|
|
- ),
|
|
|
+ "NORMAL_READ_SUPPORT" => Info::NORMAL_READ_SUPPORT(parse_value(value, key)?),
|
|
|
+ "TUMOUR_READ_SUPPORT" => Info::TUMOUR_READ_SUPPORT(parse_value(value, key)?),
|
|
|
+ "NORMAL_ALN_SUPPORT" => Info::NORMAL_ALN_SUPPORT(parse_value(value, key)?),
|
|
|
+ "TUMOUR_ALN_SUPPORT" => Info::TUMOUR_ALN_SUPPORT(parse_value(value, key)?),
|
|
|
"SVINSSEQ" => Info::SVINSSEQ(value.to_string()),
|
|
|
-
|
|
|
+ "TUMOUR_DP_BEFORE" => Info::TUMOUR_DP_BEFORE(parse_vec_value(value, key)?),
|
|
|
+ "TUMOUR_DP_AT" => Info::TUMOUR_DP_AT(parse_vec_value(value, key)?),
|
|
|
+ "TUMOUR_DP_AFTER" => Info::TUMOUR_DP_AFTER(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_DP_BEFORE" => Info::NORMAL_DP_BEFORE(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_DP_AT" => Info::NORMAL_DP_AT(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_DP_AFTER" => Info::NORMAL_DP_AFTER(parse_vec_value(value, key)?),
|
|
|
+ "TUMOUR_AF" => Info::TUMOUR_AF(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_AF" => Info::NORMAL_AF(parse_vec_value(value, key)?),
|
|
|
+ "BP_NOTATION" => Info::BP_NOTATION(value.to_string()),
|
|
|
+ "SOURCE" => Info::SOURCE(value.to_string()),
|
|
|
+ "CLUSTERED_READS_TUMOUR" => Info::CLUSTERED_READS_TUMOUR(parse_value(value, key)?),
|
|
|
+ "CLUSTERED_READS_NORMAL" => Info::CLUSTERED_READS_NORMAL(parse_value(value, key)?),
|
|
|
+ "TUMOUR_ALT_HP" => Info::TUMOUR_ALT_HP(parse_vec_value(value, key)?),
|
|
|
+ "TUMOUR_PS" => Info::TUMOUR_PS(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_ALT_HP" => Info::NORMAL_ALT_HP(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_PS" => Info::NORMAL_PS(parse_vec_value(value, key)?),
|
|
|
+ "TUMOUR_TOTAL_HP_AT" => Info::TUMOUR_TOTAL_HP_AT(parse_vec_value(value, key)?),
|
|
|
+ "NORMAL_TOTAL_HP_AT" => Info::NORMAL_TOTAL_HP_AT(parse_vec_value(value, key)?),
|
|
|
+ "ORIGIN_STARTS_STD_DEV" => Info::ORIGIN_STARTS_STD_DEV(parse_value(value, key)?),
|
|
|
+ "ORIGIN_MAPQ_MEAN" => Info::ORIGIN_MAPQ_MEAN(parse_value(value, key)?),
|
|
|
+ "ORIGIN_EVENT_SIZE_STD_DEV" => {
|
|
|
+ Info::ORIGIN_EVENT_SIZE_STD_DEV(parse_value(value, key)?)
|
|
|
+ }
|
|
|
+ "ORIGIN_EVENT_SIZE_MEDIAN" => {
|
|
|
+ Info::ORIGIN_EVENT_SIZE_MEDIAN(parse_value(value, key)?)
|
|
|
+ }
|
|
|
+ "ORIGIN_EVENT_SIZE_MEAN" => Info::ORIGIN_EVENT_SIZE_MEAN(parse_value(value, key)?),
|
|
|
+ "END_STARTS_STD_DEV" => Info::END_STARTS_STD_DEV(parse_value(value, key)?),
|
|
|
+ "END_MAPQ_MEAN" => Info::END_MAPQ_MEAN(parse_value(value, key)?),
|
|
|
+ "END_EVENT_SIZE_STD_DEV" => Info::END_EVENT_SIZE_STD_DEV(parse_value(value, key)?),
|
|
|
+ "END_EVENT_SIZE_MEDIAN" => Info::END_EVENT_SIZE_MEDIAN(parse_value(value, key)?),
|
|
|
+ "END_EVENT_SIZE_MEAN" => Info::END_EVENT_SIZE_MEAN(parse_value(value, key)?),
|
|
|
+ "CLASS" => Info::CLASS(value.to_string()),
|
|
|
_ => Info::Empty,
|
|
|
})
|
|
|
} else {
|
|
|
@@ -451,7 +478,6 @@ impl FromStr for Info {
|
|
|
"H" => Info::H,
|
|
|
"F" => Info::F,
|
|
|
"P" => Info::P,
|
|
|
-
|
|
|
_ => Info::Empty,
|
|
|
})
|
|
|
}
|
|
|
@@ -479,10 +505,50 @@ impl fmt::Display for Info {
|
|
|
Info::MATEID(v) => write!(f, "MATEID={v}"),
|
|
|
Info::SVINSLEN(v) => write!(f, "SVINSLEN={v}"),
|
|
|
Info::SVINSSEQ(v) => write!(f, "SVINSSEQ={v}"),
|
|
|
+ Info::NORMAL_READ_SUPPORT(v) => write!(f, "NORMAL_READ_SUPPORT={v}"),
|
|
|
+ Info::TUMOUR_READ_SUPPORT(v) => write!(f, "TUMOUR_READ_SUPPORT={v}"),
|
|
|
+ Info::NORMAL_ALN_SUPPORT(v) => write!(f, "NORMAL_ALN_SUPPORT={v}"),
|
|
|
+ Info::TUMOUR_ALN_SUPPORT(v) => write!(f, "TUMOUR_ALN_SUPPORT={v}"),
|
|
|
+ Info::TUMOUR_DP_BEFORE(v) => write!(f, "TUMOUR_DP_BEFORE={}", concat_numbers(v)),
|
|
|
+ Info::TUMOUR_DP_AT(v) => write!(f, "TUMOUR_DP_AT={}", concat_numbers(v)),
|
|
|
+ Info::TUMOUR_DP_AFTER(v) => write!(f, "TUMOUR_DP_AFTER={}", concat_numbers(v)),
|
|
|
+ Info::NORMAL_DP_BEFORE(v) => write!(f, "NORMAL_DP_BEFORE={}", concat_numbers(v)),
|
|
|
+ Info::NORMAL_DP_AT(v) => write!(f, "NORMAL_DP_AT={}", concat_numbers(v)),
|
|
|
+ Info::NORMAL_DP_AFTER(v) => write!(f, "NORMAL_DP_AFTER={}", concat_numbers(v)),
|
|
|
+ Info::TUMOUR_AF(v) => write!(f, "TUMOUR_AF={}", concat_numbers(v)),
|
|
|
+ Info::NORMAL_AF(v) => write!(f, "NORMAL_AF={}", concat_numbers(v)),
|
|
|
+ Info::BP_NOTATION(v) => write!(f, "BP_NOTATION={v}"),
|
|
|
+ Info::SOURCE(v) => write!(f, "SOURCE={v}"),
|
|
|
+ Info::CLUSTERED_READS_TUMOUR(v) => write!(f, "CLUSTERED_READS_TUMOUR={v}"),
|
|
|
+ Info::CLUSTERED_READS_NORMAL(v) => write!(f, "CLUSTERED_READS_NORMAL={v}"),
|
|
|
+ Info::TUMOUR_ALT_HP(v) => write!(f, "TUMOUR_ALT_HP={}", concat_numbers(v)),
|
|
|
+ Info::TUMOUR_PS(v) => write!(f, "TUMOUR_PS={}", v.join(",")),
|
|
|
+ Info::NORMAL_ALT_HP(v) => write!(f, "NORMAL_ALT_HP={}", concat_numbers(v)),
|
|
|
+ Info::NORMAL_PS(v) => write!(f, "NORMAL_PS={}", v.join(",")),
|
|
|
+ Info::TUMOUR_TOTAL_HP_AT(v) => write!(f, "TUMOUR_TOTAL_HP_AT={}", concat_numbers(v)),
|
|
|
+ Info::NORMAL_TOTAL_HP_AT(v) => write!(f, "NORMAL_TOTAL_HP_AT={}", concat_numbers(v)),
|
|
|
+ Info::ORIGIN_STARTS_STD_DEV(v) => write!(f, "ORIGIN_STARTS_STD_DEV={v}"),
|
|
|
+ Info::ORIGIN_MAPQ_MEAN(v) => write!(f, "ORIGIN_MAPQ_MEAN={v}"),
|
|
|
+ Info::ORIGIN_EVENT_SIZE_STD_DEV(v) => write!(f, "ORIGIN_EVENT_SIZE_STD_DEV={v}"),
|
|
|
+ Info::ORIGIN_EVENT_SIZE_MEDIAN(v) => write!(f, "ORIGIN_EVENT_SIZE_MEDIAN={v}"),
|
|
|
+ Info::ORIGIN_EVENT_SIZE_MEAN(v) => write!(f, "ORIGIN_EVENT_SIZE_MEAN={v}"),
|
|
|
+ Info::END_STARTS_STD_DEV(v) => write!(f, "END_STARTS_STD_DEV={v}"),
|
|
|
+ Info::END_MAPQ_MEAN(v) => write!(f, "END_MAPQ_MEAN={v}"),
|
|
|
+ Info::END_EVENT_SIZE_STD_DEV(v) => write!(f, "END_EVENT_SIZE_STD_DEV={v}"),
|
|
|
+ Info::END_EVENT_SIZE_MEDIAN(v) => write!(f, "END_EVENT_SIZE_MEDIAN={v}"),
|
|
|
+ Info::END_EVENT_SIZE_MEAN(v) => write!(f, "END_EVENT_SIZE_MEAN={v}"),
|
|
|
+ Info::CLASS(v) => write!(f, "CLASS={v}"),
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+pub fn concat_numbers<T: ToString>(v: &[T]) -> String {
|
|
|
+ v.iter()
|
|
|
+ .map(|n| n.to_string())
|
|
|
+ .collect::<Vec<String>>()
|
|
|
+ .join(",")
|
|
|
+}
|
|
|
+
|
|
|
/// Format
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
|
pub enum Format {
|
|
|
@@ -495,8 +561,9 @@ pub enum Format {
|
|
|
PL(Vec<u32>),
|
|
|
|
|
|
// Clairs
|
|
|
+ // when format begins with N: normal
|
|
|
AF(f32),
|
|
|
- NAF(u32),
|
|
|
+ NAF(f32), // DP(u32),
|
|
|
NDP(u32),
|
|
|
NAD(Vec<u32>),
|
|
|
AU(u32),
|
|
|
@@ -558,28 +625,57 @@ impl TryFrom<(&str, &str)> for Format {
|
|
|
type Error = anyhow::Error;
|
|
|
|
|
|
fn try_from((key, value): (&str, &str)) -> anyhow::Result<Self> {
|
|
|
- Ok(match key {
|
|
|
+ let format = match key {
|
|
|
"GT" => Format::GT(value.to_string()),
|
|
|
- "GQ" => Format::GQ(value.parse().context(format!("Can't parse GQ: {value}"))?),
|
|
|
- "DP" => Format::DP(value.parse().context(format!("Can't parse DP: {value}"))?),
|
|
|
- "AD" => Format::AD(
|
|
|
- value
|
|
|
- .split(',')
|
|
|
- .map(|e| e.parse().context("Failed to parse AD"))
|
|
|
- .collect::<anyhow::Result<Vec<_>>>()?,
|
|
|
- ),
|
|
|
- "VAF" => Format::VAF(value.parse().context(format!("Can't parse VAF: {value}"))?),
|
|
|
- "PL" => Format::PL(
|
|
|
- value
|
|
|
- .split(',')
|
|
|
- .map(|e| e.parse().context("Failed to parse AD"))
|
|
|
- .collect::<anyhow::Result<Vec<_>>>()?,
|
|
|
- ),
|
|
|
- "TR" => Format::TR(value.parse()?),
|
|
|
- "VR" => Format::TR(value.parse()?),
|
|
|
+ "GQ" => Format::GQ(parse_value(value, key)?),
|
|
|
+ "DP" => Format::DP(parse_value(value, key)?),
|
|
|
+ "AD" => Format::AD(parse_vec_value(value, key)?),
|
|
|
+ "VAF" => Format::VAF(parse_value(value, key)?),
|
|
|
+ "AF" => Format::AF(parse_value(value, key)?),
|
|
|
+ "NAF" => Format::NAF(parse_value(value, key)?),
|
|
|
+ "NDP" => Format::NDP(parse_value(value, key)?),
|
|
|
+ "NAD" => Format::NAD(parse_vec_value(value, key)?),
|
|
|
+ "AU" => Format::AU(parse_value(value, key)?),
|
|
|
+ "CU" => Format::CU(parse_value(value, key)?),
|
|
|
+ "GU" => Format::GU(parse_value(value, key)?),
|
|
|
+ "TU" => Format::TU(parse_value(value, key)?),
|
|
|
+ "NAU" => Format::NAU(parse_value(value, key)?),
|
|
|
+ "NCU" => Format::NCU(parse_value(value, key)?),
|
|
|
+ "NGU" => Format::NGU(parse_value(value, key)?),
|
|
|
+ "NTU" => Format::NTU(parse_value(value, key)?),
|
|
|
+ "PL" => Format::PL(parse_vec_value(value, key)?),
|
|
|
+ "TR" => Format::TR(parse_value(value, key)?),
|
|
|
+ "VR" => Format::VR(parse_value(value, key)?),
|
|
|
_ => Format::Other((key.to_string(), value.to_string())),
|
|
|
+ };
|
|
|
+ Ok(format)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// Helper function to parse a single value (DeepSeek)
|
|
|
+fn parse_value<T: std::str::FromStr>(value: &str, key: &str) -> anyhow::Result<T>
|
|
|
+where
|
|
|
+ T::Err: std::fmt::Debug,
|
|
|
+{
|
|
|
+ value
|
|
|
+ .parse()
|
|
|
+ .map_err(|e| anyhow::anyhow!("{:?}", e)) // Convert the error to `anyhow::Error`
|
|
|
+ .context(format!("Can't parse {}: {}", key, value)) // Add context
|
|
|
+}
|
|
|
+
|
|
|
+// Helper function to parse comma-separated values (DeepSeek)
|
|
|
+fn parse_vec_value<T: std::str::FromStr>(value: &str, key: &str) -> anyhow::Result<Vec<T>>
|
|
|
+where
|
|
|
+ T::Err: std::fmt::Debug,
|
|
|
+{
|
|
|
+ value
|
|
|
+ .split(',')
|
|
|
+ .map(|e| {
|
|
|
+ e.parse()
|
|
|
+ .map_err(|e| anyhow::anyhow!("{:?}", e)) // Convert the error to `anyhow::Error`
|
|
|
+ .context(format!("Failed to parse {}: {}", key, e)) // Add context
|
|
|
})
|
|
|
- }
|
|
|
+ .collect()
|
|
|
}
|
|
|
|
|
|
impl From<Format> for (String, String) {
|