|
|
@@ -796,37 +796,55 @@ pub fn read_sm_tag_or_inject(
|
|
|
.with_context(|| format!("Failed to open BAM: {bam_path}"))?;
|
|
|
let header = bam::Header::from_template(reader.header());
|
|
|
let header_text = String::from_utf8_lossy(&header.to_bytes()).to_string();
|
|
|
+
|
|
|
+ let mut first_sm: Option<String> = None;
|
|
|
+ let mut all_have_sm = true;
|
|
|
+
|
|
|
for line in header_text.lines() {
|
|
|
if line.starts_with("@RG") {
|
|
|
- for field in line.split('\t') {
|
|
|
- if let Some(sm) = field.strip_prefix("SM:") {
|
|
|
- return Ok(sm.to_string());
|
|
|
- }
|
|
|
+ let sm = line.split('\t')
|
|
|
+ .find_map(|f| f.strip_prefix("SM:"))
|
|
|
+ .map(|s| s.to_string());
|
|
|
+ if sm.is_none() {
|
|
|
+ all_have_sm = false;
|
|
|
+ } else if first_sm.is_none() {
|
|
|
+ first_sm = sm;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Preserve original mtime before modifying header
|
|
|
- let original_mtime = filetime::FileTime::from_last_modification_time(
|
|
|
- &std::fs::metadata(bam_path)
|
|
|
- .with_context(|| format!("Failed to stat BAM: {bam_path}"))?,
|
|
|
- );
|
|
|
-
|
|
|
- // SM missing (dorado/MinKNOW unclassified barcode) — inject it
|
|
|
- info!("No @RG SM tag in {bam_path}, injecting SM:{fallback_sample}");
|
|
|
- let mut reheader = SamtoolsReheader::from_config(config, bam_path, fallback_sample);
|
|
|
- reheader
|
|
|
- .run()
|
|
|
- .with_context(|| format!("Failed to inject SM into {bam_path}"))?;
|
|
|
-
|
|
|
- let mut index = SamtoolsIndex::from_config(config, bam_path);
|
|
|
- index
|
|
|
- .run()
|
|
|
- .with_context(|| format!("Failed to re-index {bam_path}"))?;
|
|
|
-
|
|
|
- // Restore original mtime so is_file_older() doesn't re-trigger downstream callers
|
|
|
- filetime::set_file_mtime(bam_path, original_mtime)
|
|
|
- .with_context(|| format!("Failed to restore mtime on {bam_path}"))?;
|
|
|
-
|
|
|
- Ok(fallback_sample.to_string())
|
|
|
+ if !all_have_sm {
|
|
|
+ // At least one @RG line missing SM — inject into those
|
|
|
+ info!("Some @RG lines in {bam_path} lack SM tag, injecting SM:{fallback_sample}");
|
|
|
+
|
|
|
+ let original_mtime = filetime::FileTime::from_last_modification_time(
|
|
|
+ &std::fs::metadata(bam_path)?,
|
|
|
+ );
|
|
|
+
|
|
|
+ let mut reheader = SamtoolsReheader::from_config(config, bam_path, fallback_sample);
|
|
|
+ reheader.run()?;
|
|
|
+ let mut index = SamtoolsIndex::from_config(config, bam_path);
|
|
|
+ index.run()?;
|
|
|
+
|
|
|
+ filetime::set_file_mtime(bam_path, original_mtime)?;
|
|
|
+
|
|
|
+ return Ok(first_sm.unwrap_or_else(|| fallback_sample.to_string()));
|
|
|
+ }
|
|
|
+
|
|
|
+ first_sm.context(format!("No @RG lines found in {bam_path}"))
|
|
|
+}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod tests { // Tests for `read_sm_tag_or_inject`.
|
|
|
+ use super::*;
|
|
|
+ use crate::helpers::test_init;
|
|
|
+
|
|
|
+ #[test] // NOTE(review): presumably needs the "CHAHA" tumoral BAM to exist on disk — confirm.
|
|
|
+ fn sm_tag() -> anyhow::Result<()> { // Smoke test: passes when the call returns Ok; the returned SM value is not asserted.
|
|
|
+ test_init(); // Project test setup helper; its behaviour is not visible from this hunk.
|
|
|
+ let config = Config::default();
|
|
|
+
|
|
|
+ read_sm_tag_or_inject(&config.tumoral_bam("CHAHA"), "CHAHA_diag", &config)?; // "CHAHA_diag" is the fallback sample; if an @RG line lacks SM the call runs SamtoolsReheader and SamtoolsIndex on this BAM path.
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
}
|