|
|
@@ -92,6 +92,99 @@ impl Run for SamtoolsIndex {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/// Wrapper around `samtools reheader` to inject a missing `SM` tag into `@RG` lines.
|
|
|
+///
|
|
|
+/// This is needed for ONT BAMs produced by dorado/MinKNOW where the `@RG` header
|
|
|
+/// may lack an `SM` field (e.g., when barcode is "unclassified"). GATK Mutect2
|
|
|
+/// requires `SM` on every `@RG` line and will crash without it.
|
|
|
+///
|
|
|
+/// The operation is **header-only** — no read data is rewritten, making it
|
|
|
+/// nearly instant even on large BAMs.
|
|
|
+///
|
|
|
+/// Produces a command of the form:
|
|
|
+///
|
|
|
+/// ```text
|
|
|
+/// bash -c '<bin> view -H <bam> | sed "s/^\(@RG.*\)/\1\tSM:<sample>/" | <bin> reheader --in-place - <bam>'
|
|
|
+/// ```
|
|
|
+///
|
|
|
+/// **Important**: After reheader, the BAM index (`.bai`) should be regenerated
|
|
|
+/// via [`SamtoolsIndex`] since header offsets may change.
|
|
|
+#[derive(Debug)]
|
|
|
+pub struct SamtoolsReheader {
|
|
|
+ /// Path to the `samtools` binary.
|
|
|
+ pub bin: String,
|
|
|
+ /// Path to the BAM file to modify in-place.
|
|
|
+ pub bam: String,
|
|
|
+ /// Sample name to inject as `SM:<sample>` in all `@RG` lines.
|
|
|
+ pub sample: String,
|
|
|
+ slurm: bool,
|
|
|
+}
|
|
|
+
|
|
|
+impl super::Command for SamtoolsReheader {
|
|
|
+ fn cmd(&self) -> String {
|
|
|
+ // Uses --in-place to avoid tmp file + rename dance.
|
|
|
+ // The sed appends SM:<sample> to every @RG line that doesn't already have one.
|
|
|
+ format!(
|
|
|
+ "bash -c '{bin} view -H {bam} \
|
|
|
+ | sed \"/^@RG/{{/SM:/!s/$/\\tSM:{sample}/}}\" \
|
|
|
+ | {bin} reheader --in-place - {bam}'",
|
|
|
+ bin = self.bin,
|
|
|
+ bam = self.bam,
|
|
|
+ sample = self.sample,
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl super::LocalRunner for SamtoolsReheader {}
|
|
|
+impl super::LocalBatchRunner for SamtoolsReheader {}
|
|
|
+
|
|
|
+impl super::SlurmRunner for SamtoolsReheader {
|
|
|
+ fn slurm_args(&self) -> Vec<String> {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some("samtools_reheader".into()),
|
|
|
+ cpus_per_task: Some(1),
|
|
|
+ mem: Some("4G".into()),
|
|
|
+ partition: Some("shortq".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ .to_args()
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl super::SbatchRunner for SamtoolsReheader {
|
|
|
+ fn slurm_params(&self) -> SlurmParams {
|
|
|
+ SlurmParams {
|
|
|
+ job_name: Some("samtools_reheader".into()),
|
|
|
+ cpus_per_task: Some(1),
|
|
|
+ mem: Some("4G".into()),
|
|
|
+ partition: Some("shortq".into()),
|
|
|
+ gres: None,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl SamtoolsReheader {
|
|
|
+ pub fn from_config(config: &Config, bam: &str, sample: &str) -> Self {
|
|
|
+ Self {
|
|
|
+ bin: config.align.samtools_bin.clone(),
|
|
|
+ bam: bam.to_string(),
|
|
|
+ sample: sample.to_string(),
|
|
|
+ slurm: config.slurm_runner,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Run for SamtoolsReheader {
|
|
|
+ fn run(&mut self) -> anyhow::Result<()> {
|
|
|
+ if self.slurm {
|
|
|
+ let _output = SlurmRunner::exec(self)?;
|
|
|
+ } else {
|
|
|
+ let _output = LocalRunner::exec(self)?;
|
|
|
+ }
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/// Wrapper around a `samtools merge` invocation used to append one BAM into
|
|
|
/// another while preserving read group (RG) uniqueness.
|
|
|
///
|