Thomas 3 недель назад
Родитель
Сommit
fd1dc2c8df
3 измененных файлов с 57 добавлено и 33 удалено
  1. 5 1
      src/callers/gatk.rs
  2. 7 5
      src/commands/samtools.rs
  3. 45 27
      src/io/bam.rs

+ 5 - 1
src/callers/gatk.rs

@@ -293,6 +293,7 @@ impl JobCommand for Mutect2 {
 
         // Normal sample name must match @RG SM tag in normal BAM header.
         let sample_name_normal = &self.normal_sample;
+        let germline = self.config.constit_vcf(&self.id);
 
         let bind_flags = singularity_bind_flags([
             &self.config.normal_bam(&self.id),
@@ -302,6 +303,7 @@ impl JobCommand for Mutect2 {
             &self.log_dir,
             &self.config.tmp_dir,
             &self.bed_path.display().to_string(),
+            &germline,
         ]);
 
         // Three-step pipeline chained inside a single Singularity exec.
@@ -316,6 +318,8 @@ impl JobCommand for Mutect2 {
                 --input {normal_bam} \
                 --normal-sample {normal_sample} \
                 --intervals {bed} \
+                --callable-depth 6 \
+                --germline-resource \
                 --native-pair-hmm-threads {threads} \
                 --f1r2-tar-gz /output/f1r2.tar.gz \
                 --output /output/raw.vcf.gz \
@@ -858,6 +862,6 @@ mod tests {
     fn mutect2_run() -> anyhow::Result<()> {
         test_init();
         let config = Config::default();
-        run_mutect2_chunked("DUMCO", &config, 50)
+        run_mutect2_chunked("CHALO", &config, 50)
     }
 }

+ 7 - 5
src/commands/samtools.rs

@@ -6,7 +6,7 @@ use std::{
 };
 
 use anyhow::Context;
-use log::info;
+use log::{debug, info};
 use uuid::Uuid;
 
 use crate::{
@@ -83,6 +83,7 @@ impl SamtoolsIndex {
 
 impl Run for SamtoolsIndex {
     fn run(&mut self) -> anyhow::Result<()> {
+        debug!("Indexing BAM file with samtools.");
         if self.slurm {
             let _output = SlurmRunner::exec(self)?;
         } else {
@@ -122,12 +123,13 @@ pub struct SamtoolsReheader {
 
 impl super::Command for SamtoolsReheader {
     fn cmd(&self) -> String {
-        // Uses --in-place to avoid tmp file + rename dance.
-        // The sed appends SM:<sample> to every @RG line that doesn't already have one.
+        // No bash -c wrapper — the runner already provides one.
+        // No --in-place — fails when new header is larger (adding SM tag).
         format!(
-            "bash -c '{bin} view -H {bam} \
+            "{bin} view -H {bam} \
              | sed \"/^@RG/{{/SM:/!s/$/\\tSM:{sample}/}}\" \
-             | {bin} reheader --in-place - {bam}'",
+             | {bin} reheader - {bam} > {bam}.reheader.tmp \
+             && mv {bam}.reheader.tmp {bam}",
             bin = self.bin,
             bam = self.bam,
             sample = self.sample,

+ 45 - 27
src/io/bam.rs

@@ -796,37 +796,55 @@ pub fn read_sm_tag_or_inject(
         .with_context(|| format!("Failed to open BAM: {bam_path}"))?;
     let header = bam::Header::from_template(reader.header());
     let header_text = String::from_utf8_lossy(&header.to_bytes()).to_string();
+
+    let mut first_sm: Option<String> = None;
+    let mut all_have_sm = true;
+
     for line in header_text.lines() {
         if line.starts_with("@RG") {
-            for field in line.split('\t') {
-                if let Some(sm) = field.strip_prefix("SM:") {
-                    return Ok(sm.to_string());
-                }
+            let sm = line.split('\t')
+                .find_map(|f| f.strip_prefix("SM:"))
+                .map(|s| s.to_string());
+            if sm.is_none() {
+                all_have_sm = false;
+            } else if first_sm.is_none() {
+                first_sm = sm;
             }
         }
     }
 
-    // Preserve original mtime before modifying header
-    let original_mtime = filetime::FileTime::from_last_modification_time(
-        &std::fs::metadata(bam_path)
-            .with_context(|| format!("Failed to stat BAM: {bam_path}"))?,
-    );
-
-    // SM missing (dorado/MinKNOW unclassified barcode) — inject it
-    info!("No @RG SM tag in {bam_path}, injecting SM:{fallback_sample}");
-    let mut reheader = SamtoolsReheader::from_config(config, bam_path, fallback_sample);
-    reheader
-        .run()
-        .with_context(|| format!("Failed to inject SM into {bam_path}"))?;
-
-    let mut index = SamtoolsIndex::from_config(config, bam_path);
-    index
-        .run()
-        .with_context(|| format!("Failed to re-index {bam_path}"))?;
-
-    // Restore original mtime so is_file_older() doesn't re-trigger downstream callers
-    filetime::set_file_mtime(bam_path, original_mtime)
-        .with_context(|| format!("Failed to restore mtime on {bam_path}"))?;
-
-    Ok(fallback_sample.to_string())
+    if !all_have_sm {
+        // At least one @RG line missing SM — inject into those
+        info!("Some @RG lines in {bam_path} lack SM tag, injecting SM:{fallback_sample}");
+
+        let original_mtime = filetime::FileTime::from_last_modification_time(
+            &std::fs::metadata(bam_path)?,
+        );
+
+        let mut reheader = SamtoolsReheader::from_config(config, bam_path, fallback_sample);
+        reheader.run()?;
+        let mut index = SamtoolsIndex::from_config(config, bam_path);
+        index.run()?;
+
+        filetime::set_file_mtime(bam_path, original_mtime)?;
+
+        return Ok(first_sm.unwrap_or_else(|| fallback_sample.to_string()));
+    }
+
+    first_sm.context(format!("No @RG lines found in {bam_path}"))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::helpers::test_init;
+
+    #[test]
+    fn sm_tag() -> anyhow::Result<()> {
+        test_init();
+        let config = Config::default();
+
+        read_sm_tag_or_inject(&config.tumoral_bam("CHAHA"), "CHAHA_diag", &config)?;
+        Ok(())
+    }
 }