Bläddra i källkod

bam alt/depth

Thomas 7 månader sedan
förälder
incheckning
74c6f4da7b
7 ändrade filer med 434 tillägg och 147 borttagningar
  1. 61 65
      src/annotation/mod.rs
  2. 32 21
      src/callers/clairs.rs
  3. 3 4
      src/callers/deep_somatic.rs
  4. 145 3
      src/lib.rs
  5. 2 3
      src/pipes/somatic.rs
  6. 40 19
      src/variant/variant.rs
  7. 151 32
      src/variant/variant_collection.rs

+ 61 - 65
src/annotation/mod.rs

@@ -74,15 +74,20 @@ pub enum Annotation {
     /// Timing of replication for the variant's genomic position.
     ReplicationTiming(ReplicationClass),
 
+    /// Variant in an High Depth position
     HighDepth,
 
+    /// Variant in the given panel ranges
     Panel(String),
 
+    /// Variant at a CpG
     CpG,
 
+    /// Variant in a region of low alignement quality
     LowMAPQ,
 
-    VNTR
+    /// Variant in a variable number of tandem repeat locus
+    VNTR,
 }
 
 /// Denotes the biological sample type associated with a variant call.
@@ -193,31 +198,33 @@ impl FromStr for Sample {
 /// such as base content or sample-specific information.
 impl fmt::Display for Annotation {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use Annotation::*;
+
         let s = match self {
-            Annotation::Callers(caller, sample) => format!("{caller} {sample}"),
-            Annotation::AlterationCategory(alt_cat) => alt_cat.to_string(),
-            Annotation::ShannonEntropy(_) => "ShannonEntropy".into(),
-            Annotation::ConstitDepth(_) => "ConstitDepth".into(),
-            Annotation::ConstitAlt(_) => "ConstitAlt".into(),
-            Annotation::LowConstitDepth => "LowConstitDepth".into(),
-            Annotation::HighConstitAlt => "HighConstitAlt".into(),
-            Annotation::Cosmic(_) => "Cosmic".into(),
-            Annotation::GnomAD(_) => "GnomAD".into(),
-            Annotation::LowEntropy => "LowEntropy".into(),
-            Annotation::VEP(_) => "VEP".into(),
-            Annotation::CpG => "CpG".into(),
-            Annotation::VNTR => "VNTR".into(),
-            Annotation::TriNucleotides(bases) => format!(
-                        "Trinucleotides({})",
-                        bases.iter().map(|b| b.to_string()).collect::<String>(),
-                    ),
-            Annotation::ReplicationTiming(rt) => match rt {
-                        ReplicationClass::Early => "ReplicationEarly".into(),
-                        ReplicationClass::Late => "ReplicationLate".into(),
-                    },
-            Annotation::HighDepth => "HighDepth".into(),
-            Annotation::Panel(name) => format!("Panel_{name}"),
-            Annotation::LowMAPQ => "LowMAPQ".to_string(),
+            Callers(caller, sample) => format!("{caller} {sample}"),
+            AlterationCategory(alt_cat) => alt_cat.to_string(),
+            ShannonEntropy(_) => "ShannonEntropy".into(),
+            ConstitDepth(_) => "ConstitDepth".into(),
+            ConstitAlt(_) => "ConstitAlt".into(),
+            LowConstitDepth => "LowConstitDepth".into(),
+            HighConstitAlt => "HighConstitAlt".into(),
+            Cosmic(_) => "Cosmic".into(),
+            GnomAD(_) => "GnomAD".into(),
+            LowEntropy => "LowEntropy".into(),
+            VEP(_) => "VEP".into(),
+            CpG => "CpG".into(),
+            VNTR => "VNTR".into(),
+            TriNucleotides(bases) => format!(
+                "Trinucleotides({})",
+                bases.iter().map(|b| b.to_string()).collect::<String>(),
+            ),
+            ReplicationTiming(rt) => match rt {
+                ReplicationClass::Early => "ReplicationEarly".into(),
+                ReplicationClass::Late => "ReplicationLate".into(),
+            },
+            HighDepth => "HighDepth".into(),
+            Panel(name) => format!("Panel_{name}"),
+            LowMAPQ => "LowMAPQ".to_string(),
         };
         write!(f, "{}", s)
     }
@@ -261,14 +268,15 @@ pub enum Caller {
 
 impl fmt::Display for Caller {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use Caller::*;
         match self {
-            Caller::DeepVariant => write!(f, "DeepVariant"),
-            Caller::ClairS => write!(f, "ClairS"),
-            Caller::NanomonSV => write!(f, "NanomonSV"),
-            Caller::NanomonSVSolo => write!(f, "NanomonSV-solo"),
-            Caller::Savana => write!(f, "Savana"),
-            Caller::Severus => write!(f, "Severus"),
-            Caller::DeepSomatic => write!(f, "DeepSomatic"),
+            DeepVariant => write!(f, "DeepVariant"),
+            ClairS => write!(f, "ClairS"),
+            NanomonSV => write!(f, "NanomonSV"),
+            NanomonSVSolo => write!(f, "NanomonSV-solo"),
+            Savana => write!(f, "Savana"),
+            Severus => write!(f, "Severus"),
+            DeepSomatic => write!(f, "DeepSomatic"),
         }
     }
 }
@@ -277,14 +285,15 @@ impl FromStr for Caller {
     type Err = anyhow::Error;
 
     fn from_str(s: &str) -> anyhow::Result<Self> {
+        use Caller::*;
         match s {
-            "DeepVariant" => Ok(Caller::DeepVariant),
-            "ClairS" => Ok(Caller::ClairS),
-            "NanomonSV" => Ok(Caller::NanomonSV),
-            "NanomonSV-solo" => Ok(Caller::NanomonSVSolo),
-            "Savana" => Ok(Caller::Savana),
-            "Severus" => Ok(Caller::Severus),
-            "DeepSomatic" => Ok(Caller::DeepSomatic),
+            "DeepVariant" => Ok(DeepVariant),
+            "ClairS" => Ok(ClairS),
+            "NanomonSV" => Ok(NanomonSV),
+            "NanomonSV-solo" => Ok(NanomonSVSolo),
+            "Savana" => Ok(Savana),
+            "Severus" => Ok(Severus),
+            "DeepSomatic" => Ok(DeepSomatic),
             _ => Err(anyhow::anyhow!("Can't parse Caller {s}")),
         }
     }
@@ -339,6 +348,7 @@ impl Annotations {
         &self,
         annotations: Option<Box<dyn Fn(&Annotation) -> bool + Send + Sync>>,
     ) -> AnnotationsStats {
+        use Annotation::*;
         let map: DashMap<String, u64> = DashMap::new();
         let num_maps: DashMap<String, HashMap<String, Vec<f64>>> = DashMap::new();
 
@@ -353,29 +363,16 @@ impl Annotations {
             let mut numerical = Vec::new();
             for ann in anns.iter() {
                 match ann {
-                    Annotation::LowConstitDepth
-                    | Annotation::LowEntropy
-                    | Annotation::GnomAD(_)
-                    | Annotation::VEP(_)
-                    | Annotation::TriNucleotides(_)
-                    | Annotation::ReplicationTiming(_)
-                    | Annotation::HighDepth
-                    | Annotation::CpG
-                    | Annotation::VNTR
-                    | Annotation::Panel(_)
-                    | Annotation::LowMAPQ
-                    | Annotation::HighConstitAlt => categorical.push(ann.to_string()),
-                    Annotation::Callers(caller, sample) => {
-                        categorical.push(format!("{caller} {sample}"))
-                    }
-                    Annotation::ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),
-                    Annotation::ConstitDepth(v) | Annotation::ConstitAlt(v) => {
+                    LowConstitDepth | LowEntropy | GnomAD(_) | VEP(_) | TriNucleotides(_)
+                    | ReplicationTiming(_) | HighDepth | CpG | VNTR | Panel(_) | LowMAPQ
+                    | HighConstitAlt => categorical.push(ann.to_string()),
+                    Callers(caller, sample) => categorical.push(format!("{caller} {sample}")),
+                    ShannonEntropy(v) => numerical.push((ann.to_string(), *v)),
+                    ConstitDepth(v) | Annotation::ConstitAlt(v) => {
                         numerical.push((ann.to_string(), *v as f64));
                     }
-                    Annotation::Cosmic(c) => numerical.push((ann.to_string(), c.cosmic_cnt as f64)),
-                    Annotation::AlterationCategory(alt_cat) => {
-                        categorical.push(alt_cat.to_string())
-                    }
+                    Cosmic(c) => numerical.push((ann.to_string(), c.cosmic_cnt as f64)),
+                    AlterationCategory(alt_cat) => categorical.push(alt_cat.to_string()),
                 }
             }
 
@@ -661,13 +658,13 @@ pub trait CallerCat {
 
 /// Determines whether a variant annotation meets either of the following criteria:
 ///
-/// 1. It is observed in gnomAD with a non-zero allele frequency *and* has 
+/// 1. It is observed in gnomAD with a non-zero allele frequency *and* has
 ///    at least one constitutive alternate read.
-/// 2. All variant calls are from `SoloTumor` samples and there is at least one 
+/// 2. All variant calls are from `SoloTumor` samples and there is at least one
 ///    constitutive alternate read.
 ///
-/// This function is typically used to identify variants that are either likely 
-/// germline (based on population frequency and constitutive signal) or artifacts 
+/// This function is typically used to identify variants that are either likely
+/// germline (based on population frequency and constitutive signal) or artifacts
 /// (appearing only in tumor calls but with constitutive support).
 ///
 /// # Arguments
@@ -715,4 +712,3 @@ pub fn is_gnomad_and_constit_alt(anns: &[Annotation]) -> bool {
 
     (has_gnomad && has_constit_alt) || only_tumor_with_constit
 }
-

+ 32 - 21
src/callers/clairs.rs

@@ -11,7 +11,7 @@ use crate::{
         variant_collection::VariantCollection,
     },
 };
-use anyhow::{Context, Ok};
+use anyhow::Ok;
 use log::{debug, info, warn};
 use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use std::{fs, path::Path};
@@ -76,7 +76,8 @@ impl ShouldRun for ClairS {
     /// Determines whether ClairS should be re-run based on BAM modification timestamps.
     fn should_run(&self) -> bool {
         let passed_vcf = &self.config.clairs_passed_vcf(&self.id);
-        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true).unwrap_or(true)
+        let result = is_file_older(passed_vcf, &self.config.normal_bam(&self.id), true)
+            .unwrap_or(true)
             || is_file_older(passed_vcf, &self.config.tumoral_bam(&self.id), true).unwrap_or(true);
         if result {
             warn!("ClairS should run for id: {}.", self.id);
@@ -105,7 +106,8 @@ impl Run for ClairS {
         let (output_vcf, output_indels_vcf) = self.config.clairs_output_vcfs(&self.id);
         if !Path::new(&output_vcf).exists() || !Path::new(&output_indels_vcf).exists() {
             let output_dir = self.config.clairs_output_dir(&self.id);
-            fs::create_dir_all(&output_dir).context(format!("Failed create dir: {output_dir}"))?;
+            fs::create_dir_all(&output_dir)
+                .map_err(|e| anyhow::anyhow!("Failed create dir: {output_dir}.\n{e}"))?;
 
             let mut docker_run = DockerRun::new(&[
                 "run",
@@ -138,12 +140,12 @@ impl Run for ClairS {
                 &format!("{}_diag", self.id),
             ]);
             let report = run_wait(&mut docker_run)
-                .context(format!("Failed to run ClairS for {}", &self.id))?;
+                .map_err(|e| anyhow::anyhow!("Failed to run ClairS for {}.\n{e}", &self.id))?;
 
             let log_file = format!("{}/clairs_", self.log_dir);
             report
                 .save_to_file(&log_file)
-                .context(format!("Error while writing logs into {log_file}"))?;
+                .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}.\n{e}"))?;
         } else {
             debug!(
                 "ClairS output VCF already exists for {}, skipping execution.",
@@ -162,15 +164,17 @@ impl Run for ClairS {
                 &clair3_germline_passed,
                 BcftoolsConfig::default(),
             )
-            .context(format!(
-                "Error while running bcftools keep PASS for {}",
-                &clair3_germline_passed
-            ))?;
+            .map_err(|e| {
+                anyhow::anyhow!(
+                    "Failed to run `bcftools keep PASS` for {}.\n{e}",
+                    &clair3_germline_passed
+                )
+            })?;
 
             let log_file = format!("{}/bcftools_pass_", self.log_dir);
             report
                 .save_to_file(&log_file)
-                .context(format!("Error while writing logs into {log_file}"))?;
+                .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}.\n{e}"))?;
 
             // fs::remove_file(&tmp_file).context(format!("Can't remove tmp file {tmp_file}"))?;
         } else {
@@ -189,26 +193,33 @@ impl Run for ClairS {
                 &tmp_file,
                 BcftoolsConfig::default(),
             )
-            .context(format!(
-                "Failed to run bcftools concat for {} and {}",
-                &output_vcf, &output_indels_vcf
-            ))?;
+            .map_err(|e| {
+                anyhow::anyhow!(
+                    "Failed to run bcftools concat for {} and {}.\n{e}",
+                    &output_vcf,
+                    &output_indels_vcf
+                )
+            })?;
             let log_file = format!("{}/bcftools_concat_", self.log_dir);
             report
                 .save_to_file(&log_file)
-                .context(format!("Error while writing logs into {log_file}"))?;
+                .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}\n{e}"))?;
 
-            let report =
-                bcftools_keep_pass(&tmp_file, passed_vcf, BcftoolsConfig::default()).context(
-                    format!("Error while running bcftools keep PASS for {}", &output_vcf),
-                )?;
+            let report = bcftools_keep_pass(&tmp_file, passed_vcf, BcftoolsConfig::default())
+                .map_err(|e| {
+                    anyhow::anyhow!(
+                        "Error while running bcftools keep PASS for {}.\n{e}",
+                        &output_vcf
+                    )
+                })?;
 
             let log_file = format!("{}/bcftools_pass_", self.log_dir);
             report
                 .save_to_file(&log_file)
-                .context(format!("Error while writing logs into {log_file}"))?;
+                .map_err(|e| anyhow::anyhow!("Error while writing logs into {log_file}.\n{e}"))?;
 
-            fs::remove_file(&tmp_file).context(format!("Failed to remove temporary file {tmp_file}"))?;
+            fs::remove_file(&tmp_file)
+                .map_err(|e| anyhow::anyhow!("Failed to remove temporary file {tmp_file}.\n{e}"))?;
         } else {
             debug!(
                 "ClairS PASSED VCF already exists for {}, skipping execution.",

+ 3 - 4
src/callers/deep_somatic.rs

@@ -1,6 +1,5 @@
 use std::{fs, path::Path};
 
-use anyhow::Context;
 use log::info;
 use rayon::prelude::*;
 
@@ -97,7 +96,7 @@ impl Run for DeepSomatic {
         if !Path::new(&output_vcf).exists() {
             let output_dir = self.config.deepsomatic_output_dir(&self.id);
             fs::create_dir_all(&output_dir)
-                .context(format!("Failed to create dir: {output_dir}"))?;
+                .map_err(|e| anyhow::anyhow!("Failed to create dir: {output_dir}.\n{e}"))?;
 
             let mut docker_run = DockerRun::new(&[
                 "run",
@@ -139,10 +138,10 @@ impl Run for DeepSomatic {
                 &format!("{}_{}", self.id, self.config.normal_name),
             ]);
             let report = run_wait(&mut docker_run)
-                .context(format!("Erreur while running DeepSomatic for {}.", self.id))?;
+                .map_err(|e| anyhow::anyhow!("Failed to run DeepSomatic for {}.\n{e}", self.id))?;
             report
                 .save_to_file(&format!("{}/deepvariant_", self.log_dir))
-                .context("Can't save DeepVariant logs")?;
+                .map_err(|e| anyhow::anyhow!("Can't save DeepVariant logs.\n{e}"))?;
         }
 
         // Keep PASS

+ 145 - 3
src/lib.rs

@@ -169,7 +169,8 @@ mod tests {
     use helpers::estimate_shannon_entropy;
     use io::bed::read_bed;
     use itertools::Itertools;
-    use log::{error, info, warn};
+    use log::{debug, error, info, warn};
+    use pandora_lib_variants::variants::VariantCategory;
     use pipes::somatic::SomaticPipe;
     use positions::{overlaps_par, GenomePosition, GenomeRange};
     use rayon::prelude::*;
@@ -178,7 +179,7 @@ mod tests {
 
     use self::{collection::pod5::{FlowCellCase, Pod5Collection}, commands::dorado, config::Config};
     use super::*;
-    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{bed::bedrow_overlaps_par, dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, group_variants_by_bnd_rc, Variant}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
+    use crate::{annotation::Annotation, callers::{clairs::ClairS, deep_variant::DeepVariant, nanomonsv::{NanomonSV, NanomonSVSolo}, savana::SavanaCN}, collection::{bam::{self, nt_pileup_new}, flowcells::{scan_archive, FlowCells}, run_tasks, vcf::VcfCollection, Collections, CollectionsConfig, ShouldRun}, commands::dorado::Dorado, helpers::find_files, io::{bed::bedrow_overlaps_par, dict::read_dict, gff::features_ranges}, pipes::somatic::const_stats, positions::{merge_overlapping_genome_ranges, range_intersection_par, sort_ranges}, scan::scan::somatic_scan, variant::{variant::{AlterationCategory, BNDDesc, BNDGraph, GroupByThreshold, ToBNDGraph}, variant_collection::{group_variants_by_bnd_desc, group_variants_by_bnd_rc, Variant, VariantCollection}, variants_stats::{self, somatic_depth_quality_ranges, VariantsStats}}};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -553,6 +554,143 @@ mod tests {
         LongphasePhase::initialize(id, Config::default())?.run()
     }
 
+    #[test]
+    fn snv_parse() -> anyhow::Result<()> {
+        init();
+        // ClairS
+        let row = "chr1\t10407\t.\tA\tG\t10.1\tPASS\tF\tGT:GQ:DP:AD:AF\t0/1:10:31:10,20:0.6452";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+
+        let mut variant_col = VariantCollection { 
+            variants: vec![variant], 
+            vcf: collection::vcf::Vcf::new("/data/longreads_basic_pipe/ACHITE/diag/ClairS/ACHITE_diag_clairs_PASSED.vcf.gz".into())?, 
+            caller: Annotation::Callers(annotation::Caller::ClairS, annotation::Sample::Somatic) 
+        };
+        let annotations = Annotations::default();
+        variant_col.annotate_with_constit_bam(&annotations, "/data/longreads_basic_pipe/ACHITE/mrd/ACHITE_mrd_hs1.bam", 1)?;
+        // DeepVariant
+        let row = "chr1\t10407\t.\tA\tG\t7.4\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:5:9:1,8:0.888889:5,5,0";
+        variant_col.variants.push(row.parse()?);
+
+        let anns: Vec<Vec<Annotation>> = variant_col.variants.iter()
+            .filter_map(|e| {
+                annotations.store.get(&e.hash()).map(|v| v.value().to_owned())
+            })
+            .collect();
+        assert_eq!(anns[0], anns[1]);
+        Ok(())
+    }
+
+    #[test]
+    fn deletion_parse() -> anyhow::Result<()> {
+        init();
+        // Clairs
+        let row = "chr1\t16760\t.\tAaag\tA\t8.48\tPASS\tF\tGT:GQ:DP:AD:AF\t0/1:8:39:22,17:0.4359";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        // case are not keeped
+        assert_eq!(&var_string, "chr1\t16760\t.\tAAAG\tA\t8.48\tPASS\tF\tGT:GQ:DP:AD:AF\t0/1:8:39:22,17:0.4359");
+        assert_eq!(AlterationCategory::DEL, variant.alteration_category());
+
+        let mut variant_col = VariantCollection { 
+            variants: vec![variant], 
+            vcf: collection::vcf::Vcf::new("/data/longreads_basic_pipe/ACHITE/diag/ClairS/ACHITE_diag_clairs_PASSED.vcf.gz".into())?, 
+            caller: Annotation::Callers(annotation::Caller::ClairS, annotation::Sample::Somatic) 
+        };
+        let annotations = Annotations::default();
+
+        // DeepVariant, VAF is not an f32 at it should be, sometimes too long removed the last number for eq
+        let row = "chr12\t326911\t.\tGTGTA\tG\t4.7\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t0/1:5:8:5,3:0.37500:2,0,21";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        variant_col.variants.push(row.parse()?);
+
+        assert_eq!(AlterationCategory::DEL, variant.alteration_category());
+
+        // Severus, 0000 added to VAF and hVAF
+        let row = "chr10\t108974982\tseverus_DEL885\tN\t<DEL>\t60\tPASS\tPRECISE;SVTYPE=DEL;SVLEN=474;END=108975456;STRANDS=+-;INSIDE_VNTR=TRUE;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:61:0.30000:0.30000,0.00000,0.00000:7:3";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        assert_eq!(AlterationCategory::DEL, variant.alteration_category());
+
+        variant_col.variants.push(row.parse()?);
+
+        // Nanomonsv dont parse last format remove: \t22:0 and nt putted in uppercase
+        let row = "chr14\t5247080\td_106\tG\t<DEL>\t.\tPASS\tEND=5247255;SVTYPE=DEL;SVLEN=-175;SVINSLEN=39;SVINSSEQ=ATAACCCAGGTGATATAACACTTCTTTAGGCTCTGCCTA\tTR:VR\t18:3";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        assert_eq!(AlterationCategory::DEL, variant.alteration_category());
+
+        variant_col.variants.push(row.parse()?);
+
+        variant_col.annotate_with_constit_bam(
+            &annotations, 
+            "/data/longreads_basic_pipe/ACHITE/mrd/ACHITE_mrd_hs1.bam", 
+            1
+        )?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn insertion_parse() -> anyhow::Result<()> {
+        init();
+        // Clairs
+        let row = "chr1\t23005\t.\tT\tTCAC\t3.1\tPASS\tF\tGT:GQ:DP:AD:A\t0/1:3:13:9,4:0.3077";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(&var_string, row);
+        assert_eq!(AlterationCategory::INS, variant.alteration_category());
+
+        let mut variant_col = VariantCollection { 
+            variants: vec![variant], 
+            vcf: collection::vcf::Vcf::new("/data/longreads_basic_pipe/ACHITE/diag/ClairS/ACHITE_diag_clairs_PASSED.vcf.gz".into())?, 
+            caller: Annotation::Callers(annotation::Caller::ClairS, annotation::Sample::Somatic) 
+        };
+        let annotations = Annotations::default();
+
+        // DeepVariant, VAF is not an f32 at it should be, sometimes too long removed the last number for eq
+        let row = "chrX\t152732993\t.\tG\tGTTTTTTTTT\t18.9\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:3:6:0,5:0.50000:15,7,3";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        variant_col.variants.push(row.parse()?);
+
+        assert_eq!(AlterationCategory::INS, variant.alteration_category());
+
+        // Severus, 0000 added to VAF and hVAF
+        let row = "chr11\t26669932\tseverus_INS9403\tN\tT\t60\tPASS\tPRECISE;SVTYPE=INS;SVLEN=460;INSIDE_VNTR=TRUE;MAPQ=60\tGT:GQ:VAF:hVAF:DR:DV\t0/1:71:0.27000:0.27000,0.00000,0.00000:8:3";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        assert_eq!(AlterationCategory::INS, variant.alteration_category());
+
+        variant_col.variants.push(row.parse()?);
+
+        // Nanomonsv dont parse last format remove: \t22:0 and nt putted in uppercase
+        let row = "chr8\t87940084\td_333\tT\t<INS>\t.\tPASS\tEND=87940201;SVTYPE=INS;SVINSLEN=172;SVINSSEQ=TA\tTR:VR\t9:5";
+        let variant: VcfVariant = row.parse()?;
+        let var_string = variant.into_vcf_row();
+        assert_eq!(row, &var_string);
+        assert_eq!(AlterationCategory::INS, variant.alteration_category());
+
+        variant_col.variants.push(row.parse()?);
+
+        variant_col.annotate_with_constit_bam(
+            &annotations, 
+            "/data/longreads_basic_pipe/ACHITE/mrd/ACHITE_mrd_hs1.bam", 
+            1
+        )?;
+
+        Ok(())
+    }
+
+
     #[test]
     fn variant_parse() -> anyhow::Result<()> {
         let row = "chr1\t1366\t.\tC\tCCCT\t8.2\tPASS\t.\tGT:GQ:DP:AD:VAF:PL\t1/1:4:6:1,4:0.66667:6,4,0";
@@ -700,11 +838,15 @@ mod tests {
         let collections = Collections::new(
             CollectionsConfig::default()
         )?;
+        let c = Config {
+            somatic_pipe_force: true,
+            ..Default::default()
+        };
         for (a, _) in collections.bam_pairs().iter() {
             if  ["AUBERT", "BAFFREAU", "BAILLEUL"].contains(&a.id.as_str()) {
                 continue;
             }
-            if let Err(e) = SomaticPipe::initialize(&a.id, Config::default()).map(|mut p| if p.should_run() {
+            if let Err(e) = SomaticPipe::initialize(&a.id, c.clone()).map(|mut p| if p.should_run() {
                 if let Err(e) = p.run() {
                     error!("{e}");
                 }

+ 2 - 3
src/pipes/somatic.rs

@@ -4,7 +4,6 @@ use crate::{
         variants_stats::somatic_depth_quality_ranges,
     }
 };
-use anyhow::Context;
 use itertools::Itertools;
 use log::info;
 use rayon::slice::ParallelSliceMut;
@@ -217,8 +216,8 @@ impl Run for SomaticPipe {
         to_run_if_req.extend(create_should_run_normal_tumoral!(&id, &config, DeepVariant));
 
         info!("Running prerequired pipe components.");
-        run_if_required(&mut to_run_if_req).with_context(|| {
-            format!("Failed to run a prerequired component of somatic pipe for {id}.")
+        run_if_required(&mut to_run_if_req).map_err(|e| {
+            anyhow::anyhow!("Failed to run a prerequired component of somatic pipe for {id}.\n{e}")
         })?;
 
         // Initialize variant callers

+ 40 - 19
src/variant/variant.rs

@@ -232,15 +232,14 @@ impl VcfVariant {
     /// An `AlterationCategory` enum representing the type of alteration.
     pub fn alteration_category(&self) -> AlterationCategory {
         match (&self.reference, &self.alternative) {
-            (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotide(_)) => {
+            (ReferenceAlternative::Nucleotide(a), ReferenceAlternative::Nucleotide(b))
+                if *a != Base::N && *b != Base::N =>
+            {
                 AlterationCategory::SNV
             }
             (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Nucleotides(_)) => {
                 AlterationCategory::INS
             }
-            // (ReferenceAlternative::Nucleotide(_), ReferenceAlternative::Unstructured(_)) => {
-            //     AlterationCategory::Other
-            // }
             (ReferenceAlternative::Nucleotides(_), ReferenceAlternative::Nucleotide(_)) => {
                 AlterationCategory::DEL
             }
@@ -271,6 +270,26 @@ impl VcfVariant {
         }
     }
 
+    pub fn inserted_seq(&self) -> Option<String> {
+        if self.alteration_category() != AlterationCategory::INS {
+            return None;
+        }
+
+        if let Some(ins) = self.infos.0.iter().find_map(|e| match e {
+            Info::SVINSSEQ(ins) => Some(ins.to_string()),
+            _ => None,
+        }) {
+            return Some(ins);
+        }
+
+        match &self.alternative {
+            ReferenceAlternative::Nucleotides(nt) => nt
+                .get(1..)
+                .map(|slice| slice.iter().map(ToString::to_string).collect()),
+            _ => None,
+        }
+    }
+
     /// Parses and constructs a BND (breakend) description from the alternative string.
     ///
     /// This function interprets the BND notation in the alternative string and creates
@@ -290,14 +309,6 @@ impl VcfVariant {
     }
 
     /// Returns the length of the deletion if the variant is a deletion (`DEL`).
-    ///
-    /// The length is determined using the following priority:
-    /// 1. If an `END` field is present in the `INFO` section, it is used to compute the length as `END - POS`.
-    /// 2. If an `SVLEN` field is present, its absolute value is used.
-    /// 3. If both `reference` and `alternative` alleles are available and compatible,
-    ///    the length is inferred from the length of the deleted sequence.
-    ///
-    /// Returns `None` if the variant is not a deletion or if no valid length information can be derived.
     pub fn deletion_len(&self) -> Option<u32> {
         if self.alteration_category() != AlterationCategory::DEL {
             return None;
@@ -921,7 +932,13 @@ impl fmt::Display for SVType {
 
 impl VariantId for VcfVariant {
     fn variant_id(&self) -> String {
-        format!("{}_{}>{}", self.position, self.reference, self.alternative)
+        format!(
+            "{}:{}_{}_{}",
+            self.position.contig(),
+            self.position.position + 1,
+            self.reference,
+            self.alternative
+        )
     }
 }
 
@@ -2141,13 +2158,16 @@ pub enum Base {
 
 impl TryFrom<u8> for Base {
     type Error = anyhow::Error;
+
     fn try_from(base: u8) -> anyhow::Result<Self> {
         match base {
-            b'A' => Ok(Base::A),
-            b'T' => Ok(Base::T),
-            b'C' => Ok(Base::C),
-            b'G' => Ok(Base::G),
-            b'N' => Ok(Base::N),
+            // uppercase
+            b'A' | b'a' => Ok(Base::A),
+            b'T' | b't' => Ok(Base::T),
+            b'C' | b'c' => Ok(Base::C),
+            b'G' | b'g' => Ok(Base::G),
+            b'N' | b'n' => Ok(Base::N),
+            // unknown
             _ => Err(anyhow::anyhow!(
                 "Unknown base: {}",
                 String::from_utf8_lossy(&[base])
@@ -2374,8 +2394,9 @@ pub fn run_if_required(iterable: &mut [ShouldRunBox]) -> anyhow::Result<()> {
     iterable.iter_mut().try_for_each(|e| {
         if e.should_run() {
             e.run()
+                .map_err(|err| anyhow::anyhow!("Failed to run {}.\n{err}", e.label()))
         } else {
-            info!("Skipping running: {}", e.label());
+            info!("No run required for: {}", e.label());
 
             Ok(())
         }

+ 151 - 32
src/variant/variant_collection.rs

@@ -10,6 +10,7 @@ use bgzip::{BGZFReader, BGZFWriter};
 use bitcode::{Decode, Encode};
 use csv::ReaderBuilder;
 use log::{debug, error, info, warn};
+use pandora_lib_assembler::assembler::calculate_shannon_entropy;
 use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
@@ -33,6 +34,7 @@ use crate::{
     helpers::{app_storage_dir, estimate_shannon_entropy, mean, temp_file_path, Hash128},
     io::{fasta::sequence_at, readers::get_reader, vcf::vcf_header, writers::get_gz_writer},
     positions::{overlaps_par, GenomePosition, GenomeRange, GetGenomePosition},
+    variant::variant::VariantId,
 };
 
 /// A collection of VCF variants along with associated metadata.
@@ -348,6 +350,40 @@ impl VariantCollection {
         constit_bam_path: &str,
         max_threads: u8,
     ) -> anyhow::Result<()> {
+        fn folder<'a>(alt_seq: &'a str) -> impl Fn((i32, i32), (String, i32)) -> (i32, i32) + 'a {
+            move |(depth_acc, alt_acc), (seq, n): (String, i32)| {
+                if seq == alt_seq {
+                    (depth_acc + n, alt_acc + n)
+                } else {
+                    (depth_acc + n, alt_acc)
+                }
+            }
+        }
+
+        fn match_repeats(
+            v: &Vec<(String, i32)>,
+            nt: char,
+            n: usize,
+            e: usize,
+        ) -> Vec<&(String, i32)> {
+            v.iter()
+                .filter(|(s, _)| {
+                    let len = s.len();
+                    (n.saturating_sub(e)..=n + e).contains(&len) && s.chars().all(|c| c == nt)
+                })
+                .collect()
+        }
+
+        fn single_char_repeat(s: &str) -> Option<(char, usize)> {
+            let mut chars = s.chars();
+            let first = chars.next()?;
+            if chars.all(|c| c == first) {
+                Some((first, s.len()))
+            } else {
+                None
+            }
+        }
+
         self.variants
             .par_chunks(self.chunk_size(max_threads))
             .try_for_each(|chunk| {
@@ -366,41 +402,124 @@ impl VariantCollection {
                         .count()
                         != 2
                     {
-                        let mut n_alt = 0;
-                        let mut depth = 0;
-                        let mut alt_seq = var.alternative.to_string();
-                        let mut is_ins = false;
-
-                        let c = if var.alteration_category() == AlterationCategory::INS {
-                            // TODO: Add stretch comparison
-                            alt_seq = alt_seq.split_off(1);
-                            is_ins = true;
-                            counts_ins_at(&mut bam, &var.position.contig(), var.position.position)?
-                        } else if var.alteration_category() == AlterationCategory::DEL {
-                            alt_seq = "D".to_string();
-                            counts_at(&mut bam, &var.position.contig(), var.position.position + 1)?
-                        } else {
-                            counts_at(&mut bam, &var.position.contig(), var.position.position)?
-                        };
-
-                        for (seq, n) in c {
-                            if seq == alt_seq {
-                                n_alt = n;
-                            } else if is_ins && seq.len() > 1 {
-                                // insertion sequence DO NOT have to match
-                                // rule assumed to reduce FP from Solo callers
-                                // with this rule solo callers won't be able to
-                                // call the variant of an insertion.
-                                n_alt = n;
+                        // let mut n_alt = 0;
+                        // let mut depth = 0;
+                        // let mut is_ins = false;
+                        let alteration_cat = var.alteration_category();
+
+                        // debug!("{alteration_cat:?}");
+                        match alteration_cat {
+                            AlterationCategory::SNV => {
+                                let pileup = counts_at(
+                                    &mut bam,
+                                    &var.position.contig(),
+                                    var.position.position,
+                                )?;
+                                let alt_seq = var.alternative.to_string();
+
+                                let (depth, alt) =
+                                    pileup.into_iter().fold((0, 0), folder(&alt_seq));
+                                // debug!("{} {alt} / {depth}", var.variant_id());
+                                anns.push(Annotation::ConstitDepth(depth as u16));
+                                anns.push(Annotation::ConstitAlt(alt as u16));
                             }
-                            if seq == *"D" && alt_seq != *"D" {
-                                continue;
+                            AlterationCategory::DEL => {
+                                if let Some(del_repr) = var.deletion_desc() {
+                                    let pileup_start = counts_at(
+                                        &mut bam,
+                                        &var.position.contig(),
+                                        del_repr.start.saturating_sub(1),
+                                    )?;
+                                    let (start_depth, start_alt) =
+                                        pileup_start.into_iter().fold((0, 0), folder("D"));
+                                    let pileup_end = counts_at(
+                                        &mut bam,
+                                        &var.position.contig(),
+                                        del_repr.end.saturating_sub(1),
+                                    )?;
+                                    let (end_depth, end_alt) =
+                                        pileup_end.into_iter().fold((0, 0), folder("D"));
+
+                                    // outside start (one base upstream)
+                                    let pileup_out_start = counts_at(
+                                        &mut bam,
+                                        &var.position.contig(),
+                                        del_repr.start.saturating_sub(2),
+                                    )?;
+                                    let (_out_start_depth, out_start_alt) =
+                                        pileup_out_start.into_iter().fold((0, 0), folder("D"));
+
+                                    // outside end (one base downstream)
+                                    let pileup_out_end =
+                                        counts_at(&mut bam, &var.position.contig(), del_repr.end)?;
+                                    let (_out_end_depth, out_end_alt) =
+                                        pileup_out_end.into_iter().fold((0, 0), folder("D"));
+
+                                    let depth = start_depth.min(end_depth);
+                                    let alt = start_alt.min(end_alt).saturating_sub(out_start_alt.max(out_end_alt));
+                                    // debug!("{} {alt} / {depth}", var.variant_id());
+                                    anns.push(Annotation::ConstitAlt(alt as u16));
+                                    anns.push(Annotation::ConstitDepth(depth as u16));
+                                }
                             }
-                            depth += n;
+                            AlterationCategory::INS => {
+                                let pileup = counts_ins_at(
+                                    &mut bam,
+                                    &var.position.contig(),
+                                    var.position.position,
+                                )?;
+
+                                let alt_seq = var.inserted_seq().unwrap_or_default();
+
+                                let (depth, alt) = match single_char_repeat(&alt_seq) {
+                                    Some((repeated, n)) if alt_seq.len() > 1 => {
+                                        // If stretch of same nt consider eq +/- 3 nt
+                                        let pv = pileup.clone().into_iter().collect::<Vec<_>>();
+                                        let res = match_repeats(&pv, repeated, n, 3);
+                                        let depth = pileup.values().sum();
+                                        let alt = res.iter().map(|(_, n)| n).sum();
+                                        (depth, alt)
+                                    }
+                                    _ => pileup.into_iter().fold((0, 0), folder(&alt_seq)),
+                                };
+
+                                debug!("{} {alt} / {depth} ", var.variant_id());
+                                anns.push(Annotation::ConstitAlt(alt as u16));
+                                anns.push(Annotation::ConstitDepth(depth as u16));
+                            }
+                            _ => (),
                         }
-
-                        anns.push(Annotation::ConstitAlt(n_alt as u16));
-                        anns.push(Annotation::ConstitDepth(depth as u16));
+                        // let c = if var.alteration_category() == AlterationCategory::INS {
+                        //     // TODO: Add stretch comparison
+                        //     alt_seq = alt_seq.split_off(1);
+                        //     is_ins = true;
+                        //     counts_ins_at(&mut bam, &var.position.contig(), var.position.position)?
+                        // } else if var.alteration_category() == AlterationCategory::DEL {
+                        //     alt_seq = "D".to_string();
+                        //     counts_at(&mut bam, &var.position.contig(), var.position.position + 1)?
+                        // } else {
+                        //     counts_at(&mut bam, &var.position.contig(), var.position.position)?
+                        // };
+                        //
+                        // for (seq, n) in c {
+                        //     if seq == alt_seq {
+                        //         n_alt = n;
+                        //     } else if is_ins && seq.len() > 1 {
+                        //         // insertion sequence DO NOT have to match
+                        //         // rule assumed to reduce FP from Solo callers
+                        //         // with this rule solo callers won't be able to
+                        //         // call the variant of an insertion.
+                        //         n_alt = n;
+                        //     }
+                        //     if seq == *"D" && alt_seq != *"D" {
+                        //         continue;
+                        //     }
+                        //     depth += n;
+                        // }
+                        // debug!("{n_alt}");
+
+                        // anns.push(Annotation::ConstitAlt(n_alt as u16));
+                        // anns.push(Annotation::ConstitDepth(depth as u16));
                     }
                 }
                 anyhow::Ok(())