Thomas 1 year ago
parent
commit
333aceaaed

+ 25 - 12
Cargo.lock

@@ -1622,7 +1622,7 @@ dependencies = [
  "indexmap",
  "noodles-bgzf 0.33.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.38.0",
  "noodles-sam",
 ]
 
@@ -1672,6 +1672,19 @@ dependencies = [
  "noodles-core",
 ]
 
+[[package]]
+name = "noodles-csi"
+version = "0.39.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0f41004636fb4232155421cbf4706565073623838a8252875085fa670b8185c"
+dependencies = [
+ "bit-vec",
+ "byteorder",
+ "indexmap",
+ "noodles-bgzf 0.33.0",
+ "noodles-core",
+]
+
 [[package]]
 name = "noodles-fasta"
 version = "0.41.0"
@@ -1700,14 +1713,14 @@ dependencies = [
 
 [[package]]
 name = "noodles-gff"
-version = "0.37.0"
+version = "0.38.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8750c06d43f2066ea511a874ed5736470c883f2e39cd0f60f28dd4530d4591b4"
+checksum = "ca42cb034ccc595d963de2ff2fd9cf2c3f563bcb65c1337e90f9d73724743d84"
 dependencies = [
  "indexmap",
  "noodles-bgzf 0.33.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.39.0",
  "percent-encoding",
 ]
 
@@ -1724,33 +1737,33 @@ dependencies = [
  "memchr",
  "noodles-bgzf 0.33.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.38.0",
 ]
 
 [[package]]
 name = "noodles-tabix"
-version = "0.44.0"
+version = "0.45.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "263e63f58871224d0cd30ffc4a8531fb4f8f8ec686807febde8e19a69673fe01"
+checksum = "fb5c5ed1fa0b9ae083c2e1e1cedc07715861400fdd943fdb1ed6c593719be187"
 dependencies = [
  "byteorder",
  "indexmap",
  "noodles-bgzf 0.33.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.39.0",
 ]
 
 [[package]]
 name = "noodles-vcf"
-version = "0.65.0"
+version = "0.67.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37459769faa61c655a2f0cfc1f068c69dbaf92eb648250bd7a7e544b6c6664d0"
+checksum = "2cc572d5d34852b266a7784f3665b58ce451d5a0255f5164f32a7cb30f5f9878"
 dependencies = [
  "indexmap",
  "memchr",
  "noodles-bgzf 0.33.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.39.0",
  "noodles-tabix",
  "percent-encoding",
 ]
@@ -1906,7 +1919,7 @@ dependencies = [
  "noodles-bam",
  "noodles-bgzf 0.33.0",
  "noodles-core",
- "noodles-csi",
+ "noodles-csi 0.39.0",
  "noodles-fasta 0.43.0",
  "noodles-gff",
  "noodles-sam",

+ 4 - 4
Cargo.toml

@@ -26,14 +26,14 @@ rust-htslib = "0.47.0"
 uuid = { version = "1.10.0", features = ["serde", "v4"] }
 prettytable-rs = "^0.10"
 noodles-core = "0.15.0"
-noodles-gff = "0.37.0"
+noodles-gff = "0.38.0"
 noodles-bgzf = "0.33.0"
-noodles-csi = "0.38.0"
+noodles-csi = "0.39.0"
 noodles-fasta = "0.43.0"
 noodles-sam = "0.64.0"
 noodles-bam = "0.67.0"
-noodles-vcf = "0.65.0"
-noodles-tabix = "0.44.0"
+noodles-vcf = "0.67.0"
+noodles-tabix = "0.45.0"
 rayon = "1.10.0"
 serde_rusqlite = "0.35.0"
 dashmap = { version = "6.0.1", features = ["rayon", "serde"] }

+ 25 - 2
src/callers/nanomonsv.rs

@@ -103,7 +103,31 @@ impl FromStr for NanomonsvInfo {
     }
 }
 
-impl ToString for NanomonsvInfo {
+// impl std::fmt::Display for NanomonsvInfo {
+//     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+//         write!(f, "SVTYPE={}", self.svtype)?;
+//
+//         if let Some(v) = self.svlen {
+//             write!(f, ";SVLEN={}", v)?;
+//         }
+//         if let Some(v) = self.end {
+//             write!(f, ";END={}", v)?;
+//         }
+//         if let Some(v) = self.svinslen {
+//             write!(f, ";SVINSLEN={}", v)?;
+//         }
+//         if let Some(v) = &self.svinsseq {
+//             write!(f, ";SVINSSEQ={}", v)?;
+//         }
+//         if let Some(v) = &self.mateid {
+//             write!(f, ";MATEID={}", v)?;
+//         }
+//
+//         std::result::Result::Ok(())
+//     }
+// }
+//
+impl ToString for NanomonsvInfo {
     fn to_string(&self) -> String {
         let mut s = Vec::new();
 
@@ -127,4 +151,3 @@ impl ToString for NanomonsvInfo {
         s.join(";")
     }
 }
-
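Note: the commented-out block above keeps the Display route on record while the manual ToString impl stays in place. Implementing Display would make the ToString impl unnecessary, since the standard library's blanket impl derives to_string() from it. A minimal sketch, with the NanomonsvInfo field types assumed for illustration:

    use std::fmt;

    struct NanomonsvInfoSketch {
        svtype: String,
        svlen: Option<i64>,
        end: Option<i64>,
        svinslen: Option<i64>,
        svinsseq: Option<String>,
        mateid: Option<String>,
    }

    impl fmt::Display for NanomonsvInfoSketch {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            // Mandatory field first, optional fields appended as ;KEY=value pairs.
            write!(f, "SVTYPE={}", self.svtype)?;
            if let Some(v) = self.svlen {
                write!(f, ";SVLEN={}", v)?;
            }
            if let Some(v) = self.end {
                write!(f, ";END={}", v)?;
            }
            if let Some(v) = self.svinslen {
                write!(f, ";SVINSLEN={}", v)?;
            }
            if let Some(v) = &self.svinsseq {
                write!(f, ";SVINSSEQ={}", v)?;
            }
            if let Some(v) = &self.mateid {
                write!(f, ";MATEID={}", v)?;
            }
            Ok(())
        }
    }

    // to_string() now comes for free via impl<T: fmt::Display> ToString for T.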

+ 64 - 152
src/in_out/vcf_writer.rs

@@ -2,6 +2,8 @@ use log::{info, warn};
 use noodles_bgzf as bgzf;
 use noodles_csi::{self as csi};
 use noodles_tabix as tabix;
+use noodles_vcf::variant::record::info::field::key::{ALLELE_FREQUENCIES, TOTAL_DEPTH};
+use noodles_vcf::variant::record_buf::info::field::Value;
 use vcf::variant::io::Write;
 use vcf::variant::record_buf::AlternateBases;
 use vcf::variant::RecordBuf;
@@ -18,156 +20,63 @@ use noodles_core::position::Position;
 use noodles_vcf::{
     self as vcf,
     header::record::value::{
-        map::{Contig, Format},
+        map::{Contig, Info},
         Map,
     },
 };
-use vcf::variant::record::samples::keys::key;
 use vcf::Header;
 
 fn get_vcf_header(dict_file: &str) -> Result<Header> {
-    // let mut header: Vec<String> = vec!["##fileformat=VCFv4.2".to_string()];
-    //
-    // header.extend(
-    //     read_dict(&dict_file)?
-    //         .iter()
-    //         .map(|(sn, len)| format!("##contig=<ID={},length={}>", sn, len)),
-    // );
-    //
-    // header.push("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read depth\">".to_string());
-    // header.push(
-    //     "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Read depth for each allele\">"
-    //         .to_string(),
-    // );
-    //
-    // header.push(
-    //     vec![
-    //         "#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "SAMPLE",
-    //     ]
-    //     .join("\t"),
-    // );
-
     let mut header = vcf::Header::builder()
-        .add_format(key::READ_DEPTH, Map::<Format>::from(key::READ_DEPTH))
-        .add_format(key::READ_DEPTHS, Map::<Format>::from(key::READ_DEPTHS))
-        .add_sample_name("LOH")
+        .add_info(TOTAL_DEPTH, Map::<Info>::from(TOTAL_DEPTH))
+        .add_info(ALLELE_FREQUENCIES, Map::<Info>::from(ALLELE_FREQUENCIES))
         .build();
 
     for (ctg, len) in read_dict(dict_file)? {
         let mut contig = Map::<Contig>::new();
         *contig.length_mut() = Some(len as usize);
         header.contigs_mut().insert(ctg.parse()?, contig);
-
-        // header.add_contig(ctg.parse().unwrap(), contig);
     }
-    // read_dict(&dict_file)?.iter().for_each(|(ctg, len)| {
-    //     let mut contig = Map::<Contig>::new();
-    //     *contig.length_mut() = Some(*len as usize);
-    //     header.add_contig(ctg.parse().unwrap(), contig);
-    // });
-
-    // header.add_format(key::READ_DEPTH, Map::<Format>::from(&key::READ_DEPTH));
-    // header.formats_mut().insert(key::READ_DEPTH, Map::<Format>::from(&key::READ_DEPTH));
-
-    // header.add_format(key::READ_DEPTHS, Map::<Format>::from(&key::READ_DEPTHS));
     Ok(header)
-    // Ok(header.join("\n"))
 }
 
 pub fn write_vcf(path: &str, data: &mut [Variant], dict_file: &str) -> Result<()> {
     let mut writer = File::create(path).map(bgzf::Writer::new)?;
     let mut writer_vcf = vcf::io::Writer::new(&mut writer);
 
-    // let mut indexer = csi::binning_index::Indexer::default().set_header(csi::binning_index::index::header::Builder::vcf().build());
-
-    // let mut indexer = tabix::index::Indexer::default();
-    // indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
-
-    // indexer.set_header(csi::binning_index::index::header::Builder::vcf())
-    // indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
-
     let header = get_vcf_header(dict_file)?;
     writer_vcf.write_header(&header)?;
-    // indexer.set_header(csi::binning_index::index::header::Builder::);
-    // writer.write_all(&buf)?;
-    // buf.clear();
-
-    // let mut start_position = writer_vcf.get_ref().virtual_position();
-    // let mut actual_contig = String::new();
-    // let mut actual_id = 0;
 
     for (i, row) in data.iter_mut().enumerate() {
-        // if actual_contig != row.contig {
-        //     actual_contig = row.contig.clone();
-        //     actual_id += 1;
-        // }
-
         let record = RecordBuf::builder()
             .set_reference_sequence_name(&row.contig)
             .set_variant_start(Position::new(row.position as usize).unwrap())
             .set_ids([i.to_string()].into_iter().collect())
             .set_reference_bases(format!("{}", row.reference))
             .set_alternate_bases(AlternateBases::from(vec![format!("{}", row.alternative)]))
-            // .set_genotypes(Genotypes::parse(&row.to_min_string(), &header)?)
+            .set_info(
+                [
+                    (
+                        String::from(TOTAL_DEPTH),
+                        Some(Value::Integer(row.get_depth() as i32)),
+                    ),
+                    (
+                        String::from(ALLELE_FREQUENCIES),
+                        Some(Value::Float(row.vaf())),
+                    ),
+                ]
+                .into_iter()
+                .collect(),
+            )
             .build();
 
         writer_vcf.write_variant_record(&header, &record)?;
-        // writer.write_all(record.to_string().as_bytes())?;
-        // writer.write_all("\n".to_string().as_bytes())?;
-        // let end_position = writer.virtual_position();
-
-        // let chunk = Chunk::new(start_position, end_position);
-
-        // let reference_sequence_name = record.chromosome().to_string();
-        // let start = noodles_core::Position::try_from(usize::from(record.position()))
-        //     .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
-        // let start = record.;
-        // let start = record.variant_start().unwrap();
-        // let end = vcf::variant::Record::variant_end(&record, &header)?;
-        // let end = record.variant_end(&header)
-        //     .end()
-        //     .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
-        //     .and_then(|position| {
-        //         noodles_core::Position::try_from(usize::from(position))
-        //             .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
-        //     })?;
-
-        // indexer.add_record(Some((actual_id - 1, start, end, true)), chunk)?;
-        // indexer.add_record(&row.contig, start, end, chunk)?;
-
-        // writer.write_record(&header, &record);
-
-        // start_position = end_position;
     }
-    // let index = indexer.build(read_dict(&dict_file)?.len());
-    // let index = indexer.build();
 
-    // let index_file = File::create(&format!("{}.csi", path)).expect("error creating index file");
-    // let mut writer = csi::Writer::new(index_file);
-    // csi::write(&format!("{}.csi", path), &index)?;
-    // let index = noodles_vcf::io::reader::Builder::default().build_from_path(path)?;
     let index = noodles_vcf::index(path)?;
 
     tabix::write(format!("{}.tbi", path), &index)?;
 
-    // writer.write_index(&index)?;
-
-    // writeln!(vcf, "{}", get_vcf_header(dict_file)?).unwrap();
-
-    // for (i, row) in data.iter_mut().enumerate() {
-    //     writeln!(
-    //         vcf,
-    //         "{}\t{}\t{}\t{}\t{}\t{}\tPASS\t.\t{}",
-    //         row.contig.to_string(),
-    //         row.position.to_string(),
-    //         i + 1,
-    //         row.reference.to_string(),
-    //         row.alternative.to_string(),
-    //         ".", // qual
-    //         row.to_min_string()
-    //     )?;
-    // }
-    // let index = vcf::index(path)?;
     Ok(())
 }
 
@@ -175,8 +84,6 @@ pub struct VariantWritter {
     path: String,
     writer: vcf::io::Writer<bgzf::Writer<File>>,
     header: Header,
-    // indexer: Indexer,
-    // start_position: VirtualPosition,
     id: usize,
 }
 
@@ -184,25 +91,47 @@ impl VariantWritter {
     pub fn new(path: &str, dict_file: &str) -> Result<Self> {
         let mut writer = vcf::io::Writer::new(File::create(path).map(bgzf::Writer::new)?);
 
-        // let mut writer = File::create(path).map(bgzf::Writer::new)?;
         let mut indexer = tabix::index::Indexer::default();
         indexer.set_header(csi::binning_index::index::header::Builder::vcf().build());
 
         let header = get_vcf_header(dict_file)?;
         writer.write_header(&header)?;
-        // let hs = header.to_string();
-        // writer.write_all(hs.as_bytes())?;
 
-        // let start_position = writer.virtual_position();
         Ok(Self {
             path: path.to_string(),
             writer,
             header,
-            // indexer,
-            // start_position,
             id: 0,
         })
     }
+
+    // pub fn parse_variants(&mut self, variants: Variants) -> Result<()> {
+    //
+    //     let record = RecordBuf::builder()
+    //         .set_reference_sequence_name(&row.contig)
+    //         .set_variant_start(Position::new(row.position as usize).unwrap())
+    //         // .set_ids(Ids::default())
+    //         .set_ids([self.id.to_string()].into_iter().collect())
+    //         .set_reference_bases(format!("{}", row.reference))
+    //         .set_alternate_bases(AlternateBases::from(vec![format!("{}", row.alternative)]))
+    //         .set_info(
+    //             [
+    //                 (
+    //                     String::from(TOTAL_DEPTH),
+    //                     Some(Value::Integer(row.get_depth() as i32)),
+    //                 ),
+    //                 (
+    //                     String::from(ALLELE_FREQUENCIES),
+    //                     Some(Value::Float(row.vaf())),
+    //                 ),
+    //             ]
+    //             .into_iter()
+    //             .collect(),
+    //         )
+    //         .build();
+    //     Ok(())
+    // }
+
     pub fn write_variant(&mut self, row: &mut Variant) -> Result<()> {
         let record = RecordBuf::builder()
             .set_reference_sequence_name(&row.contig)
@@ -211,46 +140,30 @@ impl VariantWritter {
             .set_ids([self.id.to_string()].into_iter().collect())
             .set_reference_bases(format!("{}", row.reference))
             .set_alternate_bases(AlternateBases::from(vec![format!("{}", row.alternative)]))
-            // .set_genotypes(Genotypes::parse(&row.to_min_string(), &self.header)?)
+            .set_info(
+                [
+                    (
+                        String::from(TOTAL_DEPTH),
+                        Some(Value::Integer(row.get_depth() as i32)),
+                    ),
+                    (
+                        String::from(ALLELE_FREQUENCIES),
+                        Some(Value::Float(row.vaf())),
+                    ),
+                ]
+                .into_iter()
+                .collect(),
+            )
             .build();
 
-        // self.writer.write
 
         self.writer.write_variant_record(&self.header, &record)?;
-        // self.writer.write(record.to_string().as_bytes()).unwrap();
-        // info!("{:?}", record);
-        // self.writer.write("\n".to_string().as_bytes())?;
-        // self.writer.flush()?;
         self.id += 1;
-        // let end_position = self.writer.virtual_position();
-
-        // let chunk = Chunk::new(self.start_position, end_position);
-
-        // let reference_sequence_name = record.chromosome().to_string();
-        // let start = noodles_core::Position::try_from(usize::from(record.position()))
-        //     .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
-        // let end = record
-        //     .end()
-        //     .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
-        //     .and_then(|position| {
-        //         noodles_core::Position::try_from(usize::from(position))
-        //             .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
-        //     })?;
-        //
-        // // indexer.add_record(Some((actual_id - 1, start, end, true)), chunk)?;
-        // self.indexer.add_record(&row.contig, start, end, chunk)?;
-        //
-        // // writer.write_record(&header, &record);
-        //
-        // self.start_position = end_position;
+
         Ok(())
     }
+
     pub fn write_index_finish(&mut self) -> Result<()> {
-        // self.writer.finish();
-        // let mut idx = Indexer::default();
-        // std::mem::swap(&mut idx, &mut self.indexer);
-        // // std::mem::replace(&mut self.indexer, Indexer::default());
-        // let index = idx.build();
         let index_path = format!("{}.tbi", &self.path);
         if Path::new(&index_path).exists() {
             fs::remove_file(&index_path)?;
@@ -260,10 +173,9 @@ impl VariantWritter {
                 if let Err(err) = tabix::write(&index_path, &index) {
                     warn!("Can't write VCF index {index_path} {err}");
                 }
-            },
+            }
             Err(err) => warn!("Can't write VCF index {index_path} {err}"),
         }
-        
 
         Ok(())
     }
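Note: both write_vcf and VariantWritter::write_variant now build the same INFO map inline, replacing the old FORMAT/sample columns with DP and AF INFO fields. A condensed sketch of that construction, reusing the imports added at the top of this file (the helper name and scalar parameters are illustrative, not part of the commit):

    use noodles_core::position::Position;
    use noodles_vcf::variant::record::info::field::key::{ALLELE_FREQUENCIES, TOTAL_DEPTH};
    use noodles_vcf::variant::record_buf::info::field::Value;
    use noodles_vcf::variant::record_buf::AlternateBases;
    use noodles_vcf::variant::RecordBuf;

    fn build_record(contig: &str, position: usize, reference: &str, alt: &str, depth: i32, vaf: f32) -> RecordBuf {
        RecordBuf::builder()
            .set_reference_sequence_name(contig)
            .set_variant_start(Position::new(position).unwrap())
            .set_reference_bases(reference.to_string())
            .set_alternate_bases(AlternateBases::from(vec![alt.to_string()]))
            .set_info(
                // DP and AF travel in the INFO column; the (String, Option<Value>)
                // pairs collect into the record_buf INFO map.
                [
                    (String::from(TOTAL_DEPTH), Some(Value::Integer(depth))),
                    (String::from(ALLELE_FREQUENCIES), Some(Value::Float(vaf))),
                ]
                .into_iter()
                .collect(),
            )
            .build()
    }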

+ 117 - 3
src/lib.rs

@@ -23,7 +23,7 @@ mod tests {
     use log::info;
     use noodles_core::{Position, Region};
     use sql::variants_sql::write_annotaded;
-    use variants::AllStats;
+    use variants::{load_all, AllStats};
 
     // export RUST_LOG="debug"
     fn init() {
@@ -42,7 +42,7 @@ mod tests {
 
     #[test]
     fn load_from_vcf() -> Result<()> {
-        let name = "COLLE";
+        let name = "HAMROUNE";
 
         let logger =
             env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
@@ -58,7 +58,7 @@ mod tests {
     #[test]
     fn annot() -> anyhow::Result<()> {
         init();
-        let id = "ROBIN";
+        let id = "LEVASSEUR";
         let stats = AllStats::load_json(&format!(
             "/data/longreads_basic_pipe/{id}/diag/report/data/{id}_variants_stats.json"
         ))
@@ -260,4 +260,118 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn write_somatic_vcf() -> anyhow::Result<()> {
+        // ls -d */ | sed 's/\///g' | paste -sd " " | sed 's/ /", "/g'
+        let ids = [
+            "ACHITE",
+            "ADJAGBA",
+            "ALLEMAN",
+            "ALLEMAND",
+            "ANDREASYAN",
+            "AOUF",
+            "ARM",
+            "ASSELIN",
+            "AUBERT",
+            "BACOU",
+            "BAFFREAU",
+            "BAILLEUL",
+            "BECERRA",
+            "BELARBI",
+            "BENDENMOUN",
+            "BENGUIRAT",
+            "BERNIER",
+            "BOUDJELTHIA",
+            "BRETON",
+            "CALVIGNAC",
+            "CAMARA",
+            "CAMEL",
+            "CAZIER",
+            "CHENU",
+            "COLLE",
+            "CONSIGNY",
+            "DAHAN",
+            "DESLANDES",
+            "DOUYER",
+            "DOUYERE",
+            "ELKLIFI",
+            "FALLE",
+            "FALZONE",
+            "FAVOT",
+            "FELICIE",
+            "FERATI",
+            "FRANIATTE",
+            "GABELLE",
+            "GALLET",
+            "GARAGNON",
+            "GEORGIEV",
+            "GILLOUX",
+            "GLADIEUX",
+            "GRAY",
+            "HAMROUNE",
+            "HATTAB",
+            "HENAUX",
+            "HEORHIICHUK",
+            "HURION",
+            "IQBAL",
+            "JEANSELME",
+            "JOLIVET",
+            "KENNOUCHE",
+            "KHABER",
+            "LAVIDALE",
+            "LEBORGNE",
+            "LEVASSEUR",
+            "MACCAGN",
+            "MADIES",
+            "MANCUS",
+            "MANCUSO",
+            "MASSON",
+            "MEDDAH",
+            "MERY",
+            "MICHELAS",
+            "MIGAUD",
+            "MONVILLE",
+            "MOREAU",
+            "MORIN",
+            "NAJAR",
+            "PARACHINI",
+            "PASSARD",
+            "PAYSAN",
+            "PENN",
+            "RICCO",
+            "RIVOALEN",
+            "ROBIN",
+            "ROZIER",
+            "SABER",
+            "SALICETTO",
+            "SAUTRE",
+            "SCHOLZ",
+            "SPINATO",
+            "TAHIR",
+            "TAOUS",
+            "THETE",
+            "VAUTRIN",
+            "VEILLEUR",
+            "VILI",
+        ];
+
+        let logger =
+            env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info"))
+                .build();
+        let multi = MultiProgress::new();
+        LogWrapper::new(multi.clone(), logger).try_init().unwrap();
+
+        for id in ids {
+            if let std::result::Result::Ok(mut variants) = Variants::from_db(id, &multi) {
+                let _ = variants.write_vcf(&format!(
+                    "/data/longreads_basic_pipe/{id}/diag/{id}_variants_somatic.vcf.gz"
+                ));
+            }
+        }
+        Ok(())
+    }
 }
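Note: the loop in write_somatic_vcf silently discards both load and write failures. A sketch of the same loop that surfaces them instead, assuming the log::warn macro already used elsewhere in this crate:

    for id in ids {
        match Variants::from_db(id, &multi) {
            std::result::Result::Ok(mut variants) => {
                if let Err(err) = variants.write_vcf(&format!(
                    "/data/longreads_basic_pipe/{id}/diag/{id}_variants_somatic.vcf.gz"
                )) {
                    log::warn!("{id}: can't write somatic VCF: {err}");
                }
            }
            Err(err) => log::warn!("{id}: can't load variant DB: {err}"),
        }
    }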

+ 24 - 0
src/sql/variants_sql.rs

@@ -255,6 +255,30 @@ pub fn load_variants_name(name: &str, mp: &MultiProgress) -> Result<Variants> {
     Ok(Variants::from_vec(name.to_string(), mp, data))
 }
 
+pub fn load_db(db_path: &str) -> anyhow::Result<Vec<Variant>> {
+    info!("Loading DB {db_path}");
+    let connection = rusqlite::Connection::open(db_path)?;
+    let mut stmt = connection.prepare(
+        "SELECT * FROM Variants",
+    )?;
+
+    let rows = stmt.query_and_then([], |r| match from_row::<VariantSQL>(r) {
+        std::result::Result::Ok(row) => match Variant::try_from(&row) {
+            std::result::Result::Ok(v) => Ok(v),
+            Err(e) => Err(anyhow!(e)),
+        },
+        Err(e) => Err(anyhow!(e)),
+    })?;
+
+    let mut data = Vec::new();
+    for res in rows {
+        data.push(res?);
+    }
+
+    Ok(data)
+}
+
 pub fn load_annotaded(db_path: &str) -> anyhow::Result<String> {
     info!("Loading DB {db_path}");
     let connection = rusqlite::Connection::open(db_path)?;
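Note: load_db above collects rows with an explicit loop. An equivalent, more compact shape collects the fallible iterator directly; a sketch under a hypothetical name, with the same crates and error handling as the committed code:

    fn load_db_collected(db_path: &str) -> anyhow::Result<Vec<Variant>> {
        let connection = rusqlite::Connection::open(db_path)?;
        let mut stmt = connection.prepare("SELECT * FROM Variants")?;

        // Each row is deserialized then converted; the first Err short-circuits
        // the collect thanks to FromIterator for Result.
        stmt.query_and_then([], |r| {
            let row = from_row::<VariantSQL>(r).map_err(|e| anyhow!(e))?;
            Variant::try_from(&row).map_err(|e| anyhow!(e))
        })?
        .collect()
    }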

+ 118 - 4
src/variants.rs

@@ -25,7 +25,7 @@ use crate::{
     rna_seq::find_monoallelics,
     sql::{
         stats_sql::insert_stats,
-        variants_sql::{insert_variants, update_annotations, write_annotaded},
+        variants_sql::{insert_variants, load_db, update_annotations, write_annotaded},
     },
     utils::{
         chi_square_test_for_proportions, count_repetitions, estimate_shannon_entropy,
@@ -210,6 +210,13 @@ impl Variants {
         })
     }
 
+    pub fn from_db(name: &str, mp: &MultiProgress) -> anyhow::Result<Self> {
+        let var = load_db(&format!(
+            "/data/longreads_basic_pipe/{name}/diag/{name}_variants.sqlite"
+        ))?;
+        Ok(Variants::from_vec(name.to_string(), mp, var))
+    }
+
     pub fn vcf_filters(&mut self) {
         let cfg = &self.cfg;
         let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
@@ -365,8 +372,11 @@ impl Variants {
                             ));
                         } else {
                             // Check local diversity
-                            let start =
-                                Position::try_from((tumoral.position - 20) as usize).unwrap();
+                            let start = if tumoral.position > 21 {
+                                Position::try_from((tumoral.position - 20) as usize).unwrap()
+                            } else {
+                                Position::try_from(1_usize).unwrap()
+                            };
                             let end = Position::try_from((tumoral.position + 19) as usize).unwrap();
                             let r = Region::new(tumoral.contig.to_string(), start..=end);
                             if let std::result::Result::Ok(reg) = genome_reader.query(&r) {
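Note: the branch added above fixes an integer underflow when tumoral.position <= 20 (the old code subtracted 20 unconditionally). An equivalent branch-free formulation, as a sketch assuming the position is unsigned:

    fn window_start(position: u64) -> u64 {
        // saturating_sub stops at 0; max(1) keeps the result a valid 1-based position.
        position.saturating_sub(20).max(1)
    }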
@@ -570,6 +580,28 @@ impl Variants {
         Ok(())
     }
 
+    pub fn write_vcf(&mut self, path: &str) -> Result<()> {
+        info!("Writing VCF {}", path);
+
+        let mut to_write = sort_variants(self.data.clone(), &self.cfg.dict_file)?;
+        if to_write.is_empty() {
+            warn!("No variants to write");
+            return Ok(());
+        }
+        let pg = self.mp.add(new_pg_speed(to_write.len() as u64));
+        pg.set_message("Writing VCF");
+
+        let mut w = VariantWritter::new(path, &self.cfg.dict_file)?;
+        for row in to_write.iter_mut() {
+            w.write_variant(row)
+                .context(format!("Error writing VCF row {:#?}", row))?;
+            pg.inc(1);
+        }
+        w.write_index_finish()
+            .context(format!("Can't write index for {}", path))?;
+        Ok(())
+    }
+
     /// Keep variants annotated Somatic
     pub fn keep_somatics_un(&mut self) {
         let pg = self.mp.add(new_pg_speed(self.data.len() as u64));
@@ -2016,6 +2048,15 @@ pub fn run_pipe(id: &str, force: bool, multi: &MultiProgress) -> Result<()> {
         "{}/{id}/diag/RNAseq/{id}_monoallelics.tsv",
         cfg.longreads_results_dir
     );
+    let somatic_vcf = format!(
+        "{}/{id}/diag/{id}_variants_somatic.vcf.gz",
+        cfg.longreads_results_dir
+    );
+
+    let constit_vcf = format!(
+        "{}/{id}/diag/{id}_variants_constit.vcf.gz",
+        cfg.longreads_results_dir
+    );
 
     let report_data_dir = format!("{}/{id}/diag/report/data", cfg.longreads_results_dir);
     if !std::path::Path::new(&report_data_dir).exists() {
@@ -2074,7 +2115,7 @@ pub fn run_pipe(id: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     variants.bam_filters(&mrd_bam);
 
     let constits = variants.get_cat(&VariantCategory::Constit);
-    let constits = Variants::from_vec(id.to_string(), multi, constits);
+    let mut constits = Variants::from_vec(id.to_string(), multi, constits);
 
     if std::path::Path::new(&rna_bam).exists() {
         constits
@@ -2083,6 +2124,7 @@ pub fn run_pipe(id: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     }
 
     constits.save_bytes(&bytes_constit_path)?;
+    constits.write_vcf(&constit_vcf)?;
     variants.keep_somatics_un();
     info!("Variants retained: {}", variants.len());
 
@@ -2105,6 +2147,7 @@ pub fn run_pipe(id: &str, force: bool, multi: &MultiProgress) -> Result<()> {
     variants.filter_snp()?;
 
     variants.save_bytes(&bytes_path)?;
+    variants.write_vcf(&somatic_vcf)?;
     let all_stats = variants
         .stats_json(&stats_path)
         .context("Can't write stats")?;
@@ -2206,3 +2249,74 @@ fn write_dashmap_to_tsv(
     writer.flush()?;
     Ok(())
 }
+
+pub fn load_all(id: &str, multi: &MultiProgress) -> Result<Variants> {
+    let cfg = config::Config::get()?;
+    let deepvariant_diag_vcf = format!(
+        "{}/{id}/diag/DeepVariant/{id}_diag_DeepVariant_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&deepvariant_diag_vcf).exists() {
+        return Err(anyhow!("{deepvariant_diag_vcf} is required"));
+        // panic!("{deepvariant_diag_vcf} is required")
+    }
+    let deepvariant_mrd_vcf = format!(
+        "{}/{id}/mrd/DeepVariant/{id}_mrd_DeepVariant_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&deepvariant_mrd_vcf).exists() {
+        return Err(anyhow!("{deepvariant_mrd_vcf} is required"));
+    }
+    let mrd_bam = format!("{}/{id}/mrd/{id}_mrd_hs1.bam", cfg.longreads_results_dir);
+    if !std::path::Path::new(&mrd_bam).exists() {
+        return Err(anyhow!("{mrd_bam} is required"));
+    }
+    let clairs_vcf = format!(
+        "{}/{id}/diag/ClairS/{id}_diag_clairs_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&clairs_vcf).exists() {
+        return Err(anyhow!("{clairs_vcf} is required"));
+    }
+    let clairs_indels_vcf = format!(
+        "{}/{id}/diag/ClairS/{id}_diag_clairs_indel_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&clairs_indels_vcf).exists() {
+        return Err(anyhow!("{clairs_indels_vcf} is required"));
+    }
+
+    let nanomonsv_vcf = format!(
+        "{}/{id}/diag/nanomonsv/{id}_diag_nanomonsv_PASSED.vcf.gz",
+        cfg.longreads_results_dir
+    );
+    if !std::path::Path::new(&nanomonsv_vcf).exists() {
+        return Err(anyhow!("{nanomonsv_vcf} is required"));
+    }
+
+    let sources = vec![
+        (
+            deepvariant_diag_vcf.as_str(),
+            &VCFSource::DeepVariant,
+            &VariantType::Somatic,
+        ),
+        (
+            deepvariant_mrd_vcf.as_str(),
+            &VCFSource::DeepVariant,
+            &VariantType::Constitutionnal,
+        ),
+        (
+            clairs_vcf.as_str(),
+            &VCFSource::ClairS,
+            &VariantType::Somatic,
+        ),
+        (
+            nanomonsv_vcf.as_str(),
+            &VCFSource::Nanomonsv,
+            &VariantType::Somatic,
+        ),
+    ];
+    let mut variants = Variants::from_vcfs(id.to_string(), sources, &cfg, multi.clone())?;
+    variants.merge();
+    Ok(variants)
+}
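Note: load_all repeats the same exists-or-bail check six times. A hypothetical helper (not in the commit) that collapses the pattern, using the same anyhow error style:

    fn require(path: String) -> anyhow::Result<String> {
        if std::path::Path::new(&path).exists() {
            Ok(path)
        } else {
            Err(anyhow::anyhow!("{path} is required"))
        }
    }

    // Usage sketch:
    // let mrd_bam = require(format!("{}/{id}/mrd/{id}_mrd_hs1.bam", cfg.longreads_results_dir))?;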