Browse Source

phase qual

Thomas 1 year ago
parent
commit
02cf251a4d
3 changed files with 43 additions and 26 deletions
  1. 11 10
      Cargo.lock
  2. 1 1
      src/lib.rs
  3. 31 15
      src/phase.rs

+ 11 - 10
Cargo.lock

@@ -265,9 +265,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.13"
+version = "4.5.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
+checksum = "c937d4061031a6d0c8da4b9a4f98a172fc2976dfb1c19213a9cf7d0d3c837e36"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -275,9 +275,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.13"
+version = "4.5.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
+checksum = "85379ba512b21a328adf887e85f7742d12e96eb31f3ef077df4ffc26b506ffed"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1309,7 +1309,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
 [[package]]
 name = "pandora_lib_pileup"
 version = "0.1.0"
-source = "git+https://git.t0m4.fr/Thomas/pandora_lib_pileup.git#e5408e038ff66c8c95e131ef34f89f6199e0799c"
+source = "git+https://git.t0m4.fr/Thomas/pandora_lib_pileup.git#896ce97f874980cdcc22f3c477454e714363caf3"
 dependencies = [
  "anyhow",
  "average",
@@ -1345,7 +1345,7 @@ dependencies = [
 [[package]]
 name = "pandora_lib_variants"
 version = "0.1.0"
-source = "git+https://git.t0m4.fr/Thomas/pandora_lib_variants.git#a49815478eac792537b131854bd0b059afcb163f"
+source = "git+https://git.t0m4.fr/Thomas/pandora_lib_variants.git#eaefae4807aadf2753973870a555a309176fdf5c"
 dependencies = [
  "anyhow",
  "bgzip",
@@ -1369,6 +1369,7 @@ dependencies = [
  "noodles-tabix",
  "noodles-vcf",
  "num-integer",
+ "pandora_lib_pileup",
  "pot",
  "prettytable-rs",
  "rayon",
@@ -1740,18 +1741,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
 
 [[package]]
 name = "serde"
-version = "1.0.204"
+version = "1.0.205"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
+checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.204"
+version = "1.0.205"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
+checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1"
 dependencies = [
  "proc-macro2",
  "quote",

+ 1 - 1
src/lib.rs

@@ -317,7 +317,7 @@ mod tests {
         let multi = MultiProgress::new();
         LogWrapper::new(multi.clone(), logger).try_init().unwrap();
 
-        let config = PhaserConfig::new(id, "/data/longreads_basic_pipe", min_records);
+        let config = PhaserConfig::new(id, "/data/longreads_basic_pipe", min_records, 0.33);
         phase(config, multi)
     }
 

+ 31 - 15
src/phase.rs

@@ -40,7 +40,7 @@ impl HeteroVar {
         alternative: u8,
     ) -> Result<(HeteroVar, HeteroVar)> {
         let mut rec_base = if let std::result::Result::Ok(rb) =
-            pandora_lib_pileup::qnames_at_base(bam_a, chr, position, false)
+            pandora_lib_pileup::qnames_at_base(bam_a, chr, position, false, 50)
         {
             rb
         } else {
@@ -48,7 +48,7 @@ impl HeteroVar {
         };
 
         if let std::result::Result::Ok(rb) =
-            pandora_lib_pileup::qnames_at_base(bam_b, chr, position, false)
+            pandora_lib_pileup::qnames_at_base(bam_b, chr, position, false, 50)
         {
             rec_base.extend(rb);
         } else {
@@ -234,12 +234,17 @@ impl Phase {
     }
 
     pub fn bed_string(&self) -> Option<String> {
-        if let (Some((min, _min_cov, _max_cov, max)), Some(id)) = (self.range, self.id()) {
+        // if let (Some((_min, min_cov, max_cov, _max)), Some(id)) = (self.range, self.id()) {
+        if let Some(id) = self.id() {
+            let f = self.data.first().unwrap();
+            let chr = f.chr.clone();
+            let start = f.position;
+            let end = self.data.last().unwrap().position;
             Some(
                 [
-                    self.data.first().unwrap().chr.to_string(),
-                    min.to_string(),
-                    max.to_string(),
+                    chr,
+                    start.to_string(),
+                    end.to_string(),
                     format!("{} {:.2}", id, self.mean_vaf()),
                 ]
                 .join("\t"),
@@ -416,15 +421,21 @@ pub fn variants_phasing(
         .par_chunks(phases.len() / 75)
         .flat_map(|chunks| merge_phases(chunks.to_vec(), min_records, 5, multi).unwrap())
         .collect();
-    phases.sort();
-    let phases: Vec<Phase> = phases
+    phases.par_sort();
+    info!("{} phases", phases.len());
+
+    let mut phases: Vec<Phase> = phases
         .par_chunks(phases.len() / 50)
         .flat_map(|chunks| merge_phases(chunks.to_vec(), min_records, 100, multi).unwrap())
         .collect();
+    phases.par_sort();
+    info!("{} phases", phases.len());
+
     let mut phases: Vec<Phase> = phases
         .par_chunks(phases.len() / 25)
         .flat_map(|chunks| merge_phases(chunks.to_vec(), min_records, 1000, multi).unwrap())
         .collect();
+    info!("{} phases", phases.len());
 
     let chunk_size = phases.len() / 25;
     phases.par_chunks_mut(chunk_size).for_each(|chunk| {
@@ -521,10 +532,11 @@ pub struct PhaserConfig {
     pub const_variants: String,
     pub phases_dir: String,
     pub min_records: usize,
+    pub around_hetero: f32,
 }
 
 impl PhaserConfig {
-    pub fn new(id: &str, data_dir: &str, min_records: usize) -> Self {
+    pub fn new(id: &str, data_dir: &str, min_records: usize, around_hetero: f32) -> Self {
         let bam_path_a = format!("{data_dir}/{id}/diag/{id}_diag_hs1.bam");
         let bam_path_b = format!("{data_dir}/{id}/mrd/{id}_mrd_hs1.bam");
         let const_variants = format!("{data_dir}/{id}/diag/{id}_constit.bytes.gz");
@@ -536,6 +548,7 @@ impl PhaserConfig {
             const_variants,
             phases_dir,
             min_records,
+            around_hetero,
         }
     }
 }
@@ -555,37 +568,38 @@ pub fn phase(config: PhaserConfig, progress: MultiProgress) -> anyhow::Result<()
         const_variants,
         phases_dir,
         min_records,
+        around_hetero,
     } = config;
 
     fs::create_dir_all(&phases_dir)?;
 
+    // Loading variants
     let variants = pandora_lib_variants::variants::Variants::new_from_bytes(
         &id,
         &const_variants,
         progress.clone(),
     )?;
-
     info!(
         "{} variants loaded",
         variants.len().to_formatted_string(&format)
     );
 
+    // Filtering variants for heterozigous SNPs
     let mut variants: Vec<Variant> = variants
         .data
         .into_par_iter()
         .filter(|v| {
             let mut v = v.clone();
-            v.vaf() > 0.4 && v.vaf() < 0.6
+            v.vaf() > (0.5 - around_hetero) && v.vaf() < (0.5 + around_hetero)
         })
         .filter(|v| matches!(v.alt_cat(), AlterationCategory::Snv))
         .collect();
-
     let mut contigs = HashSet::new();
     variants.iter().for_each(|v| {
         contigs.insert(v.contig.to_string());
     });
-
     variants.par_sort_by(|a, b| a.position.cmp(&b.position));
+    info!("{} heterozigous SNPs considered", variants.len());
 
     let dict = read_dict("/data/ref/hs1/chm13v2.0.dict")?;
     for (contig, _) in dict {
@@ -598,14 +612,16 @@ pub fn phase(config: PhaserConfig, progress: MultiProgress) -> anyhow::Result<()
             .filter(|v| v.contig == contig)
             .collect();
         if variants.len() > 1 {
-            info!("{contig}: {} variants to phase", v.len());
+            info!("{contig}: {} SNPs to phase", v.len());
             let phases = variants_phasing(v, &bam_path_a, &bam_path_b, min_records, &progress);
             if !phases.is_empty() {
                 save_phases(
                     &phases,
                     &format!("{phases_dir}/{id}_{contig}_phases.postcard.gz"),
                 )?;
-                write_phases_bed(&phases, &format!("{phases_dir}/{id}_{contig}.bed"))?;
+                let bed_path = format!("{phases_dir}/{id}_{contig}.bed");
+                info!("Writting bed file {bed_path}");
+                write_phases_bed(&phases, &bed_path)?;
             }
         }
     }