|
|
@@ -40,7 +40,7 @@ impl HeteroVar {
|
|
|
alternative: u8,
|
|
|
) -> Result<(HeteroVar, HeteroVar)> {
|
|
|
let mut rec_base = if let std::result::Result::Ok(rb) =
|
|
|
- pandora_lib_pileup::qnames_at_base(bam_a, chr, position, false)
|
|
|
+ pandora_lib_pileup::qnames_at_base(bam_a, chr, position, false, 50)
|
|
|
{
|
|
|
rb
|
|
|
} else {
|
|
|
@@ -48,7 +48,7 @@ impl HeteroVar {
|
|
|
};
|
|
|
|
|
|
if let std::result::Result::Ok(rb) =
|
|
|
- pandora_lib_pileup::qnames_at_base(bam_b, chr, position, false)
|
|
|
+ pandora_lib_pileup::qnames_at_base(bam_b, chr, position, false, 50)
|
|
|
{
|
|
|
rec_base.extend(rb);
|
|
|
} else {
|
|
|
@@ -234,12 +234,17 @@ impl Phase {
|
|
|
}
|
|
|
|
|
|
pub fn bed_string(&self) -> Option<String> {
|
|
|
- if let (Some((min, _min_cov, _max_cov, max)), Some(id)) = (self.range, self.id()) {
|
|
|
+ // if let (Some((_min, min_cov, max_cov, _max)), Some(id)) = (self.range, self.id()) {
|
|
|
+ if let Some(id) = self.id() {
|
|
|
+ let f = self.data.first().unwrap();
|
|
|
+ let chr = f.chr.clone();
|
|
|
+ let start = f.position;
|
|
|
+ let end = self.data.last().unwrap().position;
|
|
|
Some(
|
|
|
[
|
|
|
- self.data.first().unwrap().chr.to_string(),
|
|
|
- min.to_string(),
|
|
|
- max.to_string(),
|
|
|
+ chr,
|
|
|
+ start.to_string(),
|
|
|
+ end.to_string(),
|
|
|
format!("{} {:.2}", id, self.mean_vaf()),
|
|
|
]
|
|
|
.join("\t"),
|
|
|
@@ -416,15 +421,21 @@ pub fn variants_phasing(
|
|
|
.par_chunks(phases.len() / 75)
|
|
|
.flat_map(|chunks| merge_phases(chunks.to_vec(), min_records, 5, multi).unwrap())
|
|
|
.collect();
|
|
|
- phases.sort();
|
|
|
- let phases: Vec<Phase> = phases
|
|
|
+ phases.par_sort();
|
|
|
+ info!("{} phases", phases.len());
|
|
|
+
|
|
|
+ let mut phases: Vec<Phase> = phases
|
|
|
.par_chunks(phases.len() / 50)
|
|
|
.flat_map(|chunks| merge_phases(chunks.to_vec(), min_records, 100, multi).unwrap())
|
|
|
.collect();
|
|
|
+ phases.par_sort();
|
|
|
+ info!("{} phases", phases.len());
|
|
|
+
|
|
|
let mut phases: Vec<Phase> = phases
|
|
|
.par_chunks(phases.len() / 25)
|
|
|
.flat_map(|chunks| merge_phases(chunks.to_vec(), min_records, 1000, multi).unwrap())
|
|
|
.collect();
|
|
|
+ info!("{} phases", phases.len());
|
|
|
|
|
|
let chunk_size = phases.len() / 25;
|
|
|
phases.par_chunks_mut(chunk_size).for_each(|chunk| {
|
|
|
@@ -521,10 +532,11 @@ pub struct PhaserConfig {
|
|
|
pub const_variants: String,
|
|
|
pub phases_dir: String,
|
|
|
pub min_records: usize,
|
|
|
+ pub around_hetero: f32,
|
|
|
}
|
|
|
|
|
|
impl PhaserConfig {
|
|
|
- pub fn new(id: &str, data_dir: &str, min_records: usize) -> Self {
|
|
|
+ pub fn new(id: &str, data_dir: &str, min_records: usize, around_hetero: f32) -> Self {
|
|
|
let bam_path_a = format!("{data_dir}/{id}/diag/{id}_diag_hs1.bam");
|
|
|
let bam_path_b = format!("{data_dir}/{id}/mrd/{id}_mrd_hs1.bam");
|
|
|
let const_variants = format!("{data_dir}/{id}/diag/{id}_constit.bytes.gz");
|
|
|
@@ -536,6 +548,7 @@ impl PhaserConfig {
|
|
|
const_variants,
|
|
|
phases_dir,
|
|
|
min_records,
|
|
|
+ around_hetero,
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -555,37 +568,38 @@ pub fn phase(config: PhaserConfig, progress: MultiProgress) -> anyhow::Result<()
|
|
|
const_variants,
|
|
|
phases_dir,
|
|
|
min_records,
|
|
|
+ around_hetero,
|
|
|
} = config;
|
|
|
|
|
|
fs::create_dir_all(&phases_dir)?;
|
|
|
|
|
|
+ // Loading variants
|
|
|
let variants = pandora_lib_variants::variants::Variants::new_from_bytes(
|
|
|
&id,
|
|
|
&const_variants,
|
|
|
progress.clone(),
|
|
|
)?;
|
|
|
-
|
|
|
info!(
|
|
|
"{} variants loaded",
|
|
|
variants.len().to_formatted_string(&format)
|
|
|
);
|
|
|
|
|
|
+ // Filtering variants for heterozygous SNPs
|
|
|
let mut variants: Vec<Variant> = variants
|
|
|
.data
|
|
|
.into_par_iter()
|
|
|
.filter(|v| {
|
|
|
let mut v = v.clone();
|
|
|
- v.vaf() > 0.4 && v.vaf() < 0.6
|
|
|
+ v.vaf() > (0.5 - around_hetero) && v.vaf() < (0.5 + around_hetero)
|
|
|
})
|
|
|
.filter(|v| matches!(v.alt_cat(), AlterationCategory::Snv))
|
|
|
.collect();
|
|
|
-
|
|
|
let mut contigs = HashSet::new();
|
|
|
variants.iter().for_each(|v| {
|
|
|
contigs.insert(v.contig.to_string());
|
|
|
});
|
|
|
-
|
|
|
variants.par_sort_by(|a, b| a.position.cmp(&b.position));
|
|
|
+ info!("{} heterozigous SNPs considered", variants.len());
|
|
|
|
|
|
let dict = read_dict("/data/ref/hs1/chm13v2.0.dict")?;
|
|
|
for (contig, _) in dict {
|
|
|
@@ -598,14 +612,16 @@ pub fn phase(config: PhaserConfig, progress: MultiProgress) -> anyhow::Result<()
|
|
|
.filter(|v| v.contig == contig)
|
|
|
.collect();
|
|
|
if variants.len() > 1 {
|
|
|
- info!("{contig}: {} variants to phase", v.len());
|
|
|
+ info!("{contig}: {} SNPs to phase", v.len());
|
|
|
let phases = variants_phasing(v, &bam_path_a, &bam_path_b, min_records, &progress);
|
|
|
if !phases.is_empty() {
|
|
|
save_phases(
|
|
|
&phases,
|
|
|
&format!("{phases_dir}/{id}_{contig}_phases.postcard.gz"),
|
|
|
)?;
|
|
|
- write_phases_bed(&phases, &format!("{phases_dir}/{id}_{contig}.bed"))?;
|
|
|
+ let bed_path = format!("{phases_dir}/{id}_{contig}.bed");
|
|
|
+ info!("Writting bed file {bed_path}");
|
|
|
+ write_phases_bed(&phases, &bed_path)?;
|
|
|
}
|
|
|
}
|
|
|
}
|