|
@@ -696,6 +696,41 @@ pub fn overlaps_par(positions: &[&GenomePosition], ranges: &[&GenomeRange]) -> V
|
|
|
// .collect()
|
|
// .collect()
|
|
|
// }
|
|
// }
|
|
|
|
|
|
|
|
|
|
+/// Returns every region that **overlaps** between two sorted
|
|
|
|
|
+/// slices of genome ranges, distributing the work across
|
|
|
|
|
+/// threads with **Rayon**.
|
|
|
|
|
+///
|
|
|
|
|
+/// Both inputs **must**:
|
|
|
|
|
+/// - be sorted by `(contig, start)`
|
|
|
|
|
+/// - contain non-overlapping, half-open ranges (`start < end`)
|
|
|
|
|
+/// - use the same coordinate system.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Parameters
|
|
|
|
|
+/// * `a` – Slice of references to `GenomeRange`.
|
|
|
|
|
+/// * `b` – Slice of references to `GenomeRange`.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Returns
|
|
|
|
|
+/// A `Vec<GenomeRange>` containing one entry for each pairwise
|
|
|
|
|
+/// intersection.
|
|
|
|
|
+/// The output is *not* guaranteed to be sorted.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Complexity
|
|
|
|
|
+/// `O(|a| + |b|)` total work; memory is `O(k)` for `k` produced
|
|
|
|
|
+/// intersections. Work is split per contig and executed in
|
|
|
|
|
+/// parallel.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Panics
|
|
|
|
|
+/// Never panics if the pre-conditions above are met.
|
|
|
|
|
+///
|
|
|
|
|
+/// # Examples
|
|
|
|
|
+/// ```
|
|
|
|
|
+/// # use pandora_lib_promethion::{GenomeRange, range_intersection_par};
|
|
|
|
|
+/// let a_range = GenomeRange { contig: 1, range: 100..200 };
|
|
|
|
|
+/// let b_range = GenomeRange { contig: 1, range: 150..250 };
|
|
|
|
|
+///
|
|
|
|
|
+/// let out = range_intersection_par(&[&a_range], &[&b_range]);
|
|
|
|
|
+/// assert_eq!(out, vec![GenomeRange { contig: 1, range: 150..200 }]);
|
|
|
|
|
+/// ```
|
|
|
pub fn range_intersection_par(a: &[&GenomeRange], b: &[&GenomeRange]) -> Vec<GenomeRange> {
|
|
pub fn range_intersection_par(a: &[&GenomeRange], b: &[&GenomeRange]) -> Vec<GenomeRange> {
|
|
|
let (a_contigs, b_contigs) = rayon::join(
|
|
let (a_contigs, b_contigs) = rayon::join(
|
|
|
|| extract_contig_indices(a),
|
|
|| extract_contig_indices(a),
|
|
@@ -745,7 +780,7 @@ pub fn range_intersection_par(a: &[&GenomeRange], b: &[&GenomeRange]) -> Vec<Gen
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Helper to create (contig, start index, end index) tuples
|
|
// Helper to create (contig, start index, end index) tuples
|
|
|
-fn extract_contig_indices(ranges: &[&GenomeRange]) -> Vec<(u8, usize, usize)> {
|
|
|
|
|
|
|
+pub fn extract_contig_indices(ranges: &[&GenomeRange]) -> Vec<(u8, usize, usize)> {
|
|
|
let mut indices = Vec::new();
|
|
let mut indices = Vec::new();
|
|
|
let mut current_contig = None;
|
|
let mut current_contig = None;
|
|
|
let mut start_idx = 0;
|
|
let mut start_idx = 0;
|
|
@@ -768,7 +803,7 @@ fn extract_contig_indices(ranges: &[&GenomeRange]) -> Vec<(u8, usize, usize)> {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Binary search to find contig indices in precomputed list
|
|
// Binary search to find contig indices in precomputed list
|
|
|
-fn find_contig_indices(contigs: &[(u8, usize, usize)], target: u8) -> Option<(usize, usize)> {
|
|
|
|
|
|
|
+pub fn find_contig_indices(contigs: &[(u8, usize, usize)], target: u8) -> Option<(usize, usize)> {
|
|
|
contigs.binary_search_by(|(c, _, _)| c.cmp(&target))
|
|
contigs.binary_search_by(|(c, _, _)| c.cmp(&target))
|
|
|
.ok()
|
|
.ok()
|
|
|
.map(|idx| (contigs[idx].1, contigs[idx].2))
|
|
.map(|idx| (contigs[idx].1, contigs[idx].2))
|