| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- use rayon::prelude::*;
- pub fn build_suffix_array(genome: &[u8]) -> Vec<usize> {
- let n = genome.len();
- if n == 0 {
- return Vec::new();
- }
- let mut sa: Vec<usize> = (0..n).collect();
- let mut rank: Vec<i64> = genome.iter().map(|&b| b as i64).collect();
- let mut tmp = vec![0i64; n];
- let mut gap = 1usize;
- while gap < n {
- // ── Sort — main bottleneck, worth parallelising ───────────────────
- sa.par_sort_unstable_by(|&a, &b| {
- let second = |i: usize| if i + gap < n { rank[i + gap] } else { -1 };
- (rank[a], second(a)).cmp(&(rank[b], second(b)))
- });
- // ── Compute key-change flags in parallel ──────────────────────────
- let differs: Vec<bool> = (1..n)
- .into_par_iter()
- .map(|i| {
- let second = |i: usize| if i + gap < n { rank[i + gap] } else { -1i64 };
- let prev = sa[i - 1];
- let cur = sa[i];
- (rank[prev], second(prev)) != (rank[cur], second(cur))
- })
- .collect();
- // ── Prefix sum — sequential, O(n), cache-friendly ────────────────
- tmp[sa[0]] = 0;
- for i in 1..n {
- tmp[sa[i]] = tmp[sa[i - 1]] + if differs[i - 1] { 1 } else { 0 };
- }
- rank.copy_from_slice(&tmp);
- if rank[sa[n - 1]] == (n - 1) as i64 {
- break;
- }
- gap *= 2;
- }
- sa
- }
- pub fn build_lcp_kasai(genome: &[u8], sa: &[usize]) -> Vec<usize> {
- let n = genome.len();
- let mut rank = vec![0usize; n];
- for (i, &s) in sa.iter().enumerate() {
- rank[s] = i;
- }
- let mut lcp = vec![0usize; n];
- let mut h = 0usize;
- for p in 0..n {
- let r = rank[p];
- if r == 0 {
- h = 0;
- continue;
- }
- let prev = sa[r - 1];
- while p + h < n && prev + h < n && genome[p + h] == genome[prev + h] {
- h += 1;
- }
- lcp[r] = h;
- if h > 0 {
- h -= 1;
- }
- }
- lcp
- }
|