suffix.rs 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. use rayon::prelude::*;
  2. pub fn build_suffix_array(genome: &[u8]) -> Vec<usize> {
  3. let n = genome.len();
  4. if n == 0 {
  5. return Vec::new();
  6. }
  7. let mut sa: Vec<usize> = (0..n).collect();
  8. let mut rank: Vec<i64> = genome.iter().map(|&b| b as i64).collect();
  9. let mut tmp = vec![0i64; n];
  10. let mut gap = 1usize;
  11. while gap < n {
  12. // ── Sort — main bottleneck, worth parallelising ───────────────────
  13. sa.par_sort_unstable_by(|&a, &b| {
  14. let second = |i: usize| if i + gap < n { rank[i + gap] } else { -1 };
  15. (rank[a], second(a)).cmp(&(rank[b], second(b)))
  16. });
  17. // ── Compute key-change flags in parallel ──────────────────────────
  18. let differs: Vec<bool> = (1..n)
  19. .into_par_iter()
  20. .map(|i| {
  21. let second = |i: usize| if i + gap < n { rank[i + gap] } else { -1i64 };
  22. let prev = sa[i - 1];
  23. let cur = sa[i];
  24. (rank[prev], second(prev)) != (rank[cur], second(cur))
  25. })
  26. .collect();
  27. // ── Prefix sum — sequential, O(n), cache-friendly ────────────────
  28. tmp[sa[0]] = 0;
  29. for i in 1..n {
  30. tmp[sa[i]] = tmp[sa[i - 1]] + if differs[i - 1] { 1 } else { 0 };
  31. }
  32. rank.copy_from_slice(&tmp);
  33. if rank[sa[n - 1]] == (n - 1) as i64 {
  34. break;
  35. }
  36. gap *= 2;
  37. }
  38. sa
  39. }
  40. pub fn build_lcp_kasai(genome: &[u8], sa: &[usize]) -> Vec<usize> {
  41. let n = genome.len();
  42. let mut rank = vec![0usize; n];
  43. for (i, &s) in sa.iter().enumerate() {
  44. rank[s] = i;
  45. }
  46. let mut lcp = vec![0usize; n];
  47. let mut h = 0usize;
  48. for p in 0..n {
  49. let r = rank[p];
  50. if r == 0 {
  51. h = 0;
  52. continue;
  53. }
  54. let prev = sa[r - 1];
  55. while p + h < n && prev + h < n && genome[p + h] == genome[prev + h] {
  56. h += 1;
  57. }
  58. lcp[r] = h;
  59. if h > 0 {
  60. h -= 1;
  61. }
  62. }
  63. lcp
  64. }