index.ts 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import { spawn } from 'child_process';
  2. const async_exec = (prog: string, args: string[], onData: Function, onErr: Function) => {
  3. return new Promise((resolve, reject) => {
  4. const child = spawn(prog, args, {shell: true})
  5. child.stdout.on('data', data => onData(data.toString().trim()))
  6. child.stderr.on('data', data => onErr(data.toString().trim()))
  7. child.on('error', err => reject(err))
  8. child.on('exit', code => resolve(code))
  9. })
  10. }
  11. const diversitySeq = (Seq:string) => {
  12. return Seq.split('').reduce((prev, _curr, id, array) => {
  13. if (id != 0 && array[id] !== array[id-1]) {
  14. return prev + (1/array.length)
  15. } else {
  16. return prev
  17. }
  18. }, 0)
  19. }
  20. const annotateSeq = async (
  21. seq: string,
  22. blastDB: string,
  23. maxBlast = 100,
  24. minDiversity = 0.1,
  25. blastnPath = 'blastn',
  26. ) => {
  27. return new Promise<any>(async (resolve, reject) => {
  28. try {
  29. let results = ''
  30. interface sequence {
  31. sequence: string,
  32. blastn?: any
  33. }
  34. let sequence: sequence = {sequence: seq}
  35. if (sequence.sequence) {
  36. if (diversitySeq(sequence.sequence) > minDiversity) {
  37. const sequenceStr = '\'>GG\\n' + sequence.sequence + '\''
  38. await async_exec('echo', [sequenceStr, '|',
  39. blastnPath, '-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'], (m:string) => results += m, console.log)
  40. if(results !== '') {
  41. //https://www.metagenomics.wiki/tools/blast/blastn-output-format-6
  42. const keys = [/*'qseqid',*/ 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']
  43. results.split('\n')
  44. .map((it, index) => sequence.blastn = [
  45. ...(sequence.blastn || []),
  46. {index, ...it.split('\t')
  47. .slice(1)
  48. .reduce((a, v, i) => ({ ...a, [keys[i]]: isNaN(parseInt(v)) ? v : parseInt(v)}), {})}
  49. ].splice(0,maxBlast))
  50. if(sequence.blastn.length === 0) {
  51. throw 'Blastn results parsing failed'
  52. }
  53. } else {
  54. // console.log('WARNING NO BLASTN RESULT', ['echo', '-e', ,'\'' + sequenceStr + '\'', '|',
  55. // blastn, '-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'].join(' '))
  56. throw 'No blastn hit'
  57. }
  58. } else { throw 'Sequence diversity < ' + minDiversity }
  59. } else { throw 'No sequence' }
  60. resolve(sequence)
  61. } catch (error) {
  62. reject(error)
  63. }
  64. })
  65. }
  66. const transpose = (matrix: any) => matrix.reduce(($:any, row:any) => row.map((_:any, i:any) => [...($[i] || []), row[i]]), [])
  67. const whichMax = (arr:any) => arr.flatMap((v:any, i:any) => v === Math.max(...arr) ? i : [])
  68. const getBlastRepr = async (args: any) => {
  69. const {sequence, dbs} = args
  70. try {
  71. let all_blastn:any = []
  72. for (const cdb of dbs) {
  73. let res = []
  74. try {
  75. res = await annotateSeq(sequence, cdb)
  76. if (res.blastn.length > 0) all_blastn = [...all_blastn, ...res.blastn].map((v,i) => {return {...v, index: i + 1}})
  77. } catch (e) {}
  78. }
  79. const indiv_match = all_blastn.map((blastn:any) => {
  80. const {start, end} = blastn.qstart <= blastn.qend ? {start: blastn.qstart, end: blastn.qend} : {end: blastn.qstart, start: blastn.qend}
  81. return sequence.split('').map((_:any,i:any) => ((i + 1) >= start && (i + 1) <= end) ? '|' : '_').join('')
  82. })
  83. const bestRepr = transpose(indiv_match.map((v:any) => v.split(''))).map((v:any) => {
  84. const tmp = v.map((c:any,i:any) => {
  85. if (c === '|') {
  86. return all_blastn[i].length
  87. } else {
  88. return 0
  89. }
  90. })
  91. if (Math.max(...tmp) === 0 ) {
  92. return 0
  93. } else {
  94. return whichMax(tmp)[0] + 1
  95. }
  96. })
  97. let bestReprRed: any = []
  98. let n = 0
  99. let start = 0
  100. bestRepr.reduce((p:any,c:any,i:any) => {
  101. if (p !== c ) {
  102. const name = p === 0 ? 'unknown' : all_blastn.filter((v:any) => v.index === p)[0].sseqid + ":" + all_blastn.filter((v:any) => v.index === p)[0].sstart + '-' + all_blastn.filter((v:any) => v.index === p)[0].send
  103. bestReprRed.push({name, n, start, end:i})
  104. start = (i+1)
  105. n = 0
  106. }
  107. n++
  108. if (i === (bestRepr.length - 1)) {
  109. const name = c === 0 ? 'unknown' : all_blastn.filter((v:any) => v.index === c)[0].sseqid + ":" + all_blastn.filter((v:any) => v.index === c)[0].sstart + '-' + all_blastn.filter((v:any) => v.index === c)[0].send
  110. bestReprRed.push({name, n, start, end:i+1})
  111. }
  112. return c
  113. })
  114. const sup = [sequence, ...indiv_match, bestRepr.join('')]
  115. return {short: bestReprRed.flatMap((ee:any) => ee.name + "{" + ee.n + "}").join("<>"), all_blastn, sup, bestReprRed}
  116. } catch (error) {
  117. console.log(error);
  118. return 1
  119. }
  120. }
  121. export { getBlastRepr }
  122. /*(async()=>{
  123. const sequence = 'ATCTTCACCACGAACTGCTGCTTGCTCGCTTGCTCCTCAGTCCTAGCTTCATCAAACACTGGTTCCTGGAATCCTGTCTGCTGCTGTCTTCCTAGATTCACTGAATCTTCACCACGAACTGCTGCTTGCTCGCTTGCTCCTCAGTCCTAGCTTCATCAA'
  124. const dbs = ['/home/thomas/NGS/ref/RNA/human_rna.fna']
  125. console.log(await getBlastRepr({sequence, dbs}));
  126. })()*/