// index.ts: promise-based helpers for reading FASTA files, directly or through samtools faidx
  1. import { spawn } from 'child_process'
  2. import fs from 'fs'
  3. const zlib = require('zlib')
  4. const readline = require('readline')
  5. const Papa = require('papaparse')
  6. const async_exec = (prog: string, args: string[], onData: Function) => {
  7. return new Promise((resolve, reject) => {
  8. const child = spawn(prog, args, {shell: true})
  9. child.stdout.on('data', data => onData(data.toString().trim()))
  10. child.stderr.on('data', data => onData(data.toString().trim()))
  11. child.on('error', err => reject(err))
  12. child.on('exit', code => resolve(code))
  13. })
  14. }
  15. // Read fasta/fa/fna
  16. const asyncReadSmallFasta = (path: string) => {
  17. return new Promise<any[]>((resolve, reject) => {
  18. const rs = fs.createReadStream(path)
  19. const rl = readline.createInterface({
  20. input: rs, crlfDelay: Infinity
  21. })
  22. rs.once('error', err => reject(err))
  23. let tmpObj = {} as {name: string, sequence: string}
  24. let results: any[] = []
  25. rl.on('line', (line: string) => {
  26. if (line.match(/>/g)) {
  27. if (tmpObj?.name) results.push(tmpObj)
  28. tmpObj = {name: line, sequence: ''}
  29. } else {
  30. tmpObj.sequence += line
  31. }
  32. })
  33. rl.on('close', (_: any) => {
  34. results.push(tmpObj)
  35. resolve(results)
  36. })
  37. })
  38. }
  39. // https://www.biostars.org/p/98885/
  40. const async_save_fai = (fasta_path: string) => {
  41. console.log('Creating new fai from : ', fasta_path)
  42. return new Promise((resolve, reject) => {
  43. const child = spawn('samtools', ['faidx', fasta_path], {
  44. shell: true,
  45. })
  46. let result = ''
  47. child.stdout.on('data', (data: { toString: () => string }) => {
  48. result += data.toString()
  49. })
  50. child.on('error', (err: { toString: (arg0: string) => any }) => {
  51. reject(err.toString('utf8'))
  52. })
  53. child.on('exit', () => {
  54. resolve(result)
  55. })
  56. })
  57. }
  58. const async_paparse = (path: string, opt: { header: boolean; dynamicTyping: boolean }) => {
  59. return new Promise((resolve, reject) => {
  60. const output: any[] = []
  61. const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, opt)
  62. let rs = fs.createReadStream(path)
  63. if((new RegExp(/\.gz$/)).test(path)) {
  64. rs = rs.pipe(zlib.createGunzip())
  65. }
  66. rs.pipe(parseStream)
  67. .on('error', (error: any) => reject(error))
  68. .on('data', (row: any) => {output.push(row)})
  69. .on('finish', () => resolve(output))
  70. })
  71. }
  72. const async_read_bytes = (path: fs.PathLike, start: any, length: number) => {
  73. length = length - 1
  74. return new Promise((resolve, reject) => {
  75. let data = Buffer.alloc(0)
  76. fs.createReadStream(path, { start : start, end: start + length })
  77. .on('error', error => reject(error))
  78. .on('data', (d: Buffer) => {
  79. data = Buffer.concat([data, d])
  80. })
  81. .on('end', () => resolve(data.toString()))
  82. })
  83. }
// Create the .fai index if it is missing, then return the sequence names or, when names are given, the matching {name, sequence} objects.
  85. const asyncReadFasta = (path: string, sequences: string[] | string) => {
  86. sequences = Array.isArray(sequences) ? sequences : [sequences]
  87. return new Promise(async (resolve, reject) => {
  88. const path_fai = (new RegExp('fai$')).test(path) ? path : path + '.fai'
  89. if(!fs.existsSync(path_fai)) {
  90. try { await async_save_fai(path) } catch (e) { reject(e) }
  91. }
  92. try {
  93. const fai = await async_paparse(path_fai, {header: false, dynamicTyping: true })
  94. if(sequences.length === 0) {
  95. if (Array.isArray(fai)) resolve(fai.map((it: any[]) => it[0]))
  96. } else {
  97. let results = []
  98. for (let index = 0; index < sequences.length; index++) {
  99. const seqName = sequences[index]
  100. const tmpFai = Array.isArray(fai) ? fai.filter((it: any[]) => it[0] === seqName)[0] : []
  101. const sequence = await async_read_bytes(path, tmpFai[2],
  102. (Math.trunc(tmpFai[1] / tmpFai[3]) * tmpFai[4]) + (tmpFai[1] % tmpFai[3])) // fu, but better use samtools faidx !
  103. results.push({name: tmpFai[0], sequence})
  104. }
  105. resolve(results)
  106. }
  107. } catch (e) { reject(e) }
  108. })
  109. }
  110. const selectFasta = (path:string, name: string[] | string, out:string) => {
  111. return new Promise<string>(async (resolve, reject) => {
  112. if(!Array.isArray(name)) name = [name]
  113. await async_exec('samtools', ['faidx', path, ...name, '>', out], console.log)
  114. })
  115. }
  116. export { asyncReadFasta, asyncReadSmallFasta, selectFasta }