// index.ts
// Data source: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz
  import fs from 'fs'
  import readline from 'readline'
  import { XMLParser } from 'fast-xml-parser'
  /**
   * Opens the file at `path` as a readline interface so callers can consume it
   * line by line (for example with `for await`).
   *
   * `crlfDelay: Infinity` tells readline to always treat `\r\n` as one line break.
   *
   * @param path - File to read.
   * @returns A `readline.Interface` fed by a read stream on `path`.
   */
  const line$ = (path: string): readline.Interface =>
    readline.createInterface({
      input: fs.createReadStream(path),
      crlfDelay: Infinity,
    })
  /**
   * Builds a byte-offset index of the `<entry>` elements in a line-oriented XML file.
   *
   * One row is written per entry, in the form
   * `acc1;acc2<TAB>from<TAB>to<NEWLINE>`, where `from` is the byte offset of the
   * start of the line that begins with `<entry` and `to` is the byte offset just
   * past the line that begins with `</entry>` (including its line terminator, if any).
   *
   * The file is scanned as raw bytes and split on `\n`, so the offsets stay exact
   * for multi-byte UTF-8 text and for `\r\n` line endings. Counting string lengths
   * of decoded lines would drift on both.
   *
   * An existing index file is overwritten, so rebuilding never duplicates rows.
   * A closing `</entry>` line with no preceding `<entry` line produces no row.
   *
   * @param filePath - XML file to index.
   * @param indexPath - Where to write the index; defaults to `filePath + '.jsi'`.
   * @returns A promise that settles once the index has been written and closed.
   */
  const makeIndex = async (filePath: string, indexPath?: string): Promise<void> => {
    // `||` rather than `??` on purpose: an empty string also selects the default path.
    const target = indexPath || filePath + '.jsi'
    const fromSel = /^<entry/
    const toSel = /^<\/entry>/
    const valSel = /<accession>(.*?)<\/accession>/g
    const NEWLINE = 0x0a

    let lineStart = 0 // absolute byte offset of the line currently being read
    let entryStart: number | undefined
    let accessions: string[] = []
    let carry: Buffer = Buffer.alloc(0) // bytes of a line that spans a chunk boundary
    const pending: string[] = [] // finished rows not yet written to disk

    const out = await fs.promises.open(target, 'w')
    try {
      const consumeLine = (bytes: Buffer, terminatorLength: number): void => {
        const line = bytes.toString('utf8')
        const lineEnd = lineStart + bytes.length + terminatorLength
        if (fromSel.test(line)) entryStart = lineStart
        for (const match of line.matchAll(valSel)) {
          const accession = match[1]
          if (accession !== undefined) accessions.push(accession)
        }
        if (toSel.test(line)) {
          if (entryStart !== undefined) {
            pending.push(accessions.join(';') + '\t' + entryStart + '\t' + lineEnd + '\n')
          }
          entryStart = undefined
          accessions = []
        }
        lineStart = lineEnd
      }

      const flush = async (): Promise<void> => {
        if (pending.length === 0) return
        await out.write(pending.join(''))
        pending.length = 0
      }

      const chunks: AsyncIterable<Buffer> = fs.createReadStream(filePath)
      for await (const chunk of chunks) {
        const data = carry.length > 0 ? Buffer.concat([carry, chunk]) : chunk
        let pos = 0
        for (let nl = data.indexOf(NEWLINE, pos); nl !== -1; nl = data.indexOf(NEWLINE, pos)) {
          consumeLine(data.subarray(pos, nl), 1)
          pos = nl + 1
        }
        carry = data.subarray(pos)
        // One write per chunk instead of one per entry.
        await flush()
      }
      // Final line without a trailing newline.
      if (carry.length > 0) consumeLine(carry, 0)
      await flush()
    } finally {
      await out.close()
    }
  }
  /**
   * Reads the byte range `[from, to)` of a file and returns it decoded as UTF-8.
   *
   * The file is opened read-only and the handle is always closed. Failures to
   * open or read reject the returned promise, as does a negative range, because
   * the buffer cannot be allocated. If the file ends before `to`, only the bytes
   * actually read are returned.
   *
   * @param path - File to read from.
   * @param from - Byte offset of the first byte to read.
   * @param to - Byte offset one past the last byte to read.
   * @returns The decoded contents of the requested range.
   */
  const readOffset = async (path: string, from: number, to: number): Promise<string> => {
    const size = to - from
    // Allocate before opening so an invalid range cannot leak a file handle.
    const buffer = Buffer.alloc(size)
    const filehandle = await fs.promises.open(path, 'r')
    try {
      const { bytesRead } = await filehandle.read(buffer, 0, buffer.length, from)
      return buffer.toString('utf8', 0, bytesRead)
    } finally {
      await filehandle.close()
    }
  }
  /**
   * Looks up the byte range of the entry that lists `accession` in the index
   * stored next to the database at `dbPath + '.jsi'`, building that index first
   * when the file does not exist yet.
   *
   * Each index row is `acc1;acc2<TAB>from<TAB>to`. The accession is compared for
   * exact equality against the entries of the first column, so a query can match
   * neither a longer accession that merely contains it nor the offset columns.
   *
   * @param dbPath - Path of the XML database file.
   * @param accession - Accession to look for.
   * @returns `[from, to]` of the first matching row, or `[0, 0]` when none matches.
   */
  const getEntryOffset = async (dbPath: string, accession: string): Promise<number[]> => {
    const indexPath = dbPath + '.jsi'
    if (!fs.existsSync(indexPath)) await makeIndex(dbPath)
    for await (const line of line$(indexPath)) {
      const [accessions = '', from, to] = line.split('\t')
      if (accessions.split(';').includes(accession)) return [Number(from), Number(to)]
    }
    return [0, 0]
  }
  /**
   * Fetches one entry from the XML database and returns it parsed into an object.
   *
   * The entry's byte range comes from `getEntryOffset`, the raw XML from
   * `readOffset`, and the parsing is done by fast-xml-parser's `XMLParser`.
   * When the accession is not in the index, `getEntryOffset` yields `[0, 0]`,
   * so an empty string is what gets parsed.
   *
   * The misspelled name is kept because it is part of the module's exports;
   * `getEntry` is exported alongside it as the correctly spelled alias.
   *
   * @param dbPath - Path of the XML database file.
   * @param accession - Accession of the entry to load.
   * @returns Whatever `XMLParser.parse` produces for the entry's XML.
   */
  const getEnrty = async (dbPath: string, accession: string) => {
    const parser = new XMLParser({
      ignoreAttributes: false,
      alwaysCreateTextNode: false,
      attributeNamePrefix: "",
      textNodeName: "value",
      allowBooleanAttributes: true,
    })
    const [from, to] = await getEntryOffset(dbPath, accession)
    const xml = await readOffset(dbPath, from, to)
    return parser.parse(xml)
  }

  /** Correctly spelled alias of `getEnrty`. */
  const getEntry = getEnrty

  export { makeIndex, readOffset, getEnrty, getEntry }