| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- // https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz
- import fs from 'fs'
- import readline from 'readline'
- import { XMLParser } from 'fast-xml-parser'
/**
 * Opens `path` for reading and exposes its content as a readline interface,
 * which can be consumed line by line with `for await`.
 *
 * `crlfDelay: Infinity` makes a `\r` followed by `\n` count as a single line
 * break regardless of how far apart the two characters arrive.
 */
const line$ = (path: string) => {
  const input = fs.createReadStream(path)
  return readline.createInterface({ input, crlfDelay: Infinity })
}
/**
 * Builds a byte-offset index for a line-oriented XML file of `<entry>` records.
 *
 * The file is scanned once. For every record one row is written to the index:
 *
 *     acc1;acc2;...<TAB>from<TAB>to<LF>
 *
 * where `from` is the byte offset of the line starting with `<entry` and `to`
 * is the byte offset just past the line starting with `</entry>`.
 *
 * Assumptions about the input:
 * - `<entry` / `</entry>` start at column 0 of their own lines;
 * - each `<accession>...</accession>` element sits on a single line (only the
 *   first one on a line is recorded);
 * - lines are terminated by a single `\n` byte and the file is UTF-8.
 *
 * The index is written to a temporary `<index>.tmp` file and renamed into
 * place only after the whole scan succeeded, so an interrupted run never
 * leaves a truncated index behind and an existing index is replaced rather
 * than appended to.
 *
 * @param filePath  Path of the XML file to index.
 * @param indexPath Where to write the index; defaults to `filePath + '.jsi'`.
 */
const makeIndex = async (filePath: string, indexPath?: string): Promise<void> => {
  const target = indexPath || filePath + '.jsi'
  const partial = target + '.tmp'

  const entryOpen = /^<entry/
  const entryClose = /^<\/entry>/
  const accessionTag = /<accession>(.*?)<\/accession>/

  let byteAcc = 0
  let from: number | undefined
  let accessions: string[] = []

  // 'w' truncates any leftover from a previous failed run.
  const out = await fs.promises.open(partial, 'w')
  try {
    for await (const line of line$(filePath)) {
      if (entryOpen.test(line)) from = byteAcc

      // Offsets are consumed as *byte* positions, so count encoded bytes
      // (not UTF-16 code units) plus one for the stripped '\n'.
      byteAcc += Buffer.byteLength(line, 'utf8') + 1

      const accession = accessionTag.exec(line)?.[1]
      if (accession !== undefined) accessions.push(accession)

      if (entryClose.test(line)) {
        // A closing tag without a matching opening tag has no usable range.
        if (from !== undefined) {
          await out.appendFile(accessions.join(';') + '\t' + from + '\t' + byteAcc + '\n')
        }
        from = undefined
        accessions = []
      }
    }
  } finally {
    await out.close()
  }

  await fs.promises.rename(partial, target)
}
/**
 * Reads the byte range `[from, to)` of a file and returns it decoded as UTF-8.
 *
 * If the file ends before `to`, only the bytes that actually exist are
 * returned (no NUL padding). An empty range yields an empty string.
 *
 * @param path File to read from.
 * @param from Byte offset of the first byte to read (inclusive).
 * @param to   Byte offset at which to stop (exclusive).
 * @returns The decoded content of the range.
 * @throws RangeError if the offsets are not non-negative integers with
 *         `from <= to`; rejects with the underlying error if the file cannot
 *         be opened or read.
 */
const readOffset = async (path: string, from: number, to: number): Promise<string> => {
  if (!Number.isInteger(from) || !Number.isInteger(to) || from < 0 || to < from) {
    throw new RangeError(`invalid byte range: from=${from}, to=${to}`)
  }

  const size = to - from
  const buffer = Buffer.alloc(size)

  // Read-only access is sufficient and also works on write-protected files.
  const filehandle = await fs.promises.open(path, 'r')
  try {
    let filled = 0
    // A single read may return fewer bytes than requested; keep going until
    // the range is complete or the end of the file is reached.
    while (filled < size) {
      const { bytesRead } = await filehandle.read(buffer, filled, size - filled, from + filled)
      if (bytesRead === 0) break
      filled += bytesRead
    }
    return buffer.toString('utf8', 0, filled)
  } finally {
    await filehandle.close()
  }
}
/**
 * Looks up the byte range of the entry carrying `accession` in the `.jsi`
 * index that belongs to `dbPath`, building the index first if it is missing.
 *
 * Index rows have the form `acc1;acc2;...<TAB>from<TAB>to`. The accession is
 * compared for exact equality against the individual accessions of a row, so
 * it can neither match a longer accession that merely contains it nor the
 * numeric offset columns, and regex metacharacters have no special meaning.
 *
 * @param dbPath    Path of the indexed database file; the index is expected
 *                  at `dbPath + '.jsi'`.
 * @param accession Accession to search for (case-sensitive).
 * @returns `[from, to]` of the first matching row, or `[0, 0]` when no row
 *          lists the accession.
 */
const getEntryOffset = async (dbPath:string, accession:string): Promise<number[]> => {
  const indexPath = dbPath + '.jsi'
  if (!fs.existsSync(indexPath)) await makeIndex(dbPath)

  // The stream is created here (rather than through a helper) so that it can
  // be released explicitly when the search stops before the end of the file.
  const input = fs.createReadStream(indexPath)
  const rows = readline.createInterface({ input, crlfDelay: Infinity })
  try {
    for await (const row of rows) {
      const [accessions, from, to] = row.split('\t')
      if (accessions === undefined) continue
      if (accessions.split(';').includes(accession)) return [Number(from), Number(to)]
    }
    return [0, 0]
  } finally {
    rows.close()
    input.destroy()
  }
}
/**
 * Fetches one entry from the database file and returns it parsed.
 *
 * The byte range of the entry is resolved through `getEntryOffset`, the raw
 * text of that range is loaded with `readOffset`, and the text is handed to a
 * fast-xml-parser `XMLParser` configured to keep attributes (without a name
 * prefix) and to store text content under the key `value`.
 *
 * When `getEntryOffset` finds nothing it yields the range 0..0, so the parser
 * receives whatever `readOffset` returns for that empty range.
 *
 * NOTE(review): the exported name is spelled `getEnrty`; it is kept as-is
 * because callers import it under that name.
 */
const getEnrty = async (dbPath:string, accession:string) => {
  const parserOptions = {
    ignoreAttributes: false,
    alwaysCreateTextNode: false,
    attributeNamePrefix: "",
    textNodeName: "value",
    allowBooleanAttributes: true,
  }
  const xmlParser = new XMLParser(parserOptions)

  const range = await getEntryOffset(dbPath, accession)
  const rawEntry = await readOffset(dbPath, range[0], range[1])
  return xmlParser.parse(rawEntry)
}
- export { makeIndex, readOffset, getEnrty }
|