// Data source:
// https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.xml.gz
import fs from 'fs'
import readline from 'readline'
import { XMLParser } from 'fast-xml-parser'

// Line-level markers of the UniProt XML layout: every record is wrapped in
// <entry ...> ... </entry>, with both tags starting at column 0, and carries
// one or more <accession>...</accession> children.
const ENTRY_START = /^<entry\b/
const ENTRY_END = /^<\/entry>/
const ACCESSION = /<accession>(.*?)<\/accession>/

// Number of index records buffered in memory before they are written out.
const INDEX_FLUSH_THRESHOLD = 1000

/**
 * Yields the lines of a text file one by one (line terminators stripped).
 *
 * The underlying read stream is destroyed when the consumer stops iterating,
 * so an early `return`/`break` inside a `for await` does not leak a file
 * descriptor.
 *
 * @param path - File to read.
 */
async function* line$(path: string): AsyncGenerator<string> {
  const input = fs.createReadStream(path)
  const rl = readline.createInterface({ input, crlfDelay: Infinity })
  try {
    yield* rl
  } finally {
    rl.close()
    input.destroy()
  }
}

/**
 * Builds a byte-offset index for a UniProt XML file.
 *
 * Each index line has the form `ACC1;ACC2;...\t<from>\t<to>\n`, where `from`
 * is the byte offset of the first character of the `<entry` line and `to` is
 * the offset just past the line break that follows `</entry>`.
 *
 * The index is written to a temporary file and renamed into place once it is
 * complete, so an interrupted run never leaves a truncated index behind and a
 * rerun replaces the previous index instead of appending duplicates to it.
 *
 * Offsets assume LF line endings: `readline` strips the terminator, so one
 * byte per line is added back.
 *
 * @param filePath - Path to the (uncompressed) UniProt XML file.
 * @param indexPath - Destination of the index; defaults to `filePath + '.jsi'`.
 */
const makeIndex = async (filePath: string, indexPath?: string): Promise<void> => {
  // `||` rather than `??` on purpose: an empty string also selects the default.
  const target = indexPath || filePath + '.jsi'
  const tmpPath = target + '.tmp'
  const out = await fs.promises.open(tmpPath, 'w')

  let pending: string[] = []
  let byteAcc = 0
  let from: number | undefined
  let accessions: string[] = []
  let completed = false

  try {
    for await (const line of line$(filePath)) {
      if (ENTRY_START.test(line)) {
        from = byteAcc
        accessions = []
      }

      // Count bytes, not UTF-16 code units: the offsets are later used for
      // positional reads on the raw file.
      byteAcc += Buffer.byteLength(line, 'utf8') + 1

      const accession = ACCESSION.exec(line)?.[1]
      if (accession !== undefined && from !== undefined) accessions.push(accession)

      if (ENTRY_END.test(line) && from !== undefined) {
        pending.push(accessions.join(';') + '\t' + from + '\t' + byteAcc + '\n')
        from = undefined
        accessions = []

        if (pending.length >= INDEX_FLUSH_THRESHOLD) {
          await out.appendFile(pending.join(''))
          pending = []
        }
      }
    }

    if (pending.length > 0) await out.appendFile(pending.join(''))
    completed = true
  } finally {
    await out.close()
    // Best-effort cleanup; the original error (if any) is the one that matters.
    if (!completed) await fs.promises.unlink(tmpPath).catch(() => undefined)
  }

  await fs.promises.rename(tmpPath, target)
}

/**
 * Reads the byte range `[from, to)` of a file and decodes it as UTF-8.
 *
 * If the file ends before `to`, only the bytes actually read are returned.
 *
 * @param path - File to read from.
 * @param from - Offset of the first byte to read.
 * @param to - Offset just past the last byte to read.
 * @returns The decoded text of the requested range.
 * @throws RangeError if the offsets are not non-negative integers with `from <= to`.
 */
const readOffset = async (path: string, from: number, to: number): Promise<string> => {
  const size = to - from
  if (!Number.isInteger(from) || !Number.isInteger(to) || from < 0 || size < 0) {
    throw new RangeError(`invalid byte range: from=${from}, to=${to}`)
  }
  if (size === 0) return ''

  const buffer = Buffer.alloc(size)
  // Read-only access is enough; 'r+' would fail on read-only files.
  const handle = await fs.promises.open(path, 'r')
  try {
    let filled = 0
    // A single read may return fewer bytes than requested, so loop until the
    // range is complete or the end of the file is reached.
    while (filled < size) {
      const { bytesRead } = await handle.read(buffer, filled, size - filled, from + filled)
      if (bytesRead === 0) break
      filled += bytesRead
    }
    return buffer.toString('utf8', 0, filled)
  } finally {
    await handle.close()
  }
}

/**
 * Looks up the byte range of an entry in the index of `dbPath`, building the
 * index first when `dbPath + '.jsi'` does not exist yet.
 *
 * The accession is compared for exact equality against the `;`-separated
 * accession list of each index line.
 *
 * @param dbPath - Path to the UniProt XML file.
 * @param accession - Accession to look up.
 * @returns `[from, to]` byte offsets, or `[0, 0]` when the accession is not indexed.
 */
const getEntryOffset = async (dbPath: string, accession: string): Promise<[number, number]> => {
  const indexPath = dbPath + '.jsi'
  if (!fs.existsSync(indexPath)) await makeIndex(dbPath)

  for await (const line of line$(indexPath)) {
    const [accessions = '', from, to] = line.split('\t')
    if (!accessions.split(';').includes(accession)) continue

    const start = Number(from)
    const end = Number(to)
    // Skip malformed index lines instead of returning NaN offsets.
    if (Number.isFinite(start) && Number.isFinite(end)) return [start, end]
  }
  return [0, 0]
}

/**
 * Fetches a single entry from a UniProt XML file and parses it with
 * fast-xml-parser.
 *
 * When the accession is not found, the empty string is handed to the parser
 * and its result for empty input is returned.
 *
 * @param dbPath - Path to the UniProt XML file.
 * @param accession - Accession of the entry to fetch.
 * @returns The value produced by `XMLParser.parse` for the entry's XML.
 */
const getEnrty = async (dbPath: string, accession: string) => {
  const parser = new XMLParser({
    ignoreAttributes: false,
    alwaysCreateTextNode: false,
    attributeNamePrefix: "",
    textNodeName: "value",
    allowBooleanAttributes: true,
  })
  const [from, to] = await getEntryOffset(dbPath, accession)
  return parser.parse(await readOffset(dbPath, from, to))
}

// `getEnrty` is kept for existing callers; `getEntry` is the correctly spelt alias.
export { makeIndex, readOffset, getEnrty, getEnrty as getEntry }