// wget ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/mRNA_Prot/human.6.rna.gbff.gz
import fs from 'fs'
import readline from 'readline'
import { Buffer } from 'buffer'
import genbankParser from 'genbank-parser'

/** Suffix appended to a GenBank flat-file path to name its offset index. */
const INDEX_SUFFIX = '.jsi'

/** One record of the offset index: a LOCUS name and the byte range it spans. */
interface IndexEntry {
  filePath: string
  value: string
  from: number
  to?: number
}

/**
 * Yields the lines of the file at `path`, one at a time.
 *
 * The readline interface and the underlying read stream are both released in
 * `finally`, so a consumer that stops early (`break` / `return` inside a
 * `for await`) does not leave the file stream open.
 *
 * @param path File to read.
 */
const line$ = async function* (path: string): AsyncGenerator<string> {
  const input = fs.createReadStream(path)
  const rl = readline.createInterface({ input, crlfDelay: Infinity })
  try {
    for await (const line of rl) {
      yield line
    }
  } finally {
    rl.close()
    input.destroy()
  }
}

/**
 * Reads the bytes `[from, to)` of `path` and decodes them as UTF-8.
 *
 * @param path File to read from (opened read-only).
 * @param from Byte offset of the first byte to read.
 * @param to Byte offset one past the last byte to read.
 * @returns The decoded text. If the file ends before `to`, only the bytes
 *   actually read are returned (no zero padding).
 * @throws RangeError when the offsets are not non-negative integers with
 *   `from <= to`.
 * @throws Whatever `open`/`read` reject with; the promise is rejected instead
 *   of being left pending.
 */
const readOffset = async (path: string, from: number, to: number): Promise<string> => {
  if (!Number.isInteger(from) || !Number.isInteger(to) || from < 0 || to < from) {
    throw new RangeError(`Invalid byte range [${from}, ${to}) for ${path}`)
  }
  const size = to - from
  const buffer = Buffer.alloc(size)
  const filehandle = await fs.promises.open(path, 'r')
  try {
    // A single read() may return fewer bytes than requested, so keep reading
    // until the range is filled or the end of the file is reached.
    let filled = 0
    while (filled < size) {
      const { bytesRead } = await filehandle.read(buffer, filled, size - filled, from + filled)
      if (bytesRead === 0) break
      filled += bytesRead
    }
    return buffer.toString('utf8', 0, filled)
  } finally {
    await filehandle.close()
  }
}

/** Serialises an index entry as a tab-separated `name, from, to` row. */
const formatEntry = (entry: IndexEntry): string => [entry.value, entry.from, entry.to].join('\t')

/*
 * strings -a -t d human.1.rna.gbff | grep LOCUS | awk '{print $1"\t"$3}' > human.1.rna.gbff.index
 */
/**
 * Scans a GenBank flat file and appends one `name<TAB>from<TAB>to` row per
 * LOCUS record to the index file. Rows are separated by "\n"; the final row
 * has no trailing newline.
 *
 * NOTE: offsets are accumulated as `byteLength(line) + 1`, i.e. they assume a
 * single-byte "\n" line terminator. Because lines are read with
 * `crlfDelay: Infinity`, a CRLF-terminated file would produce drifting offsets.
 *
 * @param filePath GenBank flat file to index.
 * @param lineSize Unused; retained so existing call sites keep working.
 * @param indexPath Index file to append to. Defaults to `filePath + '.jsi'`.
 * @returns An array holding the last indexed entry, or an empty array when the
 *   file contains no LOCUS line.
 */
const makeGbffIndex = async (filePath: string, lineSize = 80, indexPath?: string) => {
  const target = indexPath || filePath + INDEX_SUFFIX

  // Fail before touching the index so a missing or unreadable source does not
  // leave an empty index file behind.
  await fs.promises.access(filePath, fs.constants.R_OK)

  let current: IndexEntry | undefined
  let byteAcc = 0

  // One append handle for the whole run instead of an open/close per record.
  const out = await fs.promises.open(target, 'a')
  try {
    for await (const line of line$(filePath)) {
      if (line.startsWith('LOCUS')) {
        // A new record starts here, which is where the previous one ends.
        if (current) {
          current.to = byteAcc
          await out.appendFile(formatEntry(current) + '\n')
        }
        current = { filePath, value: line.split(/\s+/)[1] ?? '', from: byteAcc }
      }
      // Count bytes, not UTF-16 code units: the offsets are used as file positions.
      byteAcc += Buffer.byteLength(line) + 1
    }
    if (current) {
      current.to = byteAcc
      await out.appendFile(formatEntry(current))
    }
  } finally {
    await out.close()
  }

  return current ? [current] : []
}

/**
 * Looks up an accession in an index file written by `makeGbffIndex`.
 *
 * @param indexPath Index file to search.
 * @param acc Name to match exactly against the first column.
 * @returns `[dbPath, from, to]` for the first matching row, where `dbPath` is
 *   `indexPath` with a trailing `.jsi` removed and the offsets are still
 *   strings; `undefined` when no row matches.
 * @throws Error when the matching row lacks the offset columns.
 */
const getOffset = async (indexPath: string, acc: string) => {
  for await (const line of line$(indexPath)) {
    const [name, from, to] = line.split('\t')
    if (name !== acc) continue
    if (from === undefined || to === undefined) {
      throw new Error(`Malformed index entry for ${acc} in ${indexPath}`)
    }
    // Strip only a trailing suffix; '.jsi' may legitimately occur earlier in the path.
    const dbPath = indexPath.endsWith(INDEX_SUFFIX)
      ? indexPath.slice(0, -INDEX_SUFFIX.length)
      : indexPath
    return [dbPath, from, to]
  }
  return undefined
}

/**
 * Fetches and parses the GenBank record for `acc` from one or more flat files.
 *
 * When `indexPath` is omitted, `<dbPath>.jsi` is used for each database and is
 * built first if it does not exist. Databases are searched in order and the
 * first match wins.
 *
 * @param acc Name to look up, as it appears on the LOCUS line.
 * @param dbPath One flat-file path or an array of them.
 * @param indexPath Optional index path(s), positionally paired with `dbPath`.
 * @returns The first element of `genbankParser`'s result for the record, or
 *   `undefined` when no index contains `acc`.
 * @throws Error when `indexPath` and `dbPath` differ in length.
 */
const getFromAcc = async (acc: string, dbPath: string | string[], indexPath?: string | string[]) => {
  const dbPaths = Array.isArray(dbPath) ? dbPath : [dbPath]
  let indexPaths: string[]

  if (!indexPath) {
    indexPaths = []
    for (const db of dbPaths) {
      const candidate = db + INDEX_SUFFIX
      if (!fs.existsSync(candidate)) {
        console.log('Writing index: ' + candidate);
        await makeGbffIndex(db)
      }
      indexPaths.push(candidate)
    }
  } else {
    indexPaths = Array.isArray(indexPath) ? indexPath : [indexPath]
    if (indexPaths.length !== dbPaths.length) {
      throw new Error('indexPath and dbPath must contain the same number of entries')
    }
  }

  for (let i = 0; i < dbPaths.length; i++) {
    const db = dbPaths[i]
    const index = indexPaths[i]
    if (db === undefined || index === undefined) continue

    const hit = await getOffset(index, acc)
    if (!hit) continue

    // Read from the database paired with this index rather than a path derived
    // from the index filename, so caller-supplied index names work too.
    const text = await readOffset(db, Number(hit[1]), Number(hit[2]))
    return genbankParser(text)[0]
  }
  return undefined
}

export { getFromAcc }