| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- import { spawn } from 'child_process'
- import fs from 'fs'
- const zlib = require('zlib')
- const readline = require('readline')
- const Papa = require('papaparse')
/**
 * Runs `prog` with `args` through the system shell and streams its output to `onData`.
 *
 * Every chunk written to stdout or stderr is decoded, trimmed and handed to `onData`.
 * `shell: true` is kept on purpose: callers in this file pass shell syntax in `args`
 * (e.g. a `>` redirection), so arguments must NOT contain untrusted input.
 *
 * @param prog - Executable name or path.
 * @param args - Arguments appended to the command line (interpreted by the shell).
 * @param onData - Callback invoked with each trimmed stdout/stderr chunk.
 * @returns A promise resolving to the process exit code (`null` if it was killed by a
 *          signal); it rejects when the process cannot be spawned.
 */
const async_exec = (
  prog: string,
  args: string[],
  onData: (data: string) => void,
): Promise<number | null> => {
  return new Promise<number | null>((resolve, reject) => {
    const child = spawn(prog, args, { shell: true })
    const forward = (chunk: Buffer | string): void => onData(chunk.toString().trim())
    child.stdout.on('data', forward)
    child.stderr.on('data', forward)
    child.on('error', err => reject(err))
    // 'close' (unlike 'exit') is emitted only after the stdio streams have ended, so
    // every output chunk has been delivered to onData before the promise settles.
    child.on('close', code => resolve(code))
  })
}
/**
 * Reads a whole FASTA file (fasta/fa/fna) into memory, line by line.
 *
 * A line starting with `>` opens a new record; the full header line (including the
 * leading `>`) is stored as `name`. All following lines up to the next header are
 * concatenated, without line terminators, into `sequence`.
 * Lines that appear before the first header belong to no record and are ignored.
 *
 * @param path - Path of the FASTA file to read.
 * @returns A promise resolving to the list of `{ name, sequence }` records in file
 *          order (empty for a file without any header); it rejects on read errors.
 */
const asyncReadSmallFasta = (path: string) => {
  return new Promise<any[]>((resolve, reject) => {
    const rs = fs.createReadStream(path)
    const rl = readline.createInterface({ input: rs, crlfDelay: Infinity })
    rs.once('error', err => reject(err))

    const records: { name: string; sequence: string }[] = []
    // Record currently being filled; null until the first header is seen.
    let current: { name: string; sequence: string } | null = null

    rl.on('line', (line: string) => {
      if (line.startsWith('>')) {
        if (current) records.push(current)
        current = { name: line, sequence: '' }
      } else if (current) {
        current.sequence += line
      }
    })

    rl.on('close', () => {
      // Flush the last record; nothing to flush when the file had no header at all.
      if (current) records.push(current)
      resolve(records)
    })
  })
}
/**
 * Builds the `.fai` index next to a FASTA file by running `samtools faidx <fasta_path>`.
 * Background on the index format: https://www.biostars.org/p/98885/
 *
 * samtools is spawned directly (no shell) so the path is passed verbatim as a single
 * argument: paths containing spaces or shell metacharacters work and cannot inject
 * commands.
 *
 * @param fasta_path - Path of the FASTA file to index.
 * @returns A promise resolving to whatever samtools printed on stdout.
 * @throws Rejects with an `Error` when samtools cannot be started or exits with a
 *         non-zero status (the message carries the captured stderr).
 */
const async_save_fai = (fasta_path: string): Promise<string> => {
  console.log('Creating new fai from : ', fasta_path)
  return new Promise<string>((resolve, reject) => {
    const child = spawn('samtools', ['faidx', fasta_path])
    let stdout = ''
    let stderr = ''
    child.stdout.on('data', (data: Buffer) => {
      stdout += data.toString()
    })
    child.stderr.on('data', (data: Buffer) => {
      stderr += data.toString()
    })
    // Spawn failures (e.g. samtools not installed) surface here.
    child.on('error', (err: Error) => reject(err))
    // 'close' fires after stdio is drained, so stdout/stderr are complete at this point.
    child.on('close', (code: number | null) => {
      if (code === 0) {
        resolve(stdout)
      } else {
        reject(new Error(
          `samtools faidx failed for ${fasta_path} (exit code ${code}): ${stderr.trim()}`,
        ))
      }
    })
  })
}
/**
 * Parses a delimited text file with Papa Parse and collects every row in memory.
 * Files whose name ends in `.gz` are transparently gunzipped first.
 *
 * @param path - Path of the (optionally gzipped) file to parse.
 * @param opt - Papa Parse options forwarded to `Papa.parse`.
 * @returns A promise resolving to the array of parsed rows; it rejects when reading,
 *          decompressing or parsing fails.
 */
const async_paparse = (path: string, opt: { header: boolean; dynamicTyping: boolean }) => {
  return new Promise((resolve, reject) => {
    const rows: any[] = []
    const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, opt)
    const fileStream = fs.createReadStream(path)

    // pipe() does not forward errors downstream, so each upstream stage needs its own
    // handler; otherwise a missing file or corrupt gzip raises an uncaught 'error'.
    const fail = (error: unknown): void => {
      fileStream.destroy()
      reject(error)
    }
    fileStream.on('error', fail)

    const upstream = /\.gz$/.test(path)
      ? fileStream.pipe(zlib.createGunzip().on('error', fail))
      : fileStream

    upstream.pipe(parseStream)
      .on('error', (error: any) => reject(error))
      .on('data', (row: any) => { rows.push(row) })
      .on('finish', () => resolve(rows))
  })
}
/**
 * Reads `length` bytes of a file starting at byte offset `start` and decodes them as
 * UTF-8 text.
 *
 * @param path - File to read from.
 * @param start - Zero-based byte offset of the first byte to read.
 * @param length - Number of bytes to read; zero or negative yields an empty string.
 * @returns A promise resolving to the decoded text (shorter than `length` when the
 *          file ends early); it rejects on read errors.
 */
const async_read_bytes = (path: fs.PathLike, start: number, length: number): Promise<string> => {
  return new Promise<string>((resolve, reject) => {
    // An empty range would make `end` < `start`, which createReadStream refuses.
    if (length <= 0) {
      resolve('')
      return
    }
    const chunks: Buffer[] = []
    // `end` is inclusive, hence the -1.
    fs.createReadStream(path, { start, end: start + length - 1 })
      .on('error', error => reject(error))
      .on('data', (chunk: Buffer | string) => {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk)
      })
      // Decode once at the end so multi-byte characters split across chunks stay intact.
      .on('end', () => resolve(Buffer.concat(chunks).toString()))
  })
}
/**
 * Random-access FASTA reader backed by a samtools `.fai` index.
 *
 * The index (`<path>.fai`, or `path` itself when it already ends in `fai`) is created
 * with `samtools faidx` when missing. With no sequence requested, the names listed in
 * the index are returned. Otherwise each requested sequence is read straight from the
 * FASTA using the index offsets and returned as `{ name, sequence }`, with line
 * terminators removed.
 *
 * @param path - Path of the FASTA file (or of its `.fai` index).
 * @param sequences - Name or names of the sequences to extract; omit or pass `[]` to
 *                    list the available names instead.
 * @returns A promise resolving to the sequence names, or to the requested records in
 *          the order they were asked for.
 * @throws Rejects when the index cannot be created or parsed, when a requested name is
 *         not in the index, or when reading the FASTA fails.
 */
const asyncReadFasta = async (
  path: string,
  sequences: string[] | string = [],
): Promise<string[] | { name: string; sequence: string }[]> => {
  const wanted = Array.isArray(sequences) ? sequences : [sequences]
  const path_fai = /fai$/.test(path) ? path : path + '.fai'
  // When the caller handed us the index itself, the bases live in the file next to it.
  const fasta_path = path_fai === path ? path.replace(/\.fai$/, '') : path

  if (!fs.existsSync(path_fai)) await async_save_fai(fasta_path)

  // dynamicTyping stays off so names such as "1" or "01" remain exact strings;
  // the numeric columns are converted explicitly below.
  const parsed = await async_paparse(path_fai, { header: false, dynamicTyping: false })
  // Index rows are NAME, LENGTH, OFFSET, LINEBASES, LINEWIDTH; shorter rows (e.g. the
  // one produced by a trailing blank line) are not index entries.
  const rows: unknown[][] = Array.isArray(parsed)
    ? parsed.filter((row): row is unknown[] => Array.isArray(row) && row.length >= 5)
    : []

  if (wanted.length === 0) return rows.map(row => String(row[0]))

  // Name -> index row; the first occurrence wins.
  const byName = new Map<string, unknown[]>()
  for (const row of rows) {
    const rowName = String(row[0])
    if (!byName.has(rowName)) byName.set(rowName, row)
  }

  const results: { name: string; sequence: string }[] = []
  for (const seqName of wanted) {
    const entry = byName.get(seqName)
    if (!entry) throw new Error(`Sequence "${seqName}" not found in ${path_fai}`)

    const length = Number(entry[1])
    const offset = Number(entry[2])
    const lineBases = Number(entry[3])
    const lineWidth = Number(entry[4])
    // Bytes spanned on disk: every full line costs lineWidth bytes (bases plus
    // terminator), followed by the bases of the last, partial line.
    const byteLength = lineBases > 0
      ? Math.trunc(length / lineBases) * lineWidth + (length % lineBases)
      : 0
    const raw = byteLength > 0
      ? String(await async_read_bytes(fasta_path, offset, byteLength))
      : ''
    // The raw byte range still contains the line terminators between sequence lines.
    results.push({ name: seqName, sequence: raw.replace(/[\r\n]/g, '') })
  }
  return results
}
/**
 * Extracts one or more sequences from a FASTA file into a new file by running
 * `samtools faidx <path> <names…> > <out>` through the shell.
 * samtools output that is not redirected (stderr) is echoed with `console.log`.
 *
 * NOTE: the command line is interpreted by a shell, so `path`, `name` and `out` must
 * not contain untrusted input or unquoted spaces.
 *
 * @param path - Source FASTA file.
 * @param name - Sequence name(s) or region(s) to extract.
 * @param out - Destination file receiving the extracted sequences.
 * @returns A promise resolving to `out` once samtools has finished successfully.
 * @throws Rejects when samtools cannot be started or exits with a non-zero status.
 */
const selectFasta = async (path: string, name: string[] | string, out: string): Promise<string> => {
  const names = Array.isArray(name) ? name : [name]
  const code = await async_exec('samtools', ['faidx', path, ...names, '>', out], console.log)
  if (code !== 0) {
    throw new Error(`samtools faidx exited with code ${code} while writing ${out}`)
  }
  return out
}
- export { asyncReadFasta, asyncReadSmallFasta, selectFasta }
|