import { spawn } from 'child_process';

/** Callback receiving one decoded (untrimmed) chunk of child-process output. */
type OutputHandler = (chunk: string) => void;

/**
 * One row of BLAST tabular output (`-outfmt 6`) without the leading `qseqid`.
 * See https://www.metagenomics.wiki/tools/blast/blastn-output-format-6
 */
interface BlastHit {
  index: number;
  sseqid: string;
  pident: number;
  length: number;
  mismatch: number;
  gapopen: number;
  qstart: number;
  qend: number;
  sstart: number;
  send: number;
  evalue: number;
  bitscore: number;
}

/** A query sequence together with the hits parsed for it. */
interface AnnotatedSequence {
  sequence: string;
  blastn: BlastHit[];
}

/** One run of consecutive query positions that share the same best hit. */
interface ReprSegment {
  /** `sseqid:sstart-send` of the hit, or `'unknown'` when no hit covers the run. */
  name: string;
  /** Number of query positions in the run. */
  n: number;
  /** First query position of the run (1-based, inclusive). */
  start: number;
  /** Last query position of the run (1-based, inclusive). */
  end: number;
}

/** Full result of {@link getBlastRepr} when at least one hit was found. */
interface BlastRepr {
  short: string;
  all_blastn: BlastHit[];
  sup: string[];
  bestReprRed: ReprSegment[];
}

/** `{}` is returned when no database produced a hit, `1` on unexpected failure. */
type BlastReprResult = BlastRepr | { [K in keyof BlastRepr]?: undefined } | 1;

/**
 * Raised for the expected "nothing to annotate" outcomes (empty sequence,
 * low-diversity sequence, no hit) so callers can tell them apart from real
 * failures such as a missing `blastn` binary.
 */
class AnnotationError extends Error {
  constructor(message: string) {
    super(message);
    this.name = 'AnnotationError';
    // Keeps `instanceof` working even if the file is compiled to an ES5 target.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}

/**
 * Runs `prog` with `args` (no shell involved) and streams its output.
 *
 * @param prog   Executable to start.
 * @param args   Arguments, passed verbatim (no shell quoting or expansion).
 * @param onData Called with every decoded stdout chunk, untrimmed.
 * @param onErr  Called with every decoded stderr chunk, untrimmed.
 * @param input  Optional text written to the child's stdin before closing it.
 * @returns The exit code (or `null` if the child was killed by a signal), once
 *          all stdio streams are closed.
 */
const async_exec = (
  prog: string,
  args: string[],
  onData: OutputHandler,
  onErr: OutputHandler,
  input = '',
): Promise<number | null> =>
  new Promise((resolve, reject) => {
    const child = spawn(prog, args);
    // Decode in the stream so multi-byte characters split across chunks survive.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    child.stdout.on('data', (chunk: string) => onData(chunk));
    child.stderr.on('data', (chunk: string) => onErr(chunk));
    child.on('error', reject);
    // 'close' (unlike 'exit') fires only after stdout/stderr have been drained.
    child.on('close', (code) => resolve(code));
    // The child may exit before reading its stdin; the resulting write error
    // is not interesting, the exit code / 'error' event already reports it.
    child.stdin.on('error', () => undefined);
    child.stdin.end(input);
  });

/**
 * Fraction of positions whose character differs from the previous one,
 * relative to the sequence length. Returns 0 for an empty string.
 */
const diversitySeq = (seq: string): number => {
  if (seq.length === 0) {
    return 0;
  }
  let changes = 0;
  for (let i = 1; i < seq.length; i++) {
    if (seq[i] !== seq[i - 1]) {
      changes++;
    }
  }
  return changes / seq.length;
};

/** Parses one numeric column; empty or missing input yields `NaN`. */
const toNumber = (raw: string | undefined): number =>
  raw === undefined || raw.trim() === '' ? NaN : Number(raw);

/**
 * Parses BLAST `-outfmt 6` text into hits.
 *
 * Numeric columns are converted with `Number` so that fractional values
 * (`pident`, `bitscore`) and scientific notation (`evalue`) are preserved;
 * `sseqid` always stays a string. Lines with fewer than 12 columns or with a
 * non-numeric value in a numeric column are skipped.
 *
 * @param table   Raw stdout of blastn.
 * @param maxRows Maximum number of hits to return.
 */
const parseBlastTable = (table: string, maxRows = Infinity): BlastHit[] => {
  const hits: BlastHit[] = [];
  for (const line of table.split(/\r?\n/)) {
    if (hits.length >= maxRows) {
      break;
    }
    const cols = line.trim().split('\t');
    if (cols.length < 12) {
      continue;
    }
    const hit: BlastHit = {
      index: hits.length,
      sseqid: cols[1] ?? '',
      pident: toNumber(cols[2]),
      length: toNumber(cols[3]),
      mismatch: toNumber(cols[4]),
      gapopen: toNumber(cols[5]),
      qstart: toNumber(cols[6]),
      qend: toNumber(cols[7]),
      sstart: toNumber(cols[8]),
      send: toNumber(cols[9]),
      evalue: toNumber(cols[10]),
      bitscore: toNumber(cols[11]),
    };
    const malformed = Object.values(hit).some(
      (value) => typeof value === 'number' && Number.isNaN(value),
    );
    if (!malformed) {
      hits.push(hit);
    }
  }
  return hits;
};

/**
 * Runs blastn for `seq` against `blastDB` and returns the parsed hits.
 *
 * The query is sent as a one-record FASTA on blastn's stdin (`-query -`), so
 * neither the sequence nor the database path is ever interpreted by a shell.
 * blastn's stderr is forwarded to `console.log`; its exit code is not checked.
 *
 * @param seq          Query sequence.
 * @param blastDB      Value passed to blastn's `-db` option.
 * @param maxBlast     Maximum number of parsed rows kept. blastn itself is
 *                     always asked for `-max_target_seqs 100`.
 * @param minDiversity Sequences whose {@link diversitySeq} is not above this
 *                     value are rejected without running blastn.
 * @param blastnPath   blastn executable to start.
 * @throws AnnotationError `'No sequence'`, `'Sequence diversity < …'` or
 *         `'No blastn hit'` for the expected "nothing to annotate" cases.
 * @throws Error `'Blastn results parsing failed'` when blastn printed output
 *         but no row could be parsed; spawn failures are propagated as-is.
 */
const annotateSeq = async (
  seq: string,
  blastDB: string,
  maxBlast = 100,
  minDiversity = 0.1,
  blastnPath = 'blastn',
): Promise<AnnotatedSequence> => {
  if (!seq) {
    throw new AnnotationError('No sequence');
  }
  if (!(diversitySeq(seq) > minDiversity)) {
    throw new AnnotationError('Sequence diversity < ' + minDiversity);
  }

  let stdout = '';
  await async_exec(
    blastnPath,
    ['-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'],
    // Accumulate raw chunks: trimming each chunk would glue together the rows
    // on either side of a chunk boundary.
    (chunk) => {
      stdout += chunk;
    },
    (chunk) => {
      const message = chunk.trim();
      if (message !== '') {
        console.log(message);
      }
    },
    '>GG\n' + seq + '\n',
  );

  if (stdout.trim() === '') {
    throw new AnnotationError('No blastn hit');
  }
  const blastn = parseBlastTable(stdout, maxBlast);
  if (blastn.length === 0) {
    throw new Error('Blastn results parsing failed');
  }
  return { sequence: seq, blastn };
};

/**
 * Builds the per-position representation of `sequence` from `hits`.
 *
 * `hits[k].index` is expected to be `k + 1`; segment names are looked up by
 * that 1-based number.
 *
 * - `sup`: the sequence, one coverage mask per hit (`|` covered, `_` not) and
 *   the per-position best-hit numbers joined into one string.
 * - `bestReprRed`: run-length encoding of the best-hit numbers, with 1-based
 *   inclusive `start`/`end` and `n` equal to the run length.
 * - `short`: `name{n}` of every run joined with `<>`.
 *
 * For each position the covering hit with the greatest alignment `length`
 * wins (the first one on ties); positions without a covering hit get 0.
 */
const summariseHits = (sequence: string, hits: BlastHit[]): BlastRepr => {
  // qstart/qend may come in either order; normalise to start <= end.
  const ranges = hits.map((hit) => ({
    start: Math.min(hit.qstart, hit.qend),
    end: Math.max(hit.qstart, hit.qend),
    length: hit.length,
  }));
  const positions = Array.from({ length: sequence.length }, (_, i) => i + 1);

  const masks = ranges.map((range) =>
    positions.map((pos) => (pos >= range.start && pos <= range.end ? '|' : '_')).join(''),
  );

  const bestRepr = positions.map((pos) => {
    let best = 0;
    let bestLength = 0;
    ranges.forEach((range, k) => {
      if (pos >= range.start && pos <= range.end && range.length > bestLength) {
        best = k + 1;
        bestLength = range.length;
      }
    });
    return best;
  });

  const segmentName = (hitNumber: number | undefined): string => {
    const hit = hitNumber ? hits[hitNumber - 1] : undefined;
    return hit ? hit.sseqid + ':' + hit.sstart + '-' + hit.send : 'unknown';
  };

  const bestReprRed: ReprSegment[] = [];
  let runStart = 0; // 0-based index of the first position of the current run
  for (let i = 1; i <= bestRepr.length; i++) {
    if (i === bestRepr.length || bestRepr[i] !== bestRepr[runStart]) {
      bestReprRed.push({
        name: segmentName(bestRepr[runStart]),
        n: i - runStart,
        start: runStart + 1,
        end: i,
      });
      runStart = i;
    }
  }

  return {
    short: bestReprRed.map((segment) => segment.name + '{' + segment.n + '}').join('<>'),
    all_blastn: hits,
    sup: [sequence, ...masks, bestRepr.join('')],
    bestReprRed,
  };
};

/**
 * Annotates `args.sequence` against every database in `args.dbs` and
 * summarises which hit best explains each position of the sequence.
 *
 * Databases are queried one after another; a database that yields no
 * annotation is skipped. Unexpected failures for one database (e.g. blastn
 * cannot be started) are logged and that database is skipped as well.
 *
 * @returns The summary (see {@link summariseHits}) with hits numbered from 1
 *          across all databases, `{}` when no database produced a hit, or `1`
 *          if building the summary failed unexpectedly (the error is logged).
 */
const getBlastRepr = async (args: {
  sequence: string;
  dbs: readonly string[];
}): Promise<BlastReprResult> => {
  const { sequence, dbs } = args;
  try {
    const collected: BlastHit[] = [];
    for (const cdb of dbs) {
      try {
        const res = await annotateSeq(sequence, cdb);
        collected.push(...res.blastn);
      } catch (e: unknown) {
        // Best effort: "no annotation" outcomes are normal and stay silent.
        if (!(e instanceof AnnotationError)) {
          console.log(e);
        }
      }
    }
    if (collected.length === 0) {
      return {};
    }
    const all_blastn = collected.map((hit, i) => ({ ...hit, index: i + 1 }));
    return summariseHits(sequence, all_blastn);
  } catch (error) {
    console.log(error);
    return 1;
  }
};

export { getBlastRepr };

/* Usage example:
(async()=>{
  const sequence = 'TGTTAAAAGTAAGAGACAGCTGAACCCTCGTGGAGCCATTCATACAGGTCCCTATT'
  const dbs = ['/home/thomas/NGS/ref/RNA/human_rna.fna']
  console.log(await getBlastRepr({sequence, dbs}));
})()
*/