| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- import { spawn } from 'child_process';
/**
 * Runs a command line through the system shell and streams its output to callbacks.
 *
 * `prog` and `args` are handed to `spawn` with `shell: true`, so shell syntax inside
 * `args` (pipes, quotes, ...) is interpreted by the shell. Never pass untrusted text.
 *
 * @param prog   Program (or first word of the shell command line).
 * @param args   Remaining words of the command line.
 * @param onData Called with each stdout chunk, converted to a string and trimmed.
 * @param onErr  Called with each stderr chunk, converted to a string and trimmed.
 * @returns A promise resolving to the exit code (`null` when the process was ended
 *          by a signal) once the process has ended AND its stdio streams are closed.
 *          Rejects if the process emits `error` (e.g. it could not be spawned).
 */
const async_exec = (
  prog: string,
  args: string[],
  onData: (chunk: string) => void,
  onErr: (chunk: string) => void,
): Promise<number | null> => {
  return new Promise<number | null>((resolve, reject) => {
    const child = spawn(prog, args, {shell: true})
    // Chunks are trimmed one by one (existing contract relied on by callers); a chunk
    // boundary that falls on whitespace therefore loses that whitespace.
    child.stdout.on('data', (data: Buffer) => onData(data.toString().trim()))
    child.stderr.on('data', (data: Buffer) => onErr(data.toString().trim()))
    child.on('error', err => reject(err))
    // 'close' rather than 'exit': 'exit' may fire while stdout/stderr are still open,
    // which would let the promise settle before the last chunks were delivered.
    child.on('close', code => resolve(code))
  })
}
/**
 * Scores how often a sequence changes character from one position to the next.
 *
 * Every position (except the first) whose character differs from the one before it
 * contributes `1 / Seq.length`. The result is 0 for empty, single-character and
 * homopolymer strings and `(length - 1) / length` when every neighbour pair differs.
 *
 * @param Seq Sequence to score; compared one UTF-16 code unit at a time.
 * @returns The accumulated score.
 */
const diversitySeq = (Seq:string) => {
  const total = Seq.length
  const step = 1 / total
  let score = 0
  for (let pos = 1; pos < total; pos++) {
    if (Seq[pos] !== Seq[pos - 1]) {
      score += step
    }
  }
  return score
}
// Columns of BLAST tabular output (-outfmt 6) that follow the leading qseqid column.
// https://www.metagenomics.wiki/tools/blast/blastn-output-format-6
const BLAST_FIELD_KEYS = ['sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']

/**
 * Converts one tabular field to a number when the WHOLE field is numeric, otherwise
 * returns it unchanged. Unlike `parseInt` this keeps decimals (`98.5`), understands
 * exponents (`1e-50`) and does not turn identifiers such as `1234abc` into `1234`.
 */
const parseBlastField = (raw: string): string | number => {
  const value = Number(raw)
  return raw.trim() !== '' && Number.isFinite(value) ? value : raw
}

/**
 * Parses `-outfmt 6` text into at most `maxBlast` hit objects. Each hit carries its
 * 0-based row `index` plus the columns named in BLAST_FIELD_KEYS; blank lines are skipped.
 */
const parseBlastTabular = (output: string, maxBlast: number): Record<string, string | number>[] =>
  output
    .split('\n')
    .filter(line => line.trim() !== '')
    .slice(0, Math.max(0, maxBlast))
    .map((line, index) => {
      const fields = line.split('\t').slice(1) // drop qseqid
      const hit: Record<string, string | number> = {index}
      BLAST_FIELD_KEYS.forEach((key, i) => {
        const raw = fields[i]
        if (raw !== undefined) hit[key] = parseBlastField(raw)
      })
      return hit
    })

/**
 * Runs blastn without a shell, feeds `fasta` to its stdin and resolves with the whole
 * stdout once the process has ended and its streams are closed. stderr chunks are
 * forwarded to `console.log`. Rejects if the process cannot be spawned.
 */
const runBlastn = (blastnPath: string, blastArgs: string[], fasta: string): Promise<string> =>
  new Promise<string>((resolve, reject) => {
    const child = spawn(blastnPath, blastArgs)
    let stdout = ''
    child.stdout.setEncoding('utf8')
    child.stdout.on('data', (chunk: string) => { stdout += chunk })
    child.stderr.on('data', (chunk: Buffer) => console.log(chunk.toString().trim()))
    child.on('error', err => reject(err))
    child.on('close', () => resolve(stdout))
    // blastn may exit before reading its input (bad database, failed spawn, ...);
    // the outcome is reported through 'error'/'close', so a broken pipe is ignored here.
    child.stdin.on('error', () => {})
    child.stdin.end(fasta)
  })

/**
 * Aligns one nucleotide sequence against a BLAST database with `blastn`.
 *
 * The sequence is sent on stdin as a single FASTA record named `GG`. blastn is started
 * directly (no shell), so the sequence, database path and executable path are passed
 * verbatim and never interpreted as shell syntax.
 *
 * @param seq          Query sequence.
 * @param blastDB      Database passed to `-db`.
 * @param maxBlast     Maximum number of hit rows kept in the result.
 * @param minDiversity The sequence is rejected unless `diversitySeq(seq)` exceeds this value.
 * @param blastnPath   blastn executable (name on PATH or full path).
 * @returns `{sequence, blastn}` where `blastn` is the list of parsed hit rows. Typed as
 *          `any` because the caller in this file stores it in an untyped variable.
 * @throws Error 'No sequence', 'Sequence diversity < <min>', 'No blastn hit' or
 *         'Blastn results parsing failed'; also any error raised while spawning blastn.
 */
const annotateSeq = async (
  seq: string,
  blastDB: string,
  maxBlast = 100,
  minDiversity = 0.1,
  blastnPath = 'blastn',
): Promise<any> => {
  if (!seq) {
    throw new Error('No sequence')
  }
  if (!(diversitySeq(seq) > minDiversity)) {
    throw new Error('Sequence diversity < ' + minDiversity)
  }

  // NOTE: -max_target_seqs stays fixed at 100 (as before); `maxBlast` only limits how
  // many of the returned rows are kept.
  const output = await runBlastn(
    blastnPath,
    ['-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'],
    '>GG\n' + seq + '\n',
  )
  if (output.trim() === '') {
    throw new Error('No blastn hit')
  }

  const blastn = parseBlastTabular(output, maxBlast)
  if (blastn.length === 0) {
    throw new Error('Blastn results parsing failed')
  }
  return {sequence: seq, blastn}
}
/**
 * Transposes a matrix given as an array of rows: element `[r][c]` of the input ends up
 * at `[c][r]` of the output. An empty matrix yields an empty array. The output has as
 * many columns as the LAST row has cells, so callers should pass rectangular input.
 */
const transpose = (matrix: any) => {
  let columns: any[] = []
  matrix.forEach((row: any) => {
    // Rebuild every column with this row's cell appended to what was gathered so far.
    columns = row.map((cell: any, col: any) => {
      const gathered = columns[col] || []
      return gathered.concat([cell])
    })
  })
  return columns
}
/**
 * Returns the indices of every element equal to the maximum of `arr`, in ascending
 * order. An empty array yields an empty array.
 *
 * The maximum is computed once in a single pass (instead of once per element through
 * `Math.max(...arr)`), which keeps the function linear and avoids the argument-count
 * limit that spreading a very large array into `Math.max` runs into.
 */
const whichMax = (arr:any) => {
  let max = -Infinity
  for (const v of arr) {
    max = Math.max(max, v)
  }
  const positions: number[] = []
  arr.forEach((v: any, i: number) => {
    if (v === max) positions.push(i)
  })
  return positions
}
/**
 * Builds a per-base summary of which BLAST hit best explains each part of a sequence.
 *
 * The sequence is searched against every database in `dbs`; all hits are pooled and
 * renumbered 1..N (`index`). For each base, the hit covering it with the greatest
 * alignment `length` wins (first hit on ties, 0 = no hit). Consecutive bases with the
 * same winner are then collapsed into segments.
 *
 * @param args `{sequence, dbs}` - the query sequence and the list of BLAST databases.
 * @returns On success an object with
 *          - `short`: segments rendered as `name{n}` joined by `<>`,
 *          - `all_blastn`: the pooled, renumbered hits,
 *          - `sup`: `[sequence, one '|'/'_' coverage string per hit, winners joined]`,
 *          - `bestReprRed`: segments `{name, n, start, end}` with 1-based inclusive
 *            `start`/`end` and `n` bases each (`name` is `sseqid:sstart-send` or `unknown`).
 *          On any failure, including "no hit in any database", the error is logged and
 *          the number `1` is returned.
 */
const getBlastRepr = async (args: any) => {
  const {sequence, dbs}: {sequence: string, dbs: string[]} = args
  try {
    let all_blastn: any[] = []
    for (const cdb of dbs) {
      try {
        const res = await annotateSeq(sequence, cdb)
        if (res.blastn.length > 0) {
          // Renumber after every merge so `index` is always position + 1 in the pool.
          all_blastn = [...all_blastn, ...res.blastn].map((v: any, i: number) => ({...v, index: i + 1}))
        }
      } catch (e) {
        // Deliberate best-effort: a database without hits (or a failed search) must
        // not prevent the remaining databases from being searched.
      }
    }
    if (all_blastn.length === 0) {
      // Previously this case only failed by accident further down (reduce of an empty array).
      throw new Error('No blastn hit in any database')
    }

    // One string per hit: '|' where the hit covers the query (1-based qstart..qend), '_' elsewhere.
    const indiv_match: string[] = all_blastn.map((blastn: any) => {
      const {start, end} = blastn.qstart <= blastn.qend
        ? {start: blastn.qstart, end: blastn.qend}
        : {start: blastn.qend, end: blastn.qstart}
      return sequence.split('').map((_: string, i: number) => ((i + 1) >= start && (i + 1) <= end) ? '|' : '_').join('')
    })

    // For every base: the 1-based index of the longest covering hit, or 0 when uncovered.
    const bestRepr: number[] = transpose(indiv_match.map((v: string) => v.split(''))).map((column: string[]) => {
      const lengths = column.map((c: string, hit: number) => (c === '|' ? all_blastn[hit].length : 0))
      return Math.max(...lengths) === 0 ? 0 : whichMax(lengths)[0] + 1
    })

    const hitName = (hitIndex: number): string => {
      if (hitIndex === 0) return 'unknown'
      const hit = all_blastn.find((v: any) => v.index === hitIndex)
      return hit.sseqid + ":" + hit.sstart + '-' + hit.send
    }

    // Run-length collapse. `runStart` is the 0-based position where the current run began;
    // a run is closed when the winner changes or the sequence ends, so the first base is
    // counted and every segment gets 1-based inclusive coordinates.
    const bestReprRed: {name: string, n: number, start: number, end: number}[] = []
    let runStart = 0
    for (let i = 1; i <= bestRepr.length; i++) {
      if (i === bestRepr.length || bestRepr[i] !== bestRepr[runStart]) {
        bestReprRed.push({name: hitName(bestRepr[runStart]), n: i - runStart, start: runStart + 1, end: i})
        runStart = i
      }
    }

    const sup = [sequence, ...indiv_match, bestRepr.join('')]
    return {short: bestReprRed.map(ee => ee.name + "{" + ee.n + "}").join("<>"), all_blastn, sup, bestReprRed}
  } catch (error) {
    console.log(error);
    return 1
  }
}
- export { getBlastRepr }
- /*(async()=>{
- const sequence = 'ATCTTCACCACGAACTGCTGCTTGCTCGCTTGCTCCTCAGTCCTAGCTTCATCAAACACTGGTTCCTGGAATCCTGTCTGCTGCTGTCTTCCTAGATTCACTGAATCTTCACCACGAACTGCTGCTTGCTCGCTTGCTCCTCAGTCCTAGCTTCATCAA'
- const dbs = ['/home/thomas/NGS/ref/RNA/human_rna.fna']
- console.log(await getBlastRepr({sequence, dbs}));
-
- })()*/
|