"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.getBlastRepr = void 0; const child_process_1 = require("child_process"); const async_exec = (prog, args, onData, onErr) => { return new Promise((resolve, reject) => { const child = (0, child_process_1.spawn)(prog, args, { shell: true }); child.stdout.on('data', data => onData(data.toString().trim())); child.stderr.on('data', data => onErr(data.toString().trim())); child.on('error', err => reject(err)); child.on('exit', code => resolve(code)); }); }; const diversitySeq = (Seq) => { return Seq.split('').reduce((prev, _curr, id, array) => { if (id != 0 && array[id] !== array[id - 1]) { return prev + (1 / array.length); } else { return prev; } }, 0); }; const annotateSeq = (seq, blastDB, maxBlast = 100, minDiversity = 0.1, blastnPath = 'blastn') => __awaiter(void 0, void 0, void 0, function* () { return new Promise((resolve, reject) => __awaiter(void 0, void 0, void 0, function* () { try { let results = ''; let sequence = { sequence: seq }; if (sequence.sequence) { if (diversitySeq(sequence.sequence) > minDiversity) { const sequenceStr = '\'>GG\\n' + sequence.sequence + '\''; yield async_exec('echo', [sequenceStr, '|', blastnPath, '-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'], (m) => results += m, console.log); if (results !== '') { //https://www.metagenomics.wiki/tools/blast/blastn-output-format-6 const keys = [/*'qseqid',*/ 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']; results.split('\n') .map((it, index) => sequence.blastn = [ ...(sequence.blastn || []), Object.assign({ index }, it.split('\t') .slice(1) .reduce((a, v, i) => (Object.assign(Object.assign({}, a), { [keys[i]]: isNaN(parseInt(v)) ? v : parseInt(v) })), {})) ].splice(0, maxBlast)); if (sequence.blastn.length === 0) { throw 'Blastn results parsing failed'; } } else { // console.log('WARNING NO BLASTN RESULT', ['echo', '-e', ,'\'' + sequenceStr + '\'', '|', // blastn, '-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'].join(' ')) throw 'No blastn hit'; } } else { throw 'Sequence diversity < ' + minDiversity; } } else { throw 'No sequence'; } resolve(sequence); } catch (error) { reject(error); } })); }); const transpose = (matrix) => matrix.reduce(($, row) => row.map((_, i) => [...($[i] || []), row[i]]), []); const whichMax = (arr) => arr.flatMap((v, i) => v === Math.max(...arr) ? i : []); const getBlastRepr = (args) => __awaiter(void 0, void 0, void 0, function* () { const { sequence, dbs } = args; try { let all_blastn = []; for (const cdb of dbs) { let res = []; try { res = yield annotateSeq(sequence, cdb); if (res.blastn.length > 0) all_blastn = [...all_blastn, ...res.blastn].map((v, i) => { return Object.assign(Object.assign({}, v), { index: i + 1 }); }); } catch (e) { } } if (all_blastn.length > 0) { const indiv_match = all_blastn.map((blastn) => { const { start, end } = blastn.qstart <= blastn.qend ? { start: blastn.qstart, end: blastn.qend } : { end: blastn.qstart, start: blastn.qend }; return sequence.split('').map((_, i) => ((i + 1) >= start && (i + 1) <= end) ? '|' : '_').join(''); }); const bestRepr = transpose(indiv_match.map((v) => v.split(''))).map((v) => { const tmp = v.map((c, i) => { if (c === '|') { return all_blastn[i].length; } else { return 0; } }); if (Math.max(...tmp) === 0) { return 0; } else { return whichMax(tmp)[0] + 1; } }); let bestReprRed = []; let n = 0; let start = 0; bestRepr.reduce((p, c, i) => { if (p !== c) { const name = p === 0 ? 'unknown' : all_blastn.filter((v) => v.index === p)[0].sseqid + ":" + all_blastn.filter((v) => v.index === p)[0].sstart + '-' + all_blastn.filter((v) => v.index === p)[0].send; bestReprRed.push({ name, n, start, end: i }); start = (i + 1); n = 0; } n++; if (i === (bestRepr.length - 1)) { const name = c === 0 ? 'unknown' : all_blastn.filter((v) => v.index === c)[0].sseqid + ":" + all_blastn.filter((v) => v.index === c)[0].sstart + '-' + all_blastn.filter((v) => v.index === c)[0].send; bestReprRed.push({ name, n, start, end: i + 1 }); } return c; }); const sup = [sequence, ...indiv_match, bestRepr.join('')]; return { short: bestReprRed.flatMap((ee) => ee.name + "{" + ee.n + "}").join("<>"), all_blastn, sup, bestReprRed }; } else { return {}; } } catch (error) { console.log(error); return 1; } }); exports.getBlastRepr = getBlastRepr; /* (async()=>{ const sequence = 'TGTTAAAAGTAAGAGACAGCTGAACCCTCGTGGAGCCATTCATACAGGTCCCTATT' const dbs = ['/home/thomas/NGS/ref/RNA/human_rna.fna'] console.log(await getBlastRepr({sequence, dbs})); })()*/