| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- "use strict";
/**
 * Drives a generator function as if it were an async function.
 * (The shape matches the helper TypeScript emits for down-levelled
 * `async`/`await`; an already-defined `this.__awaiter` is reused if present.)
 *
 * @param {*} thisArg - `this` value the generator function is applied with.
 * @param {Array|undefined} _arguments - Arguments for the generator function.
 * @param {Function|undefined} P - Promise constructor to use; defaults to `Promise`.
 * @param {Function} generator - Generator function whose `yield`s are awaited.
 * @returns {Promise} Settles with the generator's return value, or rejects
 *   with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap a yielded value in a P unless it already is one.
    const toPromise = (candidate) => {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P((settle) => { settle(candidate); });
    };
    return new (P || (P = Promise))((resolve, reject) => {
        // Either finish with the generator's return value or wait for the
        // yielded value and feed the outcome back into the generator.
        const advance = (state) => {
            if (state.done) {
                resolve(state.value);
            }
            else {
                toPromise(state.value).then(onFulfilled, onRejected);
            }
        };
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // Instantiate the generator (rebinding `generator` to the iterator)
        // and run it up to its first yield.
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.getBlastRepr = void 0;
- const child_process_1 = require("child_process");
/**
 * Runs a command through the system shell and streams its output to callbacks.
 *
 * `prog` and `args` are joined into one shell command line (`shell: true`),
 * so shell syntax such as `|` may be passed as an argument. Nothing is
 * escaped here: callers must not pass untrusted text.
 *
 * @param {string} prog - Program (or first part of the shell command).
 * @param {string[]} args - Remaining words of the command line.
 * @param {(chunk: string) => void} onData - Called with each stdout chunk,
 *   converted to a string and trimmed.
 * @param {(chunk: string) => void} onErr - Called with each stderr chunk,
 *   converted to a string and trimmed.
 * @returns {Promise<number|null>} Resolves with the exit code once the process
 *   has ended and its stdio streams are closed; rejects if the process could
 *   not be spawned.
 */
const async_exec = (prog, args, onData, onErr) => {
    return new Promise((resolve, reject) => {
        const child = (0, child_process_1.spawn)(prog, args, { shell: true });
        child.stdout.on('data', (data) => onData(data.toString().trim()));
        child.stderr.on('data', (data) => onErr(data.toString().trim()));
        child.on('error', (err) => reject(err));
        // Resolve on 'close' rather than 'exit': 'exit' can fire while the
        // stdio streams still hold unread data, which would let the caller
        // continue before the last onData/onErr call has happened.
        child.on('close', (code) => resolve(code));
    });
};
/**
 * Measures how often neighbouring characters of a sequence differ.
 *
 * @param {string} Seq - Sequence to inspect.
 * @returns {number} Number of positions whose character differs from the
 *   previous one, divided by the sequence length (0 for an empty string or a
 *   homopolymer).
 */
const diversitySeq = (Seq) => {
    const symbols = Seq.split('');
    if (symbols.length === 0) {
        return 0;
    }
    let transitions = 0;
    for (let i = 1; i < symbols.length; i++) {
        if (symbols[i] !== symbols[i - 1]) {
            transitions++;
        }
    }
    // Divide once instead of summing 1/length per transition: repeated
    // addition accumulates rounding error (0.1 + 0.1 + 0.1 !== 0.3), which
    // can flip a comparison against a threshold.
    return transitions / symbols.length;
};
/**
 * Runs blastn for one sequence against one database and parses the tabular
 * (`-outfmt 6`) result.
 *
 * The query is piped to blastn as a single FASTA record named `GG`.
 * NOTE(review): `seq`, `blastDB` and `blastnPath` are placed on a shell
 * command line without escaping; do not pass untrusted values.
 * NOTE(review): async_exec trims every stdout chunk before handing it over,
 * so a line break that falls exactly on a chunk boundary is lost.
 *
 * @param {string} seq - Query sequence.
 * @param {string} blastDB - Value passed to blastn's `-db` option.
 * @param {number} [maxBlast=100] - Maximum number of result rows kept.
 * @param {number} [minDiversity=0.1] - The sequence is rejected unless
 *   `diversitySeq(seq)` is strictly greater than this value.
 * @param {string} [blastnPath='blastn'] - blastn executable to invoke.
 * @returns {Promise<{sequence: string, blastn: Object[]}>} The sequence and
 *   its parsed hits; each hit has a 0-based `index` plus the output columns.
 * @throws Rejects with a string (kept as strings for backward compatibility):
 *   'No sequence', 'Sequence diversity < <minDiversity>', 'No blastn hit' or
 *   'Blastn results parsing failed'.
 */
const annotateSeq = async (seq, blastDB, maxBlast = 100, minDiversity = 0.1, blastnPath = 'blastn') => {
    // Columns of `-outfmt 6`, without the leading qseqid.
    //https://www.metagenomics.wiki/tools/blast/blastn-output-format-6
    const keys = ['sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore'];
    // Convert a field to a number only when the WHOLE field is numeric.
    // parseInt() would turn '1e-50' into 1, '98.5' into 98 and the id '2L'
    // into 2.
    const parseField = (raw) => {
        const numeric = Number(raw);
        return raw.trim() !== '' && Number.isFinite(numeric) ? numeric : raw;
    };
    const parseRow = (line, index) => {
        const hit = { index };
        line.split('\t')
            .slice(1, 1 + keys.length)
            .forEach((raw, column) => {
                hit[keys[column]] = parseField(raw);
            });
        return hit;
    };

    const sequence = { sequence: seq };
    if (!sequence.sequence) {
        throw 'No sequence';
    }
    if (!(diversitySeq(sequence.sequence) > minDiversity)) {
        throw 'Sequence diversity < ' + minDiversity;
    }

    // printf instead of echo: whether echo expands '\n' is implementation
    // defined, whereas printf always expands escapes in its format operand.
    const fastaFormat = "'>GG\\n%s\\n'";
    const quotedSeq = '\'' + sequence.sequence + '\'';
    let results = '';
    await async_exec('printf', [fastaFormat, quotedSeq, '|',
        blastnPath, '-db', blastDB, '-query', '-', '-outfmt', '6', '-max_target_seqs', '100'], (m) => results += m, console.log);
    if (results === '') {
        throw 'No blastn hit';
    }

    const rows = results.split('\n').filter((line) => line.trim() !== '');
    sequence.blastn = rows.slice(0, Math.max(0, maxBlast)).map(parseRow);
    if (sequence.blastn.length === 0) {
        throw 'Blastn results parsing failed';
    }
    return sequence;
};
/**
 * Transposes an array of rows into an array of columns.
 *
 * The result is rebuilt for every row from that row's own length, so with
 * ragged input the last row decides how many columns are returned.
 *
 * @param {Array<Array>} matrix - Rows to transpose.
 * @returns {Array<Array>} Columns; `[]` for an empty matrix.
 */
const transpose = (matrix) => {
    let columns = [];
    matrix.forEach((row) => {
        columns = row.map((cell, col) => {
            const soFar = columns[col] || [];
            return soFar.concat([cell]);
        });
    });
    return columns;
};
/**
 * Returns every index at which the array holds its maximum value.
 *
 * @param {number[]} arr - Values to scan.
 * @returns {number[]} Indices of the maximum in ascending order; `[]` for an
 *   empty array or when the maximum is NaN.
 */
const whichMax = (arr) => {
    // Compute the maximum once. Evaluating Math.max(...arr) per element is
    // quadratic, and spreading a very large array as arguments can overflow
    // the call stack.
    const max = arr.reduce((best, v) => Math.max(best, v), -Infinity);
    return arr.flatMap((v, i) => (v === max ? i : []));
};
/**
 * Blasts a sequence against several databases and summarises, position by
 * position, which hit covers the query best.
 *
 * Each database is queried in turn with annotateSeq. The collected hits are
 * renumbered 1..N across databases. Every query position is then assigned to
 * the covering hit with the largest `length` (first hit wins ties), or to 0
 * when no hit covers it. Consecutive positions with the same assignment are
 * merged into segments.
 *
 * @param {{sequence: string, dbs: string[]}} args - Query sequence and the
 *   databases to search.
 * @returns {Promise<Object|number>} On success an object with:
 *   - `short`: segments rendered as `name{n}` and joined with `<>`;
 *   - `all_blastn`: all hits, with `index` renumbered from 1;
 *   - `sup`: the sequence, one `|`/`_` coverage mask per hit, and the
 *     per-position assignment joined into a string;
 *   - `bestReprRed`: segments `{ name, n, start, end }`, where `start`/`end`
 *     are 1-based inclusive query positions and `n` is the segment length.
 *   On any failure, including no hit in any database, the error is logged
 *   and `1` is returned.
 */
const getBlastRepr = async (args) => {
    const { sequence, dbs } = args;
    try {
        let all_blastn = [];
        for (const cdb of dbs) {
            try {
                const res = await annotateSeq(sequence, cdb);
                if (res.blastn.length > 0) {
                    all_blastn = [...all_blastn, ...res.blastn].map((hit, i) => ({ ...hit, index: i + 1 }));
                }
            }
            catch (e) {
                // Deliberately best-effort: a database that yields no hit (or
                // fails) must not prevent the other databases from being used.
            }
        }
        if (all_blastn.length === 0) {
            // Fail explicitly instead of relying on a TypeError raised later
            // by reducing an empty array.
            throw new Error('No blastn hit in any database');
        }
        // One mask per hit: '|' where the hit covers the query position.
        const indiv_match = all_blastn.map((blastn) => {
            const { start, end } = blastn.qstart <= blastn.qend
                ? { start: blastn.qstart, end: blastn.qend }
                : { end: blastn.qstart, start: blastn.qend };
            return sequence.split('').map((_, i) => ((i + 1) >= start && (i + 1) <= end) ? '|' : '_').join('');
        });
        // For each query position: the 1-based index of the longest covering
        // hit, or 0 when the position is uncovered.
        const bestRepr = transpose(indiv_match.map((mask) => mask.split(''))).map((column) => {
            const coveredLengths = column.map((c, hitIdx) => (c === '|' ? all_blastn[hitIdx].length : 0));
            if (Math.max(...coveredLengths) === 0) {
                return 0;
            }
            return whichMax(coveredLengths)[0] + 1;
        });
        // Hits were numbered 1..N in order, so hit k lives at all_blastn[k - 1].
        const segmentName = (hitIndex) => {
            if (hitIndex === 0) {
                return 'unknown';
            }
            const hit = all_blastn[hitIndex - 1];
            return hit.sseqid + ":" + hit.sstart + '-' + hit.send;
        };
        // Run-length encode bestRepr. Every position, including the first
        // one, is counted, and all coordinates are 1-based inclusive.
        const bestReprRed = [];
        let runStart = 0;
        for (let i = 1; i <= bestRepr.length; i++) {
            if (i === bestRepr.length || bestRepr[i] !== bestRepr[runStart]) {
                bestReprRed.push({ name: segmentName(bestRepr[runStart]), n: i - runStart, start: runStart + 1, end: i });
                runStart = i;
            }
        }
        const sup = [sequence, ...indiv_match, bestRepr.join('')];
        return { short: bestReprRed.map((ee) => ee.name + "{" + ee.n + "}").join("<>"), all_blastn, sup, bestReprRed };
    }
    catch (error) {
        console.log(error);
        return 1;
    }
};
- exports.getBlastRepr = getBlastRepr;
- /*(async()=>{
- const sequence = 'ATCTTCACCACGAACTGCTGCTTGCTCGCTTGCTCCTCAGTCCTAGCTTCATCAAACACTGGTTCCTGGAATCCTGTCTGCTGCTGTCTTCCTAGATTCACTGAATCTTCACCACGAACTGCTGCTTGCTCGCTTGCTCCTCAGTCCTAGCTTCATCAA'
- const dbs = ['/home/thomas/NGS/ref/RNA/human_rna.fna']
- console.log(await getBlastRepr({sequence, dbs}));
-
- })()*/
|