|
|
@@ -0,0 +1,157 @@
|
|
|
+"use strict";
|
|
|
/**
 * Drives a generator function as if it were an async function.
 *
 * Each value the generator yields is turned into a promise (of constructor
 * `P`, defaulting to the global `Promise`); when that promise settles, the
 * generator is resumed with the fulfilled value or has the rejection reason
 * thrown into it. The returned promise fulfils with the generator's return
 * value and rejects with any error that escapes the generator.
 *
 * @param {*} thisArg - `this` value used when invoking `generator`.
 * @param {Array|undefined} _arguments - Arguments for `generator` (an empty list when falsy).
 * @param {Function|undefined} P - Promise constructor to use; falls back to `Promise`.
 * @param {Function} generator - Generator function to run.
 * @returns {Promise<*>} Promise for the generator's final return value.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values so that every yielded value can be chained with `.then`.
    const toPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((settle) => {
            settle(value);
        });
    };
    return new (P || (P = Promise))((resolve, reject) => {
        // Either finish with the generator's return value or wait on what it yielded.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator with the awaited value.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Throw the rejection reason into the generator so its try/catch can see it.
        const onRejected = (value) => {
            try {
                advance(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        };
        // `generator` is rebound from the generator function to its iterator here.
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
|
/**
 * Normalises the result of `require` so that `.default` can always be read.
 *
 * A module object flagged with `__esModule` is returned unchanged; any other
 * value is wrapped in an object whose `default` property is that value.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself, or `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
|
+Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
+const child_process_1 = require("child_process");
|
|
|
+const gbffparser_1 = require("gbffparser");
|
|
|
+const fs_1 = __importDefault(require("fs"));
|
|
|
+const os_1 = __importDefault(require("os"));
|
|
|
+const path_1 = __importDefault(require("path"));
|
|
|
/**
 * Runs a command through the shell and streams its standard output to a callback.
 *
 * The command is spawned with `shell: true`, so shell syntax placed in `args`
 * (such as a `>` redirection) is interpreted by the shell. For the same reason
 * `prog` and `args` must never contain untrusted input.
 *
 * Standard error is forwarded to `console.log`. Every chunk is trimmed before
 * being passed on, so leading/trailing whitespace (including a trailing
 * newline) of a chunk is not visible to `onData`.
 *
 * @param {string} prog - Program to run.
 * @param {string[]} args - Arguments, appended to the shell command line.
 * @param {(chunk: string) => void} [onData] - Called with each trimmed stdout chunk.
 * @returns {Promise<number|null>} Resolves with the exit code once the process
 *   has ended and its output streams are closed; rejects if the process could
 *   not be spawned.
 */
const async_exec = (prog, args, onData = () => { }) => {
    return new Promise((resolve, reject) => {
        const child = child_process_1.spawn(prog, args, { shell: true });
        // Decode in the stream so a multi-byte character split across two
        // chunks is not corrupted by per-chunk Buffer#toString().
        child.stdout.setEncoding('utf8');
        child.stderr.setEncoding('utf8');
        child.stdout.on('data', (chunk) => onData(chunk.trim()));
        child.stderr.on('data', (chunk) => console.log(chunk.trim()));
        child.on('error', (err) => reject(err));
        // 'exit' can fire while stdout/stderr still hold unread data; 'close'
        // is emitted only after the stdio streams have been closed, so every
        // onData call has happened before the promise resolves.
        child.on('close', (code) => resolve(code));
    });
};
|
|
|
/**
 * Runs `sambamba view` on one or more alignment files and collects the parsed
 * output lines.
 *
 * For every path:
 *  - a `.sam` path combined with `restraintTo` is replaced by the `.bam` path
 *    of the same base name; when that file does not exist yet it is produced
 *    with `sambamba view -S … -f bam` into a temporary file followed by
 *    `sambamba sort`;
 *  - a `.sam` path without `restraintTo` is read directly with `-S`;
 *  - `-f json` is requested unless `count` is truthy; with `count` and
 *    `restraintTo` both set, `-c` is passed instead;
 *  - `restraintTo`, when set, is appended after the file path.
 *
 * Each output line is parsed with `JSON.parse`; lines that fail to parse are
 * logged and skipped. Exit codes of the sambamba processes are not inspected.
 *
 * The line buffer is private to each file, so a record left over at the end of
 * one file is never parsed a second time (or glued to a count) when the next
 * file starts producing output.
 *
 * @param {string|string[]} filePaths - One path or a list of paths.
 * @param {string} [restraintTo] - Extra argument placed after the file path
 *   (the caller in this file passes a transcript accession).
 * @param {boolean} [count] - When truthy, JSON output is not requested.
 * @returns {Promise<Array<*>>} Parsed values from all files, in output order.
 */
const openSam = async (filePaths, restraintTo, count) => {
    const jsonLines = [];
    // Parse a single output line; blank lines are ignored, bad lines are logged.
    const collect = (line) => {
        if (line.trim() === '') {
            return;
        }
        try {
            jsonLines.push(JSON.parse(line));
        }
        catch (error) {
            console.log(error);
        }
    };
    const paths = Array.isArray(filePaths) ? filePaths : [filePaths];
    for (let filePath of paths) {
        let args = ['view'];
        if (filePath.endsWith('.sam')) {
            if (restraintTo) {
                const bamPath = filePath.slice(0, -'.sam'.length) + '.bam';
                if (!fs_1.default.existsSync(bamPath)) {
                    const tmpBam = path_1.default.join(os_1.default.tmpdir(), Math.random() + '.bam');
                    try {
                        // The '>' is interpreted by the shell that async_exec spawns.
                        await async_exec('sambamba', ['view', '-S', filePath, '-f', 'bam', '>', tmpBam], () => { });
                        await async_exec('sambamba', ['sort', tmpBam, '-o', bamPath], () => { });
                    }
                    finally {
                        // Always drop the temporary file, even when a step above threw.
                        try {
                            await fs_1.default.promises.unlink(tmpBam);
                        }
                        catch (error) {
                            // Nothing to clean up if the file was never created.
                            if (error.code !== 'ENOENT') {
                                throw error;
                            }
                        }
                    }
                }
                filePath = bamPath;
            }
            else {
                args.push('-S');
            }
        }
        if (!count) {
            args = [...args, '-f', 'json'];
        }
        else if (restraintTo) {
            args.push('-c');
        }
        args.push(filePath);
        if (restraintTo) {
            args.push(restraintTo);
        }
        console.log(['sambamba', ...args].join(' '));
        // Unterminated tail of the output of THIS file only.
        let accum = '';
        await async_exec('sambamba', args, (m) => {
            // async_exec trims every chunk, which removes a newline sitting on a
            // chunk boundary; re-insert it where two records became adjacent.
            accum = (accum + m).replace('}{', '}\n{');
            const lines = accum.split('\n');
            // The last piece may be incomplete: keep it for the next chunk.
            accum = lines.pop();
            lines.forEach((line) => collect(line));
        });
        // Whatever is left once the process has finished is the final line(s).
        accum.replace('}{', '}\n{').split('\n').forEach((line) => collect(line));
    }
    return jsonLines;
};
|
|
|
/**
 * Script entry point.
 *
 * 1. Looks up the gene `symbol` with gbffparser's `getSymbol` and writes the
 *    mRNA entries to `test/geneInfo.json`.
 * 2. Reduces every mRNA entry to a transcript record (accessions, start/end,
 *    sequence, and its `exon` features with their sub-sequences) and writes the
 *    list to `test/sub.json`.
 * 3. For every transcript, queries the alignment files under `test/` through
 *    `openSam`, stores the three count results, groups the alignment records by
 *    read name, and derives "bridges": every pair `a-b` of distinct exon numbers
 *    seen for the same read. The bridges, sorted by decreasing number of
 *    supporting reads, are stored as `altTranscripts` and the transcript is
 *    written to `test/altTranscripts-<accession>.json`.
 *
 * Any failure is logged and turns the process exit code to 1.
 */
(async () => {
    // Earlier alignment call, left here as it was (asyncBwaMem is not defined in this file):
    // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
    //     ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
    //     'TEST', 'TEST', 'test/', console.log)
    const symbol = 'MYC';
    const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene';
    const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt';
    const rnaDBPath = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map((n) => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff');
    const geneDBPath = [1, 2, 3, 4, 5, 6, 7].map((n) => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff');
    const geneInfo = await gbffparser_1.getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath);

    // Only mRNA entries are used from here on.
    const mRNAs = geneInfo.filter((entry) => entry.feature === 'mRNA');
    await fs_1.default.promises.writeFile('test/geneInfo.json', JSON.stringify(mRNAs, null, 4));

    const transcripts = mRNAs.map((entry) => {
        const sequence = entry.data.sequence;
        return {
            accession: entry.product_accession,
            genomic_accession: entry.genomic_accession,
            start: entry.start,
            end: entry.end,
            sequence,
            exons: entry.data.features
                .filter((feature) => feature.type === 'exon')
                .map((feature) => ({
                    start: feature.start,
                    end: feature.end,
                    // start/end are used as 1-based inclusive offsets into `sequence`.
                    sequence: sequence.substring(feature.start - 1, feature.end),
                })),
        };
    });
    await fs_1.default.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4));

    for (const transcript of transcripts) {
        // Kept strictly sequential: openSam may create a .bam next to a .sam
        // file, and the later calls reuse that file.
        transcript.count = {
            all: await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
            splitters: await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
            discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
        };
        const samJSON = await openSam([
            'test/bwa_mem_splitters_on_human_NM.sam',
            'test/bwa_mem_discordants_on_human_NM.sam'
        ], transcript.accession);

        // Read name (text before the first '_') -> set of exon numbers it hits.
        // Null-prototype object: a read name such as "constructor" cannot
        // collide with an inherited property.
        const exonsByRead = Object.create(null);
        for (const record of samJSON) {
            const qname = record.qname.split('_')[0];
            const hit = transcript.exons.findIndex((exon) => exon.start <= record.pos && exon.end >= record.pos);
            // 1-based exon number, or undefined when the position is in no exon.
            // NOTE(review): an undefined exon still takes part in the bridges
            // below (e.g. "3-undefined"), as before — confirm this is wanted.
            const exon = hit === -1 ? undefined : hit + 1;
            if (typeof exonsByRead[qname] === 'undefined') {
                exonsByRead[qname] = new Set();
            }
            exonsByRead[qname].add(exon);
        }

        // Bridge "a-b" (a before b in sorted order) -> names of the reads showing it.
        const readsByBridge = Object.create(null);
        for (const qname of Object.keys(exonsByRead)) {
            const exons = [...exonsByRead[qname]].sort((a, b) => a - b);
            for (let i = 0; i < exons.length; i++) {
                for (let j = i + 1; j < exons.length; j++) {
                    const bridge = exons[i] + '-' + exons[j];
                    if (typeof readsByBridge[bridge] === 'undefined') {
                        readsByBridge[bridge] = [];
                    }
                    readsByBridge[bridge].push(qname);
                }
            }
        }

        transcript.altTranscripts = Object.keys(readsByBridge)
            .map((bridge) => ({ bridge, reads: readsByBridge[bridge] }))
            .sort((a, b) => b.reads.length - a.reads.length);
        await fs_1.default.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcript, null, 4));
    }
})().catch((error) => {
    // Do not leave the promise floating: report the failure and signal it to the caller.
    console.error(error);
    process.exitCode = 1;
});
|