"use strict";
/**
 * Helpers that query SAM/BAM alignment files through the `sambamba` command
 * line tool and summarise the reads mapped on a transcript, exon by exon.
 *
 * Exports: `openSam`, `analysisTranscript`.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.openSam = exports.analysisTranscript = void 0;
const { spawn } = require("child_process");
const { getFromAcc } = require("gbffparser");
const fs = require("fs");
const os = require("os");
const path = require("path");

/**
 * Run a command through the shell and stream its standard output.
 *
 * NOTE(review): the command goes through a shell (`shell: true`) because one
 * caller relies on `>` redirection; arguments are therefore NOT escaped and
 * paths containing spaces or shell metacharacters will be misinterpreted.
 *
 * @param {string} prog - Program to run.
 * @param {string[]} args - Arguments, joined to `prog` by the shell.
 * @param {(chunk: string) => void} onData - Called with every raw stdout
 *   chunk (UTF-8 decoded, not trimmed, may end in the middle of a line).
 * @returns {Promise<number|null>} Resolves with the exit code once the process
 *   has ended and its stdio streams are closed; rejects only if the process
 *   could not be spawned. A non-zero exit code does not reject.
 */
const asyncExec = (prog, args, onData) => new Promise((resolve, reject) => {
    const child = spawn(prog, args, { shell: true });
    // Decode on the stream so multi-byte characters split across chunks survive.
    child.stdout.setEncoding('utf8');
    child.stdout.on('data', (chunk) => onData(chunk));
    // Nobody reads stderr: drain it so a chatty child cannot block on a full pipe.
    child.stderr.resume();
    child.on('error', (err) => reject(err));
    // 'close' (unlike 'exit') fires only after stdout has been fully delivered.
    child.on('close', (code) => resolve(code));
});

/**
 * Parse one line of sambamba output as JSON and append it to `records`.
 * Blank lines are skipped; unparsable lines are logged and skipped.
 *
 * @param {string} line - One output line (surrounding whitespace is ignored).
 * @param {Array} records - Array receiving the parsed value.
 */
const pushParsedLine = (line, records) => {
    const text = line.trim();
    if (text === '') {
        return;
    }
    try {
        records.push(JSON.parse(text));
    }
    catch (error) {
        console.log(error);
    }
};

/**
 * Run `sambamba view` on one or several SAM/BAM files and collect its output.
 *
 * When a region is given for a `.sam` file, the sibling `.bam` file (same
 * path with the extension swapped) is queried instead; if it does not exist
 * yet it is created with `sambamba view` + `sambamba sort`.
 *
 * @param {string|string[]} filePaths - Path(s) of the file(s) to read.
 * @param {string} [restraintTo] - Optional region passed to sambamba after
 *   the file name (e.g. `accession` or `accession:start-end`).
 * @param {boolean} [count] - When truthy run `sambamba view -c`, otherwise
 *   `sambamba view -f json`.
 * @returns {Promise<Array>} Every output line parsed as JSON, for all files in
 *   order. In count mode each file contributes whatever `-c` prints
 *   (presumably a single number).
 */
const openSam = async (filePaths, restraintTo, count) => {
    const records = [];
    const inputs = Array.isArray(filePaths) ? filePaths : [filePaths];
    // Leave two cores free, but always use at least one thread.
    const threads = Math.max(1, os.cpus().length - 2);
    for (const input of inputs) {
        const args = ['view'];
        let target = input;
        if (/\.sam$/.test(input)) {
            if (restraintTo) {
                target = input.replace(/\.sam$/, '.bam');
                if (!fs.existsSync(target)) {
                    const tmpBam = path.join(os.tmpdir(), Math.random() + '.bam');
                    await asyncExec('sambamba', ['view', '-S', input, '-f', 'bam', '>', tmpBam], () => { });
                    await asyncExec('sambamba', ['sort', tmpBam, '-o', target], () => { });
                    await fs.promises.unlink(tmpBam);
                }
            }
            else {
                args.push('-S');
            }
        }
        if (count) {
            args.push('-c');
        }
        else {
            args.push('-f', 'json');
        }
        args.push(target);
        if (restraintTo) {
            args.push(restraintTo);
        }
        args.push('-t', String(threads));
        console.log(['sambamba', ...args].join(' '));
        // Per-file buffer holding the trailing, not yet terminated line. It must
        // not outlive the file, otherwise its content leaks into the next one.
        let pending = '';
        await asyncExec('sambamba', args, (chunk) => {
            pending += chunk;
            const lines = pending.split('\n');
            pending = lines.pop();
            for (const line of lines) {
                pushParsedLine(line, records);
            }
        });
        // Flush the last line when the output did not end with a newline.
        pushParsedLine(pending, records);
    }
    return records;
};
exports.openSam = openSam;

// Unfinished placeholder: only normalises `fastqPaths` to an array and returns
// nothing. Not exported and not called anywhere in this file.
const extractReads = (reads, fastqPaths) => {
    fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths];
};

/**
 * Group alignments by read, find which exons each read touches, and rank the
 * exon pairs ("bridges") by the number of reads touching both exons.
 *
 * @param {Array<{qname: string, pos: number}>} alignments - Alignment records.
 * @param {Array<{start: number, end: number}>} exons - Exons; exon numbers
 *   are 1-based positions in this array.
 * @returns {Array<{bridge: string, reads: string[]}>} One entry per exon pair
 *   `"a-b"` (a < b), sorted by decreasing number of reads.
 */
const rankExonBridges = (alignments, exons) => {
    // Null-prototype dictionaries: read names such as "constructor" are safe,
    // and key iteration order is the same as for a plain object.
    const exonsByRead = Object.create(null);
    for (const { qname, pos } of alignments) {
        // Only the part before the first '_' identifies the read
        // (presumably strips a suffix added upstream - TODO confirm).
        const read = qname.split('_')[0];
        if (exonsByRead[read] === undefined) {
            exonsByRead[read] = new Set();
        }
        const hit = exons.findIndex((exon) => exon.start <= pos && exon.end >= pos);
        // Positions outside every exon are ignored instead of producing
        // meaningless "N-undefined" bridges.
        if (hit !== -1) {
            exonsByRead[read].add(hit + 1);
        }
    }
    const readsByBridge = Object.create(null);
    for (const read of Object.keys(exonsByRead)) {
        const touched = [...exonsByRead[read]].sort((a, b) => a - b);
        for (let i = 0; i < touched.length; i++) {
            for (let j = i + 1; j < touched.length; j++) {
                const bridge = touched[i] + '-' + touched[j];
                if (readsByBridge[bridge] === undefined) {
                    readsByBridge[bridge] = [];
                }
                readsByBridge[bridge].push(read);
            }
        }
    }
    return Object.keys(readsByBridge)
        .map((bridge) => ({ bridge, reads: readsByBridge[bridge] }))
        .sort((a, b) => b.reads.length - a.reads.length);
};

/**
 * Summarise the reads aligned on one transcript.
 *
 * The record returned by `getFromAcc` is read for `sequence`, `version` and
 * `features`; features of type `exon` become the exon list. Read counts are
 * then collected for the whole transcript and for every exon, and exon pairs
 * shared by splitter/discordant reads are ranked.
 *
 * @param {string} accession - Transcript accession, with or without version;
 *   used as is for the sambamba region, and without version for the lookup.
 * @param {string|string[]} properBam - File(s) counted under `counts.all`.
 * @param {string|string[]} splittersSam - File(s) counted under
 *   `counts.splitters`.
 * @param {string|string[]} disordantsSam - File(s) counted under
 *   `counts.discordants`.
 * @param {*} rnaDBPath - Forwarded untouched to `getFromAcc`.
 * @returns {Promise<{sequence: string, version: *, exons: Array, counts: Object, altTranscripts: Array}>}
 *   `counts.*` hold the arrays returned by `openSam` in count mode.
 */
const analysisTranscript = async (accession, properBam, splittersSam, disordantsSam, rnaDBPath) => {
    const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0];
    const accJson = await getFromAcc(accessionWoVersion, rnaDBPath);
    const found = accJson !== null && accJson !== undefined;
    const sequence = (found && accJson.sequence) || '';
    const exons = found
        ? accJson.features
            .filter((feature) => feature.type === 'exon')
            .map((exon, i) => ({
                n: i + 1,
                start: exon.start,
                end: exon.end,
                strand: exon.strand,
                sequence: sequence.substring(exon.start - 1, exon.end),
                counts: {},
            }))
        : [];
    const json = {
        sequence,
        version: found ? accJson.version : undefined,
        exons,
        counts: {},
        altTranscripts: [],
    };
    // Queries run one after the other on purpose: each sambamba call already
    // uses almost every core.
    json.counts.all = await openSam(properBam, accession, true);
    json.counts.splitters = await openSam(splittersSam, accession, true);
    json.counts.discordants = await openSam(disordantsSam, accession, true);
    for (const exon of json.exons) {
        const region = accession + ':' + exon.start + '-' + exon.end;
        exon.counts.all = await openSam(properBam, region, true);
        exon.counts.splitters = await openSam(splittersSam, region, true);
        exon.counts.discordants = await openSam(disordantsSam, region, true);
    }
    // Use the files supplied by the caller; `concat` also accepts arrays of paths.
    const samJSON = await openSam([].concat(splittersSam, disordantsSam), accession);
    json.altTranscripts = rankExonBridges(samJSON, json.exons);
    return json;
};
exports.analysisTranscript = analysisTranscript;

/*
Development scratch code kept for reference (TypeScript, never executed):

(async()=>{
    // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
    //     ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
    //     'TEST', 'TEST', 'test/', console.log)

    const symbol = 'NOTCH1'
    const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene'
    const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt'
    const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
    const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff')
    const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath)
    await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4))
    const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA')
        .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')}))
        .map((entry:any) => ({
            accession: entry.product_accession,
            genomic_accession: entry.genomic_accession,
            start: entry.start,
            end: entry.end,
            sequence: entry.sequence,
            exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))]
        }))
        .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))}))
    await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4))
    for (let index = 0; index < transcripts.length; index++) {
        const transcript = transcripts[index]
        transcripts[index].count = {
            all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
            splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
            discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
        }
        const samJSON = await openSam([
            'test/bwa_mem_splitters_on_human_NM.sam',
            'test/bwa_mem_discordants_on_human_NM.sam'], transcript.accession)
        const byRead = {} as {[key:string]: number[]}
        samJSON.map((entry:any) => ({
            qname: entry.qname.split('_')[0],
            pos: entry.pos,
            exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
        }))
        .map((entry:any) => {
            if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
            byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
        })
        const byAltern = {} as {[key:string]: string[]}
        Object.keys(byRead).map(qname => {
            const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
                .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
            for (const bridge of bridges) {
                if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
                byAltern[bridge].push(qname)
            }
        })
        transcripts[index].altTranscripts = Object.keys(byAltern)
            .map(bridge => ({bridge, reads: byAltern[bridge]}))
            .sort((a,b) => b.reads.length - a.reads.length)
        await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4))
    }
})()
*/