| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- "use strict";
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
- return new (P || (P = Promise))(function (resolve, reject) {
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
- step((generator = generator.apply(thisArg, _arguments || [])).next());
- });
- };
- var __importDefault = (this && this.__importDefault) || function (mod) {
- return (mod && mod.__esModule) ? mod : { "default": mod };
- };
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.openSam = exports.analysisTranscript = void 0;
- const child_process_1 = require("child_process");
- const gbffparser_1 = require("gbffparser");
- const fs_1 = __importDefault(require("fs"));
- const os_1 = __importDefault(require("os"));
- const path_1 = __importDefault(require("path"));
- const async_exec = (prog, args, onData) => {
- return new Promise((resolve, reject) => {
- const child = (0, child_process_1.spawn)(prog, args, { shell: true });
- child.stdout.on('data', data => onData(data.toString().trim()));
- // child.stderr.on('data', data => console.log(data.toString().trim()))
- child.on('error', err => reject(err));
- child.on('exit', code => resolve(code));
- });
- };
- const openSam = (filePaths, restraintTo, count) => __awaiter(void 0, void 0, void 0, function* () {
- let accum = '';
- let jsonLines = [];
- filePaths = Array.isArray(filePaths) ? filePaths : [filePaths];
- for (let filePath of filePaths) {
- let args = ['view'];
- if (filePath.match(/\.sam$/)) {
- if (restraintTo) {
- if (!fs_1.default.existsSync(filePath.split(/\.sam$/)[0] + '.bam')) {
- const tmpBam = path_1.default.join(os_1.default.tmpdir(), Math.random() + '.bam');
- yield async_exec('sambamba', ['view', '-S', filePath, '-f', 'bam', '>', tmpBam], () => { });
- filePath = filePath.split(/\.sam$/)[0] + '.bam';
- yield async_exec('sambamba', ['sort', tmpBam, '-o', filePath], () => { });
- yield fs_1.default.promises.unlink(tmpBam);
- }
- else {
- filePath = filePath.split(/\.sam$/)[0] + '.bam';
- }
- }
- else {
- args.push('-S');
- }
- }
- if (!count) {
- args = [...args, '-f', 'json'];
- }
- else {
- //if(restraintTo) {
- args.push('-c');
- //}
- }
- args.push(filePath);
- if (restraintTo)
- args.push(restraintTo);
- const threads = os_1.default.cpus().length - 2 > 0 ? os_1.default.cpus().length - 2 : 1;
- args.push('-t');
- args.push(String(threads));
- console.log(['sambamba', ...args].join(' '));
- yield async_exec('sambamba', args, (m) => {
- accum += m;
- accum = accum.replace('}{', '}\n{');
- if (accum.match('\n')) {
- accum.split('\n').map((e, i, a) => {
- if (i !== a.length - 1) {
- try {
- jsonLines.push(JSON.parse(e));
- }
- catch (error) {
- console.log(error);
- }
- }
- else {
- accum = e;
- }
- });
- }
- });
- if (accum !== '') {
- accum = accum.replace('}{', '}\n{');
- accum.split('\n').map((e, i, a) => {
- try {
- jsonLines.push(JSON.parse(e));
- }
- catch (error) {
- console.log(error);
- }
- });
- }
- }
- return jsonLines;
- });
- exports.openSam = openSam;
- const extractReads = (reads, fastqPaths) => {
- fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths];
- };
- const analysisTranscript = (accession, properBam, splittersSam, disordantsSam, rnaDBPath) => __awaiter(void 0, void 0, void 0, function* () {
- const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0];
- const accJson = yield (0, gbffparser_1.getFromAcc)(accessionWoVersion, rnaDBPath);
- let json = {
- sequence: (accJson === null || accJson === void 0 ? void 0 : accJson.sequence) || '',
- version: accJson === null || accJson === void 0 ? void 0 : accJson.version,
- exons: (accJson === null || accJson === void 0 ? void 0 : accJson.features.filter(entry => entry.type === 'exon').map((exon, i) => ({
- n: i + 1,
- start: exon.start,
- end: exon.end,
- strand: exon.strand,
- sequence: accJson.sequence.substring(exon.start - 1, exon.end),
- counts: {},
- }))) || [],
- counts: {},
- altTranscripts: {}
- };
- json.counts.all = yield openSam(properBam, accession, true);
- json.counts.splitters = yield openSam(splittersSam, accession, true);
- json.counts.discordants = yield openSam(disordantsSam, accession, true);
- for (let index = 0; index < json.exons.length; index++) {
- const exon = json.exons[index];
- json.exons[index].counts.all = yield openSam(properBam, accession + ':' + exon.start + '-' + exon.end, true);
- json.exons[index].counts.splitters = yield openSam(splittersSam, accession + ':' + exon.start + '-' + exon.end, true);
- json.exons[index].counts.discordants = yield openSam(disordantsSam, accession + ':' + exon.start + '-' + exon.end, true);
- }
- if (typeof json.exons !== 'undefined') {
- const samJSON = yield openSam([
- 'test/bwa_mem_splitters_on_human_NM.sam',
- 'test/bwa_mem_discordants_on_human_NM.sam'
- ], accession);
- const byRead = {};
- samJSON.map((entry) => ({
- qname: entry.qname.split('_')[0],
- pos: entry.pos,
- exon: json.exons.flatMap((exon, i) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
- }))
- .map((entry) => {
- if (typeof byRead[entry.qname] === 'undefined')
- byRead[entry.qname] = [];
- byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a, b) => a - b);
- });
- const byAltern = {};
- Object.keys(byRead).map(qname => {
- const bridges = byRead[qname].flatMap((e, i) => byRead[qname]
- .flatMap((ee, ii) => i === ii || i >= ii ? [] : e + '-' + ee));
- for (const bridge of bridges) {
- if (typeof byAltern[bridge] === 'undefined')
- byAltern[bridge] = [];
- byAltern[bridge].push(qname);
- }
- });
- json.altTranscripts = Object.keys(byAltern)
- .map(bridge => ({ bridge, reads: byAltern[bridge] }))
- .sort((a, b) => b.reads.length - a.reads.length);
- }
- return json;
- });
- exports.analysisTranscript = analysisTranscript;
- /*(async()=>{
- // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
- // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
- // 'TEST', 'TEST', 'test/', console.log)
-
- const symbol = 'NOTCH1'
- const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene'
- const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt'
- const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
- const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff')
-
- const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath)
- await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4))
- const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA')
- .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')}))
- .map((entry:any) => ({
- accession: entry.product_accession,
- genomic_accession: entry.genomic_accession,
- start: entry.start,
- end: entry.end,
- sequence: entry.sequence,
- exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))]
- }))
- .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))}))
- await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4))
-
- for (let index = 0; index < transcripts.length; index++) {
- const transcript = transcripts[index]
-
- transcripts[index].count = {
- all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
- splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
- discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
- }
- const samJSON = await openSam([
- 'test/bwa_mem_splitters_on_human_NM.sam',
- 'test/bwa_mem_discordants_on_human_NM.sam'],
- transcript.accession)
- const byRead = {} as {[key:string]: number[]}
- samJSON.map((entry:any) => ({
- qname: entry.qname.split('_')[0],
- pos: entry.pos,
- exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
- }))
- .map((entry:any) => {
- if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
- byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
- })
- const byAltern = {} as {[key:string]: string[]}
- Object.keys(byRead).map(qname => {
- const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
- .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
- for (const bridge of bridges) {
- if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
- byAltern[bridge].push(qname)
- }
- })
- transcripts[index].altTranscripts = Object.keys(byAltern)
- .map(bridge => ({bridge, reads: byAltern[bridge]}))
- .sort((a,b) => b.reads.length - a.reads.length)
- await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4))
- }
- })()
- */
|