|
|
@@ -12,6 +12,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
|
};
|
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
+exports.openSam = exports.analysisTranscript = void 0;
|
|
|
const child_process_1 = require("child_process");
|
|
|
const gbffparser_1 = require("gbffparser");
|
|
|
const fs_1 = __importDefault(require("fs"));
|
|
|
@@ -21,7 +22,7 @@ const async_exec = (prog, args, onData) => {
|
|
|
return new Promise((resolve, reject) => {
|
|
|
const child = (0, child_process_1.spawn)(prog, args, { shell: true });
|
|
|
child.stdout.on('data', data => onData(data.toString().trim()));
|
|
|
- child.stderr.on('data', data => console.log(data.toString().trim()));
|
|
|
+ // child.stderr.on('data', data => console.log(data.toString().trim()))
|
|
|
child.on('error', err => reject(err));
|
|
|
child.on('exit', code => resolve(code));
|
|
|
});
|
|
|
@@ -53,13 +54,16 @@ const openSam = (filePaths, restraintTo, count) => __awaiter(void 0, void 0, voi
|
|
|
args = [...args, '-f', 'json'];
|
|
|
}
|
|
|
else {
|
|
|
- if (restraintTo) {
|
|
|
- args.push('-c');
|
|
|
- }
|
|
|
+ //if(restraintTo) {
|
|
|
+ args.push('-c');
|
|
|
+ //}
|
|
|
}
|
|
|
args.push(filePath);
|
|
|
if (restraintTo)
|
|
|
args.push(restraintTo);
|
|
|
+ const threads = os_1.default.cpus().length - 2 > 0 ? os_1.default.cpus().length - 2 : 1;
|
|
|
+ args.push('-t');
|
|
|
+ args.push(String(threads));
|
|
|
console.log(['sambamba', ...args].join(' '));
|
|
|
yield async_exec('sambamba', args, (m) => {
|
|
|
accum += m;
|
|
|
@@ -94,45 +98,46 @@ const openSam = (filePaths, restraintTo, count) => __awaiter(void 0, void 0, voi
|
|
|
}
|
|
|
return jsonLines;
|
|
|
});
|
|
|
-(() => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
- // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
|
|
|
- // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
|
|
|
- // 'TEST', 'TEST', 'test/', console.log)
|
|
|
- const symbol = 'NOTCH1';
|
|
|
- const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene';
|
|
|
- const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt';
|
|
|
- const rnaDBPath = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff');
|
|
|
- const geneDBPath = [1, 2, 3, 4, 5, 6, 7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff');
|
|
|
- const geneInfo = yield (0, gbffparser_1.getSymbol)(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath);
|
|
|
- yield fs_1.default.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry) => entry.feature === 'mRNA'), null, 4));
|
|
|
- const transcripts = geneInfo.filter((entry) => entry.feature === 'mRNA')
|
|
|
- .map((entry) => (Object.assign(Object.assign({}, entry), { sequence: entry.data.sequence, data: entry.data.features.filter((feature) => feature.type === 'exon') })))
|
|
|
- .map((entry) => ({
|
|
|
- accession: entry.product_accession,
|
|
|
- genomic_accession: entry.genomic_accession,
|
|
|
- start: entry.start,
|
|
|
- end: entry.end,
|
|
|
- sequence: entry.sequence,
|
|
|
- exons: [...entry.data.map((d) => ({ start: d.start, end: d.end }))]
|
|
|
- }))
|
|
|
- .map((entry) => (Object.assign(Object.assign({}, entry), { exons: entry.exons.map((exon) => (Object.assign(Object.assign({}, exon), { sequence: entry.sequence.substring(exon.start - 1, exon.end) }))) })));
|
|
|
- yield fs_1.default.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4));
|
|
|
- for (let index = 0; index < transcripts.length; index++) {
|
|
|
- const transcript = transcripts[index];
|
|
|
- transcripts[index].count = {
|
|
|
- all: yield openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
|
|
|
- splitters: yield openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
|
|
|
- discordants: yield openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
|
|
|
- };
|
|
|
+exports.openSam = openSam;
|
|
|
+const extractReads = (reads, fastqPaths) => {
|
|
|
+ fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths];
|
|
|
+};
|
|
|
+const analysisTranscript = (accession, properBam, splittersSam, disordantsSam, rnaDBPath) => __awaiter(void 0, void 0, void 0, function* () {
|
|
|
+ const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0];
|
|
|
+ const accJson = yield (0, gbffparser_1.getFromAcc)(accessionWoVersion, rnaDBPath);
|
|
|
+ let json = {
|
|
|
+ sequence: (accJson === null || accJson === void 0 ? void 0 : accJson.sequence) || '',
|
|
|
+ version: accJson === null || accJson === void 0 ? void 0 : accJson.version,
|
|
|
+ exons: (accJson === null || accJson === void 0 ? void 0 : accJson.features.filter(entry => entry.type === 'exon').map((exon, i) => ({
|
|
|
+ n: i + 1,
|
|
|
+ start: exon.start,
|
|
|
+ end: exon.end,
|
|
|
+ strand: exon.strand,
|
|
|
+ sequence: accJson.sequence.substring(exon.start - 1, exon.end),
|
|
|
+ counts: {},
|
|
|
+ }))) || [],
|
|
|
+ counts: {},
|
|
|
+ altTranscripts: {}
|
|
|
+ };
|
|
|
+ json.counts.all = yield openSam(properBam, accession, true);
|
|
|
+ json.counts.splitters = yield openSam(splittersSam, accession, true);
|
|
|
+ json.counts.discordants = yield openSam(disordantsSam, accession, true);
|
|
|
+ for (let index = 0; index < json.exons.length; index++) {
|
|
|
+ const exon = json.exons[index];
|
|
|
+ json.exons[index].counts.all = yield openSam(properBam, accession + ':' + exon.start + '-' + exon.end, true);
|
|
|
+ json.exons[index].counts.splitters = yield openSam(splittersSam, accession + ':' + exon.start + '-' + exon.end, true);
|
|
|
+ json.exons[index].counts.discordants = yield openSam(disordantsSam, accession + ':' + exon.start + '-' + exon.end, true);
|
|
|
+ }
|
|
|
+ if (typeof json.exons !== 'undefined') {
|
|
|
const samJSON = yield openSam([
|
|
|
'test/bwa_mem_splitters_on_human_NM.sam',
|
|
|
'test/bwa_mem_discordants_on_human_NM.sam'
|
|
|
- ], transcript.accession);
|
|
|
+ ], accession);
|
|
|
const byRead = {};
|
|
|
samJSON.map((entry) => ({
|
|
|
qname: entry.qname.split('_')[0],
|
|
|
pos: entry.pos,
|
|
|
- exon: transcript.exons.flatMap((exon, i) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
|
|
|
+ exon: json.exons.flatMap((exon, i) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
|
|
|
}))
|
|
|
.map((entry) => {
|
|
|
if (typeof byRead[entry.qname] === 'undefined')
|
|
|
@@ -149,9 +154,81 @@ const openSam = (filePaths, restraintTo, count) => __awaiter(void 0, void 0, voi
|
|
|
byAltern[bridge].push(qname);
|
|
|
}
|
|
|
});
|
|
|
- transcripts[index].altTranscripts = Object.keys(byAltern)
|
|
|
+ json.altTranscripts = Object.keys(byAltern)
|
|
|
.map(bridge => ({ bridge, reads: byAltern[bridge] }))
|
|
|
.sort((a, b) => b.reads.length - a.reads.length);
|
|
|
- yield fs_1.default.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4));
|
|
|
}
|
|
|
-}))();
|
|
|
+ return json;
|
|
|
+});
|
|
|
+exports.analysisTranscript = analysisTranscript;
|
|
|
+/*(async()=>{
|
|
|
+ // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
|
|
|
+ // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
|
|
|
+ // 'TEST', 'TEST', 'test/', console.log)
|
|
|
+
|
|
|
+ const symbol = 'NOTCH1'
|
|
|
+
|
|
|
+ const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene'
|
|
|
+ const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt'
|
|
|
+
|
|
|
+ const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
|
|
|
+ const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff')
|
|
|
+
|
|
|
+ const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath)
|
|
|
+ await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4))
|
|
|
+ const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA')
|
|
|
+ .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')}))
|
|
|
+ .map((entry:any) => ({
|
|
|
+ accession: entry.product_accession,
|
|
|
+ genomic_accession: entry.genomic_accession,
|
|
|
+ start: entry.start,
|
|
|
+ end: entry.end,
|
|
|
+ sequence: entry.sequence,
|
|
|
+ exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))]
|
|
|
+ }))
|
|
|
+ .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))}))
|
|
|
+ await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4))
|
|
|
+
|
|
|
+ for (let index = 0; index < transcripts.length; index++) {
|
|
|
+ const transcript = transcripts[index]
|
|
|
+
|
|
|
+ transcripts[index].count = {
|
|
|
+ all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
|
|
|
+ splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
|
|
|
+ discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
|
|
|
+ }
|
|
|
+
|
|
|
+ const samJSON = await openSam([
|
|
|
+ 'test/bwa_mem_splitters_on_human_NM.sam',
|
|
|
+ 'test/bwa_mem_discordants_on_human_NM.sam'],
|
|
|
+ transcript.accession)
|
|
|
+
|
|
|
+ const byRead = {} as {[key:string]: number[]}
|
|
|
+ samJSON.map((entry:any) => ({
|
|
|
+ qname: entry.qname.split('_')[0],
|
|
|
+ pos: entry.pos,
|
|
|
+ exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
|
|
|
+ }))
|
|
|
+ .map((entry:any) => {
|
|
|
+ if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
|
|
|
+ byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
|
|
|
+ })
|
|
|
+
|
|
|
+ const byAltern = {} as {[key:string]: string[]}
|
|
|
+ Object.keys(byRead).map(qname => {
|
|
|
+ const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
|
|
|
+ .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
|
|
|
+ for (const bridge of bridges) {
|
|
|
+ if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
|
|
|
+ byAltern[bridge].push(qname)
|
|
|
+ }
|
|
|
+ })
|
|
|
+
|
|
|
+ transcripts[index].altTranscripts = Object.keys(byAltern)
|
|
|
+ .map(bridge => ({bridge, reads: byAltern[bridge]}))
|
|
|
+ .sort((a,b) => b.reads.length - a.reads.length)
|
|
|
+
|
|
|
+ await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4))
|
|
|
+ }
|
|
|
+})()
|
|
|
+*/
|