| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245 |
- import { spawn } from 'child_process';
- import { getSymbol, getFromAcc } from "gbffparser"
- import { asyncBwaMem, makeReference } from 'aligner'
- import fs from 'fs'
- import os from 'os'
- import path from 'path';
- const async_exec = (prog: string, args: string[], onData: Function) => {
- return new Promise((resolve, reject) => {
- const child = spawn(prog, args, {shell: true})
- child.stdout.on('data', data => onData(data.toString().trim()))
- // child.stderr.on('data', data => console.log(data.toString().trim()))
- child.on('error', err => reject(err))
- child.on('exit', code => resolve(code))
- })
- }
- const openSam = async (
- filePaths: string | string[],
- restraintTo ?: string,
- count?: boolean
- ) => {
- let accum = ''
- let jsonLines: any[] = []
- filePaths = Array.isArray(filePaths) ? filePaths : [filePaths]
- for (let filePath of filePaths) {
- let args = ['view']
- if (filePath.match(/\.sam$/)) {
- if(restraintTo) {
- if(!fs.existsSync(filePath.split(/\.sam$/)[0] + '.bam')) {
- const tmpBam = path.join(os.tmpdir(), Math.random() + '.bam')
- await async_exec('sambamba', ['view', '-S', filePath, '-f', 'bam', '>', tmpBam], () => {})
- filePath = filePath.split(/\.sam$/)[0] + '.bam'
- await async_exec('sambamba', ['sort', tmpBam, '-o', filePath], () => {})
- await fs.promises.unlink(tmpBam)
- } else {
- filePath = filePath.split(/\.sam$/)[0] + '.bam'
- }
- } else {
- args.push('-S')
- }
- }
- if(!count) {
- args = [...args, '-f', 'json']
- } else {
- //if(restraintTo) {
- args.push('-c')
- //}
- }
- args.push(filePath)
- if(restraintTo) args.push(restraintTo)
- const threads = os.cpus().length - 2 > 0 ? os.cpus().length - 2 : 1
- args.push('-t')
- args.push(String(threads))
- console.log(['sambamba', ...args].join(' '));
-
- await async_exec('sambamba', args, (m: string) => {
- accum+=m
- accum = accum.replace('}{','}\n{')
- if(accum.match('\n')) {
- accum.split('\n').map((e,i,a) => {
- if (i !== a.length -1) {
- try {
- jsonLines.push(JSON.parse(e))
- } catch (error) {
- console.log(error);
- }
- } else {
- accum = e
- }
- })
- }
- })
- if(accum !== '') {
- accum = accum.replace('}{','}\n{')
- accum.split('\n').map((e,i,a) => {
- try {
- jsonLines.push(JSON.parse(e))
- } catch (error) {
- console.log(error);
- }
- })
- }
- }
- return jsonLines
- }
- const extractReads = (
- reads: string[],
- fastqPaths: string | string[],
- ) => {
- fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths]
- }
- const analysisTranscript = async (
- accession: string,
- properBam:string,
- splittersSam: string,
- disordantsSam: string,
- rnaDBPath: string[]
- ) => {
- const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0]
- const accJson = await getFromAcc(accessionWoVersion, rnaDBPath)
- let json: any = {
- sequence : accJson?.sequence || '',
- version: accJson?.version,
- exons: accJson?.features.filter(entry => entry.type === 'exon').map((exon,i) => ({
- n: i + 1,
- start: exon.start,
- end: exon.end,
- strand: exon.strand,
- sequence: accJson.sequence.substring(exon.start-1,exon.end),
- counts: {},
- })) || [],
- counts: {},
- altTranscripts: {}
- }
- json.counts.all = await openSam(properBam, accession, true)
- json.counts.splitters = await openSam(splittersSam, accession, true)
- json.counts.discordants = await openSam(disordantsSam, accession, true)
- for (let index = 0; index < json.exons.length; index++) {
- const exon = json.exons[index];
- json.exons[index].counts.all = await openSam(properBam, accession + ':' + exon.start + '-' + exon.end, true)
- json.exons[index].counts.splitters = await openSam(splittersSam, accession + ':' + exon.start + '-' + exon.end, true)
- json.exons[index].counts.discordants = await openSam(disordantsSam, accession + ':' + exon.start + '-' + exon.end, true)
- }
- if(typeof json.exons !== 'undefined') {
- const samJSON = await openSam([
- 'test/bwa_mem_splitters_on_human_NM.sam',
- 'test/bwa_mem_discordants_on_human_NM.sam'],
- accession)
-
- const byRead = {} as {[key:string]: number[]}
- samJSON.map((entry:any) => ({
- qname: entry.qname.split('_')[0],
- pos: entry.pos,
- exon: json.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
- }))
- .map((entry:any) => {
- if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
- byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
- })
-
- const byAltern = {} as {[key:string]: string[]}
- Object.keys(byRead).map(qname => {
- const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
- .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
- for (const bridge of bridges) {
- if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
- byAltern[bridge].push(qname)
- }
- })
-
- json.altTranscripts = Object.keys(byAltern)
- .map(bridge => ({bridge, reads: byAltern[bridge]}))
- .sort((a,b) => b.reads.length - a.reads.length)
- }
- return json
- }
- export { analysisTranscript, openSam }
- /*(async()=>{
- // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
- // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
- // 'TEST', 'TEST', 'test/', console.log)
-
- const symbol = 'NOTCH1'
- const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene'
- const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt'
- const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
- const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff')
-
- const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath)
- await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4))
- const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA')
- .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')}))
- .map((entry:any) => ({
- accession: entry.product_accession,
- genomic_accession: entry.genomic_accession,
- start: entry.start,
- end: entry.end,
- sequence: entry.sequence,
- exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))]
- }))
- .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))}))
- await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4))
-
- for (let index = 0; index < transcripts.length; index++) {
- const transcript = transcripts[index]
-
- transcripts[index].count = {
- all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
- splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
- discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
- }
- const samJSON = await openSam([
- 'test/bwa_mem_splitters_on_human_NM.sam',
- 'test/bwa_mem_discordants_on_human_NM.sam'],
- transcript.accession)
- const byRead = {} as {[key:string]: number[]}
- samJSON.map((entry:any) => ({
- qname: entry.qname.split('_')[0],
- pos: entry.pos,
- exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
- }))
- .map((entry:any) => {
- if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
- byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
- })
- const byAltern = {} as {[key:string]: string[]}
- Object.keys(byRead).map(qname => {
- const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
- .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
- for (const bridge of bridges) {
- if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
- byAltern[bridge].push(qname)
- }
- })
- transcripts[index].altTranscripts = Object.keys(byAltern)
- .map(bridge => ({bridge, reads: byAltern[bridge]}))
- .sort((a,b) => b.reads.length - a.reads.length)
- await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4))
- }
- })()
- */
|