import { spawn } from 'child_process'; import { getSymbol, getFromAcc } from "gbffparser" import { asyncBwaMem, makeReference } from 'aligner' import fs from 'fs' import os from 'os' import path from 'path'; const async_exec = (prog: string, args: string[], onData: Function) => { return new Promise((resolve, reject) => { const child = spawn(prog, args, {shell: true}) child.stdout.on('data', data => onData(data.toString().trim())) // child.stderr.on('data', data => console.log(data.toString().trim())) child.on('error', err => reject(err)) child.on('exit', code => resolve(code)) }) } const openSam = async ( filePaths: string | string[], restraintTo ?: string, count?: boolean ) => { let accum = '' let jsonLines: any[] = [] filePaths = Array.isArray(filePaths) ? filePaths : [filePaths] for (let filePath of filePaths) { let args = ['view'] if (filePath.match(/\.sam$/)) { if(restraintTo) { if(!fs.existsSync(filePath.split(/\.sam$/)[0] + '.bam')) { const tmpBam = path.join(os.tmpdir(), Math.random() + '.bam') await async_exec('sambamba', ['view', '-S', filePath, '-f', 'bam', '>', tmpBam], () => {}) filePath = filePath.split(/\.sam$/)[0] + '.bam' await async_exec('sambamba', ['sort', tmpBam, '-o', filePath], () => {}) await fs.promises.unlink(tmpBam) } else { filePath = filePath.split(/\.sam$/)[0] + '.bam' } } else { args.push('-S') } } if(!count) { args = [...args, '-f', 'json'] } else { //if(restraintTo) { args.push('-c') //} } args.push(filePath) if(restraintTo) args.push(restraintTo) const threads = os.cpus().length - 2 > 0 ? os.cpus().length - 2 : 1 args.push('-t') args.push(String(threads)) console.log(['sambamba', ...args].join(' ')); await async_exec('sambamba', args, (m: string) => { accum+=m accum = accum.replace('}{','}\n{') if(accum.match('\n')) { accum.split('\n').map((e,i,a) => { if (i !== a.length -1) { try { jsonLines.push(JSON.parse(e)) } catch (error) { console.log(error); } } else { accum = e } }) } }) if(accum !== '') { accum = accum.replace('}{','}\n{') accum.split('\n').map((e,i,a) => { try { jsonLines.push(JSON.parse(e)) } catch (error) { console.log(error); } }) } } return jsonLines } const extractReads = ( reads: string[], fastqPaths: string | string[], ) => { fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths] } const analysisTranscript = async ( accession: string, properBam:string, splittersSam: string, disordantsSam: string, rnaDBPath: string[] ) => { const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0] const accJson = await getFromAcc(accessionWoVersion, rnaDBPath) let json: any = { sequence : accJson?.sequence || '', version: accJson?.version, exons: accJson?.features.filter(entry => entry.type === 'exon').map((exon,i) => ({ n: i + 1, start: exon.start, end: exon.end, strand: exon.strand, sequence: accJson.sequence.substring(exon.start-1,exon.end), counts: {}, })) || [], counts: {}, altTranscripts: {} } json.counts.all = await openSam(properBam, accession, true) json.counts.splitters = await openSam(splittersSam, accession, true) json.counts.discordants = await openSam(disordantsSam, accession, true) for (let index = 0; index < json.exons.length; index++) { const exon = json.exons[index]; json.exons[index].counts.all = await openSam(properBam, accession + ':' + exon.start + '-' + exon.end, true) json.exons[index].counts.splitters = await openSam(splittersSam, accession + ':' + exon.start + '-' + exon.end, true) json.exons[index].counts.discordants = await openSam(disordantsSam, accession + ':' + exon.start + '-' + exon.end, true) } if(typeof json.exons !== 'undefined') { const samJSON = await openSam([ 'test/bwa_mem_splitters_on_human_NM.sam', 'test/bwa_mem_discordants_on_human_NM.sam'], accession) const byRead = {} as {[key:string]: number[]} samJSON.map((entry:any) => ({ qname: entry.qname.split('_')[0], pos: entry.pos, exon: json.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0] })) .map((entry:any) => { if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = [] byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b) }) const byAltern = {} as {[key:string]: string[]} Object.keys(byRead).map(qname => { const bridges = byRead[qname].flatMap((e,i) => byRead[qname] .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee)) for (const bridge of bridges) { if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = [] byAltern[bridge].push(qname) } }) json.altTranscripts = Object.keys(byAltern) .map(bridge => ({bridge, reads: byAltern[bridge]})) .sort((a,b) => b.reads.length - a.reads.length) } return json } export { analysisTranscript, openSam } /*(async()=>{ // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa', // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'], // 'TEST', 'TEST', 'test/', console.log) const symbol = 'NOTCH1' const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene' const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt' const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff') const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff') const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath) await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4)) const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA') .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')})) .map((entry:any) => ({ accession: entry.product_accession, genomic_accession: entry.genomic_accession, start: entry.start, end: entry.end, sequence: entry.sequence, exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))] })) .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))})) await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4)) for (let index = 0; index < transcripts.length; index++) { const transcript = transcripts[index] transcripts[index].count = { all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true), splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true), discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true), } const samJSON = await openSam([ 'test/bwa_mem_splitters_on_human_NM.sam', 'test/bwa_mem_discordants_on_human_NM.sam'], transcript.accession) const byRead = {} as {[key:string]: number[]} samJSON.map((entry:any) => ({ qname: entry.qname.split('_')[0], pos: entry.pos, exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0] })) .map((entry:any) => { if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = [] byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b) }) const byAltern = {} as {[key:string]: string[]} Object.keys(byRead).map(qname => { const bridges = byRead[qname].flatMap((e,i) => byRead[qname] .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee)) for (const bridge of bridges) { if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = [] byAltern[bridge].push(qname) } }) transcripts[index].altTranscripts = Object.keys(byAltern) .map(bridge => ({bridge, reads: byAltern[bridge]})) .sort((a,b) => b.reads.length - a.reads.length) await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4)) } })() */