// index.js (11 KB)
  1. "use strict";
  /**
   * TypeScript-emitted helper that drives a generator function as an async
   * function: every yielded value is awaited and its settled result is fed
   * back into the generator.
   *
   * @param {*} thisArg - `this` value used when invoking `generator`.
   * @param {Array} [_arguments] - Arguments forwarded to `generator`.
   * @param {Function} [P] - Promise constructor to use; defaults to `Promise`.
   * @param {Function} generator - Generator function to run.
   * @returns {Promise<*>} Resolves with the generator's return value, rejects
   *   with any error thrown out of the generator.
   */
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
      // Wrap plain values so that `.then` is always available.
      function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
      return new (P || (P = Promise))(function (resolve, reject) {
          // Resume the generator with a fulfilled value.
          function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
          // Throw a rejection into the generator so its try/catch can handle it.
          function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
          function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
          step((generator = generator.apply(thisArg, _arguments || [])).next());
      });
  };
  /**
   * TypeScript-emitted interop helper: returns `mod` unchanged when it is
   * flagged as an ES module, otherwise wraps it as `{ default: mod }` so that
   * `.default` can be read uniformly.
   *
   * @param {*} mod - The value returned by `require`.
   * @returns {{default: *}} The module itself or a wrapper exposing it as `default`.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  14. Object.defineProperty(exports, "__esModule", { value: true });
  15. exports.openSam = exports.analysisTranscript = void 0;
  16. const child_process_1 = require("child_process");
  17. const gbffparser_1 = require("gbffparser");
  18. const fs_1 = __importDefault(require("fs"));
  19. const os_1 = __importDefault(require("os"));
  20. const path_1 = __importDefault(require("path"));
  /**
   * Spawns `prog` through the shell and streams its stdout to `onData`.
   *
   * @param {string} prog - Executable to run.
   * @param {string[]} args - Arguments. The command runs with `shell: true`,
   *   so shell operators placed in `args` (e.g. `>`) are interpreted by the shell.
   * @param {(chunk: string) => void} onData - Called for every stdout chunk,
   *   converted to a string and trimmed.
   * @returns {Promise<number|null>} Resolves with the exit code (whatever its
   *   value) once the child's stdio streams are closed; rejects if the child
   *   emits `'error'`.
   */
  const async_exec = (prog, args, onData) => {
      return new Promise((resolve, reject) => {
          const child = (0, child_process_1.spawn)(prog, args, { shell: true });
          child.stdout.on('data', (data) => onData(data.toString().trim()));
          child.on('error', (err) => reject(err));
          // Settle on 'close' rather than 'exit': 'exit' may fire while stdout
          // still holds unread data, so resolving there can drop the final chunks.
          child.on('close', (code) => resolve(code));
      });
  };
  /**
   * Runs `sambamba view` on one or several SAM/BAM files and returns the
   * parsed output of all of them.
   *
   * When `restraintTo` is given and a path ends in `.sam`, the sibling `.bam`
   * file is used instead; if it does not exist yet it is produced with
   * `sambamba view -f bam` followed by `sambamba sort`.
   *
   * NOTE(review): paths and the region string are handed to a shell by
   * `async_exec`; confirm they never come from untrusted input.
   *
   * @param {string|string[]} filePaths - One path or a list of paths.
   * @param {string} [restraintTo] - Optional region argument appended after the file path.
   * @param {boolean} [count] - When truthy `-c` is passed, otherwise `-f json`.
   * @returns {Promise<Array>} Every successfully JSON-parsed output line, in
   *   file order (in count mode: one parsed value per file). Lines that fail
   *   to parse are logged and skipped.
   */
  const openSam = (filePaths, restraintTo, count) => __awaiter(void 0, void 0, void 0, function* () {
      const samExtension = /\.sam$/;
      const jsonLines = [];
      // Best-effort parse of a single output line: log and skip on failure.
      const parseInto = (line) => {
          try {
              jsonLines.push(JSON.parse(line));
          }
          catch (error) {
              console.log(error);
          }
      };
      const inputs = Array.isArray(filePaths) ? filePaths : [filePaths];
      // Leave two cores free, but always use at least one thread.
      const cpuCount = os_1.default.cpus().length;
      const threads = cpuCount - 2 > 0 ? cpuCount - 2 : 1;
      for (let filePath of inputs) {
          // The buffer is per file: carrying it over to the next file would
          // re-parse the previous file's last record (or, in count mode, glue
          // two counts together, e.g. "12" + "7" -> 127).
          let accum = '';
          const args = ['view'];
          if (samExtension.test(filePath)) {
              if (restraintTo) {
                  const bamPath = filePath.replace(samExtension, '') + '.bam';
                  if (!fs_1.default.existsSync(bamPath)) {
                      const tmpBam = path_1.default.join(os_1.default.tmpdir(), Math.random() + '.bam');
                      yield async_exec('sambamba', ['view', '-S', filePath, '-f', 'bam', '>', tmpBam], () => { });
                      yield async_exec('sambamba', ['sort', tmpBam, '-o', bamPath], () => { });
                      yield fs_1.default.promises.unlink(tmpBam);
                  }
                  filePath = bamPath;
              }
              else {
                  args.push('-S');
              }
          }
          if (!count) {
              args.push('-f', 'json');
          }
          else {
              args.push('-c');
          }
          args.push(filePath);
          if (restraintTo) {
              args.push(restraintTo);
          }
          args.push('-t', String(threads));
          console.log(['sambamba', ...args].join(' '));
          yield async_exec('sambamba', args, (m) => {
              // Chunks arrive trimmed, so the newline between two records can be
              // lost at a chunk boundary; restore it where "}{" marks the joint.
              accum = (accum + m).replace('}{', '}\n{');
              const lines = accum.split('\n');
              // The last piece may be an incomplete record: keep it for later.
              accum = lines.pop();
              lines.forEach(parseInto);
          });
          if (accum !== '') {
              accum.replace('}{', '}\n{').split('\n').forEach(parseInto);
          }
      }
      return jsonLines;
  });
  101. exports.openSam = openSam;
  /**
   * Unfinished placeholder: it only normalizes `fastqPaths` to an array and
   * returns nothing. `reads` is not used yet.
   *
   * @param {*} reads - Currently unused.
   * @param {string|string[]} fastqPaths - One path or a list of paths.
   * @returns {undefined}
   */
  const extractReads = (reads, fastqPaths) => {
      // TODO: implement the extraction; nothing is done with the paths yet.
      fastqPaths = Array.isArray(fastqPaths) ? fastqPaths : [fastqPaths];
  };
  /**
   * Builds a summary object for one transcript accession: its sequence and
   * exons (from `getFromAcc`), read counts for the whole transcript and for
   * each exon, and the exon-to-exon "bridges" supported by splitter and
   * discordant reads.
   *
   * @param {string} accession - Accession, optionally with a `.N` version suffix
   *   (the suffix is stripped for the `getFromAcc` lookup only).
   * @param {string|string[]} properBam - File(s) counted into `counts.all`.
   * @param {string|string[]} splittersSam - File(s) counted into `counts.splitters`.
   * @param {string|string[]} disordantsSam - File(s) counted into `counts.discordants`.
   * @param {*} rnaDBPath - Passed through to `getFromAcc`.
   * @returns {Promise<object>} `{ sequence, version, exons, counts, altTranscripts }`
   *   where `altTranscripts` is a list of `{ bridge: 'i-j', reads: string[] }`
   *   sorted by decreasing number of reads.
   */
  const analysisTranscript = (accession, properBam, splittersSam, disordantsSam, rnaDBPath) => __awaiter(void 0, void 0, void 0, function* () {
      const accessionWoVersion = accession.split(/\.[0-9]{1,3}/)[0];
      const accJson = yield (0, gbffparser_1.getFromAcc)(accessionWoVersion, rnaDBPath);
      const hasRecord = accJson !== null && accJson !== undefined;
      const exons = hasRecord
          ? accJson.features
              .filter((entry) => entry.type === 'exon')
              .map((exon, i) => ({
                  n: i + 1,
                  start: exon.start,
                  end: exon.end,
                  strand: exon.strand,
                  // Coordinates are 1-based and inclusive.
                  sequence: accJson.sequence.substring(exon.start - 1, exon.end),
                  counts: {},
              }))
          : [];
      const json = {
          sequence: (hasRecord ? accJson.sequence : undefined) || '',
          version: hasRecord ? accJson.version : undefined,
          exons,
          counts: {},
          altTranscripts: {}
      };
      // The sambamba calls stay sequential on purpose: each one is already
      // multi-threaded and the first call on a .sam file may create its .bam.
      json.counts.all = yield openSam(properBam, accession, true);
      json.counts.splitters = yield openSam(splittersSam, accession, true);
      json.counts.discordants = yield openSam(disordantsSam, accession, true);
      for (const exon of json.exons) {
          const region = accession + ':' + exon.start + '-' + exon.end;
          exon.counts.all = yield openSam(properBam, region, true);
          exon.counts.splitters = yield openSam(splittersSam, region, true);
          exon.counts.discordants = yield openSam(disordantsSam, region, true);
      }
      // Use the files supplied by the caller (not fixed test fixtures);
      // `concat` accepts both single paths and arrays of paths.
      const samJSON = yield openSam([].concat(splittersSam, disordantsSam), accession);
      // Read name (text before the first '_') -> set of exon numbers it hits.
      const byRead = Object.create(null);
      for (const entry of samJSON) {
          const exonIndex = json.exons.findIndex((exon) => exon.start <= entry.pos && exon.end >= entry.pos);
          // Alignments outside every exon cannot define an exon bridge.
          if (exonIndex === -1) {
              continue;
          }
          const qname = entry.qname.split('_')[0];
          if (!(qname in byRead)) {
              byRead[qname] = new Set();
          }
          byRead[qname].add(exonIndex + 1);
      }
      // Bridge 'i-j' (i < j) -> names of the reads touching both exons.
      const byAltern = Object.create(null);
      for (const qname of Object.keys(byRead)) {
          const exonNumbers = [...byRead[qname]].sort((a, b) => a - b);
          for (let i = 0; i < exonNumbers.length; i++) {
              for (let j = i + 1; j < exonNumbers.length; j++) {
                  const bridge = exonNumbers[i] + '-' + exonNumbers[j];
                  if (!(bridge in byAltern)) {
                      byAltern[bridge] = [];
                  }
                  byAltern[bridge].push(qname);
              }
          }
      }
      json.altTranscripts = Object.keys(byAltern)
          .map((bridge) => ({ bridge, reads: byAltern[bridge] }))
          .sort((a, b) => b.reads.length - a.reads.length);
      return json;
  });
  163. exports.analysisTranscript = analysisTranscript;
  /*(async()=>{
      // await asyncBwaMem('/home/thomas/NGS/ref/ncbi/RNA/human_NM.fa',
      // ['/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R1.fq.gz','/Turbine-pool/LAL-T_RNAseq/fastq_fastp/58_MAS/R2.fq.gz'],
      // 'TEST', 'TEST', 'test/', console.log)
      const symbol = 'NOTCH1'
      const LRGPath = '/home/thomas/NGS/ref/ncbi/LRG_RefSeqGene'
      const tablePath = '/home/thomas/NGS/ref/ncbi/GCF_000001405.39_GRCh38.p13_feature_table.txt'
      const rnaDBPath = [1,2,3,4,5,6,7,8,9,10].map(n => '/home/thomas/NGS/ref/ncbi/RNA/human.' + n + '.rna.gbff')
      const geneDBPath = [1,2,3,4,5,6,7].map(n => '/home/thomas/NGS/ref/ncbi/GENES/refseqgene.' + n + '.genomic.gbff')
      const geneInfo = await getSymbol(symbol, LRGPath, tablePath, geneDBPath, rnaDBPath)
      await fs.promises.writeFile('test/geneInfo.json', JSON.stringify(geneInfo.filter((entry:any) => entry.feature === 'mRNA'), null, 4))
      const transcripts = geneInfo.filter((entry:any) => entry.feature === 'mRNA')
          .map((entry:any) => ({...entry, sequence: entry.data.sequence, data:entry.data.features.filter((feature:any) => feature.type === 'exon')}))
          .map((entry:any) => ({
              accession: entry.product_accession,
              genomic_accession: entry.genomic_accession,
              start: entry.start,
              end: entry.end,
              sequence: entry.sequence,
              exons: [...entry.data.map((d:any) => ({start: d.start, end: d.end}))]
          }))
          .map((entry:any) => ({...entry, exons: entry.exons.map((exon:any) => ({...exon, sequence: entry.sequence.substring(exon.start-1,exon.end)}))}))
      await fs.promises.writeFile('test/sub.json', JSON.stringify(transcripts, null, 4))
      for (let index = 0; index < transcripts.length; index++) {
          const transcript = transcripts[index]
          transcripts[index].count = {
              all : await openSam('test/bwa_mem_properly_on_human_NM.sorted.bam', transcript.accession, true),
              splitters : await openSam('test/bwa_mem_splitters_on_human_NM.sam', transcript.accession, true),
              discordants: await openSam('test/bwa_mem_discordants_on_human_NM.sam', transcript.accession, true),
          }
          const samJSON = await openSam([
              'test/bwa_mem_splitters_on_human_NM.sam',
              'test/bwa_mem_discordants_on_human_NM.sam'],
              transcript.accession)
          const byRead = {} as {[key:string]: number[]}
          samJSON.map((entry:any) => ({
              qname: entry.qname.split('_')[0],
              pos: entry.pos,
              exon: transcript.exons.flatMap((exon:any, i:any) => (exon.start <= entry.pos && exon.end >= entry.pos) ? i + 1 : [])[0]
          }))
          .map((entry:any) => {
              if(typeof byRead[entry.qname] === 'undefined') byRead[entry.qname] = []
              byRead[entry.qname] = [...new Set([...byRead[entry.qname], entry.exon])].sort((a,b) => a - b)
          })
          const byAltern = {} as {[key:string]: string[]}
          Object.keys(byRead).map(qname => {
              const bridges = byRead[qname].flatMap((e,i) => byRead[qname]
                  .flatMap((ee,ii) => i === ii || i >= ii ? []: e + '-' + ee))
              for (const bridge of bridges) {
                  if(typeof byAltern[bridge] === 'undefined') byAltern[bridge] = []
                  byAltern[bridge].push(qname)
              }
          })
          transcripts[index].altTranscripts = Object.keys(byAltern)
              .map(bridge => ({bridge, reads: byAltern[bridge]}))
              .sort((a,b) => b.reads.length - a.reads.length)
          await fs.promises.writeFile('test/altTranscripts-' + transcript.accession + '.json', JSON.stringify(transcripts[index], null, 4))
      }
  })()
  */