index.js 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. "use strict";
  // Compiler-emitted helper that runs a generator function as an asynchronous
  // routine: every yielded value is awaited and its settled result is fed back
  // into the generator. An `__awaiter` already present on `this` is reused.
  var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Fall back to the native Promise when no promise constructor is supplied.
    if (!P) {
      P = Promise;
    }
    // Instances of P pass through untouched; anything else is wrapped in a new P.
    const adopt = (value) => {
      if (value instanceof P) {
        return value;
      }
      return new P((resolve) => {
        resolve(value);
      });
    };
    return new P((resolve, reject) => {
      let iterator;
      // Settle the outer promise when the generator is done, otherwise wait
      // for the yielded value and resume the generator with its outcome.
      const advance = (result) => {
        if (result.done) {
          resolve(result.value);
          return;
        }
        adopt(result.value).then(onFulfilled, onRejected);
      };
      // Build a callback that resumes the generator through `next` or `throw`;
      // anything the generator throws rejects the outer promise.
      const resumeVia = (method) => (value) => {
        try {
          advance(iterator[method](value));
        }
        catch (e) {
          reject(e);
        }
      };
      const onFulfilled = resumeVia("next");
      const onRejected = resumeVia("throw");
      iterator = generator.apply(thisArg, _arguments || []);
      advance(iterator.next());
    });
  };
  // Compiler-emitted interop helper: a module flagged with `__esModule` is
  // returned as-is, anything else is wrapped so it is reachable as `.default`.
  var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
      return mod;
    }
    return { "default": mod };
  };
  14. Object.defineProperty(exports, "__esModule", { value: true });
  15. exports.selectFasta = exports.asyncReadSmallFasta = exports.asyncReadFasta = void 0;
  16. const child_process_1 = require("child_process");
  17. const fs_1 = __importDefault(require("fs"));
  18. const zlib = require('zlib');
  19. const readline = require('readline');
  20. const Papa = require('papaparse');
  /**
   * Run a program through the shell and stream its output to a callback.
   *
   * @param {string} prog - Program (or shell command) to run.
   * @param {string[]} args - Arguments appended to the command line. They are
   *   interpreted by the shell (`shell: true`), so shell syntax such as
   *   redirections works, and untrusted values must not be passed unescaped.
   * @param {(text: string) => void} [onData] - Called with each trimmed chunk
   *   written to stdout or stderr. Optional; chunks are dropped when omitted.
   * @returns {Promise<number|null>} Resolves with the exit code once the process
   *   has ended and its stdio streams are closed; rejects if spawning fails.
   */
  const async_exec = (prog, args, onData) => {
    // Tolerate a missing callback instead of throwing inside a stream handler.
    const emit = typeof onData === 'function' ? onData : () => { };
    return new Promise((resolve, reject) => {
      const child = (0, child_process_1.spawn)(prog, args, { shell: true });
      child.stdout.on('data', data => emit(data.toString().trim()));
      child.stderr.on('data', data => emit(data.toString().trim()));
      child.on('error', err => reject(err));
      // 'close' (unlike 'exit') fires only after the stdio streams are closed,
      // so every chunk has been delivered to the callback before we resolve.
      child.on('close', code => resolve(code));
    });
  };
  /**
   * Read a small FASTA file (fasta/fa/fna) entirely into memory.
   *
   * A line starting with '>' opens a new record; the following lines are
   * concatenated (without line breaks) into that record's sequence. Lines that
   * appear before the first header belong to no record and are ignored.
   *
   * @param {string} path - Path of the FASTA file.
   * @returns {Promise<Array<{name: string, sequence: string}>>} One entry per
   *   record, in file order; `name` is the full header line including the
   *   leading '>'. An empty file yields an empty array. Rejects when the file
   *   cannot be read.
   */
  const asyncReadSmallFasta = (path) => {
    return new Promise((resolve, reject) => {
      const rs = fs_1.default.createReadStream(path);
      // Attach our handler before readline wraps the stream so a read failure
      // reaches `reject` before any listener readline itself installs.
      rs.once('error', err => reject(err));
      const rl = readline.createInterface({
        input: rs, crlfDelay: Infinity
      });
      // Defensive: covers runtimes where readline re-emits input errors.
      rl.once('error', err => reject(err));
      const results = [];
      let current = null;
      rl.on('line', (line) => {
        if (line.startsWith('>')) {
          current = { name: line, sequence: '' };
          results.push(current);
        }
        else if (current !== null) {
          current.sequence += line;
        }
      });
      rl.on('close', () => {
        resolve(results);
      });
    });
  };
  56. exports.asyncReadSmallFasta = asyncReadSmallFasta;
  /**
   * Build the `.fai` index of a FASTA file by running `samtools faidx <path>`.
   * See https://www.biostars.org/p/98885/
   *
   * @param {string} fasta_path - Path of the FASTA file to index.
   * @returns {Promise<string>} Resolves with whatever samtools wrote to stdout
   *   when it exits with code 0. Rejects with an Error when samtools cannot be
   *   started or exits with a non-zero code (stderr is included in the message).
   */
  const async_save_fai = (fasta_path) => {
    console.log('Creating new fai from : ', fasta_path);
    return new Promise((resolve, reject) => {
      // Spawn without a shell: the path is handed over as one literal argument,
      // so spaces or shell metacharacters in it cannot alter the command.
      const child = (0, child_process_1.spawn)('samtools', ['faidx', fasta_path]);
      let result = '';
      let errors = '';
      child.stdout.on('data', (data) => {
        result += data.toString();
      });
      child.stderr.on('data', (data) => {
        errors += data.toString();
      });
      child.on('error', (err) => {
        reject(err);
      });
      // 'close' fires after the stdio streams are drained, so `result` and
      // `errors` are complete when the exit code is inspected.
      child.on('close', (code) => {
        if (code === 0) {
          resolve(result);
          return;
        }
        reject(new Error(`samtools faidx failed for ${fasta_path} (exit code ${code}): ${errors.trim()}`));
      });
    });
  };
  /**
   * Parse a delimited text file with Papa Parse in Node stream mode and collect
   * every emitted row. Paths ending in `.gz` are gunzipped on the fly.
   *
   * @param {string} path - File to parse.
   * @param {object} opt - Options forwarded unchanged to `Papa.parse`.
   * @returns {Promise<Array>} Resolves with the collected rows once the parser
   *   stream finishes. Rejects on a read, decompression or parse stream error.
   */
  const async_paparse = (path, opt) => {
    return new Promise((resolve, reject) => {
      const output = [];
      const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, opt);
      const fail = (error) => reject(error);
      // `pipe()` does not forward errors downstream, so every stage gets its
      // own handler; otherwise a missing or corrupt file would leave the
      // promise pending with an unhandled 'error' event.
      let rs = fs_1.default.createReadStream(path).on('error', fail);
      if (/\.gz$/.test(path)) {
        rs = rs.pipe(zlib.createGunzip()).on('error', fail);
      }
      rs.pipe(parseStream)
        .on('error', fail)
        .on('data', (row) => { output.push(row); })
        .on('finish', () => resolve(output));
    });
  };
  /**
   * Read a byte range from a file and return it decoded as a UTF-8 string.
   *
   * @param {string} path - File to read from.
   * @param {number} start - Offset of the first byte to read.
   * @param {number} length - Number of bytes to read. A value of zero or less
   *   yields an empty string without touching the file. A range that runs past
   *   the end of the file returns only the bytes that exist.
   * @returns {Promise<string>} The decoded bytes; rejects on a read error.
   */
  const async_read_bytes = (path, start, length) => {
    return new Promise((resolve, reject) => {
      if (length <= 0) {
        // An empty range would otherwise become `end < start`, which
        // createReadStream refuses with a RangeError.
        resolve('');
        return;
      }
      const chunks = [];
      // `end` is inclusive, hence the -1.
      fs_1.default.createReadStream(path, { start: start, end: start + length - 1 })
        .on('error', error => reject(error))
        .on('data', (d) => {
          chunks.push(d);
        })
        // Concatenate once at the end instead of re-copying on every chunk.
        .on('end', () => resolve(Buffer.concat(chunks).toString()));
    });
  };
  /**
   * Read sequence names, or selected sequences, from an indexed FASTA file.
   * The `.fai` index is created with `async_save_fai` when it does not exist.
   *
   * @param {string} path - Path of the FASTA file. When the path itself ends in
   *   `fai` it is used as the index path as well.
   * @param {string|string[]} [sequences] - Name(s) of the sequences to extract.
   *   When omitted (or an empty array) only the sequence names are returned.
   * @returns {Promise<Array>} Either the first column of every index row, or one
   *   `{ name, sequence }` object per requested name, in request order.
   *   `sequence` is the raw byte range taken from the file; presumably it still
   *   contains the file's line terminators (TODO confirm against callers).
   *   Rejects when indexing, parsing or reading fails, or when a requested name
   *   is not in the index.
   */
  const asyncReadFasta = (path, sequences) => {
    // An omitted argument means "list names"; wrapping it would give [undefined].
    let wanted = [];
    if (sequences !== undefined && sequences !== null) {
      wanted = Array.isArray(sequences) ? sequences : [sequences];
    }
    return __awaiter(void 0, void 0, void 0, function* () {
      const path_fai = (new RegExp('fai$')).test(path) ? path : path + '.fai';
      if (!fs_1.default.existsSync(path_fai)) {
        yield async_save_fai(path);
      }
      const parsed = yield async_paparse(path_fai, { header: false, dynamicTyping: true });
      // Keep only complete index rows so blank or short lines are not reported
      // as sequences. Assumes the samtools .fai column order
      // NAME, LENGTH, OFFSET, LINEBASES, LINEWIDTH — verify against the manual.
      const fai = Array.isArray(parsed)
        ? parsed.filter((row) => Array.isArray(row) && row.length >= 5)
        : [];
      if (wanted.length === 0) {
        return fai.map((row) => row[0]);
      }
      const results = [];
      for (const seqName of wanted) {
        // dynamicTyping converts numeric-looking names (e.g. "1") to numbers,
        // so compare the string forms.
        const entry = fai.find((row) => String(row[0]) === String(seqName));
        if (!entry) {
          throw new Error(`Sequence "${seqName}" not found in ${path_fai}`);
        }
        const [name, length, offset, lineBases, lineWidth] = entry;
        // Full lines occupy lineWidth bytes each; the trailing partial line
        // contributes only its bases.
        const byteCount = (Math.trunc(length / lineBases) * lineWidth) + (length % lineBases);
        const sequence = yield async_read_bytes(path, offset, byteCount);
        results.push({ name, sequence });
      }
      return results;
    });
  };
  137. exports.asyncReadFasta = asyncReadFasta;
  /**
   * Extract one or more named sequences from a FASTA file into a new file by
   * running `samtools faidx <path> <name...>` and writing its stdout to `out`.
   *
   * @param {string} path - Source FASTA file.
   * @param {string|string[]} name - Sequence name or names to extract.
   * @param {string} out - Destination file; created or overwritten.
   * @returns {Promise<number|null>} Resolves with the samtools exit code once
   *   the process has ended and the output file is fully written. Rejects when
   *   samtools cannot be started or the output file cannot be written.
   */
  const selectFasta = (path, name, out) => {
    const names = Array.isArray(name) ? name : [name];
    return new Promise((resolve, reject) => {
      const sink = fs_1.default.createWriteStream(out);
      // Spawn without a shell and do the redirection ourselves: paths and names
      // are passed as literal arguments and cannot be reinterpreted by a shell.
      const child = (0, child_process_1.spawn)('samtools', ['faidx', path, ...names]);
      let exitCode = null;
      let processClosed = false;
      let fileFlushed = false;
      // Resolve only when both the process and the output file are finished.
      const settle = () => {
        if (processClosed && fileFlushed) {
          resolve(exitCode);
        }
      };
      sink.on('error', (err) => {
        child.kill();
        reject(err);
      });
      sink.on('finish', () => {
        fileFlushed = true;
        settle();
      });
      child.on('error', (err) => {
        sink.destroy();
        reject(err);
      });
      child.on('close', (code) => {
        exitCode = code;
        processClosed = true;
        settle();
      });
      // Diagnostics from samtools are still reported on the console.
      child.stderr.on('data', (data) => console.log(data.toString().trim()));
      child.stdout.pipe(sink);
    });
  };
  145. exports.selectFasta = selectFasta;