| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- "use strict";
/**
 * Compiler-emitted helper that drives a generator function as an asynchronous
 * routine: each yielded value is awaited and its outcome is fed back into the
 * generator. An already-defined `this.__awaiter` is reused when present.
 *
 * @param {*} thisArg - `this` value for the generator function.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator function.
 * @param {PromiseConstructor} [P] - Promise implementation; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function to run to completion.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    const PromiseImpl = P || Promise;
    // Wrap plain values so that every yielded value can be chained with `.then`.
    const toPromise = (value) => {
        if (value instanceof PromiseImpl) {
            return value;
        }
        return new PromiseImpl((resolve) => { resolve(value); });
    };
    return new PromiseImpl((resolve, reject) => {
        // Started inside the executor so that a synchronous throw becomes a rejection.
        const iterator = generator.apply(thisArg, _arguments || []);
        const settle = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        };
        const onFulfilled = (value) => {
            try {
                settle(iterator.next(value));
            }
            catch (err) {
                reject(err);
            }
        };
        const onRejected = (reason) => {
            try {
                settle(iterator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        };
        settle(iterator.next());
    });
};
/**
 * Compiler-emitted interop helper: gives a `require`d module a `default`
 * property. Modules flagged with `__esModule` are returned untouched; anything
 * else is wrapped as `{ default: mod }`.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself, or a wrapper object exposing it as `default`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.selectFasta = exports.asyncReadSmallFasta = exports.asyncReadFasta = void 0;
- const child_process_1 = require("child_process");
- const fs_1 = __importDefault(require("fs"));
- const zlib = require('zlib');
- const readline = require('readline');
- const Papa = require('papaparse');
/**
 * Run a command through the system shell and forward its output to a callback.
 *
 * SECURITY NOTE: `shell: true` is kept because callers pass shell syntax
 * (such as a `>` redirection) inside `args`. The arguments are joined into the
 * command line without escaping, so they must never contain untrusted input.
 *
 * @param {string} prog - Program to execute.
 * @param {string[]} args - Arguments, interpreted by the shell.
 * @param {(text: string) => void} [onData] - Called with each trimmed chunk of stdout or stderr.
 * @returns {Promise<number|null>} Exit code of the command (`null` when it was killed by a signal).
 *   Rejects when the process cannot be spawned.
 */
const async_exec = (prog, args, onData) => {
    return new Promise((resolve, reject) => {
        const child = (0, child_process_1.spawn)(prog, args, { shell: true });
        const forward = (chunk) => {
            // A missing callback must not throw from inside a stream listener.
            if (typeof onData === 'function') {
                onData(chunk.toString().trim());
            }
        };
        child.stdout.on('data', forward);
        child.stderr.on('data', forward);
        child.on('error', (err) => reject(err));
        // 'close' fires once the stdio streams are drained; 'exit' can fire while
        // output is still pending, which let the promise settle before the last
        // chunks reached `onData`.
        child.on('close', (code) => resolve(code));
    });
};
// Read fasta/fa/fna
/**
 * Read a whole FASTA file into memory, one record per header line.
 *
 * A header is a line whose first non-blank character is `>`. The header line is
 * stored unchanged (including the `>`) as `name`, and the following lines are
 * concatenated without line breaks into `sequence`. Text that appears before
 * the first header is ignored, and an empty file yields an empty array.
 *
 * @param {string} path - Path of the FASTA file.
 * @returns {Promise<Array<{name: string, sequence: string}>>} Records in file order.
 *   Rejects when the file cannot be read.
 */
const asyncReadSmallFasta = (path) => {
    return new Promise((resolve, reject) => {
        const rs = fs_1.default.createReadStream(path);
        const rl = readline.createInterface({
            input: rs, crlfDelay: Infinity
        });
        // `\s` also matches a byte-order mark, so a BOM before the first header is tolerated.
        const headerPattern = /^\s*>/;
        const results = [];
        let current = null;
        const fail = (err) => {
            // Reject first: closing the interface emits 'close', which resolves.
            reject(err);
            rl.close();
        };
        rs.once('error', fail);
        // Some Node versions re-emit input errors on the interface itself.
        rl.on('error', fail);
        rl.on('line', (line) => {
            if (headerPattern.test(line)) {
                if (current) {
                    results.push(current);
                }
                current = { name: line, sequence: '' };
            }
            else if (current) {
                current.sequence += line;
            }
        });
        rl.on('close', () => {
            // Only flush a record that was actually started.
            if (current) {
                results.push(current);
            }
            resolve(results);
        });
    });
};
- exports.asyncReadSmallFasta = asyncReadSmallFasta;
// https://www.biostars.org/p/98885/
/**
 * Build the `.fai` index of a FASTA file by running `samtools faidx <fasta_path>`.
 *
 * The command is spawned without a shell so the path is passed as a single
 * argument and cannot be re-interpreted as shell syntax.
 *
 * @param {string} fasta_path - Path of the FASTA file to index.
 * @returns {Promise<string>} Everything samtools wrote to stdout.
 *   Rejects with an `Error` when samtools cannot be started or exits with a
 *   non-zero status.
 */
const async_save_fai = (fasta_path) => {
    console.log('Creating new fai from : ', fasta_path);
    return new Promise((resolve, reject) => {
        const child = (0, child_process_1.spawn)('samtools', ['faidx', fasta_path]);
        let stdout = '';
        let stderr = '';
        child.stdout.on('data', (data) => {
            stdout += data.toString();
        });
        child.stderr.on('data', (data) => {
            stderr += data.toString();
        });
        child.on('error', (err) => {
            reject(err);
        });
        // Wait for 'close' so both streams are fully read, and treat a non-zero
        // status as a failure instead of resolving as if the index existed.
        child.on('close', (code) => {
            if (code === 0) {
                resolve(stdout);
            }
            else {
                reject(new Error(`samtools faidx failed for ${fasta_path} (exit code ${code}): ${stderr.trim()}`));
            }
        });
    });
};
/**
 * Parse a delimited text file with Papa Parse and collect every row.
 * Files whose name ends in `.gz` are gunzipped on the fly.
 *
 * @param {string} path - File to parse.
 * @param {object} opt - Options handed to `Papa.parse`.
 * @returns {Promise<Array<*>>} All rows emitted by the parser, in order.
 *   Rejects when reading, decompressing or parsing fails.
 */
const async_paparse = (path, opt) => {
    return new Promise((resolve, reject) => {
        const output = [];
        const parseStream = Papa.parse(Papa.NODE_STREAM_INPUT, opt);
        const stages = [fs_1.default.createReadStream(path)];
        if (path.endsWith('.gz')) {
            stages.push(zlib.createGunzip());
        }
        stages.push(parseStream);
        // `.pipe()` does not forward errors downstream, so every stage needs its
        // own handler; otherwise a missing file raises an unhandled 'error'.
        const fail = (error) => {
            stages.forEach((stage) => stage.destroy());
            reject(error);
        };
        stages.forEach((stage) => stage.on('error', fail));
        for (let i = 1; i < stages.length; i++) {
            stages[i - 1].pipe(stages[i]);
        }
        parseStream
            .on('data', (row) => { output.push(row); })
            .on('finish', () => resolve(output));
    });
};
/**
 * Read `length` bytes of a file starting at byte offset `start` and return them
 * decoded as a string. Fewer bytes are returned when the file ends first.
 *
 * @param {string} path - File to read from.
 * @param {number} start - Zero-based offset of the first byte.
 * @param {number} length - Number of bytes to read; `0` yields an empty string.
 * @returns {Promise<string>} The decoded bytes. Rejects when the file cannot be read.
 */
const async_read_bytes = (path, start, length) => {
    return new Promise((resolve, reject) => {
        if (length === 0) {
            // An inclusive range cannot express "no bytes"; answer directly.
            resolve('');
            return;
        }
        const chunks = [];
        // `end` is inclusive, hence the - 1.
        fs_1.default.createReadStream(path, { start, end: start + length - 1 })
            .on('error', (error) => reject(error))
            .on('data', (chunk) => {
                chunks.push(chunk);
            })
            // Concatenate once: re-concatenating on every chunk is quadratic.
            .on('end', () => resolve(Buffer.concat(chunks).toString()));
    });
};
// Create fai if not present return sequences names and if provided return : obj {name, sequence}
/**
 * Access an indexed FASTA file through its `.fai` index.
 *
 * The index is expected at `path + '.fai'` (or at `path` itself when it already
 * ends in `fai`) and is created with samtools when it does not exist.
 *
 * - Without `sequences` (omitted, `null` or `[]`): resolves to the list of
 *   sequence names found in the index.
 * - With one name or an array of names: resolves to `{ name, sequence }` objects
 *   in the requested order. `sequence` is the raw byte range of the record, so it
 *   still contains the line terminators of the FASTA file.
 *
 * @param {string} path - Path of the FASTA file.
 * @param {string|string[]} [sequences] - Name(s) of the sequences to extract.
 * @returns {Promise<Array<*>>} Names, or extracted records. Rejects when the
 *   index cannot be created or read, or when a requested name is not indexed.
 */
const asyncReadFasta = (path, sequences) => {
    return __awaiter(void 0, void 0, void 0, function* () {
        let wanted;
        if (sequences == null) {
            // Omitted argument means "list the names", same as an empty array.
            wanted = [];
        }
        else {
            wanted = Array.isArray(sequences) ? sequences : [sequences];
        }
        const path_fai = path.endsWith('fai') ? path : path + '.fai';
        if (!fs_1.default.existsSync(path_fai)) {
            // A failure here rejects the whole call; nothing below runs without an index.
            yield async_save_fai(path);
        }
        const parsed = yield async_paparse(path_fai, { header: false, dynamicTyping: true });
        // Drop blank rows (presumably produced by the trailing newline of the index).
        const fai = Array.isArray(parsed)
            ? parsed.filter((row) => Array.isArray(row) && !(row.length === 1 && (row[0] === '' || row[0] == null)))
            : [];
        if (wanted.length === 0) {
            return fai.map((row) => row[0]);
        }
        const results = [];
        for (const seqName of wanted) {
            // dynamicTyping turns numeric-looking names such as "1" into numbers,
            // so compare their string forms.
            const entry = fai.find((row) => String(row[0]) === String(seqName));
            if (!entry) {
                throw new Error(`Sequence "${seqName}" not found in ${path_fai}`);
            }
            // Index columns: name, length, byte offset, bases per line, bytes per line.
            const [name, length, offset, lineBases, lineWidth] = entry;
            // Full lines carry their terminator bytes; the last partial line does not.
            const byteCount = (Math.trunc(length / lineBases) * lineWidth) + (length % lineBases);
            const sequence = yield async_read_bytes(path, offset, byteCount);
            results.push({ name, sequence });
        }
        return results;
    });
};
- exports.asyncReadFasta = asyncReadFasta;
/**
 * Extract one or more sequences from a FASTA file into a new file by running
 * `samtools faidx <path> <name...> > <out>`.
 *
 * SECURITY NOTE: the command goes through a shell because of the `>`
 * redirection, and the arguments are not escaped. `path`, `name` and `out`
 * must therefore come from trusted input only.
 *
 * @param {string} path - Source FASTA file.
 * @param {string|string[]} name - Sequence name(s) or region(s) to extract.
 * @param {string} out - File that receives the extracted sequences.
 * @returns {Promise<number>} Resolves with the exit code (`0`) once the command
 *   has finished. Rejects when it cannot be started or exits with a non-zero status.
 */
const selectFasta = (path, name, out) => {
    const names = Array.isArray(name) ? name : [name];
    // Return the command's promise: the previous wrapper never called resolve
    // or reject, so callers waited forever and failures were lost.
    return async_exec('samtools', ['faidx', path, ...names, '>', out], console.log)
        .then((code) => {
            if (code !== 0) {
                throw new Error(`samtools faidx exited with code ${code} while writing ${out}`);
            }
            return code;
        });
};
- exports.selectFasta = selectFasta;
|