Răsfoiți Sursa

first commit

Thomas 3 ani în urmă
comite
6e7ccb1b1e
7 a modificat fișierele cu 390 adăugiri și 0 ștergeri
  1. 0 0
      README.md
  2. 96 0
      index.js
  3. 111 0
      index.ts
  4. 51 0
      package-lock.json
  5. 18 0
      package.json
  6. 101 0
      tsconfig.json
  7. 13 0
      yarn.lock

+ 0 - 0
README.md


+ 96 - 0
index.js

@@ -0,0 +1,96 @@
+"use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // Helper that drives a generator function to completion and returns a promise for its result; an existing this.__awaiter is reused when present.
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // wrap a yielded value in P unless it already is an instance of P
+    return new (P || (P = Promise))(function (resolve, reject) { // P falls back to the global Promise when it is not supplied
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // resume the generator with the settled value
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // throw the rejection reason into the generator
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // finish with the return value, or wait for the next yielded value
+        step((generator = generator.apply(thisArg, _arguments || [])).next()); // create the generator and run it up to its first yield
+    });
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+const child_process_1 = require("child_process");
+/* (c) Thomas Steimlé 2022
+ * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
+ * Requires the OS commands cat, awk, sort and uniq.
+ *
+ */
+const async_exec = (prog, args, onData, onErr) => { // Spawn prog with args through a shell and stream its output to the two callbacks.
+    return new Promise((resolve, reject) => { // Resolves with the code reported by the exit event; rejects when the child emits an error event.
+        const child = (0, child_process_1.spawn)(prog, args, { shell: true }); // shell: true means args may carry shell syntax such as pipe symbols
+        child.stdout.on('data', data => onData(data /*.toString().trim()*/)); // stdout chunks are forwarded as received, without conversion or trimming
+        child.stderr.on('data', data => onErr(data.toString().trim())); // stderr chunks are converted to a trimmed string first
+        child.on('error', err => reject(err));
+        child.on('exit', code => resolve(code)); // NOTE(review): Node documents that stdio streams may still be open when exit fires; the close event may be the safer choice - confirm.
+    });
+};
+const clusterSam = (input_sam, threshold, minReads) => { // Cluster the positions extracted from the input file(s) per name, then group reads by the exact set of clusters they appear in. NOTE(review): looks like the tsc-emitted twin of clusterSam in index.ts - confirm before editing by hand.
+    return new Promise((resolve, _reject) => __awaiter(void 0, void 0, void 0, function* () { // NOTE(review): _reject is never called; if the awaited async_exec rejects, this promise is never settled - confirm intended.
+        let inputSam = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam; // several paths are joined with spaces. NOTE(review): the paths reach a shell command unquoted - confirm inputs are trusted.
+        let lineAcc = ''; // carries the trailing partial line from one stdout chunk to the next
+        let byContigs = {}; // second output field -> list of parsed entries for it
+        yield async_exec('cat', [ // builds the pipeline: cat FILES | awk PROGRAM | sort | uniq
+            inputSam,
+            '|',
+            'awk', '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'', // skips lines starting with @; prints fields 1, 3 and 4, plus one extra row built from the first XA:Z: entry (its name and its position without the first character) tagged XA
+            '|',
+            'sort',
+            '|',
+            'uniq'
+        ], (m) => { // called once per stdout chunk
+            let tmpSeq = (lineAcc + m).split(/\n/); // prepend the leftover of the previous chunk, then split into lines
+            lineAcc = tmpSeq.pop(); // hold back the last piece, which may be an incomplete line
+            tmpSeq.map(e => { // one iteration per complete line
+                let tmpName = '';
+                let tmpPos = { rname: '', position: 0 };
+                e.split(/\t/).map((el, i) => { // assign the tab-separated fields by index
+                    switch (i) {
+                        case 0:
+                            tmpPos['rname'] = el; // first field; presumably the read name - confirm against the SAM column order
+                            break;
+                        case 1:
+                            tmpName = el; // second field; used as the grouping key
+                            break;
+                        case 2:
+                            tmpPos['position'] = Number(el); // third field as a number
+                            break;
+                        default: // any further field, such as the XA tag, is ignored
+                            break;
+                    }
+                });
+                if (Array.isArray(byContigs[tmpName])) { // append to the list for this key, creating it on first use
+                    byContigs[tmpName].push(tmpPos);
+                }
+                else {
+                    byContigs[tmpName] = [tmpPos];
+                }
+            });
+        }, console.log); // stderr text from the pipeline is printed with console.log
+        let byReads = {}; // rname -> distinct cluster labels it appears in
+        Object
+            .keys(byContigs)
+            .map(name => {
+            let cluster = 0; // cluster counter restarts at 0 for every key
+            byContigs[name]
+                .sort((a, b) => a.position - b.position) // ascending by position, sorted in place
+                .map((e, i, a) => {
+                var _a;
+                cluster = Math.abs(e.position - ((_a = a[i - 1]) === null || _a === void 0 ? void 0 : _a.position)) > threshold ? cluster + 1 : cluster; // a gap above threshold to the previous entry starts a new cluster; for the first entry the difference is NaN, so the counter is unchanged
+                const clutserName = cluster + '@' + name; // label format: cluster index, @, key
+                byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [...new Set([...byReads[e.rname], clutserName])] : [clutserName]; // add the label without duplicates
+            });
+        });
+        let byClusters = {}; // joined label set -> reads that have exactly that set
+        Object.keys(byReads).map(rname => {
+            const tmpClusterName = byReads[rname].sort().join('+++'); // sorted labels joined with +++ form the group key
+            byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [...new Set([...byClusters[tmpClusterName], rname])] : [rname];
+        });
+        Object.keys(byClusters).map(e => byClusters[e].length < minReads ? delete byClusters[e] : null); // drop groups with fewer than minReads reads
+        resolve((Object.keys(byClusters).map(clusterName => ({ clusterName, rname: byClusters[clusterName] })).sort((a, b) => b.rname.length - a.rname.length))); // result: array of { clusterName, rname } ordered by descending number of reads
+    }));
+};
+/*
+(async () => {
+    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55));
+})()
+*/ 

+ 111 - 0
index.ts

@@ -0,0 +1,111 @@
+import { spawn } from 'child_process';
+
+/* (c) Thomas Steimlé 2022 
+ * cat bwa_mem_splitters_on_HG38_Viral.sam | awk '$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}' | more
+ * Requires the OS commands cat, awk, sort and uniq.
+ *
+ */ 
+const async_exec = (prog: string, args: string[], onData: Function, onErr: Function) => { // Spawn prog with args through a shell and stream its output to the two callbacks.
+    return new Promise((resolve, reject) => { // Resolves with the code reported by the exit event; rejects when the child emits an error event.
+        const child = spawn(prog, args, {shell: true}) // shell: true means args may carry shell syntax such as pipe symbols
+
+        child.stdout.on('data', data => onData(data/*.toString().trim()*/)) // stdout chunks are forwarded as received, without conversion or trimming
+        child.stderr.on('data', data => onErr(data.toString().trim())) // stderr chunks are converted to a trimmed string first
+
+        child.on('error', err => reject(err))
+        child.on('exit', code => resolve(code)) // NOTE(review): Node documents that stdio streams may still be open when exit fires; the close event may be the safer choice - confirm.
+    })
+}
+
+const clusterSam = ( // Cluster the positions extracted from the input file(s) per name, then group reads by the exact set of clusters they appear in.
+    input_sam: string | Array<string>, // one path, or several paths that get joined with spaces
+    threshold: number, // a gap larger than this between consecutive sorted positions starts a new cluster
+    minReads : number // groups with fewer reads than this are removed from the result
+) => {
+    return new Promise<any>( async (resolve, _reject) => { // NOTE(review): _reject is never called; if the awaited async_exec rejects, this promise is never settled - confirm intended.
+        let inputSam: string = Array.isArray(input_sam) ? input_sam.join(' ') : input_sam // NOTE(review): the paths reach a shell command unquoted - confirm inputs are trusted.
+
+        let lineAcc: string = '' // carries the trailing partial line from one stdout chunk to the next
+        interface position { // one parsed output row
+            rname: string; // first output field; presumably the read name - confirm against the SAM column order
+            position: number; // third output field as a number
+        }
+
+        interface byContigs { // second output field -> list of parsed rows for it
+            [key: string]: position[]
+        }
+
+        let byContigs: byContigs = {}
+        await async_exec('cat', [ // builds the pipeline: cat FILES | awk PROGRAM | sort | uniq
+            inputSam, 
+            '|', 
+                'awk', '\'$0~/^@/{next}{lxa=split($0,xa,"XA:Z:"); print $1"\t"$3"\t"$4; if(lxa>1){split(xa[2],xap,","); print $1"\t"xap[1]"\t"substr(xap[2],2)"\tXA"}}\'', // skips lines starting with @; prints fields 1, 3 and 4, plus one extra row built from the first XA:Z: entry (its name and its position without the first character) tagged XA
+            '|', 
+                'sort',
+            '|', 
+                'uniq' 
+            ], (m: string) => { // called once per stdout chunk. NOTE(review): async_exec forwards the chunk unconverted, so m is presumably a Buffer that the concatenation below turns into a string - confirm.
+                let tmpSeq: string[] = (lineAcc + m).split(/\n/) // prepend the leftover of the previous chunk, then split into lines
+                lineAcc = tmpSeq.pop() ! // hold back the last piece, which may be an incomplete line; split always yields at least one element, hence the non-null assertion
+                tmpSeq.map(e => { // one iteration per complete line
+                    let tmpName: string = ''
+                    let tmpPos: position = {rname: '', position: 0}
+                    e.split(/\t/).map((el, i) => { // assign the tab-separated fields by index
+                        switch (i) {
+                            case 0:
+                                tmpPos['rname'] = el
+                                break;
+                            case 1:
+                                tmpName = el // used as the grouping key
+                                break;
+                            case 2:
+                                tmpPos['position'] = Number(el)
+                                break;
+                            default: // any further field, such as the XA tag, is ignored
+                                break;
+                        }
+                    })
+                    if (Array.isArray(byContigs[tmpName])) { // append to the list for this key, creating it on first use
+                        byContigs[tmpName].push(tmpPos)
+                    } else {
+                        byContigs[tmpName] = [tmpPos]
+                    }
+                })
+            }, console.log) // stderr text from the pipeline is printed with console.log
+
+            interface byReads { // rname -> distinct cluster labels it appears in
+                [key: string]: string[]
+            }
+            let byReads: byReads = {}
+
+            Object
+                .keys(byContigs)
+                .map(name => {
+                    let cluster = 0 // cluster counter restarts at 0 for every key
+                    byContigs[name]
+                    .sort((a, b) => a.position - b.position) // ascending by position, sorted in place
+                    .map((e, i, a) => {
+                        cluster = Math.abs(e.position - a[i-1]?.position) > threshold ? cluster + 1 : cluster // a gap above threshold to the previous entry starts a new cluster; for the first entry the difference is NaN, so the counter is unchanged
+                        const clutserName = cluster + '@' + name // label format: cluster index, @, key
+                        byReads[e.rname] = Array.isArray(byReads[e.rname]) ? [... new Set([...byReads[e.rname], clutserName])] : [clutserName] // add the label without duplicates
+                    })
+                })
+
+            interface byClusters { // joined label set -> reads that have exactly that set
+                [key: string]: string[]
+            }
+            let byClusters: byClusters = {}
+            Object.keys(byReads).map(rname => {
+                const tmpClusterName = byReads[rname].sort().join('+++') // sorted labels joined with +++ form the group key
+                byClusters[tmpClusterName] = Array.isArray(byClusters[tmpClusterName]) ? [... new Set([...byClusters[tmpClusterName], rname])] : [rname]
+            })
+
+            Object.keys(byClusters).map(e => byClusters[e].length < minReads ? delete byClusters[e] : null); // drop groups with fewer than minReads reads
+            resolve((Object.keys(byClusters).map(clusterName => ({clusterName, rname: byClusters[clusterName]})).sort((a:any,b:any) => b.rname.length - a.rname.length)) ) // result: array of { clusterName, rname } ordered by descending number of reads
+    })
+}
+/*
+(async () => {
+    console.log(await clusterSam('/home/thomas/Documents/Programmes/ttest/bwa_mem_splitters_on_HG38_Viral.sam', 333, 55));
+})()
+*/

+ 51 - 0
package-lock.json

@@ -0,0 +1,51 @@
+{
+  "name": "clustersam",
+  "version": "1.0.0",
+  "lockfileVersion": 2,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "clustersam",
+      "version": "1.0.0",
+      "license": "ISC",
+      "devDependencies": {
+        "@types/node": "^17.0.17",
+        "typescript": "^4.5.5"
+      }
+    },
+    "node_modules/@types/node": {
+      "version": "17.0.17",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.17.tgz",
+      "integrity": "sha512-e8PUNQy1HgJGV3iU/Bp2+D/DXh3PYeyli8LgIwsQcs1Ar1LoaWHSIT6Rw+H2rNJmiq6SNWiDytfx8+gYj7wDHw==",
+      "dev": true
+    },
+    "node_modules/typescript": {
+      "version": "4.5.5",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.5.5.tgz",
+      "integrity": "sha512-TCTIul70LyWe6IJWT8QSYeA54WQe8EjQFU4wY52Fasj5UKx88LNYKCgBEHcOMOrFF1rKGbD8v/xcNWVUq9SymA==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "tsc": "bin/tsc",
+        "tsserver": "bin/tsserver"
+      },
+      "engines": {
+        "node": ">=4.2.0"
+      }
+    }
+  },
+  "dependencies": {
+    "@types/node": {
+      "version": "17.0.17",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.17.tgz",
+      "integrity": "sha512-e8PUNQy1HgJGV3iU/Bp2+D/DXh3PYeyli8LgIwsQcs1Ar1LoaWHSIT6Rw+H2rNJmiq6SNWiDytfx8+gYj7wDHw==",
+      "dev": true
+    },
+    "typescript": {
+      "version": "4.5.5",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.5.5.tgz",
+      "integrity": "sha512-TCTIul70LyWe6IJWT8QSYeA54WQe8EjQFU4wY52Fasj5UKx88LNYKCgBEHcOMOrFF1rKGbD8v/xcNWVUq9SymA==",
+      "dev": true
+    }
+  }
+}

+ 18 - 0
package.json

@@ -0,0 +1,18 @@
+{
+  "name": "clustersam",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "prepublish": "npm run build",
+    "build": "tsc",
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "devDependencies": {
+    "@types/node": "^17.0.17",
+    "typescript": "^4.5.5"
+  }
+}

+ 101 - 0
tsconfig.json

@@ -0,0 +1,101 @@
+{
+  "compilerOptions": {
+    /* Visit https://aka.ms/tsconfig.json to read more about this file */
+
+    /* Projects */
+    // "incremental": true,                              /* Enable incremental compilation */
+    // "composite": true,                                /* Enable constraints that allow a TypeScript project to be used with project references. */
+    // "tsBuildInfoFile": "./",                          /* Specify the folder for .tsbuildinfo incremental compilation files. */
+    // "disableSourceOfProjectReferenceRedirect": true,  /* Disable preferring source files instead of declaration files when referencing composite projects */
+    // "disableSolutionSearching": true,                 /* Opt a project out of multi-project reference checking when editing. */
+    // "disableReferencedProjectLoad": true,             /* Reduce the number of projects loaded automatically by TypeScript. */
+
+    /* Language and Environment */
+    "target": "es2016",                                  /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
+    // "lib": [],                                        /* Specify a set of bundled library declaration files that describe the target runtime environment. */
+    // "jsx": "preserve",                                /* Specify what JSX code is generated. */
+    // "experimentalDecorators": true,                   /* Enable experimental support for TC39 stage 2 draft decorators. */
+    // "emitDecoratorMetadata": true,                    /* Emit design-type metadata for decorated declarations in source files. */
+    // "jsxFactory": "",                                 /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h' */
+    // "jsxFragmentFactory": "",                         /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
+    // "jsxImportSource": "",                            /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`. */
+    // "reactNamespace": "",                             /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */
+    // "noLib": true,                                    /* Disable including any library files, including the default lib.d.ts. */
+    // "useDefineForClassFields": true,                  /* Emit ECMAScript-standard-compliant class fields. */
+
+    /* Modules */
+    "module": "commonjs",                                /* Specify what module code is generated. */
+    // "rootDir": "./",                                  /* Specify the root folder within your source files. */
+    // "moduleResolution": "node",                       /* Specify how TypeScript looks up a file from a given module specifier. */
+    // "baseUrl": "./",                                  /* Specify the base directory to resolve non-relative module names. */
+    // "paths": {},                                      /* Specify a set of entries that re-map imports to additional lookup locations. */
+    // "rootDirs": [],                                   /* Allow multiple folders to be treated as one when resolving modules. */
+    // "typeRoots": [],                                  /* Specify multiple folders that act like `./node_modules/@types`. */
+    // "types": [],                                      /* Specify type package names to be included without being referenced in a source file. */
+    // "allowUmdGlobalAccess": true,                     /* Allow accessing UMD globals from modules. */
+    // "resolveJsonModule": true,                        /* Enable importing .json files */
+    // "noResolve": true,                                /* Disallow `import`s, `require`s or `<reference>`s from expanding the number of files TypeScript should add to a project. */
+
+    /* JavaScript Support */
+    // "allowJs": true,                                  /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */
+    // "checkJs": true,                                  /* Enable error reporting in type-checked JavaScript files. */
+    // "maxNodeModuleJsDepth": 1,                        /* Specify the maximum folder depth used for checking JavaScript files from `node_modules`. Only applicable with `allowJs`. */
+
+    /* Emit */
+    // "declaration": true,                              /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
+    // "declarationMap": true,                           /* Create sourcemaps for d.ts files. */
+    // "emitDeclarationOnly": true,                      /* Only output d.ts files and not JavaScript files. */
+    // "sourceMap": true,                                /* Create source map files for emitted JavaScript files. */
+    // "outFile": "./",                                  /* Specify a file that bundles all outputs into one JavaScript file. If `declaration` is true, also designates a file that bundles all .d.ts output. */
+    // "outDir": "./",                                   /* Specify an output folder for all emitted files. */
+    // "removeComments": true,                           /* Disable emitting comments. */
+    // "noEmit": true,                                   /* Disable emitting files from a compilation. */
+    // "importHelpers": true,                            /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
+    // "importsNotUsedAsValues": "remove",               /* Specify emit/checking behavior for imports that are only used for types */
+    // "downlevelIteration": true,                       /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
+    // "sourceRoot": "",                                 /* Specify the root path for debuggers to find the reference source code. */
+    // "mapRoot": "",                                    /* Specify the location where debugger should locate map files instead of generated locations. */
+    // "inlineSourceMap": true,                          /* Include sourcemap files inside the emitted JavaScript. */
+    // "inlineSources": true,                            /* Include source code in the sourcemaps inside the emitted JavaScript. */
+    // "emitBOM": true,                                  /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
+    // "newLine": "crlf",                                /* Set the newline character for emitting files. */
+    // "stripInternal": true,                            /* Disable emitting declarations that have `@internal` in their JSDoc comments. */
+    // "noEmitHelpers": true,                            /* Disable generating custom helper functions like `__extends` in compiled output. */
+    // "noEmitOnError": true,                            /* Disable emitting files if any type checking errors are reported. */
+    // "preserveConstEnums": true,                       /* Disable erasing `const enum` declarations in generated code. */
+    // "declarationDir": "./",                           /* Specify the output directory for generated declaration files. */
+    // "preserveValueImports": true,                     /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
+
+    /* Interop Constraints */
+    // "isolatedModules": true,                          /* Ensure that each file can be safely transpiled without relying on other imports. */
+    // "allowSyntheticDefaultImports": true,             /* Allow 'import x from y' when a module doesn't have a default export. */
+    "esModuleInterop": true,                             /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables `allowSyntheticDefaultImports` for type compatibility. */
+    // "preserveSymlinks": true,                         /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
+    "forceConsistentCasingInFileNames": true,            /* Ensure that casing is correct in imports. */
+
+    /* Type Checking */
+    "strict": true,                                      /* Enable all strict type-checking options. */
+    // "noImplicitAny": true,                            /* Enable error reporting for expressions and declarations with an implied `any` type. */
+    // "strictNullChecks": true,                         /* When type checking, take into account `null` and `undefined`. */
+    // "strictFunctionTypes": true,                      /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
+    // "strictBindCallApply": true,                      /* Check that the arguments for `bind`, `call`, and `apply` methods match the original function. */
+    // "strictPropertyInitialization": true,             /* Check for class properties that are declared but not set in the constructor. */
+    // "noImplicitThis": true,                           /* Enable error reporting when `this` is given the type `any`. */
+    // "useUnknownInCatchVariables": true,               /* Type catch clause variables as 'unknown' instead of 'any'. */
+    // "alwaysStrict": true,                             /* Ensure 'use strict' is always emitted. */
+    // "noUnusedLocals": true,                           /* Enable error reporting when local variables aren't read. */
+    // "noUnusedParameters": true,                       /* Raise an error when a function parameter isn't read */
+    // "exactOptionalPropertyTypes": true,               /* Interpret optional property types as written, rather than adding 'undefined'. */
+    // "noImplicitReturns": true,                        /* Enable error reporting for codepaths that do not explicitly return in a function. */
+    // "noFallthroughCasesInSwitch": true,               /* Enable error reporting for fallthrough cases in switch statements. */
+    // "noUncheckedIndexedAccess": true,                 /* Include 'undefined' in index signature results */
+    // "noImplicitOverride": true,                       /* Ensure overriding members in derived classes are marked with an override modifier. */
+    // "noPropertyAccessFromIndexSignature": true,       /* Enforces using indexed accessors for keys declared using an indexed type */
+    // "allowUnusedLabels": true,                        /* Disable error reporting for unused labels. */
+    // "allowUnreachableCode": true,                     /* Disable error reporting for unreachable code. */
+
+    /* Completeness */
+    // "skipDefaultLibCheck": true,                      /* Skip type checking .d.ts files that are included with TypeScript. */
+    "skipLibCheck": true                                 /* Skip type checking all .d.ts files. */
+  }
+}

+ 13 - 0
yarn.lock

@@ -0,0 +1,13 @@
+# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
+# yarn lockfile v1
+
+
+"@types/node@^17.0.17":
+  "integrity" "sha512-e8PUNQy1HgJGV3iU/Bp2+D/DXh3PYeyli8LgIwsQcs1Ar1LoaWHSIT6Rw+H2rNJmiq6SNWiDytfx8+gYj7wDHw=="
+  "resolved" "https://registry.npmjs.org/@types/node/-/node-17.0.17.tgz"
+  "version" "17.0.17"
+
+"typescript@^4.5.5":
+  "integrity" "sha512-TCTIul70LyWe6IJWT8QSYeA54WQe8EjQFU4wY52Fasj5UKx88LNYKCgBEHcOMOrFF1rKGbD8v/xcNWVUq9SymA=="
+  "resolved" "https://registry.npmjs.org/typescript/-/typescript-4.5.5.tgz"
+  "version" "4.5.5"