From 054fa0dfa398a5760f0b8fb1fa8974850958d2c0 Mon Sep 17 00:00:00 2001
From: Vincent Q Thai <70456187+vthai321@users.noreply.github.com>
Date: Mon, 22 Apr 2024 16:53:57 -0700
Subject: [PATCH 01/44] Grant Dummy Operators Access to Dummy Property Field
(#2572)
This is a minor change that gives dummy operators from the workflow
parser access to the dummy property field. In the future,
the workflow parser will take advantage of the dummy property feature to
describe properties of equivalent operators from other workflows that
aren't currently present in Texera. Below is an image of what this
functionality looks like.
![Capture](https://github.com/Texera/texera/assets/70456187/0c8a0b13-4e2c-48d2-b4d7-f3de52af4f0e)
Co-authored-by: Xiaozhen Liu
- + - - - - + + + +
From 1f519856b4c73c81acf4199f7e08aa2a1e54956a Mon Sep 17 00:00:00 2001 From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:34:40 -0700 Subject: [PATCH 06/44] Fix macOS CI timeout on frontend unit tests (#1704) This PR intends to fix the macOS CI timeout issue as an urgent fix. It includes a few changes: 1. upgrade karma version to 6.4.3 2. enlarge the timeouts 3. adding waitwebpack wrapper according to https://github.com/karma-runner/karma-chrome-launcher/issues/154#issuecomment-986661937. --- core/gui/angular.json | 6 +- core/gui/karma.conf.js | 25 +++-- core/gui/karma.waitwebpack.js | 27 +++++ core/gui/package.json | 22 ++-- .../component/workspace.component.spec.ts | 1 - core/gui/yarn.lock | 100 ++++++++++++++---- 6 files changed, 137 insertions(+), 44 deletions(-) create mode 100644 core/gui/karma.waitwebpack.js diff --git a/core/gui/angular.json b/core/gui/angular.json index 6a11d768436..a76ee354362 100644 --- a/core/gui/angular.json +++ b/core/gui/angular.json @@ -2,7 +2,7 @@ "$schema": "./node_modules/@angular/cli/lib/config/schema.json", "version": 1, "projects": { - "texera-gui": { + "gui": { "root": "", "sourceRoot": "src", "projectType": "application", @@ -73,12 +73,12 @@ "serve": { "builder": "@angular-builders/custom-webpack:dev-server", "options": { - "browserTarget": "texera-gui:build", + "browserTarget": "gui:build", "proxyConfig": "proxy.config.json" }, "configurations": { "production": { - "browserTarget": "texera-gui:build:production" + "browserTarget": "gui:build:production" } } }, diff --git a/core/gui/karma.conf.js b/core/gui/karma.conf.js index dd82140de31..76d1e89bd83 100644 --- a/core/gui/karma.conf.js +++ b/core/gui/karma.conf.js @@ -4,12 +4,13 @@ module.exports = function(config) { config.set({ basePath: "", - frameworks: ["jasmine", "@angular-devkit/build-angular"], + frameworks: ['waitwebpack', "jasmine", "@angular-devkit/build-angular"], plugins: [ require("karma-jasmine"), 
require("karma-chrome-launcher"), require("karma-coverage"), require("karma-jasmine-html-reporter"), + require('./karma.waitwebpack'), require("@angular-devkit/build-angular/plugins/karma") ], client: { @@ -18,16 +19,28 @@ module.exports = function(config) { random: false // disable the random running order } }, + customLaunchers: { + ChromeHeadlessCustom: { + base: 'ChromeHeadless', + flags: [ + '--no-sandbox', + '--headless', + '--disable-gpu', + '--disable-translate', + '--disable-extensions' + ], + }, + }, reporters: ["progress", "kjhtml"], port: 9876, colors: true, logLevel: config.LOG_INFO, autoWatch: true, - browsers: ["Chrome"], + browsers: ["ChromeHeadlessCustom"], singleRun: false, - captureTimeout: 60000, - browserDisconnectTimeout: 10000, - browserDisconnectTolerance: 3, - browserNoActivityTimeout: 60000 + captureTimeout: 240000, + browserDisconnectTimeout: 60000, + browserDisconnectTolerance: 1, + browserNoActivityTimeout: 240000 }); }; diff --git a/core/gui/karma.waitwebpack.js b/core/gui/karma.waitwebpack.js new file mode 100644 index 00000000000..9901177f20d --- /dev/null +++ b/core/gui/karma.waitwebpack.js @@ -0,0 +1,27 @@ +function WebpackCompilerEventsPlugin(options) { + this.options = options; +} + +WebpackCompilerEventsPlugin.prototype.apply = function(compiler) { + compiler.hooks.afterDone.tap('webpack-compiler-events-plugin', this.options.afterDone) +}; + +function waitWebpackFactory(config) { + return new Promise(resolve => { + let isFirstBuild = true; + config.buildWebpack.webpackConfig.plugins.push(new WebpackCompilerEventsPlugin({ + afterDone: () => { + if (isFirstBuild) { + console.log('First webpack build done'); + isFirstBuild = false; + resolve(); + } + } + })); + }); +} +waitWebpackFactory.$inject = ['config']; + +module.exports = { + 'framework:waitwebpack': ['factory', waitWebpackFactory] +}; diff --git a/core/gui/package.json b/core/gui/package.json index 388c3d1864f..9f1a59b1fb2 100644 --- a/core/gui/package.json +++ 
b/core/gui/package.json @@ -11,7 +11,7 @@ "build:ci": "nx build --configuration=production --progress=false --source-map=false", "analyze": "ng build --configuration=production --stats-json && webpack-bundle-analyzer dist/stats.json", "test": "ng test", - "test:ci": "node --max_old_space_size=4096 ./node_modules/nx/bin/nx test --watch=false --progress=false --browsers=ChromeHeadless", + "test:ci": "node --max_old_space_size=6144 ./node_modules/nx/bin/nx test --watch=false --progress=false --browsers=ChromeHeadless", "prettier:fix": "prettier --write ./src", "lint": "ng lint", "eslint:fix": "yarn eslint --fix ./src", @@ -21,6 +21,7 @@ }, "private": true, "dependencies": { + "@ali-hm/angular-tree-component": "12.0.5", "@angular/animations": "16.2.12", "@angular/cdk": "16.2.12", "@angular/common": "16.2.12", @@ -40,7 +41,6 @@ "@stoplight/json-ref-resolver": "3.1.5", "@types/lodash-es": "4.17.4", "@types/plotly.js-basic-dist-min": "^2.12.4", - "@ali-hm/angular-tree-component": "12.0.5", "ajv": "8.10.0", "backbone": "1.4.1", "dagre": "0.8.5", @@ -58,22 +58,22 @@ "monaco-editor": "0.36.1", "monaco-languageclient": "5.0.1", "ng-zorro-antd": "16.2.2", + "ng2-pdf-viewer": "9.1.5", "ngx-color-picker": "12.0.1", + "ngx-file-drop": "16.0.0", + "ngx-image-viewer": "1.0.13", "ngx-json-viewer": "3.2.1", "ngx-markdown": "16.0.0", "papaparse": "5.4.1", - "ng2-pdf-viewer": "9.1.5", - "ngx-file-drop": "16.0.0", - "ngx-image-viewer": "1.0.13", - "read-excel-file": "5.7.1", - "sanitize-filename": "1.6.3", "path-browserify": "^1.0.1", "plotly.js-basic-dist-min": "^2.29.0", "popper.js": "1.16.1", "quill": "1.3.7", "quill-cursors": "3.1.2", + "read-excel-file": "5.7.1", "ring-buffer-ts": "1.0.3", "rxjs": "7.5.5", + "sanitize-filename": "1.6.3", "tinyqueue": "2.0.3", "tslib": "2.3.1", "uuid": "8.3.2", @@ -105,9 +105,9 @@ "@types/json-schema": "7.0.9", "@types/lodash": "4.14.179", "@types/node": "~18.15.5", + "@types/papaparse": "5.3.5", "@types/quill": "2.0.9", "@types/uuid": "8.3.4", - 
"@types/papaparse": "5.3.5", "@typescript-eslint/eslint-plugin": "7.0.2", "@typescript-eslint/parser": "7.0.2", "babel-plugin-dynamic-import-node": "^2.3.3", @@ -123,7 +123,7 @@ "git-describe": "4.1.0", "jasmine-core": "3.8.0", "jasmine-spec-reporter": "5.0.2", - "karma": "6.3.17", + "karma": "6.4.3", "karma-chrome-launcher": "3.1.0", "karma-coverage": "~2.2.1", "karma-jasmine": "4.0.1", @@ -135,10 +135,10 @@ "prettier-eslint-cli": "8.0.1", "rxjs-marbles": "7.0.1", "sass": "1.71.1", + "style-loader": "3.3.4", "ts-node": "4.1.0", "typescript": "5.1.6", - "webpack-bundle-analyzer": "4.5.0", - "style-loader": "~3.3.3" + "webpack-bundle-analyzer": "4.5.0" }, "browserslist": [ "defaults", diff --git a/core/gui/src/app/workspace/component/workspace.component.spec.ts b/core/gui/src/app/workspace/component/workspace.component.spec.ts index ab3806ec656..e69de29bb2d 100644 --- a/core/gui/src/app/workspace/component/workspace.component.spec.ts +++ b/core/gui/src/app/workspace/component/workspace.component.spec.ts @@ -1 +0,0 @@ -describe("WorkspaceComponent", () => {}); diff --git a/core/gui/yarn.lock b/core/gui/yarn.lock index bf67ffa68d6..2b9e4fc2aa5 100644 --- a/core/gui/yarn.lock +++ b/core/gui/yarn.lock @@ -457,11 +457,16 @@ "@babel/highlight" "^7.24.2" picocolors "^1.0.0" -"@babel/compat-data@^7.22.6", "@babel/compat-data@^7.22.9", "@babel/compat-data@^7.23.5", "@babel/compat-data@^7.24.1": +"@babel/compat-data@^7.22.6", "@babel/compat-data@^7.22.9", "@babel/compat-data@^7.24.1": version "7.24.1" resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.24.1.tgz#31c1f66435f2a9c329bb5716a6d6186c516c3742" integrity sha512-Pc65opHDliVpRHuKfzI+gSA4zcgr65O4cl64fFJIWEEh8JoHIHh0Oez1Eo8Arz8zq/JhgKodQaxEwUPRtZylVA== +"@babel/compat-data@^7.23.5": + version "7.24.4" + resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.24.4.tgz#6f102372e9094f25d908ca0d34fc74c74606059a" + integrity 
sha512-vg8Gih2MLK+kOkHJp4gBEIkyaIi00jgWot2D9QOmmfLC8jINSOzmCLta6Bvz/JSBCqnegV0L80jhxkol5GWNfQ== + "@babel/core@7.22.9": version "7.22.9" resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.22.9.tgz#bd96492c68822198f33e8a256061da3cf391f58f" @@ -504,7 +509,28 @@ json5 "^2.2.3" semver "^6.3.1" -"@babel/core@^7.12.3", "@babel/core@^7.23.2": +"@babel/core@^7.12.3": + version "7.24.4" + resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.24.4.tgz#1f758428e88e0d8c563874741bc4ffc4f71a4717" + integrity sha512-MBVlMXP+kkl5394RBLSxxk/iLTeVGuXTV3cIDXavPpMMqnSnt6apKgan/U8O3USWZCWZT/TbgfEpKa4uMgN4Dg== + dependencies: + "@ampproject/remapping" "^2.2.0" + "@babel/code-frame" "^7.24.2" + "@babel/generator" "^7.24.4" + "@babel/helper-compilation-targets" "^7.23.6" + "@babel/helper-module-transforms" "^7.23.3" + "@babel/helpers" "^7.24.4" + "@babel/parser" "^7.24.4" + "@babel/template" "^7.24.0" + "@babel/traverse" "^7.24.1" + "@babel/types" "^7.24.0" + convert-source-map "^2.0.0" + debug "^4.1.0" + gensync "^1.0.0-beta.2" + json5 "^2.2.3" + semver "^6.3.1" + +"@babel/core@^7.23.2": version "7.24.3" resolved "https://registry.yarnpkg.com/@babel/core/-/core-7.24.3.tgz#568864247ea10fbd4eff04dda1e05f9e2ea985c3" integrity sha512-5FcvN1JHw2sHJChotgx8Ek0lyuh4kCKelgMTTqhYJJtloNvUfpAFMeNQUtdlIaktwrSV9LtCdqwk48wL2wBacQ== @@ -535,7 +561,7 @@ "@jridgewell/trace-mapping" "^0.3.17" jsesc "^2.5.1" -"@babel/generator@^7.22.9", "@babel/generator@^7.23.0", "@babel/generator@^7.24.1": +"@babel/generator@^7.22.9", "@babel/generator@^7.23.0": version "7.24.1" resolved "https://registry.yarnpkg.com/@babel/generator/-/generator-7.24.1.tgz#e67e06f68568a4ebf194d1c6014235344f0476d0" integrity sha512-DfCRfZsBcrPEHUfuBMgbJ1Ut01Y/itOs+hY2nFLgqsqXd52/iSiVq5TITtUasIUgm+IIKdY2/1I7auiQOEeC9A== @@ -545,6 +571,16 @@ "@jridgewell/trace-mapping" "^0.3.25" jsesc "^2.5.1" +"@babel/generator@^7.24.1", "@babel/generator@^7.24.4": + version "7.24.4" + resolved 
"https://registry.yarnpkg.com/@babel/generator/-/generator-7.24.4.tgz#1fc55532b88adf952025d5d2d1e71f946cb1c498" + integrity sha512-Xd6+v6SnjWVx/nus+y0l1sxMOTOMBkyL4+BIdbALyatQnAe/SRVjANeDPSCYaX+i1iJmuGSKf3Z+E+V/va1Hvw== + dependencies: + "@babel/types" "^7.24.0" + "@jridgewell/gen-mapping" "^0.3.5" + "@jridgewell/trace-mapping" "^0.3.25" + jsesc "^2.5.1" + "@babel/helper-annotate-as-pure@7.22.5", "@babel/helper-annotate-as-pure@^7.22.5": version "7.22.5" resolved "https://registry.yarnpkg.com/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.22.5.tgz#e7f06737b197d580a01edf75d97e2c8be99d3882" @@ -747,7 +783,7 @@ "@babel/template" "^7.22.15" "@babel/types" "^7.22.19" -"@babel/helpers@^7.22.6", "@babel/helpers@^7.23.2", "@babel/helpers@^7.24.1": +"@babel/helpers@^7.22.6", "@babel/helpers@^7.23.2": version "7.24.1" resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.24.1.tgz#183e44714b9eba36c3038e442516587b1e0a1a94" integrity sha512-BpU09QqEe6ZCHuIHFphEFgvNSrubve1FtyMton26ekZ85gRGi6LrTF7zArARp2YvyFxloeiRmtSCq5sjh1WqIg== @@ -756,6 +792,15 @@ "@babel/traverse" "^7.24.1" "@babel/types" "^7.24.0" +"@babel/helpers@^7.24.1", "@babel/helpers@^7.24.4": + version "7.24.4" + resolved "https://registry.yarnpkg.com/@babel/helpers/-/helpers-7.24.4.tgz#dc00907fd0d95da74563c142ef4cd21f2cb856b6" + integrity sha512-FewdlZbSiwaVGlgT1DPANDuCHaDMiOo+D/IDYRFYjHOuv66xMSJ7fQwwODwRNAPkADIO/z1EoF/l2BCWlWABDw== + dependencies: + "@babel/template" "^7.24.0" + "@babel/traverse" "^7.24.1" + "@babel/types" "^7.24.0" + "@babel/highlight@^7.24.2": version "7.24.2" resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.24.2.tgz#3f539503efc83d3c59080a10e6634306e0370d26" @@ -766,11 +811,16 @@ js-tokens "^4.0.0" picocolors "^1.0.0" -"@babel/parser@^7.10.3", "@babel/parser@^7.14.7", "@babel/parser@^7.22.5", "@babel/parser@^7.22.7", "@babel/parser@^7.23.0", "@babel/parser@^7.24.0", "@babel/parser@^7.24.1": +"@babel/parser@^7.10.3", "@babel/parser@^7.22.5", 
"@babel/parser@^7.22.7", "@babel/parser@^7.23.0": version "7.24.1" resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.24.1.tgz#1e416d3627393fab1cb5b0f2f1796a100ae9133a" integrity sha512-Zo9c7N3xdOIQrNip7Lc9wvRPzlRtovHVE4lkz8WEDr7uYh/GMQhSiIgFxGIArRHYdJE5kxtZjAf8rT0xhdLCzg== +"@babel/parser@^7.14.7", "@babel/parser@^7.24.0", "@babel/parser@^7.24.1", "@babel/parser@^7.24.4": + version "7.24.4" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.24.4.tgz#234487a110d89ad5a3ed4a8a566c36b9453e8c88" + integrity sha512-zTvEBcghmeBma9QIGunWevvBAp4/Qu9Bdq+2k0Ot4fVMD6v3dsC9WOcRSKk7tRRyBM/53yKMJko9xOatGQAwSg== + "@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@^7.22.5", "@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@^7.24.1": version "7.24.1" resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression/-/plugin-bugfix-safari-id-destructuring-collision-in-function-expression-7.24.1.tgz#b645d9ba8c2bc5b7af50f0fe949f9edbeb07c8cf" @@ -2784,9 +2834,9 @@ integrity sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA== "@socket.io/component-emitter@~3.1.0": - version "3.1.0" - resolved "https://registry.yarnpkg.com/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz#96116f2a912e0c02817345b3c10751069920d553" - integrity sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg== + version "3.1.2" + resolved "https://registry.yarnpkg.com/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz#821f8442f4175d8f0467b9daf26e3a18e2d02af2" + integrity sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA== "@stoplight/json-ref-resolver@3.1.5": version "3.1.5" @@ -3081,9 +3131,9 @@ "@types/node" "*" "@types/node@*", "@types/node@>=10.0.0": - version "20.11.30" - resolved 
"https://registry.yarnpkg.com/@types/node/-/node-20.11.30.tgz#9c33467fc23167a347e73834f788f4b9f399d66f" - integrity sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw== + version "20.12.7" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.7.tgz#04080362fa3dd6c5822061aa3124f5c152cff384" + integrity sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg== dependencies: undici-types "~5.26.4" @@ -4453,11 +4503,16 @@ caniuse-api@^3.0.0: lodash.memoize "^4.1.2" lodash.uniq "^4.5.0" -caniuse-lite@^1.0.0, caniuse-lite@^1.0.30001464, caniuse-lite@^1.0.30001587, caniuse-lite@^1.0.30001599: +caniuse-lite@^1.0.0, caniuse-lite@^1.0.30001464, caniuse-lite@^1.0.30001599: version "1.0.30001600" resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001600.tgz#93a3ee17a35aa6a9f0c6ef1b2ab49507d1ab9079" integrity sha512-+2S9/2JFhYmYaDpZvo0lKkfvuKIglrx68MwOBqMGHhQsNkLjB5xtc/TGoEPs+MxjSyN/72qer2g97nzR641mOQ== +caniuse-lite@^1.0.30001587: + version "1.0.30001612" + resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001612.tgz#d34248b4ec1f117b70b24ad9ee04c90e0b8a14ae" + integrity sha512-lFgnZ07UhaCcsSZgWW0K5j4e69dK1u/ltrL9lTUiFOwNHs12S3UMIEYgBV0Z6C6hRDev7iRnMzzYmKabYdXF9g== + caseless@~0.12.0: version "0.12.0" resolved "https://registry.yarnpkg.com/caseless/-/caseless-0.12.0.tgz#1b681c21ff84033c826543090689420d187151dc" @@ -5782,9 +5837,9 @@ ejs@^3.1.7: jake "^10.8.5" electron-to-chromium@^1.4.668: - version "1.4.715" - resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.4.715.tgz#bb16bcf2a3537962fccfa746b5c98c5f7404ff46" - integrity sha512-XzWNH4ZSa9BwVUQSDorPWAUQ5WGuYz7zJUNpNif40zFCiCl20t8zgylmreNmn26h5kiyw2lg7RfTmeMBsDklqg== + version "1.4.749" + resolved "https://registry.yarnpkg.com/electron-to-chromium/-/electron-to-chromium-1.4.749.tgz#9869e2e258141da26a2272b58264584c3461279d" + integrity 
sha512-LRMMrM9ITOvue0PoBrvNIraVmuDbJV5QC9ierz/z5VilMdPOVMjOtpICNld3PuXuTZ3CHH/UPxX9gHhAPwi+0Q== elkjs@^0.8.2: version "0.8.2" @@ -8126,10 +8181,10 @@ karma-source-map-support@1.4.0: dependencies: source-map-support "^0.5.5" -karma@6.3.17: - version "6.3.17" - resolved "https://registry.yarnpkg.com/karma/-/karma-6.3.17.tgz#5d963fb52463b73e1b5892ecb54c8f21bb04ba1d" - integrity sha512-2TfjHwrRExC8yHoWlPBULyaLwAFmXmxQrcuFImt/JsAsSZu1uOWTZ1ZsWjqQtWpHLiatJOHL5jFjXSJIgCd01g== +karma@6.4.3: + version "6.4.3" + resolved "https://registry.yarnpkg.com/karma/-/karma-6.4.3.tgz#763e500f99597218bbb536de1a14acc4ceea7ce8" + integrity sha512-LuucC/RE92tJ8mlCwqEoRWXP38UMAqpnq98vktmS9SznSoUPPUJQbc91dHcxcunROvfQjdORVA/YFviH+Xci9Q== dependencies: "@colors/colors" "1.5.0" body-parser "^1.19.0" @@ -8150,7 +8205,7 @@ karma@6.3.17: qjobs "^1.2.0" range-parser "^1.2.1" rimraf "^3.0.2" - socket.io "^4.2.0" + socket.io "^4.7.2" source-map "^0.6.1" tmp "^0.2.1" ua-parser-js "^0.7.30" @@ -8953,7 +9008,6 @@ ng-zorro-antd@16.2.2: date-fns "^2.16.1" tslib "^2.3.0" - ng2-pdf-viewer@9.1.5: version "9.1.5" resolved "https://registry.yarnpkg.com/ng2-pdf-viewer/-/ng2-pdf-viewer-9.1.5.tgz#bcc9223eea96d2974f8c8377a333d45ff64a3caf" @@ -10968,7 +11022,7 @@ socket.io-parser@~4.2.4: "@socket.io/component-emitter" "~3.1.0" debug "~4.3.1" -socket.io@^4.2.0: +socket.io@^4.7.2: version "4.7.5" resolved "https://registry.yarnpkg.com/socket.io/-/socket.io-4.7.5.tgz#56eb2d976aef9d1445f373a62d781a41c7add8f8" integrity sha512-DmeAkF6cwM9jSfmp6Dr/5/mfMwb5Z5qRrSXLpo3Fq5SqyU8CMF15jIN4ZhfSwu35ksM1qmHZDQ/DK5XTccSTvA== @@ -11271,7 +11325,7 @@ strong-log-transformer@^2.1.0: minimist "^1.2.0" through "^2.3.4" -style-loader@^3.3.0, style-loader@~3.3.3: +style-loader@3.3.4, style-loader@^3.3.0: version "3.3.4" resolved "https://registry.yarnpkg.com/style-loader/-/style-loader-3.3.4.tgz#f30f786c36db03a45cbd55b6a70d930c479090e7" integrity 
sha512-0WqXzrsMTyb8yjZJHDqwmnwRJvhALK9LfRtRc6B4UTWe8AijYLZYZ9thuJTZc2VfQWINADW/j+LiJnfy2RoC1w== From 31676bdee7d28ae11f4ab1e614144b48f2f7fbfb Mon Sep 17 00:00:00 2001 From: yunyad <114192306+yunyad@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:25:21 -0700 Subject: [PATCH 07/44] Hugging face Sentiment Analysis (#2600) Issue #2587 This PR utilized a model from HuggingFace: cardiffnlp/twitter-roberta-base-sentiment-latest and introduced a new machine learning operator called "Hugging Face Sentiment Analysis". The input of this operator will be a selected column of text. The user needs to select the column for sentiment analysis. The outputs of this operator will add three columns: "huggingface_sentiment_positive", "huggingface_sentiment_neutral" and "huggingface_sentiment_negative" by default. They are in 'DOUBLE' format, rounded to four decimal places. Users are able to change the names based on their needs. This operator doesn't handle any text preprocessing. During testing, one HuggingFace operator utilized ~800MB RAM. 
Here is a demo and output schemas: ![Screen Recording 2024-04-21 at 9 50 57 PM](https://github.com/Texera/texera/assets/114192306/765c8f09-d016-4571-82f7-0217ebbee4cb) --- core/amber/requirements.txt | 3 +- .../workflow/common/operators/LogicalOp.scala | 7 +- .../HuggingFaceSentimentAnalysisOpDesc.scala | 96 ++++++++++++++++++ .../HuggingFaceSentimentAnalysis.png | Bin 0 -> 13831 bytes 4 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala create mode 100644 core/gui/src/assets/operator_images/HuggingFaceSentimentAnalysis.png diff --git a/core/amber/requirements.txt b/core/amber/requirements.txt index a3b201b6794..05094ee5be2 100644 --- a/core/amber/requirements.txt +++ b/core/amber/requirements.txt @@ -24,4 +24,5 @@ python-lsp-server[all]==1.5.0 python-lsp-server[websockets] bidict==0.22.0 cached_property -psutil \ No newline at end of file +psutil +transformers \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 016cb9a23f6..15be56f76db 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -34,6 +34,7 @@ import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc import edu.uci.ics.texera.workflow.operators.limit.LimitOpDesc +import edu.uci.ics.texera.workflow.operators.huggingFace.HuggingFaceSentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.projection.ProjectionOpDesc import 
edu.uci.ics.texera.workflow.operators.randomksampling.RandomKSamplingOpDesc import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc @@ -180,7 +181,11 @@ trait StateTransferFunc new Type(value = classOf[FunnelPlotOpDesc], name = "FunnelPlot"), new Type(value = classOf[TablesPlotOpDesc], name = "TablesPlot"), new Type(value = classOf[JavaUDFOpDesc], name = "JavaUDF"), - new Type(value = classOf[SortOpDesc], name = "Sort") + new Type(value = classOf[SortOpDesc], name = "Sort"), + new Type( + value = classOf[HuggingFaceSentimentAnalysisOpDesc], + name = "HuggingFaceSentimentAnalysis" + ) ) ) abstract class LogicalOp extends PortDescriptor with Serializable { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala new file mode 100644 index 00000000000..ac9a109df9d --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala @@ -0,0 +1,96 @@ +package edu.uci.ics.texera.workflow.operators.huggingFace + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.PythonOperatorDescriptor +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} + +class HuggingFaceSentimentAnalysisOpDesc extends PythonOperatorDescriptor { + @JsonProperty(value = "attribute", required = true) + @JsonPropertyDescription("column to perform sentiment analysis on") + @AutofillAttributeName + var attribute: String = _ + + @JsonProperty( + value = "Positive result attribute", + required 
= true, + defaultValue = "huggingface_sentiment_positive" + ) + @JsonPropertyDescription("column name of the sentiment analysis result (positive)") + var resultAttributePositive: String = _ + + @JsonProperty( + value = "Neutral result attribute", + required = true, + defaultValue = "huggingface_sentiment_neutral" + ) + @JsonPropertyDescription("column name of the sentiment analysis result (neutral)") + var resultAttributeNeutral: String = _ + + @JsonProperty( + value = "Negative result attribute", + required = true, + defaultValue = "huggingface_sentiment_negative" + ) + @JsonPropertyDescription("column name of the sentiment analysis result (negative)") + var resultAttributeNegative: String = _ + + override def generatePythonCode(): String = { + s"""from pytexera import * + |from transformers import pipeline + |from transformers import AutoModelForSequenceClassification + |from transformers import TFAutoModelForSequenceClassification + |from transformers import AutoTokenizer, AutoConfig + |import numpy as np + |from scipy.special import softmax + | + |class ProcessTupleOperator(UDFOperatorV2): + | + | def open(self): + | model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest" + | self.tokenizer = AutoTokenizer.from_pretrained(model_name) + | self.config = AutoConfig.from_pretrained(model_name) + | self.model = AutoModelForSequenceClassification.from_pretrained(model_name) + | + | @overrides + | def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]: + | encoded_input = self.tokenizer(tuple_["$attribute"], return_tensors='pt') + | output = self.model(**encoded_input) + | scores = softmax(output[0][0].detach().numpy()) + | ranking = np.argsort(scores)[::-1] + | labels = {"positive": "$resultAttributePositive", "neutral": "$resultAttributeNeutral", "negative": "$resultAttributeNegative"} + | for i in range(scores.shape[0]): + | label = labels[self.config.id2label[ranking[i]]] + | score = scores[ranking[i]] + | tuple_[label] = 
np.round(float(score), 4) + | yield tuple_""".stripMargin + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Hugging Face Sentiment Analysis", + "Analyzing Sentiments with a Twitter-Based Model from Hugging Face", + OperatorGroupConstants.MACHINE_LEARNING_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = { + if ( + resultAttributePositive == null || resultAttributePositive.trim.isEmpty || + resultAttributeNeutral == null || resultAttributeNeutral.trim.isEmpty || + resultAttributeNegative == null || resultAttributeNegative.trim.isEmpty + ) + return null + Schema + .builder() + .add(schemas(0)) + .add(resultAttributePositive, AttributeType.DOUBLE) + .add(resultAttributeNeutral, AttributeType.DOUBLE) + .add(resultAttributeNegative, AttributeType.DOUBLE) + .build() + } +} diff --git a/core/gui/src/assets/operator_images/HuggingFaceSentimentAnalysis.png b/core/gui/src/assets/operator_images/HuggingFaceSentimentAnalysis.png new file mode 100644 index 0000000000000000000000000000000000000000..673b8ea9077dacce2aa4ec76979d849cf7bb6775 GIT binary patch literal 13831 zcmch8Wmg?d)Ahj}g1fr~mxF6?cPD6&5Zs+WaCaxTyL)hVcXxL<9A2*b{SVKY4>fB| z_wKHinyTH^Vake9D2N1z0000*Mp{A@0D$;>g#f_AehPiZ5{pj(Ya%Bl0r>du$?Yso z006i;WF$n@-B&Ms5ZpBu8ii%Z0z~{-L*&E5DN!Zy2ds(9TuRg`cekt`N~eS>U9Yzu zUtX>C&gLIi7AL#XV@1f2$;ZXT9RKWFkX$OHCWye3L$Hg%vR^eXjz~fQkIsE2TY21u zUwyV-iO6`K&g&Ofy{dPb2P4V9z(Yd@h>-nX2|RP-NXbAZC=Zt$-GKO!cG&J|+gYUhfrM&7TVS~t(l2UntJh`StHjYy zp*Y&{1R>pLeNiNEwhxQ-i&3g+&p-`P)W}wqNjg#0u#+ZtG>53Cag@lv{#m{?86QVS zT$~~#(h6uV!OzI8MnZ8upt9Psi%1!f=UAfyx3llB?i;S}WDZ;Qbd9eC+l0hu`n}){ zF&ObMl+e$_n9=)5_Pv{>NMSWtQakQimphYxmnbpnYHlzOFAPfh6xx)hK za(dQl_5IR_lOxw<3HwlTKT+98e#k(` lb7D8Ie-49?iGak}0_D2rDtmjOeywWrX5ovEQo+o Nt>gy_pSM4z?}{eZVz zF9jwPt?fX6GQMwlr%luP3PYw-(izG)P&TZrQ2D|di1h2mU{D~jQDbB>V~==vjP_$~ zY9M)ULDy+l#M@{R??IRPemr{jmW@`8X-&`#lp`VMfYQFX0leRn61+eq{qZ5bmHF9F 
ziW6KIL6ng(X8ezL5pxsrA&B*T7SwIo0jYC=jeZnpUyq8)`Tp&4^|}cCU)Cs0csEav z8%&nT8YSkd-^s9P*@LChtG~qU9Sm-j6ZH50=rp%z+L `gEl?W4@5+l->;n8~f81@e63^;R*t3;S3u-}`E{heNuW(U_`*m FtHTM%pozEZ3o;qo7KxXVOstK-9#)V*k8Iey=`-5X$G_ z? pFA<&KN>L<{BDnT4pOWnR>6Pd_KXtyyl?0}6pVK(ZR{A~#brCwGRMp$H%J97gh5 z69{~P>?hLk?dlQ{M+*?_*ougBUZOFmXg{b3hjL&0G_}zniF>sA;J+(g#pW_FqRYlX zZah>IJ#DseuJfX8H8|bsv+MA_KH^}ai+U_ai@Pm36|*>_Dtgs_mI1!hQG0Hg6#1;0 zS;_zAciG{Y_V8*2)f}Pc{9zbSjXS%6{@g1=>u9oYQk^WOw7LS7tiGA+`c{WWP> zw&=T67yr?9K@CUL{9xhFPNKH2qr2nit4^}O-o-LD1n>z8=(!}We}$mU6{5V(6~_NC z8yJ#r&F-EHojB*GepnxJAQt5Tb=sCfjguzg=xJ{_Kz ~bAn{Z*BCiz!__W>qYq?ia8FQOH?C~-HYVn7{REHOSCBQ3)_`K~-1$%7MnrdG%( z;Vx3Bp2baQyE$!^*h=l+EeH*~fC@jABPjT5T(~c#Od*VO<|SmlO0EWfHTC&3 z4bZEn*%_5@ZFIJ13iZreYtAPbu)9}WtUsw715v6ik5((Qs_tvQ_*j+>b$nNh*&{J9 z)bJUUOPmxo|Hy}qE+rH{XAsSFB-PyYh0wh;oRo(;0Q!L%Y=S?j@J5FBS9;9zHkT^x z@Ud$f=;#V{BuMTzy)QK?g^01;+Beg6dXUV?o^wDvCOMQ1mNBm5azuS1bKK*C`7Qo{ z3ARgK1NR7h-4i_8hk)}G>(>V*nb9EaUY!7i%B+Vh;p$z$IrtEeXp^%ffrX%^Ku#YL zx9Y?{o*?dOXp6SVH)B_#Gl(1gF|1ngbmxX7QTHUjvR8km1^IN(oC{T&SBSAwVL-QB zv7YfgEAFcSqolLPXFjMRx$@u7Rq%vO+UV(2T0{|!MReVRk9=|!`0#}S^S3N=qBO2# z5+d(`)9iNS4CpKCb}@IjeDwZd;sx0dM91MTRO7JIy{d?brDdq;J= z80UpSzK7Sm&IjbDCo{z K5@HZ1OHHd5@PkMGj!o=fpKum#NP$h3wd2E7ixQQ3;E=>hF+d$* zWjFDV1x!3 |n+sQ|_VUi8Q z%^cs85e;(af3^4t7>uhyQvMCEp}shPQ*(J(A2^u9YQYI0apoSD0ed2gKujr-!NXK= zSE z1&iCF0YLJIx_%U#Ty(K4lkE5twb)KChodI}!qcZ3ndG09d5pT6&^E`Qu(O0VjbV33 z>>mgZYS E>(pg$%Ts0{lWdwbcR#E>&M* z9kUA(7kK9FM0wWQ TnTYS=yMG0EO5EhqF7U5UP+)8 zlpz{UG=8Q3pN|j -V||p6hR~oc3jF1I4f@dYhm8es?%-5W>J36*DBH&sbM8O&$Ww{eJrgIJ@XZ7 zjs_pdHwFqtc86= =A=su2c+r3{KfGL&6A~GmPOhN$gwvX?{yrQ7*HFWCMi{hk zYyW;Uuz#klC*QQB6$<{aSWDJfNtV=Xx;fPki^8XVNM>q!ca69|N }SuDX>EDNth+Pu ^NdP$w7* zg9_J;O*KXaY;(J}dcqq6z5g&}UBOoYRxG4}?F4$-;5E%;UXL6j2J?RfK@|`7D0iIK zzrT>G9{jNp=+RWFuitg6hqiudeX)(zAVW9XSS9M^$$C*S{UPWWsAE(o{cZ0pd}XSM zrQUv+>Kl&x0Uptv(DvQ87>DT0CFY9Cwn@oyHRM~3I*WHjW2WH<_cP~_v(HrLzB;4E zngxz?vG@B;s*=o9#0SmH m&VA_SrSI>7}7jp}N|B8MqGD6eT<(au~6v 
z4|2GoyG*AW#O>%0WN9*;N-rYO+&gPym)2X?Y-^K0LSvTgmmG5K7t%$ze~LG9VZR4x zSVzv{Axe5;$1;<(4mU&pJ!+diV_N#^YGJu6x@gUhe$JFo1&dd4pV=#}X^|dOC}^7u z*^fu)7_ YiVkQmZr__+%^y+ctRuj)3Us8 zjH=2#GdYsK6crctmN~c)(P_5c)c-*@R! %LdXmAE>jcV`DfTZzlK8Q)M!s+X zz5p?uyfY^V5TlPdpYwQO>fC=XzM;puibrDuZ_Y{0IOY&B*P50YfLi*^zY(x!b$8Nt zR%On(yJTNEn4pXZb?C6S&3wnGB#+A%v1$k7Wf#{rg~U>b1ilvr2m~*w)(KrX!D ;7(VOhI6KT>Xu8Ok-vhuWKoL^VGl0RWQk{BMo6?62Mo}(zjNJ9fJgJpI1LB8LG z>WX$Iuk_A?ZNjj9&$0-6Dif<|hv~01Ch4xcCO7gt3mDHuxdS_3*_Vcv9EFBsL|h3* ziH^r6Ov^86jux&QVzp=O0u3dz5gloV<;c37H$CFk8)kdrn)_bv_(g;uJtpzX;qT|T z`mHAi$XJ3v=w>X^Q2`HUCP6+A&p-!7@sF>=7i0-CHVBl3%Mg>Mzs_SRGQ2XeBi!TN zk@pRYncOy$XlF(Z8banotKeG4S{0x8mrxH>jy~FzYp}{SD Date: Fri, 26 Apr 2024 14:35:23 -0700 Subject: [PATCH 08/44] Fix Result Panel Not Showing Results Occasionally (#2623) This PR fixes an issue of result panel that could cause results not being shown from time to time (#2612 ). The issue is caused by #2604. The `currentPageIndex` should start from 1 but #2604 introduced a logic that could change the index to 0. Adding a check in the condition could solve this issue. I tested and did not see problems anymore. 
--- .../result-table-frame/result-table-frame.component.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/gui/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts b/core/gui/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts index 09d9155faae..04d2d75d307 100644 --- a/core/gui/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts +++ b/core/gui/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.ts @@ -97,7 +97,7 @@ export class ResultTableFrameComponent implements OnInit, OnChanges { this.resizeService.currentSize.pipe(untilDestroyed(this)).subscribe(size => { this.adjustPageSizeBasedOnPanelSize(size.height); let currentPageNum: number = Math.ceil(this.totalNumTuples / this.pageSize); - while (this.currentPageIndex > currentPageNum) { + while (this.currentPageIndex > currentPageNum && this.currentPageIndex > 1) { this.currentPageIndex -= 1; } }); From b0f08d43d15f9ccbbdf941a2db2cecb74808f3e3 Mon Sep 17 00:00:00 2001 From: Bob Bai <43344272+bobbai00@users.noreply.github.com> Date: Fri, 26 Apr 2024 14:57:10 -0700 Subject: [PATCH 09/44] Save result file to existing dataset (#2616) This PR change the logic of exporting result. ### Logic Comparison - old logic: the result file will be saved to the `Result` dashboard - new logic: the result file will be saved to one of the existing datasets, user can choose which dataset to store. 
### Demo ![2024-04-24 09 17 18](https://github.com/Texera/texera/assets/43344272/9e1f5084-80e9-4101-9159-24489ccd4443) --------- Co-authored-by: Xiaozhen Liu --- .../request/ResultExportRequest.scala | 3 +- .../user/dataset/DatasetResource.scala | 150 ++++++++++++------ .../web/service/ResultExportService.scala | 18 ++- core/gui/src/app/app.module.ts | 2 + .../component/menu/menu.component.ts | 16 +- .../result-exportation.component.html | 29 ++++ .../result-exportation.component.scss | 42 +++++ .../result-exportation.component.ts | 59 +++++++ .../workflow-result-export.service.ts | 7 +- .../types/workflow-websocket.interface.ts | 1 + 10 files changed, 274 insertions(+), 53 deletions(-) create mode 100644 core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html create mode 100644 core/gui/src/app/workspace/component/result-exportation/result-exportation.component.scss create mode 100644 core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala index 3b88383589c..3143ad2dc62 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala @@ -5,5 +5,6 @@ case class ResultExportRequest( workflowId: Int, workflowName: String, operatorId: String, - operatorName: String + operatorName: String, + datasetIds: Array[Int] ) extends TexeraWebSocketRequest diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala index 5da0a2925c2..279188f936c 100644 --- 
a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/dataset/DatasetResource.scala @@ -37,7 +37,7 @@ import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.{ ERR_DATASET_CREATION_FAILED_MESSAGE, ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE, context, - createNewDatasetVersion, + createNewDatasetVersionFromFormData, getDashboardDataset, getDatasetByID, getDatasetLatestVersion, @@ -55,7 +55,6 @@ import org.jooq.types.UInteger import java.io.{InputStream, OutputStream} import java.net.URLDecoder import java.nio.charset.StandardCharsets -import java.nio.file.Paths import java.util import java.util.concurrent.locks.ReentrantLock import javax.annotation.security.RolesAllowed @@ -72,6 +71,7 @@ import javax.ws.rs.{ QueryParam } import javax.ws.rs.core.{MediaType, Response, StreamingOutput} +import scala.collection.mutable import scala.jdk.CollectionConverters._ object DatasetResource { @@ -187,18 +187,97 @@ object DatasetResource { latestVersion } - // this function create a new dataset version - // the dataset is identified by did, the file changes/removals are contained in multiPart form - // it returns the created dataset version if creation succeed, else return None - // concurrency control is performed here: the thread has to have the lock in order to create the new version - private def createNewDatasetVersion( + // DatasetOperation defines the operations that will be applied when creating a new dataset version + private case class DatasetOperation( + filesToAdd: Map[java.nio.file.Path, InputStream], + filesToRemove: List[java.nio.file.Path] + ) + + private def parseUserUploadedFormToDatasetOperations( + did: UInteger, + multiPart: FormDataMultiPart + ): DatasetOperation = { + val datasetPath = PathUtils.getDatasetPath(did) // Obtain dataset base path + + // Mutable collections for constructing DatasetOperation + val 
filesToAdd = mutable.Map[java.nio.file.Path, InputStream]() + val filesToRemove = mutable.ListBuffer[java.nio.file.Path]() + + val fields = multiPart.getFields.keySet.iterator() // Get all field names + + // for multipart, each file-related operation's key starts with file: + // the operation is either upload or remove + // for file:upload, the file path will be suffixed to it, e.g. file:upload:a/b/c.csv The value will be the file content + // for file:remove, the value would be filepath1,filepath2 + while (fields.hasNext) { + val fieldName = fields.next() + val bodyPart = multiPart.getField(fieldName) // Get the body part for the field + + if (fieldName.startsWith(FILE_OPERATION_UPLOAD_PREFIX)) { + // Determine the relative file path and resolve it with the dataset base path + val filePath = datasetPath.resolve(fieldName.substring(FILE_OPERATION_UPLOAD_PREFIX.length)) + val inputStream = + bodyPart.getValueAs(classOf[InputStream]) // Get input stream from multipart + filesToAdd.put(filePath, inputStream) // Add to the map for uploads + } else if (fieldName.startsWith(FILE_OPERATION_REMOVE_PREFIX)) { + val filePathsValue = + bodyPart.getValueAs(classOf[String]) // Get the file paths as a comma-separated string + val filePaths = filePathsValue.split(",") // Split into individual file paths + filePaths.foreach { filePath => + val normalizedFilePath = filePath.stripPrefix("/") // Normalize path + val physicalFilePath = datasetPath.resolve(normalizedFilePath) // Convert to full path + filesToRemove += physicalFilePath // Add to the list for removals + } + } + } + + // Return a new DatasetOperation with the map and list + DatasetOperation(filesToAdd.toMap, filesToRemove.toList) + } + + // add file(s) to a dataset, a new version will be created + def createNewDatasetVersionByAddingFiles( + did: UInteger, + uid: UInteger, + filesToAdd: Map[java.nio.file.Path, InputStream] + ): Option[DashboardDatasetVersion] = { + applyDatasetOperationToCreateNewVersion( + context, + did, 
+ uid, + "", + DatasetOperation(filesToAdd, List()) + ) + } + + // create a new dataset version using the form data from frontend + def createNewDatasetVersionFromFormData( ctx: DSLContext, did: UInteger, uid: UInteger, userProvidedVersionName: String, multiPart: FormDataMultiPart ): Option[DashboardDatasetVersion] = { + val datasetOperation = parseUserUploadedFormToDatasetOperations(did, multiPart) + applyDatasetOperationToCreateNewVersion( + ctx, + did, + uid, + userProvidedVersionName, + datasetOperation + ) + } + // apply the dataset operation to create a new dataset version + // it returns the created dataset version if creation succeed, else return None + // concurrency control is performed here: the thread has to have the lock in order to create the new version + private def applyDatasetOperationToCreateNewVersion( + ctx: DSLContext, + did: UInteger, + uid: UInteger, + userProvidedVersionName: String, + datasetOperation: DatasetOperation + ): Option[DashboardDatasetVersion] = { // Acquire or Create the lock for dataset of {did} val lock = DatasetResource.datasetLocks.getOrElseUpdate(did, new ReentrantLock()) @@ -207,54 +286,31 @@ object DatasetResource { } lock.lock() try { - val datasetPath = Paths.get(PathUtils.getDatasetPath(did).toString) + val datasetPath = PathUtils.getDatasetPath(did) - // this is used to check if file operation happens - var fileOperationHappens = false - // for multipart, each file-related operation's key starts with file: - // the operation is either upload or remove - // for file:upload, the file path will be suffixed to it, e.g. 
file:upload:a/b/c.csv The value will be the file content - // for file:remove, the value would be filepath1,filepath2 - val fields = multiPart.getFields().keySet().iterator() + if (datasetOperation.filesToAdd.isEmpty && datasetOperation.filesToRemove.isEmpty) { + return None + } val versionName = generateDatasetVersionName(ctx, did, userProvidedVersionName) val commitHash = GitVersionControlLocalFileStorage.withCreateVersion( datasetPath, versionName, () => { - while (fields.hasNext) { - val fieldName = fields.next() - val bodyPart = multiPart.getField(fieldName) - - if (fieldName.startsWith(FILE_OPERATION_UPLOAD_PREFIX)) { - // val contentDisposition = bodyPart.getContentDisposition - // val contentType = bodyPart.getMediaType.toString - val filePath = - datasetPath.resolve(fieldName.substring(FILE_OPERATION_UPLOAD_PREFIX.length)) - // TODO: be careful with the string operation here - val value: InputStream = bodyPart.getValueAs(classOf[InputStream]) - GitVersionControlLocalFileStorage.writeFileToRepo(datasetPath, filePath, value) - fileOperationHappens = true - } else if (fieldName.startsWith(FILE_OPERATION_REMOVE_PREFIX)) { - val filePathsValue = bodyPart.getValueAs(classOf[String]) - val filePaths = filePathsValue.split(",") - filePaths.foreach { filePath => - val normalizedFilePath = filePath.stripPrefix("/") - GitVersionControlLocalFileStorage.removeFileFromRepo( - datasetPath, - datasetPath.resolve(normalizedFilePath) - ) - } - fileOperationHappens = true - } + datasetOperation.filesToAdd.foreach { + case (filePath, fileStream) => + GitVersionControlLocalFileStorage.writeFileToRepo(datasetPath, filePath, fileStream) + } + + datasetOperation.filesToRemove.foreach { filePath => + GitVersionControlLocalFileStorage.removeFileFromRepo( + datasetPath, + filePath + ) } } ) - if (!fileOperationHappens) { - return None - } - // create the DatasetVersion that persists in the DB val datasetVersion = new DatasetVersion() @@ -370,7 +426,8 @@ class DatasetResource { 
GitVersionControlLocalFileStorage.initRepo(datasetPath) // create the initial version of the dataset - val createdVersion = createNewDatasetVersion(ctx, did, uid, initialVersionName, files) + val createdVersion = + createNewDatasetVersionFromFormData(ctx, did, uid, initialVersionName, files) createdVersion match { case Some(_) => @@ -507,7 +564,8 @@ class DatasetResource { throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE) } // create the version - val createdVersion = createNewDatasetVersion(ctx, did, uid, versionName, multiPart) + val createdVersion = + createNewDatasetVersionFromFormData(ctx, did, uid, versionName, multiPart) createdVersion match { case None => diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 8b15f611cd2..403cc6f035b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -15,8 +15,10 @@ import edu.uci.ics.texera.Utils.retry import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse import edu.uci.ics.texera.web.resource.GoogleResource +import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles + +import edu.uci.ics.texera.web.resource.dashboard.user.dataset.utils.PathUtils import edu.uci.ics.texera.web.resource.dashboard.user.file.UserFileResource -import edu.uci.ics.texera.web.resource.dashboard.user.project.ProjectResource import edu.uci.ics.texera.web.resource.dashboard.user.workflow.WorkflowVersionResource import edu.uci.ics.texera.workflow.common.storage.OpResultStorage import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -98,17 +100,27 @@ class ResultExportService(opResultStorage: OpResultStorage, wId: 
UInteger) { .truncatedTo(ChronoUnit.SECONDS) .format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")) val fileName = s"${request.workflowName}-v$latestVersion-${request.operatorName}-$timestamp.csv" + + // add files to datasets + request.datasetIds.foreach(did => { + val datasetPath = PathUtils.getDatasetPath(UInteger.valueOf(did)) + val filePath = datasetPath.resolve(fileName) + createNewDatasetVersionByAddingFiles( + UInteger.valueOf(did), + uid, + Map(filePath -> new ByteArrayInputStream(stream.toByteArray)) + ) + }) UserFileResource.saveFile( uid, fileName, new ByteArrayInputStream(stream.toByteArray), "generated by workflow" ) - val addToProjectStatus = ProjectResource.addExportedFileToProject(uid, wId, fileName) ResultExportResponse( "success", - s"File saved to User Dashboard as $fileName $addToProjectStatus" + s"File saved to User Dashboard as $fileName to Datasets ${request.datasetIds.mkString(",")}" ) } diff --git a/core/gui/src/app/app.module.ts b/core/gui/src/app/app.module.ts index e8095897b60..cd69a68d564 100644 --- a/core/gui/src/app/app.module.ts +++ b/core/gui/src/app/app.module.ts @@ -133,6 +133,7 @@ import { NzNoAnimationModule } from "ng-zorro-antd/core/no-animation"; import { TreeModule } from "@ali-hm/angular-tree-component"; import { EnvironmentComponent } from "./workspace/component/left-panel/environment/environment.component"; import { FileSelectionComponent } from "./workspace/component/file-selection/file-selection.component"; +import { ResultExportationComponent } from "./workspace/component/result-exportation/result-exportation.component"; registerLocaleData(en); @@ -149,6 +150,7 @@ registerLocaleData(en); TimeTravelComponent, WorkflowEditorComponent, ResultPanelComponent, + ResultExportationComponent, OperatorLabelComponent, DashboardComponent, AdminUserComponent, diff --git a/core/gui/src/app/workspace/component/menu/menu.component.ts b/core/gui/src/app/workspace/component/menu/menu.component.ts index bb25e164dfc..7a8198690c8 
100644 --- a/core/gui/src/app/workspace/component/menu/menu.component.ts +++ b/core/gui/src/app/workspace/component/menu/menu.component.ts @@ -27,6 +27,9 @@ import { OperatorMenuService } from "../../service/operator-menu/operator-menu.s import { CoeditorPresenceService } from "../../service/workflow-graph/model/coeditor-presence.service"; import { Subscription, timer } from "rxjs"; import { isDefined } from "../../../common/util/predicate"; +import { FileSelectionComponent } from "../file-selection/file-selection.component"; +import { NzModalService } from "ng-zorro-antd/modal"; +import { ResultExportationComponent } from "../result-exportation/result-exportation.component"; /** * MenuComponent is the top level menu bar that shows @@ -95,7 +98,8 @@ export class MenuComponent implements OnInit { private userProjectService: UserProjectService, private notificationService: NotificationService, public operatorMenu: OperatorMenuService, - public coeditorPresenceService: CoeditorPresenceService + public coeditorPresenceService: CoeditorPresenceService, + private modalService: NzModalService ) { workflowWebsocketService .subscribeToEvent("ExecutionDurationUpdateEvent") @@ -326,7 +330,15 @@ export class MenuComponent implements OnInit { * */ public onClickExportExecutionResult(exportType: string): void { - this.workflowResultExportService.exportWorkflowExecutionResult(exportType, this.currentWorkflowName); + const modal = this.modalService.create({ + nzTitle: "Export Result and Save to a Dataset", + nzContent: ResultExportationComponent, + nzData: { + exportType: exportType, + workflowName: this.currentWorkflowName, + }, + nzFooter: null, + }); } /** diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html new file mode 100644 index 00000000000..b3498651ed4 --- /dev/null +++ 
b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html @@ -0,0 +1,29 @@ + + +diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.scss b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.scss new file mode 100644 index 00000000000..00ecc8b605d --- /dev/null +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.scss @@ -0,0 +1,42 @@ +.centered-container { + display: flex; + flex-direction: column; /* Arrange children vertically */ + align-items: center; /* Center horizontally */ + justify-content: center; /* Center vertically */ + text-align: center; +} + +.datasets-container { + background-color: white; +} + +.dataset-id-container { + background-color: grey; + color: white; + width: 35px; + height: 35px; + border-radius: 50%; + display: flex; + justify-content: center; + align-items: center; /* Center vertically */ + font-size: 14px; + margin-left: 5px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + overflow: hidden; +} + +.auto-option-content { + width: 100%; + height: 50%; + display: flex; + justify-content: space-between; +} + +.dataset-name { + margin-left: 10px; + flex-grow: 1; /* This will make the name take up the remaining space */ +} + +.dataset-option-link-btn { + margin-right: 5px; +} diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts new file mode 100644 index 00000000000..4a50816b902 --- /dev/null +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -0,0 +1,59 @@ +import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; +import { Component, inject, Input, OnInit } from "@angular/core"; +import { environment } from "../../../../environments/environment"; +import { WorkflowResultExportService } from 
"../../service/workflow-result-export/workflow-result-export.service"; +import { DashboardDataset } from "../../../dashboard/user/type/dashboard-dataset.interface"; +import { DatasetService } from "../../../dashboard/user/service/user-dataset/dataset.service"; +import { NZ_MODAL_DATA, NzModalRef } from "ng-zorro-antd/modal"; + +@UntilDestroy() +@Component({ + selector: "texera-result-exportation-modal", + templateUrl: "./result-exportation.component.html", + styleUrls: ["./result-exportation.component.scss"], +}) +export class ResultExportationComponent implements OnInit { + exportType: string = inject(NZ_MODAL_DATA).exportType; + + workflowName: string = inject(NZ_MODAL_DATA).workflowName; + + inputDatasetName = ""; + + userAccessibleDatasets: DashboardDataset[] = []; + filteredUserAccessibleDatasets: DashboardDataset[] = []; + + constructor( + private modalRef: NzModalRef, + private workflowResultExportService: WorkflowResultExportService, + private datasetService: DatasetService + ) {} + + ngOnInit(): void { + this.datasetService + .retrieveAccessibleDatasets() + .pipe(untilDestroyed(this)) + .subscribe(datasets => { + this.userAccessibleDatasets = datasets.filter(dataset => dataset.accessPrivilege === "WRITE"); + this.filteredUserAccessibleDatasets = [...this.userAccessibleDatasets]; + }); + } + + onUserInputDatasetName(event: Event): void { + const value = this.inputDatasetName; + + if (value) { + this.filteredUserAccessibleDatasets = this.userAccessibleDatasets.filter( + dataset => dataset.dataset.did && dataset.dataset.name.toLowerCase().includes(value) + ); + } + } + + onClickSaveResultFileToDatasets(dataset: DashboardDataset) { + if (dataset.dataset.did) { + this.workflowResultExportService.exportWorkflowExecutionResult(this.exportType, this.workflowName, [ + dataset.dataset.did, + ]); + this.modalRef.close(); + } + } +} diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts 
b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index 428a4f3beff..eab238fcf65 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -60,7 +60,11 @@ export class WorkflowResultExportService { /** * export the workflow execution result according the export type */ - exportWorkflowExecutionResult(exportType: string, workflowName: string): void { + exportWorkflowExecutionResult( + exportType: string, + workflowName: string, + datasetIds: ReadonlyArray+ ++ +++{{dataset.dataset.did?.toString()}}+ + {{ dataset.dataset.name }} + + += [] + ): void { if (!environment.exportExecutionResultEnabled || !this.hasResultToExport) { return; } @@ -86,6 +90,7 @@ export class WorkflowResultExportService { workflowName, operatorId, operatorName, + datasetIds, }); }); } diff --git a/core/gui/src/app/workspace/types/workflow-websocket.interface.ts b/core/gui/src/app/workspace/types/workflow-websocket.interface.ts index d70ae9aa106..655f2716071 100644 --- a/core/gui/src/app/workspace/types/workflow-websocket.interface.ts +++ b/core/gui/src/app/workspace/types/workflow-websocket.interface.ts @@ -99,6 +99,7 @@ export type ResultExportRequest = Readonly<{ workflowName: string; operatorId: string; operatorName: string; + datasetIds: ReadonlyArray ; }>; export type ResultExportResponse = Readonly<{ From c488ef8e196ad9bc7b2ca08837ed1f8b9724a5ae Mon Sep 17 00:00:00 2001 From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 26 Apr 2024 20:42:16 -0700 Subject: [PATCH 10/44] Include acknowledgement for NIDDK (#2626) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d43a9a54edb..89043f61fda 100644 --- a/README.md +++ b/README.md @@ -112,4 +112,7 @@ To try our collaborative data analytics in _Demonstration of Collaborative and I This 
project is supported by the National Science Foundation under the awards [III 1745673](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1745673), [III 2107150](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2107150), AWS Research Credits, and Google Cloud Platform Education Programs. +* This project is supported by NIH NIDDK. + + * [Yourkit](https://www.yourkit.com/) has given an open source license to use their profiler in this project. From e573a177370c64c37655775406bb3d50c7705c82 Mon Sep 17 00:00:00 2001 From: yunyad <114192306+yunyad@users.noreply.github.com> Date: Sun, 28 Apr 2024 12:29:58 -0700 Subject: [PATCH 11/44] Image Visualization Operator shows multiple images (#2627) This PR fixed Issue #2625. Image visualization operator can show multiple images. ![Screen Recording 2024-04-28 at 10 24 31 AM](https://github.com/Texera/texera/assets/114192306/508c595e-ae02-456e-bb93-7ee18627e821) --- .../ImageViz/ImageVisualizerOpDesc.scala | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/ImageViz/ImageVisualizerOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/ImageViz/ImageVisualizerOpDesc.scala index c9ade4a4d3d..acc39333365 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/ImageViz/ImageVisualizerOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/visualization/ImageViz/ImageVisualizerOpDesc.scala @@ -8,7 +8,6 @@ import edu.uci.ics.texera.workflow.common.operators.PythonOperatorDescriptor import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, Schema} import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} import edu.uci.ics.texera.workflow.operators.visualization.{ - ImageUtility, VisualizationConstants, VisualizationOperator } @@ -44,21 +43,34 @@ class ImageVisualizerOpDesc extends 
VisualizationOperator with PythonOperatorDes override def generatePythonCode(): String = { val finalCode = s""" |from pytexera import * - |from PIL import Image - |import numpy as np + |import base64 + |from io import BytesIO | |class ProcessTupleOperator(UDFOperatorV2): + | images_html = [] | | def render_error(self, error_msg): - | return ''' Image is not available.
- |Reason is: {}
- | '''.format(error_msg) + | return f'Image is not available.
Reason: {error_msg}
' + | + | def encode_image_to_html(self, binary_image_data): + | try: + | encoded_image_data = base64.b64encode(binary_image_data) + | encoded_image_str = encoded_image_data.decode("utf-8") + | html = f'' + | return html + | except Exception as e: + | return self.render_error("Binary input is not valid") | | @overrides | def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]: | ${createBinaryData()} - | ${ImageUtility.encodeImageToHTML()} - | yield {"html-content": html} + | self.images_html.append(self.encode_image_to_html(binary_image_data)) + | yield + | + | @overrides + | def on_finish(self, port: int) -> Iterator[Optional[TupleLike]]: + | all_images_html = "" + "".join(self.images_html) + "" + | yield {"html-content": all_images_html} |""".stripMargin finalCode } From 1ea2e20b48abb53dfdf64ee121668e4e5c3d2a0d Mon Sep 17 00:00:00 2001 From: Xiaozhen LiuDate: Sun, 28 Apr 2024 22:48:53 -0700 Subject: [PATCH 12/44] Fix Comment Box Positions Not Persisted (#2620) On workflow editor, the positions of comment boxes are not persisted because of a wrong filter in #1674. This PR fixes it. Co-authored-by: Xinyuan Lin --- .../model/workflow-action.service.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts b/core/gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts index 1f1b5a814fa..fecfd7237ef 100644 --- a/core/gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts +++ b/core/gui/src/app/workspace/service/workflow-graph/model/workflow-action.service.ts @@ -841,6 +841,17 @@ export class WorkflowActionService { const offsetY = movedElement.newPosition.y - movedElement.oldPosition.y; this.jointGraphWrapper.setListenPositionChange(false); this.undoRedoService.setListenJointCommand(false); + // Persistence and shared-editing syncing for comment boxes have different interfaces. 
+ // Setting positions inside commentBoxes here only for persistence. + // Syncing uses elementPositionMap. + selectedElements + .filter(elementID => elementID.includes("commentBox")) + .forEach(elementID => { + this.texeraGraph.sharedModel.commentBoxMap + .get(elementID) + ?.set("commentBoxPosition", this.jointGraphWrapper.getElementPosition(elementID)); + }); + // Move other highlighted operators. selectedElements .filter(elementID => elementID !== movedElement.elementID) .forEach(elementID => { @@ -849,12 +860,6 @@ export class WorkflowActionService { elementID, this.jointGraphWrapper.getElementPosition(elementID) ); - // The position of comment box is included in its object, so we only set it here for persistence. - if (elementID.includes("commentBox")) { - this.texeraGraph.sharedModel.commentBoxMap - .get(elementID) - ?.set("commentBoxPosition", this.jointGraphWrapper.getElementPosition(elementID)); - } }); this.jointGraphWrapper.setListenPositionChange(true); this.undoRedoService.setListenJointCommand(true); From c55a76ca479119f52751b9b781fd2fd0375583cc Mon Sep 17 00:00:00 2001 From: Xiaozhen Liu Date: Mon, 29 Apr 2024 10:02:06 -0700 Subject: [PATCH 13/44] Fix Incorrect Size for Revert Version Button (#2624) See title. 
Before: After: --- core/gui/src/app/workspace/component/menu/menu.component.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/gui/src/app/workspace/component/menu/menu.component.html b/core/gui/src/app/workspace/component/menu/menu.component.html index e63b94afb0e..32dacb63a1a 100644 --- a/core/gui/src/app/workspace/component/menu/menu.component.html +++ b/core/gui/src/app/workspace/component/menu/menu.component.html @@ -40,7 +40,8 @@ nz-button nzType="primary" [disabled]="!workflowVersionService.modificationEnabledBeforeTempWorkflow" - (click)="revertToVersion()"> + (click)="revertToVersion()" + style="width: 160px"> Restore this version {{autoSaveState}} From 1de6a04e44393f23dfe75d0c27dc0135978dc58e Mon Sep 17 00:00:00 2001 From: Bob Bai <43344272+bobbai00@users.noreply.github.com> Date: Sat, 4 May 2024 14:00:11 -0700 Subject: [PATCH 14/44] Add Hugging Face Text Summarization Operator (#2645) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a new operator, called `HuggingFaceTextSummary`. The operator utilizes a pre-trained model on [hugging face](https://huggingface.co/mrm8488/bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization). It takes the input text and output the summary of the input text. ### Runtime resource occupation: around 236MB ### Required packages - transformers - torch ### Properties This operator has the following properties: - `attribute`: required. The column to perform the text summary - `resultAttribute`: optional. The result summary's column name. 
Default value is `summary` ### Demo ![2024-05-03 23 44 49](https://github.com/Texera/texera/assets/43344272/9566499e-81f2-4155-a14e-7496c593e3cf) --- core/amber/operator-requirements.txt | 3 +- .../workflow/common/operators/LogicalOp.scala | 13 +++- .../HuggingFaceTextSummarizationOpDesc.scala | 70 ++++++++++++++++++ .../HuggingFaceTextSummarization.png | Bin 0 -> 13831 bytes 4 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceTextSummarizationOpDesc.scala create mode 100644 core/gui/src/assets/operator_images/HuggingFaceTextSummarization.png diff --git a/core/amber/operator-requirements.txt b/core/amber/operator-requirements.txt index 89ec61c5bb9..3e5acb42367 100644 --- a/core/amber/operator-requirements.txt +++ b/core/amber/operator-requirements.txt @@ -2,4 +2,5 @@ wordcloud plotly praw pillow -pybase64 \ No newline at end of file +pybase64 +torch \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 15be56f76db..db7543ad23c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -4,9 +4,9 @@ import com.fasterxml.jackson.annotation.JsonSubTypes.Type import com.fasterxml.jackson.annotation.{ JsonIgnore, JsonProperty, + JsonPropertyDescription, JsonSubTypes, - JsonTypeInfo, - JsonPropertyDescription + JsonTypeInfo } import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp @@ -34,7 +34,10 @@ import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import 
edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc import edu.uci.ics.texera.workflow.operators.limit.LimitOpDesc -import edu.uci.ics.texera.workflow.operators.huggingFace.HuggingFaceSentimentAnalysisOpDesc +import edu.uci.ics.texera.workflow.operators.huggingFace.{ + HuggingFaceSentimentAnalysisOpDesc, + HuggingFaceTextSummarizationOpDesc +} import edu.uci.ics.texera.workflow.operators.projection.ProjectionOpDesc import edu.uci.ics.texera.workflow.operators.randomksampling.RandomKSamplingOpDesc import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc @@ -185,6 +188,10 @@ trait StateTransferFunc new Type( value = classOf[HuggingFaceSentimentAnalysisOpDesc], name = "HuggingFaceSentimentAnalysis" + ), + new Type( + value = classOf[HuggingFaceTextSummarizationOpDesc], + name = "HuggingFaceTextSummarization" ) ) ) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceTextSummarizationOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceTextSummarizationOpDesc.scala new file mode 100644 index 00000000000..cf697127f9f --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/huggingFace/HuggingFaceTextSummarizationOpDesc.scala @@ -0,0 +1,70 @@ +package edu.uci.ics.texera.workflow.operators.huggingFace + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName +import edu.uci.ics.texera.workflow.common.operators.PythonOperatorDescriptor +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} + +class HuggingFaceTextSummarizationOpDesc extends PythonOperatorDescriptor { + @JsonProperty(value = "attribute", required = true) + 
@JsonPropertyDescription("attribute to perform text summarization on") + @AutofillAttributeName + var attribute: String = _ + + @JsonProperty( + value = "Result attribute name", + required = false, + defaultValue = "summary" + ) + @JsonPropertyDescription("attribute name of the text summary result") + var resultAttribute: String = _ + + override def generatePythonCode(): String = { + s""" + |from transformers import BertTokenizerFast, EncoderDecoderModel + |import torch + |from pytexera import * + | + |class ProcessTupleOperator(UDFOperatorV2): + | + | def open(self): + | model_name = "mrm8488/bert-mini2bert-mini-finetuned-cnn_daily_mail-summarization" + | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + | self.tokenizer = BertTokenizerFast.from_pretrained(model_name) + | self.model = EncoderDecoderModel.from_pretrained(model_name).to(self.device) + | + | @overrides + | def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]: + | text = tuple_["$attribute"] + | + | inputs = self.tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt") + | input_ids = inputs.input_ids.to(self.device) + | attention_mask = inputs.attention_mask.to(self.device) + | + | output = self.model.generate(input_ids, attention_mask=attention_mask) + | summary = self.tokenizer.decode(output[0], skip_special_tokens=True) + | tuple_["$resultAttribute"] = summary + | yield tuple_""".stripMargin + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Hugging Face Text Summarization", + "Summarize the given text content with a mini2bert pre-trained model from Hugging Face", + OperatorGroupConstants.MACHINE_LEARNING_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = { + if (resultAttribute == null || resultAttribute.trim.isEmpty) + throw new RuntimeException("Result attribute name should be given") + 
Schema + .builder() + .add(schemas(0)) + .add(resultAttribute, AttributeType.STRING) + .build() + } +} diff --git a/core/gui/src/assets/operator_images/HuggingFaceTextSummarization.png b/core/gui/src/assets/operator_images/HuggingFaceTextSummarization.png new file mode 100644 index 0000000000000000000000000000000000000000..673b8ea9077dacce2aa4ec76979d849cf7bb6775 GIT binary patch literal 13831 zcmch8Wmg?d)Ahj}g1fr~mxF6?cPD6&5Zs+WaCaxTyL)hVcXxL<9A2*b{SVKY4>fB| z_wKHinyTH^Vake9D2N1z0000*Mp{A@0D$;>g#f_AehPiZ5{pj(Ya%Bl0r>du$?Yso z006i;WF$n@-B&Ms5ZpBu8ii%Z0z~{-L*&E5DN!Zy2ds(9TuRg`cekt`N~eS>U9Yzu zUtX>C&gLIi7AL#XV@1f2$;ZXT9RKWFkX$OHCWye3L$Hg%vR^eXjz~fQkIsE2TY21u zUwyV-iO6`K&g&Ofy{dPb2P4V9z(Yd@h>-nX2|RP-NXbAZC=Z y{hh}-25xxWT0R0_7!4jvvTmEFyzbS$=nx*p+u`32@Y9ARYgz(Gc7Nr zgr%va%jR0W3HLo?Yws=Kp`Y+TRIy$nz7Tat`%HI2&O!VB-te~oX}nG#q-ahroc}uH zEyoumX_h;|ZMR9d7v|v&GSPbr1n39TcLOp%DBuVPEsD&63@s?Km`9NenE;FtD;zXm zI}_wYHbRCrJL1nu5GAyZZ@}W@H(b@6;*u#aK@iTU-T46EZ|c&HA1x4T`j{iqOwy-% zcXP2Wy+4|WW?1wni$l48iizdmy^Q=Rkznx!v~m3lR_A2)haiD_LZ>Xd#aVbYTtQO5h)A^EW;ENevA_S*W|WtKCr86WH% zD-49v**&(l{x11pWsb1N2%0@kf;Pnk{?aZuT7o;*X^G>E*|zFKgY=Mdva +3KtX z_LhGtVF!I@3-WiRx0%s*Rkti8=i87e2!mF_`|!4e%i5+Vy*>T2hp0VUv8(d){&tq7 z5~(W2)p%jwv>@(x96QK?tlN>hI}{%b7Cw~rctJ<$foZDU-$qO(K!g}zC6q&IX1znk zte@nRGs*aA0Yuk4$-xEZSha;+q*;I0>vLQwnpSP%Ziyd&bB;cKGQ_?#UdWJh<_%nM z$EHM~Zkc ex4Gb$GB7sk|FSl|Cy!@7__knj^oxOaPm(Y;0|C-38Q@EWQ_W1 z`QlWk4GYSipEgOx+ut-cpB=~0Y+uWpL6|pLqOxM>B_8|`kj{uD$Q~6M)>Lq6>1BOg z4vF<)5?migq;TqGRcf7|=8_vnNamr ZLe*EZL7Nw?MSsxS!~)ZFTA>DD=4E6}288^qWz3xC}@iSAr?FQrh? z+-VR*Ne7!IM 2=3Zf-%X!6}kdP8#up0^w}sTbVdL8N*Gd!@A-V4^X9{!Yrn79;%5_~m@G z+6}dOvi527adc?z8cGLUMJO38T(WCqzlsD=E#j<)DAkAxbQnWo=Xi6`(+PAhJ9WG+ zeG7=W=&*o^5Z&Zvrg3ARtiQDo`%IZK>58!&;1MzAc8#-el!Y@|*oFFY9jjSl4^9HG zz0@=+7rf4w^Z6-*8ukK=)q)s!K8SP4mUzx=*7@hY7Y!f#{VC5Cn!??C$vZu59ITD! 
zwf9YzS&A(9dt8#n+S{!(m@GgO!_|JTLzY3Y8YR6dMjr=@>_|ttLquztSJrJN_!T#%qOsk z6#G<_Y4#u(%iU7ch0d6R=2{~=un~flm*?_yZfX=sVrtDgzRDB-qqjB~CMOOSe>j)l zBuC%n;zNk6`qAwvJ_Q!u@#&T#+(S86Z8&Rx;xtbGo@~OfIxMHRGQt^Ptyo&go6OYT z9py*Fvtaq5P<1H%k?t^LoWdvnjSo_l$N(BY2{uYG7v+!y3xPM_oosGc6|@A=U*6?g za5PKcv&qhehE<;w=_FB@{S&WS$w^#gfe)S5IB&)rYz698-g?<;wmRP>G}a_rri&mZ z3%U<=7L}>h$j3Z3rgcq3 1~ttnIeQq$Zu6QaJ*NJ zw?C={VI+kbcL#-=T<06kU(3iyDcI={at{BY)@(+jMq~1d% 9%GN`sh)u zWbafQVy>=HsCu{*?l Y^+Pi5$hPh7vLD!mywWMWrd#Rim5cu*cU)ed70z zNz&PSN<>O|Hu> !Wz0b{ zsViEUi Oi#?QmbZdSUsc4yI&ACb7v%=obj>WaqwO_CI%U7;{HBSg_MC+9mb+Of z5H`VJgb8+7lje~OVM{O2Ngt>BOZp}>a+nFZNiBCDc>~9wc==P%5z=teBf#5_XT~*V zks@zctk$2**}sejxlv7bngexPXF#jYAzOPJO@?L!?_2db{20z$!mm4PC|@NUVBH|; ze&!7g$Nh-6NXaddnwWX*h8cW5AD8>L3LiXnxU6DA-VO)}np14IkNsf^@@hjY4f$^( z{~RgC#a`>)d<}K4yGPiiCL#9eINuTxI>(8>w-$h?6i-ag0-um-5u2Ch%X$63QJ~`E z?-><6BgR``BSC}PjHw>jRK4R_paQ3#sp}lhK?GM=*EBjMZ^S*{UUaHgOpt|7b__9} z%f~Xu;=z_eZx1nD|GJ()Vluvuq)6JkzmDpI)~arGf==D^2W@StzJI)B5UzZDJn5~R zZFNSy?|U`ZZGq2mEM!}f8xM>weYU)uesuL5&P=wYr)v=PIM~WkGP>m4NYORD*>x<^ za5i8<36p3xiLX_gY y=~>4N5CfeQEW0)VBhOxITzwjN;wy! 
zohg+SY9+5}Cf4!%zJv*(_ebrK*haizWe@OLL=S@gVbY+V^HWkh+nGm|WW<&6+J{3{ za{ yoK12)THM0I%)FlO1NM7AP-u(#2=x9aTq{uf z&4ce3 IM(XVmS6f5z|_UETw()M_2C&8zrG(Bf@|D8UdMUnn(rx>XJ2 zi92Zf+!bnFD0|QJd0}0{sAs)zwdKhBf# OJi`$!B8{2x9`NJbIzR>3zzF=L`cd+<)jobOrJ3Mc$iv&LN&jPf zuJSaI0vk?QCi#qDkF?kwWW?2&`4(T1P5t(K@`Tf}E=}?$Yj*CNYKBMfSk736B9HBn z8Th`W5@{mS27rR&5&+erCiLW2p-5)V0eA<~3YJQ5`FO^SML{E33H&63+Xi2u!d+LR z+1+R7QGEI~-xjylDt7^!6iD2c0HJu&n+GWHjo;q0<5F~Pv D=`IH6ZR z1e4CG2b^+)Z?+y$kMLHzip{0(wZS(3NJf+sguxP3BA#=)AIh@+-(7%fEDWI=0ClC% zfN)}JY1N{==$3_)A=XXNe}_U8SB_y_k-fN#f%x#!;3pN@X7~6WD&Xq$(jGu4!2!{7 zn5G81)jRE0hhVnUFIsGT51cEyMH+t;NFDi(gh DIgJW*_rmUi&au zJo+1r2P9{%2d^qW0tOjvc8(l;yM^Ukau@GI%a+i8w;dpakqtr;Wo`;7=}u5!rDL#Z zduxZrq#@evhq$DWTf747hY6R(zx?bTA%{a@ F9dWy7I&`ckH}8e?=5NVGYv~6DA?~PI(5Eq*I}2de)& UZ1?Q@-GxsB^*L+AXiAi@K+eBHODU$p01%TaVmNX9iT8 zw=AboJ9fhc{-JK?=dAzx=)_lR=uDXC6{{tXTp<~}Vt4V*T_lW^O0L*~7 85_wO6DuzrJC4Pf=Zjhp2ub*W>Kt|9Xc^h*DbII z4lEowrZd#f%SO?8_X@B<`S$YAk#NsSNgPP~EP%O(=CliFj+L}=14LlEe>@KeXNW~% z`d;EyE3IH%W==G<&dz8nz8Jkvik*~gPc`TjGilXSZ%`WnpT|hOB`ctpxy85mSMaUR z->MPI4hA76dAMib*_QbHQx3-bwzf$}n7(emrO3U7NpCU@OxZ%fX5!|wy3=-CWt*Qz zx>OkveoUwK7BWeH+)YmGPEK*FeWD4@yx&2U#Up)M>|(Q-RpOrFI=NPJ(Zx{C4>QG? zVq0ydewiHdpEwvl1YRbn`!2`9U)TIWPgQY_b7>KAQhsE# z-U!h756Is;%VoC!YCUBK@sPA*%tpvsku1c(ZLI0b#Z}?7v5K+`r|@y6ry7o>J$w|# z(lKBXS#L0)ODHVI;6joLcA;H<#Wgsb5nnU+du({*d}m w*2(B+qP>F6Z&I-|009=9!XR6+OC?b0= zxLg=ex2fxF9pJ7*&oOY`i<90k94Xet!C@$(+QA`Dpo={?Y~-YA5{1EI`d)v=E~v;` z(^Z@;lx$SQqZY7R;h#IGfW~&>PZTY)m0i^Bw_EC-nAFZRa72I3pHM_INFvfw7)Z>K zx&h)IxH`F1#~={{*Krk9ZHyE-6?GCn>*;#y%3iDg)#dO&^$#@(z%r(#X9^n|AeqP! 
zY`s%#)b+>suY<9Fm7(mVbK(3GllqBOr(6K^OXUU-(FvdG0$wi5T5=Q0Sd1T*F$+ql z?Bc(Eg*#XUS|9XW{D&+el>Q3*Q|8f$BrPH>ooHs>qw_$~Q1hN?LiQI9(-|NYQoRA# z^W09H#!ywqXdWrc#msElmW_r=m0x5&1e9UB1Lm)c^=NQn|Hii{EJpK}2?g*y^A16b zQ=Q>B0s0urJSUsH-jGEwgj38W$9tZ0`J$mhY5w~b)t6jVo|kq8X!>*TTo?H1PE1yW zE!E5>dKhv5=0nL4WGB;Yv^Q66Ktb1L2E)EC#K^YWy#9 bMvR6)c?~gh<@36WDL?4>23wDXI05=oBfCas@ei43!S&s7IY6AhMoHKj`p*A zcF7a~w@?otmja?1bL%u;a*~ b1D>FN;Nd{4;4~azPxd*B*q7pC7bMmpv*vCx$1o5 zeIn6LtFLcoEWb?L7+FDOGA?Z~IRH(V(iEubblq>kB2^9m55&TrQx-B{%Rj9on_r;D zHYsanqpr-(k4NV7Yt$z^yh*B_@*y nvnREeKV(j|z9{ z`Pc3ZL*9ae6u$SPmEqz7_oH`PhWmS{WJQD9p{)G>!%@!uV%7y9mp-RA(x}UG0bF8K zuspwZjxFz@v6ydp+tarn*^I5TAHJ#fzW8@32-Aq5*!fOM{3iPx4dHnO1dEuJqjK5D za?-E%?c|=aO&@!FTdn|@IMBopShOh|N?ReB`4W(wWbiMr`Ngysk 7^2? zXT@LW0#IjR;pkHB=@b?Y_57PjZykau&fp*A4a|Cqan|=+4QiY5bjB#a6VKj|bKz79 z#8vj_mbWfinF~~x;(y3RV_$dDI{MaAYWm#XAe0R~?Bce7{!_kA`B(h@QJClJ2)#ZA zN1ujzX{4?Z7_hUK?8VPlAl mw{clbWQXN?PK6#bPc%n<-C;22tfvkhS z%$+&L+!9~di(buJrtP^xqCOfOI94d#*^T!R!Pvp@z65vG0*JoTw93b`=obMtV>?sP z{_{j{Z;(c`e_z-CTqUpw6y=MD0L_tN$m%^QQ@OtvM(%c;p#EIg%PDVBCcaDFv4kR| z2TxfQzqfil&>%#rrr{Y%$kv2y{e-QHa%ByPv5JnJL^20dY~TA^5w Rf+3?@=zY|tCt<0hiPqgcG!So7O(A{5)#CQkvW?Qor z>L=q&E&-91)exr(QMVOW3I&?-ek-BCvY;S{J#^jY1!VPp&);1Lb`51--iA+u5dI^j zFz1X}KT*f4MD0WAfvDZ|jN0^iGElLj7okmff6{HA H)4VkqU9CNAoukt) bnT2FdjP^F{!z^GJDo7L! @8@ER zZcprkIO{jT4Smra(uM0SmnV|A@9$N=n7$(@os7wLAa|3l1aRV=(}(t@>WJmOz71@6 zU;o%FCs V6{RVEManVCWHE3Jj 9VHyXmP;w_SL zIkm=>)SOVu &L_I!;!hFPyD6Ps?%7=&B! 
znihPX^(UofPKkN;vA%h;ms^1H3Yx^r9)Ul@t9ALZ+p=uM{0x_Ln!XdWQuq(DFh9*v zpW8^XwW#yE-6fNMo=8#ofZ4%q2r2ySSuegZq_&_-j*w|aGFt^N6$=F}Y{a7Hxq5)= z{`vqG^xcW0t$>~;)2uBh1V#~!RM7RtWxI<1m!*^&(&k$&cVB`{Cv0RV-R*3Gmhc*v zWILwJ=kbIU6b?-1Hyq*hXG~6J(?QHtBTX^md@S;0t#McS=_C3=eU!vm?Z4wYC`6~w zo9@PG&c&n-aDYJk5UDF7WaLdW5M)yOZv^3tJl3 s(P z`yIhLkmw$oNRx0LHNTca;D2WyIxHD0uVZ(d-Jpr}neRTa{7vriz*-A`Q<0M#BSy)U z8`Kyp!OwF87$9sh8WmgV#QgMWJJ0%l>qF8m4;0a*UwhHidfoKsqkyqRlhzKw6F=cR z)Vz&7WH$|5&+?Q;N=bLK)-Hr}s{3*c VYhWY`XT!pyl8~x?i}Wcy6wH-8C%d(`R0qw+^CMXbkdls|IdVqz)?@b zzj#YFpy<>PIiYIOSv8ZBuV?c0({8_kf#+~Y0;VAM3&5^QQRDcMfH;$^ur?Eyy-ct< zF)xIV)}P+e8Iw-p44Rg&eZQR`C-+GYdHuh}W=Q4V{K*evY*vyk?NUr3fpBO@X!*ze z&!Exp?ustjkxmn-3h6M%BwifVhMfVAYBZZDPlRm!X~kySncZc^3(bDW25r*So~}p| ziLwz6ZArwwSzfToaRmOFvZ~V)I@9!%aR(~)Kwt>067|rAk2yTwL5f=m9b`JN5Y-ZL zeGy&d@`;JsW7H5v%ZQK=8X|F$pfu-FA2jI0BN|~-I7%k|P#}9)H_xr^=4{hHezuJf zDp{qi;VThS%Tu;$fc-<@w9f4L^pa|)S-&vp04a>8FzA P(+0G} R hZ6gF7Tcno) 89l~k%0ob8_L*b( z< gv9$x&Ju;XWGcGdqX&r~ zvxXl+fMxI|f+6~I&H@u=s76A4&lhze$TpsyQ)ak%6Nvm?i1otfXPbP)Z%nim52;Z@ zGMkpk2!psa2-5rD)6mifk3A{h-z{u{kl5*(y; k%i!oR8B--WV&g)4vpym(?%1CNkxN=QnZmSkYFPYS4XI9$ykFh_mH_g% zc`z=xzaU-Xptrsa!Q4ylXNsXxTaXsdvKY-h)lW=Lqsx=TRs}cd=*;$$%?}7n%j9uV z_cy DEpf{3(SHcO;!5fkj>o!d<{>di{tTZi!y@wX1_(zer_?G9 zK!7&3h((glP1g+TTc`qm{V}aimKEw)%l{kBfP!;M=hrQdGFc#IZGM2m;xLxdsU;B# z+~qn4(^)ax{9cEz4{0zX?|9?hA@-X})Gv*XGddmbW=QTTUKBCAb%V3l>}ox7@Xal` zM`|*v{T^)-#0Z`7JAM0r1luB2!b z`6nk*{f|1M-?x-ap2^r-LX8>1)x~ix;Un)Gnd;>gst6dI@Pf4kfhBohqu#F=rhMxZ zki~6vF4Ic~4u;HwfHB2Z5?(3vPwsIe)VlZyCEpUHr77x0>GyQyifV~20A^H&92Cuq z5)e#XcPqy#yY5fi-VUphnyZf}@VMJlC8RVwxI)b}c&v>8N08!gRXLq&S=|dQXDPws z=| gs24)UmFN;=U6?lh;5h*YfzNsn~HfYv{ zc(v;qJG3}iKbgQyQ#ADbU8bnx?BpnHu=;M6?SHc*mU5_KM^wUEH|@VS7W6dv%f%4i zF;We##`T^X;sTT|$?CX81Cw@-Tddxv(iyl19Swi|geC%|+!~mF3Miz*63PlyjW9Lb z;E(bHtlAAo>&Ab-ko;4C_07autILty<*#%{Au1n;fmcEQt+zINzPof>2AAFUgyE?n zY!BdjeucXIK`-Q~=aMxKq}J86O7AUP(Bx_}NN|FY4bgKxYGWG`;4P9TQ$*LG7-zT5 zxbV60uSiP^q;C`o9SPm2uB>Or3Mz0Pb+ospBC0J8__+E)i~60TK-{aiFsxp@Z_uTy 
z^s}L^)6a&)&8>eI;aH8=HMRM+4<2SmP y|!4MAUvb;1&PP0lM5$}H9*g;)f zS}ENH)R?=6ck}s63;?!YTRpxW-}bfH`mi)%ojxB}3>xXjEm`{W7>m%+_`jq4ZMUKr zFQ^| a^+U^9>?8S!oF zM>}(1$hrTjEfTbGh3dSIOYC!~y*$3T?A_BU7Z74hKEf7a^t6^%J@O~u8IeiZT~%=Z z$?e}mFBKM`y t$-GKO!cG&J|+gYUhfrM&7TVS~t(l2UntJh`StHjYy zp*Y&{1R>pLeNiNEwhxQ-i&3g+&p-`P)W}wqNjg#0u#+ZtG>53Cag@lv{#m{?86QVS zT$~~#(h6uV!OzI8MnZ8upt9Psi%1!f=UAfyx3llB?i;S}WDZ;Qbd9eC+l0hu`n}){ zF&ObMl+e$_n9=)5_Pv{>NMSWtQakQimphYxmnbpnYHlzOFAPfh6xx)hK za(dQl_5IR_lOxw<3HwlTKT+98e#k(` lb7D8Ie-49?iGak}0_D2rDtmjOeywWrX5ovEQo+o Nt>gy_pSM4z?}{eZVz zF9jwPt?fX6GQMwlr%luP3PYw-(izG)P&TZrQ2D|di1h2mU{D~jQDbB>V~==vjP_$~ zY9M)ULDy+l#M@{R??IRPemr{jmW@`8X-&`#lp`VMfYQFX0leRn61+eq{qZ5bmHF9F ziW6KIL6ng(X8ezL5pxsrA&B*T7SwIo0jYC=jeZnpUyq8)`Tp&4^|}cCU)Cs0csEav z8%&nT8YSkd-^s9P*@LChtG~qU9Sm-j6ZH50=rp%z+L `gEl?W4@5+l->;n8~f81@e63^;R*t3;S3u-}`E{heNuW(U_`*m FtHTM%pozEZ3o;qo7KxXVOstK-9#)V*k8Iey=`-5X$G_ z? pFA<&KN>L<{BDnT4pOWnR>6Pd_KXtyyl?0}6pVK(ZR{A~#brCwGRMp$H%J97gh5 z69{~P>?hLk?dlQ{M+*?_*ougBUZOFmXg{b3hjL&0G_}zniF>sA;J+(g#pW_FqRYlX zZah>IJ#DseuJfX8H8|bsv+MA_KH^}ai+U_ai@Pm36|*>_Dtgs_mI1!hQG0Hg6#1;0 zS;_zAciG{Y_V8*2)f}Pc{9zbSjXS%6{@g1=>u9oYQk^WOw7LS7tiGA+`c{WWP> zw&=T67yr?9K@CUL{9xhFPNKH2qr2nit4^}O-o-LD1n>z8=(!}We}$mU6{5V(6~_NC z8yJ#r&F-EHojB*GepnxJAQt5Tb=sCfjguzg=xJ{_Kz ~bAn{Z*BCiz!__W>qYq?ia8FQOH?C~-HYVn7{REHOSCBQ3)_`K~-1$%7MnrdG%( z;Vx3Bp2baQyE$!^*h=l+EeH*~fC@jABPjT5T(~c#Od*VO<|SmlO0EWfHTC&3 z4bZEn*%_5@ZFIJ13iZreYtAPbu)9}WtUsw715v6ik5((Qs_tvQ_*j+>b$nNh*&{J9 z)bJUUOPmxo|Hy}qE+rH{XAsSFB-PyYh0wh;oRo(;0Q!L%Y=S?j@J5FBS9;9zHkT^x z@Ud$f=;#V{BuMTzy)QK?g^01;+Beg6dXUV?o^wDvCOMQ1mNBm5azuS1bKK*C`7Qo{ z3ARgK1NR7h-4i_8hk)}G>(>V*nb9EaUY!7i%B+Vh;p$z$IrtEeXp^%ffrX%^Ku#YL zx9Y?{o*?dOXp6SVH)B_#Gl(1gF|1ngbmxX7QTHUjvR8km1^IN(oC{T&SBSAwVL-QB zv7YfgEAFcSqolLPXFjMRx$@u7Rq%vO+UV(2T0{|!MReVRk9=|!`0#}S^S3N=qBO2# z5+d(`)9iNS4CpKCb}@IjeDwZd;sx0dM91MTRO7JIy{d?brDdq;J= z80UpSzK7Sm&IjbDCo{z K5@HZ1OHHd5@PkMGj!o=fpKum#NP$h3wd2E7ixQQ3;E=>hF+d$* zWjFDV1x!3 |n+sQ|_VUi8Q 
z%^cs85e;(af3^4t7>uhyQvMCEp}shPQ*(J(A2^u9YQYI0apoSD0ed2gKujr-!NXK= zSE z1&iCF0YLJIx_%U#Ty(K4lkE5twb)KChodI}!qcZ3ndG09d5pT6&^E`Qu(O0VjbV33 z>>mgZYS E>(pg$%Ts0{lWdwbcR#E>&M* z9kUA(7kK9FM0wWQ TnTYS=yMG0EO5EhqF7U5UP+)8 zlpz{UG=8Q3pN|j -V||p6hR~oc3jF1I4f@dYhm8es?%-5W>J36*DBH&sbM8O&$Ww{eJrgIJ@XZ7 zjs_pdHwFqtc86= =A=su2c+r3{KfGL&6A~GmPOhN$gwvX?{yrQ7*HFWCMi{hk zYyW;Uuz#klC*QQB6$<{aSWDJfNtV=Xx;fPki^8XVNM>q!ca69|N }SuDX>EDNth+Pu ^NdP$w7* zg9_J;O*KXaY;(J}dcqq6z5g&}UBOoYRxG4}?F4$-;5E%;UXL6j2J?RfK@|`7D0iIK zzrT>G9{jNp=+RWFuitg6hqiudeX)(zAVW9XSS9M^$$C*S{UPWWsAE(o{cZ0pd}XSM zrQUv+>Kl&x0Uptv(DvQ87>DT0CFY9Cwn@oyHRM~3I*WHjW2WH<_cP~_v(HrLzB;4E zngxz?vG@B;s*=o9#0SmH m&VA_SrSI>7}7jp}N|B8MqGD6eT<(au~6v z4|2GoyG*AW#O>%0WN9*;N-rYO+&gPym)2X?Y-^K0LSvTgmmG5K7t%$ze~LG9VZR4x zSVzv{Axe5;$1;<(4mU&pJ!+diV_N#^YGJu6x@gUhe$JFo1&dd4pV=#}X^|dOC}^7u z*^fu)7_ YiVkQmZr__+%^y+ctRuj)3Us8 zjH=2#GdYsK6crctmN~c)(P_5c)c-*@R! %LdXmAE>jcV`DfTZzlK8Q)M!s+X zz5p?uyfY^V5TlPdpYwQO>fC=XzM;puibrDuZ_Y{0IOY&B*P50YfLi*^zY(x!b$8Nt zR%On(yJTNEn4pXZb?C6S&3wnGB#+A%v1$k7Wf#{rg~U>b1ilvr2m~*w)(KrX!D ;7(VOhI6KT>Xu8Ok-vhuWKoL^VGl0RWQk{BMo6?62Mo}(zjNJ9fJgJpI1LB8LG z>WX$Iuk_A?ZNjj9&$0-6Dif<|hv~01Ch4xcCO7gt3mDHuxdS_3*_Vcv9EFBsL|h3* ziH^r6Ov^86jux&QVzp=O0u3dz5gloV<;c37H$CFk8)kdrn)_bv_(g;uJtpzX;qT|T z`mHAi$XJ3v=w>X^Q2`HUCP6+A&p-!7@sF>=7i0-CHVBl3%Mg>Mzs_SRGQ2XeBi!TN zk@pRYncOy$XlF(Z8banotKeG4S{0x8mrxH>jy~FzYp}{SD Date: Sat, 4 May 2024 15:58:42 -0700 Subject: [PATCH 15/44] Add Default Value for UDF Worker Count (#2643) As title, this PR sets the default values of worker count to be 1 for all UDF operators. 
--- .../ics/texera/workflow/operators/udf/java/JavaUDFOpDesc.scala | 2 +- .../operators/udf/python/DualInputPortsPythonUDFOpDescV2.scala | 2 +- .../workflow/operators/udf/python/PythonUDFOpDescV2.scala | 2 +- .../operators/udf/python/source/PythonUDFSourceOpDescV2.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/java/JavaUDFOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/java/JavaUDFOpDesc.scala index da36074c622..06f5b759288 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/java/JavaUDFOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/java/JavaUDFOpDesc.scala @@ -38,7 +38,7 @@ class JavaUDFOpDesc extends LogicalOp { @JsonPropertyDescription("Input your code here") var code: String = "" - @JsonProperty(required = true) + @JsonProperty(required = true, defaultValue = "1") @JsonSchemaTitle("Worker count") @JsonPropertyDescription("Specify how many parallel workers to lunch") var workers: Int = Int.box(1) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/DualInputPortsPythonUDFOpDescV2.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/DualInputPortsPythonUDFOpDescV2.scala index 7a9e84c831b..9a5928e9756 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/DualInputPortsPythonUDFOpDescV2.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/DualInputPortsPythonUDFOpDescV2.scala @@ -45,7 +45,7 @@ class DualInputPortsPythonUDFOpDescV2 extends LogicalOp { @JsonPropertyDescription("Input your code here") var code: String = "" - @JsonProperty(required = true) + @JsonProperty(required = true, defaultValue = "1") @JsonSchemaTitle("Worker count") @JsonPropertyDescription("Specify how many parallel workers to lunch") var workers: Int = Int.box(1) 
diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/PythonUDFOpDescV2.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/PythonUDFOpDescV2.scala index f9cda84c63e..143992e6a47 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/PythonUDFOpDescV2.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/PythonUDFOpDescV2.scala @@ -45,7 +45,7 @@ class PythonUDFOpDescV2 extends LogicalOp { @JsonPropertyDescription("Input your code here") var code: String = "" - @JsonProperty(required = true) + @JsonProperty(required = true, defaultValue = "1") @JsonSchemaTitle("Worker count") @JsonPropertyDescription("Specify how many parallel workers to lunch") var workers: Int = Int.box(1) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java index 137d2d27391..76dcccf48a1 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/udf/python/source/PythonUDFSourceOpDescV2.java @@ -46,7 +46,7 @@ public class PythonUDFSourceOpDescV2 extends SourceOperatorDescriptor { @JsonPropertyDescription("Input your code here") public String code; - @JsonProperty(required = true) + @JsonProperty(required = true, defaultValue = "1") @JsonSchemaTitle("Worker count") @JsonPropertyDescription("Specify how many parallel workers to lunch") public Integer workers = 1; From 7d30341cb9806908559557ecfb8a0107fd3b0dd7 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Sun, 5 May 2024 01:31:58 -0700 Subject: [PATCH 16/44] Introduce Sklearn ML models (#2641) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR introduces the 
Sklearn base operator class and the following ML operators from the Sklearn library: 1. [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) 2. [Logistic Regression Cross Validation](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegressionCV.html) 3. [Ridge Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html) 4. [Ridge Regression Cross Validation](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RidgeCV.html) 5. [Stochastic Gradient Descent](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html) 6. [Passive Aggressive](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveClassifier.html) 7. [Linear Perceptron](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html) 8. [K-nearest Neighbors](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html) 9. [Nearest Centroid](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestCentroid.html) 10. [Support Vector Machine](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html) 11. [Linear Support Vector Machine](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html) 12. [Decision Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html) 13. [Extra Tree](https://scikit-learn.org/stable/modules/generated/sklearn.tree.ExtraTreeClassifier.html) 14. [Multi-layer Perceptron](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html) 15. [Probability Calibration](https://scikit-learn.org/stable/modules/generated/sklearn.calibration.CalibratedClassifierCV.html) 16. [Gradient Boosting](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html) 17. 
[Adaptive Boosting](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html) 18. [Random Forest](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html) 19. [Bagging](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html) 20. [ExtraTrees](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html) 21. [Gaussian Naive Bayes](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html) 22. [Multinomial Naive Bayes](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html) 23. [Complement Naive Bayes](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.ComplementNB.html) 24. [Bernoulli Naive Bayes](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.BernoulliNB.html) 25. [Dummy Classifier](https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyClassifier.html) Each ML model operator will take the training and testing datasets as inputs and produce the ML model name, the accuracy, and the model itself as the output. The ML operators offer a user-friendly interface, allowing you to select the target Y value and view the accuracy on the console. This control enhances your experience and understanding of the ML process. ### Usage: 1. The ML models produced by the ML operators can be used in a downstream Python UDF by calling the `predict` method: 2. 
The ML models can also be used with the Prediction Operator: --- core/amber/operator-requirements.txt | 3 +- .../workflow/common/operators/LogicalOp.scala | 72 ++++++++++++++++++ .../SklearnAdaptiveBoostingOpDesc.scala | 6 ++ .../sklearn/SklearnBaggingOpDesc.scala | 6 ++ .../SklearnBernoulliNaiveBayesOpDesc.scala | 6 ++ .../SklearnComplementNaiveBayesOpDesc.scala | 6 ++ .../sklearn/SklearnDecisionTreeOpDesc.scala | 6 ++ .../SklearnDummyClassifierOpDesc.scala | 6 ++ .../sklearn/SklearnExtraTreeOpDesc.scala | 6 ++ .../sklearn/SklearnExtraTreesOpDesc.scala | 6 ++ .../SklearnGaussianNaiveBayesOpDesc.scala | 6 ++ .../SklearnGradientBoostingOpDesc.scala | 6 ++ .../operators/sklearn/SklearnKNNOpDesc.scala | 6 ++ .../sklearn/SklearnLinearSVMOpDesc.scala | 6 ++ .../SklearnLogisticRegressionCVOpDesc.scala | 6 ++ .../SklearnLogisticRegressionOpDesc.scala | 6 ++ .../operators/sklearn/SklearnMLOpDesc.scala | 69 +++++++++++++++++ .../SklearnMultiLayerPerceptronOpDesc.scala | 6 ++ .../SklearnMultinomialNaiveBayesOpDesc.scala | 6 ++ .../SklearnNearestCentroidOpDesc.scala | 6 ++ .../SklearnPassiveAggressiveOpDesc.scala | 6 ++ .../sklearn/SklearnPerceptronOpDesc.scala | 6 ++ .../sklearn/SklearnPredictionOpDesc.scala | 49 ++++++++++++ .../SklearnProbabilityCalibrationOpDesc.scala | 6 ++ .../sklearn/SklearnRandomForestOpDesc.scala | 6 ++ .../sklearn/SklearnRidgeCVOpDesc.scala | 6 ++ .../sklearn/SklearnRidgeOpDesc.scala | 6 ++ .../operators/sklearn/SklearnSDGOpDesc.scala | 6 ++ .../operators/sklearn/SklearnSVMOpDesc.scala | 6 ++ .../SklearnAdaptiveBoosting.png | Bin 0 -> 117082 bytes .../assets/operator_images/SklearnBagging.png | Bin 0 -> 60221 bytes .../SklearnBernoulliNaiveBayes.png | Bin 0 -> 433434 bytes .../SklearnComplementNaiveBayes.png | Bin 0 -> 74896 bytes .../operator_images/SklearnDecisionTree.png | Bin 0 -> 7095 bytes .../assets/operator_images/SklearnDummy.png | Bin 0 -> 39008 bytes .../operator_images/SklearnExtraTree.png | Bin 0 -> 20903 bytes 
.../operator_images/SklearnExtraTrees.png | Bin 0 -> 75482 bytes .../SklearnGaussianNaiveBayes.png | Bin 0 -> 69880 bytes .../SklearnGradientBoosting.png | Bin 0 -> 100542 bytes .../src/assets/operator_images/SklearnKNN.png | Bin 0 -> 96537 bytes .../operator_images/SklearnLinearSVM.png | Bin 0 -> 17599 bytes .../SklearnLogisticRegression.png | Bin 0 -> 18324 bytes .../SklearnLogisticRegressionCV.png | Bin 0 -> 10842 bytes .../SklearnMultiLayerPerceptron.png | Bin 0 -> 128735 bytes .../SklearnMultinomialNaiveBayes.png | Bin 0 -> 34729 bytes .../SklearnNearestCentroid.png | Bin 0 -> 214245 bytes .../SklearnPassiveAggressive.png | Bin 0 -> 9322 bytes .../operator_images/SklearnPerceptron.png | Bin 0 -> 13079 bytes .../operator_images/SklearnPrediction.png | Bin 0 -> 98115 bytes .../SklearnProbabilityCalibration.png | Bin 0 -> 83338 bytes .../operator_images/SklearnRandomForest.png | Bin 0 -> 81937 bytes .../assets/operator_images/SklearnRidge.png | Bin 0 -> 24635 bytes .../assets/operator_images/SklearnRidgeCV.png | Bin 0 -> 16258 bytes .../src/assets/operator_images/SklearnSDG.png | Bin 0 -> 22220 bytes .../src/assets/operator_images/SklearnSVM.png | Bin 0 -> 17776 bytes 55 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnAdaptiveBoostingOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBaggingOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBernoulliNaiveBayesOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnComplementNaiveBayesOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDecisionTreeOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDummyClassifierOpDesc.scala 
create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreeOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreesOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGaussianNaiveBayesOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGradientBoostingOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnKNNOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLinearSVMOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionCVOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMLOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultiLayerPerceptronOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultinomialNaiveBayesOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnNearestCentroidOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPassiveAggressiveOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPerceptronOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPredictionOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnProbabilityCalibrationOpDesc.scala 
create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRandomForestOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeCVOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSDGOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSVMOpDesc.scala create mode 100644 core/gui/src/assets/operator_images/SklearnAdaptiveBoosting.png create mode 100644 core/gui/src/assets/operator_images/SklearnBagging.png create mode 100644 core/gui/src/assets/operator_images/SklearnBernoulliNaiveBayes.png create mode 100644 core/gui/src/assets/operator_images/SklearnComplementNaiveBayes.png create mode 100644 core/gui/src/assets/operator_images/SklearnDecisionTree.png create mode 100644 core/gui/src/assets/operator_images/SklearnDummy.png create mode 100644 core/gui/src/assets/operator_images/SklearnExtraTree.png create mode 100644 core/gui/src/assets/operator_images/SklearnExtraTrees.png create mode 100644 core/gui/src/assets/operator_images/SklearnGaussianNaiveBayes.png create mode 100644 core/gui/src/assets/operator_images/SklearnGradientBoosting.png create mode 100644 core/gui/src/assets/operator_images/SklearnKNN.png create mode 100644 core/gui/src/assets/operator_images/SklearnLinearSVM.png create mode 100644 core/gui/src/assets/operator_images/SklearnLogisticRegression.png create mode 100644 core/gui/src/assets/operator_images/SklearnLogisticRegressionCV.png create mode 100644 core/gui/src/assets/operator_images/SklearnMultiLayerPerceptron.png create mode 100644 core/gui/src/assets/operator_images/SklearnMultinomialNaiveBayes.png create mode 100644 core/gui/src/assets/operator_images/SklearnNearestCentroid.png create mode 100644 
core/gui/src/assets/operator_images/SklearnPassiveAggressive.png create mode 100644 core/gui/src/assets/operator_images/SklearnPerceptron.png create mode 100644 core/gui/src/assets/operator_images/SklearnPrediction.png create mode 100644 core/gui/src/assets/operator_images/SklearnProbabilityCalibration.png create mode 100644 core/gui/src/assets/operator_images/SklearnRandomForest.png create mode 100644 core/gui/src/assets/operator_images/SklearnRidge.png create mode 100644 core/gui/src/assets/operator_images/SklearnRidgeCV.png create mode 100644 core/gui/src/assets/operator_images/SklearnSDG.png create mode 100644 core/gui/src/assets/operator_images/SklearnSVM.png diff --git a/core/amber/operator-requirements.txt b/core/amber/operator-requirements.txt index 3e5acb42367..d75fc19dd52 100644 --- a/core/amber/operator-requirements.txt +++ b/core/amber/operator-requirements.txt @@ -3,4 +3,5 @@ plotly praw pillow pybase64 -torch \ No newline at end of file +torch +scikit-learn \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index db7543ad23c..cd603a28759 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -44,6 +44,34 @@ import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc import edu.uci.ics.texera.workflow.operators.reservoirsampling.ReservoirSamplingOpDesc import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc +import edu.uci.ics.texera.workflow.operators.sklearn.{ + SklearnAdaptiveBoostingOpDesc, + SklearnBaggingOpDesc, + SklearnBernoulliNaiveBayesOpDesc, + SklearnComplementNaiveBayesOpDesc, + SklearnDecisionTreeOpDesc, + 
SklearnDummyClassifierOpDesc, + SklearnExtraTreeOpDesc, + SklearnExtraTreesOpDesc, + SklearnGaussianNaiveBayesOpDesc, + SklearnGradientBoostingOpDesc, + SklearnKNNOpDesc, + SklearnLinearSVMOpDesc, + SklearnLogisticRegressionCVOpDesc, + SklearnLogisticRegressionOpDesc, + SklearnMultiLayerPerceptronOpDesc, + SklearnMultinomialNaiveBayesOpDesc, + SklearnNearestCentroidOpDesc, + SklearnPassiveAggressiveOpDesc, + SklearnPerceptronOpDesc, + SklearnPredictionOpDesc, + SklearnProbabilityCalibrationOpDesc, + SklearnRandomForestOpDesc, + SklearnRidgeCVOpDesc, + SklearnRidgeOpDesc, + SklearnSDGOpDesc, + SklearnSVMOpDesc +} import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc @@ -185,6 +213,50 @@ trait StateTransferFunc new Type(value = classOf[TablesPlotOpDesc], name = "TablesPlot"), new Type(value = classOf[JavaUDFOpDesc], name = "JavaUDF"), new Type(value = classOf[SortOpDesc], name = "Sort"), + new Type(value = classOf[SklearnLogisticRegressionOpDesc], name = "SklearnLogisticRegression"), + new Type( + value = classOf[SklearnLogisticRegressionCVOpDesc], + name = "SklearnLogisticRegressionCV" + ), + new Type(value = classOf[SklearnRidgeOpDesc], name = "SklearnRidge"), + new Type(value = classOf[SklearnRidgeCVOpDesc], name = "SklearnRidgeCV"), + new Type(value = classOf[SklearnSDGOpDesc], name = "SklearnSDG"), + new Type(value = classOf[SklearnPassiveAggressiveOpDesc], name = "SklearnPassiveAggressive"), + new Type(value = classOf[SklearnPerceptronOpDesc], name = "SklearnPerceptron"), + new Type(value = classOf[SklearnKNNOpDesc], name = "SklearnKNN"), + new Type(value = classOf[SklearnNearestCentroidOpDesc], name = "SklearnNearestCentroid"), + new Type(value = classOf[SklearnSVMOpDesc], name = "SklearnSVM"), + new Type(value = classOf[SklearnLinearSVMOpDesc], name = "SklearnLinearSVM"), + new 
Type(value = classOf[SklearnDecisionTreeOpDesc], name = "SklearnDecisionTree"), + new Type(value = classOf[SklearnExtraTreeOpDesc], name = "SklearnExtraTree"), + new Type( + value = classOf[SklearnMultiLayerPerceptronOpDesc], + name = "SklearnMultiLayerPerceptron" + ), + new Type( + value = classOf[SklearnProbabilityCalibrationOpDesc], + name = "SklearnProbabilityCalibration" + ), + new Type(value = classOf[SklearnRandomForestOpDesc], name = "SklearnRandomForest"), + new Type(value = classOf[SklearnBaggingOpDesc], name = "SklearnBagging"), + new Type(value = classOf[SklearnGradientBoostingOpDesc], name = "SklearnGradientBoosting"), + new Type(value = classOf[SklearnAdaptiveBoostingOpDesc], name = "SklearnAdaptiveBoosting"), + new Type(value = classOf[SklearnExtraTreesOpDesc], name = "SklearnExtraTrees"), + new Type(value = classOf[SklearnGaussianNaiveBayesOpDesc], name = "SklearnGaussianNaiveBayes"), + new Type( + value = classOf[SklearnMultinomialNaiveBayesOpDesc], + name = "SklearnMultinomialNaiveBayes" + ), + new Type( + value = classOf[SklearnComplementNaiveBayesOpDesc], + name = "SklearnComplementNaiveBayes" + ), + new Type( + value = classOf[SklearnBernoulliNaiveBayesOpDesc], + name = "SklearnBernoulliNaiveBayes" + ), + new Type(value = classOf[SklearnDummyClassifierOpDesc], name = "SklearnDummyClassifier"), + new Type(value = classOf[SklearnPredictionOpDesc], name = "SklearnPrediction"), new Type( value = classOf[HuggingFaceSentimentAnalysisOpDesc], name = "HuggingFaceSentimentAnalysis" diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnAdaptiveBoostingOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnAdaptiveBoostingOpDesc.scala new file mode 100644 index 00000000000..70893c7c693 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnAdaptiveBoostingOpDesc.scala @@ -0,0 +1,6 @@ +package 
edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnAdaptiveBoostingOpDesc extends SklearnMLOpDesc { + model = "from sklearn.ensemble import AdaBoostClassifier" + name = "Adaptive Boosting" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBaggingOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBaggingOpDesc.scala new file mode 100644 index 00000000000..98cbee10950 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBaggingOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnBaggingOpDesc extends SklearnMLOpDesc { + model = "from sklearn.ensemble import BaggingClassifier" + name = "Bagging" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBernoulliNaiveBayesOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBernoulliNaiveBayesOpDesc.scala new file mode 100644 index 00000000000..4f8aabeb950 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnBernoulliNaiveBayesOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnBernoulliNaiveBayesOpDesc extends SklearnMLOpDesc { + model = "from sklearn.naive_bayes import BernoulliNB" + name = "Bernoulli Naive Bayes" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnComplementNaiveBayesOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnComplementNaiveBayesOpDesc.scala new file mode 100644 index 00000000000..cac35f5daf9 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnComplementNaiveBayesOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnComplementNaiveBayesOpDesc extends 
SklearnMLOpDesc { + model = "from sklearn.naive_bayes import ComplementNB" + name = "Complement Naive Bayes" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDecisionTreeOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDecisionTreeOpDesc.scala new file mode 100644 index 00000000000..815e4047664 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDecisionTreeOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnDecisionTreeOpDesc extends SklearnMLOpDesc { + model = "from sklearn.tree import DecisionTreeClassifier" + name = "Decision Tree" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDummyClassifierOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDummyClassifierOpDesc.scala new file mode 100644 index 00000000000..286540836a1 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnDummyClassifierOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnDummyClassifierOpDesc extends SklearnMLOpDesc { + model = "from sklearn.dummy import DummyClassifier" /* last token must be the estimator class name: the generated Python instantiates it */ + name = "Dummy Classifier" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreeOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreeOpDesc.scala new file mode 100644 index 00000000000..b7b05a7bcc3 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreeOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnExtraTreeOpDesc extends SklearnMLOpDesc { + model = "from sklearn.tree import ExtraTreeClassifier" + name = "Extra Tree" +} diff --git 
a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreesOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreesOpDesc.scala new file mode 100644 index 00000000000..8dee19029cb --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnExtraTreesOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnExtraTreesOpDesc extends SklearnMLOpDesc { + model = "from sklearn.ensemble import ExtraTreesClassifier" + name = "Extra Trees" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGaussianNaiveBayesOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGaussianNaiveBayesOpDesc.scala new file mode 100644 index 00000000000..b8d378e83e7 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGaussianNaiveBayesOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnGaussianNaiveBayesOpDesc extends SklearnMLOpDesc { + model = "from sklearn.naive_bayes import GaussianNB" + name = "Gaussian Naive Bayes" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGradientBoostingOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGradientBoostingOpDesc.scala new file mode 100644 index 00000000000..f89ba74c0f2 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnGradientBoostingOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnGradientBoostingOpDesc extends SklearnMLOpDesc { + model = "from sklearn.ensemble import GradientBoostingClassifier" + name = "Gradient Boosting" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnKNNOpDesc.scala 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnKNNOpDesc.scala new file mode 100644 index 00000000000..1d60f25dced --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnKNNOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnKNNOpDesc extends SklearnMLOpDesc { + model = "from sklearn.neighbors import KNeighborsClassifier" + name = "K-nearest Neighbors" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLinearSVMOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLinearSVMOpDesc.scala new file mode 100644 index 00000000000..1f3a78852a6 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLinearSVMOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnLinearSVMOpDesc extends SklearnMLOpDesc { + model = "from sklearn.svm import LinearSVC" + name = "Linear Support Vector Machine" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionCVOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionCVOpDesc.scala new file mode 100644 index 00000000000..7fddd2f9ebd --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionCVOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnLogisticRegressionCVOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import LogisticRegressionCV" + name = "Logistic Regression Cross Validation" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionOpDesc.scala 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionOpDesc.scala new file mode 100644 index 00000000000..8be99e62e50 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnLogisticRegressionOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnLogisticRegressionOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import LogisticRegression" + name = "Logistic Regression" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMLOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMLOpDesc.scala new file mode 100644 index 00000000000..3d25195b52f --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMLOpDesc.scala @@ -0,0 +1,69 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.PythonOperatorDescriptor +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} + +abstract class SklearnMLOpDesc extends PythonOperatorDescriptor { + @JsonIgnore + var model = "" + + @JsonIgnore + var name = "" + + @JsonProperty(value = "Target Attribute", required = true) + @JsonPropertyDescription("attribute in your dataset corresponding to target") + @AutofillAttributeName + var target: String = _ + + override def generatePythonCode(): String = + s"""$model + |from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score + |from pytexera import * + |class 
ProcessTableOperator(UDFTableOperator): + | @overrides + | def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]: + | if port == 0: + | self.model = ${model + .split(" ") + .last}().fit(table.drop("$target", axis=1), table["$target"]) + | else: + | predictions = self.model.predict(table.drop("$target", axis=1)) + | auc = accuracy_score(table["$target"], predictions) + | f1 = f1_score(table["$target"], predictions, average='micro') + | precision = precision_score(table["$target"], predictions, average='micro') + | recall = recall_score(table["$target"], predictions, average='micro') + | print("Accuracy:", auc, ", F1:", f1, ", Precision:", precision, ", Recall:", recall) + | yield {"name" : "$name", + | "accuracy" : auc, + | "f1" : f1, + | "precision" : precision, + | "recall" : recall, + | "model" : self.model}""".stripMargin + + override def operatorInfo: OperatorInfo = + OperatorInfo( + name, + "Sklearn " + name + " Operator", + OperatorGroupConstants.MACHINE_LEARNING_GROUP, + inputPorts = List( + InputPort(PortIdentity(), "training"), + InputPort(PortIdentity(1), "testing", dependencies = List(PortIdentity())) + ), + outputPorts = List(OutputPort()) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = + Schema + .builder() + .add("name", AttributeType.STRING) + .add("accuracy", AttributeType.DOUBLE) + .add("f1", AttributeType.DOUBLE) + .add("precision", AttributeType.DOUBLE) + .add("recall", AttributeType.DOUBLE) + .add("model", AttributeType.BINARY) + .build() +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultiLayerPerceptronOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultiLayerPerceptronOpDesc.scala new file mode 100644 index 00000000000..c368ad9257c --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultiLayerPerceptronOpDesc.scala @@ -0,0 +1,6 @@ +package 
edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnMultiLayerPerceptronOpDesc extends SklearnMLOpDesc { + model = "from sklearn.neural_network import MLPClassifier" + name = "Multi-layer Perceptron" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultinomialNaiveBayesOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultinomialNaiveBayesOpDesc.scala new file mode 100644 index 00000000000..c2f0caccac6 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnMultinomialNaiveBayesOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnMultinomialNaiveBayesOpDesc extends SklearnMLOpDesc { + model = "from sklearn.naive_bayes import MultinomialNB" + name = "Multinomial Naive Bayes" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnNearestCentroidOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnNearestCentroidOpDesc.scala new file mode 100644 index 00000000000..88293093a1f --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnNearestCentroidOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnNearestCentroidOpDesc extends SklearnMLOpDesc { + model = "from sklearn.neighbors import NearestCentroid" + name = "Nearest Centroid" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPassiveAggressiveOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPassiveAggressiveOpDesc.scala new file mode 100644 index 00000000000..978d9ebc4e2 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPassiveAggressiveOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + 
+class SklearnPassiveAggressiveOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import PassiveAggressiveClassifier" + name = "Passive Aggressive" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPerceptronOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPerceptronOpDesc.scala new file mode 100644 index 00000000000..2f5e120f811 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPerceptronOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnPerceptronOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import Perceptron" + name = "Linear Perceptron" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPredictionOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPredictionOpDesc.scala new file mode 100644 index 00000000000..faa177aa20d --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnPredictionOpDesc.scala @@ -0,0 +1,49 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.annotations.AutofillAttributeName +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.PythonOperatorDescriptor +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} + +class SklearnPredictionOpDesc extends PythonOperatorDescriptor { + @JsonProperty(value = "Model Attribute", required = true, defaultValue = "model") + @JsonPropertyDescription("attribute corresponding to ML model") + @AutofillAttributeName + var 
model: String = _ + + @JsonProperty(value = "Output Attribute Name", required = true, defaultValue = "prediction") + @JsonPropertyDescription("attribute name of the prediction result") + var resultAttribute: String = _ + + override def generatePythonCode(): String = + s"""from pytexera import * + |class ProcessTupleOperator(UDFOperatorV2): + | @overrides + | def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]: + | if port == 0: + | self.model = tuple_["$model"] + | else: + | tuple_["$resultAttribute"] = str(self.model.predict([tuple_])[0]) + | yield tuple_""".stripMargin + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Sklearn Prediction", + "Sklearn Prediction Operator", + OperatorGroupConstants.MACHINE_LEARNING_GROUP, + inputPorts = List( + InputPort(PortIdentity(), "model"), + InputPort(PortIdentity(1), "testing", dependencies = List(PortIdentity())) + ), + outputPorts = List(OutputPort()) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = + Schema + .builder() + .add(schemas(1)) + .add(resultAttribute, AttributeType.STRING) + .build() +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnProbabilityCalibrationOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnProbabilityCalibrationOpDesc.scala new file mode 100644 index 00000000000..19d3f22a535 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnProbabilityCalibrationOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnProbabilityCalibrationOpDesc extends SklearnMLOpDesc { + model = "from sklearn.calibration import CalibratedClassifierCV" + name = "Probability Calibration" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRandomForestOpDesc.scala 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRandomForestOpDesc.scala new file mode 100644 index 00000000000..159ef40a26e --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRandomForestOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnRandomForestOpDesc extends SklearnMLOpDesc { + model = "from sklearn.ensemble import RandomForestClassifier" + name = "Random Forest" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeCVOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeCVOpDesc.scala new file mode 100644 index 00000000000..f12e9723016 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeCVOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnRidgeCVOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import RidgeClassifierCV" + name = "Ridge Regression Cross Validation" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeOpDesc.scala new file mode 100644 index 00000000000..1dce1033eb2 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnRidgeOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnRidgeOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import RidgeClassifier" + name = "Ridge Regression" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSDGOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSDGOpDesc.scala new file mode 100644 index 00000000000..2c366976459 --- /dev/null +++ 
b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSDGOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnSDGOpDesc extends SklearnMLOpDesc { + model = "from sklearn.linear_model import SGDClassifier" + name = "Stochastic Gradient Descent" +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSVMOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSVMOpDesc.scala new file mode 100644 index 00000000000..a1f3507f10e --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/sklearn/SklearnSVMOpDesc.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.operators.sklearn + +class SklearnSVMOpDesc extends SklearnMLOpDesc { + model = "from sklearn.svm import SVC" + name = "Support Vector Machine" +} diff --git a/core/gui/src/assets/operator_images/SklearnAdaptiveBoosting.png b/core/gui/src/assets/operator_images/SklearnAdaptiveBoosting.png new file mode 100644 index 0000000000000000000000000000000000000000..2daaf54222aaf34473f2c329db51e505d2059a3c GIT binary patch literal 117082 zcmafbd0b5U|Np7bwG~~LV#?A~7nK%jq{aP28l_x}O4~4!C{5aF8QgnABbrcYv!t$R z6{4nPTq+|Kbz8{PP)gghZ`yvZ_k?Edz2C=g{_*+T&N=V*`?Wk@&)0I!h20iA7S3Bf z55us9MAL11;2%W{o9q9>9QZF?>?=6 XPzX zH&VFrMdx9elIu#0P-%aA+J{%qR|T6cdM*|}X0xgI2Uzd>j~zK~<%_Wk_EoRpbp9B8 z?E2yRMH#tMp;J>X^xlU>-D2Ob#Ax~JBExC-?lC{LNIQ>KEPm4%JX{hy?itd)n$zw( zH5S=8(L0e5tWo?#dgdl nR6;zP$gtB+nEeDZnfh=JjkLCN~w0oKZ?h%#%Ax6;g|m_w=T+`8OGd%}U1 z$4}p^QC}f{mQ>S}n4B82bJ#}unnLbbp^KxWD0p5G^Vs#l7e}KTO?H%~D>!r=>~E?K zudI@7y0<}c=31IDg+5EawB_BKL;X$N9&MlZrS{dEwiQO`Ke;_?<}zw7J)CCr)=vLG zanJcepM=IaJBQVO77|$O914CF>CuZ*0RtU9M;ol3ZA;5iP?=y1650y1k@CO(Xj~1G zvp?=5m5%Ee9?(gmzE{X9xH(|PkgSdPb_x8OR3SUpg4VlO^5fdVOA-7_B7ZB9og22B zq{fIX8ae-L=xVbLT(Un-Q|R`9XiXX18RiXV1HCULEv+_=3-juX8Ntp5=DFRwgf`9F zOAjaCW*& %=B@aT znJ-U84Y)tUe|r#y(YV@hNv7PYz=XjcTpHk-?289wX8z8$kW>660v*lpi}Qjj 
+Uozyi#D9_1X2zk6EHO@0V448b+#C?GURp(lL? *a+K6lduNuyl0Yoke==K$ e?)r@Yto6}tH$7+S@jOn=6( zjpHvjyt6UMPir}si6=^AQyP}cTt}OL4Osrke2|*4O6{~6P2%y+z6`uBg`O;O6;jJ3 z<_#y1Ng%`DdA z3pa%=meJ5mkyR2hi60NrpT9w}?2uh8R`u}WuR<0{< ?J+GFC$Jy4;qhQ zC7o+IEY0FTPhrH;l&RW7T1IRr`C%@>V3-)Rai@@6^yCyZ=aRgyH?QO5J)U)wyQ}6- zVzRJB@@B~?db~zHuaxL?{0H_y8>LYzMnbWuRq(FLDtcUD9*ZCRp$(qHyP3uq2e5Y$ zR;0ymnlh5fO!bIyZBFXrS!AvMPTyuwcotPf+%GMZ5>QL@5Bc_P#$27l9ipIQQ zRc+5;$*vL0EB0rUFmpxhD(q^h>BD nbc4WoEG~cOmY6nM8LtYYR3K z0q8;36y5GzO_M_^%wyrrdYs#jXVEr{fNN6dBw;;kh!vZ KsdBMtz^^2=hyYWq;|s-jyWiu%vF;xe7WGgk=qw}+cKN<&(Og<%tjef0 zDOA|J$QAVPk;G~98s@5DxJ3V&bBUstTs}sB!)2ej$9#aV^;kHU)|#`c@5Db=@_FTi zpEM@fD~&EP#$)l_svYZzqE9KYGg(9212J%iX=sBCrpyn~EVkq>o;fr7gsdWjmEui! zEW1^? ODg>#}sYVgpQODc)Cen>hJ&dBl5C^=*+Wv?9YT_L0gMb}E}Y)Wmq_ z(-?UxDDQQIk<<30R^{AxcFZTCt7S0OCbPa<%=BF2nN?_ARJpe2`>OpJDXA+LNzwvy zEc>_st-`L7ced12B@tw5R{}DfuvH#?l2a}OK<40$DmQ@+T)t9%$_!hTg6}SteOx9# zvkH6u6S`Nk&a$8RhtAJpAB<8O&CID0egeJJbVn+ng5lu_o-T&F5raHrzsMuB 5uzKt;BSI0rCz7MxHz+RXD)YE`iSTz`rmR za(Ht(^N5H9V0*Jj)!qxjF~Y)7{K}MLZ-jS`zo%*9E8%!hdz)T2AEO!AD*0~_4ac;I z7Ol!{`>Io!hR 00;HIv)Yb#jPdnFry0roF`y2Vf0gkXTSw^^(5 zjP^Ehno&+VvtRi1gzd$qlCKuc#U>fTirJ`PqPLs$$h1!u++<{ZEy??huuPK^5jKuL zJ#yYF@^Fo 4NbL`4vrn4YR`u5{3kyVsO?&qg@P6!9 mR1R#HIO47`0FMiM*fa;XX+2Qeh&u?7pO#yCUp5*b?rc zdx@;HF!E?9sVjEXkYFL+ZZ4pTu#qM`mvz~}Y}e!X`$}Rxkw-)Xka39qP=e(B6UaR3 zETSwoHVL5(8~?ZL9oC0G5u9}Jr7&KKJ|2wFTI8BvJmp+MkzBICkGyt#( {wdnk1NADny4eta!g9%DP`(=`jaM4ryms ` z9Gfw P zRqJy?kTQ9(q-rPO=Xb`Gng-tj+;`lDNCPI>3Dne0 dbqHP%7~;w!B1%;V~c6Xh+;r)YEJljSMx zdBVE(DRW9}ya%+b6#bb2bRtYsK^SYv&UPm6{$!SdJzyJj%}(a$JQTw69UP}bJn517 z$y$&dE+j^?m?f;1y5A#KD!QdF3ozH}22%@r0Vm%jd~( ^11&2{tw7oM%o>Sf{eZ 78Ovhavj@MZ-7 zMvGXR8=<PF2XUX7^9~RC$JHq!QEUagC@(nB;8wL(6a;QI+Q@nIL%UaewrPN5z zmcTlzq$%_S5m(q`;YJ@=&9MQMP(i2#@)Ug$?+yj>UKh#CJrB6^8a%~VSSWeLM0z;n z5Y2(;_P(Vg@3+E0zk!4dGA`w-(rB0d30pmn!X~x$knACPVFcA$?u-**Ju*8COqeLS z1rNl6GeNP!M;ke?srd7Wx@cx@XbKqPhc7<#-q*rwS9m{+3}?2lL8f|;1t# lWM@?;tA$t* 
zR^hP^76s0$DIg5IlnIUEk$b{cY}i^?Y`T&W>kTNI$6m*I3~9J9n;kKOL^As>^EyyO zEa~|5W~S)fKdUQ3b%RrahfEZsS&bpeBYf In64At`0Amw!JsCRN_N0q$MX z0rJ91J|=4xi$su~9VHqjxn^0NV5D_f|5DUx*p|Z`W;rrKTyhFr^!+z-hvNY9uwj(o zAD~Ue`Tvt+IPMzNsvJY|D(O0=&7LWi(avdbH1|L`(QYF+Oy@f&+vt3ME{6LJnQy=& z=C^G{xEITc5N?f}Qd5%mQt(OMLpw0R@4u~DPvI|vQHL8q#hK2Hka-C641!xkE`1*< z5((L&iX;^AF!LkuhX?p6B!{C~jSC*k*yPQ!!^NgHjuKJc5ac#~X+3gN$P?9+H3jC{ z-?9Lsop^xn7Fo|8S?kE{ZzmxvYNTt%3;Q9#Kr&eF-n80B@ =*1!EatDiamB5l8+3sT2rbqH(I zqV$(Afe9m*W}k_1pY#6PZ5WohOmefy&XbF+!Z9rV8EwV)ztHkGKhdmgSyyR ^ngRuIi^ze`CWH9#ez(ygDWP5BmKYfd>W`-!e4Xgd->n+)N-BRC{#xS1W zqsxLl)vXiRQ}mGL9< LequG!C!-LtqM_Ke{bdvIk2g3D_rb-=?oqd5#z7kmMStpX3h#e@XQ4-6H z@0W&k&%p?q>54N(o4Lt8GF*Sjd@OUNGij4BMdA@U1L+U_WrtiF8|P!xzZ!%ch>Fuw zSM--%ijfP;na65`Acj5A EHl{egii0qYOR&ZvW((%G-Y2yqm{PN_kDB)ku=E+=r+ zetc peZQo%%MegiIRW(v^T9d;MEq=HC)6d3e0rim?d4g_7GYwCD@dD$lz^Co zxxl7lMGS2uJ5a98wKM_HCP?&|AL@r>+g936>SNdt#epP3qowL=icLwIE|&rt>}}B# zDjXy{Dl0X0wjEc6ySnp)1^FzyOp~G8CV`pPr!vchZGm8CD+jSrpL1wM!fb47O0btu zETKNh_J$prB}iafGad*VnN)3dv1y}w8}#} @bc`%Vi^1B=8hQ;!gxJ-KJ 0Az%t8^n zl65! eze1B4>)CW&ST+OxHL@=KbH VFUZ!IQwQ;6ux*&1a5iaRs?qK$?@Vy&1%BF*(qw z1&StmZ2Iy0#k_5S9!Sqzux*Xip~CKRO21XB@? 
ErKL z|3KiN9QrJbZKfvdi**|yLSqgPb-rx~Y8pWkpJaijjg?{;`K*ZM*g=7Ot;$1==zNtT z=bQNQUbfBsbPC+UfXq<{RN0eH(7MN837bdVO2N7mk##2nP9m{R909TZ8Z3h5!ISM; zl>rH8-J07Xk{AUrZyZ8iwXf_SQ9Nk8TGr)$1=y{!o5(^zvdEealnQHZsL4{0F-3Rf zgZO$~pMz4>$-p-5n+b$qQDMt(VIDQu*y;gV9*&@gmLcac-_ zCp_pIyU)Yc`j(0;6R!!eq+UZ9K*__C1d}heX0yR1p}xwW$zL#^j?0upn;qb+I}X+e zo5=}y{=-gqKJRB$5t~Yzk_XI6y)S{$+Qy7Tmagdrzsv=@CCn4?8=sD0Sj6f-77-Y2 z_CFSAyc|AhX$za(0i_(SP>RwShTPb-O2kIE{{5dEXLu`Uq9=ip=p uNC=PXz>heS1 zpGYH)c+)p_&_REHf^UvkqQMWZjh21GWflF)0xTH%Zb9Q&qtLGE?RjVN{WkLQ$qvDK zQH-*YqtVSPV-g;_6pZ#3Ca~nza#cos8K;}@->)V(Op!=}k~_~E+O^GNsXPAH|Hp5i zTA*6HmgB}qSU58k!h5o>MtVU5D$P*!`6aUMEJcf_Ek)w3(4%RIQ%mT*+T8ljo<8qU zgMi04CNr;-23K<^jN#4iSMSL>aXO*-J~Lf+S?u?%&@RBB4o16BN52Bi64z5TvHdcw zL_V{ zIV-Nuoao6fZHod0bL7%oj0xiGa;rzuKWbKAR+e*9%9=e_@a z5NrnrZNaD8*bdK4wqXpD2CYfXLcMe9KBR%V-<^Efx9aRZ+vqsQLitXb cgR-6fvWf^8sll) V}Qkc@DP3|)arg1co!K vWhmpf*9CRigDO)%Yr39^h*r%`;pZ=f) z_M2A@>HfnQmC@ZKU(%zaLb66vfEaBVs^&U*&IzGJH-oGp2ny}P=P$uN_^DfH=e)uJ zDAa{Y`{a)A!ipci16OP^J4u2~JnEd8b)$7@3Ou$L8e yu)J5d##_tTLVGa)@ iA eQbcqTmhb12Zo`g#iczucpB#yrofNRvvLT^dxz;y(NxzK@D`)(XP zg#x)|JLXCZk!$4H9D*f6LZKe@mBR)mr`mzR(Jn;$N>cX#_E=R@NO7AUe}4$|W5d^4 zreG(riC=v_RLG1i{ub;ctbDI8hRbn6c5W|X=N7LrD2XIzIPi{7_qJdb&;L0h#QSYr z&*ao+2*?<3yTIe`GllIu_N=bsq!N%9QalV|%_>8)74{<7PdEe|AzlxqkqS_B=g%P= zEJYAU64^-DYuLz?OB$?0j!}oXwK$=XJL=(vfYC@u0v;GZV4Z^MJYfC)>Z}=Wq)~c1 zWZQ0Og9Y5V-!6qs9!6b%!+pXANV_lw1+;d=Esmyw <5W*mo)Vhm7|JntsOZX2H*xNVFf8x6ewva7@U=B;oMj5~m`n 3mZODAh58awvD4Ji*q0rz? 
qHQJ zz)ZkAVT`B!L|sG(AFI3dANC*x<00S#rSP}{R}(ACGP%s-r7!^T5BtQO2QJHJ2^et- zpJu@7mZBMKMmD-k$VPEkB#JxNL*R BDx@5H8}prLY)uB*eMytQ0@9 )c?a!hCT!HA48c%*k8;oH=nxs zVjEeAx-`s$Om_S@WC88x!Ir)h%MNoFE)3#J(Ihm?PTG@T+Cyl6>~zQ%1{&2+M#P23 zJ(F!(&DrZY2X}EhgT?%^TO@+|G{e#%f~2PV)|Q)ZU};>`17yph!6Um9t|Ou3Bx3k) z`PZKvxnk6t|HL!$=}8KGBTR6%!!F-VCU3b}D%-S{69{WVXapu9iYfIykWg`43ab?W z<8r#CjzaA%py6v^5kgs-6^gIq72u6}(0uXy7`%`dJ#AJnVlZQRJ>m(=6%9I}-a~U) zLDBct#nD!>B*<&NlAMkifae_#JVqZdI-LRge931?NIjKpQchQVS{wE6h{KCU^o{oj zR+l>8VcIkJAB4xTCt0&nwyEG2%EV6Ecv3v&+X^;IB(>`!CLlC()gzaXOTX&JGJqnn zZ=*bw9Q0X=HaBKE4hn}LG;EDwerd^knz2@~pJ^W`(47142t;c{dHIIZ05W7Ii~}U| z&r1wY=nu&&t`ikf+L 0~MGk$7r>F zpQQ- J^mnMd#G&vj1&sw1|GI z$Z$q;kf5qj1O7TIAQAveYzK^!|1)aw4hNi|4`gP5Pb9(n2Cuo)&pK8+3Oo2T#UVlX z@`b-l|JZ9spd?Z_ydU -Q7yA22;^?(vStEQ-8=v+gHU3{=miK1>Uu(}tw_yw$ ze+2Ncb~z<4^cR`VXQ~{;^PPowR7sN=CDCz6P&-Q1iQMQfb`D>c@^5IPaXf(@&>}$% zQMqNS+F+J}u(~ug0}qs6i0_cI3O1VLHVS;R>1@kVx@OSMzI2v^#VbMJbDb%^L?u>) zg~RMF-tsi6Z|nri4+CK-a~>1-=PUhZL^gjJsjjG`uyHozRbVq=90Vr1y{Fyl9{-z< z(};fMvTc!ZRzUHTq+{maa0L^CvG17X5dKoaFaJh9+PR@v&B|wjs@&Bv$nbQEVYc;g z?-=znHGJ^rHlc||9vNzZ8^IH@-l4Zb_VzY%#Y#@ld!Z?=1Jji$EjYra2Y4%#NC>O@ zSwBRZ3y&Kmu$~xLVDJ4L#=tH{OoI%8*pGO$D4~*U$^p&z2#Q?#VAr{d797C|BAe@F z&;AnfZ(yYPOczW+lor*pO_0OuaD0jJH#~Vu@+%O6%YTm>uiI=WiO~ONR8QAYmubfU znms6B3{b`u ZteKy_8A;jA=9Yy45+H_hT>bdP8dOVZ;x7M1_b{m2;wN9fIJ zBN&$41wQk){8QB9hRGOWL-FnfdV+CR(bTkVJ_=BN>0^A%(2ee4gj8kEAbCo~-hiac z<}#!F^vpX9KHYDp_n$mnYPNf+PemToh#$SbdA7wHeU3Mx$G2ygIN7 ywW7yX1 ze`b=IB!T>K5P|bv$u mUNOu9E-hf4sTCZ;2<4IP}(VK$O*lbP?aJB!JQw$fj@)>-m_^aWIYM7`!g{G2=g~6$PK~QWAsRA3 RyIBWBhMhaTS;NX5)Xj zEnyGz!cgeb&(DQIkZ~`J3@5qK?=Po}Ujf@K0C~O9;()n8!DGf#sE4yX popp-Dp0%9NK z8wUNeG)G;Ktz`mK`gu$_=;bL0jNm|_mx2T5w5gqm*k}nu%f2GT8D&>n N{6-0*jWj^gUS^`5NW7E@Zg;r?Pobl?E84quuH0mMp z5)v-T&Cz5hBf)pN2OqIoCgiC0*%4*V<+8QZi~lYSu~)w>l5NxB9Fs)t{1dVfo|*q& z gSmb6FsPbh!}k_`4&3)$rB!z#%01M|{w7 7RL(Ay80G#py2!7-rbr@T@U5_Zvt?=x=Oguhsi^(Y>J30R^Kmyye)4 zI^0%jiqnC#nL>Au$TJg&8D8~+nnU+YH7?%W5ed1(AZQs34h;d?w}pWmMq59|jJPlT 
ztYHb6HWVX;>V)wWXm8c~)f2F-DpujV4e(Y9fDy3$;rvZh#@8a5P& 2_K8PEin8GrH5W0{5Rm&)eR#o%K5MbSkBvt zBTJ<6*a##?Rzqg+)m9*?qJjSJ**2R~KPKx!_gE?2BZhTGq?l&T{tA)bcuu@%<&~G{ zZD+C*&9&7 KK~ET@brcLxTPf@=n+Vla8YwnJ}Pw9p1tw$itgMy4=OoR5TU4 zt|L^9J;`;R=*|@ydL_uggc@Ve_PH2V7pG#3uS-Lu8F7Qu9gEpP0+X*kg8QC3c3Rg1 z_ab*PJ7DPEf?WYsjdcPuc}#2&qHB3GC=}+ %18!LFtt`509ZZ(l) ztN#Xjo}c&S@Suy51JLsL;0P4ScUCMY J8EL?4)^`Nh>RxF-1Z;L>20O(F}; z;JMbWOJDN~>;E$RGv^+Uwn8Mg- LJ#UCiHQV~D6X3>dR=PvMA!LfUtB1?_7 z$?)b-8;1Sc!|UzU()j8>64(x>y^VZ^fpPYBN+<@H_B~>z?~puWFs#lYGNk9h=$YWh zPLOjjxgq_sK{`wfqnh(gB(cf8zKu%ICv%}LvA$h}e@q(OO l zIR9I3qcgvq|3+@opZB5uGY$&6ymXekNeDLietdyfO!FaE8S7LQJ|XwC0YS3!cV_Oc zlM^w8fh}l!LGa@ZcW#$kJNnERhVt|;8Q-C9ZTbCry?KfnyW<^xbS)>+|B>8MmmsxM zBXLtT6ko@J6ENgV2qJmMyM%jL>2UMkedvZ;p*LgYlvrgAw(B=H#guxJ*>2B}e%tlS z65eoHL|!+4?CECmdFH7XD>n@055kNbHlCdG_mpMfWXDu=@Kivn{#dVng)*mIcWQ8I zvO0KTq@wUMzeKRb@!#{;Dk{~4LmS}5IvHFhQ;*oUIsO9qYKi&X#A+U{=NP 9E!Mh5IxvBJ8->X zTrpC=r}g%;iocRtqjwUQQ6kufxK8Vo+~%~>;%-RW1eH0iOd9{2yV5sK61Gq}{ eg(u2>d`p33g6Ptn?Q2nK0+(Wz^mhTmhRz&5My6UpH^ z#GpNFUKFXi1i!MNsWF!`;Nbb76z1(R-ALZyjy3ej^`*8t%iRp7I)W$qGgxCDcA!zH zMa2x=r+YD6 Hv-n*>E%Ti*U#R1miC4MGn7uOI^;I4nHd)3kG*guh>wx0mL zZQDOq-#_ZObH|u_oa4@&DdtLdtDgwwBiWvv_Ht|eE6Y+b% T9(s ziK*ax0hcYg6CPea@Vxg`Z%^7&S@e2Urk$f!{?G{!qjQX-nL>Q8(U&@IWc@C%++f$q z@{u)#stR!>!zE_@QI1Qm?pL3Sx<0gt`;ib}ky3o?p0ATph4&SlPdVtx$IM#l5^?X! 
zfD={9?>ZP#zZtYp10Uyagf2v=W17N*Rnl;3a#Lkya!vHSol{$?exI8XF3>ZXGn7O3 zlIbvob&KJNb>puF)YUy`zrsr`zQavmK-?Csj;{GZq23JLmpxD^e`aTx`(neVI}si( zgxxXYS@DUn0w$Yr)Vf9AunFH5-i}}RWHMhKd9Cw z;~kg${uT-~n)S9-TlTUgFkZ&1lyZ29-EiuZp8CT1kIFZEN<3ZBiHsxDAY`{muEohM zdN9(dfL!47B6iS?gj G$OYkWiQ*M7{+n(H3|cJ%NTt<8Uin;DxWDjW!}f> z*fe&Vc_PBjef05QJn&{bkz`-=WpRjeM6)4S$(v)4u>q)*-MShM?|?4=bjy@hV2zyw zBhlDb?UHo&Z76ufeZJN;M$I|TOmctOfTM4ZZAR``nN*M?wBO$c%za5p60fX&p_;jU zc=T@6LxmNBPV7+QFIU4EiOirdX?S6v!E5*!JTvpvGX3|(UC-gn9AL0doT3-| c1okGIioLBd*;D`!JBt$%_;0 zNQ3H}5WEV&!J0JfpLF?>t>j Ah_f %DBPwZ#~vb(**JjFET>1LYZK9UunUNSnS z8)H0?`YL {HSsNVNDxjrc)>1KRQlW#>v&WY>$zpX&!rkDdR-RPBX)}oBg6Ao@ z2VR)l`7ZBfFGcmuUq5u j$ixPuD zb4LYMQg`22P0=3jFN*_X-}J;u!EaOlnM7$1MXhg`bWTENN_9$yci90 9S&c3~jF@D3>^VSbPej^r7DOfvlg1P|e^gTgd7 x C9EFROTe}C$$KYl82>hp6e3J z-+x(%X@vcRuRGp>PdKE8434Hq%>?O1yW37^Ag`5j{kTdcQ?kX_y!+$Z`6f0hInh?P zKUk8gAJaEnX((HN8q%iQqiaf}qc57OC_cb~UBJUQmfZa1Hw3U0-*yx5qyd)&eYxyu zJmDp@81G8ZmOm!$&=?3Wb>%C>o--hky0s=^8zdv=dBJEq#bMLrYEDrdo~>=5JQ~t1 zAQXg+C4eTTeHw&0Fga%6GyLiEBCOCkXwFz{bHLF>*6H9`)|Sn+EkCi`-zKc+$%mRj z-7Uv7*{Ma5I)B}_RvF!)-;bU|T_sgOzltY$$26bvl7i8X%@0mtovhK8PZe(y&UI(v zW6akReZlQjnC+fP&;A^HUa5kyua~71t<*|enm~Ug-ubsZ!?HjHo`sPU v)Q#z@7OmU6w#|boHRpzOV$h1XCg0SEJdZoN7`wD=eUZ_T zb*sfnd!yR6oGW<>g%>D38I8Mn(cGe>H|4Dam*@RnLo8TOj`eVJPgilfBVwKOaAJAs z{dj4T_m*XQtdr{Bh)uFxu9CzsheRfP#Ns$3 U2P6ky74J0m~Xfstk 7?~bS=dF%5lU(_6 zboq!cm stP{N%mfS#Hf@B7{ zKxrz#E3A;($aUWl0tNsjG=_^!W&g$=D; ?4NT1~+pC>+qS@mr*P{MwRk8T%b1P;}zC@SZoQ#lm2 JEU%@ y(Q?M-$*=%KVYy_W(ws)&|40s}lLyBNC2 (kn!o$=K!n|hw#TD6Nv|b(J3D`iLGdDlpEGCk;|6CJW2EG!qpR}@7e8JcU z`>^EmSMkZDLExIu_+v7wm|r$?jF*Jdo_U5h-(o7X%!g_j_&7`LkX%~=6P&dlc1bHb zRnHv;)W89&iEQ&}ix3d<==}`-oL1|Hqg(50ag7yEcQGFD8x}zP?m6t~F=d=oNnqa} zVYsYy(Iz0j#3ad!OHB(;Mzr0OfH88`-fiN3x%^2+vuf2rV7_tqoHS+z$y-zB@h?^~wE900(UtB*U+#p-sD zy!QYs1(a$vM2d~ahvLC0VPI)NxgcID*feYHVeG4h8USJmVego%M!J+3^;L`UqJ#*$ z%fOfiWj}SK#RO*umYrP6t06X6*NbCAMdJn^TUA>pWZA=YczACZ;)LLclGkB0fL~zQ z=LA8W=jW!CrYW9j%*}7($Kc(yzDD(1%oE%zd8F>F)N>ih(UZP&Fj*$kBBgzG)$e~K 
zpKHQ8W5AqIM%_AX&l+`yBEuoGE>PZ#q5(nAy|qsp%XraMt?@T7NyuubVepoJa`gUi zi(+NDkEyI$up_Kgds#=L(x`^cT=p-cfbG|8VT&{;ct@i$ qbFR1Ip?&s6XPJb5ytK`c#cQKqCPE};GS#)NJ^d*hi*GSzWEdjH#jKNx zppAjtK1UwyYF-LH@`U&-S$BtJpo>!|tuKMMUQcf#lsx^D*>5g+FtRg?wGxGZu*SSU zAoV}yDnq+NO=#1m4NjX}o(v=}xnt@aE>9NT;jYK${JKrNPMVv~2k}Kl?_A98$bA7u z*E)JrCAduM%G3(j+f8PJ(&$n!2)CS<19EKEbOiNNdTCndm*BzyN6UFXE}a>YI|h_7 z;{D-uVd5Db_6&C^{6r2j_vJp`fn3ii8!^o1u0RKS#l9S-;%F34gggeXN(<3haraRP zW2kuiOr3GSC`2eOH?Kg9tq@ad1B-)%RcrLB7|oEW0j3lvZI+LtSh_M|ZE&-(?0ZbZ z;s>kP$G<>=jt&pOQ&&}S;$_}ppnT}73#m)OF)3qT$bS|&6Zs`cXSVC@>eGNoqDLrg zh>|XjmkPUYY4P!<2AVGQb|5)6P0VH=AKOiGOcQ+21g8QXe+T6`mjM@WzkLfWH0M8= zq4 I5YakLje}$ z4`;KwVY56ZsFEZ}924XiZVafBfO!9)t}RbE)yXdf=%78*&Y-?8_6@|w4qt0dVp)Cl zgW1SkFXxwe$
QuZ!xyxeRvYJoH8w|n>Hq2q`nbH@&;FiU2d8* zd2@^LVk70va;Odv*9--+h2{6XNrP&fw$C`pLlMoXEL|j>)5Te8>|d>w{8_*Kei)8N zqd*uK%c~d=1Dn4D(J#wuxC9TD02m9FaMG3pEDr#CXkRt!=QRdJkgFl@9=pp-*G^k) z57`%cejM yMj}YVHJDz<8+GO1iAMN~kjTkN^#k^u-8HIu#)MKTpXz(i(OLE;7 zV%A}#B-=V9kjxz|^-ln&G%wE9_)8Cp0TVo2qnks+q>f(68CDygh0Lthij<&ja84|9 zfn)C21R`8epEGnybnbDI#8h7bU0Nf91s_&l7VzR>op@fs*TP#cyztJjqnCRi;Pwla z!b&xbJSQMgfGX7-P(eKxtOh-6 5ES1Ix zN1zU*3)UUtWQgnC%;5l?HI0Fi=U6EZH+73y;IytXQ{MSR@f=#yreNve%}bApjTG4@ zIW?C} )5Ta`4+-iW68%=FwrFtV z0L$+3JdMWoDK*0_?|v`k2jO4@rcS}^0TvLl%kF1nXBx3iiObsgB$<$FVmv?nb!)^j z)eukvIfzIVxRPmK8q=`i?#;ywMF-#6=g7+g(GFwk-DB39XLPGToXc#qV7%X7-NklB zbxB&$1NuXy{#|ZTTj2{GwobYBfr!)WU?O~96w(32(qXiv* )H7=7gO@b;Uuqr*5??LCiFKf>)M8_qis{{;dOrEm}C;u(D}eNJvrQL+A|+$VF%fZ z!4rActWDmXVSc&MMur#hr*t4~P60y%H_d(N_2UTe`%LQuM!;JiO>V1k2LICV07jK< zc~lqTmobYyl=iU?87J|+=l<%! z?4W0(!+MQ1-6g&r5;_=;2A^2#Q)^f+mbs#pSWuRs3y;D&uQ(;jd!(#@@G+g9W0NjH z*T-YO#c{CJ-kidx;}yM+@hY~&=X>+I_$alYHR>+Qb_bUsYX(Tpf(K<9Uk~>=96wQT z!8T2d?3h2Sv;-SVVu1rhH1A5 T@{WDgZlpNF{$amm+Y^6<+AKSs$*c#s=*d0N zS}SOr_^`Dq!^-*_r9FO_jSX?Q0k2nJL+=7SM?&!53PBJ~3x|Wr>KcLdzbX_|Bqxtg zEx_v1{5SPM0Syu))w*?@PXV`7(eZ3=(+B1)VPND8ScST~<=N?O#<*<7H3T@!mI=p+ z6)Eklc&jbt&l2kK{#620Aom6s&0#CNJ-iN|HU}^PnhMKNRGZr({jdRHA$pWw?rH6e zfIQ+!U7dxhl`f2gl=g#$yT@4Y`qD+yF&@6^HV)Hxo!9pTAw*|L1O&BO74)p^t#WPG zyoM< L1tn{g6%3kX<%d;WL(WY5V}U> zANO|%s-PS8aSimI-F~IpH6lI-bEsVhbx?QMD=jTLVS`c0t|nAzgkle4B+uFMYpEm? z?|E=cr)G^ 4}wSSVZ3{n0t z{N8IACAyijNZvQ#)v{Jf1Z2nX3A*nPD#g&MTs4n~^{m?utc@(zaxOTbY0gQSV?BOz zKeP~7Bp6muIP~Zz+Or3o0fWk~ZhEdCoa_3rf@_0E;B16LDInI&MPHFbTkR31A%8wd zmjI2rN27iD8SrSH2C#j-!g%CJJ+rQd&v^jfYTI)EB1C}s`%&flyu=xd^Dyk80)ST7 zE>hsD0T^v9;qCPeMlc56xBF+sF0tS7SRIoD+-nuoBihJ37Nn+suo7yUpcDicg|EHi z3FRK{bZG V)-*(jJ-egHhlDEDv1f>%N8f0u)y9O3AIGPcU