From e1c1a9e1b3b0db75214f66ec7997732074737943 Mon Sep 17 00:00:00 2001 From: TimAdams84 Date: Fri, 16 Feb 2024 13:58:31 +0100 Subject: [PATCH 1/3] Extend README.md --- README.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e9a9f04..c8c4b94 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,58 @@ # INDEX – the Intelligent Data Steward Toolbox -Intelligent data steward toolbox using Large Language Model embeddings for automated Data-Harmonization + +INDEX is an intelligent data steward toolbox that leverages Large Language Model embeddings for automated Data-Harmonization. + +## Table of Contents +- [Introduction](#introduction) +- [Installation & Usage](#installation--usage) +- [Configuration](#configuration) + +## Introduction + +INDEX relies on vector embeddings calculated based on variable descriptions to generate mapping suggestions for any +dataset, enabling efficient and accurate data indexing and retrieval. Confirmed mappings are stored alongside their +vectorized representations in a knowledge base, facilitating rapid search and retrieval operations, ultimately enhancing +data management and analysis capabilities. New mappings may be added to the knowledge base in an iterative procedure, +allowing for improved mapping suggestions in subsequent harmonization tasks. 
+ +## Installation & Usage + +Clone the repository: + +```bash +git clone https://github.com/SCAI-BIO/index +cd index +``` + +### Starting the Backend locally + +Install python requirements: + +```bash +pip install -r requirements.txt +``` + + +Run the Backend API on port 5000: + +```bash +uvicorn main:app --reload --port 5000 +``` + +### Run the Backend via Docker + +Download the latest docker build: + +```bash +docker pull ghcr.io/scai-bio/backend:latest +``` + +## Configuration + +### Description Embeddings + +You can configure INDEX to use either a local language model or call OpenAI's embedding API. While using the OpenAI API +is significantly faster, you will need to provide an API key that is linked to your OpenAI account. + +Currently, the following local models are implemented: +* [MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet) \ No newline at end of file From 1d3b7ed1050b8f42899d02332f69221078b698b3 Mon Sep 17 00:00:00 2001 From: TimAdams84 Date: Wed, 21 Feb 2024 11:29:38 +0100 Subject: [PATCH 2/3] Split CI to only build containers after main push --- .github/workflows/docker-package.yml | 59 ++++++++++++++++++++++++++++ .github/workflows/python-package.yml | 24 ----------- 2 files changed, 59 insertions(+), 24 deletions(-) create mode 100644 .github/workflows/docker-package.yml diff --git a/.github/workflows/docker-package.yml b/.github/workflows/docker-package.yml new file mode 100644 index 0000000..9a775c2 --- /dev/null +++ b/.github/workflows/docker-package.yml @@ -0,0 +1,59 @@ +name: Python package + +on: + push: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip 
install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest + + build_docker_image: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Get Version Tags + id: versions + run: | + echo "BACKEND_VERSION=$(echo "$(> "$GITHUB_OUTPUT" + - name: Docker Login + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build & push backend + uses: docker/build-push-action@v2 + with: + file: Dockerfile + push: true + tags: | + ghcr.io/scai-bio/backend:latest + ghcr.io/scai-bio/backend:${{ steps.versions.outputs.BACKEND_VERSION }} diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b9324ca..7794281 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -35,27 +35,3 @@ jobs: - name: Test with pytest run: | pytest - - build_docker_image: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Get Version Tags - id: versions - run: | - echo "BACKEND_VERSION=$(echo "$(> "$GITHUB_OUTPUT" - - name: Docker Login - uses: docker/login-action@v1 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build & push backend - uses: docker/build-push-action@v2 - with: - file: Dockerfile - push: true - tags: | - ghcr.io/scai-bio/backend:latest - ghcr.io/scai-bio/backend:${{ steps.versions.outputs.BACKEND_VERSION }} From c3c5d5394da4bb7f56445ae103d45e3fe3f08800 Mon Sep 17 00:00:00 2001 
From: TimAdams84 Date: Wed, 21 Feb 2024 11:30:31 +0100 Subject: [PATCH 3/3] Remove python build from docker ci script --- .github/workflows/docker-package.yml | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/.github/workflows/docker-package.yml b/.github/workflows/docker-package.yml index 9a775c2..e0865b5 100644 --- a/.github/workflows/docker-package.yml +++ b/.github/workflows/docker-package.yml @@ -5,34 +5,6 @@ on: branches: [ "main" ] jobs: - build: - - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.9", "3.10", "3.11"] - - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pytest build_docker_image: runs-on: ubuntu-latest