diff --git a/.github/workflows/docker-package.yml b/.github/workflows/docker-package.yml
index 1c3d7ec..3bc5599 100644
--- a/.github/workflows/docker-package.yml
+++ b/.github/workflows/docker-package.yml
@@ -1,4 +1,4 @@
-name: Python package
+name: Docker package
 
 on:
   release:
@@ -28,3 +28,4 @@ jobs:
         tags: |
           ghcr.io/scai-bio/index/backend:latest
           ghcr.io/scai-bio/index/backend:${{ steps.version.outputs.VERSION }}
+      - name: Build & push frontend
diff --git a/.github/workflows/tests.yml b/.github/workflows/python-tests.yml
similarity index 100%
rename from .github/workflows/tests.yml
rename to .github/workflows/python-tests.yml
diff --git a/README.md b/README.md
index 9c01c34..3ec9d29 100644
--- a/README.md
+++ b/README.md
@@ -1,68 +1,55 @@
 # INDEX – the Intelligent Data Steward Toolbox
 
-![example workflow](https://github.com/SCAI-BIO/index/actions/workflows/tests.yml/badge.svg) ![GitHub Release](https://img.shields.io/github/v/release/SCAI-BIO/index)
+![example workflow](https://github.com/SCAI-BIO/index/actions/workflows/python-tests.yml/badge.svg)
+![GitHub Release](https://img.shields.io/github/v/release/SCAI-BIO/index)
 
-INDEX is an intelligent data steward toolbox that leverages Large Language Model embeddings for automated Data-Harmonization.
+INDEX is an intelligent data steward toolbox that leverages Large Language Model embeddings for automated data harmonization.
 
 ## Table of Contents
 
 - [Introduction](#introduction)
 - [Installation](#installation)
-- [Usage](#usage)
+  - [Local Development Server](#local-development-server)
+    - [Starting the backend](#starting-the-backend)
+    - [Starting the frontend](#starting-the-frontend)
+  - [Docker](#docker)
 - [Configuration](#configuration)
 
 ## Introduction
 
-INDEX uses vector embeddings from variable descriptions to suggest mappings for datasets based on their semantic
-similarity. Mappings are stored with their vector representations in a knowledge base, where they can be used for
-subsequent harmonisation tasks, potentially improving the following suggestions with each iteration. Models for
-the computation as well as databases for storage are meant to be configurable and extendable to adapt the tool for
-specific use-cases.
+INDEX uses vector embeddings from variable descriptions to suggest mappings for datasets based on their semantic similarity. Mappings are stored with their vector representations in a knowledge base, where they can be used for subsequent harmonisation tasks, potentially improving suggestions with each iteration. The tool is designed to be configurable and extendable, adapting to specific use-cases through customizable models and databases.
 
 ## Installation
 
-```bash
-uvicorn api.routes:app --reload --port 5000
-```
-
-### Run the Backend via Docker
-
-The API can also be run via docker.
-
-You can either build the docker container locally or download the latest build from the index GitHub package registry.
+### Local Development Server
+
+#### Starting the backend
 
 ```bash
-docker build . -t ghcr.io/scai-bio/api/backend:latest
+cd api
+pip install -r requirements.txt
+uvicorn routes:app --reload --port 5000
 ```
 
-```bash
-docker pull ghcr.io/scai-bio/api/backend:latest
-```
+Navigate to [localhost:5000](http://localhost:5000) to access the backend.
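+
+For a quick check that the backend is running, you can request the interactive API documentation
+(the root URL simply redirects to `/docs`). A minimal example, assuming the default port above:
+
+```bash
+# Expect an HTTP 200 and the Swagger UI page once the server is up
+curl -i http://localhost:5000/docs
+```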
-After build/download you will be able to start the container and access the INDEX API per default on [localhost:5000](http://localhost:8000):
+#### Starting the frontend
 
 ```bash
-docker run -p 8000:80 ghcr.io/api/scai-bio/backend:latest
+cd client
+npm install
+ng serve
 ```
 
-## Configuration
-
-### Description Embeddings
-
-You can configure INDEX to use either a local language model or call OPenAPIs embedding API. While using the OpenAI API
-is significantly faster, you will need to provide an API key that is linked to your OpenAI account.
+Navigate to [localhost:4200](http://localhost:4200) to access the frontend.
 
-Currently, the following local models are implemented:
-* [Sentence Transformer (MPNet)](https://huggingface.co/docs/transformers/model_doc/mpnet)
+### Docker
 
-The API will default to use a local embedding model. You can adjust the model loaded on start up in the configurations.
-
-### Database
+You can start both frontend and API using docker-compose:
 
-INDEX will by default store mappings in a file based db file in the [index/db](api/db) dir. For testing purposes
-the initial SQLLite file based db contains a few of mappings to concepts in SNOMED CT. All available database adapter
-implementations can be found in [index/repository](api/repository).
+```bash
+docker-compose -f docker-compose.local.yaml up
+```
+
+## Configuration
 
-To exchange the DB implementation, load your custom DB adapter or pre-saved file-based DB file on application startup
-[here](https://github.com/SCAI-BIO/index/blob/923601677fd62d50c3748b7f11666420e82df609/index/api/routes.py#L14).
-The same can be done for any other embedding model.
+_TODO: Add configuration instructions_
diff --git a/api/routes.py b/api/routes.py
index 3d70919..f2dc0ae 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -1,6 +1,6 @@
 import logging
-import json
 import uvicorn
+
 from fastapi import FastAPI, HTTPException
 from starlette.middleware.cors import CORSMiddleware
 from starlette.responses import RedirectResponse, HTMLResponse
@@ -11,7 +11,7 @@ from datastew.visualisation import get_html_plot_for_current_database_state
 
 logger = logging.getLogger("uvicorn.info")
 
-repository = SQLLiteRepository(mode="memory")
+repository = SQLLiteRepository(mode="disk", path="snomed.db")
 embedding_model = MPNetAdapter()
 db_plot_html = None
@@ -56,6 +56,7 @@
     allow_headers=["*"],
 )
 
+
 @app.get("/", include_in_schema=False)
 def swagger_redirect():
     return RedirectResponse(url='/docs')
diff --git a/api/snomed.db b/api/snomed.db
new file mode 100644
index 0000000..f660548
Binary files /dev/null and b/api/snomed.db differ
diff --git a/client/Dockerfile.dev b/client/Dockerfile.dev
new file mode 100644
index 0000000..0eaed69
--- /dev/null
+++ b/client/Dockerfile.dev
@@ -0,0 +1,32 @@
+# FIRST STAGE: Build the Angular Application
+FROM node:20.12.2 as build
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the current directory contents into the working directory
+COPY . .
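+
+# NOTE: the whole build context is copied into the image; a .dockerignore excluding
+# node_modules/ and dist/ (if one exists in the repository) keeps this copy small and
+# avoids pulling stale host artifacts into the build stage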
+
+# Update lock file
+RUN npm install
+
+# Install dependencies
+RUN npm ci
+
+# Install Angular CLI globally
+RUN npm install -g @angular/cli
+
+# Build the Angular application
+RUN ng build --configuration=development
+
+# SECOND STAGE: Serve the application using Nginx
+FROM docker.io/library/nginx:1.26.0
+
+# Copy the built application from the previous stage
+COPY --from=build /app/dist/client/browser /usr/share/nginx/html
+
+# Expose port 80
+EXPOSE 80
+
+# Start the Nginx server
+CMD ["nginx", "-g", "daemon off;"]
diff --git a/client/Dockerfile.prod b/client/Dockerfile.prod
new file mode 100644
index 0000000..d66c0be
--- /dev/null
+++ b/client/Dockerfile.prod
@@ -0,0 +1,32 @@
+# FIRST STAGE: Build the Angular Application
+FROM node:20.12.2 as build
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the current directory contents into the working directory
+COPY . .
+
+# Update lock file
+RUN npm install
+
+# Install dependencies
+RUN npm ci
+
+# Install Angular CLI globally
+RUN npm install -g @angular/cli
+
+# Build the Angular application
+RUN ng build --configuration=production
+
+# SECOND STAGE: Serve the application using Nginx
+FROM docker.io/library/nginx:1.26.0
+
+# Copy the built application from the previous stage
+COPY --from=build /app/dist/client/browser /usr/share/nginx/html
+
+# Expose port 80
+EXPOSE 80
+
+# Start the Nginx server
+CMD ["nginx", "-g", "daemon off;"]
diff --git a/client/angular.json b/client/angular.json
index 4df5d04..bb9c3bb 100644
--- a/client/angular.json
+++ b/client/angular.json
@@ -93,5 +93,8 @@
         }
       }
     }
+  },
+  "cli": {
+    "analytics": false
   }
 }
diff --git a/client/src/environments/environment.development.ts b/client/src/environments/environment.development.ts
index d442c24..e2c08a6 100644
--- a/client/src/environments/environment.development.ts
+++ b/client/src/environments/environment.development.ts
@@ -1,3 +1,3 @@
 export const environment: { openApiUrl: string } = {
-  openApiUrl: 'http://193.175.165.153:8000',
+  openApiUrl: 'http://localhost:5000',
 };
diff --git a/client/src/environments/environment.production.ts b/client/src/environments/environment.production.ts
index 822dc53..7c37b0e 100644
--- a/client/src/environments/environment.production.ts
+++ b/client/src/environments/environment.production.ts
@@ -1,3 +1,3 @@
-// export const environment: { openApiUrl: string } = {
-//   openApiUrl: 'https://index.bio.scai.fraunhofer.de',
-// };
+export const environment: { openApiUrl: string } = {
+  openApiUrl: 'https://index.bio.scai.fraunhofer.de',
+};
diff --git a/client/src/environments/environment.ts b/client/src/environments/environment.ts
index 122b920..e2c08a6 100644
--- a/client/src/environments/environment.ts
+++ b/client/src/environments/environment.ts
@@ -1,4 +1,3 @@
 export const environment: { openApiUrl: string } = {
-  //openApiUrl: 'https://index.bio.scai.fraunhofer.de',
-  openApiUrl: 'http://193.175.165.153:8000',
+  openApiUrl: 'http://localhost:5000',
 };
diff --git a/docker-compose.local.yaml b/docker-compose.local.yaml
new file mode 100644
index 0000000..09d1ff2
--- /dev/null
+++ b/docker-compose.local.yaml
@@ -0,0 +1,21 @@
+version: "3.8"
+
+services:
+
+  frontend:
+    image: index-client
+    build:
+      context: ./client
+      dockerfile: Dockerfile.dev
+    ports:
+      - "4200:80"
+    depends_on:
+      - backend
+
+  backend:
+    image: index-api
+    build:
+      context: ./api
+      dockerfile: Dockerfile
+    ports:
+      - "5000:80"
\ No newline at end of file
diff --git a/ui/.gitkeep b/ui/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/ui/src/app/app.component.scss b/ui/src/app/app.component.scss
deleted file mode 100644
index e69de29..0000000