-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ML/LlamaIndex: Add software tests and CI configuration (#707)
* ML/LlamaIndex: Adjustments to make it work with non-Azure OpenAI
* ML/LlamaIndex: Add software tests and CI configuration
- Loading branch information
Showing
11 changed files
with
263 additions
and
130 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
name: LlamaIndex | ||
|
||
on: | ||
pull_request: | ||
branches: ~ | ||
paths: | ||
- '.github/workflows/ml-llamaindex.yml' | ||
- 'topic/machine-learning/llama-index/**' | ||
- '/requirements.txt' | ||
push: | ||
branches: [ main ] | ||
paths: | ||
- '.github/workflows/ml-llamaindex.yml' | ||
- 'topic/machine-learning/llama-index/**' | ||
- '/requirements.txt' | ||
|
||
# Allow job to be triggered manually. | ||
workflow_dispatch: | ||
|
||
# Run job each night after CrateDB nightly has been published. | ||
schedule: | ||
- cron: '0 3 * * *' | ||
|
||
# Cancel in-progress jobs when pushing to the same branch. | ||
concurrency: | ||
cancel-in-progress: true | ||
group: ${{ github.workflow }}-${{ github.ref }} | ||
|
||
jobs: | ||
test: | ||
name: " | ||
Python: ${{ matrix.python-version }} | ||
CrateDB: ${{ matrix.cratedb-version }} | ||
on ${{ matrix.os }}" | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
os: [ | ||
'ubuntu-latest', | ||
] | ||
python-version: [ | ||
'3.8', | ||
'3.13', | ||
] | ||
cratedb-version: [ 'nightly' ] | ||
|
||
services: | ||
cratedb: | ||
image: crate/crate:${{ matrix.cratedb-version }} | ||
ports: | ||
- 4200:4200 | ||
- 5432:5432 | ||
env: | ||
CRATE_HEAP_SIZE: 4g | ||
|
||
env: | ||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | ||
|
||
steps: | ||
|
||
- name: Acquire sources | ||
uses: actions/checkout@v4 | ||
|
||
- name: Set up Python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: ${{ matrix.python-version }} | ||
architecture: x64 | ||
cache: 'pip' | ||
cache-dependency-path: | | ||
requirements.txt | ||
topic/machine-learning/llama-index/requirements.txt | ||
topic/machine-learning/llama-index/requirements-dev.txt | ||
- name: Install utilities | ||
run: | | ||
pip install -r requirements.txt | ||
- name: Validate topic/machine-learning/llama-index | ||
run: | | ||
ngr test --accept-no-venv topic/machine-learning/llama-index |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# OPENAI_API_KEY=sk-XJZ7pfog5Gp8Kus8D--invalid--0CJ5lyAKSefZLaV1Y9S1 | ||
OPENAI_API_TYPE=openai | ||
CRATEDB_SQLALCHEMY_URL="crate://crate@localhost:4200/" | ||
CRATEDB_TABLE_NAME=time_series_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
CREATE TABLE IF NOT EXISTS time_series_data ( | ||
timestamp TIMESTAMP, | ||
value DOUBLE, | ||
location STRING, | ||
sensor_id INT | ||
); | ||
|
||
INSERT INTO time_series_data (timestamp, value, location, sensor_id) | ||
VALUES | ||
('2023-09-14T00:00:00', 10.5, 'Sensor A', 1), | ||
('2023-09-14T01:00:00', 15.2, 'Sensor A', 1), | ||
('2023-09-14T02:00:00', 18.9, 'Sensor A', 1), | ||
('2023-09-14T03:00:00', 12.7, 'Sensor B', 2), | ||
('2023-09-14T04:00:00', 17.3, 'Sensor B', 2), | ||
('2023-09-14T05:00:00', 20.1, 'Sensor B', 2), | ||
('2023-09-14T06:00:00', 22.5, 'Sensor A', 1), | ||
('2023-09-14T07:00:00', 18.3, 'Sensor A', 1), | ||
('2023-09-14T08:00:00', 16.8, 'Sensor A', 1), | ||
('2023-09-14T09:00:00', 14.6, 'Sensor B', 2), | ||
('2023-09-14T10:00:00', 13.2, 'Sensor B', 2), | ||
('2023-09-14T11:00:00', 11.7, 'Sensor B', 2); | ||
|
||
REFRESH TABLE time_series_data; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,92 @@ | ||
""" Example code using Azure Open AI and llama-index. """ | ||
""" | ||
Use an LLM to query a database in human language. | ||
Example code using LlamaIndex with vanilla OpenAI and Azure OpenAI. | ||
""" | ||
|
||
import os | ||
import openai | ||
import sqlalchemy as sa | ||
|
||
from dotenv import load_dotenv | ||
from langchain_openai import AzureOpenAIEmbeddings | ||
from langchain_openai import OpenAIEmbeddings | ||
from llama_index.llms.azure_openai import AzureOpenAI | ||
from llama_index.llms.openai import OpenAI | ||
from llama_index.embeddings.langchain import LangchainEmbedding | ||
from llama_index.core.utilities.sql_wrapper import SQLDatabase | ||
from llama_index.core.query_engine import NLSQLTableQueryEngine | ||
from llama_index.core import Settings | ||
|
||
if __name__ == "__main__": | ||
load_dotenv() | ||
|
||
def configure_llm(): | ||
""" | ||
Configure LLM. Use either vanilla Open AI, or Azure Open AI. | ||
""" | ||
|
||
openai.api_type = os.getenv("OPENAI_API_TYPE") | ||
openai.azure_endpoint = os.getenv("OPENAI_AZURE_ENDPOINT") | ||
openai.api_version = os.getenv("OPENAI_AZURE_API_VERSION") | ||
openai.api_key = os.getenv("OPENAI_API_KEY") | ||
|
||
llm = AzureOpenAI( | ||
engine=os.getenv("LLM_INSTANCE"), | ||
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"), | ||
api_key = os.getenv("OPENAI_API_KEY"), | ||
api_version = os.getenv("OPENAI_AZURE_API_VERSION"), | ||
temperature=0.0 | ||
) | ||
if openai.api_type == "openai": | ||
llm = OpenAI( | ||
api_key=os.getenv("OPENAI_API_KEY"), | ||
temperature=0.0 | ||
) | ||
elif openai.api_type == "azure": | ||
llm = AzureOpenAI( | ||
engine=os.getenv("LLM_INSTANCE"), | ||
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"), | ||
api_key = os.getenv("OPENAI_API_KEY"), | ||
api_version = os.getenv("OPENAI_AZURE_API_VERSION"), | ||
temperature=0.0 | ||
) | ||
else: | ||
raise ValueError(f"Open AI API type not defined or invalid: {openai.api_type}") | ||
|
||
Settings.llm = llm | ||
Settings.embed_model = LangchainEmbedding( | ||
AzureOpenAIEmbeddings( | ||
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"), | ||
model=os.getenv("EMBEDDING_MODEL_INSTANCE") | ||
if openai.api_type == "openai": | ||
Settings.embed_model = LangchainEmbedding(OpenAIEmbeddings()) | ||
elif openai.api_type == "azure": | ||
Settings.embed_model = LangchainEmbedding( | ||
AzureOpenAIEmbeddings( | ||
azure_endpoint=os.getenv("OPENAI_AZURE_ENDPOINT"), | ||
model=os.getenv("EMBEDDING_MODEL_INSTANCE") | ||
) | ||
) | ||
) | ||
|
||
print("Creating SQLAlchemy engine...") | ||
engine_crate = sa.create_engine(os.getenv("CRATEDB_URL")) | ||
print("Connecting to CrateDB...") | ||
|
||
def main(): | ||
""" | ||
Use an LLM to query a database in human language. | ||
""" | ||
|
||
# Configure application. | ||
load_dotenv() | ||
configure_llm() | ||
|
||
# Configure database connection and query engine. | ||
print("Connecting to CrateDB") | ||
engine_crate = sa.create_engine(os.getenv("CRATEDB_SQLALCHEMY_URL")) | ||
engine_crate.connect() | ||
print("Creating SQLDatabase instance...") | ||
|
||
print("Creating LlamaIndex QueryEngine") | ||
sql_database = SQLDatabase(engine_crate, include_tables=[os.getenv("CRATEDB_TABLE_NAME")]) | ||
print("Creating QueryEngine...") | ||
query_engine = NLSQLTableQueryEngine( | ||
sql_database=sql_database, | ||
tables=[os.getenv("CRATEDB_TABLE_NAME")], | ||
llm = llm | ||
llm=Settings.llm | ||
) | ||
|
||
print("Running query...") | ||
|
||
# Submit a natural-language question to the query engine. | ||
print("Running query") | ||
QUERY_STR = "What is the average value for sensor 1?" | ||
answer = query_engine.query(QUERY_STR) | ||
print(answer.get_formatted_sources()) | ||
print("Query was:", QUERY_STR) | ||
print("Answer was:", answer) | ||
print(answer.metadata) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[tool.pytest.ini_options] | ||
minversion = "2.0" | ||
addopts = """ | ||
-rfEX -p pytester --strict-markers --verbosity=3 --capture=no | ||
--cov=. --cov-report=term-missing --cov-report=xml | ||
""" | ||
|
||
#log_level = "DEBUG" | ||
#log_cli_level = "DEBUG" | ||
|
||
testpaths = [ | ||
"*.py", | ||
] | ||
xfail_strict = true | ||
markers = [ | ||
] | ||
|
||
[tool.coverage.run] | ||
branch = false | ||
|
||
[tool.coverage.report] | ||
fail_under = 0 | ||
show_missing = true | ||
omit = [ | ||
"conftest.py", | ||
"test*.py", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
cratedb-toolkit | ||
pueblo[testing] | ||
sqlparse |
Oops, something went wrong.