diff --git a/.env.template b/.env.template new file mode 100644 index 0000000..2145482 --- /dev/null +++ b/.env.template @@ -0,0 +1 @@ +SQL_EVAL_UPLOAD_URL=YOUR_SQL_EVAL_UPLOAD_URL \ No newline at end of file diff --git a/README.md b/README.md index 2a666ec..64d7b13 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ This repository contains the code that Defog uses for the evaluation of generate ## Introduction Our testing procedure comprises the following steps. For each question/query pair: + 1. We generate a SQL query (possibly from an LLM). 2. We run both the "gold" query and the generated query on their respective database to obtain 2 dataframes with the results. 3. We compare the 2 dataframes using an "exact" and a "subset" match. TODO add link to blogpost. @@ -19,6 +20,7 @@ This is a comprehensive set of instructions that assumes basic familiarity with ### Install Dependencies Firstly, clone the repository where we store our database data and schema. Install all Python libraries listed in the `requirements.txt` file. You would also need to download the spacy model if you're using the NER heuristic for our [metadata-pruning method](https://github.com/defog-ai/sql-eval/blob/main/utils/pruning.py) (set by c=0, more below). Finally, install the library. + ```bash git clone https://github.com/defog-ai/defog-data.git cd defog-data @@ -29,7 +31,7 @@ pip install -e . ### Start Postgres Instance -Next, you would need to set up the databases that the queries are executed on. We use Postgres here, since it is the most common OSS database with the widest distribution and usage in production. In addition, we would recommend using Docker to do this, as it is the easiest way to get started. You can install Docker [here](https://docs.docker.com/get-docker/). +Next, you would need to set up the databases that the queries are executed on. We use Postgres here, since it is the most common OSS database with the widest distribution and usage in production. 
In addition, we would recommend using Docker to do this, as it is the easiest way to get started. You can install Docker [here](https://docs.docker.com/get-docker/). Once you have Docker installed, you can create the Docker container and start the Postgres database using the following commands. We recommend mounting a volume on `data/postgres` to persist the data, as well as `data/export` to make it easier to import the data. To create the container, run: @@ -39,11 +41,13 @@ docker create --name postgres-sql-eval -e POSTGRES_PASSWORD=postgres -p 5432:543 ``` To start the container, run: + ```bash docker start postgres-sql-eval ``` If you want to reset the Postgres server instance's state (e.g. memory leaks from transient connections), you can turn it off (and start it back up after): + ```bash docker stop postgres-sql-eval # see that the container is still there: @@ -51,12 +55,12 @@ docker container list -a ``` Some notes: + - You would need to stop other Postgres instances listening on port 5432 before running the above command. -- You only need to run the `docker create ...` once to create the image, and then subsequently only `docker start/stop postgres-sql-eval`. +- You only need to run the `docker create ...` once to create the image, and then subsequently only `docker start/stop postgres-sql-eval`. - The data is persisted in `data/postgres`, so turning it off isn't critical. On the other hand, if you delete the `data/postgres` folder, then all is lost T.T - While we will use Docker for deploying Postgres and the initialization, you are free to modify the scripts/instructions to work with your local installation. - ### Import Data into Postgres The data for importing is in the `defog-data` repository which we cloned earlier. Each folder contains the metadata and data corresponding to a single database (e.g. `academic` contains all the data required to reload the 'academic' database). We assume that you have a `psql` client installed locally. 
We will create a new database in our postgres instance for each of the 7 SQL databases with the following commands: @@ -74,6 +78,7 @@ export DBPORT=5432 ### Import Data into Snowflake Should you wish to import the data into Snowflake, the setup instructions are also in the `defog-data` repository. After installing the [Snowflake CLI](https://docs.snowflake.com/en/user-guide/snowsql-install-config), configure your credentials as per the [docs](https://docs.snowflake.com/en/user-guide/snowsql-config) and set them as environment variables like below, then run the setup command. + ```sh export SFDBPASSWORD="your_password" export SFDBUSER="your_username" @@ -85,15 +90,19 @@ export SFDBWAREHOUSE="your_warehouse" Note that during evaluation you'll have to use the `_snowflake` question files in `/data`. The queries been modified to be valid on Snowflake databases. ### Import Data into BigQuery, MySQL, SQLite, SQL Server + The setup instructions for these database management systems are found in the `defog-data` repository. Configure your credentials accordingly, set up your environment variables, then translate and import the eval databases with the command: + ```python python translate_ddl_dialect.py ``` + During evaluation, you'll have to set the right `--db_type` flag and use the corresponding `_{dialect}` question files in `/data`. ### Using Private Data (Optional) If you have a private dataset that you do not want to make publicly available but would still like to repurpose the code here for evaluations, you can do so by following the steps below. + - Begin by creating a separate git repository for your private data, that has a `setup.py` file, similar to [defog-data](https://github.com/defog-ai/defog-data). - Create the metadata and data files, and import them into your database. This is to allow our evaluation framework to run the generated queries with some actual data. 
You can refer to `defog-data`'s [metadata objects](https://github.com/defog-ai/defog-data/blob/main/defog_data/metadata.py) for the schema, and [setup.sh](https://github.com/defog-ai/defog-data/blob/main/setup.sh) as an example on how import the data into your database. We do not prescribe any specific folder structure, and leave it to you to decide how you want to organize your data, so long as you can import it into your database easily. - To use our metadata pruning utilities, you would need to have the following defined: @@ -102,10 +111,12 @@ If you have a private dataset that you do not want to make publicly available bu - A way to define joinable columns between tables. In our case, we call a dictionary [columns_join](https://github.com/defog-ai/defog-data/blob/db8c3d4c4004144d2b3ff5a2701529f5545f520f/defog_data/supplementary.py#L233) of database name to a nested dictionary of table tuples to column name tuples. You can refer to the raw data for an example of how we generate this dictionary. Once all of the 3 above steps have completed, you would need to + - Install your data library as a dependency, by running `pip install -e .` (-e to automatically incorporate edits without reinstalling) - Replace the associated function calls and variables in [prune_metadata_str](utils/pruning.py#L165) with your own imported functions and variables. Note that you might not name your package/module `defog_data_private.supplementary`, so do modify accordingly. Some things to take note of: + - If you do not populate your database with data (ie only create the tables without inserting data), you would return empty dataframes most of the time (regardless of whether the query generated was what you want), and it would result in results matching all the time and generate a lot of false positives. Hence, you might want to consider populating your database with some meaningful data that would return different results if the queries should be different from what you want. 
- If testing out on your private data, you would also need to change the questions file to point to your own questions file (tailored to your database schema). @@ -117,7 +128,7 @@ If there are functions that are generally useful for all query generators, they ### Runner -Having implemented the query generator, the next piece of abstraction would be the runner. The runner calls the query generator, and is responsible for handling the configuration of work (e.g. parallelization / batching / model selected etc.) to the query generator for each question/query pair. +Having implemented the query generator, the next piece of abstraction would be the runner. The runner calls the query generator, and is responsible for handling the configuration of work (e.g. parallelization / batching / model selected etc.) to the query generator for each question/query pair. We have provided a few common runners: `eval/openai_runner.py` for calling OpenAI's API (with parallelization support), `eval/anthropic_runner` for calling Anthropic's API, `eval/hf_runner.py` for calling a local Hugging Face model and finally, `eval/api_runner.py` makes it possible to use a custom API for evaluation. @@ -126,6 +137,7 @@ When testing your own query generator with an existing runner, you can replace t ## Running the Test ### OpenAI + Remember to have your API key (`OPENAI_API_KEY` or `ANTHROPIC_API_KEY`) set as an environment variable before running the test if you plan to call the OpenAI or Anthropic/other LLM API's accordingly. 
To test it out with just 10 questions (instead of all 200), parallelized across 5 : @@ -143,7 +155,9 @@ python main.py \ ``` ### Anthropic + To test out the full suite of questions for claude-3: + ```bash python main.py \ -db postgres \ @@ -157,6 +171,7 @@ python main.py \ ``` ### Hugging Face + To test it out with our fine-tuned sql model with just 10 questions (instead of all 200): ```bash @@ -170,11 +185,13 @@ python -W ignore main.py \ -m defog/llama-3-sqlcoder-8b \ -c 0 ``` + We also support loading a peft adapter here as well via the `-a` flag. Note that the loading of the adapter with the model will take slightly longer than usual. ### vLLM We also have a [vllm](https://blog.vllm.ai/) runner which uses the vLLM engine to run the inference altogether as a single batch. It is much faster to do so especially when `num_beams` > 1. You would have to pass in a single set of merged model weights, path to LoRA adapters if applicable, and the model architecture needs to be supported by vLLM. Here's a sample command: + ```bash python -W ignore main.py \ -db postgres \ @@ -190,6 +207,7 @@ python -W ignore main.py \ Optionally, if you're running evals on a model that is quantized with AWQ, add the `-qz` or `--quantized` parameter. Only applicable for the vllm runner. ### Running with an API Server + If running with different settings, you can setup an api server to avoid reloading for each test setting and then run the tests subsequently. We enable setting up 2 types of api servers, namely the vllm api server, as well as the TGI server. We also provide our custom modification of the vllm api server, which only returns the generated output. 
@@ -201,7 +219,7 @@ We also provide our custom modification of the vllm api server, which only retur python -m vllm.entrypoints.api_server \ --model defog/sqlcoder-7b-2 \ --tensor-parallel-size 4 \ - --dtype float16 + --dtype float16 # to set up a vllm server that supports LoRA adapters python -m vllm.entrypoints.api_server \ @@ -210,7 +228,7 @@ python -m vllm.entrypoints.api_server \ --dtype float16 \ --max-model-len 4096 \ --enable-lora \ - --max-lora-rank 64 + --max-lora-rank 64 # to use our modified api server python utils/api_server.py \ @@ -235,6 +253,7 @@ python main.py \ #### TGI API Server You may consult the [TGI documentation](https://huggingface.co/docs/text-generation-inference/quicktour) for more information on how to set up a TGI server. Here's a sample command to set up a TGI server using a preset docker image and run the evaluation using the API runner. Note that you would want to change the number of shards and the model id accordingly, depending on how many gpu's you have available and your model of choice. + ```bash # to set up a tgi server model="defog/sqlcoder-7b-2" @@ -266,6 +285,7 @@ python main.py \ #### Multiple Prompts If you'd like to test out a few prompts in a single run (to save the few minutes spent loading the model into GPU at the start of each run), you can specify a list of prompt files in `--prompt_file` (e.g. `-f prompts/prompt-1.md prompts/prompt-2.md prompts/prompt-3.md`), as well as a corresponding list of output files in `--output_file` (e.g. `-o results/results-1.csv results/results-2.csv results/results-3.csv`). The number of prompts and output files must be the same. Here's a sample command: + ```bash python -W ignore main.py \ -db postgres \ @@ -275,9 +295,11 @@ python -W ignore main.py \ -f prompts/prompt_1.md prompts/prompt_2.md \ -m defog/sqlcoder2 ``` + While you can do the same for the other runners, the time savings are most significant when loading a large model locally, vs calling an always-on API. 
### Bedrock + ```bash python -W ignore main.py \ -db postgres \ @@ -288,8 +310,8 @@ python -W ignore main.py \ -m meta.llama3-70b-instruct-v1:0 ``` - ### Llama CPP + To run the eval using Llama CPP, you can use the following code. Before running this, you must install `llama-cpp-python` with the following (on Apple Silicon) `CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python` @@ -304,9 +326,10 @@ python -W ignore main.py \ -g llama_cpp \ -f "prompts/prompt.md" \ -m path/to/model.gguf - ``` +``` ### MLX + To run the eval using MLX, you can use the following code. Before running this, you must install `mlx-lm` package with `pip install mlx-lm` Note that MLX does not currently have beam search, and hence will have lower quality results. @@ -322,6 +345,7 @@ python -W ignore main.py \ ``` ### Gemini + Before running this, you must create an account with [Google AI](https://ai.google.dev/) and set your credentials with `export GOOGLE_APPLICATION_CREDENTIALS=`. Then, install these packages with `pip install vertexai google-cloud-aiplatform`. ```bash @@ -337,6 +361,7 @@ python -W ignore main.py \ ``` ### Mistral + Before running this, you must create an account with [Mistral](https://mistral.ai/) and obtain an API key and store it with `export MISTRAL_API_KEY=`. Then, install `mistralai` with `pip install mistralai`. ```bash @@ -352,57 +377,62 @@ python -W ignore main.py \ ``` ## CLI Flags + You can use the following flags in the command line to change the configurations of your evaluation runs. + ### Data-related parameters -| CLI Flags | Description | -|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| -q, --questions_file | CSV file that contains the test questions and true queries. 
If this is not set, it will default to the relevant `questions_gen_.csv` file. It may be helpful to always end your questions_file name with `_.csv` to ensure compatibility between the queries and selected db_type. | -| -n, --num_questions | Use this to limit the total number of questions you want to test. | -| -db, --db_type | Database type to run your queries on. Currently supported types are `postgres` and `snowflake`. | -| -d, --use_private_data | Use this to read from your own private data library. | -| -dp, --decimal_points | Use this to specify the number of decimal points a result should be rounded to. This is `None` by default | +| CLI Flags | Description | +| ---------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -q, --questions_file | CSV file that contains the test questions and true queries. If this is not set, it will default to the relevant `questions_gen_.csv` file. It may be helpful to always end your questions_file name with `_.csv` to ensure compatibility between the queries and selected db_type. | +| -n, --num_questions | Use this to limit the total number of questions you want to test. | +| -db, --db_type | Database type to run your queries on. Currently supported types are `postgres` and `snowflake`. | +| -d, --use_private_data | Use this to read from your own private data library. | +| -dp, --decimal_points | Use this to specify the number of decimal points a result should be rounded to. 
This is `None` by default | ### Model-related parameters -| CLI Flags | Description | -|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| -g, --model_type | Model type used. Make sure this matches the model used. Currently defined options in `main.py` are `oa` for OpenAI models, `anthropic` for Anthropic models, `hf` for Hugging Face models, `vllm` for a vllm runner, `api` for API endpoints, `llama_cpp` for llama cpp, and `mlx` for mlx | -| -m, --model | Model that will be tested and used to generate the queries. Some options for OpenAI models are chat models `gpt-3.5-turbo-0613` and `gpt-4-0613`, and non-chat model `text-davinci-003`. Options for Anthropic include the latest claude-3 family of models (e.g. `claude-3-opus-20240229`). For Hugging Face, and VLLM models, simply use the path of your chosen model (e.g. `defog/sqlcoder`). | -| -a, --adapter | Path to the relevant adapter model you're using. Only available for the `hf_runner`. | -| --api_url | The URL of the custom API you want to send the prompt to. Only used when model_type is `api`. | -| -qz, --quantized | Indicate whether the model is an AWQ quantized model. Only available for `vllm_runner`. 
| +| CLI Flags | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -g, --model_type | Model type used. Make sure this matches the model used. Currently defined options in `main.py` are `oa` for OpenAI models, `anthropic` for Anthropic models, `hf` for Hugging Face models, `vllm` for a vllm runner, `api` for API endpoints, `llama_cpp` for llama cpp, and `mlx` for mlx | +| -m, --model | Model that will be tested and used to generate the queries. Some options for OpenAI models are chat models `gpt-3.5-turbo-0613` and `gpt-4-0613`, and non-chat model `text-davinci-003`. Options for Anthropic include the latest claude-3 family of models (e.g. `claude-3-opus-20240229`). For Hugging Face, and VLLM models, simply use the path of your chosen model (e.g. `defog/sqlcoder`). | +| -a, --adapter | Path to the relevant adapter model you're using. Only available for the `hf_runner`. | +| --api_url | The URL of the custom API you want to send the prompt to. Only used when model_type is `api`. | +| -qz, --quantized | Indicate whether the model is an AWQ quantized model. Only available for `vllm_runner`. | ### Inference-technique-related parameters -| CLI Flags | Description | -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| -f, --prompt_file | Markdown file with the prompt used for query generation. You can pass in a list of prompts to test sequentially without reloading the script. 
| -| -b, --num_beams | Indicates the number of beams you want to use for beam search at inference. Only available for `hf_runner`, `vllm_runner`, and `api_runner`. | -| -c, --num_columns | Number of columns, default 20. To not prune the columns, set it to 0. | -| -s, --shuffle_metadata | Shuffle metadata, default False. This shuffles the order of the tables within the schema and the order of the columns within each table but does not shift columns between tables (to preserve the structure of the database). | -| -k, --k_shot | Used when you want to include k-shot examples in your prompt. Make sure that the column 'k_shot_prompt' exists in your questions_file. | -| --cot_table_alias | Used when you want to include chain-of-thought instructions before the actual sql generation. Allowed values are `instruct`, `prealias` and `pregen`. If using `instruct` or `prealias`, make sure that the placeholder '{cot_instructions}' exists in your prompt file. `instruct` will get your model generate the chain-of-thought table aliases, while `prealias` would already generate the aliases in the prompt. | | +| CLI Flags | Description | +| ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --- | +| -f, --prompt_file | Markdown file with the prompt used for query generation. You can pass in a list of prompts to test sequentially without reloading the script. | +| -b, --num_beams | Indicates the number of beams you want to use for beam search at inference. Only available for `hf_runner`, `vllm_runner`, and `api_runner`. | +| -c, --num_columns | Number of columns, default 20. 
To not prune the columns, set it to 0. | +| -s, --shuffle_metadata | Shuffle metadata, default False. This shuffles the order of the tables within the schema and the order of the columns within each table but does not shift columns between tables (to preserve the structure of the database). | +| -k, --k_shot | Used when you want to include k-shot examples in your prompt. Make sure that the column 'k_shot_prompt' exists in your questions_file. | +| --cot_table_alias | Used when you want to include chain-of-thought instructions before the actual sql generation. Allowed values are `instruct`, `prealias` and `pregen`. If using `instruct` or `prealias`, make sure that the placeholder '{cot_instructions}' exists in your prompt file. `instruct` will get your model generate the chain-of-thought table aliases, while `prealias` would already generate the aliases in the prompt. | | ### Execution-related parameters -| CLI Flags | Description | -|----------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| -o, --output_file | Output CSV file that will store your results. You need to pass the same number of output file paths as the number of prompt files. | -| -p, --parallel_threads | No. of parallel workers available for generating and processing queries | -| -t, --timeout_gen | No. of seconds before timeout occurs for query generation. The default is 30.0s. | -| -u, --timeout_exec | No. of seconds before timeout occurs for query execution on the database. The default is 10.0s. | -| -v, --verbose | Prints details in command line. | -| --upload_url | (optional) the URL that you want to report the results to. The server that serves this URL must have functionality that is similar to the sample server in `utils/webserver.py`. 
| - +| CLI Flags | Description | +| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| -o, --output_file | Output CSV file that will store your results. You need to pass the same number of output file paths as the number of prompt files. | +| -p, --parallel_threads | No. of parallel workers available for generating and processing queries | +| -t, --timeout_gen | No. of seconds before timeout occurs for query generation. The default is 30.0s. | +| -u, --timeout_exec | No. of seconds before timeout occurs for query execution on the database. The default is 10.0s. | +| -v, --verbose | Prints details in command line. | +| --upload_url | (optional) the URL that you want to report the results to. The server that serves this URL must have functionality that is similar to the sample server in `utils/webserver.py`. | +| --run_name | (optional) the name of this run for logging purposes | ## Checking the Results -To better understand your query generator's performance, you can explore the results generated and aggregated for the various metrics that you care about. + +To better understand your query generator's performance, you can explore the results generated and aggregated for the various metrics that you care about. ### Upload URL + If you would like to start a google cloud function to receive the results, you can use the `--upload_url` flag to specify the URL that you want to report the results to. Before running the evaluation code with this flag, you would need to create a server that serves at the provided URL. We have provided 2 sample cloud function endpoints for writing either to bigquery or postgres, in the `results_fn_bigquery` and `results_fn_postgres` folders. You may also implement your own server to take in similar arguments. 
Before deploying either cloud functions, you would need to set up the environment variables by making a copy of .env.yaml.template and renaming it to .env.yaml, and then filling in the relevant fields. For the bigquery cloud function, you would also need to put your service account's key.json file in the same folder, and put the file name in the `CREDENTIALS_PATH` field in the .env.yaml file. After doing so, you can deploy the google cloud function: + ```bash # for uploading to bigquery gcloud functions deploy results_bigquery \ @@ -430,6 +460,7 @@ gcloud functions deploy results_postgres \ The cloud function's name is whatever comes after `gcloud functions deploy` (in this case, `results_bigquery`), and you can use it to check the logs of the function by running `gcloud functions logs read results_bigquery`. You can then run the evaluation code with the `--upload_url` flag to report the results to the cloud function. The cloud function will then write the results to the relevant database. 
+ ```bash python main.py \ -db postgres \ @@ -444,7 +475,9 @@ python main.py \ If you would like to always report your results to an upload_url, even if it's not explicitly provided, you can set it in your environment variables as `SQL_EVAL_UPLOAD_URL` #### Testing the function locally + If you'd like to modify the functions and test it out locally, you can run these sample commands to deploy the function locally and then trigger the openai runner: + ```bash functions-framework --target bigquery --source results_fn_bigquery --debug python main.py \ @@ -457,10 +490,10 @@ python main.py \ --upload_url http://127.0.0.1:8080/ ``` - ## Misc We welcome contributions to our project, specifically: + - Dataset - Adding new database schema/data - Framework code diff --git a/eval/api_runner.py b/eval/api_runner.py index 3715822..15120cd 100644 --- a/eval/api_runner.py +++ b/eval/api_runner.py @@ -189,6 +189,7 @@ def run_api_eval(args): logprobs = args.logprobs cot_table_alias = args.cot_table_alias sql_lora_path = args.adapter if args.adapter else None + run_name = args.run_name if args.run_name else None if sql_lora_path: print("Using LoRA adapter at:", sql_lora_path) if logprobs: @@ -281,11 +282,12 @@ def run_api_eval(args): if not os.path.exists(output_dir): os.makedirs(output_dir) + results = output_df.to_dict("records") + if logprobs: print( f"Writing logprobs to JSON file at eval-visualizer/public/{output_file.split('/')[-1].replace('.csv', '.json')}" ) - results = output_df.to_dict("records") with open( f"./eval-visualizer/public/{output_file.split('/')[-1].replace('.csv', '.json')}", "w", @@ -302,11 +304,17 @@ def run_api_eval(args): # with open(prompt_file, "r") as f: # prompt = f.read() - # if args.upload_url is not None: - # upload_results( - # results=results, - # url=args.upload_url, - # runner_type="api_runner", - # prompt=prompt, - # args=args, - # ) + if run_name is None: + run_name = output_file.split("/")[-1].replace(".csv", "") + print( + "Run name not 
provided. Using a output filename for run name:", run_name + ) + + if args.upload_url is not None: + upload_results( + results=results, + url=args.upload_url, + runner_type="api_runner", + args=args, + run_name=run_name, + ) diff --git a/main.py b/main.py index c110715..408a8e9 100644 --- a/main.py +++ b/main.py @@ -33,6 +33,7 @@ parser.add_argument("-v", "--verbose", action="store_true") parser.add_argument("-l", "--logprobs", action="store_true") parser.add_argument("--upload_url", type=str) + parser.add_argument("--run_name", type=str, required=False) parser.add_argument( "-qz", "--quantized", default=False, action=argparse.BooleanOptionalAction ) diff --git a/utils/creds.py b/utils/creds.py index 1071801..fde20e2 100644 --- a/utils/creds.py +++ b/utils/creds.py @@ -23,7 +23,7 @@ "creds": os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"), }, "sqlite": { - "path_to_folder": f"{os.path.expanduser('~')}/workspace/defog-data/sqlite_dbs/", # Path to folder containing sqlite dbs + "path_to_folder": f"./defog-data/sqlite_dbs/", # Path to folder containing sqlite dbs }, "tsql": { "server": os.getenv("TSQL_SERVER"), diff --git a/utils/reporting.py b/utils/reporting.py index 1bee309..8027a77 100644 --- a/utils/reporting.py +++ b/utils/reporting.py @@ -1,9 +1,7 @@ -import json import requests from uuid import uuid4 from datetime import datetime import os -import hashlib # get the GPU name this is running on @@ -81,8 +79,8 @@ def num_gpus(): def upload_results( results: list, url: str, + run_name: str, runner_type: str, - prompt: str, args: dict, ): """ @@ -94,26 +92,16 @@ def upload_results( # Create a unique id for the request run_id = uuid4().hex - # Create a unique id for the prompt, based on a hash of the prompt - prompt_id = hashlib.md5(prompt.encode()).hexdigest() - # Create a dictionary with the request id and the results data = { "run_id": run_id, "results": results, "timestamp": datetime.now().isoformat(), "runner_type": runner_type, - "prompt": prompt, - 
"prompt_id": prompt_id, "model": args.model, "num_beams": args.num_beams, "db_type": args.db_type, - "gpu_name": get_gpu_name(), - "gpu_memory": get_gpu_memory(), - "gpu_driver_version": get_gpu_driver_version(), - "gpu_cuda_version": get_gpu_cuda_version(), - "num_gpus": num_gpus(), - "run_args": vars(args), + "run_name": run_name, } # Send the data to the server response = requests.post(url, json=data) diff --git a/utils/upload_report_gcloud.py b/utils/upload_report_gcloud.py new file mode 100644 index 0000000..224eca6 --- /dev/null +++ b/utils/upload_report_gcloud.py @@ -0,0 +1,21 @@ +# this is a Google Cloud Function that receives eval results over HTTP and stores them as a JSON file in a Cloud Storage bucket +# to launch the cloud function, run the following command in the terminal: +# gcloud functions deploy record-eval --runtime python310 --entry-point hello_http --trigger-http --allow-unauthenticated + +import functions_framework +from google.cloud import storage +import json + +BUCKET_NAME = "YOUR-BUCKET-NAME" + + +@functions_framework.http +def hello_http(request): + request_json = request.get_json(silent=True) + results = request_json["results"] + run_name = request_json["run_name"] + storage_client = storage.Client() + bucket = storage_client.bucket(BUCKET_NAME) + blob = bucket.blob(run_name + ".json") + blob.upload_from_string(json.dumps(results)) + return "success"