Add commands to start redis on non-standard port and one to run an experiment
motin committed Jan 25, 2024
1 parent 61b89f6 commit d2767f8
Showing 3 changed files with 33 additions and 18 deletions.
10 changes: 10 additions & 0 deletions automation-api/pyproject.toml

@@ -92,6 +92,16 @@ install_kernel = "python -m ipykernel install --user --name gapminder-ai-automat
 fetch_questions = "python yival_experiments/scripts/fetch_questions.py"
 generate_experiment_config = "python yival_experiments/scripts/generate_experiment_config.py"
 generate_results_file = "python yival_experiments/scripts/generate_results.py"
+start_redis = "docker run --rm -p 26379:6379 --name local-redis redis redis-server --save 60 1 --loglevel warning"
+
+[tool.poe.tasks.run_experiment]
+shell = "cd yival_experiments && yival run --output ./output/$experiment ./experiment_configurations/$experiment.yaml"
+help = "Run a yival experiment with a given name."
+
+[[tool.poe.tasks.run_experiment.args]]
+name = "experiment"
+help = "Name of the experiment to run"
+options = ["-e", "--experiment"]
 
 [tool.poe]
 envfile = ".env"
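For reference (not part of the commit), here is a rough sketch of how the `run_experiment` poe task's shell template expands the `$experiment` placeholder that the `-e`/`--experiment` argument fills in. The `expand_task` helper is hypothetical and only illustrates the substitution; poe itself performs it when the task runs:

```python
# Hypothetical illustration of the run_experiment poe task's shell
# template expansion; poe does this substitution itself at task run time.
from string import Template

SHELL_TEMPLATE = (
    "cd yival_experiments && yival run "
    "--output ./output/$experiment "
    "./experiment_configurations/$experiment.yaml"
)

def expand_task(experiment: str) -> str:
    """Substitute the experiment name into the task's shell template."""
    return Template(SHELL_TEMPLATE).substitute(experiment=experiment)

print(expand_task("experiment_202401251706_en-US"))
```

Note how a single `--experiment` value selects both the output directory and the configuration file.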
35 changes: 20 additions & 15 deletions automation-api/yival_experiments/README.md

@@ -31,30 +31,35 @@ To update the experiment with the settings in AI Eval Spreadsheet, run the gener
 poe generate_experiment_config
 ```
 
-This generates one experiment configuration per language and stores them in in `./yival_experiments/experiment_configurations/`.
+This generates one experiment configuration per language and stores them in `./yival_experiments/experiment_configurations/`.
 
-## 5. Run an experiment
+## 5. Start Redis for caching
 
-To run a particular experiment:
+The model compare function will cache LLM call results for the
+evaluator, and by default the cache is a dictionary in memory.
+Redis is used for caching by default, so that the cache is not lost when Yival
+exits. Start a local Redis server:
 
 ``` shell
-yival run --output ./yival_experiments/output/experiment_20231104_en ./yival_experiments/experiment_20231104_en.yaml
+poe start_redis
 ```
 
-This will output a pickle file in `output/experiment_20231104_en_0.pkl` which include all Experiment Results objects.
+Note: To not use Redis, comment out the Redis cache line at the top
+of `custom_configuration/model_compare.py` and uncomment the in-memory cache line.
 
-When the experiment is completed, Yival will start a web server to show the results.
+## 6. Run an experiment
 
-### Use Redis for caching
+To run a particular experiment configuration (in `./yival_experiments/experiment_configurations/`):
 
-The model compare function will cache LLM call results for the
-evaluator, and by default the cache is dictionary in memory. You can
-also use Redis to caching, so that it won't lose the cache when Yival
-exits. To do this, uncomment the line for redis cache in the top of
-`custom_configuration/model_compare.py` and set the host and password
-to your redis server.
+``` shell
+poe run_experiment --experiment=experiment_202401251706_en-US
+```
+
+This will output a pickle file in `./yival_experiments/output/experiment_202401251706_en-US_0.pkl` which includes all Experiment Results objects.
+
+When the experiment is completed, Yival will start a web server to show the results.
 
-## 6. Generate a result xlsx from output
+## 7. Generate a result xlsx from output
 
 To convert the pickle files to Excel file:

@@ -66,7 +71,7 @@ This will read all pickles in output/ directory and will generate `results.xlsx`
 
 TODO: We can add a custom evaluator in Yival to calculate the final scores.
 
-## 7. Calculate scores, upload results to AI Eval Spreadsheet
+## 8. Calculate scores, upload results to AI Eval Spreadsheet
 
 Two notebooks in notebooks/ directory are provided for calculating scores.
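As background for the README's caching section, the cache-aside behavior can be sketched as follows. `InMemoryCache` is a simplified, hypothetical stand-in for the default dict-backed cache the README mentions, not litellm's actual implementation:

```python
# Simplified stand-in for a dict-backed LLM response cache (illustrative
# only; litellm's real cache differs). Identical requests hash to the
# same key, so repeated evaluator calls can reuse a stored response.
import hashlib
import json

class InMemoryCache:
    def __init__(self):
        self._store = {}

    def _key(self, model: str, messages: list) -> str:
        # Canonicalize the request so identical calls map to one entry.
        raw = json.dumps({"model": model, "messages": messages}, sort_keys=True)
        return hashlib.sha256(raw.encode()).hexdigest()

    def get(self, model, messages):
        return self._store.get(self._key(model, messages))

    def set(self, model, messages, response):
        self._store[self._key(model, messages)] = response

cache = InMemoryCache()
cache.set("gpt-4", [{"role": "user", "content": "hi"}], "hello!")
print(cache.get("gpt-4", [{"role": "user", "content": "hi"}]))
```

A dictionary like this vanishes when the process exits, which is exactly why the commit switches the default to a Redis backend.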
custom_configuration/model_compare.py

@@ -18,9 +18,9 @@
 # set this to see verbose outputs
 litellm.set_verbose = True
 # enable caching in the evaluator.
-litellm.cache = litellm.Cache()
-# use Redis for caching: comment the line above and uncomment the line below.
-# litellm.cache = litellm.Cache(type="redis", host="127.0.0.1", port=6379)
+# litellm.cache = litellm.Cache()
+# to not use Redis for caching: uncomment the line above and comment the line below.
+litellm.cache = litellm.Cache(type="redis", host="127.0.0.1", port=26379)
 
 
 def model_compare(
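The commit changes the Redis port in two places that must stay in sync: the host port published by the `start_redis` docker command (`-p 26379:6379`) and the port passed to `litellm.Cache` in `model_compare.py`. A small illustrative consistency check (hypothetical, not part of the repo):

```python
# Illustrative sanity check: the host port published by start_redis must
# match the port model_compare.py connects to; inside the container,
# Redis still listens on its default port 6379.
DOCKER_PORT_FLAG = "26379:6379"   # from: docker run ... -p 26379:6379 ...
LITELLM_CACHE_PORT = 26379        # from: litellm.Cache(type="redis", port=26379)

host_port, container_port = (int(p) for p in DOCKER_PORT_FLAG.split(":"))
assert host_port == LITELLM_CACHE_PORT  # client connects via the published host port
assert container_port == 6379           # Redis's default port inside the container
print("port mapping consistent")
```

Using the non-standard host port 26379 lets the container coexist with any Redis instance already bound to 6379 on the host.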
