Merge pull request #195 from ALLAN-DIP/add-game-runners

Add game running scripts
ALLAN-DIP · Apr 22, 2024 · 27bb754 · 27bb754
2 parents 1381335 + 876ba2d
commit 27bb754
Show file tree

Hide file tree

Showing 14 changed files with 629 additions and 52 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -64,6 +64,15 @@ ENV PYTHONPATH=/model/src/model_server/research:$PYTHONPATH
 # Copy baseline_bots code into the Docker image
 COPY src/ /model/src/model_server/baseline_bots/src/
 
+# Generate batch file for running server
+COPY containers/allan_dip_bot/ /model/src/model_server/baseline_bots/containers/allan_dip_bot/
+RUN chmod -R 777 /model/src/model_server/baseline_bots/containers/allan_dip_bot/
+
+ENV BATCH_FILE=/model/src/model_server/baseline_bots/batch.txt
+ENV WORKING_DIR=/model/src/model_server/research/WORKING_DIR
+
+RUN bash /model/src/model_server/baseline_bots/containers/allan_dip_bot/generate_batch_file.sh
+
 FROM base AS dev
 
 # Copy specialized files
@@ -81,11 +90,5 @@ CMD ["/bin/bash", "-c", "/model/src/model_server/baseline_bots/containers/allan_
 
 FROM base AS allan_dip_bot
 
-# Copy specialized files
-COPY containers/allan_dip_bot/ /model/src/model_server/baseline_bots/containers/allan_dip_bot/
-RUN chmod -R 777 /model/src/model_server/baseline_bots/containers/allan_dip_bot/
-
-ENV WORKING_DIR=/model/src/model_server/research/WORKING_DIR
-
 # Script executors
 ENTRYPOINT ["/model/src/model_server/baseline_bots/containers/allan_dip_bot/run.sh"]
diff --git a/containers/allan_dip_bot/generate_batch_file.sh b/containers/allan_dip_bot/generate_batch_file.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+{
+  printf "max_batch_size { value: %s }\n" "$MAX_BATCH_SIZE"
+  printf "batch_timeout_micros { value: %s }\n" "$BATCH_TIMEOUT_MICROS"
+  printf "max_enqueued_batches { value: %s }\n" "$MAX_ENQUEUED_BATCHES"
+  printf "num_batch_threads { value: %s }\n" "$NUM_BATCH_THREADS"
+  printf "pad_variable_length_inputs: %s\n" "$PAD_VARIABLE_LENGTH_INPUTS"
+} >"$BATCH_FILE"
diff --git a/containers/allan_dip_bot/run_model_server.sh b/containers/allan_dip_bot/run_model_server.sh
@@ -2,20 +2,11 @@
 
 set -euo pipefail
 
-batch_file="batch.txt"
-{
-  printf "max_batch_size { value: %s }\n" "$MAX_BATCH_SIZE"
-  printf "batch_timeout_micros { value: %s }\n" "$BATCH_TIMEOUT_MICROS"
-  printf "max_enqueued_batches { value: %s }\n" "$MAX_ENQUEUED_BATCHES"
-  printf "num_batch_threads { value: %s }\n" "$NUM_BATCH_THREADS"
-  printf "pad_variable_length_inputs: %s\n" "$PAD_VARIABLE_LENGTH_INPUTS"
-} >$batch_file
-
 tensorflow_model_server \
   --port=9501 \
   --model_name="player" \
   --enable_batching=true \
-  --batching_parameters_file=$batch_file \
+  --batching_parameters_file="$BATCH_FILE" \
   --model_base_path=/model/src/model_server/bot_neurips2019-sl_model/ \
   --tensorflow_session_parallelism=8 \
   --file_system_poll_wait_seconds=3
diff --git a/scripts/README.md b/scripts/README.md
@@ -0,0 +1,100 @@
+# Running many ANTONY games
+
+## Setup
+
+These scripts need to be run in a Python environment with a recent version of `diplomacy` installed. They have only been tested with Python 3.7, the latest version that `diplomacy` officially supports.
+
+`run_antony.sh`, which actually runs the ANTONY agent, requires a specific directory layout to work properly. The current directory needs a copy of the <https://github.com/ALLAN-DIP/diplomacy_cicero> repository cloned into a directory called `diplomacy_cicero`, with the desired branch checked out. In addition, it requires a directory named `cicero/` with the subdirectories `agents/`, `gpt2/`, and `models/`, as is currently done when running ANTONY elsewhere.
+
+Before attempting to use the orchestrator, focus on getting `run_antony.sh` to run properly in an `srun` session. Use the same Slurm configuration as in `game_runner.sh` except with 1 GPU instead of 7.
+
+## Usage
+
+The workflow for this orchestration is unfortunately complex. Here are the steps to run games:
+
+1. Create a JSON config file (e.g., `config.json`) describing the games to run
+2. Run `generate_sbatch.py` to create command files for individual games
+   ```bash
+   python scripts/generate_sbatch.py config.json --output-dir output/
+   ```
+3. Run `game_runner.sh` with `sbatch` and a game command file as input
+   ```bash
+   USER_EMAIL=$USER@$(hostname | sed 's/.*\.\(.*\..*\)/\1/g')
+   SLURM_SCRIPT=$(realpath scripts/game_runner.sh)
+   COMMAND_FILE=$(realpath config_antony_template_2023_09_01_22_39_28_312128.json)
+   sbatch --mail-user="$USER_EMAIL" "$SLURM_SCRIPT" "$COMMAND_FILE"
+   ```
+   These steps do not require any user intervention, but they are useful to know when debugging:
+   1. `game_runner.sh` directly runs `run_game.py`. It only exists as a wrapper for running the Python script with Slurm.
+   2. `run_game.py` sets up a single game, runs the provided commands, and saves results for that game.
+   3. `run_antony.sh`, called by the generated commands, runs an ANTONY container.
+
+Some of the file paths need to be changed to fit your specific setup. All commands must be run from the same directory because the generated files contain hardcoded paths.
+
+To run only a single game, define a single game in the configuration file and use the following definition for `COMMAND_FILE` in step 3 above:
+
+```bash
+COMMAND_FILE=$(python scripts/generate_sbatch.py config.json --output-dir output/ | tail -1)
+```
+
+## Limitations
+
+Although there is a `stop_year` field defined in the JSON format, it does not currently work because Alex is unaware of how to make ANTONY stop after a particular point. The games will keep running until they are finished.
+
+Environment variables cannot be passed to ANTONY at this time. This was left out as a simplification to save time, and Alex can add the functionality if needed.
+
+## Data formats
+
+Here is an example of the JSON format for step 1:
+
+```json
+[
+  {
+    "id": "antony_game1",
+    "agents": {
+      "AUSTRIA": {
+        "agent_params": "--model baseline"
+      },
+      "ENGLAND": {
+        "agent_params": "--model silent"
+      }
+    },
+    "stop_year": 1905
+  },
+  {
+    "id": "antony_game2",
+    "agents": {
+      "AUSTRIA": {
+        "agent_params": "--model silent"
+      },
+      "ENGLAND": {
+        "agent_params": "--model baseline"
+      }
+    },
+    "stop_year": 1908
+  }
+]
+```
+
+The above configuration describes two games. Only 2 countries are listed for each game for brevity, but all 7 should be defined. If a country is not included, then the agent will run without any extra parameters being added.
+
+See `config.json` for a minimal configuration and `config_template.json` for a slightly more complex one.
+
+Here is an example from Alex's testing of the JSON generated for a single game as part of step 2:
+
+```json
+{
+  "commands": [
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu AUSTRIA /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 0 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/AUSTRIA.txt",
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu ENGLAND /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 1 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/ENGLAND.txt",
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu FRANCE /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 2 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/FRANCE.txt",
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu GERMANY /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 3 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/GERMANY.txt",
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu ITALY /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 4 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/ITALY.txt",
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu RUSSIA /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 5 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/RUSSIA.txt",
+    "bash /project/jonmay_231/ahedges/baseline_bots/scripts/run_antony.sh ahedges_antony_test_2023_09_12_19_33_49_856274 shade.tacc.utexas.edu TURKEY /project/jonmay_231/ahedges/output_2023_09_12_19_33_47 6 |& tee /project/jonmay_231/ahedges/output_2023_09_12_19_33_47/logs/TURKEY.txt"
+  ],
+  "game_id": "ahedges_antony_test_2023_09_12_19_33_49_856274",
+  "host": "shade.tacc.utexas.edu",
+  "data_dir": "/project/jonmay_231/ahedges/output_2023_09_12_19_33_47"
+}
+```
diff --git a/scripts/config.json b/scripts/config.json
@@ -0,0 +1,7 @@
+[
+  {
+    "id": "ahedges_antony_test",
+    "agents": {},
+    "stop_year": 1905
+  }
+]
diff --git a/scripts/config_template.json b/scripts/config_template.json
@@ -0,0 +1,29 @@
+[
+  {
+    "id": "antony_template",
+    "agents": {
+      "AUSTRIA": {
+        "agent_params": ""
+      },
+      "ENGLAND": {
+        "agent_params": ""
+      },
+      "FRANCE": {
+        "agent_params": ""
+      },
+      "GERMANY": {
+        "agent_params": ""
+      },
+      "ITALY": {
+        "agent_params": ""
+      },
+      "RUSSIA": {
+        "agent_params": ""
+      },
+      "TURKEY": {
+        "agent_params": ""
+      }
+    },
+    "stop_year": 1905
+  }
+]
diff --git a/scripts/create_game.py b/scripts/create_game.py
@@ -5,26 +5,30 @@
 import argparse
 import asyncio
 import json
-from typing import Optional, Sequence
+from typing import Any, Optional, Sequence
 
 from diplomacy.client.connection import connect
 
 DEFAULT_RULES = ("REAL_TIME", "POWER_CHOICE")
+DEFAULT_DEADLINE = 0
+DEFAULT_NUM_PLAYERS = 7
 DEFAULT_USER = "allanumd"
 DEFAULT_PASSWORD = "password"
+DEFAULT_HOST = "localhost"
+DEFAULT_PORT = 8432
 
 
 async def create_game(
     game_id: str,
     rules: Sequence[str] = DEFAULT_RULES,
-    deadline: int = 0,
-    n_controls: int = 7,
+    deadline: int = DEFAULT_DEADLINE,
+    n_controls: int = DEFAULT_NUM_PLAYERS,
     user: str = DEFAULT_USER,
     password: str = DEFAULT_PASSWORD,
     game_password: Optional[str] = None,
-    hostname: str = "localhost",
-    port: int = 8432,
-) -> None:
+    hostname: str = DEFAULT_HOST,
+    port: int = DEFAULT_PORT,
+) -> Any:
     """Creates a game on the Diplomacy server"""
     connection = await connect(hostname, port)
     channel = await connection.authenticate(user, password)
@@ -47,20 +51,23 @@ async def create_game(
         "status": game.status,
         "daide_port": game.daide_port,
     }
-    print(json.dumps(game_data, ensure_ascii=False, indent=4))
+    return game_data
 
 
 def main() -> None:
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("--game_id", type=str, required=True, help="Game ID.")
     parser.add_argument("--rules", nargs="+", default=DEFAULT_RULES, help="Game rules.")
     parser.add_argument(
-        "--deadline", type=int, default=0, help="Turn deadline in seconds."
+        "--deadline",
+        type=int,
+        default=DEFAULT_DEADLINE,
+        help="Turn deadline in seconds.",
     )
     parser.add_argument(
         "--n_controls",
         type=int,
-        default=7,
+        default=DEFAULT_NUM_PLAYERS,
         help="Number of controlled powers (default: %(default)s)",
     )
     parser.add_argument("--user", type=str, default=DEFAULT_USER, help="SHADE user.")
@@ -69,9 +76,9 @@ def main() -> None:
     )
     parser.add_argument("--game-password", type=str, help="Game password.")
     parser.add_argument(
-        "--host", type=str, default="localhost", help="Server hostname."
+        "--host", type=str, default=DEFAULT_HOST, help="Server hostname."
     )
-    parser.add_argument("--port", type=int, default=8432, help="Server port.")
+    parser.add_argument("--port", type=int, default=DEFAULT_PORT, help="Server port.")
     args = parser.parse_args()
 
     if args.deadline < 0:
@@ -81,7 +88,7 @@ def main() -> None:
     if args.n_controls > 7:
         raise ValueError("--n_controls cannot be greater than 7")
 
-    asyncio.run(
+    game_data = asyncio.run(
         create_game(
             game_id=args.game_id,
             rules=args.rules,
@@ -94,6 +101,7 @@ def main() -> None:
             port=args.port,
         )
     )
+    print(json.dumps(game_data, ensure_ascii=False, indent=4))
 
 
 if __name__ == "__main__":

diff --git a/scripts/download_game.py b/scripts/download_game.py
@@ -6,36 +6,36 @@
 import asyncio
 import json
 from pathlib import Path
-from typing import Optional
+from typing import Any, Optional
 
 from diplomacy.client.connection import connect
 from diplomacy.client.network_game import NetworkGame
 from diplomacy.utils.export import to_saved_game_format
 
+REPO_DIR = Path(__file__).resolve().parent.parent
+
 DEFAULT_USER = "allanumd"
 DEFAULT_PASSWORD = "password"
+DEFAULT_HOST = "localhost"
+DEFAULT_PORT = 8432
 
 
 async def download_game(
     game_id: str,
-    output_file: Path,
     user: str = DEFAULT_USER,
     password: str = DEFAULT_PASSWORD,
     game_password: Optional[str] = None,
-    hostname: str = "localhost",
-    port: int = 8432,
-) -> None:
+    hostname: str = DEFAULT_HOST,
+    port: int = DEFAULT_PORT,
+) -> Any:
     """Downloads a game from the Diplomacy server"""
     connection = await connect(hostname, port)
     channel = await connection.authenticate(user, password)
     game: NetworkGame = await channel.join_game(
         game_id=game_id, power_name=None, registration_password=game_password
     )
-
-    with open(output_file, mode="w") as file:
-        json.dump(to_saved_game_format(game), file, ensure_ascii=False, indent=2)
-        file.write("\n")
-    print(f"Wrote game log to file {str(output_file)!r}")
+    game_json = to_saved_game_format(game)
+    return game_json
 
 
 def main() -> None:
@@ -48,30 +48,39 @@ def main() -> None:
     )
     parser.add_argument("--game-password", type=str, help="Game password.")
     parser.add_argument(
-        "--host", type=str, default="localhost", help="Server hostname."
+        "--host", type=str, default=DEFAULT_HOST, help="Server hostname."
     )
-    parser.add_argument("--port", type=int, default=8432, help="Server port.")
+    parser.add_argument("--port", type=int, default=DEFAULT_PORT, help="Server port.")
     args = parser.parse_args()
+    game_id: str = args.game_id
+    raw_output_file: Optional[Path] = args.output_file
+    user: str = args.user
+    password: str = args.password
+    game_password: Optional[str] = args.game_password
+    host: str = args.host
+    port: int = args.port
 
-    if args.output_file is None:
-        repository_dir = Path(__file__).resolve().parent.parent
-        output_file = repository_dir / "data" / f"{args.game_id}_log.json"
+    if raw_output_file is None:
+        output_file = REPO_DIR / "data" / f"{game_id}_log.json"
     else:
-        output_file = args.output_file
+        output_file = raw_output_file
     if not output_file.parent.is_dir():
         output_file.parent.mkdir(parents=True, exist_ok=True)
 
-    asyncio.run(
+    game_json = asyncio.run(
         download_game(
-            game_id=args.game_id,
-            output_file=output_file,
-            user=args.user,
-            password=args.password,
-            game_password=args.game_password,
-            hostname=args.host,
-            port=args.port,
+            game_id=game_id,
+            user=user,
+            password=password,
+            game_password=game_password,
+            hostname=host,
+            port=port,
         )
     )
+    with open(output_file, mode="w") as file:
+        json.dump(game_json, file, ensure_ascii=False, indent=2)
+        file.write("\n")
+    print(f"Wrote game log to file {str(output_file)!r}")
 
 
 if __name__ == "__main__":