diff --git a/agent_code/auto_bomber/model.py b/agent_code/auto_bomber/model.py index 928c76ec7..6e6280ba5 100644 --- a/agent_code/auto_bomber/model.py +++ b/agent_code/auto_bomber/model.py @@ -18,32 +18,19 @@ def get_model_dir(): return model_path.MODEL_DIR +def get_config_path(): + try: + return os.environ["CONFIG_FILE"] + except KeyError as e: + return "default_hyper_parameters.json" + + class LinearAutoBomberModel: def __init__(self, train, feature_extractor): self.train = train self.weights = None self.feature_extractor = feature_extractor - - model_dir = get_model_dir() - if model_dir and Path(model_dir).is_dir(): - self.model_dir = Path(model_dir) - elif model_dir and not Path(model_dir).is_dir(): - raise FileNotFoundError("The specified model directory does not exist!\nIf you wish to train a NEW model" - "set parameter to None, otherwise specify a valid model directory.") - elif not self.train and not model_dir: - raise ValueError("No model directory has been specified.\n A model directory is required for inference.") - else: - root_dir = Path(model_path.MODELS_ROOT) - root_dir.mkdir(parents=True, exist_ok=True) - existing_subdirs = sorted([int(x.stem) for x in root_dir.iterdir() if x.is_dir()]) - - model_index = existing_subdirs[-1] if existing_subdirs else -1 - model_index += 1 - self.model_dir = Path(model_path.MODELS_ROOT) / str(model_index) - self.model_dir.mkdir() - # Copy configuration file for logging purposes - shutil.copy(Path("default_hyper_parameters.json"), self.model_dir / "hyper_parameters.json") - shutil.copy(Path("feature_engineering.py"), self.model_dir / "feature_engineering.py") + self.determine_or_create_model_dir() self.weights_path = self.model_dir / "weights.pt" if self.weights_path.is_file(): @@ -56,7 +43,8 @@ def __init__(self, train, feature_extractor): self.hyper_parameters = json.load(file) if self.train: - self.writer = SummaryWriter(logdir=f"./runs/exp{self.model_dir.stem}") + current = Path(model_path.MODELS_DEFAULT_ROOT) + 
self.writer = SummaryWriter(logdir=f"{model_path.TF_BOARD_DIR}/{self.model_dir.relative_to(current)}") def store(self): with self.weights_path.open(mode="wb") as file: @@ -74,9 +62,15 @@ def select_best_action(self, game_state: dict, agent_self, softmax=False): choice = np.random.choice(len(q_action_values), p=p) else: top_3_actions = q_action_values.argsort()[-3:][::-1] - choice = np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05]) + choice = self.filter_bomb_if_not_top_action(np.random.choice(top_3_actions, p=[0.9, 0.05, 0.05]), + top_3_actions) return self.hyper_parameters["actions"][choice] + def filter_bomb_if_not_top_action(self, choice, top_3_actions): + if choice == 5 and choice != top_3_actions[0]: + return top_3_actions[0] + return choice + def fit_model_with_transition_batch(self, transitions: Transitions, round: int): loss = [] numpy_transitions = transitions.to_numpy_transitions(self.hyper_parameters) @@ -104,3 +98,30 @@ def init_if_needed(self, features_x, agent_self): # Xavier weights initialization self.weights = np.random.rand(len(self.hyper_parameters["actions"]), len(features_x)) * np.sqrt(1 / len(features_x)) + + def determine_or_create_model_dir(self): + configured_model_dir = get_model_dir() + if configured_model_dir and Path(configured_model_dir).is_dir(): + self.model_dir = Path(configured_model_dir) + elif self.train: + self.create_model_dir(configured_model_dir) + else: + raise FileNotFoundError("The specified model directory does not exist!\n" + "Create a new model by training first.") + + def create_model_dir(self, configured_model_dir): + if configured_model_dir: + self.model_dir = Path(configured_model_dir) + else: + root_dir = Path(model_path.MODELS_DEFAULT_ROOT) + root_dir.mkdir(parents=True, exist_ok=True) + existing_subdirs = sorted([int(x.stem) for x in root_dir.iterdir() if x.is_dir()]) + + model_index = existing_subdirs[-1] if existing_subdirs else -1 + model_index += 1 + self.model_dir = Path(model_path.MODELS_DEFAULT_ROOT) / 
str(model_index) + + self.model_dir.mkdir(parents=True) + # Copy configuration file for logging purposes + shutil.copy(Path(get_config_path()), self.model_dir / "hyper_parameters.json") + shutil.copy(Path("feature_engineering.py"), self.model_dir / "feature_engineering.py") diff --git a/agent_code/auto_bomber/model_path.py b/agent_code/auto_bomber/model_path.py index 33e12a109..ff30bb2f5 100644 --- a/agent_code/auto_bomber/model_path.py +++ b/agent_code/auto_bomber/model_path.py @@ -1,2 +1,3 @@ -MODELS_ROOT = "./models" +MODELS_DEFAULT_ROOT = "./models" +TF_BOARD_DIR = "./runs/opponents" MODEL_DIR = None diff --git a/agent_code/auto_bomber/production/42/config.py b/agent_code/auto_bomber/production/42/config.py deleted file mode 100644 index 8f4730c95..000000000 --- a/agent_code/auto_bomber/production/42/config.py +++ /dev/null @@ -1,7 +0,0 @@ -MODELS_ROOT = "./models" -# MODEL_DIR = "./models/41" -MODEL_DIR = None -ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'WAIT', 'BOMB'] -EPSILON = 0.2 -DISCOUNT = 0.4 -LEARNING_RATE = 0.0003 diff --git a/agent_code/auto_bomber/production/42/weights.pt b/agent_code/auto_bomber/production/42/weights.pt deleted file mode 100644 index 55db47681..000000000 Binary files a/agent_code/auto_bomber/production/42/weights.pt and /dev/null differ diff --git a/settings.py b/settings.py index 53801104d..d9a4678ed 100644 --- a/settings.py +++ b/settings.py @@ -5,7 +5,7 @@ # Game properties COLS = 17 ROWS = 17 -CRATE_DENSITY = 0.15 # 0.75 +CRATE_DENSITY = 0.75 MAX_AGENTS = 4 # Round properties @@ -40,7 +40,7 @@ } # Logging levels -LOG_GAME = logging.INFO -LOG_AGENT_WRAPPER = logging.DEBUG -LOG_AGENT_CODE = logging.DEBUG +LOG_GAME = logging.WARN +LOG_AGENT_WRAPPER = logging.WARN +LOG_AGENT_CODE = logging.WARN LOG_MAX_FILE_SIZE = 100 * 1024 * 1024 # 100 MB diff --git a/training_scheduler.py b/training_scheduler.py new file mode 100644 index 000000000..a9f7cfb48 --- /dev/null +++ b/training_scheduler.py @@ -0,0 +1,72 @@ +import os +import subprocess 
import sys
from datetime import datetime
from pathlib import Path
from sys import stdout
from time import sleep

# Number of rounds passed to every training run via ``--n-rounds``.
ROUNDS = 300000
# Directory under which each run's MODEL_DIR is placed.
MODEL_ROOT_DIR = "./models/opponents"
# Directory searched recursively for ``*.json`` configuration files.
CONFIGS_DIR = "./configs"
# Upper bound on the number of simultaneously tracked child processes.
MAX_PARALLEL = 30


class Scheduler:
    """Run one training subprocess per config file, at most MAX_PARALLEL at once.

    ``processes`` is a fixed-size list of ``(process, name)`` slots; an empty
    slot is ``(None, None)``. ``next_free`` holds the index of the slot that
    the next ``execute`` call will occupy, or ``None`` when no slot has been
    reserved yet.
    """

    def __init__(self):
        # Tuples are immutable, so sharing one (None, None) across slots is safe.
        self.processes = [(None, None)] * MAX_PARALLEL
        self.next_free = 0

    def wait_for_free(self, poll_interval=30):
        """Block until a slot is empty or its process has exited.

        The first such slot is cleared and its index stored in ``next_free``.

        :param poll_interval: seconds to sleep between scans when every slot
            is still occupied by a running process.
        """
        while True:
            for index, (process, _name) in enumerate(self.processes):
                if process is None or process.poll() is not None:
                    self.processes[index] = (None, None)
                    self.next_free = index
                    return
            sleep(poll_interval)

    @staticmethod
    def _build_env(path):
        """Return the child environment for the config file at ``path``.

        MODEL_DIR is MODEL_ROOT_DIR joined with the config's relative path;
        CONFIG_FILE is the config's absolute path. Both are plain strings
        because environment values must be strings on every platform.
        """
        # Join with a real path separator; plain string concatenation produced
        # "./models/opponentsconfigs/...".
        model_dir = Path(MODEL_ROOT_DIR) / path.relative_to(Path("."))
        return dict(os.environ,
                    MODEL_DIR=str(model_dir),
                    CONFIG_FILE=str(path.absolute()))

    def execute(self, path: Path):
        """Start a training run for ``path`` in the slot reserved by ``wait_for_free``.

        :param path: config file path, relative to the current directory.
        :raises RuntimeError: if no slot has been reserved.
        :raises ValueError: if ``path`` is absolute (it cannot be made relative
            to the current directory).
        """
        if self.next_free is None:
            raise RuntimeError("No free slot")

        command = [
            sys.executable, "./main.py", "play",
            "--my-agent", "auto_bomber",
            "--train", "1",
            "--n-rounds", str(ROUNDS),
            "--no-gui",
        ]
        p = subprocess.Popen(
            command,
            env=self._build_env(path),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        print(f"[{datetime.now(tz=None).strftime('%m/%d/%Y, %H:%M:%S')}] Started: {path} - pid: {p.pid}")

        self.processes[self.next_free] = (p, path.stem)
        # Force the caller to reserve a slot again before the next launch.
        self.next_free = None

    def terminate(self, name):
        """Terminate every tracked process registered under ``name`` and free its slot."""
        for index, (process, process_name) in enumerate(self.processes):
            # Empty slots carry the name None; never call terminate() on them.
            if process is not None and process_name == name:
                process.terminate()
                self.processes[index] = (None, None)

    def wait(self):
        """Block until every tracked process has exited."""
        for process, _name in self.processes:
            if process is not None:
                process.wait()


if __name__ == '__main__':
    scheduler = Scheduler()
    for config in Path(CONFIGS_DIR).glob("**/*.json"):
        scheduler.wait_for_free()
        scheduler.execute(config)
        stdout.flush()

    scheduler.wait()