From 5a2493545f2f3c91c9c9229524924d4642781560 Mon Sep 17 00:00:00 2001 From: Thomas Carmet <8408330+tcarmet@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:22:51 -0700 Subject: [PATCH] PTFE-1041 review gcloud backend (#457) --- runner_manager/backend/gcloud.py | 96 +++++++++++++++++++++++--------- runner_manager/models/backend.py | 58 +------------------ tests/unit/backend/test_gcp.py | 70 +++++++++++++++++------ 3 files changed, 125 insertions(+), 99 deletions(-) diff --git a/runner_manager/backend/gcloud.py b/runner_manager/backend/gcloud.py index d10b920a..dd8e9f56 100644 --- a/runner_manager/backend/gcloud.py +++ b/runner_manager/backend/gcloud.py @@ -1,20 +1,24 @@ import logging import re import time -from typing import Dict, List, Literal +from typing import List, Literal, MutableMapping from google.api_core.exceptions import BadRequest, NotFound from google.api_core.extended_operation import ExtendedOperation from google.cloud.compute import ( AccessConfig, + AdvancedMachineFeatures, AttachedDisk, AttachedDiskInitializeParams, Image, ImagesClient, Instance, InstancesClient, + Items, + Metadata, NetworkInterface, Operation, + Scheduling, ZoneOperationsClient, ) from pydantic import Field @@ -63,25 +67,28 @@ def wait_for_operation( return result time.sleep(1) - def get_image(self) -> Image: + @property + def image(self) -> Image: return self.image_client.get_from_family( project=self.instance_config.image_project, family=self.instance_config.image_family, ) - def get_disks(self) -> List[AttachedDisk]: + @property + def disks(self) -> List[AttachedDisk]: return [ AttachedDisk( boot=True, auto_delete=True, initialize_params=AttachedDiskInitializeParams( - source_image=self.get_image().self_link, + source_image=self.image.self_link, disk_size_gb=self.instance_config.disk_size_gb, ), ) ] - def get_network_interfaces(self) -> List[NetworkInterface]: + @property + def network_interfaces(self) -> List[NetworkInterface]: return [ NetworkInterface( network=self.instance_config.network, @@ -93,23 +100,65 @@ def get_network_interfaces(self) -> List[NetworkInterface]: ) ] - def create(self, runner: Runner): - labels: Dict[str, str] = {} + @property + def scheduling(self) -> Scheduling: + """Configure scheduling.""" + if self.instance_config.spot: + return Scheduling( + provisioning_model="SPOT", instance_termination_action="DELETE" + ) + else: + return Scheduling( + provisioning_model="STANDARD", instance_termination_action="DEFAULT" + ) + + def configure_metadata(self, runner: Runner) -> Metadata: + items: List[Items] = [] + env = self.instance_config.runner_env(runner) + for key, value in env.dict().items(): + items.append(Items(key=key, value=value)) + # Template the startup script to install and setup the runner + # with the appropriate configuration. + startup_script = self.instance_config.template_startup(runner) + items.append(Items(key="startup-script", value=startup_script)) + return Metadata(items=items) + + def configure_instance(self, runner: Runner) -> Instance: + """Configure instance.""" + return Instance( + name=runner.name, + disks=self.disks, + machine_type=( + f"zones/{self.config.zone}/machineTypes/" + f"{self.instance_config.machine_type}" + ), + network_interfaces=self.network_interfaces, + labels=self.setup_labels(runner), + metadata=self.configure_metadata(runner), + advanced_machine_features=AdvancedMachineFeatures( + enable_nested_virtualization=self.instance_config.enable_nested_virtualization + ), + scheduling=self.scheduling, + ) + + def _sanitize_label_value(self, value: str) -> str: + value = value[:63] + value = value.lower() + value = re.sub(r"[^a-z0-9_-]", "-", value) + return value + + def setup_labels(self, runner: Runner) -> MutableMapping[str, str]: + labels: MutableMapping[str, str] = dict() if self.manager: labels["runner-manager"] = self.manager + labels["status"] = self._sanitize_label_value(runner.status) + labels["busy"] = self._sanitize_label_value(str(runner.busy)) + return labels + + def create(self, runner: Runner): try: - image = self.get_image() - disks = self.get_disks() - network_interfaces = self.get_network_interfaces() - self.instance_config.image = image.self_link - self.instance_config.disks = disks - self.instance_config.machine_type = ( - f"zones/{self.config.zone}/machineTypes/" - f"{self.instance_config.machine_type}" - ) - self.instance_config.network_interfaces = network_interfaces - self.instance_config.labels = labels - instance: Instance = self.instance_config.configure_instance(runner) + instance: Instance = self.configure_instance(runner) + ext_operation: ExtendedOperation = self.client.insert( project=self.config.project_id, zone=self.config.zone, @@ -180,12 +229,6 @@ def list(self) -> List[Runner]: raise e return runners - def _sanitize_label_value(self, value: str) -> str: - value = value[:63] - value = value.lower() - value = re.sub(r"[^a-z0-9_-]", "-", value) - return value - def update(self, runner: Runner) -> Runner: try: instance: Instance = self.client.get( @@ -193,8 +236,7 @@ def update(self, runner: Runner) -> Runner: zone=self.config.zone, instance=runner.instance_id or runner.name, ) - instance.labels["status"] = self._sanitize_label_value(runner.status) - instance.labels["busy"] = self._sanitize_label_value(str(runner.busy)) + instance.labels = self.setup_labels(runner) log.info(f"Updating {runner.name} labels to {instance.labels}") self.client.update( diff --git a/runner_manager/models/backend.py b/runner_manager/models/backend.py index 586321cd..37185f03 100644 --- a/runner_manager/models/backend.py +++ b/runner_manager/models/backend.py @@ -1,17 +1,8 @@ from enum import Enum from pathlib import Path from string import Template -from typing import Annotated, Dict, List, Optional, Sequence, TypedDict - -from google.cloud.compute import ( - AdvancedMachineFeatures, - AttachedDisk, - Instance, - Items, - Metadata, - NetworkInterface, - Scheduling, -) +from typing import Dict, List, Optional, Sequence, TypedDict + from mypy_boto3_ec2.literals import InstanceTypeType, VolumeTypeType from mypy_boto3_ec2.type_defs import ( BlockDeviceMappingTypeDef, @@ -114,57 +105,12 @@ class GCPInstanceConfig(InstanceConfig): spot: bool = False network: str = "global/networks/default" enable_nested_virtualization: bool = True - labels: Optional[Dict[str, str]] = {} - image: Optional[str] = None - disks: Optional[List[Annotated[dict, AttachedDisk]]] = [] spot: bool = False - disk_size_gb: int = 20 - network_interfaces: Optional[List[Annotated[dict, NetworkInterface]]] = [] - class Config: arbitrary_types_allowed = True - @property - def scheduling(self) -> Scheduling: - """Configure scheduling.""" - if self.spot: - return Scheduling( - provisioning_model="SPOT", instance_termination_action="DELETE" - ) - else: - return Scheduling( - provisioning_model="STANDARD", instance_termination_action="DEFAULT" - ) - - def configure_metadata(self, runner: Runner) -> Metadata: - items: List[Items] = [] - env: RunnerEnv = self.runner_env(runner) - for key, value in env.dict().items(): - items.append(Items(key=key, value=value)) - # Template the startup script to install and setup the runner - # with the appropriate configuration. - startup_script = self.template_startup(runner) - items.append(Items(key="startup-script", value=startup_script)) - return Metadata(items=items) - - def configure_instance(self, runner: Runner) -> Instance: - """Configure instance.""" - metadata: Metadata = self.configure_metadata(runner) - return Instance( - name=runner.name, - disks=self.disks, - machine_type=self.machine_type, - network_interfaces=self.network_interfaces, - labels=self.labels, - metadata=metadata, - advanced_machine_features=AdvancedMachineFeatures( - enable_nested_virtualization=self.enable_nested_virtualization - ), - scheduling=self.scheduling, - ) - class AWSConfig(BackendConfig): """Configuration for AWS backend.""" diff --git a/tests/unit/backend/test_gcp.py b/tests/unit/backend/test_gcp.py index 031c9a8c..e388c40f 100644 --- a/tests/unit/backend/test_gcp.py +++ b/tests/unit/backend/test_gcp.py @@ -1,6 +1,7 @@ import os from typing import List +from google.cloud.compute import Image from pytest import fixture, mark, raises from redis_om import NotFoundError @@ -11,7 +12,7 @@ @fixture() -def gcp_group(settings) -> RunnerGroup: +def gcp_group(settings, monkeypatch) -> RunnerGroup: config = GCPConfig( project_id=os.environ.get("CLOUDSDK_CORE_PROJECT", ""), zone=os.environ.get("CLOUDSDK_COMPUTE_ZONE", ""), @@ -33,6 +34,12 @@ def gcp_group(settings) -> RunnerGroup: "label", ], ) + fake_image = Image( + self_link="my_image_link", + source_image="my_image", + ) + + monkeypatch.setattr(GCPBackend, "image", fake_image) return runner_group @@ -43,16 +50,23 @@ def gcp_runner(runner: Runner, gcp_group: RunnerGroup) -> Runner: return runner -def test_gcp_instance(runner: Runner): - gcp_instance: GCPInstanceConfig = GCPInstanceConfig() - instance = gcp_instance.configure_instance(runner) - # Assert name is defined - assert instance.name == runner.name +def test_gcp_network_interfaces(gcp_group: RunnerGroup): + interfaces = gcp_group.backend.network_interfaces + assert len(interfaces) == 1 + + +def test_gcp_group(gcp_group: RunnerGroup): + gcp_group.save() + gcp_group.delete(gcp_group.pk) + + +def test_gcp_metadata(runner: Runner, gcp_group): + metadata = gcp_group.backend.configure_metadata(runner) # Assert metadata are properly set startup: bool = False assert runner.encoded_jit_config is not None - for item in instance.metadata.items: + for item in metadata.items: if item.key == "startup-script": assert runner.name in item.value assert runner.labels[0].name in item.value @@ -62,15 +76,39 @@ def test_gcp_instance(runner: Runner): assert startup is True -def test_gcp_spot_config(runner: Runner): - gcp_instance: GCPInstanceConfig = GCPInstanceConfig(spot=True) - instance = gcp_instance.configure_instance(runner) - assert instance.scheduling.provisioning_model == "SPOT" - assert instance.scheduling.instance_termination_action == "DELETE" - gcp_instance: GCPInstanceConfig = GCPInstanceConfig(spot=False) - instance = gcp_instance.configure_instance(runner) - assert instance.scheduling.provisioning_model == "STANDARD" - assert instance.scheduling.instance_termination_action == "DEFAULT" +def test_gcp_setup_labels(runner: Runner, gcp_group: RunnerGroup): + labels = gcp_group.backend.setup_labels(runner) + assert labels["status"] == runner.status + assert labels["busy"] == str(runner.busy).lower() + + +def test_gcp_spot_config(runner: Runner, gcp_group: RunnerGroup): + gcp_group.backend.instance_config.spot = True + scheduling = gcp_group.backend.scheduling + assert scheduling.provisioning_model == "SPOT" + assert scheduling.instance_termination_action == "DELETE" + gcp_group.backend.instance_config.spot = False + scheduling = gcp_group.backend.scheduling + assert scheduling.provisioning_model == "STANDARD" + assert scheduling.instance_termination_action == "DEFAULT" + + +def test_gcp_disks(runner: Runner, gcp_group: RunnerGroup): + # patch self.image.self_link to return a fake image + + disks = gcp_group.backend.disks + assert len(disks) == 1 + assert ( + disks[0].initialize_params.disk_size_gb + == gcp_group.backend.instance_config.disk_size_gb + ) + assert disks[0].boot is True + assert disks[0].auto_delete is True + + +def test_gcp_instance(runner: Runner, gcp_group: RunnerGroup): + instance = gcp_group.backend.configure_instance(runner) + assert instance.name == runner.name @mark.skipif(