diff --git a/.vscode/settings.json b/.vscode/settings.json
index 521a08b..334ee5d 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -9,5 +9,7 @@
     ],
     "files.exclude": {
         "**/__pycache__": true
-    }
+    },
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
 }
diff --git a/src/dsmlp/admission_controller.py b/src/dsmlp/admission_controller.py
index 0096feb..8d403e6 100644
--- a/src/dsmlp/admission_controller.py
+++ b/src/dsmlp/admission_controller.py
@@ -16,7 +16,7 @@ def create_app(test_config=None):
     logging.getLogger('waitress').setLevel(logging.INFO)
     logging.getLogger('dsmlp').setLevel(logging.DEBUG)
     logger = PythonLogger(None)
-    validator = Validator(factory.awsed_client, logger)
+    validator = Validator(factory.awsed_client, factory.kube_client, logger)
 
     @app.route('/validate', methods=['POST'])
     def validate_request():
diff --git a/src/dsmlp/app/config.py b/src/dsmlp/app/config.py
new file mode 100644
index 0000000..0f6aa24
--- /dev/null
+++ b/src/dsmlp/app/config.py
@@ -0,0 +1,2 @@
+GPU_LABEL = "nvidia.com/gpu"
+GPU_LIMIT_ANNOTATION = 'gpu-limit'
\ No newline at end of file
diff --git a/src/dsmlp/app/gpu_validator.py b/src/dsmlp/app/gpu_validator.py
index e69de29..b568d97 100644
--- a/src/dsmlp/app/gpu_validator.py
+++ b/src/dsmlp/app/gpu_validator.py
@@ -0,0 +1,50 @@
+from dataclasses import dataclass
+import json
+from typing import List, Optional
+
+from dataclasses_json import dataclass_json
+from dsmlp.plugin.awsed import AwsedClient, UnsuccessfulRequest
+from dsmlp.plugin.console import Console
+from dsmlp.plugin.course import ConfigProvider
+from dsmlp.plugin.kube import KubeClient, NotFound
+import jsonify
+
+from dsmlp.plugin.logger import Logger
+from dsmlp.app.types import *
+from dsmlp.app.config import *
+
+
+class GPUValidator(ComponentValidator):
+    """Component validator that enforces the per-namespace GPU quota."""
+
+    def __init__(self, kube: KubeClient, logger: Logger) -> None:
+        self.kube = kube
+        self.logger = logger
+
+    def validate_pod(self, request: Request):
+        """
+        Reject pods whose GPU requests would exceed the namespace's GPU quota.
+
+        The GPU_LABEL requests of every container in the pod are added to the
+        GPU count the kube client reports for the namespace.
+
+        Raises:
+            ValidationFailure: if that total is greater than the namespace's
+                gpu_quota.
+        """
+
+        namespace = self.kube.get_namespace(request.namespace)
+        curr_gpus = self.kube.get_gpus_in_namespace(request.namespace)
+
+        requested_gpus = 0
+        for container in request.object.spec.containers:
+            # `resources` and `resources.requests` are both optional and default
+            # to None, so a container that only declares limits must not crash.
+            if container.resources is None or container.resources.requests is None:
+                continue
+            # int() also accepts a count delivered as a string such as "1".
+            # NOTE(review): confirm how the admission payload encodes quantities.
+            requested_gpus += int(container.resources.requests.get(GPU_LABEL, 0))
+
+        if requested_gpus + curr_gpus > namespace.gpu_quota:
+            raise ValidationFailure(f"GPU quota exceeded. Requested {requested_gpus} but with {curr_gpus} already in use, the quota of {namespace.gpu_quota} would be exceeded.")
\ No newline at end of file
diff --git a/src/dsmlp/app/types.py b/src/dsmlp/app/types.py
index c04f3e2..8c65dc8 100644
--- a/src/dsmlp/app/types.py
+++ b/src/dsmlp/app/types.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 import json
-from typing import List, Optional
+from typing import List, Optional, Dict
 
 from dataclasses_json import dataclass_json
 from dsmlp.plugin.awsed import AwsedClient, UnsuccessfulRequest
@@ -19,12 +19,17 @@ class SecurityContext:
     runAsUser: Optional[int] = None
     runAsGroup: Optional[int] = None
 
+@dataclass_json
+@dataclass
+class ResourceRequirements:
+    requests: Optional[Dict[str, int]] = None
+    limits: Optional[Dict[str, int]] = None
 
 @dataclass_json
 @dataclass
 class Container:
     securityContext: Optional[SecurityContext] = None
-
+    resources: Optional[ResourceRequirements] = None
 
 @dataclass_json
 @dataclass
diff --git a/src/dsmlp/app/validator.py b/src/dsmlp/app/validator.py
index 13467da..904a787 100644
--- a/src/dsmlp/app/validator.py
+++ b/src/dsmlp/app/validator.py
@@ -12,13 +12,14 @@
 from dsmlp.plugin.logger import Logger
 from abc import ABCMeta, abstractmethod
 from dsmlp.app.id_validator import IDValidator
+from dsmlp.app.gpu_validator import GPUValidator
 from dsmlp.app.types import *
 
 class Validator:
-    def __init__(self, awsed: AwsedClient, logger: Logger) -> None:
+    def __init__(self, awsed: AwsedClient, kube: KubeClient, logger: Logger) -> None:
         self.awsed = awsed
         self.logger = logger
-        self.component_validators = [IDValidator(awsed, logger)]
+        self.component_validators = [IDValidator(awsed, logger), GPUValidator(kube, logger)]
 
     def validate_request(self, admission_review_json):
         self.logger.debug("request=" + json.dumps(admission_review_json, indent=2))
diff --git a/src/dsmlp/ext/kube.py b/src/dsmlp/ext/kube.py
index 9077477..84998db 100644
--- a/src/dsmlp/ext/kube.py
+++ b/src/dsmlp/ext/kube.py
@@ -6,8 +6,7 @@
 
 from dsmlp.plugin.kube import KubeClient, Namespace, NotFound
 
-GPU_LABEL = "nvidia.com/gpu"
-GPU_LIMIT_ANNOTATION = 'gpu-limit'
+from dsmlp.app.config import *
 
 
 class DefaultKubeClient(KubeClient):
diff --git a/tests/app/test_validator.py b/tests/app/test_validator.py
index 754c40d..f54ba88 100644
--- a/tests/app/test_validator.py
+++ b/tests/app/test_validator.py
@@ -4,18 +4,22 @@
 from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse
 from dsmlp.plugin.kube import Namespace
 from hamcrest import assert_that, contains_inanyorder, equal_to, has_item
-from tests.fakes import FakeAwsedClient, FakeLogger
+from tests.fakes import FakeAwsedClient, FakeLogger, FakeKubeClient
 
 
 class TestValidator:
     def setup_method(self) -> None:
         self.logger = FakeLogger()
         self.awsed_client = FakeAwsedClient()
+        self.kube_client = FakeKubeClient()
 
         self.awsed_client.add_user('user10', UserResponse(uid=10))
         self.awsed_client.add_teams('user10', ListTeamsResponse(
             teams=[TeamJson(gid=1000)]
         ))
+
+        self.kube_client.add_namespace('user10', Namespace(name='user10', labels={'k8s-sync': 'true'}, gpu_quota=10))
+        self.kube_client.set_existing_gpus('user10', 0)
 
     def test_log_request_details(self):
         self.when_validate(
@@ -40,6 +44,7 @@ def test_log_request_details(self):
 
     def test_pod_security_context(self):
         self.awsed_client.add_user('user1', UserResponse(uid=1))
+        self.kube_client.add_namespace('user1', Namespace(name='user1', labels={'k8s-sync': 'true'}, gpu_quota=10))
 
         response = self.when_validate(
             {
@@ -73,6 +78,7 @@ def test_pod_security_context(self):
 
     def test_security_context(self):
         self.awsed_client.add_user('user1', UserResponse(uid=1))
+        self.kube_client.add_namespace('user1', Namespace(name='user1', labels={'k8s-sync': 'true'}, gpu_quota=10))
 
         response = self.when_validate(
             {
@@ -497,7 +503,7 @@ def test_log_allowed_requests(self):
             "INFO Allowed request username=user10 namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002"))
 
     def when_validate(self, json):
-        validator = Validator(self.awsed_client, self.logger)
+        validator = Validator(self.awsed_client, self.kube_client, self.logger)
         response = validator.validate_request(json)
 
         return response
diff --git a/tests/fakes.py b/tests/fakes.py
index f6984e7..c639f33 100644
--- a/tests/fakes.py
+++ b/tests/fakes.py
@@ -39,15 +39,25 @@ def add_teams(self, username, teams: ListTeamsResponse):
 class FakeKubeClient(KubeClient):
     def __init__(self):
         self.namespaces: TypedDict[str, Namespace] = {}
+        self.existing_gpus: TypedDict[str, int] = {}
 
     def get_namespace(self, name: str) -> Namespace:
         try:
             return self.namespaces[name]
         except KeyError:
             raise UnsuccessfulRequest()
+
+    def get_gpus_in_namespace(self, name: str) -> int:
+        try:
+            return self.existing_gpus[name]
+        except KeyError:
+            return 0
 
     def add_namespace(self, name: str, namespace: Namespace):
         self.namespaces[name] = namespace
+
+    def set_existing_gpus(self, name: str, gpus: int):
+        self.existing_gpus[name] = gpus
 
 
 class FakeLogger(Logger):