class TritonGPTValidator(ComponentValidator):
    """Check a pod's requested UID against the UIDs permitted on its namespace.

    Used in order to bypass AWSEd for TritonGPT while still maintaining UID
    security: the permitted UIDs are read from the namespace through the
    kube client rather than looked up in AWSEd.
    """

    def __init__(self, kube: KubeClient, logger: Logger) -> None:
        """Store the kube client used for namespace lookups and the logger."""
        self.kube = kube
        self.logger = logger

    def validate_pod(self, request: Request):
        """Deny the pod unless its ``runAsUser`` is one of the permitted UIDs.

        Args:
            request: Admission request; ``request.namespace`` names the
                namespace and ``request.object.spec.securityContext.runAsUser``
                carries the requested UID.

        Raises:
            ValidationFailure: If the requested UID is missing or is not in
                the list returned by ``kube.get_tgpt_uids``.
        """
        namespace = self.kube.get_namespace(request.namespace)
        permitted_uids = self.kube.get_tgpt_uids(namespace)

        # A pod may be submitted without a pod-level securityContext
        # (presumably an optional field on the request types -- confirm in
        # dsmlp.app.types). Read it defensively so that case becomes a clean
        # denial instead of an AttributeError.
        security_context = getattr(request.object.spec, "securityContext", None)
        requested_uid = getattr(security_context, "runAsUser", None)

        # permitted_uids holds strings, so compare against the string form.
        # A missing UID is rejected outright rather than being compared as
        # the literal text "None".
        if requested_uid is None or str(requested_uid) not in permitted_uids:
            raise ValidationFailure(
                f"TritonGPT Validator: user with access to UIDs {permitted_uids} "
                f"attempted to run a pod as {requested_uid}. Pod denied."
            )
def get_tgpt_label(self, namespace) -> str:
    """Return the namespace's ``tgpt-validator`` label, or ``""`` if unset.

    Args:
        namespace: Object exposing a ``labels`` mapping (may be ``None``).

    Returns:
        The label value; the validator enables TritonGPT mode when it is
        exactly ``"enabled"``.
    """
    # The key used to be spelled "tgt-validator", which never matched the
    # "tgpt-validator" label used by the fake client and the test namespaces,
    # so TritonGPT mode could not be switched on against a real cluster.
    return (namespace.labels or {}).get("tgpt-validator", "")


# TODO: make arbitrary function of getting namespace labels.
def get_tgpt_uids(self, namespace) -> list:
    """Return the UIDs listed in the namespace's ``permitted-uids`` label.

    The label is expected to be comma delimited, i.e. ``2000,100,2,20``.

    Args:
        namespace: Object exposing a ``labels`` mapping (may be ``None``).

    Returns:
        A list of UID strings with surrounding whitespace removed and empty
        entries dropped; an unset or empty label yields ``[]``.
    """
    # NOTE(review): Kubernetes label values only allow alphanumerics, '-',
    # '_' and '.', so a comma-delimited value may be rejected by the API
    # server -- confirm how this label is actually populated.
    raw_value = (namespace.labels or {}).get("permitted-uids", "")
    return [uid.strip() for uid in raw_value.split(",") if uid.strip()]
def test_good_request(self):
    """A pod in namespace user10 running as UID 30 is allowed."""
    pod = {
        "metadata": {"labels": {}},
        "spec": {
            "containers": [{}],
            "securityContext": {"runAsUser": 30},
        },
    }
    review = {
        "request": {
            "uid": "705ab4f5-6393-11e8-b7cc-42010a800002",
            "namespace": "user10",
            "userInfo": {"username": "system:kube-system"},
            "object": pod,
        }
    }

    self.when_validate(review)

    expected_message = "INFO Allowed request username=system:kube-system namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002"
    assert_that(self.logger.messages, has_item(expected_message))


def test_good_request_2(self):
    """A pod in namespace user10 running as UID 3000 is allowed."""
    pod = {
        "metadata": {"labels": {}},
        "spec": {
            "containers": [{}],
            "securityContext": {"runAsUser": 3000},
        },
    }
    review = {
        "request": {
            "uid": "705ab4f5-6393-11e8-b7cc-42010a800002",
            "namespace": "user10",
            "userInfo": {"username": "system:kube-system"},
            "object": pod,
        }
    }

    self.when_validate(review)

    expected_message = "INFO Allowed request username=system:kube-system namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002"
    assert_that(self.logger.messages, has_item(expected_message))
def get_tgpt_label(self, namespace) -> str:
    """Return the namespace's ``tgpt-validator`` label, or ``""`` if unset.

    Args:
        namespace: Object exposing a ``labels`` mapping (may be ``None``).
    """
    # dict.get never raises KeyError, so the former try/except around this
    # lookup was unreachable and has been removed.
    return (namespace.labels or {}).get("tgpt-validator", "")


def get_tgpt_uids(self, namespace) -> list:
    """Return the UIDs listed in the namespace's ``permitted-uids`` label.

    Args:
        namespace: Object exposing a ``labels`` mapping (may be ``None``).

    Returns:
        A list of UID strings split on commas, stripped, with empty entries
        dropped; a namespace without the label yields ``[]``.
    """
    # Previously a missing label made .get() return None and the following
    # .split() raised AttributeError, which the `except KeyError` clause
    # could never catch. Default to an empty string instead.
    raw_value = (namespace.labels or {}).get("permitted-uids", "")
    return [uid.strip() for uid in raw_value.split(",") if uid.strip()]
__init__(self) -> None: