From 83d2024b7663b4a74c162f2aeacb36039995a228 Mon Sep 17 00:00:00 2001 From: heshou198 <129102271+heshou198@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:40:14 -0700 Subject: [PATCH] Gpu quota (#4) * Refactored out ID validating section * Compliant to existing tests now * GPU Validator tests done * Fixed case where metadata does not have annotations * int typecast fix * Fixed key error with collecting existing gpus * Fixed issue with getting container gpus * Handles limits as well now * Unit tests for k8s client * Merge done * Test rework * Metadata optional, id_validator reformat * Permit no-gpu pods if gpu usage is overcommitted --------- Co-authored-by: D0rkKnight --- .vscode/settings.json | 4 +- ref.json | 621 ++++++++++++++++++++++++++++ ref.txt | 0 requirements.txt | 1 + src/dsmlp/admission_controller.py | 2 +- src/dsmlp/app/config.py | 3 + src/dsmlp/app/gpu_validator.py | 56 +++ src/dsmlp/app/id_validator.py | 145 +++++++ src/dsmlp/app/types.py | 85 ++++ src/dsmlp/app/validator.py | 180 +-------- src/dsmlp/ext/kube.py | 33 +- src/dsmlp/plugin/kube.py | 7 +- tests/app/test_gpu_validator.py | 78 ++++ tests/app/test_id_validator.py | 143 +++++++ tests/app/test_logs.py | 101 +++++ tests/app/test_validator.py | 624 ----------------------------- tests/app/utils.py | 81 ++++ tests/ext/test_kube_client.py | 91 +++++ tests/fakes.py | 10 + tests/ref.json | 94 +++++ tests/test_admission_controller.py | 107 ----- 21 files changed, 1560 insertions(+), 906 deletions(-) create mode 100644 ref.json create mode 100644 ref.txt create mode 100644 src/dsmlp/app/config.py create mode 100644 src/dsmlp/app/gpu_validator.py create mode 100644 src/dsmlp/app/id_validator.py create mode 100644 src/dsmlp/app/types.py create mode 100644 tests/app/test_gpu_validator.py create mode 100644 tests/app/test_id_validator.py create mode 100644 tests/app/test_logs.py delete mode 100644 tests/app/test_validator.py create mode 100644 tests/app/utils.py create mode 100644 tests/ext/test_kube_client.py create mode 100644 tests/ref.json delete mode 100644 tests/test_admission_controller.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 521a08b..334ee5d 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,5 +9,7 @@ ], "files.exclude": { "**/__pycache__": true - } + }, + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } diff --git a/ref.json b/ref.json new file mode 100644 index 0000000..00c71c5 --- /dev/null +++ b/ref.json @@ -0,0 +1,621 @@ +{ + "kind": "AdmissionReview", + "apiVersion": "admission.k8s.io/v1", + "request": { + "uid": "ae0f37ca-7e96-4d80-9467-691bf5c7ef8c", + "kind": { + "group": "", + "version": "v1", + "kind": "Pod" + }, + "resource": { + "group": "", + "version": "v1", + "resource": "pods" + }, + "requestKind": { + "group": "", + "version": "v1", + "kind": "Pod" + }, + "requestResource": { + "group": "", + "version": "v1", + "resource": "pods" + }, + "name": "ghan-6495", + "namespace": "ghan", + "operation": "CREATE", + "userInfo": { + "username": "ghan", + "uid": "42", + "groups": [ + "system:authenticated" + ] + }, + "object": { + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "name": "ghan-6495", + "namespace": "ghan", + "uid": "3121b7b1-6212-4f34-9bca-2665cc180fe8", + "creationTimestamp": "2024-01-17T02:05:35Z", + "labels": { + "dsmlp/course": "CSE160_WI24_A00", + "dsmlp/user": "ghan", + "svcreg": "true" + }, + "managedFields": [ + { + "manager": "kubectl-create", + "operation": "Update", + "apiVersion": "v1", + "time": 
"2024-01-17T02:05:33Z", + "fieldsType": "FieldsV1", + "fieldsV1": { + "f:metadata": { + "f:labels": { + ".": {}, + "f:dsmlp/course": {}, + "f:dsmlp/user": {}, + "f:svcreg": {} + } + }, + "f:spec": { + "f:activeDeadlineSeconds": {}, + "f:affinity": { + ".": {}, + "f:nodeAffinity": { + ".": {}, + "f:preferredDuringSchedulingIgnoredDuringExecution": {} + } + }, + "f:containers": { + "k:{\"name\":\"c1\"}": { + ".": {}, + "f:command": {}, + "f:env": { + ".": {}, + "k:{\"name\":\"DOCKER_IMAGE\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"HOME\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"KUBERNETES_LIMIT_CPU\"}": { + ".": {}, + "f:name": {}, + "f:valueFrom": { + ".": {}, + "f:resourceFieldRef": {} + } + }, + "k:{\"name\":\"KUBERNETES_LIMIT_MEM\"}": { + ".": {}, + "f:name": {}, + "f:valueFrom": { + ".": {}, + "f:resourceFieldRef": {} + } + }, + "k:{\"name\":\"KUBERNETES_NODE_NAME\"}": { + ".": {}, + "f:name": {}, + "f:valueFrom": { + ".": {}, + "f:fieldRef": {} + } + }, + "k:{\"name\":\"LD_LIBRARY_PATH\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"LOGNAME\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"MEM_LIMIT\"}": { + ".": {}, + "f:name": {}, + "f:valueFrom": { + ".": {}, + "f:resourceFieldRef": {} + } + }, + "k:{\"name\":\"SHELL\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"TERM\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"TZ\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"USER\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + }, + "k:{\"name\":\"XDG_CACHE_HOME\"}": { + ".": {}, + "f:name": {}, + "f:value": {} + } + }, + "f:image": {}, + "f:imagePullPolicy": {}, + "f:name": {}, + "f:resources": { + ".": {}, + "f:limits": { + ".": {}, + "f:cpu": {}, + "f:memory": {}, + "f:nvidia.com/gpu": {} + }, + "f:requests": { + ".": {}, + "f:cpu": {}, + "f:memory": {}, + "f:nvidia.com/gpu": {} + } + }, + "f:terminationMessagePath": {}, + "f:terminationMessagePolicy": {}, + "f:volumeMounts": { + ".": {}, + "k:{\"mountPath\":\"/public\"}": { + ".": {}, + "f:mountPath": {}, + "f:name": {}, + "f:subPath": {} + }, + "k:{\"mountPath\":\"/scratch\"}": { + ".": {}, + "f:mountPath": {}, + "f:name": {} + }, + "k:{\"mountPath\":\"/teams\"}": { + ".": {}, + "f:mountPath": {}, + "f:name": {}, + "f:subPath": {} + } + }, + "f:workingDir": {} + } + }, + "f:dnsPolicy": {}, + "f:enableServiceLinks": {}, + "f:priorityClassName": {}, + "f:restartPolicy": {}, + "f:schedulerName": {}, + "f:securityContext": { + ".": {}, + "f:runAsUser": {} + }, + "f:terminationGracePeriodSeconds": {}, + "f:volumes": { + ".": {}, + "k:{\"name\":\"scratch\"}": { + ".": {}, + "f:emptyDir": {}, + "f:name": {} + } + } + } + } + } + ] + }, + "spec": { + "volumes": [ + { + "name": "scratch", + "emptyDir": {} + }, + { + "name": "kube-api-access-xm22l", + "projected": { + "sources": [ + { + "serviceAccountToken": { + "expirationSeconds": 3607, + "path": "token" + } + }, + { + "configMap": { + "name": "kube-root-ca.crt", + "items": [ + { + "key": "ca.crt", + "path": "ca.crt" + } + ] + } + }, + { + "downwardAPI": { + "items": [ + { + "path": "namespace", + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + ] + } + } + ], + "defaultMode": 420 + } + }, + { + "name": "patches", + "nfs": { + "server": "its-dsmlp-fs01.ucsd.edu", + "path": "/export/apps/patches" + } + }, + { + "name": "support", + "emptyDir": {} + }, + { + "name": "dsmlp-datasets", + 
"nfs": { + "server": "its-dsmlp-fs01.ucsd.edu", + "path": "/export/datasets" + } + }, + { + "name": "dshm", + "emptyDir": { + "medium": "Memory" + } + }, + { + "name": "nbmessages", + "nfs": { + "server": "its-dsmlp-fs01.ucsd.edu", + "path": "/export/nbmessages" + } + }, + { + "name": "dsmlp-datasets-2", + "nfs": { + "server": "its-dsmlp-fs02.ucsd.edu", + "path": "/export/datasets" + } + }, + { + "name": "course-workspace", + "nfs": { + "server": "its-dsmlp-fs04.ucsd.edu", + "path": "/export/workspaces/CSE160_WI24_A00" + } + }, + { + "name": "home", + "persistentVolumeClaim": { + "claimName": "home" + } + } + ], + "initContainers": [ + { + "name": "init-support", + "image": "ucsdets/k8s-support:stable", + "command": [ + "/bin/sh", + "-c" + ], + "args": [ + "cp -r /support/* /opt/k8s-support ; ls -al /opt/k8s-support" + ], + "resources": { + "limits": { + "cpu": "500m", + "memory": "256M" + }, + "requests": { + "cpu": "500m", + "memory": "256M" + } + }, + "volumeMounts": [ + { + "name": "support", + "mountPath": "/opt/k8s-support" + }, + { + "name": "kube-api-access-xm22l", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "containers": [ + { + "name": "c1", + "image": "ucsdets/nvcr-cuda:latest", + "command": [ + "/opt/k8s-support/bin/tini", + "--", + "/opt/k8s-support/bin/initenv-createhomedir.sh", + "/opt/k8s-support/bin/pause" + ], + "workingDir": "/home/ghan", + "env": [ + { + "name": "KUBERNETES_NODE_NAME", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "spec.nodeName" + } + } + }, + { + "name": "KUBERNETES_LIMIT_CPU", + "valueFrom": { + "resourceFieldRef": { + "containerName": "c1", + "resource": "limits.cpu", + "divisor": "0" + } + } + }, + { + "name": "KUBERNETES_LIMIT_MEM", + "valueFrom": { + "resourceFieldRef": { + "containerName": "c1", + "resource": "limits.memory", + "divisor": "0" + } + } + }, + { + "name": "MEM_LIMIT", + "valueFrom": { + "resourceFieldRef": { + "containerName": "c1", + "resource": "limits.memory", + "divisor": "0" + } + } + }, + { + "name": "USER", + "value": "ghan" + }, + { + "name": "LOGNAME", + "value": "ghan" + }, + { + "name": "HOME", + "value": "/home/ghan" + }, + { + "name": "XDG_CACHE_HOME", + "value": "/tmp/xdg-cache" + }, + { + "name": "TERM", + "value": "xterm" + }, + { + "name": "TZ", + "value": "PST8PDT" + }, + { + "name": "SHELL", + "value": "/bin/bash" + }, + { + "name": "DOCKER_IMAGE", + "value": "ucsdets/nvcr-cuda:latest" + }, + { + "name": "LD_LIBRARY_PATH", + "value": "/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/opt/conda/pkgs/cudatoolkit-11.2.2-he111cf0_8/lib:/opt/conda/pkgs/cudnn-8.2.1.32-h86fa8c9_0/lib" + }, + { + "name": "HOME", + "value": "/home/ghan" + }, + { + "name": "NBGRADER_COURSEID", + "value": "CSE160_WI24_A00" + }, + { + "name": "LOGNAME", + "value": "ghan" + }, + { + "name": "NB_UMASK", + "value": "0007" + } + ], + "resources": { + "limits": { + "cpu": "1", + "memory": "1Gi", + "nvidia.com/gpu": "1" + }, + "requests": { + "cpu": "0", + "memory": "0", + "nvidia.com/gpu": "1" + } + }, + "volumeMounts": [ + { + "name": "scratch", + "mountPath": "/scratch" + }, + { + "name": "course-workspace", + "mountPath": "/public", + "subPath": "public" + }, + { + "name": "course-workspace", + "mountPath": "/teams", + "subPath": "teams" + }, + { + "name": "kube-api-access-xm22l", + "readOnly": true, + "mountPath": 
"/var/run/secrets/kubernetes.io/serviceaccount" + }, + { + "name": "patches", + "mountPath": "/usr/bin/git", + "subPath": "git_2.25.1-1ubuntu3.10_amd64.focal/unpack/usr/bin/git" + }, + { + "name": "patches", + "mountPath": "/usr/bin/git-shell", + "subPath": "git_2.25.1-1ubuntu3.10_amd64.focal/unpack/usr/bin/git-shell" + }, + { + "name": "patches", + "mountPath": "/usr/bin/git-core", + "subPath": "git_2.25.1-1ubuntu3.10_amd64.focal/unpack/usr/bin/git-core" + }, + { + "name": "course-workspace", + "mountPath": "/home/ghan", + "subPath": "home/ghan" + }, + { + "name": "course-workspace", + "mountPath": "/home/ghan/public", + "subPath": "public" + }, + { + "name": "home", + "mountPath": "/home/ghan/private" + }, + { + "name": "course-workspace", + "mountPath": "/srv/nbgrader/CSE160_WI24_A00/exchange", + "subPath": "apps/nbgrader/exchange" + }, + { + "name": "course-workspace", + "mountPath": "/usr/local/etc/jupyter/nbgrader_config.py", + "subPath": "apps/nbgrader/config/nbgrader_config.py" + }, + { + "name": "support", + "mountPath": "/opt/k8s-support" + }, + { + "name": "dsmlp-datasets", + "mountPath": "/datasets" + }, + { + "name": "dshm", + "mountPath": "/dev/shm" + }, + { + "name": "nbmessages", + "mountPath": "/srv/nbmessages" + }, + { + "name": "dsmlp-datasets-2", + "mountPath": "/datasets-2" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Never", + "terminationGracePeriodSeconds": 600, + "activeDeadlineSeconds": 21600, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "securityContext": { + "runAsUser": 91481 + }, + "affinity": { + "nodeAffinity": { + "preferredDuringSchedulingIgnoredDuringExecution": [ + { + "weight": 10, + "preference": { + "matchExpressions": [ + { + "key": "node-type", + "operator": "NotIn", + "values": [ + "research" + ] + } + ] + } + } + ] + } + }, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ], + "priorityClassName": "normal", + "priority": 10, + "enableServiceLinks": true, + "preemptionPolicy": "PreemptLowerPriority" + }, + "status": { + "phase": "Pending", + "qosClass": "Burstable" + } + }, + "oldObject": null, + "dryRun": false, + "options": { + "kind": "CreateOptions", + "apiVersion": "meta.k8s.io/v1", + "fieldManager": "kubectl-create", + "fieldValidation": "Strict" + } + } +} \ No newline at end of file diff --git a/ref.txt b/ref.txt new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt index 313b8b2..d988e02 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ PyHamcrest requests_mock dataclasses-json python-dotenv +pytest git+https://github.com/ucsd-ets/awsed_python_client.git@2024.1.2-RC1 diff --git a/src/dsmlp/admission_controller.py b/src/dsmlp/admission_controller.py index 0096feb..8d403e6 100644 --- a/src/dsmlp/admission_controller.py +++ b/src/dsmlp/admission_controller.py @@ -16,7 +16,7 @@ def create_app(test_config=None): logging.getLogger('waitress').setLevel(logging.INFO) logging.getLogger('dsmlp').setLevel(logging.DEBUG) logger = PythonLogger(None) - validator = Validator(factory.awsed_client, logger) + validator = Validator(factory.awsed_client, 
factory.kube_client, logger)
 
     @app.route('/validate', methods=['POST'])
     def validate_request():
diff --git a/src/dsmlp/app/config.py b/src/dsmlp/app/config.py
new file mode 100644
index 0000000..81f0100
--- /dev/null
+++ b/src/dsmlp/app/config.py
@@ -0,0 +1,3 @@
+GPU_LABEL = "nvidia.com/gpu"
+GPU_LIMIT_ANNOTATION = 'gpu-limit'
+LOW_PRIORITY_CLASS = "low"
\ No newline at end of file
diff --git a/src/dsmlp/app/gpu_validator.py b/src/dsmlp/app/gpu_validator.py
new file mode 100644
index 0000000..2c1e5b1
--- /dev/null
+++ b/src/dsmlp/app/gpu_validator.py
@@ -0,0 +1,56 @@
+from dataclasses import dataclass
+import json
+from typing import List, Optional
+
+from dataclasses_json import dataclass_json
+from dsmlp.plugin.awsed import AwsedClient, UnsuccessfulRequest
+from dsmlp.plugin.console import Console
+from dsmlp.plugin.course import ConfigProvider
+from dsmlp.plugin.kube import KubeClient, NotFound
+import jsonify
+
+from dsmlp.plugin.logger import Logger
+from dsmlp.app.types import *
+from dsmlp.app.config import *
+
+
+class GPUValidator(ComponentValidator):
+
+    def __init__(self, kube: KubeClient, logger: Logger) -> None:
+        self.kube = kube
+        self.logger = logger
+
+    def validate_pod(self, request: Request):
+        """
+        Deny the pod if its GPU request/limit would push the namespace over its GPU quota
+        """
+
+        # Low priority pods pass through
+        priority = request.object.spec.priorityClassName
+        if priority is not None and priority == LOW_PRIORITY_CLASS:
+            return
+
+        namespace = self.kube.get_namespace(request.namespace)
+        curr_gpus = self.kube.get_gpus_in_namespace(request.namespace)
+
+        utilized_gpus = 0
+        for container in request.object.spec.containers:
+            requested, limit = 0, 0
+            try:
+                requested = int(container.resources.requests[GPU_LABEL])
+            except (KeyError, AttributeError, TypeError):
+                pass
+            try:
+                limit = int(container.resources.limits[GPU_LABEL])
+            except (KeyError, AttributeError, TypeError):
+                pass
+
+            utilized_gpus += max(requested, limit)
+
+        # Short circuit if no GPUs requested (permits overcap)
+        if utilized_gpus == 0:
+            return
+
+        if utilized_gpus + curr_gpus > namespace.gpu_quota:
+            raise ValidationFailure(
+                f"GPU quota exceeded. Wanted {utilized_gpus} but with {curr_gpus} already in use, the quota of {namespace.gpu_quota} would be exceeded.")
diff --git a/src/dsmlp/app/id_validator.py b/src/dsmlp/app/id_validator.py
new file mode 100644
index 0000000..99888aa
--- /dev/null
+++ b/src/dsmlp/app/id_validator.py
@@ -0,0 +1,145 @@
+from dataclasses import dataclass
+import json
+from typing import List, Optional
+
+from dataclasses_json import dataclass_json
+from dsmlp.plugin.awsed import AwsedClient, UnsuccessfulRequest
+from dsmlp.plugin.console import Console
+from dsmlp.plugin.course import ConfigProvider
+from dsmlp.plugin.kube import KubeClient, NotFound
+import jsonify
+
+from dsmlp.plugin.logger import Logger
+from dsmlp.app.types import *
+
+
+class IDValidator(ComponentValidator):
+
+    def __init__(self, awsed: AwsedClient, logger: Logger) -> None:
+        self.awsed = awsed
+        self.logger = logger
+
+    def validate_pod(self, request: Request):
+        """
+        Validate pods for namespaces with the 'k8s-sync' label
+        """
+        username = request.namespace
+# namespace = self.kube.get_namespace(request.namespace)
+
+# if 'k8s-sync' in namespace.labels:
+        user = self.awsed.describe_user(username)
+        if not user:
+            raise ValidationFailure(
+                f"namespace: no AWSEd user found with username {username}")
+        allowed_uid = user.uid
+        allowed_courses = user.enrollments
+
+        team_response = self.awsed.list_user_teams(username)
+        allowed_gids = [team.gid for team in team_response.teams]
+        allowed_gids.append(0)
+        allowed_gids.append(100)
+
+        metadata = request.object.metadata
+        spec = request.object.spec
+
+        if metadata is not None and metadata.labels is not None:
+            self.validate_course_enrollment(allowed_courses, metadata.labels)
+
+        self.validate_pod_security_context(
+            allowed_uid, allowed_gids, spec.securityContext)
+        self.validate_containers(allowed_uid, allowed_gids, spec)
+
+    def validate_course_enrollment(self, allowed_courses: List[str], labels: Dict[str, str]):
+        if not 'dsmlp/course' in labels:
+            return
+        if not labels['dsmlp/course'] in allowed_courses:
+            raise ValidationFailure(
+                f"metadata.labels: dsmlp/course must be in range {allowed_courses}")
+
+    def validate_pod_security_context(
+            self,
+            authorized_uid: int,
+            allowed_teams: List[int],
+            securityContext: PodSecurityContext):
+
+        if securityContext is None:
+            return
+
+        if securityContext.runAsUser is not None and authorized_uid != securityContext.runAsUser:
+            raise ValidationFailure(
+                f"spec.securityContext: uid must be in range [{authorized_uid}]")
+
+        if securityContext.runAsGroup is not None and securityContext.runAsGroup not in allowed_teams:
+            raise ValidationFailure(
+                f"spec.securityContext: gid must be in range {allowed_teams}")
+
+        if securityContext.fsGroup is not None and securityContext.fsGroup not in allowed_teams:
+            raise ValidationFailure(
+                f"spec.securityContext: gid must be in range {allowed_teams}")
+
+        if securityContext.supplementalGroups is not None:
+            for sgroup in securityContext.supplementalGroups:
+                if not sgroup in allowed_teams:
+                    raise ValidationFailure(
+                        f"spec.securityContext: gid must be in range {allowed_teams}")
+
+    def validate_containers(
+            self,
+            authorized_uid: int,
+            allowed_teams: List[int],
+            spec: PodSpec
+    ):
+        """
+        Validate the security context of containers and initContainers
+        """
+        self.validate_security_contexts(
+            authorized_uid, allowed_teams, spec.containers, "containers")
+        self.validate_security_contexts(
+            authorized_uid, allowed_teams, spec.initContainers, "initContainers")
+
+    def validate_security_contexts(
+            self, authorized_uid: int, allowed_teams: List[int],
+            containers: List[Container],
+            context: str):
+        """
+        Validate the security context of a list of containers.
+        """
+
+        if containers is None:
+            return
+
+        for i, container in enumerate(containers):
+            securityContext = container.securityContext
+            if securityContext is None:
+                continue
+
+            self.validate_security_context(
+                authorized_uid, allowed_teams, securityContext, f"{context}[{i}]")
+
+    def validate_security_context(
+            self,
+            authorized_uid: int,
+            allowed_teams: List[int],
+            securityContext: SecurityContext,
+            context: str):
+
+        if securityContext.runAsUser is not None and authorized_uid != securityContext.runAsUser:
+            raise ValidationFailure(
+                f"spec.{context}.securityContext: uid must be in range [{authorized_uid}]")
+
+        if securityContext.runAsGroup is not None and securityContext.runAsGroup not in allowed_teams:
+            raise ValidationFailure(
+                f"spec.{context}.securityContext: gid must be in range {allowed_teams}")
+
+    def admission_response(self, uid, allowed, message):
+        return {
+            "apiVersion": "admission.k8s.io/v1",
+            "kind": "AdmissionReview",
+            "response": {
+                "uid": uid,
+                "allowed": allowed,
+                "status": {
+                    "message": message
+                }
+            }
+        }
diff --git a/src/dsmlp/app/types.py b/src/dsmlp/app/types.py
new file mode 100644
index 0000000..c238850
--- /dev/null
+++ b/src/dsmlp/app/types.py
@@ -0,0 +1,85 @@
+
+from dataclasses import dataclass
+from typing import List, Optional, Dict
+from dataclasses_json import dataclass_json
+from abc import ABCMeta, abstractmethod
+
+@dataclass_json
+@dataclass
+class SecurityContext:
+    """Each Container has a SecurityContext"""
+    runAsUser: Optional[int] = None
+    runAsGroup: Optional[int] = None
+
+@dataclass_json
+@dataclass
+class ResourceRequirements:
+    requests: Optional[Dict[str, int]] = None
+    limits: Optional[Dict[str, int]] = None
+
+@dataclass_json
+@dataclass
+class Container:
+    securityContext: Optional[SecurityContext] = None
+    resources: Optional[ResourceRequirements] = None
+
+@dataclass_json
+@dataclass
+class PodSecurityContext:
+    """Each Pod has a SecurityContext"""
+    runAsUser: Optional[int] = None
+    runAsGroup: Optional[int] = None
+    fsGroup: Optional[int] = None
+    supplementalGroups: Optional[List[int]] = None
+
+
+@dataclass_json
+@dataclass
+class PodSpec:
+    containers: List[Container]
+    initContainers: Optional[List[Container]] = None
+    securityContext: Optional[PodSecurityContext] = None
+    priorityClassName: Optional[str] = None
+
+@dataclass_json
+@dataclass
+class ObjectMeta:
+    labels: Optional[Dict[str, str]] = None
+
+
+@dataclass_json
+@dataclass
+class Object:
+    metadata: Optional[ObjectMeta]
+    spec: PodSpec
+
+
+@dataclass_json
+@dataclass
+class UserInfo:
+    username: str
+
+
+@dataclass_json
+@dataclass
+class Request:
+    uid: str
+    namespace: str
+    object: Object
+    userInfo: UserInfo
+
+
+@dataclass_json
+@dataclass
+class AdmissionReview:
+    request: Request
+
+class ValidationFailure(Exception):
+    def __init__(self, message: str) -> None:
+        self.message = message
+        super().__init__(self.message)
+
+class ComponentValidator(metaclass=ABCMeta):
+    @abstractmethod
+    def validate_pod(self, request: Request):
+        pass
\ No newline at end of file
diff --git a/src/dsmlp/app/validator.py b/src/dsmlp/app/validator.py
index d3869f0..be1420c 100644
--- a/src/dsmlp/app/validator.py
+++ b/src/dsmlp/app/validator.py
@@ -10,83 +10,16 @@
 import jsonify
 
 from dsmlp.plugin.logger import Logger
-
-
-@dataclass_json
-@dataclass
-class SecurityContext:
-    """Each Container has a SecurityContext"""
-    runAsUser: Optional[int] = None
-    runAsGroup: Optional[int] = None
-
-
-@dataclass_json
-@dataclass
-class Container:
-    securityContext: Optional[SecurityContext] = None
-
-
-@dataclass_json
-@dataclass
-class PodSecurityContext:
-    """Each Pod has a SecurityContext"""
-    runAsUser: Optional[int] = None
-    runAsGroup: Optional[int] = None
-    fsGroup: Optional[int] = None
-    supplementalGroups: Optional[List[int]] = None
-
-
-@dataclass_json
-@dataclass
-class PodSpec:
-    containers: List[Container]
-    initContainers: Optional[List[Container]] = None
-    securityContext: Optional[PodSecurityContext] = None
-
-@dataclass_json
-@dataclass
-class ObjectMeta:
-    labels: Dict[str, str]
-
-
-@dataclass_json
-@dataclass
-class Object:
-    metadata: ObjectMeta
-    spec: PodSpec
-
-
-@dataclass_json
-@dataclass
-class UserInfo:
-    username: str
-
-
-@dataclass_json
-@dataclass
-class Request:
-    uid: str
-    namespace: str
-    object: Object
-    userInfo: UserInfo
-
-
-@dataclass_json
-@dataclass
-class AdmissionReview:
-    request: Request
-
-
-class ValidationFailure(Exception):
-    def __init__(self, message: str) -> None:
-        self.message = message
-        super().__init__(self.message)
-
+from abc import ABCMeta, abstractmethod
+from dsmlp.app.id_validator import IDValidator
+from dsmlp.app.gpu_validator import GPUValidator
+from dsmlp.app.types import *
 
 class Validator:
-    def __init__(self, awsed: AwsedClient, logger: Logger) -> None:
+    def __init__(self, awsed: AwsedClient, kube: KubeClient, logger: Logger) -> None:
         self.awsed = awsed
         self.logger = logger
+        self.component_validators = [IDValidator(awsed, logger), GPUValidator(kube, logger)]
 
     def validate_request(self, admission_review_json):
         self.logger.debug("request=" + json.dumps(admission_review_json, indent=2))
@@ -118,103 +51,8 @@ def handle_request(self, request: Request):
         return self.admission_response(request.uid, True, "Allowed")
 
     def validate_pod(self, request: Request):
-        """
-        Validate pods for namespaces with the 'k8s-sync' label
-        """
-        username = request.namespace
-# namespace = self.kube.get_namespace(request.namespace)
-
-# if 'k8s-sync' in namespace.labels:
-        user = self.awsed.describe_user(username)
-        if not user:
-            raise ValidationFailure(f"namespace: no AWSEd user found with username {username}")
-        allowed_uid = user.uid
-        allowed_courses = user.enrollments
-
-        team_response = self.awsed.list_user_teams(username)
-        allowed_gids = [team.gid for team in team_response.teams]
-        allowed_gids.append(0)
-        allowed_gids.append(100)
-
-        metadata = request.object.metadata
-        spec = request.object.spec
-        self.validate_course_enrollment(allowed_courses, metadata.labels)
-        self.validate_pod_security_context(allowed_uid, allowed_gids, spec.securityContext)
-        self.validate_containers(allowed_uid, allowed_gids, spec)
-
-    def validate_course_enrollment(self, allowed_courses: List[str], labels: Dict[str, str]):
-        if not 'dsmlp/course' in labels:
-            return
-        if not labels['dsmlp/course'] in allowed_courses:
-            raise ValidationFailure(f"metadata.labels: dsmlp/course must be in range {allowed_courses}")
-
-    def validate_pod_security_context(
-            self,
-            authorized_uid: int,
-            allowed_teams: List[int],
-            securityContext: PodSecurityContext):
-
-        if securityContext is None:
-            return
-
-        if securityContext.runAsUser is not None and authorized_uid != securityContext.runAsUser:
-            raise ValidationFailure(f"spec.securityContext: uid must be in range [{authorized_uid}]")
-
-        if securityContext.runAsGroup is not None and securityContext.runAsGroup not in allowed_teams:
-            raise ValidationFailure(f"spec.securityContext: gid must be in range {allowed_teams}")
-
-        if securityContext.fsGroup is not None and securityContext.fsGroup not in allowed_teams:
-            raise ValidationFailure(f"spec.securityContext: gid must be in range {allowed_teams}")
-
-        if securityContext.supplementalGroups is not None:
-            for sgroup in securityContext.supplementalGroups:
-                if not sgroup in allowed_teams:
-                    raise ValidationFailure(f"spec.securityContext: gid must be in range {allowed_teams}")
-
-    def validate_containers(
-            self,
-            authorized_uid: int,
-            allowed_teams: List[int],
-            spec: PodSpec
-    ):
-        """
-        Validate the security context of containers and initContainers
-        """
-        self.validate_security_contexts(authorized_uid, allowed_teams, spec.containers, "containers")
-        self.validate_security_contexts(authorized_uid, allowed_teams, spec.initContainers, "initContainers")
-
-    def validate_security_contexts(
-            self, authorized_uid: int, allowed_teams: List[int],
-            containers: List[Container],
-            context: str):
-        """
-        Validate the security context of a container.
-        """
-
-        if containers is None:
-            return
-
-        for i, container in enumerate(containers):
-            securityContext = container.securityContext
-            if securityContext is None:
-                continue
-
-            self.validate_security_context(authorized_uid, allowed_teams, securityContext, f"{context}[{i}]")
-
-    def validate_security_context(
-            self,
-            authorized_uid: int,
-            allowed_teams: List[int],
-            securityContext: SecurityContext,
-            context: str):
-
-        if securityContext.runAsUser is not None and authorized_uid != securityContext.runAsUser:
-            raise ValidationFailure(
-                f"spec.{context}.securityContext: uid must be in range [{authorized_uid}]")
-
-        if securityContext.runAsGroup is not None and securityContext.runAsGroup not in allowed_teams:
-            raise ValidationFailure(
-                f"spec.{context}.securityContext: gid must be in range {allowed_teams}")
+        for component_validator in self.component_validators:
+            component_validator.validate_pod(request)
 
     def admission_response(self, uid, allowed, message):
         return {
@@ -227,4 +65,4 @@ def admission_response(self, uid, allowed, message):
                 "message": message
             }
         }
-    }
+    }
\ No newline at end of file
diff --git a/src/dsmlp/ext/kube.py b/src/dsmlp/ext/kube.py
index 8f832ed..46e6c03 100644
--- a/src/dsmlp/ext/kube.py
+++ b/src/dsmlp/ext/kube.py
@@ -6,6 +6,8 @@
 
 from dsmlp.plugin.kube import KubeClient, Namespace, NotFound
 
+from dsmlp.app.config import *
+
 
 class DefaultKubeClient(KubeClient):
     """
@@ -16,11 +18,40 @@ def get_namespace(self, name: str) -> Namespace:
         api = self.get_policy_api()
         v1namespace: V1Namespace = api.read_namespace(name=name)
         metadata: V1ObjectMeta = v1namespace.metadata
+
+        gpu_quota = 1
+        if metadata is not None and metadata.annotations is not None and GPU_LIMIT_ANNOTATION in metadata.annotations:
+            gpu_quota = int(metadata.annotations[GPU_LIMIT_ANNOTATION])
+
         return Namespace(
             name=metadata.name,
-            labels=metadata.labels)
+            labels=metadata.labels,
+            gpu_quota=gpu_quota)
+
+    def get_gpus_in_namespace(self, name: str) -> int:
+        api = self.get_policy_api()
+        api.read_namespace(name=name)  # verify the namespace exists before counting pods
+        pods = api.list_namespaced_pod(namespace=name)
+
+        gpu_count = 0
+        for pod in pods.items:
+            for container in pod.spec.containers:
+                requested, limit = 0, 0
+                try:
+                    requested = int(container.resources.requests[GPU_LABEL])
+                except (KeyError, AttributeError, TypeError):
+                    pass
+                try:
+                    limit = int(container.resources.limits[GPU_LABEL])
+                except (KeyError, AttributeError, TypeError):
+                    pass
+
+                gpu_count += max(requested, limit)
+
+        return gpu_count
 
     # noinspection PyMethodMayBeStatic
+
     def get_policy_api(self) -> CoreV1Api:
         try:
             config.load_incluster_config()
diff --git a/src/dsmlp/plugin/kube.py b/src/dsmlp/plugin/kube.py
index 2963d52..5ef822d 100644
--- a/src/dsmlp/plugin/kube.py
+++ b/src/dsmlp/plugin/kube.py
@@ -8,11 +8,11 @@
 class NotFound(Exception):
     pass
 
-
 @dataclass
 class Namespace:
     name: str
     labels: typing.Optional[dict]
+    gpu_quota: typing.Optional[int]
 
 
 class KubeClient(metaclass=ABCMeta):
@@ -20,3 +20,8 @@ class KubeClient(metaclass=ABCMeta):
     def get_namespace(self, name: str) -> Namespace:
         """Get a namespace"""
         pass
+
+    @abstractmethod
+    def get_gpus_in_namespace(self, name: str) -> int:
+        """Get the number of GPUs currently in use in a namespace"""
+        pass
diff --git a/tests/app/test_gpu_validator.py b/tests/app/test_gpu_validator.py
new file mode 100644
index 0000000..7ef087c
--- /dev/null
+++ b/tests/app/test_gpu_validator.py
@@ -0,0 +1,78 @@
+import inspect
+from operator import contains
+from dsmlp.app.types import ValidationFailure
+from dsmlp.app.validator import Validator
+from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse
+from dsmlp.plugin.kube import Namespace
+from hamcrest import assert_that, contains_inanyorder, equal_to, has_item
+from tests.fakes import FakeAwsedClient, FakeLogger, FakeKubeClient
+from dsmlp.ext.kube import DefaultKubeClient
+from dsmlp.app.gpu_validator import GPUValidator
+from tests.app.utils import gen_request, try_val_with_component
+
+
+class TestGPUValidator:
+    def setup_method(self) -> None:
+        self.logger = FakeLogger()
+        self.awsed_client = FakeAwsedClient()
+        self.kube_client = FakeKubeClient()
+
+        self.awsed_client.add_user(
+            'user10', UserResponse(uid=10, enrollments=[]))
+        self.awsed_client.add_teams('user10', ListTeamsResponse(
+            teams=[TeamJson(gid=1000)]
+        ))
+
+        self.kube_client.add_namespace('user10', Namespace(
+            name='user10', labels={'k8s-sync': 'true'}, gpu_quota=10))
+        self.kube_client.set_existing_gpus('user10', 0)
+
+    def test_no_gpus_requested(self):
+        self.try_validate(
+            gen_request(), expected=True, message="Allowed"
+        )
+
+    def test_quota_not_reached(self):
+
+        self.try_validate(
+            gen_request(gpu_req=10), expected=True, message="Allowed"
+        )
+
+    def test_quota_exceeded(self):
+
+        self.try_validate(
+            gen_request(gpu_req=11), expected=False, message="GPU quota exceeded. Wanted 11 but with 0 already in use, the quota of 10 would be exceeded."
+        )
+
+    def test_sum_exceeded(self):
+        self.kube_client.set_existing_gpus('user10', 5)
+
+        self.try_validate(
+            gen_request(gpu_req=6), expected=False, message="GPU quota exceeded. Wanted 6 but with 5 already in use, the quota of 10 would be exceeded."
+        )
+
+    def test_low_priority(self):
+        self.kube_client.set_existing_gpus('user10', 5)
+
+        self.try_validate(
+            gen_request(gpu_req=6, low_priority=True), expected=True
+        )
+
+    # Should respond to limit as well as request
+    def test_limit_exceeded(self):
+        self.kube_client.set_existing_gpus('user10', 5)
+
+        self.try_validate(
+            gen_request(gpu_lim=6), expected=False, message="GPU quota exceeded. Wanted 6 but with 5 already in use, the quota of 10 would be exceeded."
+ ) + + # Tests pod overcap + def test_low_priority_overcap(self): + self.kube_client.set_existing_gpus('user10', 11) + + self.try_validate( + gen_request(), expected=True) + + def try_validate(self, json, expected: bool, message: str = None): + try_val_with_component(GPUValidator( + self.kube_client, self.logger), json, expected, message) diff --git a/tests/app/test_id_validator.py b/tests/app/test_id_validator.py new file mode 100644 index 0000000..de9e384 --- /dev/null +++ b/tests/app/test_id_validator.py @@ -0,0 +1,143 @@ +import inspect +from operator import contains +from dsmlp.app.types import * +from dsmlp.app.id_validator import IDValidator +from dsmlp.app.validator import Validator +from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse +from dsmlp.plugin.kube import Namespace +from hamcrest import assert_that, contains_inanyorder, equal_to, has_item +from tests.app.utils import gen_request, try_val_with_component +from tests.fakes import FakeAwsedClient, FakeLogger, FakeKubeClient + + +class TestIDValidator: + def setup_method(self) -> None: + self.logger = FakeLogger() + self.awsed_client = FakeAwsedClient() + self.kube_client = FakeKubeClient() + + self.awsed_client.add_user( + 'user10', UserResponse(uid=10, enrollments=[])) + self.awsed_client.add_teams('user10', ListTeamsResponse( + teams=[TeamJson(gid=1000)] + )) + + self.kube_client.add_namespace('user10', Namespace( + name='user10', labels={'k8s-sync': 'true'}, gpu_quota=10)) + + def test_course_enrollment(self): + self.awsed_client.add_user( + 'user1', UserResponse(uid=1, enrollments=["course1"])) + self.kube_client.add_namespace('user1', Namespace( + name='user1', labels={'k8s-sync': 'true'}, gpu_quota=10)) + + self.try_validate(gen_request( + course="course1", username="user1", run_as_user=1, has_container=False), True, "Allowed") + + def test_pod_security_context(self): + self.awsed_client.add_user( + 'user1', UserResponse(uid=1, enrollments=[])) + self.kube_client.add_namespace('user1', Namespace( + name='user1', labels={'k8s-sync': 'true'}, gpu_quota=10)) + + self.try_validate(gen_request( + username="user1", run_as_user=1, has_container=False), True, "Allowed") + + def test_security_context(self): + self.awsed_client.add_user( + 'user1', UserResponse(uid=1, enrollments=[])) + self.kube_client.add_namespace('user1', Namespace( + name='user1', labels={'k8s-sync': 'true'}, gpu_quota=10)) + + self.try_validate(gen_request( + username="user1", run_as_user=1, has_container=True), True, "Allowed") + + def test_deny_security_context(self): + """ + The user is launching a Pod, + but the PodSecurityContext.runAsUser doesn't belong to them. + Deny the request. + """ + self.awsed_client.add_user( + 'user2', UserResponse(uid=2, enrollments=[])) + + self.try_validate(gen_request( + username="user2", run_as_user=3, has_container=False), False, "spec.securityContext: uid must be in range [2]") + + def test_deny_unknown_user(self): + + self.try_validate(gen_request( + username="user2", run_as_user=2, has_container=False), False, "namespace: no AWSEd user found with username user2") + + def test_deny_course_enrollment(self): + """ + The user is launching a Pod, + but they are not enrolled in the course in the label "dsmlp/course". + Deny the request. 
+ """ + self.awsed_client.add_user( + 'user2', UserResponse(uid=2, enrollments=[])) + + self.try_validate(gen_request( + course="course1", username="user2", run_as_user=2, has_container=False), False, "metadata.labels: dsmlp/course must be in range []") + + def test_deny_pod_security_context(self): + self.awsed_client.add_user( + 'user2', UserResponse(uid=2, enrollments=[])) + + self.try_validate(gen_request( + username="user2", run_as_user=2, container_override=[Container(), Container(securityContext=SecurityContext(runAsUser=3))]), False, "spec.containers[1].securityContext: uid must be in range [2]") + + def test_deny_init_container(self): + """ + The user is launching a Pod with an Init Container, + but the uid doesn't belong to them. + Deny the request. + """ + self.awsed_client.add_user( + 'user2', UserResponse(uid=2, enrollments=[])) + + self.try_validate(gen_request( + username="user2", run_as_user=2, container_override=[Container()], init_containers=[Container(), Container(securityContext=SecurityContext(runAsUser=99))]), False, "spec.initContainers[1].securityContext: uid must be in range [2]") + + def test_deny_pod_security_context2(self): + """ + The Pod doesn't have any security contexts. + It should be launched. + """ + + self.try_validate(gen_request( + username="user10", container_override=[Container()]), True, "Allowed") + + # check podSecurityContext.runAsGroup + def test_deny_team_gid(self): + + self.try_validate(gen_request( + username="user10", run_as_group=2, container_override=[Container()]), False, "spec.securityContext: gid must be in range [1000, 0, 100]") + + # check podSecurityContext.fsGroup + def test_deny_pod_fsGroup(self): + + self.try_validate(gen_request( + username="user10", fs_group=2, container_override=[Container()]), False, "spec.securityContext: gid must be in range [1000, 0, 100]") + + # check podSecurityContext.supplementalGroups + def test_deny_pod_supplemental_groups(self): + + self.try_validate(gen_request( + username="user10", supplemental_groups=[2], container_override=[Container()]), False, "spec.securityContext: gid must be in range [1000, 0, 100]") + + # check container.securityContext.runAsGroup + def test_deny_container_run_as_group(self): + + self.try_validate(gen_request( + username="user10", container_override=[Container(securityContext=SecurityContext(runAsGroup=2))]), False, "spec.containers[0].securityContext: gid must be in range [1000, 0, 100]") + + def test_allow_gid_0_and_100a(self): + + self.try_validate(gen_request( + username="user10", run_as_group=0, container_override=[Container(securityContext=SecurityContext(runAsGroup=100))]), True, "Allowed") + + def try_validate(self, json, expected: bool, message: str = None): + try_val_with_component(IDValidator( + self.awsed_client, self.logger), json, expected, message) diff --git a/tests/app/test_logs.py b/tests/app/test_logs.py new file mode 100644 index 0000000..76c0ac7 --- /dev/null +++ b/tests/app/test_logs.py @@ -0,0 +1,101 @@ +import inspect +from operator import contains +from dsmlp.app.validator import Validator +from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse +from dsmlp.plugin.kube import Namespace +from hamcrest import assert_that, contains_inanyorder, equal_to, has_item +from tests.fakes import FakeAwsedClient, FakeLogger, FakeKubeClient + + +class TestLogs: + def setup_method(self) -> None: + self.logger = FakeLogger() + self.awsed_client = FakeAwsedClient() + self.kube_client = FakeKubeClient() + + self.awsed_client.add_user( + 'user10', 
UserResponse(uid=10, enrollments=[])) + self.awsed_client.add_teams('user10', ListTeamsResponse( + teams=[TeamJson(gid=1000)] + )) + + self.kube_client.add_namespace('user10', Namespace( + name='user10', labels={'k8s-sync': 'true'}, gpu_quota=10)) + + def test_log_request_details(self): + self.when_validate( + { + "request": { + "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", + "namespace": "user10", + "userInfo": { + "username": "system:kube-system" + }, + "object": { + "metadata": { + "labels": {} + }, + "spec": { + "containers": [{}] + }, + } + } + } + ) + + assert_that(self.logger.messages, has_item( + "INFO Allowed request username=system:kube-system namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002")) + + def test_failures_are_logged(self): + self.awsed_client.add_user( + 'user2', UserResponse(uid=2, enrollments=[])) + + response = self.when_validate( + { + "request": { + "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", + "userInfo": { + "username": "user2" + }, + "namespace": "user2", + "object": { + "metadata": { + "labels": {} + }, + "spec": { + "containers": [], + "securityContext": {"runAsUser": 3}}, + }}}) + + assert_that(self.logger.messages, has_item( + f"INFO Denied request username=user2 namespace=user2 reason={response['response']['status']['message']} uid=705ab4f5-6393-11e8-b7cc-42010a800002")) + + def test_log_allowed_requests(self): + self.when_validate( + { + "request": { + "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", + "userInfo": { + "username": "user10" + }, + "namespace": "user10", + "object": { + "metadata": { + "labels": {} + }, + "spec": { + "containers": [{}] + } + } + } + } + ) + + assert_that(self.logger.messages, has_item( + "INFO Allowed request username=user10 namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002")) + + def when_validate(self, json): + validator = Validator(self.awsed_client, self.kube_client, self.logger) + response = validator.validate_request(json) + + return response diff --git a/tests/app/test_validator.py b/tests/app/test_validator.py deleted file mode 100644 index d9aac05..0000000 --- a/tests/app/test_validator.py +++ /dev/null @@ -1,624 +0,0 @@ -import inspect -from operator import contains -from dsmlp.app.validator import Validator -from dsmlp.plugin.awsed import ListTeamsResponse, TeamJson, UserResponse -from dsmlp.plugin.kube import Namespace -from hamcrest import assert_that, contains_inanyorder, equal_to, has_item -from tests.fakes import FakeAwsedClient, FakeLogger - - -class TestValidator: - def setup_method(self) -> None: - self.logger = FakeLogger() - self.awsed_client = FakeAwsedClient() - - self.awsed_client.add_user('user10', UserResponse(uid=10, enrollments=[])) - self.awsed_client.add_teams('user10', ListTeamsResponse( - teams=[TeamJson(gid=1000)] - )) - - def test_log_request_details(self): - self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "namespace": "user10", - "userInfo": { - "username": "system:kube-system" - }, - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "containers": [{}] - }, - } - } - } - ) - - assert_that(self.logger.messages, has_item( - "INFO Allowed request username=system:kube-system namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002")) - - def test_course_enrollment(self): - self.awsed_client.add_user('user1', UserResponse(uid=1, enrollments=["course1"])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user1" - }, - "namespace": "user1", - 
"object": { - "metadata": { - "labels": { - "dsmlp/course": "course1" - } - }, - "spec": { - "securityContext": { - "runAsUser": 1 - }, - "containers": [] - }, - } - } - } - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": True, - "status": { - "message": "Allowed" - }}})) - - def test_pod_security_context(self): - self.awsed_client.add_user('user1', UserResponse(uid=1, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user1" - }, - "namespace": "user1", - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": { - "runAsUser": 1 - }, - "containers": [] - }, - } - } - } - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": True, - "status": { - "message": "Allowed" - }}})) - - def test_security_context(self): - self.awsed_client.add_user('user1', UserResponse(uid=1, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user1" - }, - "namespace": "user1", - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": { - "runAsUser": 1 - }, - "containers": [ - { - "securityContext": { - "runAsUser": 1 - } - } - ] - } - } - } - } - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": True, - "status": {"message": "Allowed"}}})) - - def test_deny_security_context(self): - """ - The user is launching a Pod, - but the PodSecurityContext.runAsUser doesn't belong to them. - Deny the request. 
- """ - self.awsed_client.add_user('user2', UserResponse(uid=2, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user2" - }, - "namespace": "user2", - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": {"runAsUser": 3}, - "containers": [] - } - } - }}) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, - "status": { - "message": "spec.securityContext: uid must be in range [2]" - }}})) - - def test_failures_are_logged(self): - self.awsed_client.add_user('user2', UserResponse(uid=2, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user2" - }, - "namespace": "user2", - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "containers": [], - "securityContext": {"runAsUser": 3}}, - }}}) - - assert_that(self.logger.messages, has_item( - f"INFO Denied request username=user2 namespace=user2 reason={response['response']['status']['message']} uid=705ab4f5-6393-11e8-b7cc-42010a800002")) - - def test_deny_unknown_user(self): - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user2" - }, - "namespace": "user2", - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "containers": [], - "securityContext": {"runAsUser": 2}}, - }}}) - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, - "status": { - "message": "namespace: no AWSEd user found with username user2" - }}})) - - def test_deny_course_enrollment(self): - """ - The user is launching a Pod, - but they are not enrolled in the course in the label "dsmlp/course". - Deny the request. 
- """ - self.awsed_client.add_user('user2', UserResponse(uid=2, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user2" - }, - "namespace": "user2", - "object": { - "metadata": { - "labels": { - "dsmlp/course": "course1" - } - }, - "spec": { - "securityContext": {"runAsUser": 2}, - "containers": [] - } - } - }}) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, - "status": { - "message": "metadata.labels: dsmlp/course must be in range []" - }}})) - - def test_deny_pod_security_context(self): - self.awsed_client.add_user('user2', UserResponse(uid=2, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user2" - }, - "namespace": "user2", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": {"runAsUser": 2}, - "containers": [ - {}, - { - "securityContext": {"runAsUser": 3} - } - ] - } - } - }}) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, "status": { - "message": "spec.containers[1].securityContext: uid must be in range [2]" - }}})) - - def test_deny_init_container(self): - """ - The user is launching a Pod with an Init Container, - but the uid doesn't belong to them. - Deny the request. - """ - self.awsed_client.add_user('user2', UserResponse(uid=2, enrollments=[])) - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user2" - }, - "namespace": "user2", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "containers": [{}], - "initContainers": [ - {}, - { - "securityContext": {"runAsUser": 99} - } - ] - } - } - }}) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, - "status": { - "message": "spec.initContainers[1].securityContext: uid must be in range [2]" - }}})) - - def test_deny_pod_security_context2(self): - """ - The Pod doesn't have any security contexts. - It should be launched. 
- """ - - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "containers": [{}] - } - } - }}) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": True, "status": { - "message": "Allowed" - }}})) - - # check podSecurityContext.runAsGroup - def test_deny_team_gid(self): - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": {"runAsGroup": 2}, - "containers": [{}] - } - } - }} - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, "status": { - "message": "spec.securityContext: gid must be in range [1000, 0, 100]" - }}})) - - # check podSecurityContext.fsGroup - def test_deny_pod_fsGroup(self): - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": {"fsGroup": 2}, - "containers": [{}] - } - } - }} - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, "status": { - "message": "spec.securityContext: gid must be in range [1000, 0, 100]" - }}})) - - # check podSecurityContext.supplementalGroups - def test_deny_pod_supplemental_groups(self): - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": {"supplementalGroups": [2]}, - "containers": [{}] - } - } - }} - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, "status": { - "message": "spec.securityContext: gid must be in range [1000, 0, 100]" - }}})) - - # check container.securityContext.runAsGroup - def test_deny_container_run_as_group(self): - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - "object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "containers": [ - { - "securityContext": {"runAsGroup": 2} - } - ] - } - } - }} - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": False, "status": { - "message": "spec.containers[0].securityContext: gid must be in range [1000, 0, 100]" - }}})) - - def test_allow_gid_0_and_100a(self): - response = self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - 
"object": { - "kind": "Pod", - "metadata": { - "labels": {} - }, - "spec": { - "securityContext": {"runAsGroup": 0}, - "containers": [ - { - "securityContext": {"runAsGroup": 100} - } - ] - } - } - } - } - ) - - assert_that(response, equal_to({ - "apiVersion": "admission.k8s.io/v1", - "kind": "AdmissionReview", - "response": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "allowed": True, "status": { - "message": "Allowed" - }}})) - - # no longer needed since the webhook filters for k9s-sync namespaces only - # def test_unlabelled_namespace_can_use_any_uid(self): - # self.kube.add_namespace('kube-system', Namespace(name='kube-system', labels={})) - - # response = self.when_validate( - # { - # "request": { - # "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - # "userInfo": { - # "username": "user10" - # }, - # "namespace": "kube-system", - # "object": { - # "spec": { - # "containers": [{}] - # } - # } - # } - # } - # ) - - # assert_that(response, equal_to({ - # "apiVersion": "admission.k8s.io/v1", - # "kind": "AdmissionReview", - # "response": { - # "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - # "allowed": True, - # "status": { - # "message": "Allowed" - # }}})) - - def test_log_allowed_requests(self): - self.when_validate( - { - "request": { - "uid": "705ab4f5-6393-11e8-b7cc-42010a800002", - "userInfo": { - "username": "user10" - }, - "namespace": "user10", - "object": { - "metadata": { - "labels": {} - }, - "spec": { - "containers": [{}] - } - } - } - } - ) - - assert_that(self.logger.messages, has_item( - "INFO Allowed request username=user10 namespace=user10 uid=705ab4f5-6393-11e8-b7cc-42010a800002")) - - def when_validate(self, json): - validator = Validator(self.awsed_client, self.logger) - response = validator.validate_request(json) - - return response diff --git a/tests/app/utils.py b/tests/app/utils.py new file mode 100644 index 0000000..ec128e6 --- /dev/null +++ b/tests/app/utils.py @@ -0,0 +1,81 @@ +from hamcrest import assert_that, equal_to +from dsmlp.app.config import GPU_LABEL +from dsmlp.app.validator import Validator +from src.dsmlp.app.types import * +from typing import List + + +def gen_request(gpu_req: int = 0, gpu_lim: int = 0, low_priority: bool = False, uid: str = "705ab4f5-6393-11e8-b7cc-42010a800002", course: str = None, + run_as_user: int = None, run_as_group: int = None, fs_group: int = None, supplemental_groups: List[int] = None, username: str = "user10", has_container: bool = True, + container_override: List[Container] = None, init_containers: List[Container] = None) -> Request: + + res_req = None + if gpu_req > 0: + if res_req is None: + res_req = ResourceRequirements() + + res_req.requests = {GPU_LABEL: gpu_req} + + if gpu_lim > 0: + if res_req is None: + res_req = ResourceRequirements() + + res_req.limits = {GPU_LABEL: gpu_lim} + + p_class = None + if low_priority: + p_class = "low" + + labels = {} + if course is not None: + labels["dsmlp/course"] = course + + metadata = None + if labels != {}: + metadata = ObjectMeta(labels=labels) + + sec_context = None + if run_as_user is not None or run_as_group is not None or fs_group is not None or supplemental_groups is not None: + sec_context = PodSecurityContext( + runAsUser=run_as_user, runAsGroup=run_as_group, fsGroup=fs_group, supplementalGroups=supplemental_groups) + + containers = [] + if has_container: + c = Container(resources=res_req) + + if run_as_user is not None or run_as_group is not None: + c.securityContext = SecurityContext(runAsUser=run_as_user, + runAsGroup=run_as_group) + + 
+        containers.append(c)
+
+    if container_override is not None:
+        containers = container_override
+
+    request = Request(
+        uid=uid,
+        namespace=username,
+        object=Object(
+            metadata=metadata,
+            spec=PodSpec(
+                containers=containers,
+                priorityClassName=p_class,
+                securityContext=sec_context,
+                initContainers=init_containers
+            )
+        ),
+        userInfo=UserInfo(username=username)
+    )
+
+    return request
+
+
+def try_val_with_component(validator: Validator, json, expected: bool, message: str = None):
+    # The "expected a failure but none was raised" check must sit outside the
+    # try block; otherwise its AssertionError is swallowed by the broad
+    # handler below and misreported as a message mismatch.
+    try:
+        response = validator.validate_pod(json)
+    except Exception as e:
+        if expected:
+            raise AssertionError(f"Expected no exception but got {e}")
+        assert_that(e.message, equal_to(message))
+        return
+    if not expected:
+        raise AssertionError(f"Expected exception but got {response}")
diff --git a/tests/ext/test_kube_client.py b/tests/ext/test_kube_client.py
new file mode 100644
index 0000000..7ae2c76
--- /dev/null
+++ b/tests/ext/test_kube_client.py
@@ -0,0 +1,83 @@
+from dsmlp.plugin.kube import Namespace
+from hamcrest import assert_that, equal_to
+from tests.fakes import FakeLogger
+from dsmlp.ext.kube import DefaultKubeClient
+from kubernetes.client import V1PodList, V1Pod, V1PodSpec, V1Container, V1ResourceRequirements
+
+
+class FakeInternalClient:
+    # Stand-in for the kubernetes API object returned by get_policy_api();
+    # only the calls DefaultKubeClient makes are implemented.
+    def read_namespace(self, name: str) -> Namespace:
+        return "namespace"
+
+    def list_namespaced_pod(self, namespace: str) -> V1PodList:
+        try:
+            return self.namespaced_pods
+        except AttributeError:
+            raise AttributeError("namespaced_pods not set")
+
+    def set_namespaced_pods(self, pods):
+        self.namespaced_pods = pods
+
+
+class TestKubeClient:
+    def setup_method(self) -> None:
+        self.logger = FakeLogger()
+        self.real_kube_client = DefaultKubeClient()
+
+    def patch_kube_client(self, namespaced_pods):
+        client = FakeInternalClient()
+        client.set_namespaced_pods(namespaced_pods)
+
+        self.real_kube_client.get_policy_api = lambda: client
+
+        return self.real_kube_client
+
+    def test_collect_gpus(self):
+        k_client = self.patch_kube_client(V1PodList(
+            items=[V1Pod(
+                spec=V1PodSpec(
+                    containers=[V1Container(
+                        name="container1",
+                        resources=V1ResourceRequirements(
+                            requests={"nvidia.com/gpu": "1"},
+                            limits={"nvidia.com/gpu": "2"}
+                        )
+                    )]
+                )
+            )]
+        ))
+
+        # requests=1 but limits=2: the pod counts as the larger of the two
+        assert_that(k_client.get_gpus_in_namespace('user10'), equal_to(2))
+
+    def test_no_gpus_requested(self):
+        k_client = self.patch_kube_client(V1PodList(
+            items=[V1Pod(
+                spec=V1PodSpec(
+                    containers=[V1Container(
+                        name="container1",
+                        resources=V1ResourceRequirements(
+                            limits={"nvidia.com/gpu": "1"}
+                        )
+                    )]
+                )
+            )]
+        ))
+
+        assert_that(k_client.get_gpus_in_namespace('user10'), equal_to(1))
+
+    def test_no_limits_nor_requests(self):
+        k_client = self.patch_kube_client(V1PodList(
+            items=[V1Pod(
+                spec=V1PodSpec(
+                    containers=[V1Container(
+                        name="container1",
+                        resources=V1ResourceRequirements()
+                    )]
+                )
+            )]
+        ))
+
+        assert_that(k_client.get_gpus_in_namespace('user10'), equal_to(0))
diff --git a/tests/fakes.py b/tests/fakes.py
index 6ec0684..c3e480c 100644
--- a/tests/fakes.py
+++ b/tests/fakes.py
@@ -39,15 +39,25 @@ def add_teams(self, username, teams: ListTeamsResponse):
 class FakeKubeClient(KubeClient):
     def 
__init__(self): self.namespaces: TypedDict[str, Namespace] = {} + self.existing_gpus: TypedDict[str, int] = {} def get_namespace(self, name: str) -> Namespace: try: return self.namespaces[name] except KeyError: raise UnsuccessfulRequest() + + def get_gpus_in_namespace(self, name: str) -> int: + try: + return self.existing_gpus[name] + except KeyError: + return 0 def add_namespace(self, name: str, namespace: Namespace): self.namespaces[name] = namespace + + def set_existing_gpus(self, name: str, gpus: int): + self.existing_gpus[name] = gpus class FakeLogger(Logger): diff --git a/tests/ref.json b/tests/ref.json new file mode 100644 index 0000000..8c86b95 --- /dev/null +++ b/tests/ref.json @@ -0,0 +1,94 @@ +{ + "kind": "AdmissionReview", + "apiVersion": "admission.k8s.io/v1beta1", + "request": { + "uid": "bf72b12b-0b87-11e8-ad08-020ee0ad1b60", + "kind": { + "group": "", + "version": "v1", + "kind": "Pod" + }, + "resource": { + "group": "", + "version": "v1", + "resource": "pods" + }, + "namespace": "charms", + "operation": "CREATE", + "userInfo": { + "username": "lucky", + "groups": [ + "system:masters", + "system:authenticated" + ] + }, + "object": { + "metadata": { + "name": "webhookx", + "namespace": "default", + "uid": "bf72ab8e-0b87-11e8-ad08-020ee0ad1b60", + "creationTimestamp": "2018-02-06T21:50:29Z" + }, + "spec": { + "volumes": [ + { + "name": "default-token-nd57q", + "secret": { + "secretName": "default-token-nd57q" + } + } + ], + "containers": [ + { + "name": "webhookx", + "image": "nginx", + "resources": { + "limits": { + "alpha.kubernetes.io/nvidia-gpu": "1" + }, + "requests": { + "alpha.kubernetes.io/nvidia-gpu": "1" + } + }, + "volumeMounts": [ + { + "name": "default-token-nd57q", + "readOnly": true, + "mountPath": "/var/run/secrets/kubernetes.io/serviceaccount" + } + ], + "terminationMessagePath": "/dev/termination-log", + "terminationMessagePolicy": "File", + "imagePullPolicy": "Always" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "serviceAccountName": "default", + "serviceAccount": "default", + "securityContext": {}, + "schedulerName": "default-scheduler", + "tolerations": [ + { + "key": "node.kubernetes.io/not-ready", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + }, + { + "key": "node.kubernetes.io/unreachable", + "operator": "Exists", + "effect": "NoExecute", + "tolerationSeconds": 300 + } + ] + }, + "status": { + "phase": "Pending", + "qosClass": "BestEffort" + } + }, + "oldObject": null + } +} \ No newline at end of file diff --git a/tests/test_admission_controller.py b/tests/test_admission_controller.py deleted file mode 100644 index 86ce985..0000000 --- a/tests/test_admission_controller.py +++ /dev/null @@ -1,107 +0,0 @@ -import pytest -import inspect -import os -import shutil -import tempfile -import dsmlp -from dsmlp.app import factory -# from dsmlp.plugin.awsed import CourseJson -from tests.fakes import FakeAwsedClient - - -@pytest.mark.integration -class TestDirCreateMain: - def setup_method(self) -> None: - self.awsed_client = FakeAwsedClient() - factory.awsed_client = self.awsed_client - - # teams1 = {"teams": [ - # { - # "gid": 1000, - # "members": [ - # { - # "firstName": "string", - # "lastName": "string", - # "role": "string", - # "uid": 0, - # "username": "user1" - # } - # ], - # "teamName": "string" - # } - # ]} - - # self.awsed_client.add_teams_for_course_from_dict('course1', teams1) - - # teams2 = {"teams": [ - # { - # "gid": 2000, - # "members": [ - # { - # 
"firstName": "string", - # "lastName": "string", - # "role": "string", - # "uid": 0, - # "username": "user2" - # } - # ], - # "teamName": "string" - # } - # ]} - - # self.awsed_client.add_teams_for_course_from_dict('course2', teams2) - - # teams3 = {"teams": [ - # { - # "gid": 3000, - # "members": [ - # { - # "firstName": "string", - # "lastName": "string", - # "role": "string", - # "uid": 0, - # "username": "user1" - # } - # ], - # "teamName": "string" - # }, - # { - # "gid": 4000, - # "members": [ - # { - # "firstName": "string", - # "lastName": "string", - # "role": "string", - # "uid": 0, - # "username": "user2" - # } - # ], - # "teamName": "string" - # } - # ]} - # self.awsed_client.add_teams_for_course_from_dict('course3', teams3) - - def test_something(self, capsys): - pass - # self.awsed_client.add_course(CourseJson(courseId='course1', tags=['teams-enabled'])) - - # os.environ["COURSE_IDS"] = "course1" - # os.environ["TEAM_ROOT"] = tempfile.gettempdir() - # dsmlp.app.factory.course_provider = EnvVarConfigProvider('COURSE_IDS') - # self.clean_dir(tempfile.gettempdir() + "/course1") - # cdir = tempfile.gettempdir() + "/course1/string" - # main() - # captured = capsys.readouterr() - # assert captured.out == "Reading course1...\n" + f"{cdir}, uid=0, gid=1000\n" - # assert captured.out == inspect.cleandoc( - # f""" - # Reading course1... - # {cdir}, uid=0, gid=1000 - # """) + "\n" - - # noinspection PyMethodMayBeStatic - # def clean_dir(self, course_): - # try: - # shutil.rmtree(course_) - # except FileNotFoundError: - # pass