From 4349cf77a52e2855fe0ccf3ea4d0c9d9c8526f5e Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 3 Mar 2022 09:31:01 +0100 Subject: [PATCH 01/68] Catch gcp HttpError --- sebs/gcp/gcp.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 633765af..93af5a2b 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -10,6 +10,7 @@ from typing import cast, Dict, Optional, Tuple, List, Type from googleapiclient.discovery import build +from googleapiclient.errors import HttpError from google.cloud import monitoring_v3 from sebs.cache import Cache @@ -201,23 +202,10 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti full_func_name = GCP.get_full_function_name(project_name, location, func_name) get_req = self.function_client.projects().locations().functions().get(name=full_func_name) - get_result = get_req.execute() - - language_runtime = (code_package.language_name + language_runtime.replace(".", ""),) - - # if result is not empty, then function does exists - if get_result: - self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) - function = GCPFunction( - name=func_name, - benchmark=benchmark, - code_package_hash=code_package.hash, - timeout=timeout, - memory=memory, - bucket=code_bucket, - ) - self.update_function(function, code_package) - else: + + try: + get_result = get_req.execute() + except HttpError: create_req = ( self.function_client.projects() .locations() @@ -261,6 +249,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti function = GCPFunction( func_name, benchmark, code_package.hash, timeout, memory, code_bucket ) + else: + # if result is not empty, then function does exists + self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) + + function = GCPFunction( + name=func_name, + benchmark=benchmark, + code_package_hash=code_package.hash, + timeout=timeout, + memory=memory, + bucket=code_bucket, + ) + self.update_function(function, code_package) # Add LibraryTrigger to a new function from sebs.gcp.triggers import LibraryTrigger From 8e9162c0a79abed481a88436537748a19ad54821 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 7 Mar 2022 10:49:23 +0100 Subject: [PATCH 02/68] Add gcp workflow --- sebs/faas/workflow.py | 80 +++++++++++++++++++++++++++++++++++++++++++ sebs/gcp/workflow.py | 59 +++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 sebs/faas/workflow.py create mode 100644 sebs/gcp/workflow.py diff --git a/sebs/faas/workflow.py b/sebs/faas/workflow.py new file mode 100644 index 00000000..3aa02cba --- /dev/null +++ b/sebs/faas/workflow.py @@ -0,0 +1,80 @@ +import json +from abc import ABC +from abc import abstractmethod +import concurrent.futures +from datetime import datetime, timedelta +from enum import Enum +from typing import Callable, Dict, List, Optional # noqa + +from sebs.utils import LoggingBase +from .function import Trigger + +""" + Abstraction base class for FaaS function. Contains a list of associated triggers + and might implement non-trigger execution if supported by the SDK. + Example: direct function invocation through AWS boto3 SDK. 
+""" + + +class Workflow(LoggingBase): + def __init__(self, benchmark: str, name: str, code_hash: str): + super().__init__() + self._benchmark = benchmark + self._name = name + self._code_package_hash = code_hash + self._updated_code = False + self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} + + @property + def name(self): + return self._name + + @property + def benchmark(self): + return self._benchmark + + @property + def code_package_hash(self): + return self._code_package_hash + + @code_package_hash.setter + def code_package_hash(self, new_hash: str): + self._code_package_hash = new_hash + + @property + def updated_code(self) -> bool: + return self._updated_code + + @updated_code.setter + def updated_code(self, val: bool): + self._updated_code = val + + def triggers_all(self) -> List[Trigger]: + return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] + + def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: + try: + return self._triggers[trigger_type] + except KeyError: + return [] + + def add_trigger(self, trigger: Trigger): + if trigger.trigger_type() not in self._triggers: + self._triggers[trigger.trigger_type()] = [trigger] + else: + self._triggers[trigger.trigger_type()].append(trigger) + + def serialize(self) -> dict: + return { + "name": self._name, + "hash": self._code_package_hash, + "benchmark": self._benchmark, + "triggers": [ + obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers + ], + } + + @staticmethod + @abstractmethod + def deserialize(cached_config: dict) -> "Function": + pass diff --git a/sebs/gcp/workflow.py b/sebs/gcp/workflow.py new file mode 100644 index 00000000..ed8e10dc --- /dev/null +++ b/sebs/gcp/workflow.py @@ -0,0 +1,59 @@ +from typing import cast, Optional + +from sebs.faas.workflow import Workflow +from sebs.gcp.storage import GCPStorage + + +class GCPWorkflow(Workflow): + def __init__( + self, + name: str, + benchmark: str, + code_package_hash: str, + timeout: int, + memory: int, + bucket: Optional[str] = None, + ): + super().__init__(benchmark, name, code_package_hash) + self.timeout = timeout + self.memory = memory + self.bucket = bucket + + @staticmethod + def typename() -> str: + return "GCP.GCPWorkflow" + + def serialize(self) -> dict: + return { + **super().serialize(), + "timeout": self.timeout, + "memory": self.memory, + "bucket": self.bucket, + } + + @staticmethod + def deserialize(cached_config: dict) -> "GCPWorkflow": + from sebs.faas.function import Trigger + from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + + ret = GCPWorkflow( + cached_config["name"], + cached_config["benchmark"], + cached_config["hash"], + cached_config["timeout"], + cached_config["memory"], + cached_config["bucket"], + ) + for trigger in cached_config["triggers"]: + trigger_type = cast( + Trigger, + {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + ) + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + ret.add_trigger(trigger_type.deserialize(trigger)) + return ret + + def code_bucket(self, benchmark: str, storage_client: GCPStorage): + if not self.bucket: + self.bucket, idx = storage_client.add_input_bucket(benchmark) + return self.bucket From 7896ddd173211989fbd14d95d54bde8e95f664bb Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 7 Mar 2022 10:50:05 +0100 Subject: [PATCH 03/68] Basic workflow invocation --- sebs.py | 77 +++++++++++++++++++++++++++++++- sebs/faas/system.py | 93 
+++++++++++++++++++++++++++++++++++++- sebs/gcp/gcp.py | 103 ++++++++++++++++++++++++++++++++++++++++--- sebs/gcp/triggers.py | 71 +++++++++++++++++++++++++---- 4 files changed, 325 insertions(+), 19 deletions(-) diff --git a/sebs.py b/sebs.py index 11329508..e726ffaa 100755 --- a/sebs.py +++ b/sebs.py @@ -181,7 +181,7 @@ def benchmark(): help="Override function name for random generation.", ) @common_params -def invoke(benchmark, benchmark_input_size, repetitions, trigger, function_name, **kwargs): +def function(benchmark, benchmark_input_size, repetitions, trigger, function_name, **kwargs): ( config, @@ -233,6 +233,81 @@ def invoke(benchmark, benchmark_input_size, repetitions, trigger, function_name, with open("experiments.json", "w") as out_f: out_f.write(sebs.utils.serialize(result)) sebs_client.logging.info("Save results to {}".format(os.path.abspath("experiments.json"))) + +@benchmark.command() +@click.argument("benchmark", type=str) # , help="Benchmark to be used.") +@click.argument( + "benchmark-input-size", type=click.Choice(["test", "small", "large"]) +) # help="Input test size") +@click.option( + "--repetitions", default=5, type=int, help="Number of experimental repetitions." +) +@click.option( + "--trigger", + type=click.Choice(["library", "http"]), + default="library", + help="Workflow trigger to be used." +) +@click.option( + "--function-name", + default=None, + type=str, + help="Override function name for random generation.", +) +@common_params +def workflow(benchmark, benchmark_input_size, repetitions, trigger, function_name, **kwargs): + + ( + config, + output_dir, + logging_filename, + sebs_client, + deployment_client, + ) = parse_common_params(**kwargs) + + experiment_config = sebs_client.get_experiment_config(config["experiments"]) + benchmark_obj = sebs_client.get_benchmark( + benchmark, + deployment_client, + experiment_config, + logging_filename=logging_filename, + ) + workflow = deployment_client.get_workflow( + benchmark_obj, 'test' #function_name if function_name else deployment_client.default_function_name(benchmark_obj) + ) + storage = deployment_client.get_storage( + replace_existing=experiment_config.update_storage + ) + input_config = benchmark_obj.prepare_input( + storage=storage, size=benchmark_input_size + ) + + result = sebs.experiments.ExperimentResult( + experiment_config, deployment_client.config + ) + result.begin() + + trigger_type = Trigger.TriggerType.get(trigger) + triggers = workflow.triggers(trigger_type) + if len(triggers) == 0: + trigger = deployment_client.create_trigger( + workflow, trigger_type + ) + else: + trigger = triggers[0] + for i in range(repetitions): + sebs_client.logging.info(f"Beginning repetition {i+1}/{repetitions}") + ret = trigger.sync_invoke(input_config) + if ret.stats.failure: + sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") + #deployment_client.get_invocation_error( + # function_name=func.name, start_time=start_time, end_time=end_time + #) + result.add_invocation(workflow, ret) + result.end() + with open("experiments.json", "w") as out_f: + out_f.write(sebs.utils.serialize(result)) + sebs_client.logging.info("Save results to {}".format(os.path.abspath("experiments.json"))) @benchmark.command() diff --git a/sebs/faas/system.py b/sebs/faas/system.py index cdc3a656..6f9e7712 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -9,6 +9,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.faas.function import Function, Trigger, ExecutionResult +from sebs.faas.workflow 
import Workflow from sebs.faas.storage import PersistentStorage from sebs.utils import LoggingBase from .config import Config @@ -110,6 +111,10 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu @abstractmethod def create_function(self, code_package: Benchmark, func_name: str) -> Function: pass + + @abstractmethod + def create_workflow(self, code_package: Benchmark, workflow_name: str): + pass @abstractmethod def cached_function(self, function: Function): @@ -133,7 +138,6 @@ def update_function(self, function: Function, code_package: Benchmark): """ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: - if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name ): @@ -201,6 +205,79 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) ) code_package.query_cache() return function + + + def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = None): + if code_package.language_version not in self.system_config.supported_language_versions( + self.name(), code_package.language_name + ): + raise Exception( + "Unsupported {language} version {version} in {system}!".format( + language=code_package.language_name, + version=code_package.language_version, + system=self.name(), + ) + ) + + # if not workflow_name: + # workflow_name = self.default_function_name(code_package) + rebuilt, _ = code_package.build(self.package_code) + + return self.create_workflow(code_package, workflow_name) + + + """ + There's no function with that name? + a) yes -> create new function. Implementation might check if a function + with that name already exists in the cloud and update its code. + b) no -> retrieve function from the cache. Function code in cloud will + be updated if the local version is different. + """ + functions = code_package.functions + if not functions or func_name not in functions: + msg = ( + "function name not provided." + if not func_name + else "function {} not found in cache.".format(func_name) + ) + self.logging.info("Creating new function! Reason: " + msg) + function = self.create_function(code_package, func_name) + self.cache_client.add_function( + deployment_name=self.name(), + language_name=code_package.language_name, + code_package=code_package, + function=function, + ) + code_package.query_cache() + return function + else: + # retrieve function + cached_function = functions[func_name] + code_location = code_package.code_location + function = self.function_type().deserialize(cached_function) + self.cached_function(function) + self.logging.info( + "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) + ) + # is the function up-to-date? + if function.code_package_hash != code_package.hash or rebuilt: + self.logging.info( + f"Cached function {func_name} with hash " + f"{function.code_package_hash} is not up to date with " + f"current build {code_package.hash} in " + f"{code_location}, updating cloud version!" 
+ ) + self.update_function(function, code_package) + function.code_package_hash = code_package.hash + function.updated_code = True + self.cache_client.add_function( + deployment_name=self.name(), + language_name=code_package.language_name, + code_package=code_package, + function=function, + ) + code_package.query_cache() + return function @abstractmethod def default_function_name(self, code_package: Benchmark) -> str: @@ -220,9 +297,21 @@ def download_metrics( metrics: dict, ): pass + + def create_trigger(self, obj, trigger_type: Trigger.TriggerType) -> Trigger: + if isinstance(obj, Function): + self.create_function_trigger(obj, trigger_type) + elif isinstance(obj, Workflow): + self.create_workflow_trigger(obj, trigger_type) + else: + raise TypeError("Cannot create trigger for {obj}") @abstractmethod - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + pass + + @abstractmethod + def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: pass # @abstractmethod diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 93af5a2b..cce64f75 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -17,11 +17,13 @@ from sebs.config import SeBSConfig from sebs.benchmark import Benchmark from ..faas.function import Function, Trigger +from ..faas.workflow import Workflow from .storage import PersistentStorage from ..faas.system import System from sebs.gcp.config import GCPConfig from sebs.gcp.storage import GCPStorage from sebs.gcp.function import GCPFunction +from sebs.gcp.workflow import GCPWorkflow from sebs.utils import LoggingHandlers """ @@ -73,10 +75,14 @@ def function_type() -> "Type[Function]": def initialize(self, config: Dict[str, str] = {}): self.function_client = build("cloudfunctions", "v1", cache_discovery=False) + self.workflow_client = build("workflows", "v1", cache_discovery=False) self.get_storage() def get_function_client(self): return self.function_client + + def get_workflow_client(self): + return self.workflow_client """ Access persistent storage instance. 
@@ -211,9 +217,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti .locations() .functions() .create( - location="projects/{project_name}/locations/{location}".format( - project_name=project_name, location=location - ), + location=GCP.get_location(project_name, location), body={ "name": full_func_name, "entryPoint": "handler", @@ -263,16 +267,16 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti ) self.update_function(function, code_package) - # Add LibraryTrigger to a new function - from sebs.gcp.triggers import LibraryTrigger + # Add LibraryFunctionTrigger to a new function + from sebs.gcp.triggers import FunctionLibraryTrigger - trigger = LibraryTrigger(func_name, self) + trigger = FunctionLibraryTrigger(func_name, self) trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) return function - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.gcp.triggers import HTTPTrigger if trigger_type == Trigger.TriggerType.HTTP: @@ -302,6 +306,19 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) function.add_trigger(trigger) self.cache_client.update_function(function) return trigger + + def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + from sebs.gcp.triggers import WorkflowLibraryTrigger + + if trigger_type == Trigger.TriggerType.HTTP: + raise NotImplementedError('Cannot create http triggers for workflows.') + else: + trigger = WorkflowLibraryTrigger(workflow.name, self) + + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + # self.cache_client.update_workflow(workflow) + return trigger def cached_function(self, function: Function): @@ -355,6 +372,10 @@ def update_function(self, function: Function, code_package: Benchmark): @staticmethod def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" + + @staticmethod + def get_full_workflow_name(project_name: str, location: str, workflow_name: str): + return f"projects/{project_name}/locations/{location}/workflows/{workflow_name}" def prepare_experiment(self, benchmark): logs_bucket = self.storage.add_output_bucket(benchmark, suffix="logs") @@ -571,6 +592,74 @@ def deployment_version(self, func: Function) -> int: status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() return int(status_res["versionId"]) + + @staticmethod + def get_location(project_name: str, location: str) -> str: + return f"projects/{project_name}/locations/{location}" + + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWorkflow": + + package = code_package.code_location + benchmark = code_package.benchmark + language_runtime = code_package.language_version + timeout = code_package.benchmark_config.timeout + memory = code_package.benchmark_config.memory + code_bucket: Optional[str] = None + storage_client = self.get_storage() + location = self.config.region + project_name = self.config.project_name + + full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) + get_req = self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) + + with open('cache/test.yml') as f: + code = f.read() + + try: + 
get_result = get_req.execute() + except HttpError: + parent = GCP.get_location(project_name, location) + create_req = ( + self.workflow_client.projects() + .locations() + .workflows() + .create( + parent=parent, + workflowId=workflow_name, + body={ + "name": full_workflow_name, + "sourceContents": code, + }, + ) + ) + create_req.execute() + self.logging.info(f"Workflow {workflow_name} has been created!") + + workflow = GCPWorkflow( + workflow_name, benchmark, code_package.hash, timeout, memory, code_bucket + ) + else: + # if result is not empty, then function does exists + self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) + + workflow = GCPWorkflow( + name=workflow_name, + benchmark=benchmark, + code_package_hash=code_package.hash, + timeout=timeout, + memory=memory, + bucket=code_bucket, + ) + self.update_workflow(workflow, code_package) + + # Add LibraryTrigger to a new function + from sebs.gcp.triggers import WorkflowLibraryTrigger + + trigger = WorkflowLibraryTrigger(workflow_name, self) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + + return workflow # @abstractmethod # def get_invocation_error(self, function_name: str, diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6c..79918568 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -2,16 +2,20 @@ import datetime import json import time +import os from typing import Dict, Optional # noqa +from google.cloud.workflows import executions_v1beta as workflow_executions +from google.cloud.workflows.executions_v1beta.types import executions as workflow_exec_types + from sebs.gcp.gcp import GCP from sebs.faas.function import ExecutionResult, Trigger class LibraryTrigger(Trigger): - def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + def __init__(self, name: str, deployment_client: Optional[GCP] = None): super().__init__() - self.name = fname + self.name = name self._deployment_client = deployment_client @staticmethod @@ -31,6 +35,18 @@ def deployment_client(self, deployment_client: GCP): def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.LIBRARY + def async_invoke(self, payload: dict): + raise NotImplementedError() + + def serialize(self) -> dict: + return {"type": "Library", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return LibraryTrigger(obj["name"]) + + +class FunctionLibraryTrigger(LibraryTrigger): def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.info(f"Invoke function {self.name}") @@ -70,16 +86,53 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: output = json.loads(res["result"]) gcp_result.parse_benchmark_output(output) return gcp_result + + +class WorkflowLibraryTrigger(LibraryTrigger): + def sync_invoke(self, payload: dict) -> ExecutionResult: - def async_invoke(self, payload: dict): - raise NotImplementedError() + self.logging.info(f"Invoke workflow {self.name}") - def serialize(self) -> dict: - return {"type": "Library", "name": self.name} + # Verify that the function is deployed + # deployed = False + # while not deployed: + # if self.deployment_client.is_deployed(self.name): + # deployed = True + # else: + # time.sleep(5) - @staticmethod - def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + # GCP's fixed style for a function name + config = self.deployment_client.config + full_workflow_name = GCP.get_full_workflow_name(config.project_name, config.region, self.name) + + execution_client = 
workflow_executions.ExecutionsClient() + + begin = datetime.datetime.now() + res = execution_client.create_execution(request={"parent": full_workflow_name}) + end = datetime.datetime.now() + + gcp_result = ExecutionResult.from_times(begin, end) + + # Wait for execution to finish, then print results. + execution_finished = False + backoff_delay = 1 # Start wait with delay of 1 second + while (not execution_finished): + execution = execution_client.get_execution(request={"name": res.name}) + execution_finished = execution.state != workflow_exec_types.Execution.State.ACTIVE + + # If we haven't seen the result yet, wait a second. + if not execution_finished: + time.sleep(backoff_delay) + backoff_delay *= 2 # Double the delay to provide exponential backoff. + else: + self.logging.error(f"Invocation of {self.name} failed") + self.logging.error(f"Input: {payload}") + gcp_result.stats.failure = True + return gcp_result + + output = json.loads(execution.result) + gcp_result.parse_benchmark_output(output) + return gcp_result class HTTPTrigger(Trigger): From e079d23cb3b4c62ff0469952008c501b353ddb17 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 7 Mar 2022 13:12:27 +0100 Subject: [PATCH 04/68] Correct workflow execution --- sebs/faas/function.py | 9 +++ sebs/gcp/gcp.py | 156 +++++++++++++++++++++--------------------- sebs/gcp/triggers.py | 16 ++--- 3 files changed, 95 insertions(+), 86 deletions(-) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 56688779..37ea596f 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -6,6 +6,8 @@ from enum import Enum from typing import Callable, Dict, List, Optional # noqa +from google.cloud.workflows.executions_v1beta.types import Execution + from sebs.utils import LoggingBase """ @@ -146,6 +148,13 @@ def parse_benchmark_output(self, output: dict): ) / timedelta(microseconds=1) ) + + def parse_benchmark_execution(self, execution: Execution): + self.output = json.loads(execution.result) + self.times.benchmark = int( + (execution.start_time - execution.end_time) + / timedelta(microseconds=1) + ) @staticmethod def deserialize(cached_config: dict) -> "ExecutionResult": diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index cce64f75..50b97d53 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -306,19 +306,6 @@ def create_function_trigger(self, function: Function, trigger_type: Trigger.Trig function.add_trigger(trigger) self.cache_client.update_function(function) return trigger - - def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.gcp.triggers import WorkflowLibraryTrigger - - if trigger_type == Trigger.TriggerType.HTTP: - raise NotImplementedError('Cannot create http triggers for workflows.') - else: - trigger = WorkflowLibraryTrigger(workflow.name, self) - - trigger.logging_handlers = self.logging_handlers - workflow.add_trigger(trigger) - # self.cache_client.update_workflow(workflow) - return trigger def cached_function(self, function: Function): @@ -368,11 +355,88 @@ def update_function(self, function: Function, code_package: Benchmark): else: break self.logging.info("Published new function code and configuration.") - + @staticmethod def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWorkflow": + + package = code_package.code_location + benchmark = code_package.benchmark + 
language_runtime = code_package.language_version + timeout = code_package.benchmark_config.timeout + memory = code_package.benchmark_config.memory + code_bucket: Optional[str] = None + storage_client = self.get_storage() + location = self.config.region + project_name = self.config.project_name + + full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) + get_req = self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) + + with open('cache/test.yml') as f: + code = f.read() + + try: + get_result = get_req.execute() + except HttpError: + parent = GCP.get_location(project_name, location) + create_req = ( + self.workflow_client.projects() + .locations() + .workflows() + .create( + parent=parent, + workflowId=workflow_name, + body={ + "name": full_workflow_name, + "sourceContents": code, + }, + ) + ) + create_req.execute() + self.logging.info(f"Workflow {workflow_name} has been created!") + + workflow = GCPWorkflow( + workflow_name, benchmark, code_package.hash, timeout, memory, code_bucket + ) + else: + # if result is not empty, then function does exists + self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) + + workflow = GCPWorkflow( + name=workflow_name, + benchmark=benchmark, + code_package_hash=code_package.hash, + timeout=timeout, + memory=memory, + bucket=code_bucket, + ) + self.update_workflow(workflow, code_package) + + # Add LibraryTrigger to a new function + from sebs.gcp.triggers import WorkflowLibraryTrigger + + trigger = WorkflowLibraryTrigger(workflow_name, self) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + + return workflow + + def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + from sebs.gcp.triggers import WorkflowLibraryTrigger + + if trigger_type == Trigger.TriggerType.HTTP: + raise NotImplementedError('Cannot create http triggers for workflows.') + else: + trigger = WorkflowLibraryTrigger(workflow.name, self) + + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + # self.cache_client.update_workflow(workflow) + return trigger + @staticmethod def get_full_workflow_name(project_name: str, location: str, workflow_name: str): return f"projects/{project_name}/locations/{location}/workflows/{workflow_name}" @@ -596,70 +660,6 @@ def deployment_version(self, func: Function) -> int: @staticmethod def get_location(project_name: str, location: str) -> str: return f"projects/{project_name}/locations/{location}" - - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWorkflow": - - package = code_package.code_location - benchmark = code_package.benchmark - language_runtime = code_package.language_version - timeout = code_package.benchmark_config.timeout - memory = code_package.benchmark_config.memory - code_bucket: Optional[str] = None - storage_client = self.get_storage() - location = self.config.region - project_name = self.config.project_name - - full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) - get_req = self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) - - with open('cache/test.yml') as f: - code = f.read() - - try: - get_result = get_req.execute() - except HttpError: - parent = GCP.get_location(project_name, location) - create_req = ( - self.workflow_client.projects() - .locations() - .workflows() - .create( - parent=parent, - workflowId=workflow_name, - body={ - 
"name": full_workflow_name, - "sourceContents": code, - }, - ) - ) - create_req.execute() - self.logging.info(f"Workflow {workflow_name} has been created!") - - workflow = GCPWorkflow( - workflow_name, benchmark, code_package.hash, timeout, memory, code_bucket - ) - else: - # if result is not empty, then function does exists - self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) - - workflow = GCPWorkflow( - name=workflow_name, - benchmark=benchmark, - code_package_hash=code_package.hash, - timeout=timeout, - memory=memory, - bucket=code_bucket, - ) - self.update_workflow(workflow, code_package) - - # Add LibraryTrigger to a new function - from sebs.gcp.triggers import WorkflowLibraryTrigger - - trigger = WorkflowLibraryTrigger(workflow_name, self) - trigger.logging_handlers = self.logging_handlers - workflow.add_trigger(trigger) - - return workflow # @abstractmethod # def get_invocation_error(self, function_name: str, diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 79918568..c1dddb09 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -5,8 +5,8 @@ import os from typing import Dict, Optional # noqa -from google.cloud.workflows import executions_v1beta as workflow_executions -from google.cloud.workflows.executions_v1beta.types import executions as workflow_exec_types +from google.cloud.workflows.executions_v1beta import ExecutionsClient +from google.cloud.workflows.executions_v1beta.types import Execution from sebs.gcp.gcp import GCP from sebs.faas.function import ExecutionResult, Trigger @@ -105,10 +105,11 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: config = self.deployment_client.config full_workflow_name = GCP.get_full_workflow_name(config.project_name, config.region, self.name) - execution_client = workflow_executions.ExecutionsClient() + execution_client = ExecutionsClient() + execution = Execution(argument=json.dumps(payload)) begin = datetime.datetime.now() - res = execution_client.create_execution(request={"parent": full_workflow_name}) + res = execution_client.create_execution(parent=full_workflow_name, execution=execution) end = datetime.datetime.now() gcp_result = ExecutionResult.from_times(begin, end) @@ -118,20 +119,19 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: backoff_delay = 1 # Start wait with delay of 1 second while (not execution_finished): execution = execution_client.get_execution(request={"name": res.name}) - execution_finished = execution.state != workflow_exec_types.Execution.State.ACTIVE + execution_finished = execution.state != Execution.State.ACTIVE # If we haven't seen the result yet, wait a second. if not execution_finished: time.sleep(backoff_delay) backoff_delay *= 2 # Double the delay to provide exponential backoff. 
- else: + elif execution.state == Execution.State.FAILED: self.logging.error(f"Invocation of {self.name} failed") self.logging.error(f"Input: {payload}") gcp_result.stats.failure = True return gcp_result - output = json.loads(execution.result) - gcp_result.parse_benchmark_output(output) + gcp_result.parse_benchmark_execution(execution) return gcp_result From 578b483b5cc91a3935869254a67fd83142384851 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 7 Mar 2022 13:41:20 +0100 Subject: [PATCH 05/68] Implement workflow updating --- sebs/gcp/gcp.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 50b97d53..b9008dad 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -403,7 +403,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWo ) else: # if result is not empty, then function does exists - self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) + self.logging.info("Workflow {} exists on GCP, update the instance.".format(workflow_name)) workflow = GCPWorkflow( name=workflow_name, @@ -437,6 +437,34 @@ def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.Trig # self.cache_client.update_workflow(workflow) return trigger + def update_workflow(self, workflow: Workflow, code_package: Benchmark): + + workflow = cast(GCPWorkflow, workflow) + language_runtime = code_package.language_version + code_package_name = os.path.basename(code_package.code_location) + storage = cast(GCPStorage, self.get_storage()) + + with open('cache/test.yml') as f: + code = f.read() + + full_workflow_name = GCP.get_full_workflow_name( + self.config.project_name, self.config.region, workflow.name + ) + req = ( + self.workflow_client.projects() + .locations() + .workflows() + .patch( + name=full_workflow_name, + body={ + "name": full_workflow_name, + "sourceContents": code + }, + ) + ) + res = req.execute() + self.logging.info("Published new workflow code and configuration.") + @staticmethod def get_full_workflow_name(project_name: str, location: str, workflow_name: str): return f"projects/{project_name}/locations/{location}/workflows/{workflow_name}" From 42a6c5026dbe1f82baa45fd290b76dccb097489f Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 7 Mar 2022 14:27:47 +0100 Subject: [PATCH 06/68] Add fixme comment --- sebs/faas/system.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 6f9e7712..f908a596 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -223,6 +223,7 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N # workflow_name = self.default_function_name(code_package) rebuilt, _ = code_package.build(self.package_code) + # FIXME: cache workflows return self.create_workflow(code_package, workflow_name) From 7fe0ef259e14a91ce63869016bd6f218485b8038 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 9 Mar 2022 11:31:03 +0100 Subject: [PATCH 07/68] Extend aws role policies --- sebs/aws/config.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 849f40aa..250378ad 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -119,8 +119,14 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: "Version": "2012-10-17", "Statement": [ { + "Sid": "", "Effect": "Allow", - "Principal": {"Service": "lambda.amazonaws.com"}, + "Principal": { + "Service": [ + 
"lambda.amazonaws.com", + "states.amazonaws.com" + ] + }, "Action": "sts:AssumeRole", } ], @@ -129,6 +135,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: attached_policies = [ "arn:aws:iam::aws:policy/AmazonS3FullAccess", "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", + "arn:aws:iam::aws:policy/service-role/AWSLambdaRole" ] try: out = iam_client.get_role(RoleName=role_name) From 0514130dba454f69ab6f340d2f2e4f2ef3bca035 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 9 Mar 2022 12:30:39 +0100 Subject: [PATCH 08/68] Basic aws workflow execution --- sebs.py | 1 + sebs/aws/aws.py | 211 ++++++++++++++++++++++++++++++++---------- sebs/aws/triggers.py | 58 ++++++++++-- sebs/aws/workflow.py | 58 ++++++++++++ sebs/benchmark.py | 29 +++++- sebs/faas/system.py | 2 +- sebs/faas/workflow.py | 2 +- 7 files changed, 299 insertions(+), 62 deletions(-) create mode 100644 sebs/aws/workflow.py diff --git a/sebs.py b/sebs.py index e726ffaa..8808c38d 100755 --- a/sebs.py +++ b/sebs.py @@ -220,6 +220,7 @@ def function(benchmark, benchmark_input_size, repetitions, trigger, function_nam ) else: trigger = triggers[0] + for i in range(repetitions): sebs_client.logging.info(f"Beginning repetition {i+1}/{repetitions}") ret = trigger.sync_invoke(input_config) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 055c4779..02377578 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -1,7 +1,9 @@ import math import os +import re import shutil import time +import json import uuid from typing import cast, Dict, List, Optional, Tuple, Type, Union # noqa @@ -10,6 +12,7 @@ from sebs.aws.s3 import S3 from sebs.aws.function import LambdaFunction +from sebs.aws.workflow import SFNWorkflow from sebs.aws.config import AWSConfig from sebs.utils import execute from sebs.benchmark import Benchmark @@ -17,6 +20,7 @@ from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers from sebs.faas.function import Function, ExecutionResult, Trigger +from sebs.faas.workflow import Workflow from sebs.faas.storage import PersistentStorage from sebs.faas.system import System @@ -68,15 +72,24 @@ def initialize(self, config: Dict[str, str] = {}): aws_secret_access_key=self.config.credentials.secret_key, ) self.get_lambda_client() + self.get_sfn_client() self.get_storage() def get_lambda_client(self): - if not hasattr(self, "client"): - self.client = self.session.client( + if not hasattr(self, "lambda_client"): + self.lambda_client = self.session.client( service_name="lambda", region_name=self.config.region, ) - return self.client + return self.lambda_client + + def get_sfn_client(self): + if not hasattr(self, "stepfunctions_client"): + self.sfn_client = self.session.client( + service_name="stepfunctions", + region_name=self.config.region, + ) + return self.sfn_client """ Create a client instance for cloud storage. 
When benchmark and buckets @@ -149,7 +162,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size - def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFunction": + def create_function(self, code_package: Benchmark, func_name: str, handler: str=None) -> "LambdaFunction": package = code_package.code_location benchmark = code_package.benchmark @@ -159,13 +172,13 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun memory = code_package.benchmark_config.memory code_size = code_package.code_size code_bucket: Optional[str] = None - func_name = AWS.format_function_name(func_name) + func_name = AWS.format_resource_name(func_name) storage_client = self.get_storage() # we can either check for exception or use list_functions # there's no API for test try: - ret = self.client.get_function(FunctionName=func_name) + ret = self.lambda_client.get_function(FunctionName=func_name) self.logging.info( "Function {} exists on AWS, retrieve configuration.".format(func_name) ) @@ -180,10 +193,10 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun language_runtime, self.config.resources.lambda_role(self.session), ) - self.update_function(lambda_function, code_package) + # self.update_function(lambda_function, code_package) lambda_function.updated_code = True # TODO: get configuration of REST API - except self.client.exceptions.ResourceNotFoundException: + except self.lambda_client.exceptions.ResourceNotFoundException: self.logging.info("Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size @@ -201,10 +214,10 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun storage_client.upload(code_bucket, package, code_package_name) self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_package_name} - ret = self.client.create_function( + ret = self.lambda_client.create_function( FunctionName=func_name, Runtime="{}{}".format(language, language_runtime), - Handler="handler.handler", + Handler=handler if handler else "handler.handler", Role=self.config.resources.lambda_role(self.session), MemorySize=memory, Timeout=timeout, @@ -225,9 +238,9 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun ) # Add LibraryTrigger to a new function - from sebs.aws.triggers import LibraryTrigger + from sebs.aws.triggers import FunctionLibraryTrigger - trigger = LibraryTrigger(func_name, self) + trigger = FunctionLibraryTrigger(func_name, self) trigger.logging_handlers = self.logging_handlers lambda_function.add_trigger(trigger) @@ -264,21 +277,153 @@ def update_function(self, function: Function, code_package: Benchmark): # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - self.client.update_function_code(FunctionName=name, ZipFile=code_body.read()) + self.lambda_client.update_function_code(FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) storage = cast(S3, self.get_storage()) bucket = function.code_bucket(code_package.benchmark, storage) storage.upload(bucket, package, code_package_name) - self.client.update_function_code( + self.lambda_client.update_function_code( FunctionName=name, S3Bucket=bucket, S3Key=code_package_name 
) # and update config - self.client.update_function_configuration( + self.lambda_client.update_function_configuration( FunctionName=name, Timeout=function.timeout, MemorySize=function.memory ) self.logging.info("Published new function code") + + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + from sebs.aws.triggers import HTTPTrigger + + function = cast(LambdaFunction, func) + + if trigger_type == Trigger.TriggerType.HTTP: + + api_name = "{}-http-api".format(function.name) + http_api = self.config.resources.http_api(api_name, function, self.session) + # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ + # but this is wrong - source arn must be {api-arn}/*/* + self.get_lambda_client().add_permission( + FunctionName=function.name, + StatementId=str(uuid.uuid1()), + Action="lambda:InvokeFunction", + Principal="apigateway.amazonaws.com", + SourceArn=f"{http_api.arn}/*/*", + ) + trigger = HTTPTrigger(http_api.endpoint, api_name) + trigger.logging_handlers = self.logging_handlers + elif trigger_type == Trigger.TriggerType.LIBRARY: + # should already exist + return func.triggers(Trigger.TriggerType.LIBRARY)[0] + else: + raise RuntimeError("Not supported!") + + function.add_trigger(trigger) + self.cache_client.update_function(function) + return trigger + + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "LambdaWorkflow": + + workflow_name = AWS.format_resource_name(workflow_name) + + # Make sure we have a valid workflow benchmark + definition = code_package.workflow_definition + if not code_package.workflow_definition: + raise ValueError(f"No workflow definition found for {workflow_name}") + + # First we create a lambda function for each code file + code_files = list(code_package.get_code_files(include_config=False)) + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + # func_names = [fn for fn in func_names if '__init__' != fn] + funcs = [self.create_function(code_package, workflow_name+"-"+fn, handler="function."+fn+".handler") for fn in func_names] + + # Set the ARN to the corresponding states in the workflow definition + for name, func in zip(func_names, funcs): + try: + definition["States"][name]["Resource"] = func.arn + except KeyError: + raise ValueError(f"Workflow definition for {workflow_name} missing state {func.name}") + + package = code_package.code_location + + + # We cannot retrieve the state machine because we don't know its ARN + # so we just create it and catch any errors + try: + ret = self.sfn_client.create_state_machine( + name=workflow_name, + definition=json.dumps(definition), + roleArn=self.config.resources.lambda_role(self.session), + ) + + self.logging.info("Creating workflow {} from {}".format(workflow_name, package)) + + workflow = SFNWorkflow( + workflow_name, + funcs, + code_package.benchmark, + ret["stateMachineArn"], + code_package.hash, + self.config.resources.lambda_role(self.session), + ) + except self.sfn_client.exceptions.StateMachineAlreadyExists as e: + arn = re.search("'([^']*)'", str(e)).group()[1:-1] + + self.logging.info( + "Workflow {} exists on AWS, retrieve configuration.".format(workflow_name) + ) + + # Here we assume a single Lambda role + workflow = SFNWorkflow( + workflow_name, + funcs, + code_package.benchmark, + arn, + code_package.hash, + self.config.resources.lambda_role(self.session), + ) + + self.update_workflow(workflow, definition, code_package) + workflow.updated_code = True + + # Add LibraryTrigger to a 
new function + from sebs.aws.triggers import WorkflowLibraryTrigger + + trigger = WorkflowLibraryTrigger(workflow.arn, self) + trigger.logging_handlers = self.logging_handlers + workflow.add_trigger(trigger) + + return workflow + + def update_workflow(self, workflow: Workflow, definition: str, code_package: Benchmark): + + workflow = cast(SFNWorkflow, workflow) + + # and update config + self.sfn_client.update_state_machine( + stateMachineArn=workflow.arn, + definition=json.dumps(definition), + roleArn=self.config.resources.lambda_role(self.session), + ) + self.logging.info("Published new workflow code") + + def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + from sebs.aws.triggers import HTTPTrigger + + workflow = cast(LambdaWorkflow, workflow) + + if trigger_type == Trigger.TriggerType.HTTP: + raise RuntimeError("Not supported!") + elif trigger_type == Trigger.TriggerType.LIBRARY: + # should already exist + return func.triggers(Trigger.TriggerType.LIBRARY)[0] + else: + raise RuntimeError("Not supported!") + + workflow.add_trigger(trigger) + self.cache_client.update_function(function) + return trigger @staticmethod def default_function_name(code_package: Benchmark) -> str: @@ -288,10 +433,10 @@ def default_function_name(code_package: Benchmark) -> str: code_package.language_name, code_package.benchmark_config.memory, ) - return AWS.format_function_name(func_name) + return AWS.format_resource_name(func_name) @staticmethod - def format_function_name(func_name: str) -> str: + def format_resource_name(func_name: str) -> str: # AWS Lambda does not allow hyphens in function names func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") @@ -304,7 +449,7 @@ def format_function_name(func_name: str) -> str: def delete_function(self, func_name: Optional[str]): self.logging.debug("Deleting function {}".format(func_name)) try: - self.client.delete_function(FunctionName=func_name) + self.lambda_client.delete_function(FunctionName=func_name) except Exception: self.logging.debug("Function {} does not exist!".format(func_name)) @@ -447,36 +592,6 @@ def download_metrics( f"out of {results_count} invocations" ) - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.aws.triggers import HTTPTrigger - - function = cast(LambdaFunction, func) - - if trigger_type == Trigger.TriggerType.HTTP: - - api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api(api_name, function, self.session) - # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ - # but this is wrong - source arn must be {api-arn}/*/* - self.get_lambda_client().add_permission( - FunctionName=function.name, - StatementId=str(uuid.uuid1()), - Action="lambda:InvokeFunction", - Principal="apigateway.amazonaws.com", - SourceArn=f"{http_api.arn}/*/*", - ) - trigger = HTTPTrigger(http_api.endpoint, api_name) - trigger.logging_handlers = self.logging_handlers - elif trigger_type == Trigger.TriggerType.LIBRARY: - # should already exist - return func.triggers(Trigger.TriggerType.LIBRARY)[0] - else: - raise RuntimeError("Not supported!") - - function.add_trigger(trigger) - self.cache_client.update_function(function) - return trigger - def _enforce_cold_start(self, function: Function): func = cast(LambdaFunction, function) self.get_lambda_client().update_function_configuration( diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f1831459..a85479ba 100644 --- 
a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -2,6 +2,7 @@ import concurrent.futures import datetime import json +import time from typing import Dict, Optional # noqa from sebs.aws.aws import AWS @@ -31,12 +32,21 @@ def deployment_client(self, deployment_client: AWS): def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.LIBRARY + def serialize(self) -> dict: + return {"type": "Library", "name": self.name} + + @staticmethod + def deserialize(obj: dict) -> Trigger: + return LibraryTrigger(obj["name"]) + + +class FunctionLibraryTrigger(LibraryTrigger): def sync_invoke(self, payload: dict) -> ExecutionResult: - self.logging.debug(f"Invoke function {self.name}") + self.logging.debug(f"Invoke workflow {self.name}") serialized_payload = json.dumps(payload).encode("utf-8") - client = self.deployment_client.get_lambda_client() + client = self.deployment_client.get_sfn_client() begin = datetime.datetime.now() ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() @@ -84,14 +94,46 @@ def async_invoke(self, payload: dict): raise RuntimeError() return ret - def serialize(self) -> dict: - return {"type": "Library", "name": self.name} - @staticmethod - def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) +class WorkflowLibraryTrigger(LibraryTrigger): + def sync_invoke(self, payload: dict) -> ExecutionResult: + + self.logging.debug(f"Invoke workflow {self.name}") + + client = self.deployment_client.get_sfn_client() + begin = datetime.datetime.now() + ret = client.start_execution(stateMachineArn=self.name, input=json.dumps(payload)) + end = datetime.datetime.now() + + aws_result = ExecutionResult.from_times(begin, end) + aws_result.request_id = ret["ResponseMetadata"]["RequestId"] + execution_arn = ret["executionArn"] + + # Wait for execution to finish, then print results. + execution_finished = False + backoff_delay = 1 # Start wait with delay of 1 second + while (not execution_finished): + execution = client.describe_execution(executionArn=execution_arn) + status = execution["status"] + execution_finished = status != "RUNNING" + + # If we haven't seen the result yet, wait a second. + if not execution_finished: + time.sleep(backoff_delay) + backoff_delay *= 2 # Double the delay to provide exponential backoff. 
+ elif status == "FAILED": + self.logging.error(f"Invocation of {self.name} failed") + self.logging.error(f"Input: {payload}") + aws_result.stats.failure = True + return aws_result + return aws_result + + def async_invoke(self, payload: dict): + raise NotImplementedError('Async invocation is not implemented') + + class HTTPTrigger(Trigger): def __init__(self, url: str, api_id: str): super().__init__() @@ -122,4 +164,4 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"], obj["api-id"]) + return HTTPTrigger(obj["url"], obj["api-id"]) \ No newline at end of file diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py new file mode 100644 index 00000000..1cb27aa9 --- /dev/null +++ b/sebs/aws/workflow.py @@ -0,0 +1,58 @@ +from typing import cast, Optional, List + +from sebs.aws.s3 import S3 +from sebs.aws.function import LambdaFunction +from sebs.faas.workflow import Workflow + + +class SFNWorkflow(Workflow): + def __init__( + self, + name: str, + functions: List[LambdaFunction], + benchmark: str, + arn: str, + code_package_hash: str, + role: str + ): + super().__init__(benchmark, name, code_package_hash) + self.functions = functions + self.arn = arn + self.role = role + + @staticmethod + def typename() -> str: + return "AWS.SFNWorkflow" + + def serialize(self) -> dict: + return { + **super().serialize(), + "functions": self.functions, + "arn": self.arn, + "role": self.role + } + + @staticmethod + def deserialize(cached_config: dict) -> "SFNWorkflow": + from sebs.faas.function import Trigger + from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + + ret = LambdaWorkflow( + cached_config["name"], + cached_config["functions"], + cached_config["hash"], + cached_config["arn"], + cached_config["role"] + ) + for trigger in cached_config["triggers"]: + trigger_type = cast( + Trigger, + {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + ) + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + ret.add_trigger(trigger_type.deserialize(trigger)) + return ret + + def code_bucket(self, benchmark: str, storage_client: S3): + self.bucket, idx = storage_client.add_input_bucket(benchmark) + return self.bucket diff --git a/sebs/benchmark.py b/sebs/benchmark.py index a631f2d8..6b6bd5d8 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -162,6 +162,13 @@ def __init__( self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) ) + definition_path = os.path.join(self._benchmark_path, "definition.json") + if os.path.exists(definition_path): + with open(definition_path) as json_file: + self.workflow_definition = json.load(json_file) + else: + self.workflow_definition = None + if self.language not in self.benchmark_config.languages: raise RuntimeError( "Benchmark {} not available for language {}".format(self.benchmark, self.language) @@ -233,15 +240,29 @@ def query_cache(self): self._is_cached = False self._is_cached_valid = False - def copy_code(self, output_dir): + def get_code_files(self, include_config=True): FILES = { - "python": ["*.py", "requirements.txt*"], - "nodejs": ["*.js", "package.json"], + "python": ["*.py"], + "nodejs": ["*.js"], } + if include_config: + FILES["python"].append("requirements.txt*") + FILES["nodejs"].append("package.json") + path = os.path.join(self.benchmark_path, self.language_name) for file_type in FILES[self.language_name]: for f in glob.glob(os.path.join(path, file_type)): - shutil.copy2(os.path.join(path, f), output_dir) + yield 
os.path.join(path, f) + + def copy_code(self, output_dir): + for path in self.get_code_files(): + shutil.copy2(path, output_dir) + + # For python, add an __init__ file + if self.language_name == "python": + path = os.path.join(output_dir, "__init__.py") + with open(path, 'a'): + os.utime(path, None) def add_benchmark_data(self, output_dir): cmd = "/bin/bash {benchmark_path}/init.sh {output_dir} false" diff --git a/sebs/faas/system.py b/sebs/faas/system.py index f908a596..8c7586cb 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -113,7 +113,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> Function: pass @abstractmethod - def create_workflow(self, code_package: Benchmark, workflow_name: str): + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workflow: pass @abstractmethod diff --git a/sebs/faas/workflow.py b/sebs/faas/workflow.py index 3aa02cba..6c404475 100644 --- a/sebs/faas/workflow.py +++ b/sebs/faas/workflow.py @@ -76,5 +76,5 @@ def serialize(self) -> dict: @staticmethod @abstractmethod - def deserialize(cached_config: dict) -> "Function": + def deserialize(cached_config: dict) -> "Workflow": pass From 98ca73fb05c3b41285ed030975c5e4b19692ca9c Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 9 Mar 2022 13:42:15 +0100 Subject: [PATCH 09/68] Fix trigger deserialization --- sebs/aws/function.py | 4 ++-- sebs/aws/triggers.py | 6 +++--- sebs/aws/workflow.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 36b52c27..28468f9d 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -43,7 +43,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": from sebs.faas.function import Trigger - from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + from sebs.aws.triggers import FunctionLibraryTrigger, HTTPTrigger ret = LambdaFunction( cached_config["name"], @@ -59,7 +59,7 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index a85479ba..a80b18e0 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -35,9 +35,9 @@ def trigger_type() -> Trigger.TriggerType: def serialize(self) -> dict: return {"type": "Library", "name": self.name} - @staticmethod - def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + @classmethod + def deserialize(cls, obj: dict) -> Trigger: + return cls(obj["name"]) class FunctionLibraryTrigger(LibraryTrigger): diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index 1cb27aa9..032f3a39 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -35,7 +35,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "SFNWorkflow": from sebs.faas.function import Trigger - from sebs.aws.triggers import LibraryTrigger, HTTPTrigger + from sebs.aws.triggers import WorkflowLibraryTrigger, HTTPTrigger ret = LambdaWorkflow( cached_config["name"], @@ -47,7 +47,7 @@ def deserialize(cached_config: dict) -> "SFNWorkflow": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": 
LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) From fe4d4731e57b5af4669f4d010d9cf7ca385de1e1 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 9 Mar 2022 13:42:58 +0100 Subject: [PATCH 10/68] Fix gcp trigger deserialization --- sebs/gcp/triggers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index c1dddb09..06e18bc6 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -41,9 +41,9 @@ def async_invoke(self, payload: dict): def serialize(self) -> dict: return {"type": "Library", "name": self.name} - @staticmethod - def deserialize(obj: dict) -> Trigger: - return LibraryTrigger(obj["name"]) + @classmethod + def deserialize(cls, obj: dict) -> Trigger: + return cls(obj["name"]) class FunctionLibraryTrigger(LibraryTrigger): From a15dff010daa8499f2007ac10f34e5356cbaa9e9 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 9 Mar 2022 13:45:53 +0100 Subject: [PATCH 11/68] Make Trigger.deserialize a classmethod --- sebs/azure/triggers.py | 4 ++-- sebs/faas/function.py | 4 ++-- sebs/gcp/triggers.py | 4 ++-- sebs/local/function.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 66be8c6d..f746385f 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -42,6 +42,6 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: return {"type": "HTTP", "url": self.url} - @staticmethod - def deserialize(obj: dict) -> Trigger: + @classmethod + def deserialize(cls, obj: dict) -> Trigger: return HTTPTrigger(obj["url"]) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 37ea596f..4766b899 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -247,9 +247,9 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: pass - @staticmethod + @classmethod @abstractmethod - def deserialize(cached_config: dict) -> "Trigger": + def deserialize(cls, cached_config: dict) -> "Trigger": pass diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 06e18bc6..139547f6 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -161,6 +161,6 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: return {"type": "HTTP", "url": self.url} - @staticmethod - def deserialize(obj: dict) -> Trigger: + @classmethod + def deserialize(cls, obj: dict) -> Trigger: return HTTPTrigger(obj["url"]) diff --git a/sebs/local/function.py b/sebs/local/function.py index 8bf408be..cb6e19b5 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -30,8 +30,8 @@ def async_invoke(self, payload: dict) -> concurrent.futures.Future: def serialize(self) -> dict: return {"type": "HTTP", "url": self.url} - @staticmethod - def deserialize(obj: dict) -> Trigger: + @classmethod + def deserialize(cls, obj: dict) -> Trigger: return HTTPTrigger(obj["url"]) From e7e2bfd4c74289569ecddbc951d24029022ef8d3 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 11 Mar 2022 14:53:05 +0100 Subject: [PATCH 12/68] Run azure cmds as docker_user --- sebs/azure/cli.py | 2 +- tools/create_azure_credentials.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py 
index 5a69a843..f98226e4 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -41,7 +41,7 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): """ def execute(self, cmd: str): - exit_code, out = self.docker_instance.exec_run(cmd) + exit_code, out = self.docker_instance.exec_run(cmd, user="docker_user") if exit_code != 0: raise RuntimeError( "Command {} failed at Azure CLI docker!\n Output {}".format( diff --git a/tools/create_azure_credentials.py b/tools/create_azure_credentials.py index 88f4db07..50260745 100755 --- a/tools/create_azure_credentials.py +++ b/tools/create_azure_credentials.py @@ -40,14 +40,14 @@ ) print('Please provide the intended principal name') principal_name = input() -_, out = container.exec_run("az login", stream=True) +_, out = container.exec_run("az login", user="docker_user", stream=True) print('Please follow the login instructions to generate credentials...') print(next(out).decode()) # wait for login finish ret = next(out) ret_json = json.loads(ret.decode()) print('Loggin succesfull with user {}'.format(ret_json[0]['user'])) -status, out = container.exec_run("az ad sp create-for-rbac --name {} --only-show-errors".format(principal_name)) +status, out = container.exec_run("az ad sp create-for-rbac --name {} --only-show-errors".format(principal_name), user="docker_user") if status: print('Unsuccesfull principal creation!') print(out.decode()) From 9b7323b09adcae8c7d1952ade3a24ca6976d974d Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 14 Mar 2022 13:01:55 +0100 Subject: [PATCH 13/68] Add __init__ for aws only --- sebs/aws/aws.py | 6 ++++++ sebs/benchmark.py | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 02377578..31accd5c 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -149,6 +149,12 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu if file not in package_config: file = os.path.join(directory, file) shutil.move(file, function_dir) + + # For python, add an __init__ file + if language_name == "python": + path = os.path.join(function_dir, "__init__.py") + with open(path, "a"): + os.utime(path, None) # FIXME: use zipfile # create zip with hidden directory but without parent directory diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 6b6bd5d8..43292ad4 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -257,12 +257,6 @@ def get_code_files(self, include_config=True): def copy_code(self, output_dir): for path in self.get_code_files(): shutil.copy2(path, output_dir) - - # For python, add an __init__ file - if self.language_name == "python": - path = os.path.join(output_dir, "__init__.py") - with open(path, 'a'): - os.utime(path, None) def add_benchmark_data(self, output_dir): cmd = "/bin/bash {benchmark_path}/init.sh {output_dir} false" From 0b95a111cc4138f9aeaf3031e0c5caf5b260fe7c Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 17 Mar 2022 09:21:17 +0100 Subject: [PATCH 14/68] Minor bug fixes and clean ups --- sebs.py | 10 +++++----- sebs/aws/aws.py | 1 - sebs/benchmark.py | 2 ++ sebs/faas/function.py | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sebs.py b/sebs.py index 8808c38d..91844385 100755 --- a/sebs.py +++ b/sebs.py @@ -246,17 +246,17 @@ def function(benchmark, benchmark_input_size, repetitions, trigger, function_nam @click.option( "--trigger", type=click.Choice(["library", "http"]), - default="library", + default="http", help="Workflow trigger to be used." 
) @click.option( - "--function-name", + "--workflow-name", default=None, type=str, - help="Override function name for random generation.", + help="Override workflow name for random generation.", ) @common_params -def workflow(benchmark, benchmark_input_size, repetitions, trigger, function_name, **kwargs): +def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_name, **kwargs): ( config, @@ -274,7 +274,7 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, function_nam logging_filename=logging_filename, ) workflow = deployment_client.get_workflow( - benchmark_obj, 'test' #function_name if function_name else deployment_client.default_function_name(benchmark_obj) + benchmark_obj, workflow_name if workflow_name else deployment_client.default_function_name(benchmark_obj) ) storage = deployment_client.get_storage( replace_existing=experiment_config.update_storage diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 31accd5c..45bce484 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -341,7 +341,6 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "Lambd # First we create a lambda function for each code file code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - # func_names = [fn for fn in func_names if '__init__' != fn] funcs = [self.create_function(code_package, workflow_name+"-"+fn, handler="function."+fn+".handler") for fn in func_names] # Set the ARN to the corresponding states in the workflow definition diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 43292ad4..ec7905c2 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -294,6 +294,8 @@ def add_deployment_package_python(self, output_dir): ) if len(packages): with open(os.path.join(output_dir, "requirements.txt"), "a") as out: + # make sure to start with a newline + out.write("\n") for package in packages: out.write(package) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 4766b899..c7302781 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -210,9 +210,9 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: try: output = json.loads(data.getvalue()) - + if status_code != 200: - self.logging.error("Invocation on URL {} failed!".format(url)) + self.logging.error("Invocation on URL {} failed with status code {}!".format(url, status_code)) self.logging.error("Output: {}".format(output)) raise RuntimeError(f"Failed invocation of function! Output: {output}") @@ -225,7 +225,7 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: result.parse_benchmark_output(output) return result except json.decoder.JSONDecodeError: - self.logging.error("Invocation on URL {} failed!".format(url)) + self.logging.error("Invocation on URL {} failed with status code {}!".format(url, status_code)) self.logging.error("Output: {}".format(data.getvalue().decode())) raise RuntimeError(f"Failed invocation of function! 
Output: {data.getvalue().decode()}") From a2d7a8128fe340b888c3ca7eeb632f779d73dc19 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 17 Mar 2022 09:22:05 +0100 Subject: [PATCH 15/68] Basic azure workflows --- benchmarks/wrappers/azure/python/handler.py | 10 +- sebs/azure/azure.py | 184 ++++++++++++++++++-- sebs/azure/workflow.py | 36 ++++ sebs/benchmark.py | 4 +- 4 files changed, 218 insertions(+), 16 deletions(-) create mode 100644 sebs/azure/workflow.py diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 5f7f14f2..422bfa1d 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -6,7 +6,7 @@ # TODO: usual trigger # implement support for blob and others -def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: +def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() if 'connection_string' in req_json: @@ -16,7 +16,13 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: begin = datetime.datetime.now() # We are deployed in the same directory from . import function - ret = function.handler(req_json) + kwargs = { + 'event': req_json, + 'starter': starter, + 'context': context + } + kwargs = {k:v for (k,v) in kwargs.items() if k in function.handler.__code__.co_varnames} + ret = function.handler(**kwargs) end = datetime.datetime.now() log_data = { diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index a12289e4..4423283e 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -1,5 +1,6 @@ import datetime import json +import glob import os import shutil import time @@ -10,6 +11,7 @@ from sebs.azure.blob_storage import BlobStorage from sebs.azure.cli import AzureCLI from sebs.azure.function import AzureFunction +from sebs.azure.workflow import AzureWorkflow from sebs.azure.config import AzureConfig, AzureResources from sebs.azure.triggers import AzureTrigger, HTTPTrigger from sebs.faas.function import Trigger @@ -18,6 +20,7 @@ from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers, execute from ..faas.function import Function, ExecutionResult +from ..faas.workflow import Workflow from ..faas.storage import PersistentStorage from ..faas.system import System @@ -106,15 +109,7 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.storage.replace_existing = replace_existing return self.storage - # Directory structure - # handler - # - source files - # - Azure wrappers - handler, storage - # - additional resources - # - function.json - # host.json - # requirements.txt/package.json - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code2(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure @@ -164,6 +159,90 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu code_size = Benchmark.directory_size(directory) execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) return directory, code_size + + # Directory structure + # handler + # - source files + # - Azure wrappers - handler, storage + # - additional resources + # - function.json + # host.json + # requirements.txt/package.json + def 
package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + + # In previous step we ran a Docker container which installed packages + # Python packages are in .python_packages because this is expected by Azure + FILES = {"python": "*.py", "nodejs": "*.js"} + CONFIG_FILES = { + "python": ["requirements.txt", ".python_packages"], + "nodejs": ["package.json", "node_modules"], + } + SUPPORTING_FILES = ["function", "storage"] + WRAPPER_FILES = ["handler"] + SUPPORTING_FILES + file_type = FILES[language_name] + package_config = CONFIG_FILES[language_name] + + # TODO: extension to other triggers than HTTP + default_function_json = { + "bindings": [ + { + "authLevel": "function", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["get", "post"], + }, + {"name": "starter", "type": "durableClient", "direction": "in"}, + {"name": "$return", "type": "http", "direction": "out"}, + ], + } + + for file_path in glob.glob(os.path.join(directory, file_type)): + file = os.path.basename(file_path) + + if file in package_config: + continue + + # move file directory/f.py to directory/f/f.py + name, ext = os.path.splitext(file) + if name in WRAPPER_FILES: + func_dir = os.path.join(directory, "handler") + else: + func_dir = os.path.join(directory, name) + + dst_file = os.path.join(func_dir, file) + src_file = os.path.join(directory, file) + if not os.path.exists(func_dir): + os.makedirs(func_dir) + shutil.move(src_file, dst_file) + + # generate function.json if none provided + # we don't do this for supporting files + if name in SUPPORTING_FILES: + continue + + src_json = os.path.join(directory, name+".json") + dst_json = os.path.join(os.path.dirname(dst_file), "function.json") + + if os.path.exists(src_json): + shutil.move(src_json, dst_json) + else: + default_function_json["scriptFile"] = file + json.dump(default_function_json, open(dst_json, "w"), indent=2) + + # generate host.json + default_host_json = { + "version": "2.0", + "extensionBundle": { + "id": "Microsoft.Azure.Functions.ExtensionBundle", + "version": "[2.*, 3.0.0)" + }, + } + json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) + + code_size = Benchmark.directory_size(directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + return directory, code_size def publish_function( self, @@ -340,6 +419,85 @@ def cached_function(self, function: Function): azure_trigger = cast(AzureTrigger, trigger) azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account + + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureFunction: + + language = code_package.language_name + language_runtime = code_package.language_version + resource_group = self.config.resources.resource_group(self.cli_instance) + region = self.config.region + + config = { + "resource_group": resource_group, + "workflow_name": workflow_name, + "region": region, + "runtime": self.AZURE_RUNTIMES[language], + "runtime_version": language_runtime, + } + + # check if function does not exist + # no API to verify existence + try: + ret = self.cli_instance.execute( + ( + " az functionapp config appsettings list " + " --resource-group {resource_group} " + " --name {workflow_name} " + ).format(**config) + ) + for setting in json.loads(ret.decode()): + if setting["name"] == "AzureWebJobsStorage": + connection_string = setting["value"] + elems = [z for y in connection_string.split(";") for z in 
y.split("=")] + account_name = elems[elems.index("AccountName") + 1] + function_storage_account = AzureResources.Storage.from_cache( + account_name, connection_string + ) + self.logging.info("Azure: Selected {} function app".format(workflow_name)) + except RuntimeError: + function_storage_account = self.config.resources.add_storage_account(self.cli_instance) + config["storage_account"] = function_storage_account.account_name + + # FIXME: only Linux type is supported + while True: + try: + # create function app + self.cli_instance.execute( + ( + " az functionapp create --resource-group {resource_group} " + " --os-type Linux --consumption-plan-location {region} " + " --runtime {runtime} --runtime-version {runtime_version} " + " --name {workflow_name} --storage-account {storage_account}" + ).format(**config) + ) + self.logging.info("Azure: Created workflow app {}".format(workflow_name)) + break + except RuntimeError as e: + # Azure does not allow some concurrent operations + if "another operation is in progress" in str(e): + self.logging.info( + f"Repeat {workflow_name} creation, another operation in progress" + ) + # Rethrow -> another error + else: + raise + workflow = AzureWorkflow( + name=workflow_name, + benchmark=code_package.benchmark, + code_hash=code_package.hash, + function_storage=function_storage_account, + ) + + # update existing function app + self.update_function(workflow, code_package) + + # self.cache_client.add_function( + # deployment_name=self.name(), + # language_name=language, + # code_package=code_package, + # function=function, + # ) + return workflow """ Prepare Azure resources to store experiment results. @@ -456,10 +614,12 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) The only implemented trigger at the moment is HTTPTrigger. It is automatically created for each function. 
""" - - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + + def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + raise NotImplementedError() + + def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: raise NotImplementedError() - # # def create_azure_function(self, fname, config): diff --git a/sebs/azure/workflow.py b/sebs/azure/workflow.py new file mode 100644 index 00000000..0a568325 --- /dev/null +++ b/sebs/azure/workflow.py @@ -0,0 +1,36 @@ +from sebs.azure.config import AzureResources +from sebs.faas.workflow import Workflow + + +class AzureWorkflow(Workflow): + def __init__( + self, + name: str, + benchmark: str, + code_hash: str, + function_storage: AzureResources.Storage, + ): + super().__init__(benchmark, name, code_hash) + self.function_storage = function_storage + + def serialize(self) -> dict: + return { + **super().serialize(), + "function_storage": self.function_storage.serialize(), + } + + @staticmethod + def deserialize(cached_config: dict) -> Workflow: + ret = AzureWorkflow( + cached_config["name"], + cached_config["benchmark"], + cached_config["hash"], + AzureResources.Storage.deserialize(cached_config["function_storage"]), + ) + from sebs.azure.triggers import HTTPTrigger + + for trigger in cached_config["triggers"]: + trigger_type = {"HTTP": HTTPTrigger}.get(trigger["type"]) + assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) + ret.add_trigger(trigger_type.deserialize(trigger)) + return ret diff --git a/sebs/benchmark.py b/sebs/benchmark.py index ec7905c2..c6c49342 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -246,8 +246,8 @@ def get_code_files(self, include_config=True): "nodejs": ["*.js"], } if include_config: - FILES["python"].append("requirements.txt*") - FILES["nodejs"].append("package.json") + FILES["python"] += ["requirements.txt*", "*.json"] + FILES["nodejs"] += ["package.json", "*.json"] path = os.path.join(self.benchmark_path, self.language_name) for file_type in FILES[self.language_name]: From a336c5532b8896095b4043ea84d47b158e81515e Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 17 Mar 2022 09:32:19 +0100 Subject: [PATCH 16/68] Cleanup benchmark interface --- sebs/aws/aws.py | 11 +++++++---- sebs/benchmark.py | 6 ------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 45bce484..5dfaaa2c 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -329,13 +329,16 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT self.cache_client.update_function(function) return trigger - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "LambdaWorkflow": + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWorkflow": workflow_name = AWS.format_resource_name(workflow_name) # Make sure we have a valid workflow benchmark - definition = code_package.workflow_definition - if not code_package.workflow_definition: + definition_path = os.path.join(code_package.benchmark_path, "definition.json") + if os.path.exists(definition_path): + with open(definition_path) as json_file: + definition = json.load(json_file) + else: raise ValueError(f"No workflow definition found for {workflow_name}") # First we create a lambda function for each code file @@ -416,7 +419,7 @@ def update_workflow(self, workflow: Workflow, definition: str, code_package: Ben def create_workflow_trigger(self, 
workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.aws.triggers import HTTPTrigger - workflow = cast(LambdaWorkflow, workflow) + workflow = cast(SFNWorkflow, workflow) if trigger_type == Trigger.TriggerType.HTTP: raise RuntimeError("Not supported!") diff --git a/sebs/benchmark.py b/sebs/benchmark.py index c6c49342..139c8ed9 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -162,12 +162,6 @@ def __init__( self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) ) - definition_path = os.path.join(self._benchmark_path, "definition.json") - if os.path.exists(definition_path): - with open(definition_path) as json_file: - self.workflow_definition = json.load(json_file) - else: - self.workflow_definition = None if self.language not in self.benchmark_config.languages: raise RuntimeError( From 714bed744dd03d3cb0cfb94ea63613e86da44a1a Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 17 Mar 2022 14:53:37 +0100 Subject: [PATCH 17/68] Make sure to update aws functions --- sebs/aws/aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 5dfaaa2c..44049e45 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -199,7 +199,7 @@ def create_function(self, code_package: Benchmark, func_name: str, handler: str= language_runtime, self.config.resources.lambda_role(self.session), ) - # self.update_function(lambda_function, code_package) + self.update_function(lambda_function, code_package) lambda_function.updated_code = True # TODO: get configuration of REST API except self.lambda_client.exceptions.ResourceNotFoundException: From 8a361fd7c1993e2293dec561c4c6a56038256ff7 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 17 Mar 2022 17:06:49 +0100 Subject: [PATCH 18/68] Wait for aws function before triggering --- sebs/aws/aws.py | 21 ++++++++++++++++++++- sebs/aws/triggers.py | 6 ++++-- sebs/faas/system.py | 4 ++-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 44049e45..03a11b39 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -168,6 +168,26 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size + def wait_for_function(self, func_name: str): + active = False + backoff_delay = 1 # Start wait with delay of 1 second + while (not active): + ret = self.lambda_client.get_function(FunctionName=func_name) + status = ret["Configuration"]["State"] + active = status == "Active" + + # If we haven't seen the result yet, wait a second. + if not active: + time.sleep(backoff_delay) + backoff_delay *= 2 # Double the delay to provide exponential backoff. 
+ elif status == "Failed": + self.logging.error(f"Cannot wait for failed {func_name}") + break + + if backoff_delay > 60: + self.logging.error(f"Function {func_name} stuck in state {status} after 60s") + break + def create_function(self, code_package: Benchmark, func_name: str, handler: str=None) -> "LambdaFunction": package = code_package.code_location @@ -305,7 +325,6 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT function = cast(LambdaFunction, func) if trigger_type == Trigger.TriggerType.HTTP: - api_name = "{}-http-api".format(function.name) http_api = self.config.resources.http_api(api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index a80b18e0..275c2dd4 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -43,10 +43,12 @@ def deserialize(cls, obj: dict) -> Trigger: class FunctionLibraryTrigger(LibraryTrigger): def sync_invoke(self, payload: dict) -> ExecutionResult: - self.logging.debug(f"Invoke workflow {self.name}") + self.logging.debug(f"Invoke function {self.name}") + + self.deployment_client.wait_for_function(self.name) serialized_payload = json.dumps(payload).encode("utf-8") - client = self.deployment_client.get_sfn_client() + client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 8c7586cb..33b578b4 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -301,9 +301,9 @@ def download_metrics( def create_trigger(self, obj, trigger_type: Trigger.TriggerType) -> Trigger: if isinstance(obj, Function): - self.create_function_trigger(obj, trigger_type) + return self.create_function_trigger(obj, trigger_type) elif isinstance(obj, Workflow): - self.create_workflow_trigger(obj, trigger_type) + return self.create_workflow_trigger(obj, trigger_type) else: raise TypeError("Cannot create trigger for {obj}") From faaa2c2a492a43a0d89ed68b3ac79d9d0adc95d0 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 18 Mar 2022 10:12:02 +0100 Subject: [PATCH 19/68] Improve function code update --- sebs/aws/aws.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 03a11b39..90931271 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -169,18 +169,19 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size def wait_for_function(self, func_name: str): - active = False + ready = False backoff_delay = 1 # Start wait with delay of 1 second - while (not active): + while (not ready): ret = self.lambda_client.get_function(FunctionName=func_name) - status = ret["Configuration"]["State"] - active = status == "Active" + state = ret["Configuration"]["State"] + update_status = ret["Configuration"]["LastUpdateStatus"] + ready = (state == "Active") and (update_status == "Successful") # If we haven't seen the result yet, wait a second. - if not active: + if not ready: time.sleep(backoff_delay) backoff_delay *= 2 # Double the delay to provide exponential backoff. 
- elif status == "Failed": + elif "Failed" in (state, update_status): self.logging.error(f"Cannot wait for failed {func_name}") break @@ -299,6 +300,7 @@ def update_function(self, function: Function, code_package: Benchmark): name = function.name code_size = code_package.code_size package = code_package.code_location + # Run AWS update # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: @@ -313,6 +315,10 @@ def update_function(self, function: Function, code_package: Benchmark): self.lambda_client.update_function_code( FunctionName=name, S3Bucket=bucket, S3Key=code_package_name ) + + # Wait for code update to finish before updating config + self.wait_for_function(name) + # and update config self.lambda_client.update_function_configuration( FunctionName=name, Timeout=function.timeout, MemorySize=function.memory From 1d4c3461dbea08b11f4fbe3c965bdd2604ddc527 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 22 Mar 2022 11:11:45 +0100 Subject: [PATCH 20/68] New aws role/policies --- sebs/aws/config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 250378ad..c930f666 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -131,11 +131,13 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: } ], } - role_name = "sebs-lambda-role" + role_name = "sebs-role" attached_policies = [ "arn:aws:iam::aws:policy/AmazonS3FullAccess", + "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess", "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", - "arn:aws:iam::aws:policy/service-role/AWSLambdaRole" + "arn:aws:iam::aws:policy/service-role/AWSLambdaRole", + "arn:aws:iam::aws:policy/AWSXRayDaemonWriteAccess" ] try: out = iam_client.get_role(RoleName=role_name) From e11748c1e4b5fdc6b2cb3315241b11aa00224904 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 22 Mar 2022 13:12:13 +0100 Subject: [PATCH 21/68] Cleanup --- sebs/aws/aws.py | 128 +++++++++++++++++++++++------------------ sebs/aws/config.py | 44 +++++++++----- sebs/aws/triggers.py | 40 +++++++------ sebs/aws/workflow.py | 4 +- sebs/azure/azure.py | 112 ++++++++++++++++++++++-------------- sebs/benchmark.py | 4 +- sebs/faas/function.py | 22 ++++--- sebs/faas/system.py | 18 +++--- sebs/faas/workflow.py | 7 +-- sebs/gcp/gcp.py | 129 ++++++++++++++++++++++++------------------ sebs/gcp/triggers.py | 31 +++++----- 11 files changed, 315 insertions(+), 224 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 90931271..56100d32 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -82,7 +82,7 @@ def get_lambda_client(self): region_name=self.config.region, ) return self.lambda_client - + def get_sfn_client(self): if not hasattr(self, "stepfunctions_client"): self.sfn_client = self.session.client( @@ -149,7 +149,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu if file not in package_config: file = os.path.join(directory, file) shutil.move(file, function_dir) - + # For python, add an __init__ file if language_name == "python": path = os.path.join(function_dir, "__init__.py") @@ -158,11 +158,13 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), + shell=True, cwd=directory) benchmark_archive = "{}.zip".format(os.path.join(directory, 
benchmark)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize( + os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) @@ -173,23 +175,25 @@ def wait_for_function(self, func_name: str): backoff_delay = 1 # Start wait with delay of 1 second while (not ready): ret = self.lambda_client.get_function(FunctionName=func_name) - state = ret["Configuration"]["State"] + state = ret["Configuration"]["State"] update_status = ret["Configuration"]["LastUpdateStatus"] ready = (state == "Active") and (update_status == "Successful") - + # If we haven't seen the result yet, wait a second. if not ready: time.sleep(backoff_delay) - backoff_delay *= 2 # Double the delay to provide exponential backoff. + # Double the delay to provide exponential backoff. + backoff_delay *= 2 elif "Failed" in (state, update_status): self.logging.error(f"Cannot wait for failed {func_name}") break - + if backoff_delay > 60: - self.logging.error(f"Function {func_name} stuck in state {status} after 60s") + self.logging.error( + f"Function {func_name} stuck in state {state} after 60s") break - def create_function(self, code_package: Benchmark, func_name: str, handler: str=None) -> "LambdaFunction": + def create_function(self, code_package: Benchmark, func_name: str, handler: str = None) -> "LambdaFunction": package = code_package.code_location benchmark = code_package.benchmark @@ -207,7 +211,8 @@ def create_function(self, code_package: Benchmark, func_name: str, handler: str= try: ret = self.lambda_client.get_function(FunctionName=func_name) self.logging.info( - "Function {} exists on AWS, retrieve configuration.".format(func_name) + "Function {} exists on AWS, retrieve configuration.".format( + func_name) ) # Here we assume a single Lambda role lambda_function = LambdaFunction( @@ -224,7 +229,8 @@ def create_function(self, code_package: Benchmark, func_name: str, handler: str= lambda_function.updated_code = True # TODO: get configuration of REST API except self.lambda_client.exceptions.ResourceNotFoundException: - self.logging.info("Creating function {} from {}".format(func_name, package)) + self.logging.info( + "Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size # Limit to 50 MB @@ -239,8 +245,10 @@ def create_function(self, code_package: Benchmark, func_name: str, handler: str= code_package_name = cast(str, os.path.basename(package)) code_bucket, idx = storage_client.add_input_bucket(benchmark) storage_client.upload(code_bucket, package, code_package_name) - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) - code_config = {"S3Bucket": code_bucket, "S3Key": code_package_name} + self.logging.info( + "Uploading function {} code to {}".format(func_name, code_bucket)) + code_config = {"S3Bucket": code_bucket, + "S3Key": code_package_name} ret = self.lambda_client.create_function( FunctionName=func_name, Runtime="{}{}".format(language, language_runtime), @@ -300,12 +308,13 @@ def update_function(self, function: Function, code_package: Benchmark): name = function.name code_size = code_package.code_size package = code_package.code_location - + # Run AWS update # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - self.lambda_client.update_function_code(FunctionName=name, 
ZipFile=code_body.read()) + self.lambda_client.update_function_code( + FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -315,16 +324,16 @@ def update_function(self, function: Function, code_package: Benchmark): self.lambda_client.update_function_code( FunctionName=name, S3Bucket=bucket, S3Key=code_package_name ) - + # Wait for code update to finish before updating config self.wait_for_function(name) - + # and update config self.lambda_client.update_function_configuration( FunctionName=name, Timeout=function.timeout, MemorySize=function.memory ) self.logging.info("Published new function code") - + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.aws.triggers import HTTPTrigger @@ -332,7 +341,8 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api(api_name, function, self.session) + http_api = self.config.resources.http_api( + api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ # but this is wrong - source arn must be {api-arn}/*/* self.get_lambda_client().add_permission( @@ -353,34 +363,38 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT function.add_trigger(trigger) self.cache_client.update_function(function) return trigger - + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWorkflow": - + workflow_name = AWS.format_resource_name(workflow_name) - + # Make sure we have a valid workflow benchmark - definition_path = os.path.join(code_package.benchmark_path, "definition.json") + definition_path = os.path.join( + code_package.benchmark_path, "definition.json") if os.path.exists(definition_path): with open(definition_path) as json_file: definition = json.load(json_file) else: - raise ValueError(f"No workflow definition found for {workflow_name}") - + raise ValueError( + f"No workflow definition found for {workflow_name}") + # First we create a lambda function for each code file code_files = list(code_package.get_code_files(include_config=False)) - func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, workflow_name+"-"+fn, handler="function."+fn+".handler") for fn in func_names] - + func_names = [os.path.splitext(os.path.basename(p))[ + 0] for p in code_files] + funcs = [self.create_function( + code_package, workflow_name+"-"+fn, handler="function."+fn+".handler") for fn in func_names] + # Set the ARN to the corresponding states in the workflow definition for name, func in zip(func_names, funcs): try: definition["States"][name]["Resource"] = func.arn except KeyError: - raise ValueError(f"Workflow definition for {workflow_name} missing state {func.name}") - + raise ValueError( + f"Workflow definition for {workflow_name} missing state {func.name}") + package = code_package.code_location - - + # We cannot retrieve the state machine because we don't know its ARN # so we just create it and catch any errors try: @@ -389,9 +403,10 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo definition=json.dumps(definition), roleArn=self.config.resources.lambda_role(self.session), ) - - self.logging.info("Creating workflow {} from {}".format(workflow_name, package)) - + + 
self.logging.info( + "Creating workflow {} from {}".format(workflow_name, package)) + workflow = SFNWorkflow( workflow_name, funcs, @@ -402,11 +417,12 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo ) except self.sfn_client.exceptions.StateMachineAlreadyExists as e: arn = re.search("'([^']*)'", str(e)).group()[1:-1] - + self.logging.info( - "Workflow {} exists on AWS, retrieve configuration.".format(workflow_name) + "Workflow {} exists on AWS, retrieve configuration.".format( + workflow_name) ) - + # Here we assume a single Lambda role workflow = SFNWorkflow( workflow_name, @@ -416,7 +432,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo code_package.hash, self.config.resources.lambda_role(self.session), ) - + self.update_workflow(workflow, definition, code_package) workflow.updated_code = True @@ -428,36 +444,30 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo workflow.add_trigger(trigger) return workflow - + def update_workflow(self, workflow: Workflow, definition: str, code_package: Benchmark): workflow = cast(SFNWorkflow, workflow) # and update config self.sfn_client.update_state_machine( - stateMachineArn=workflow.arn, - definition=json.dumps(definition), + stateMachineArn=workflow.arn, + definition=json.dumps(definition), roleArn=self.config.resources.lambda_role(self.session), ) self.logging.info("Published new workflow code") def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.aws.triggers import HTTPTrigger - workflow = cast(SFNWorkflow, workflow) if trigger_type == Trigger.TriggerType.HTTP: raise RuntimeError("Not supported!") elif trigger_type == Trigger.TriggerType.LIBRARY: # should already exist - return func.triggers(Trigger.TriggerType.LIBRARY)[0] + return workflow.triggers(Trigger.TriggerType.LIBRARY)[0] else: raise RuntimeError("Not supported!") - workflow.add_trigger(trigger) - self.cache_client.update_function(function) - return trigger - @staticmethod def default_function_name(code_package: Benchmark) -> str: # Create function name @@ -527,10 +537,12 @@ def parse_aws_report( return request_id output = requests[request_id] output.request_id = request_id - output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) + output.provider_times.execution = int( + float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: - output.provider_times.initialization = int(float(aws_vals["Init Duration"]) * 1000) + output.provider_times.initialization = int( + float(aws_vals["Init Duration"]) * 1000) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory @@ -564,12 +576,14 @@ def get_invocation_error(self, function_name: str, start_time: int, end_time: in time.sleep(5) response = self.logs_client.get_query_results(queryId=query_id) if len(response["results"]) == 0: - self.logging.info("AWS logs are not yet available, repeat after 15s...") + self.logging.info( + "AWS logs are not yet available, repeat after 15s...") time.sleep(15) response = None else: break - self.logging.error(f"Invocation error for AWS Lambda function {function_name}") + self.logging.error( + f"Invocation error for AWS Lambda function {function_name}") for message in response["results"]: for value in message: if value["field"] == 
"@message": @@ -616,7 +630,8 @@ def download_metrics( for val in results: for result_part in val: if result_part["field"] == "@message": - request_id = AWS.parse_aws_report(result_part["value"], requests) + request_id = AWS.parse_aws_report( + result_part["value"], requests) if request_id in requests: results_processed += 1 requests_ids.remove(request_id) @@ -631,7 +646,8 @@ def _enforce_cold_start(self, function: Function): FunctionName=func.name, Timeout=func.timeout, MemorySize=func.memory, - Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, + Environment={"Variables": { + "ForceColdStart": str(self.cold_start_counter)}}, ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): diff --git a/sebs/aws/config.py b/sebs/aws/config.py index c930f666..13fa49e3 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -47,13 +47,15 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden ret: AWSCredentials # Load cached values if cached_config and "credentials" in cached_config: - ret = cast(AWSCredentials, AWSCredentials.initialize(cached_config["credentials"])) + ret = cast(AWSCredentials, AWSCredentials.initialize( + cached_config["credentials"])) ret.logging_handlers = handlers ret.logging.info("Using cached credentials for AWS") else: # Check for new config if "credentials" in config: - ret = cast(AWSCredentials, AWSCredentials.initialize(config["credentials"])) + ret = cast(AWSCredentials, AWSCredentials.initialize( + config["credentials"])) elif "AWS_ACCESS_KEY_ID" in os.environ: ret = AWSCredentials( os.environ["AWS_ACCESS_KEY_ID"], os.environ["AWS_SECRET_ACCESS_KEY"] @@ -64,13 +66,16 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden "up environmental variables AWS_ACCESS_KEY_ID and " "AWS_SECRET_ACCESS_KEY" ) - ret.logging.info("No cached credentials for AWS found, initialize!") + ret.logging.info( + "No cached credentials for AWS found, initialize!") ret.logging_handlers = handlers return ret def update_cache(self, cache: Cache): - cache.update_config(val=self.access_key, keys=["aws", "credentials", "access_key"]) - cache.update_config(val=self.secret_key, keys=["aws", "credentials", "secret_key"]) + cache.update_config(val=self.access_key, keys=[ + "aws", "credentials", "access_key"]) + cache.update_config(val=self.secret_key, keys=[ + "aws", "credentials", "secret_key"]) def serialize(self) -> dict: out = {"access_key": self.access_key, "secret_key": self.secret_key} @@ -123,7 +128,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: "Effect": "Allow", "Principal": { "Service": [ - "lambda.amazonaws.com", + "lambda.amazonaws.com", "states.amazonaws.com" ] }, @@ -142,7 +147,8 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: try: out = iam_client.get_role(RoleName=role_name) self._lambda_role = out["Role"]["Arn"] - self.logging.info(f"AWS: Selected {self._lambda_role} IAM role") + self.logging.info( + f"AWS: Selected {self._lambda_role} IAM role") except iam_client.exceptions.NoSuchEntityException: out = iam_client.create_role( RoleName=role_name, @@ -156,7 +162,8 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: time.sleep(10) # Attach basic AWS Lambda and S3 policies. 
for policy in attached_policies: - iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy) + iam_client.attach_role_policy( + RoleName=role_name, PolicyArn=policy) return self._lambda_role def http_api( @@ -213,9 +220,11 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): - cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) + cache.update_config(val=self._lambda_role, keys=[ + "aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): - cache.update_config(val=api.serialize(), keys=["aws", "resources", "http-apis", name]) + cache.update_config(val=api.serialize(), keys=[ + "aws", "resources", "http-apis", name]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -224,15 +233,18 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour ret: AWSResources # Load cached values if cached_config and "resources" in cached_config: - ret = cast(AWSResources, AWSResources.initialize(cached_config["resources"])) + ret = cast(AWSResources, AWSResources.initialize( + cached_config["resources"])) ret.logging_handlers = handlers ret.logging.info("Using cached resources for AWS") else: # Check for new config if "resources" in config: - ret = cast(AWSResources, AWSResources.initialize(config["resources"])) + ret = cast(AWSResources, AWSResources.initialize( + config["resources"])) ret.logging_handlers = handlers - ret.logging.info("No cached resources for AWS found, using user configuration.") + ret.logging.info( + "No cached resources for AWS found, using user configuration.") else: ret = AWSResources(lambda_role="") ret.logging_handlers = handlers @@ -270,8 +282,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) - resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) + credentials = cast( + AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) + resources = cast(AWSResources, AWSResources.deserialize( + config, cache, handlers)) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 275c2dd4..4288903b 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -44,25 +44,28 @@ class FunctionLibraryTrigger(LibraryTrigger): def sync_invoke(self, payload: dict) -> ExecutionResult: self.logging.debug(f"Invoke function {self.name}") - + self.deployment_client.wait_for_function(self.name) serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") + ret = client.invoke(FunctionName=self.name, + Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] if ret["StatusCode"] != 200: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result if "FunctionError" in ret: 
self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result self.logging.debug(f"Invoke of function {self.name} was successful") @@ -76,7 +79,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: if isinstance(function_output["body"], dict): aws_result.parse_benchmark_output(function_output["body"]) else: - aws_result.parse_benchmark_output(json.loads(function_output["body"])) + aws_result.parse_benchmark_output( + json.loads(function_output["body"])) return aws_result def async_invoke(self, payload: dict): @@ -91,8 +95,10 @@ def async_invoke(self, payload: dict): LogType="Tail", ) if ret["StatusCode"] != 202: - self.logging.error("Async invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) + self.logging.error( + "Async invocation of {} failed!".format(self.name)) + self.logging.error("Input: {}".format( + serialized_payload.decode("utf-8"))) raise RuntimeError() return ret @@ -104,25 +110,27 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: client = self.deployment_client.get_sfn_client() begin = datetime.datetime.now() - ret = client.start_execution(stateMachineArn=self.name, input=json.dumps(payload)) + ret = client.start_execution( + stateMachineArn=self.name, input=json.dumps(payload)) end = datetime.datetime.now() - + aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] execution_arn = ret["executionArn"] - + # Wait for execution to finish, then print results. execution_finished = False backoff_delay = 1 # Start wait with delay of 1 second while (not execution_finished): execution = client.describe_execution(executionArn=execution_arn) - status = execution["status"] + status = execution["status"] execution_finished = status != "RUNNING" - + # If we haven't seen the result yet, wait a second. if not execution_finished: time.sleep(backoff_delay) - backoff_delay *= 2 # Double the delay to provide exponential backoff. + # Double the delay to provide exponential backoff. 
+ backoff_delay *= 2 elif status == "FAILED": self.logging.error(f"Invocation of {self.name} failed") self.logging.error(f"Input: {payload}") @@ -134,8 +142,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict): raise NotImplementedError('Async invocation is not implemented') - - + + class HTTPTrigger(Trigger): def __init__(self, url: str, api_id: str): super().__init__() @@ -166,4 +174,4 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> Trigger: - return HTTPTrigger(obj["url"], obj["api-id"]) \ No newline at end of file + return HTTPTrigger(obj["url"], obj["api-id"]) diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index 032f3a39..be3b1962 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -1,4 +1,4 @@ -from typing import cast, Optional, List +from typing import cast, List from sebs.aws.s3 import S3 from sebs.aws.function import LambdaFunction @@ -37,7 +37,7 @@ def deserialize(cached_config: dict) -> "SFNWorkflow": from sebs.faas.function import Trigger from sebs.aws.triggers import WorkflowLibraryTrigger, HTTPTrigger - ret = LambdaWorkflow( + ret = SFNWorkflow( cached_config["name"], cached_config["functions"], cached_config["hash"], diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 4423283e..be0b9e98 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -101,7 +101,8 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.storage = BlobStorage( self.config.region, self.cache_client, - self.config.resources.data_storage_account(self.cli_instance).connection_string, + self.config.resources.data_storage_account( + self.cli_instance).connection_string, replace_existing=replace_existing, ) self.storage.logging_handlers = self.logging_handlers @@ -154,12 +155,14 @@ def package_code2(self, directory: str, language_name: str, benchmark: str) -> T "version": "[1.*, 2.0.0)", }, } - json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) + json.dump(default_host_json, open( + os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), + shell=True, cwd=directory) return directory, code_size - + # Directory structure # handler # - source files @@ -181,7 +184,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu WRAPPER_FILES = ["handler"] + SUPPORTING_FILES file_type = FILES[language_name] package_config = CONFIG_FILES[language_name] - + # TODO: extension to other triggers than HTTP default_function_json = { "bindings": [ @@ -197,33 +200,33 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu ], } - for file_path in glob.glob(os.path.join(directory, file_type)): + for file_path in glob.glob(os.path.join(directory, file_type)): file = os.path.basename(file_path) - + if file in package_config: continue - + # move file directory/f.py to directory/f/f.py name, ext = os.path.splitext(file) if name in WRAPPER_FILES: - func_dir = os.path.join(directory, "handler") + func_dir = os.path.join(directory, "handler") else: func_dir = os.path.join(directory, name) - + dst_file = os.path.join(func_dir, file) src_file = os.path.join(directory, file) if not os.path.exists(func_dir): os.makedirs(func_dir) shutil.move(src_file, dst_file) - + # generate function.json if none provided # we don't do this for 
supporting files if name in SUPPORTING_FILES: continue - + src_json = os.path.join(directory, name+".json") dst_json = os.path.join(os.path.dirname(dst_file), "function.json") - + if os.path.exists(src_json): shutil.move(src_json, dst_json) else: @@ -238,10 +241,12 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu "version": "[2.*, 3.0.0)" }, } - json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) + json.dump(default_host_json, open( + os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) + execute("zip -qu -r9 {}.zip * .".format(benchmark), + shell=True, cwd=directory) return directory, code_size def publish_function( @@ -252,7 +257,8 @@ def publish_function( ) -> str: success = False url = "" - self.logging.info("Attempting publish of function {}".format(function.name)) + self.logging.info( + "Attempting publish of function {}".format(function.name)) while not success: try: ret = self.cli_instance.execute( @@ -276,7 +282,8 @@ def publish_function( url = line.split("Invoke url:")[1].strip() break if url == "": - raise RuntimeError("Couldnt find URL in {}".format(ret.decode("utf-8"))) + raise RuntimeError( + "Couldnt find URL in {}".format(ret.decode("utf-8"))) success = True except RuntimeError as e: error = str(e) @@ -312,12 +319,14 @@ def update_function(self, function: Function, code_package: Benchmark): self._mount_function_code(code_package) url = self.publish_function(function, code_package, True) - trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) + trigger = HTTPTrigger( + url, self.config.resources.data_storage_account(self.cli_instance)) trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) def _mount_function_code(self, code_package: Benchmark): - self.cli_instance.upload_package(code_package.code_location, "/mnt/function/") + self.cli_instance.upload_package( + code_package.code_location, "/mnt/function/") def default_function_name(self, code_package: Benchmark) -> str: """ @@ -338,7 +347,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct language = code_package.language_name language_runtime = code_package.language_version - resource_group = self.config.resources.resource_group(self.cli_instance) + resource_group = self.config.resources.resource_group( + self.cli_instance) region = self.config.region config = { @@ -362,14 +372,17 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct for setting in json.loads(ret.decode()): if setting["name"] == "AzureWebJobsStorage": connection_string = setting["value"] - elems = [z for y in connection_string.split(";") for z in y.split("=")] + elems = [z for y in connection_string.split( + ";") for z in y.split("=")] account_name = elems[elems.index("AccountName") + 1] function_storage_account = AzureResources.Storage.from_cache( account_name, connection_string ) - self.logging.info("Azure: Selected {} function app".format(func_name)) + self.logging.info( + "Azure: Selected {} function app".format(func_name)) except RuntimeError: - function_storage_account = self.config.resources.add_storage_account(self.cli_instance) + function_storage_account = self.config.resources.add_storage_account( + self.cli_instance) config["storage_account"] = function_storage_account.account_name # FIXME: only Linux type is supported while 
True: @@ -383,7 +396,8 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct " --name {func_name} --storage-account {storage_account}" ).format(**config) ) - self.logging.info("Azure: Created function app {}".format(func_name)) + self.logging.info( + "Azure: Created function app {}".format(func_name)) break except RuntimeError as e: # Azure does not allow some concurrent operations @@ -414,17 +428,19 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct def cached_function(self, function: Function): - data_storage_account = self.config.resources.data_storage_account(self.cli_instance) + data_storage_account = self.config.resources.data_storage_account( + self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account - + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureFunction: language = code_package.language_name language_runtime = code_package.language_version - resource_group = self.config.resources.resource_group(self.cli_instance) + resource_group = self.config.resources.resource_group( + self.cli_instance) region = self.config.region config = { @@ -448,16 +464,19 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureF for setting in json.loads(ret.decode()): if setting["name"] == "AzureWebJobsStorage": connection_string = setting["value"] - elems = [z for y in connection_string.split(";") for z in y.split("=")] + elems = [z for y in connection_string.split( + ";") for z in y.split("=")] account_name = elems[elems.index("AccountName") + 1] function_storage_account = AzureResources.Storage.from_cache( account_name, connection_string ) - self.logging.info("Azure: Selected {} function app".format(workflow_name)) + self.logging.info( + "Azure: Selected {} function app".format(workflow_name)) except RuntimeError: - function_storage_account = self.config.resources.add_storage_account(self.cli_instance) + function_storage_account = self.config.resources.add_storage_account( + self.cli_instance) config["storage_account"] = function_storage_account.account_name - + # FIXME: only Linux type is supported while True: try: @@ -470,7 +489,8 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureF " --name {workflow_name} --storage-account {storage_account}" ).format(**config) ) - self.logging.info("Azure: Created workflow app {}".format(workflow_name)) + self.logging.info( + "Azure: Created workflow app {}".format(workflow_name)) break except RuntimeError as e: # Azure does not allow some concurrent operations @@ -508,7 +528,8 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureF """ def prepare_experiment(self, benchmark: str): - logs_container = self.storage.add_output_bucket(benchmark, suffix="logs") + logs_container = self.storage.add_output_bucket( + benchmark, suffix="logs") return logs_container def download_metrics( @@ -520,7 +541,8 @@ def download_metrics( metrics: Dict[str, dict], ): - resource_group = self.config.resources.resource_group(self.cli_instance) + resource_group = self.config.resources.resource_group( + self.cli_instance) # Avoid warnings in the next step ret = self.cli_instance.execute( "az feature register --name AIWorkspacePreview " "--namespace microsoft.insights" @@ -539,7 +561,8 @@ def download_metrics( start_time_str = 
datetime.datetime.fromtimestamp(start_time).strftime( "%Y-%m-%d %H:%M:%S.%f" ) - end_time_str = datetime.datetime.fromtimestamp(end_time + 1).strftime("%Y-%m-%d %H:%M:%S") + end_time_str = datetime.datetime.fromtimestamp( + end_time + 1).strftime("%Y-%m-%d %H:%M:%S") from tzlocal import get_localzone timezone_str = datetime.datetime.now(get_localzone()).strftime("%z") @@ -578,21 +601,24 @@ def download_metrics( # duration = request[4] func_exec_time = request[-1] invocations_processed.add(invocation_id) - requests[invocation_id].provider_times.execution = int(float(func_exec_time) * 1000) + requests[invocation_id].provider_times.execution = int( + float(func_exec_time) * 1000) self.logging.info( f"Azure: Found time metrics for {len(invocations_processed)} " f"out of {len(requests.keys())} invocations." ) if len(invocations_processed) < len(requests.keys()): time.sleep(5) - self.logging.info(f"Missing the requests: {invocations_to_process - invocations_processed}") + self.logging.info( + f"Missing the requests: {invocations_to_process - invocations_processed}") # TODO: query performance counters for mem def _enforce_cold_start(self, function: Function, code_package: Benchmark): fname = function.name - resource_group = self.config.resources.resource_group(self.cli_instance) + resource_group = self.config.resources.resource_group( + self.cli_instance) self.cli_instance.execute( f"az functionapp config appsettings set --name {fname} " @@ -614,11 +640,13 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) The only implemented trigger at the moment is HTTPTrigger. It is automatically created for each function. """ - - def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + + def create_function_trigger(self, function: Function, + trigger_type: Trigger.TriggerType) -> Trigger: raise NotImplementedError() - - def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + + def create_workflow_trigger(self, workflow: Workflow, + trigger_type: Trigger.TriggerType) -> Trigger: raise NotImplementedError() # diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 139c8ed9..4b415b05 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -162,7 +162,7 @@ def __init__( self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( json.load(json_file) ) - + if self.language not in self.benchmark_config.languages: raise RuntimeError( "Benchmark {} not available for language {}".format(self.benchmark, self.language) @@ -242,7 +242,7 @@ def get_code_files(self, include_config=True): if include_config: FILES["python"] += ["requirements.txt*", "*.json"] FILES["nodejs"] += ["package.json", "*.json"] - + path = os.path.join(self.benchmark_path, self.language_name) for file_type in FILES[self.language_name]: for f in glob.glob(os.path.join(path, file_type)): diff --git a/sebs/faas/function.py b/sebs/faas/function.py index c7302781..46e9a5fc 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -135,7 +135,8 @@ def from_times(client_time_begin: datetime, client_time_end: datetime) -> "Execu ret = ExecutionResult() ret.times.client_begin = client_time_begin ret.times.client_end = client_time_end - ret.times.client = int((client_time_end - client_time_begin) / timedelta(microseconds=1)) + ret.times.client = int( + (client_time_end - client_time_begin) / timedelta(microseconds=1)) return ret def parse_benchmark_output(self, output: dict): @@ -148,7 +149,7 @@ def 
parse_benchmark_output(self, output: dict): ) / timedelta(microseconds=1) ) - + def parse_benchmark_execution(self, execution: Execution): self.output = json.loads(execution.result) self.times.benchmark = int( @@ -161,7 +162,8 @@ def deserialize(cached_config: dict) -> "ExecutionResult": ret = ExecutionResult() ret.times = ExecutionTimes.deserialize(cached_config["times"]) ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) - ret.provider_times = ProviderTimes.deserialize(cached_config["provider_times"]) + ret.provider_times = ProviderTimes.deserialize( + cached_config["provider_times"]) ret.stats = ExecutionStats.deserialize(cached_config["stats"]) ret.request_id = cached_config["request_id"] ret.output = cached_config["output"] @@ -210,11 +212,13 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: try: output = json.loads(data.getvalue()) - + if status_code != 200: - self.logging.error("Invocation on URL {} failed with status code {}!".format(url, status_code)) + self.logging.error( + "Invocation on URL {} failed with status code {}!".format(url, status_code)) self.logging.error("Output: {}".format(output)) - raise RuntimeError(f"Failed invocation of function! Output: {output}") + raise RuntimeError( + f"Failed invocation of function! Output: {output}") self.logging.debug("Invoke of function was successful") result = ExecutionResult.from_times(begin, end) @@ -225,9 +229,11 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: result.parse_benchmark_output(output) return result except json.decoder.JSONDecodeError: - self.logging.error("Invocation on URL {} failed with status code {}!".format(url, status_code)) + self.logging.error( + "Invocation on URL {} failed with status code {}!".format(url, status_code)) self.logging.error("Output: {}".format(data.getvalue().decode())) - raise RuntimeError(f"Failed invocation of function! Output: {data.getvalue().decode()}") + raise RuntimeError( + f"Failed invocation of function! Output: {data.getvalue().decode()}") # FIXME: 3.7+, future annotations @staticmethod diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 33b578b4..a3baf2e6 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -111,7 +111,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu @abstractmethod def create_function(self, code_package: Benchmark, func_name: str) -> Function: pass - + @abstractmethod def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workflow: pass @@ -184,7 +184,8 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) function = self.function_type().deserialize(cached_function) self.cached_function(function) self.logging.info( - "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) + "Using cached function {fname} in {loc}".format( + fname=func_name, loc=code_location) ) # is the function up-to-date? 
if function.code_package_hash != code_package.hash or rebuilt: @@ -205,8 +206,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) ) code_package.query_cache() return function - - + def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = None): if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name @@ -222,10 +222,9 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N # if not workflow_name: # workflow_name = self.default_function_name(code_package) rebuilt, _ = code_package.build(self.package_code) - + # FIXME: cache workflows return self.create_workflow(code_package, workflow_name) - """ There's no function with that name? @@ -258,7 +257,8 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N function = self.function_type().deserialize(cached_function) self.cached_function(function) self.logging.info( - "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) + "Using cached function {fname} in {loc}".format( + fname=func_name, loc=code_location) ) # is the function up-to-date? if function.code_package_hash != code_package.hash or rebuilt: @@ -298,7 +298,7 @@ def download_metrics( metrics: dict, ): pass - + def create_trigger(self, obj, trigger_type: Trigger.TriggerType) -> Trigger: if isinstance(obj, Function): return self.create_function_trigger(obj, trigger_type) @@ -310,7 +310,7 @@ def create_trigger(self, obj, trigger_type: Trigger.TriggerType) -> Trigger: @abstractmethod def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: pass - + @abstractmethod def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: pass diff --git a/sebs/faas/workflow.py b/sebs/faas/workflow.py index 6c404475..86a7b5c6 100644 --- a/sebs/faas/workflow.py +++ b/sebs/faas/workflow.py @@ -1,10 +1,5 @@ -import json -from abc import ABC from abc import abstractmethod -import concurrent.futures -from datetime import datetime, timedelta -from enum import Enum -from typing import Callable, Dict, List, Optional # noqa +from typing import Callable, Dict, List # noqa from sebs.utils import LoggingBase from .function import Trigger diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 808a1b09..1e657b59 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -74,13 +74,14 @@ def function_type() -> "Type[Function]": """ def initialize(self, config: Dict[str, str] = {}): - self.function_client = build("cloudfunctions", "v1", cache_discovery=False) + self.function_client = build( + "cloudfunctions", "v1", cache_discovery=False) self.workflow_client = build("workflows", "v1", cache_discovery=False) self.get_storage() def get_function_client(self): return self.function_client - + def get_workflow_client(self): return self.workflow_client @@ -99,7 +100,8 @@ def get_storage( buckets=None, ) -> PersistentStorage: if not self.storage: - self.storage = GCPStorage(self.config.region, self.cache_client, replace_existing) + self.storage = GCPStorage( + self.config.region, self.cache_client, replace_existing) self.storage.logging_handlers = self.logging_handlers else: self.storage.replace_existing = replace_existing @@ -204,10 +206,13 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti code_package_name = cast(str, os.path.basename(package)) code_bucket, idx = storage_client.add_input_bucket(benchmark) 
storage_client.upload(code_bucket, package, code_package_name) - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + self.logging.info( + "Uploading function {} code to {}".format(func_name, code_bucket)) - full_func_name = GCP.get_full_function_name(project_name, location, func_name) - get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + full_func_name = GCP.get_full_function_name( + project_name, location, func_name) + get_req = self.function_client.projects( + ).locations().functions().get(name=full_func_name) try: get_req.execute() except HttpError: @@ -240,21 +245,24 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti body={ "policy": { "bindings": [ - {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} + {"role": "roles/cloudfunctions.invoker", + "members": ["allUsers"]} ] } }, ) ) allow_unauthenticated_req.execute() - self.logging.info(f"Function {func_name} accepts now unauthenticated invocations!") + self.logging.info( + f"Function {func_name} accepts now unauthenticated invocations!") function = GCPFunction( func_name, benchmark, code_package.hash, timeout, memory, code_bucket ) else: # if result is not empty, then function does exists - self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) + self.logging.info( + "Function {} exists on GCP, update the instance.".format(func_name)) function = GCPFunction( name=func_name, @@ -275,15 +283,18 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFuncti return function - def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger(self, function: Function, + trigger_type: Trigger.TriggerType) -> Trigger: from sebs.gcp.triggers import HTTPTrigger if trigger_type == Trigger.TriggerType.HTTP: location = self.config.region project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) - self.logging.info(f"Function {function.name} - waiting for deployment...") + full_func_name = GCP.get_full_function_name( + project_name, location, function.name) + self.logging.info( + f"Function {function.name} - waiting for deployment...") our_function_req = ( self.function_client.projects().locations().functions().get(name=full_func_name) ) @@ -325,7 +336,8 @@ def update_function(self, function: Function, code_package: Benchmark): bucket = function.code_bucket(code_package.benchmark, storage) storage.upload(bucket, code_package.code_location, code_package_name) - self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") + self.logging.info( + f"Uploaded new code package to {bucket}/{code_package_name}") full_func_name = GCP.get_full_function_name( self.config.project_name, self.config.region, function.name ) @@ -354,33 +366,31 @@ def update_function(self, function: Function, code_package: Benchmark): else: break self.logging.info("Published new function code and configuration.") - + @staticmethod def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" - - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWorkflow": - package = code_package.code_location + def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWorkflow": benchmark = code_package.benchmark - language_runtime = 
code_package.language_version timeout = code_package.benchmark_config.timeout memory = code_package.benchmark_config.memory code_bucket: Optional[str] = None - storage_client = self.get_storage() location = self.config.region project_name = self.config.project_name - full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) - get_req = self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) - + full_workflow_name = GCP.get_full_workflow_name( + project_name, location, workflow_name) + get_req = self.workflow_client.projects().locations( + ).workflows().get(name=full_workflow_name) + with open('cache/test.yml') as f: code = f.read() - + try: - get_result = get_req.execute() - except HttpError: - parent = GCP.get_location(project_name, location) + get_req.execute() + except HttpError: + parent = GCP.get_location(project_name, location) create_req = ( self.workflow_client.projects() .locations() @@ -402,8 +412,9 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWo ) else: # if result is not empty, then function does exists - self.logging.info("Workflow {} exists on GCP, update the instance.".format(workflow_name)) - + self.logging.info( + "Workflow {} exists on GCP, update the instance.".format(workflow_name)) + workflow = GCPWorkflow( name=workflow_name, benchmark=benchmark, @@ -422,12 +433,14 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWo workflow.add_trigger(trigger) return workflow - - def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + + def create_workflow_trigger(self, workflow: Workflow, + trigger_type: Trigger.TriggerType) -> Trigger: from sebs.gcp.triggers import WorkflowLibraryTrigger if trigger_type == Trigger.TriggerType.HTTP: - raise NotImplementedError('Cannot create http triggers for workflows.') + raise NotImplementedError( + 'Cannot create http triggers for workflows.') else: trigger = WorkflowLibraryTrigger(workflow.name, self) @@ -435,17 +448,12 @@ def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.Trig workflow.add_trigger(trigger) # self.cache_client.update_workflow(workflow) return trigger - - def update_workflow(self, workflow: Workflow, code_package: Benchmark): - workflow = cast(GCPWorkflow, workflow) - language_runtime = code_package.language_version - code_package_name = os.path.basename(code_package.code_location) - storage = cast(GCPStorage, self.get_storage()) - + def update_workflow(self, workflow: Workflow, code_package: Benchmark): with open('cache/test.yml') as f: code = f.read() + workflow = cast(GCPWorkflow, workflow) full_workflow_name = GCP.get_full_workflow_name( self.config.project_name, self.config.region, workflow.name ) @@ -461,9 +469,9 @@ def update_workflow(self, workflow: Workflow, code_package: Benchmark): }, ) ) - res = req.execute() + req.execute() self.logging.info("Published new workflow code and configuration.") - + @staticmethod def get_full_workflow_name(project_name: str, location: str, workflow_name: str): return f"projects/{project_name}/locations/{location}/workflows/{workflow_name}" @@ -489,7 +497,8 @@ def wrapper(gen): except StopIteration: break except exceptions.ResourceExhausted: - self.logging.info("Google Cloud resources exhausted, sleeping 30s") + self.logging.info( + "Google Cloud resources exhausted, sleeping 30s") sleep(30) """ @@ -501,7 +510,8 @@ def wrapper(gen): from google.cloud import logging as gcp_logging 
logging_client = gcp_logging.Client() - logger = logging_client.logger("cloudfunctions.googleapis.com%2Fcloud-functions") + logger = logging_client.logger( + "cloudfunctions.googleapis.com%2Fcloud-functions") """ GCP accepts only single date format: 'YYYY-MM-DDTHH:MM:SSZ'. @@ -543,7 +553,8 @@ def wrapper(gen): assert regex_result exec_time = regex_result.group().split()[0] # convert into microseconds - requests[execution_id].provider_times.execution = int(exec_time) * 1000 + requests[execution_id].provider_times.execution = int( + exec_time) * 1000 invocations_processed += 1 self.logging.info( f"GCP: Received {entries} entries, found time metrics for {invocations_processed} " @@ -557,7 +568,8 @@ def wrapper(gen): """ # Set expected metrics here - available_metrics = ["execution_times", "user_memory_bytes", "network_egress"] + available_metrics = ["execution_times", + "user_memory_bytes", "network_egress"] client = monitoring_v3.MetricServiceClient() project_name = client.common_project_path(self.config.project_name) @@ -578,7 +590,8 @@ def wrapper(gen): list_request = monitoring_v3.ListTimeSeriesRequest( name=project_name, - filter='metric.type = "cloudfunctions.googleapis.com/function/{}"'.format(metric), + filter='metric.type = "cloudfunctions.googleapis.com/function/{}"'.format( + metric), interval=interval, ) @@ -606,7 +619,8 @@ def _enforce_cold_start(self, function: Function): .patch( name=name, updateMask="environmentVariables", - body={"environmentVariables": {"cold_start": str(self.cold_start_counter)}}, + body={"environmentVariables": { + "cold_start": str(self.cold_start_counter)}}, ) ) res = req.execute() @@ -629,7 +643,8 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) if not self.is_deployed(func.name, versionId): undeployed_functions.append((versionId, func)) deployed = len(new_versions) - len(undeployed_functions) - self.logging.info(f"Redeployed {deployed} out of {len(new_versions)}") + self.logging.info( + f"Redeployed {deployed} out of {len(new_versions)}") if deployed == len(new_versions): deployment_done = True break @@ -655,8 +670,10 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L for func in undeployed_functions_before: if not self.is_deployed(func.name): undeployed_functions.append(func) - deployed = len(undeployed_functions_before) - len(undeployed_functions) - self.logging.info(f"Deployed {deployed} out of {len(undeployed_functions_before)}") + deployed = len(undeployed_functions_before) - \ + len(undeployed_functions) + self.logging.info( + f"Deployed {deployed} out of {len(undeployed_functions_before)}") if deployed == len(undeployed_functions_before): deployment_done = True break @@ -668,7 +685,8 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> bool: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) + name = GCP.get_full_function_name( + self.config.project_name, self.config.region, func_name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() @@ -678,12 +696,13 @@ def is_deployed(self, func_name: str, versionId: int = -1) -> bool: return status_res["versionId"] == versionId def deployment_version(self, func: Function) -> int: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) + name = 
GCP.get_full_function_name( + self.config.project_name, self.config.region, func.name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() return int(status_res["versionId"]) - + @staticmethod def get_location(project_name: str, location: str) -> str: return f"projects/{project_name}/locations/{location}" @@ -714,7 +733,8 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): GCP.helper_zip(base_directory, directory, archive) else: if directory != archive.filename: # prevent form including itself - archive.write(directory, os.path.relpath(directory, base_directory)) + archive.write(directory, os.path.relpath( + directory, base_directory)) """ https://gist.github.com/felixSchl/d38b455df8bf83a78d3d @@ -729,7 +749,8 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): @staticmethod def recursive_zip(directory: str, archname: str): - archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) + archive = zipfile.ZipFile( + archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) if os.path.isdir(directory): GCP.helper_zip(directory, directory, archive) else: diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 139547f6..3424234f 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -2,7 +2,6 @@ import datetime import json import time -import os from typing import Dict, Optional # noqa from google.cloud.workflows.executions_v1beta import ExecutionsClient @@ -44,8 +43,8 @@ def serialize(self) -> dict: @classmethod def deserialize(cls, obj: dict) -> Trigger: return cls(obj["name"]) - - + + class FunctionLibraryTrigger(LibraryTrigger): def sync_invoke(self, payload: dict) -> ExecutionResult: @@ -86,8 +85,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: output = json.loads(res["result"]) gcp_result.parse_benchmark_output(output) return gcp_result - - + + class WorkflowLibraryTrigger(LibraryTrigger): def sync_invoke(self, payload: dict) -> ExecutionResult: @@ -103,28 +102,32 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # GCP's fixed style for a function name config = self.deployment_client.config - full_workflow_name = GCP.get_full_workflow_name(config.project_name, config.region, self.name) - + full_workflow_name = GCP.get_full_workflow_name( + config.project_name, config.region, self.name) + execution_client = ExecutionsClient() execution = Execution(argument=json.dumps(payload)) - + begin = datetime.datetime.now() - res = execution_client.create_execution(parent=full_workflow_name, execution=execution) + res = execution_client.create_execution( + parent=full_workflow_name, execution=execution) end = datetime.datetime.now() - + gcp_result = ExecutionResult.from_times(begin, end) - + # Wait for execution to finish, then print results. execution_finished = False backoff_delay = 1 # Start wait with delay of 1 second while (not execution_finished): - execution = execution_client.get_execution(request={"name": res.name}) + execution = execution_client.get_execution( + request={"name": res.name}) execution_finished = execution.state != Execution.State.ACTIVE - + # If we haven't seen the result yet, wait a second. if not execution_finished: time.sleep(backoff_delay) - backoff_delay *= 2 # Double the delay to provide exponential backoff. + # Double the delay to provide exponential backoff. 
+ backoff_delay *= 2 elif execution.state == Execution.State.FAILED: self.logging.error(f"Invocation of {self.name} failed") self.logging.error(f"Input: {payload}") From c4683c0fed76fc40b4e5749f8c68946e2e57734e Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 24 Mar 2022 10:32:32 +0100 Subject: [PATCH 22/68] Fix aws function waiting bug --- sebs/aws/aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 56100d32..8d9d8e54 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -176,7 +176,7 @@ def wait_for_function(self, func_name: str): while (not ready): ret = self.lambda_client.get_function(FunctionName=func_name) state = ret["Configuration"]["State"] - update_status = ret["Configuration"]["LastUpdateStatus"] + update_status = ret["Configuration"].get("LastUpdateStatus", "Successful") ready = (state == "Active") and (update_status == "Successful") # If we haven't seen the result yet, wait a second. From 7123245292f83924c00ff828ec6df1ea2c70298e Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 24 Mar 2022 11:00:16 +0100 Subject: [PATCH 23/68] Separate aws sfm func handler --- benchmarks/wrappers/aws/python/handler_sfm.py | 52 ++++++++ config/systems.json | 8 +- sebs/aws/aws.py | 25 ++-- sebs/azure/azure.py | 116 +----------------- sebs/benchmark.py | 5 +- sebs/faas/system.py | 6 +- sebs/gcp/gcp.py | 2 +- sebs/local/local.py | 2 +- 8 files changed, 81 insertions(+), 135 deletions(-) create mode 100644 benchmarks/wrappers/aws/python/handler_sfm.py diff --git a/benchmarks/wrappers/aws/python/handler_sfm.py b/benchmarks/wrappers/aws/python/handler_sfm.py new file mode 100644 index 00000000..69dd6ff8 --- /dev/null +++ b/benchmarks/wrappers/aws/python/handler_sfm.py @@ -0,0 +1,52 @@ + +import datetime +import io +import json +import os +import sys +import uuid +import importlib + +import boto3 + +# Add current directory to allow location of packages +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + + +def probe_cold_start(): + is_cold = False + fname = os.path.join("/tmp", "cold_run") + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, "a") as f: + f.write(container_id) + else: + with open(fname, "r") as f: + container_id = f.read() + + return is_cold + + +def handler(event, context): + start = datetime.datetime.now().timestamp() + + workflow_name, func_name = context.function_name.split("___") + function = importlib.import_module(f"function.{func_name}") + res = function.handler(event) + + end = datetime.datetime.now().timestamp() + + payload = { + "start": start, + "end": end, + "is_cold": probe_cold_start() + } + + data = io.BytesIO(json.dumps(payload).encode("utf-8")) + path = os.path.join(workflow_name, func_name+".json") + + s3 = boto3.client("s3") + s3.upload_fileobj(data, "workflow-experiments", path) + + return res diff --git a/config/systems.json b/config/systems.json index 7a3bf450..3e7ed392 100644 --- a/config/systems.json +++ b/config/systems.json @@ -32,7 +32,7 @@ } } }, - "aws": { + "aws": { "languages": { "python": { "base_images": { @@ -44,14 +44,14 @@ "images": ["build"], "username": "docker_user", "deployment": { - "files": [ "handler.py", "storage.py"], + "files": ["handler.py", "handler_sfm.py", "storage.py"], "packages": [] } }, "nodejs": { "base_images": { "12.x" : "lambci/lambda:build-nodejs12.x", - "10.x" : "lambci/lambda:build-nodejs10.x" + "10.x" : "lambci/lambda:build-nodejs10.x" }, 
"versions": ["10.x", "12.x"], "images": ["build"], @@ -82,7 +82,7 @@ "nodejs": { "base_images": { "10" : "mcr.microsoft.com/azure-functions/node:2.0-node10", - "8" : "mcr.microsoft.com/azure-functions/node:2.0-node8" + "8" : "mcr.microsoft.com/azure-functions/node:2.0-node8" }, "images": ["build"], "username": "docker_user", diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 8d9d8e54..d243639a 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -135,12 +135,22 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: benchmark: benchmark name """ - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } + + # Todo: sfm support for nodejs + # rename handler_sfm.py to handler.py if necessary + handler_path = os.path.join(directory, "handler.py") + handler_sfm_path = os.path.join(directory, "handler_sfm.py") + if is_workflow: + os.rename(handler_sfm_path, handler_path) + else: + os.remove(handler_sfm_path) + package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") os.makedirs(function_dir) @@ -173,7 +183,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str) -> Tu def wait_for_function(self, func_name: str): ready = False backoff_delay = 1 # Start wait with delay of 1 second - while (not ready): + while not ready: ret = self.lambda_client.get_function(FunctionName=func_name) state = ret["Configuration"]["State"] update_status = ret["Configuration"].get("LastUpdateStatus", "Successful") @@ -193,8 +203,7 @@ def wait_for_function(self, func_name: str): f"Function {func_name} stuck in state {state} after 60s") break - def create_function(self, code_package: Benchmark, func_name: str, handler: str = None) -> "LambdaFunction": - + def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFunction": package = code_package.code_location benchmark = code_package.benchmark language = code_package.language_name @@ -252,7 +261,7 @@ def create_function(self, code_package: Benchmark, func_name: str, handler: str ret = self.lambda_client.create_function( FunctionName=func_name, Runtime="{}{}".format(language, language_runtime), - Handler=handler if handler else "handler.handler", + Handler="handler.handler", Role=self.config.resources.lambda_role(self.session), MemorySize=memory, Timeout=timeout, @@ -380,10 +389,8 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo # First we create a lambda function for each code file code_files = list(code_package.get_code_files(include_config=False)) - func_names = [os.path.splitext(os.path.basename(p))[ - 0] for p in code_files] - funcs = [self.create_function( - code_package, workflow_name+"-"+fn, handler="function."+fn+".handler") for fn in func_names] + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + funcs = [self.create_function(code_package, workflow_name+"___"+fn) for fn in func_names] # Set the ARN to the corresponding states in the workflow definition for name, func in zip(func_names, funcs): diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index be0b9e98..b45b9f99 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -110,59 +110,6 @@ def get_storage(self, replace_existing: bool = False) 
-> PersistentStorage: self.storage.replace_existing = replace_existing return self.storage - def package_code2(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: - - # In previous step we ran a Docker container which installed packages - # Python packages are in .python_packages because this is expected by Azure - EXEC_FILES = {"python": "handler.py", "nodejs": "handler.js"} - CONFIG_FILES = { - "python": ["requirements.txt", ".python_packages"], - "nodejs": ["package.json", "node_modules"], - } - package_config = CONFIG_FILES[language_name] - - handler_dir = os.path.join(directory, "handler") - os.makedirs(handler_dir) - # move all files to 'handler' except package config - for f in os.listdir(directory): - if f not in package_config: - source_file = os.path.join(directory, f) - shutil.move(source_file, handler_dir) - - # generate function.json - # TODO: extension to other triggers than HTTP - default_function_json = { - "scriptFile": EXEC_FILES[language_name], - "bindings": [ - { - "authLevel": "function", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"type": "http", "direction": "out", "name": "$return"}, - ], - } - json_out = os.path.join(directory, "handler", "function.json") - json.dump(default_function_json, open(json_out, "w"), indent=2) - - # generate host.json - default_host_json = { - "version": "2.0", - "extensionBundle": { - "id": "Microsoft.Azure.Functions.ExtensionBundle", - "version": "[1.*, 2.0.0)", - }, - } - json.dump(default_host_json, open( - os.path.join(directory, "host.json"), "w"), indent=2) - - code_size = Benchmark.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark), - shell=True, cwd=directory) - return directory, code_size - # Directory structure # handler # - source files @@ -171,7 +118,7 @@ def package_code2(self, directory: str, language_name: str, benchmark: str) -> T # - function.json # host.json # requirements.txt/package.json - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure @@ -648,64 +595,3 @@ def create_function_trigger(self, function: Function, def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: raise NotImplementedError() - -# -# def create_azure_function(self, fname, config): -# -# # create function name -# region = self.config["config"]["region"] -# # only hyphens are allowed -# # and name needs to be globally unique -# func_name = fname.replace(".", "-").replace("_", "-") -# -# # create function app -# self.cli_instance.execute( -# ( -# "az functionapp create --resource-group {} " -# "--os-type Linux --consumption-plan-location {} " -# "--runtime {} --runtime-version {} --name {} " -# "--storage-account {}" -# ).format( -# self.resource_group_name, -# region, -# self.AZURE_RUNTIMES[self.language], -# self.config["config"]["runtime"][self.language], -# func_name, -# self.storage_account_name, -# ) -# ) -# logging.info("Created function app {}".format(func_name)) -# return func_name -# -# init = False -# -# def create_function_copies( -# self, -# function_names: List[str], -# code_package: Benchmark, -# experiment_config: dict, -# ): -# -# if not self.init: -# code_location = 
code_package.code_location -# # package = self.package_code(code_location, code_package.benchmark) -# # code_size = code_package.code_size -# # Restart Docker instance to make sure code package is mounted -# self.start(code_location, restart=True) -# self.storage_account() -# self.resource_group() -# self.init = True -# -# # names = [] -# # for fname in function_names: -# # names.append(self.create_azure_function(fname, experiment_config)) -# names = function_names -# -# # time.sleep(30) -# urls = [] -# for fname in function_names: -# url = self.publish_function(fname, repeat_on_failure=True) -# urls.append(url) -# logging.info("Published function app {} with URL {}".format(fname, url)) -# -# return names, urls diff --git a/sebs/benchmark.py b/sebs/benchmark.py index 4b415b05..2ec03b04 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -458,7 +458,8 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str], Tuple[str, int]] + self, deployment_build_step: Callable[[str, str, str, bool], Tuple[str, int]], + is_workflow: bool ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild @@ -488,7 +489,7 @@ def build( self.add_deployment_package(self._output_dir) self.install_dependencies(self._output_dir) self._code_location, self._code_size = deployment_build_step( - os.path.abspath(self._output_dir), self.language_name, self.benchmark + os.path.abspath(self._output_dir), self.language_name, self.benchmark, is_workflow ) self.logging.info( ( diff --git a/sebs/faas/system.py b/sebs/faas/system.py index a3baf2e6..f5da04b2 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -105,7 +105,7 @@ def get_storage(self, replace_existing: bool) -> PersistentStorage: """ @abstractmethod - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: pass @abstractmethod @@ -151,7 +151,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) if not func_name: func_name = self.default_function_name(code_package) - rebuilt, _ = code_package.build(self.package_code) + rebuilt, _ = code_package.build(self.package_code, False) """ There's no function with that name? 
@@ -221,7 +221,7 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N # if not workflow_name: # workflow_name = self.default_function_name(code_package) - rebuilt, _ = code_package.build(self.package_code) + rebuilt, _ = code_package.build(self.package_code, True) # FIXME: cache workflows return self.create_workflow(code_package, workflow_name) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 1e657b59..00d18a0f 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -139,7 +139,7 @@ def format_function_name(func_name: str) -> str: :return: path to packaged code and its size """ - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", ".python_packages"], diff --git a/sebs/local/local.py b/sebs/local/local.py index 216f0d41..e9b0782d 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -115,7 +115,7 @@ def shutdown(self): benchmark: benchmark name """ - def package_code(self, directory: str, language_name: str, benchmark: str) -> Tuple[str, int]: + def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], From 48618e2274871dca7e1cfaf4c9512d4d6bcad686 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 24 Mar 2022 11:36:13 +0100 Subject: [PATCH 24/68] Save container id to s3 --- benchmarks/wrappers/aws/python/handler_sfm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler_sfm.py b/benchmarks/wrappers/aws/python/handler_sfm.py index 69dd6ff8..75a5f1a1 100644 --- a/benchmarks/wrappers/aws/python/handler_sfm.py +++ b/benchmarks/wrappers/aws/python/handler_sfm.py @@ -25,7 +25,7 @@ def probe_cold_start(): with open(fname, "r") as f: container_id = f.read() - return is_cold + return is_cold, container_id def handler(event, context): @@ -37,10 +37,12 @@ def handler(event, context): end = datetime.datetime.now().timestamp() + is_cold, container_id = probe_cold_start() payload = { "start": start, "end": end, - "is_cold": probe_cold_start() + "is_cold": is_cold, + "container_id": container_id } data = io.BytesIO(json.dumps(payload).encode("utf-8")) From bfa365ae559585593e10a163cb4e4895721ed054 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 24 Mar 2022 14:05:17 +0100 Subject: [PATCH 25/68] Fix s3 storage extension --- benchmarks/wrappers/aws/python/storage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index f979d07c..c5cc1e7f 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -15,17 +15,17 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}.{extension}'.format( + return '{name}.{random}{extension}'.format( name=name, extension=extension, random=str(uuid.uuid4()).split('-')[0] ) - + def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) self.client.upload_file(filepath, bucket, key_name) return key_name - + def download(self, bucket, file, filepath): self.client.download_file(bucket, file, filepath) @@ -46,7 +46,7 @@ def download_stream(self, bucket, file): data = io.BytesIO() 
self.client.download_fileobj(bucket, file, data) return data.getbuffer() - + def get_instance(): if storage.instance is None: storage.instance = storage() From 0681836ea7660091b30b9865de1b074d5919b868 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 25 Mar 2022 10:26:16 +0100 Subject: [PATCH 26/68] Rename benchmark to code package --- scripts/run_experiments.py | 8 +- sebs/__init__.py | 2 +- sebs/aws/aws.py | 36 +++---- sebs/azure/azure.py | 28 +++--- sebs/cache.py | 18 ++-- sebs/{benchmark.py => code_package.py} | 122 ++++++++++++------------ sebs/experiments/invocation_overhead.py | 8 +- sebs/faas/system.py | 28 +++--- sebs/gcp/gcp.py | 34 +++---- sebs/local/local.py | 16 ++-- sebs/sebs.py | 10 +- sebs/utils.py | 2 +- tests/aws/create_function.py | 4 +- 13 files changed, 158 insertions(+), 158 deletions(-) rename sebs/{benchmark.py => code_package.py} (86%) diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index c18b96c0..8198c917 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -507,7 +507,7 @@ def shutdown(self): ''' def create_function(self, code_package: CodePackage, experiment_config :dict): - benchmark = code_package.benchmark + benchmark = code_package.name if code_package.is_cached and code_package.is_cached_valid: func_name = code_package.cached_config['name'] @@ -524,7 +524,7 @@ def create_function(self, code_package: CodePackage, experiment_config :dict): code_location = code_package.code_location # Build code package - package = self.package_code(code_location, code_package.benchmark) + package = self.package_code(code_location, code_package.name) code_size = code_package.recalculate_code_size() cached_cfg = code_package.cached_config @@ -546,7 +546,7 @@ def create_function(self, code_package: CodePackage, experiment_config :dict): code_location = code_package.code_location # Build code package - package = self.package_code(code_location, code_package.benchmark) + package = self.package_code(code_location, code_package.name) code_size = code_package.recalculate_code_size() logging.info('Creating function {fname} in {loc}'.format( fname=func_name, @@ -602,7 +602,7 @@ def create_function(self, code_package: CodePackage, experiment_config :dict): raise RuntimeError('Experiment {} is not supported for language {}!'.format(args.experiment, args.language)) # 2. Locate benchmark - #benchmark_path = find_benchmark(args.benchmark, 'benchmarks') + #benchmark_path = find_package_code(args.benchmark, 'benchmarks') #logging.info('# Located benchmark {} at {}'.format(args.benchmark, benchmark_path)) # 6. 
Create experiment config diff --git a/sebs/__init__.py b/sebs/__init__.py index 6eceb356..f347b47b 100644 --- a/sebs/__init__.py +++ b/sebs/__init__.py @@ -4,6 +4,6 @@ # from .azure import * # noqa from .cache import Cache # noqa -from .benchmark import Benchmark # noqa +from .code_package import CodePackage # noqa # from .experiments import * # noqa diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index d243639a..c49c6148 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -15,7 +15,7 @@ from sebs.aws.workflow import SFNWorkflow from sebs.aws.config import AWSConfig from sebs.utils import execute -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers @@ -203,13 +203,13 @@ def wait_for_function(self, func_name: str): f"Function {func_name} stuck in state {state} after 60s") break - def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFunction": + def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaFunction": package = code_package.code_location - benchmark = code_package.benchmark + benchmark = code_package.name language = code_package.language_name language_runtime = code_package.language_version - timeout = code_package.benchmark_config.timeout - memory = code_package.benchmark_config.memory + timeout = code_package.config.timeout + memory = code_package.config.memory code_size = code_package.code_size code_bucket: Optional[str] = None func_name = AWS.format_resource_name(func_name) @@ -226,7 +226,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun # Here we assume a single Lambda role lambda_function = LambdaFunction( func_name, - code_package.benchmark, + code_package.name, ret["Configuration"]["FunctionArn"], code_package.hash, timeout, @@ -271,7 +271,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LambdaFun # print(url) lambda_function = LambdaFunction( func_name, - code_package.benchmark, + code_package.name, ret["FunctionArn"], code_package.hash, timeout, @@ -311,7 +311,7 @@ def cached_function(self, function: Function): :param memory: memory limit for function """ - def update_function(self, function: Function, code_package: Benchmark): + def update_function(self, function: Function, code_package: CodePackage): function = cast(LambdaFunction, function) name = function.name @@ -328,7 +328,7 @@ def update_function(self, function: Function, code_package: Benchmark): else: code_package_name = os.path.basename(package) storage = cast(S3, self.get_storage()) - bucket = function.code_bucket(code_package.benchmark, storage) + bucket = function.code_bucket(code_package.name, storage) storage.upload(bucket, package, code_package_name) self.lambda_client.update_function_code( FunctionName=name, S3Bucket=bucket, S3Key=code_package_name @@ -373,13 +373,13 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT self.cache_client.update_function(function) return trigger - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWorkflow": + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFNWorkflow": workflow_name = AWS.format_resource_name(workflow_name) # Make sure we have a valid workflow benchmark definition_path = os.path.join( - code_package.benchmark_path, "definition.json") + code_package.path, "definition.json") if os.path.exists(definition_path): with 
open(definition_path) as json_file: definition = json.load(json_file) @@ -417,7 +417,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo workflow = SFNWorkflow( workflow_name, funcs, - code_package.benchmark, + code_package.name, ret["stateMachineArn"], code_package.hash, self.config.resources.lambda_role(self.session), @@ -434,7 +434,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo workflow = SFNWorkflow( workflow_name, funcs, - code_package.benchmark, + code_package.name, arn, code_package.hash, self.config.resources.lambda_role(self.session), @@ -452,7 +452,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "SFNWo return workflow - def update_workflow(self, workflow: Workflow, definition: str, code_package: Benchmark): + def update_workflow(self, workflow: Workflow, definition: str, code_package: CodePackage): workflow = cast(SFNWorkflow, workflow) @@ -476,12 +476,12 @@ def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.Trig raise RuntimeError("Not supported!") @staticmethod - def default_function_name(code_package: Benchmark) -> str: + def default_function_name(code_package: CodePackage) -> str: # Create function name func_name = "{}-{}-{}".format( - code_package.benchmark, + code_package.name, code_package.language_name, - code_package.benchmark_config.memory, + code_package.config.memory, ) return AWS.format_resource_name(func_name) @@ -657,7 +657,7 @@ def _enforce_cold_start(self, function: Function): "ForceColdStart": str(self.cold_start_counter)}}, ) - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index b45b9f99..f5a1c43c 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -15,7 +15,7 @@ from sebs.azure.config import AzureConfig, AzureResources from sebs.azure.triggers import AzureTrigger, HTTPTrigger from sebs.faas.function import Trigger -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers, execute @@ -191,7 +191,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo json.dump(default_host_json, open( os.path.join(directory, "host.json"), "w"), indent=2) - code_size = Benchmark.directory_size(directory) + code_size = CodePackage.directory_size(directory) execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) return directory, code_size @@ -199,7 +199,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo def publish_function( self, function: Function, - code_package: Benchmark, + code_package: CodePackage, repeat_on_failure: bool = False, ) -> str: success = False @@ -218,7 +218,7 @@ def publish_function( # "bash -c 'cd /mnt/function " # "&& az functionapp deployment source config-zip " # "--src {}.zip -g {} -n {} --build-remote false '".format( - # code_package.benchmark, resource_group, function.name + # code_package.name, resource_group, function.name # ) # ) # print(ret) @@ -260,7 +260,7 @@ def publish_function( :return: URL to reach HTTP-triggered function """ - def update_function(self, function: Function, code_package: Benchmark): + def update_function(self, function: 
Function, code_package: CodePackage): # Mount code package in Docker instance self._mount_function_code(code_package) @@ -271,17 +271,17 @@ def update_function(self, function: Function, code_package: Benchmark): trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) - def _mount_function_code(self, code_package: Benchmark): + def _mount_function_code(self, code_package: CodePackage): self.cli_instance.upload_package( code_package.code_location, "/mnt/function/") - def default_function_name(self, code_package: Benchmark) -> str: + def default_function_name(self, code_package: CodePackage) -> str: """ Functionapp names must be globally unique in Azure. """ func_name = ( "{}-{}-{}".format( - code_package.benchmark, + code_package.name, code_package.language_name, self.config.resources_id, ) @@ -290,7 +290,7 @@ def default_function_name(self, code_package: Benchmark) -> str: ) return func_name - def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunction: + def create_function(self, code_package: CodePackage, func_name: str) -> AzureFunction: language = code_package.language_name language_runtime = code_package.language_version @@ -357,7 +357,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> AzureFunct raise function = AzureFunction( name=func_name, - benchmark=code_package.benchmark, + benchmark=code_package.name, code_hash=code_package.hash, function_storage=function_storage_account, ) @@ -382,7 +382,7 @@ def cached_function(self, function: Function): azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureFunction: + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> AzureFunction: language = code_package.language_name language_runtime = code_package.language_version @@ -450,7 +450,7 @@ def create_workflow(self, code_package: Benchmark, workflow_name: str) -> AzureF raise workflow = AzureWorkflow( name=workflow_name, - benchmark=code_package.benchmark, + benchmark=code_package.name, code_hash=code_package.hash, function_storage=function_storage_account, ) @@ -561,7 +561,7 @@ def download_metrics( # TODO: query performance counters for mem - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + def _enforce_cold_start(self, function: Function, code_package: CodePackage): fname = function.name resource_group = self.config.resources.resource_group( @@ -575,7 +575,7 @@ def _enforce_cold_start(self, function: Function, code_package: Benchmark): self.update_function(function, code_package) - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) diff --git a/sebs/cache.py b/sebs/cache.py index dcce8ff7..29112cfa 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -10,7 +10,7 @@ from sebs.utils import LoggingBase if TYPE_CHECKING: - from sebs.benchmark import Benchmark + from sebs.code_package import CodePackage from sebs.faas.function import Function @@ -55,7 +55,7 @@ def __init__(self, cache_dir: str): @staticmethod def typename() -> str: - return "Benchmark" + return "CodePackage" def load_config(self): with self._lock: @@ -162,10 +162,10 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): with 
open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(cached_config, fp, indent=2) - def add_code_package(self, deployment_name: str, language_name: str, code_package: "Benchmark"): + def add_code_package(self, deployment_name: str, language_name: str, code_package: "CodePackage"): with self._lock: language = code_package.language_name - benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) + benchmark_dir = os.path.join(self.cache_dir, code_package.name) os.makedirs(benchmark_dir, exist_ok=True) # Check if cache directory for this deployment exist cached_dir = os.path.join(benchmark_dir, deployment_name, language) @@ -211,16 +211,16 @@ def add_code_package(self, deployment_name: str, language_name: str, code_packag # TODO: update raise RuntimeError( "Cached application {} for {} already exists!".format( - code_package.benchmark, deployment_name + code_package.name, deployment_name ) ) def update_code_package( - self, deployment_name: str, language_name: str, code_package: "Benchmark" + self, deployment_name: str, language_name: str, code_package: "CodePackage" ): with self._lock: language = code_package.language_name - benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) + benchmark_dir = os.path.join(self.cache_dir, code_package.name) # Check if cache directory for this deployment exist cached_dir = os.path.join(benchmark_dir, deployment_name, language) if os.path.exists(cached_dir): @@ -264,13 +264,13 @@ def add_function( self, deployment_name: str, language_name: str, - code_package: "Benchmark", + code_package: "CodePackage", function: "Function", ): if self.ignore_functions: return with self._lock: - benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) + benchmark_dir = os.path.join(self.cache_dir, code_package.name) language = code_package.language_name cache_config = os.path.join(benchmark_dir, "config.json") diff --git a/sebs/benchmark.py b/sebs/code_package.py similarity index 86% rename from sebs/benchmark.py rename to sebs/code_package.py index 2ec03b04..aa1a90e1 100644 --- a/sebs/benchmark.py +++ b/sebs/code_package.py @@ -10,7 +10,7 @@ from sebs.config import SeBSConfig from sebs.cache import Cache -from sebs.utils import find_benchmark, project_absolute_path, LoggingBase +from sebs.utils import find_package_code, project_absolute_path, LoggingBase from sebs.faas.storage import PersistentStorage from typing import TYPE_CHECKING @@ -19,7 +19,7 @@ from sebs.experiments.config import Language -class BenchmarkConfig: +class CodePackageConfig: def __init__(self, timeout: int, memory: int, languages: List["Language"]): self._timeout = timeout self._memory = memory @@ -39,10 +39,10 @@ def languages(self) -> List["Language"]: # FIXME: 3.7+ python with future annotations @staticmethod - def deserialize(json_object: dict) -> "BenchmarkConfig": + def deserialize(json_object: dict) -> "CodePackageConfig": from sebs.experiments.config import Language - return BenchmarkConfig( + return CodePackageConfig( json_object["timeout"], json_object["memory"], [Language.deserialize(x) for x in json_object["languages"]], @@ -62,35 +62,35 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": """ -class Benchmark(LoggingBase): +class CodePackage(LoggingBase): @staticmethod def typename() -> str: - return "Benchmark" + return "CodePackage" @property - def benchmark(self): - return self._benchmark + def name(self): + return self._name @property - def benchmark_path(self): - return self._benchmark_path + def path(self): + return 
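# A small usage sketch for CodePackageConfig.deserialize() above, assuming a benchmark
# config.json of the shape implied by that method; the concrete timeout/memory values
# are illustrative.
import json

raw = json.loads('{"timeout": 60, "memory": 256, "languages": ["python", "nodejs"]}')
cfg = CodePackageConfig.deserialize(raw)

assert cfg.timeout == 60
assert cfg.memory == 256
# cfg.languages now holds Language enum members deserialized from the strings.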
self._path @property - def benchmark_config(self) -> BenchmarkConfig: - return self._benchmark_config + def config(self) -> CodePackageConfig: + return self._config @property - def code_package(self) -> dict: - return self._code_package + def payload(self) -> dict: + return self._payload @property - def functions(self) -> Dict[str, Any]: - return self._functions + def benchmarks(self) -> Dict[str, Any]: + return self._benchmarks @property def code_location(self): - if self.code_package: - return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) + if self.payload: + return os.path.join(self._cache_client.cache_dir, self.payload["location"]) else: return self._code_location @@ -128,8 +128,8 @@ def language_version(self): @property # noqa: A003 def hash(self): - path = os.path.join(self.benchmark_path, self.language_name) - self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) + path = os.path.join(self.path, self.language_name) + self._hash_value = CodePackage.hash_directory(path, self._deployment_name, self.language_name) return self._hash_value @hash.setter # noqa: A003 @@ -141,7 +141,7 @@ def hash(self, val: str): def __init__( self, - benchmark: str, + name: str, deployment_name: str, config: "ExperimentConfig", system_config: SeBSConfig, @@ -150,28 +150,28 @@ def __init__( docker_client: docker.client, ): super().__init__() - self._benchmark = benchmark + self._name = name self._deployment_name = deployment_name self._experiment_config = config self._language = config.runtime.language self._language_version = config.runtime.version - self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") - if not self._benchmark_path: - raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) - with open(os.path.join(self.benchmark_path, "config.json")) as json_file: - self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( + self._path = find_package_code(self.name, "benchmarks") + if not self._path: + raise RuntimeError("Benchmark {name} not found!".format(name=self._name)) + with open(os.path.join(self.path, "config.json")) as json_file: + self._config: CodePackageConfig = CodePackageConfig.deserialize( json.load(json_file) ) - if self.language not in self.benchmark_config.languages: + if self.language not in self.config.languages: raise RuntimeError( - "Benchmark {} not available for language {}".format(self.benchmark, self.language) + "Benchmark {} not available for language {}".format(self.name, self.language) ) self._cache_client = cache_client self._docker_client = docker_client self._system_config = system_config self._hash_value = None - self._output_dir = os.path.join(output_dir, f"{benchmark}_code") + self._output_dir = os.path.join(output_dir, f"{name}_code") # verify existence of function in cache self.query_cache() @@ -212,22 +212,22 @@ def serialize(self) -> dict: return {"size": self.code_size, "hash": self.hash} def query_cache(self): - self._code_package = self._cache_client.get_code_package( + self._payload = self._cache_client.get_code_package( deployment=self._deployment_name, - benchmark=self._benchmark, + benchmark=self._name, language=self.language_name, ) - self._functions = self._cache_client.get_functions( + self._benchmarks = self._cache_client.get_functions( deployment=self._deployment_name, - benchmark=self._benchmark, + benchmark=self._name, language=self.language_name, ) - if self._code_package is not None: + if self._payload is not None: # compare 
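# A hedged sketch of what CodePackage.hash_directory() is for: fingerprint the
# benchmark's source directory so the cache can compare a fresh hash against the
# stored one. This helper is illustrative, not the exact implementation.
import hashlib
import os

def directory_fingerprint(path: str) -> str:
    digest = hashlib.md5()
    for root, _, files in sorted(os.walk(path)):
        for fname in sorted(files):
            with open(os.path.join(root, fname), "rb") as handle:
                digest.update(handle.read())
    return digest.hexdigest()

# A cached package stays valid only while the stored hash equals the freshly
# computed one (the current_hash == old_hash check in query_cache below).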
hashes current_hash = self.hash - old_hash = self._code_package["hash"] - self._code_size = self._code_package["size"] + old_hash = self._payload["hash"] + self._code_size = self._payload["size"] self._is_cached = True self._is_cached_valid = current_hash == old_hash else: @@ -243,7 +243,7 @@ def get_code_files(self, include_config=True): FILES["python"] += ["requirements.txt*", "*.json"] FILES["nodejs"] += ["package.json", "*.json"] - path = os.path.join(self.benchmark_path, self.language_name) + path = os.path.join(self.path, self.language_name) for file_type in FILES[self.language_name]: for f in glob.glob(os.path.join(path, file_type)): yield os.path.join(path, f) @@ -253,15 +253,15 @@ def copy_code(self, output_dir): shutil.copy2(path, output_dir) def add_benchmark_data(self, output_dir): - cmd = "/bin/bash {benchmark_path}/init.sh {output_dir} false" + cmd = "/bin/bash {path}/init.sh {output_dir} false" paths = [ - self.benchmark_path, - os.path.join(self.benchmark_path, self.language_name), + self.path, + os.path.join(self.path, self.language_name), ] for path in paths: if os.path.exists(os.path.join(path, "init.sh")): out = subprocess.run( - cmd.format(benchmark_path=path, output_dir=output_dir), + cmd.format(path=path, output_dir=output_dir), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -360,7 +360,7 @@ def install_dependencies(self, output_dir): if not self._experiment_config.check_flag("docker_copy_build_files"): volumes = {os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"}} package_script = os.path.abspath( - os.path.join(self._benchmark_path, self.language_name, "package.sh") + os.path.join(self._path, self.language_name, "package.sh") ) # does this benchmark has package.sh script? if os.path.exists(package_script): @@ -389,7 +389,7 @@ def install_dependencies(self, output_dir): stdout = self._docker_client.containers.run( "{}:{}".format(repo_name, image_name), volumes=volumes, - environment={"APP": self.benchmark}, + environment={"APP": self.name}, # user="1000:1000", user=uid, remove=True, @@ -401,7 +401,7 @@ def install_dependencies(self, output_dir): else: container = self._docker_client.containers.run( "{}:{}".format(repo_name, image_name), - environment={"APP": self.benchmark}, + environment={"APP": self.name}, # user="1000:1000", user=uid, # remove=True, @@ -454,7 +454,7 @@ def install_dependencies(self, output_dir): raise e def recalculate_code_size(self): - self._code_size = Benchmark.directory_size(self._output_dir) + self._code_size = CodePackage.directory_size(self._output_dir) return self._code_size def build( @@ -465,7 +465,7 @@ def build( # Skip build if files are up to date and user didn't enforce rebuild if self.is_cached and self.is_cached_valid: self.logging.info( - "Using cached benchmark {} at {}".format(self.benchmark, self.code_location) + "Using cached benchmark {} at {}".format(self.name, self.code_location) ) return False, self.code_location @@ -474,9 +474,9 @@ def build( if not self.is_cached else "cached code package is not up to date/build enforced." ) - self.logging.info("Building benchmark {}. Reason: {}".format(self.benchmark, msg)) + self.logging.info("Building benchmark {}. 
Reason: {}".format(self.name, msg)) # clear existing cache information - self._code_package = None + self._payload = None # create directory to be deployed if os.path.exists(self._output_dir): @@ -489,7 +489,7 @@ def build( self.add_deployment_package(self._output_dir) self.install_dependencies(self._output_dir) self._code_location, self._code_size = deployment_build_step( - os.path.abspath(self._output_dir), self.language_name, self.benchmark, is_workflow + os.path.abspath(self._output_dir), self.language_name, self.name, is_workflow ) self.logging.info( ( @@ -518,15 +518,15 @@ def build( :param client: Deployment client :param benchmark: - :param benchmark_path: + :param path: :param size: Benchmark workload size """ def prepare_input(self, storage: PersistentStorage, size: str): - benchmark_data_path = find_benchmark(self._benchmark, "benchmarks-data") - mod = load_benchmark_input(self._benchmark_path) + benchmark_data_path = find_package_code(self.name, "benchmarks-data") + mod = load_benchmark_input(self._path) buckets = mod.buckets_count() - storage.allocate_buckets(self.benchmark, buckets) + storage.allocate_buckets(self.name, buckets) # Get JSON and upload data as required by benchmark input_config = mod.generate_input( benchmark_data_path, @@ -544,9 +544,9 @@ def prepare_input(self, storage: PersistentStorage, size: str): def code_package_modify(self, filename: str, data: bytes): - if self.code_package_is_archive(): + if self.is_archive(): self._update_zip(self.code_location, filename, data) - new_size = self.code_package_recompute_size() / 1024.0 / 1024.0 + new_size = self.recompute_size() / 1024.0 / 1024.0 self.logging.info(f"Modified zip package {self.code_location}, new size {new_size} MB") else: raise NotImplementedError() @@ -556,13 +556,13 @@ def code_package_modify(self, filename: str, data: bytes): Azure: directory """ - def code_package_is_archive(self) -> bool: + def is_archive(self) -> bool: if os.path.isfile(self.code_location): extension = os.path.splitext(self.code_location)[1] return extension in [".zip"] return False - def code_package_recompute_size(self) -> float: + def recompute_size(self) -> float: bytes_size = os.path.getsize(self.code_location) self._code_size = bytes_size return bytes_size @@ -600,7 +600,7 @@ def _update_zip(zipname: str, filename: str, data: bytes): """ -class BenchmarkModuleInterface: +class CodePackageModuleInterface: @staticmethod def buckets_count() -> Tuple[int, int]: pass @@ -616,11 +616,11 @@ def generate_input( pass -def load_benchmark_input(benchmark_path: str) -> BenchmarkModuleInterface: +def load_benchmark_input(path: str) -> CodePackageModuleInterface: # Look for input generator file in the directory containing benchmark import importlib.machinery - loader = importlib.machinery.SourceFileLoader("input", os.path.join(benchmark_path, "input.py")) + loader = importlib.machinery.SourceFileLoader("input", os.path.join(path, "input.py")) spec = importlib.util.spec_from_loader(loader.name, loader) assert spec mod = importlib.util.module_from_spec(spec) diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index 76f9a41a..90816432 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -5,7 +5,7 @@ from datetime import datetime from typing import Dict, TYPE_CHECKING -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage from sebs.faas.system import System as FaaSSystem from sebs.experiments.experiment import Experiment from 
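# A usage sketch of the dynamic-import pattern in load_benchmark_input() above:
# a benchmark's input.py is loaded as a regular module object and queried for its
# storage requirements. The path is illustrative.
import importlib.machinery
import importlib.util

loader = importlib.machinery.SourceFileLoader(
    "input", "benchmarks/110.dynamic-html/input.py"
)
spec = importlib.util.spec_from_loader(loader.name, loader)
mod = importlib.util.module_from_spec(spec)
loader.exec_module(mod)

print(mod.buckets_count())   # number of (input, output) buckets the benchmark needs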
sebs.experiments.config import Config as ExperimentConfig @@ -15,7 +15,7 @@ class CodePackageSize: - def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings: dict): + def __init__(self, deployment_client: FaaSSystem, benchmark: CodePackage, settings: dict): import math from numpy import linspace @@ -24,9 +24,9 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings settings["code_package_end"], settings["code_package_points"], ) - from sebs.utils import find_benchmark + from sebs.utils import find_package_code - self._benchmark_path = find_benchmark("030.clock-synchronization", "benchmarks") + self._benchmark_path = find_package_code("030.clock-synchronization", "benchmarks") self._benchmark = benchmark random.seed(1410) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index f5da04b2..6c0414ec 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -5,7 +5,7 @@ import docker -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.faas.function import Function, Trigger, ExecutionResult @@ -109,11 +109,11 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo pass @abstractmethod - def create_function(self, code_package: Benchmark, func_name: str) -> Function: + def create_function(self, code_package: CodePackage, func_name: str) -> Function: pass @abstractmethod - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> Workflow: + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Workflow: pass @abstractmethod @@ -121,7 +121,7 @@ def cached_function(self, function: Function): pass @abstractmethod - def update_function(self, function: Function, code_package: Benchmark): + def update_function(self, function: Function, code_package: CodePackage): pass """ @@ -137,7 +137,7 @@ def update_function(self, function: Function, code_package: Benchmark): """ - def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: + def get_function(self, code_package: CodePackage, func_name: Optional[str] = None) -> Function: if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name ): @@ -160,8 +160,8 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) b) no -> retrieve function from the cache. Function code in cloud will be updated if the local version is different. """ - functions = code_package.functions - if not functions or func_name not in functions: + benchmarks = code_package.benchmarks + if not benchmarks or func_name not in benchmarks: msg = ( "function name not provided." 
if not func_name @@ -179,7 +179,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) return function else: # retrieve function - cached_function = functions[func_name] + cached_function = benchmarks[func_name] code_location = code_package.code_location function = self.function_type().deserialize(cached_function) self.cached_function(function) @@ -207,7 +207,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) code_package.query_cache() return function - def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = None): + def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = None): if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name ): @@ -233,8 +233,8 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N b) no -> retrieve function from the cache. Function code in cloud will be updated if the local version is different. """ - functions = code_package.functions - if not functions or func_name not in functions: + benchmarks = code_package.benchmarks + if not benchmarks or func_name not in benchmarks: msg = ( "function name not provided." if not func_name @@ -252,7 +252,7 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N return function else: # retrieve function - cached_function = functions[func_name] + cached_function = benchmarks[func_name] code_location = code_package.code_location function = self.function_type().deserialize(cached_function) self.cached_function(function) @@ -281,11 +281,11 @@ def get_workflow(self, code_package: Benchmark, workflow_name: Optional[str] = N return function @abstractmethod - def default_function_name(self, code_package: Benchmark) -> str: + def default_function_name(self, code_package: CodePackage) -> str: pass @abstractmethod - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): pass @abstractmethod diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 00d18a0f..20563bd6 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -15,7 +15,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage from ..faas.function import Function, Trigger from ..faas.workflow import Workflow from .storage import PersistentStorage @@ -108,12 +108,12 @@ def get_storage( return self.storage @staticmethod - def default_function_name(code_package: Benchmark) -> str: + def default_function_name(code_package: CodePackage) -> str: # Create function name func_name = "{}-{}-{}".format( - code_package.benchmark, + code_package.name, code_package.language_name, - code_package.benchmark_config.memory, + code_package.config.memory, ) return GCP.format_function_name(func_name) @@ -191,13 +191,13 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size - def create_function(self, code_package: Benchmark, func_name: str) -> "GCPFunction": + def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunction": package = code_package.code_location - benchmark = code_package.benchmark + benchmark = code_package.name language_runtime = code_package.language_version - timeout = code_package.benchmark_config.timeout - memory = 
code_package.benchmark_config.memory + timeout = code_package.config.timeout + memory = code_package.config.memory code_bucket: Optional[str] = None storage_client = self.get_storage() location = self.config.region @@ -327,14 +327,14 @@ def cached_function(self, function: Function): gcp_trigger.logging_handlers = self.logging_handlers gcp_trigger.deployment_client = self - def update_function(self, function: Function, code_package: Benchmark): + def update_function(self, function: Function, code_package: CodePackage): function = cast(GCPFunction, function) language_runtime = code_package.language_version code_package_name = os.path.basename(code_package.code_location) storage = cast(GCPStorage, self.get_storage()) - bucket = function.code_bucket(code_package.benchmark, storage) + bucket = function.code_bucket(code_package.name, storage) storage.upload(bucket, code_package.code_location, code_package_name) self.logging.info( f"Uploaded new code package to {bucket}/{code_package_name}") @@ -371,10 +371,10 @@ def update_function(self, function: Function, code_package: Benchmark): def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" - def create_workflow(self, code_package: Benchmark, workflow_name: str) -> "GCPWorkflow": - benchmark = code_package.benchmark - timeout = code_package.benchmark_config.timeout - memory = code_package.benchmark_config.memory + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCPWorkflow": + benchmark = code_package.name + timeout = code_package.config.timeout + memory = code_package.config.memory code_bucket: Optional[str] = None location = self.config.region project_name = self.config.project_name @@ -449,7 +449,7 @@ def create_workflow_trigger(self, workflow: Workflow, # self.cache_client.update_workflow(workflow) return trigger - def update_workflow(self, workflow: Workflow, code_package: Benchmark): + def update_workflow(self, workflow: Workflow, code_package: CodePackage): with open('cache/test.yml') as f: code = f.read() @@ -628,7 +628,7 @@ def _enforce_cold_start(self, function: Function): return new_version - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): new_versions = [] for func in functions: @@ -654,7 +654,7 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.cold_start_counter += 1 - def get_functions(self, code_package: Benchmark, function_names: List[str]) -> List["Function"]: + def get_functions(self, code_package: CodePackage, function_names: List[str]) -> List["Function"]: functions: List["Function"] = [] undeployed_functions_before = [] diff --git a/sebs/local/local.py b/sebs/local/local.py index e9b0782d..c89bd01a 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -14,7 +14,7 @@ from sebs.faas.function import Function, ExecutionResult, Trigger from sebs.faas.storage import PersistentStorage from sebs.faas.system import System -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage class Local(System): @@ -136,7 +136,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo return directory, bytes_size - def create_function(self, code_package: Benchmark, func_name: str) -> "LocalFunction": + def create_function(self, code_package: CodePackage, func_name: str) -> "LocalFunction": home_dir = 
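# For reference, the fully-qualified resource name that get_full_function_name()
# above assembles and that the Cloud Functions API expects; project and region
# values are illustrative.
project_name, location, func_name = "example-project", "europe-west1", "dynamic-html-python-256"
full_name = f"projects/{project_name}/locations/{location}/functions/{func_name}"
# -> "projects/example-project/locations/europe-west1/functions/dynamic-html-python-256"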
os.path.join( "/home", self._system_config.username(self.name(), code_package.language_name) @@ -176,7 +176,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LocalFunc # tty=True, ) func = LocalFunction( - container, self.DEFAULT_PORT, func_name, code_package.benchmark, code_package.hash + container, self.DEFAULT_PORT, func_name, code_package.name, code_package.hash ) self.logging.info( f"Started {func_name} function at container {container.id} , running on {func._url}" @@ -187,7 +187,7 @@ def create_function(self, code_package: Benchmark, func_name: str) -> "LocalFunc FIXME: restart Docker? """ - def update_function(self, function: Function, code_package: Benchmark): + def update_function(self, function: Function, code_package: CodePackage): pass """ @@ -222,16 +222,16 @@ def download_metrics( ): pass - def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): raise NotImplementedError() @staticmethod - def default_function_name(code_package: Benchmark) -> str: + def default_function_name(code_package: CodePackage) -> str: # Create function name func_name = "{}-{}-{}".format( - code_package.benchmark, + code_package.name, code_package.language_name, - code_package.benchmark_config.memory, + code_package.config.memory, ) return func_name diff --git a/sebs/sebs.py b/sebs/sebs.py index 4562c7bb..e19ae9cd 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -5,7 +5,7 @@ from sebs.local import Local from sebs.cache import Cache from sebs.config import SeBSConfig -from sebs.benchmark import Benchmark +from sebs.code_package import CodePackage from sebs.faas.system import System as FaaSSystem from sebs.faas.config import Config from sebs.utils import has_platform, LoggingHandlers, LoggingBase @@ -149,8 +149,8 @@ def get_benchmark( deployment: FaaSSystem, config: ExperimentConfig, logging_filename: Optional[str] = None, - ) -> Benchmark: - benchmark = Benchmark( + ) -> CodePackage: + code_package = CodePackage( name, deployment.name(), config, @@ -159,10 +159,10 @@ def get_benchmark( self.cache_client, self.docker_client, ) - benchmark.logging_handlers = self.generate_logging_handlers( + code_package.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename ) - return benchmark + return code_package def shutdown(self): self.cache_client.shutdown() diff --git a/sebs/utils.py b/sebs/utils.py index eff58511..33ee7b9a 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -129,7 +129,7 @@ def configure_logging(): """ -def find_benchmark(benchmark: str, path: str): +def find_package_code(benchmark: str, path: str): benchmarks_dir = os.path.join(PROJECT_DIR, path) benchmark_path = find(benchmark, benchmarks_dir) return benchmark_path diff --git a/tests/aws/create_function.py b/tests/aws/create_function.py index e672cc89..4b8b1f3f 100644 --- a/tests/aws/create_function.py +++ b/tests/aws/create_function.py @@ -39,7 +39,7 @@ class AWSCreateFunction(unittest.TestCase): "nodejs": ["handler.js", "function/storage.js", "package.json", "node_modules/"] } benchmark = "110.dynamic-html" - function_name_suffixes = [] + function_name_suffixes = [] def setUp(self): self.tmp_dir = tempfile.TemporaryDirectory() @@ -96,7 +96,7 @@ def tearDownClass(cls): deployment_client.delete_function(func_name + suffix) def check_function( - self, language: str, package: sebs.benchmark.Benchmark, files: List[str] + self, language: str, package: sebs.code_package.CodePackage, 
files: List[str] ): filename, file_extension = os.path.splitext(package.code_location) self.assertEqual(file_extension, ".zip") From 83e5b39e9c3d62156b34ebffe55d245abb2135ae Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 25 Mar 2022 11:28:14 +0100 Subject: [PATCH 27/68] Common benchmark class for function and workflow --- scripts/run_experiments.py | 2 +- sebs.py | 8 +- sebs/aws/aws.py | 7 +- sebs/aws/function.py | 8 +- sebs/aws/triggers.py | 2 +- sebs/aws/workflow.py | 4 +- sebs/azure/azure.py | 22 +- sebs/azure/function.py | 4 +- sebs/azure/triggers.py | 2 +- sebs/azure/workflow.py | 4 +- sebs/cache.py | 48 ++-- sebs/code_package.py | 2 +- sebs/experiments/eviction_model.py | 4 +- sebs/experiments/invocation_overhead.py | 2 +- sebs/experiments/network_ping_pong.py | 2 +- sebs/experiments/perf_cost.py | 2 +- sebs/experiments/result.py | 2 +- sebs/faas/benchmark.py | 337 ++++++++++++++++++++++++ sebs/faas/system.py | 25 +- sebs/faas/workflow.py | 75 ------ sebs/gcp/function.py | 6 +- sebs/gcp/gcp.py | 11 +- sebs/gcp/triggers.py | 2 +- sebs/gcp/workflow.py | 6 +- sebs/local/function.py | 2 +- sebs/local/local.py | 4 +- sebs/regression.py | 4 +- tests/aws/create_function.py | 12 +- tests/aws/invoke_function_http.py | 8 +- tests/regression.py | 2 +- 30 files changed, 438 insertions(+), 181 deletions(-) create mode 100644 sebs/faas/benchmark.py delete mode 100644 sebs/faas/workflow.py diff --git a/scripts/run_experiments.py b/scripts/run_experiments.py index 8198c917..bb29e045 100755 --- a/scripts/run_experiments.py +++ b/scripts/run_experiments.py @@ -552,7 +552,7 @@ def create_function(self, code_package: CodePackage, experiment_config :dict): fname=func_name, loc=code_location )) - self.cache_client.add_function( + self.cache_client.add_benchmark( deployment='local', benchmark=benchmark, language=self.language, diff --git a/sebs.py b/sebs.py index 91844385..576a9a5b 100755 --- a/sebs.py +++ b/sebs.py @@ -16,7 +16,7 @@ from sebs.regression import regression_suite from sebs.utils import update_nested_dict from sebs.faas import System as FaaSSystem -from sebs.faas.function import Trigger +from sebs.faas.benchmark import Trigger PROJECT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -198,7 +198,7 @@ def function(benchmark, benchmark_input_size, repetitions, trigger, function_nam logging_filename=logging_filename, ) func = deployment_client.get_function( - benchmark_obj, function_name if function_name else deployment_client.default_function_name(benchmark_obj) + benchmark_obj, function_name if function_name else deployment_client.default_benchmark_name(benchmark_obj) ) storage = deployment_client.get_storage( replace_existing=experiment_config.update_storage @@ -274,7 +274,7 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam logging_filename=logging_filename, ) workflow = deployment_client.get_workflow( - benchmark_obj, workflow_name if workflow_name else deployment_client.default_function_name(benchmark_obj) + benchmark_obj, workflow_name if workflow_name else deployment_client.default_benchmark_name(benchmark_obj) ) storage = deployment_client.get_storage( replace_existing=experiment_config.update_storage @@ -435,7 +435,7 @@ def start(benchmark, benchmark_input_size, output, deployments, remove_container result.add_input(input_config) for i in range(deployments): func = deployment_client.get_function( - benchmark_obj, deployment_client.default_function_name(benchmark_obj) + benchmark_obj, deployment_client.default_benchmark_name(benchmark_obj) 
) result.add_function(func) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index c49c6148..e11a7bf0 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -19,8 +19,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers -from sebs.faas.function import Function, ExecutionResult, Trigger -from sebs.faas.workflow import Workflow +from sebs.faas.benchmark import Function, ExecutionResult, Trigger, Workflow from sebs.faas.storage import PersistentStorage from sebs.faas.system import System @@ -370,7 +369,7 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT raise RuntimeError("Not supported!") function.add_trigger(trigger) - self.cache_client.update_function(function) + self.cache_client.update_benchmark(function) return trigger def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFNWorkflow": @@ -476,7 +475,7 @@ def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.Trig raise RuntimeError("Not supported!") @staticmethod - def default_function_name(code_package: CodePackage) -> str: + def default_benchmark_name(code_package: CodePackage) -> str: # Create function name func_name = "{}-{}-{}".format( code_package.name, diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 28468f9d..e8797a58 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -1,10 +1,10 @@ from typing import cast, Optional from sebs.aws.s3 import S3 -from sebs.faas.function import Function +from sebs.faas.benchmark import Benchmark -class LambdaFunction(Function): +class LambdaFunction(Benchmark): def __init__( self, name: str, @@ -42,12 +42,12 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": - from sebs.faas.function import Trigger + from sebs.faas.benchmark import Trigger from sebs.aws.triggers import FunctionLibraryTrigger, HTTPTrigger ret = LambdaFunction( cached_config["name"], - cached_config["benchmark"], + cached_config["code_package"], cached_config["arn"], cached_config["hash"], cached_config["timeout"], diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 4288903b..53a47db6 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -6,7 +6,7 @@ from typing import Dict, Optional # noqa from sebs.aws.aws import AWS -from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.benchmark import ExecutionResult, Trigger class LibraryTrigger(Trigger): diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index be3b1962..7d73acaa 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -2,7 +2,7 @@ from sebs.aws.s3 import S3 from sebs.aws.function import LambdaFunction -from sebs.faas.workflow import Workflow +from sebs.faas.benchmark import Workflow class SFNWorkflow(Workflow): @@ -34,7 +34,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "SFNWorkflow": - from sebs.faas.function import Trigger + from sebs.faas.benchmark import Trigger from sebs.aws.triggers import WorkflowLibraryTrigger, HTTPTrigger ret = SFNWorkflow( diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index f5a1c43c..aee7b752 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -14,13 +14,11 @@ from sebs.azure.workflow import AzureWorkflow from sebs.azure.config import AzureConfig, AzureResources from sebs.azure.triggers import AzureTrigger, HTTPTrigger -from sebs.faas.function import Trigger from sebs.code_package import CodePackage from sebs.cache import Cache from 
sebs.config import SeBSConfig from sebs.utils import LoggingHandlers, execute -from ..faas.function import Function, ExecutionResult -from ..faas.workflow import Workflow +from ..faas.benchmark import Function, ExecutionResult, Workflow, Trigger from ..faas.storage import PersistentStorage from ..faas.system import System @@ -275,7 +273,7 @@ def _mount_function_code(self, code_package: CodePackage): self.cli_instance.upload_package( code_package.code_location, "/mnt/function/") - def default_function_name(self, code_package: CodePackage) -> str: + def default_benchmark_name(self, code_package: CodePackage) -> str: """ Functionapp names must be globally unique in Azure. """ @@ -365,11 +363,11 @@ def create_function(self, code_package: CodePackage, func_name: str) -> AzureFun # update existing function app self.update_function(function, code_package) - self.cache_client.add_function( + self.cache_client.add_benchmark( deployment_name=self.name(), language_name=language, code_package=code_package, - function=function, + benchmark=function, ) return function @@ -458,12 +456,12 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Azur # update existing function app self.update_function(workflow, code_package) - # self.cache_client.add_function( - # deployment_name=self.name(), - # language_name=language, - # code_package=code_package, - # function=function, - # ) + self.cache_client.add_benchmark( + deployment_name=self.name(), + language_name=language, + code_package=code_package, + benchmark=workflow, + ) return workflow """ diff --git a/sebs/azure/function.py b/sebs/azure/function.py index ade7e980..4f0a9671 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -1,5 +1,5 @@ from sebs.azure.config import AzureResources -from sebs.faas.function import Function +from sebs.faas.benchmark import Function class AzureFunction(Function): @@ -23,7 +23,7 @@ def serialize(self) -> dict: def deserialize(cached_config: dict) -> Function: ret = AzureFunction( cached_config["name"], - cached_config["benchmark"], + cached_config["code_package"], cached_config["hash"], AzureResources.Storage.deserialize(cached_config["function_storage"]), ) diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index f746385f..a8f9a180 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional # noqa from sebs.azure.config import AzureResources -from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.benchmark import ExecutionResult, Trigger class AzureTrigger(Trigger): diff --git a/sebs/azure/workflow.py b/sebs/azure/workflow.py index 0a568325..353fb3c4 100644 --- a/sebs/azure/workflow.py +++ b/sebs/azure/workflow.py @@ -1,5 +1,5 @@ from sebs.azure.config import AzureResources -from sebs.faas.workflow import Workflow +from sebs.faas.benchmark import Workflow class AzureWorkflow(Workflow): @@ -23,7 +23,7 @@ def serialize(self) -> dict: def deserialize(cached_config: dict) -> Workflow: ret = AzureWorkflow( cached_config["name"], - cached_config["benchmark"], + cached_config["code_package"], cached_config["hash"], AzureResources.Storage.deserialize(cached_config["function_storage"]), ) diff --git a/sebs/cache.py b/sebs/cache.py index 29112cfa..eba3d735 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from sebs.code_package import CodePackage - from sebs.faas.function import Function + from sebs.faas.benchmark import Benchmark def update(d, u): @@ -45,7 +45,7 @@ class 
Cache(LoggingBase): def __init__(self, cache_dir: str): super().__init__() self.cache_dir = os.path.abspath(cache_dir) - self.ignore_functions: bool = False + self.ignore_benchmarks: bool = False self.ignore_storage: bool = False self._lock = threading.RLock() if not os.path.exists(self.cache_dir): @@ -129,12 +129,12 @@ def get_code_package( else: return None - def get_functions( + def get_benchmarks( self, deployment: str, benchmark: str, language: str ) -> Optional[Dict[str, Any]]: cfg = self.get_benchmark_config(deployment, benchmark) - if cfg and language in cfg and not self.ignore_functions: - return cfg[language]["functions"] + if cfg and language in cfg and not self.ignore_benchmarks: + return cfg[language]["benchmarks"] else: return None @@ -183,7 +183,7 @@ def add_code_package(self, deployment_name: str, language_name: str, code_packag shutil.copy2(code_package.code_location, cached_dir) language_config: Dict[str, Any] = { "code_package": code_package.serialize(), - "functions": {}, + "benchmarks": {}, } # don't store absolute path to avoid problems with moving cache dir relative_cached_loc = os.path.relpath(cached_location, self.cache_dir) @@ -250,7 +250,7 @@ def update_code_package( self.add_code_package(deployment_name, language_name, code_package) """ - Add new function to cache. + Add new benchmark to cache. :param deployment: :param benchmark: @@ -260,14 +260,14 @@ def update_code_package( :param storage_config: Configuration of storage buckets. """ - def add_function( + def add_benchmark( self, deployment_name: str, language_name: str, code_package: "CodePackage", - function: "Function", + benchmark: "Benchmark", ): - if self.ignore_functions: + if self.ignore_benchmarks: return with self._lock: benchmark_dir = os.path.join(self.cache_dir, code_package.name) @@ -275,29 +275,29 @@ def add_function( cache_config = os.path.join(benchmark_dir, "config.json") if os.path.exists(cache_config): - functions_config: Dict[str, Any] = {function.name: {**function.serialize()}} + benchmarks_config: Dict[str, Any] = {benchmark.name: {**benchmark.serialize()}} with open(cache_config, "r") as fp: cached_config = json.load(fp) - if "functions" not in cached_config[deployment_name][language]: - cached_config[deployment_name][language]["functions"] = functions_config + if "benchmarks" not in cached_config[deployment_name][language]: + cached_config[deployment_name][language]["benchmarks"] = benchmarks_config else: - cached_config[deployment_name][language]["functions"].update( - functions_config + cached_config[deployment_name][language]["benchmarks"].update( + benchmarks_config ) config = cached_config with open(cache_config, "w") as fp: json.dump(config, fp, indent=2) else: raise RuntimeError( - "Can't cache function {} for a non-existing code package!".format(function.name) + "Can't cache benchmark {} for a non-existing code package!".format(function.name) ) - def update_function(self, function: "Function"): - if self.ignore_functions: + def update_benchmark(self, benchmark: "Benchmark"): + if self.ignore_benchmarks: return with self._lock: - benchmark_dir = os.path.join(self.cache_dir, function.benchmark) + benchmark_dir = os.path.join(self.cache_dir, benchmark.code_package) cache_config = os.path.join(benchmark_dir, "config.json") if os.path.exists(cache_config): @@ -306,16 +306,16 @@ def update_function(self, function: "Function"): cached_config = json.load(fp) for deployment, cfg in cached_config.items(): for language, cfg2 in cfg.items(): - if "functions" not in cfg2: + if "benchmarks" 
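# A hedged sketch of the per-benchmark config.json that add_code_package() and
# add_benchmark() above maintain in the cache directory; key names follow the code,
# every concrete value is illustrative.
cached_config = {
    "aws": {
        "python": {
            "code_package": {
                "location": "110.dynamic-html/aws/python/code",
                "size": 1048576,
                "hash": "0123456789abcdef",
            },
            "benchmarks": {
                "110-dynamic-html-python-256": {
                    "name": "110-dynamic-html-python-256",
                    "hash": "0123456789abcdef",
                    "code_package": "110.dynamic-html",
                    "triggers": [],
                }
            },
        }
    }
}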
not in cfg2: continue - for name, func in cfg2["functions"].items(): + for name, func in cfg2["benchmarks"].items(): if name == function.name: - cached_config[deployment][language]["functions"][ + cached_config[deployment][language]["benchmarks"][ name - ] = function.serialize() + ] = benchmark.serialize() with open(cache_config, "w") as fp: json.dump(cached_config, fp, indent=2) else: raise RuntimeError( - "Can't cache function {} for a non-existing code package!".format(function.name) + "Can't cache benchmark {} for a non-existing code package!".format(function.name) ) diff --git a/sebs/code_package.py b/sebs/code_package.py index aa1a90e1..ea62ba0e 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -217,7 +217,7 @@ def query_cache(self): benchmark=self._name, language=self.language_name, ) - self._benchmarks = self._cache_client.get_functions( + self._benchmarks = self._cache_client.get_benchmarks( deployment=self._deployment_name, benchmark=self._name, language=self.language_name, diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 8524c5a4..4d55c66c 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -7,7 +7,7 @@ from multiprocessing.pool import AsyncResult, ThreadPool from sebs.faas.system import System as FaaSSystem -from sebs.faas.function import Function, Trigger +from sebs.faas.benchmark import Function, Trigger from sebs.experiments import Experiment, ExperimentResult from sebs.experiments.config import Config as ExperimentConfig from sebs.utils import serialize @@ -183,7 +183,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): ) self._deployment_client = deployment_client self._result = ExperimentResult(self.config, deployment_client.config) - name = deployment_client.default_function_name(self._benchmark) + name = deployment_client.default_benchmark_name(self._benchmark) self.functions_names = [ f"{name}-{time}-{copy}" for time in self.times diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index 90816432..11bbe403 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -73,7 +73,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): # deploy network test function from sebs import SeBS # noqa - from sebs.faas.function import Trigger + from sebs.faas.benchmark import Trigger self._benchmark = sebs_client.get_benchmark( "030.clock-synchronization", deployment_client, self.config diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index 303f6f53..a95506de 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -10,7 +10,7 @@ from multiprocessing.dummy import Pool as ThreadPool from sebs.faas.system import System as FaaSSystem -from sebs.faas.function import Trigger +from sebs.faas.benchmark import Trigger from sebs.experiments.experiment import Experiment from sebs.experiments.config import Config as ExperimentConfig diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 3fc81482..2457c0e0 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -6,7 +6,7 @@ from typing import List, TYPE_CHECKING from sebs.faas.system import System as FaaSSystem -from sebs.faas.function import Trigger +from sebs.faas.benchmark import Trigger from sebs.experiments.experiment import Experiment from sebs.experiments.result import Result as 
ExperimentResult from sebs.experiments.config import Config as ExperimentConfig diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index 1a56684c..5087b904 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -3,7 +3,7 @@ from sebs.cache import Cache from sebs.faas.config import Config as DeploymentConfig -from sebs.faas.function import Function, ExecutionResult +from sebs.faas.benchmark import Function, ExecutionResult from sebs.utils import LoggingHandlers from sebs.experiments.config import Config as ExperimentConfig diff --git a/sebs/faas/benchmark.py b/sebs/faas/benchmark.py new file mode 100644 index 00000000..891a9924 --- /dev/null +++ b/sebs/faas/benchmark.py @@ -0,0 +1,337 @@ +import json +from abc import ABC +from abc import abstractmethod +import concurrent.futures +from datetime import datetime, timedelta +from enum import Enum +from typing import Callable, Dict, List, Optional # noqa + +from google.cloud.workflows.executions_v1beta.types import Execution + +from sebs.utils import LoggingBase + +""" + Times are reported in microseconds. +""" + + +class ExecutionTimes: + + client: int + client_begin: datetime + client_end: datetime + benchmark: int + initialization: int + http_startup: int + http_first_byte_return: int + + def __init__(self): + self.client = 0 + self.initialization = 0 + self.benchmark = 0 + + @staticmethod + def deserialize(cached_obj: dict) -> "ExecutionTimes": + ret = ExecutionTimes() + ret.__dict__.update(cached_obj) + return ret + + +class ProviderTimes: + + initialization: int + execution: int + + def __init__(self): + self.execution = 0 + self.initialization = 0 + + @staticmethod + def deserialize(cached_obj: dict) -> "ProviderTimes": + ret = ProviderTimes() + ret.__dict__.update(cached_obj) + return ret + + +class ExecutionStats: + + memory_used: Optional[float] + cold_start: bool + failure: bool + + def __init__(self): + self.memory_used = None + self.cold_start = False + self.failure = False + + @staticmethod + def deserialize(cached_obj: dict) -> "ExecutionStats": + ret = ExecutionStats() + ret.__dict__.update(cached_obj) + return ret + + +class ExecutionBilling: + + _memory: Optional[int] + _billed_time: Optional[int] + _gb_seconds: int + + def __init__(self): + self.memory = None + self.billed_time = None + self.gb_seconds = 0 + + @property + def memory(self) -> Optional[int]: + return self._memory + + @memory.setter + def memory(self, val: int): + self._memory = val + + @property + def billed_time(self) -> Optional[int]: + return self._billed_time + + @billed_time.setter + def billed_time(self, val: int): + self._billed_time = val + + @property + def gb_seconds(self) -> int: + return self._gb_seconds + + @gb_seconds.setter + def gb_seconds(self, val: int): + self._gb_seconds = val + + @staticmethod + def deserialize(cached_obj: dict) -> "ExecutionBilling": + ret = ExecutionBilling() + ret.__dict__.update(cached_obj) + return ret + + +class ExecutionResult: + + output: dict + request_id: str + times: ExecutionTimes + provider_times: ProviderTimes + stats: ExecutionStats + billing: ExecutionBilling + + def __init__(self): + self.output = {} + self.request_id = "" + self.times = ExecutionTimes() + self.provider_times = ProviderTimes() + self.stats = ExecutionStats() + self.billing = ExecutionBilling() + + @staticmethod + def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": + ret = ExecutionResult() + ret.times.client_begin = client_time_begin + ret.times.client_end = 
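# A worked example of the microsecond bookkeeping used by these result classes:
# dividing a timedelta by timedelta(microseconds=1) yields the elapsed time in
# microseconds, which is how times.client is filled in just below.
from datetime import datetime, timedelta

begin = datetime(2022, 3, 25, 12, 0, 0)
end = datetime(2022, 3, 25, 12, 0, 1, 500)                 # 1.0005 s later
elapsed_us = int((end - begin) / timedelta(microseconds=1))
assert elapsed_us == 1_000_500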
client_time_end + ret.times.client = int( + (client_time_end - client_time_begin) / timedelta(microseconds=1)) + return ret + + def parse_benchmark_output(self, output: dict): + self.output = output + self.stats.cold_start = self.output["is_cold"] + self.times.benchmark = int( + ( + datetime.fromtimestamp(float(self.output["end"])) + - datetime.fromtimestamp(float(self.output["begin"])) + ) + / timedelta(microseconds=1) + ) + + def parse_benchmark_execution(self, execution: Execution): + self.output = json.loads(execution.result) + self.times.benchmark = int( + (execution.start_time - execution.end_time) + / timedelta(microseconds=1) + ) + + @staticmethod + def deserialize(cached_config: dict) -> "ExecutionResult": + ret = ExecutionResult() + ret.times = ExecutionTimes.deserialize(cached_config["times"]) + ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) + ret.provider_times = ProviderTimes.deserialize( + cached_config["provider_times"]) + ret.stats = ExecutionStats.deserialize(cached_config["stats"]) + ret.request_id = cached_config["request_id"] + ret.output = cached_config["output"] + return ret + + +""" + Function trigger and implementation of invocation. + + FIXME: implement a generic HTTP invocation and specialize input and output + processing in classes. +""" + + +class Trigger(ABC, LoggingBase): + class TriggerType(Enum): + HTTP = "http" + LIBRARY = "library" + STORAGE = "storage" + + @staticmethod + def get(name: str) -> "Trigger.TriggerType": + for member in Trigger.TriggerType: + if member.value.lower() == name.lower(): + return member + raise Exception("Unknown trigger type {}".format(member)) + + def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: + import pycurl + from io import BytesIO + + c = pycurl.Curl() + c.setopt(pycurl.HTTPHEADER, ["Content-Type: application/json"]) + c.setopt(pycurl.POST, 1) + c.setopt(pycurl.URL, url) + data = BytesIO() + c.setopt(pycurl.WRITEFUNCTION, data.write) + + c.setopt(pycurl.POSTFIELDS, json.dumps(payload)) + begin = datetime.now() + c.perform() + end = datetime.now() + status_code = c.getinfo(pycurl.RESPONSE_CODE) + conn_time = c.getinfo(pycurl.PRETRANSFER_TIME) + receive_time = c.getinfo(pycurl.STARTTRANSFER_TIME) + + try: + output = json.loads(data.getvalue()) + + if status_code != 200: + self.logging.error( + "Invocation on URL {} failed with status code {}!".format(url, status_code)) + self.logging.error("Output: {}".format(output)) + raise RuntimeError( + f"Failed invocation of function! Output: {output}") + + self.logging.debug("Invoke of function was successful") + result = ExecutionResult.from_times(begin, end) + result.times.http_startup = conn_time + result.times.http_first_byte_return = receive_time + result.request_id = output["request_id"] + # General benchmark output parsing + result.parse_benchmark_output(output) + return result + except json.decoder.JSONDecodeError: + self.logging.error( + "Invocation on URL {} failed with status code {}!".format(url, status_code)) + self.logging.error("Output: {}".format(data.getvalue().decode())) + raise RuntimeError( + f"Failed invocation of function! 
Output: {data.getvalue().decode()}") + + # FIXME: 3.7+, future annotations + @staticmethod + @abstractmethod + def trigger_type() -> "Trigger.TriggerType": + pass + + @abstractmethod + def sync_invoke(self, payload: dict) -> ExecutionResult: + pass + + @abstractmethod + def async_invoke(self, payload: dict) -> concurrent.futures.Future: + pass + + @abstractmethod + def serialize(self) -> dict: + pass + + @classmethod + @abstractmethod + def deserialize(cls, cached_config: dict) -> "Trigger": + pass + + +""" + Abstraction base class for FaaS benchmarks. Contains a list of associated triggers + and might implement non-trigger execution if supported by the SDK. + Example: direct function invocation through AWS boto3 SDK. +""" + + +class Benchmark(LoggingBase): + def __init__(self, code_package: str, name: str, code_hash: str): + super().__init__() + self._code_package = code_package + self._name = name + self._code_package_hash = code_hash + self._updated_code = False + self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} + + @property + def name(self): + return self._name + + @property + def code_package(self): + return self._code_package + + @property + def code_package_hash(self): + return self._code_package_hash + + @code_package_hash.setter + def code_package_hash(self, new_hash: str): + self._code_package_hash = new_hash + + @property + def updated_code(self) -> bool: + return self._updated_code + + @updated_code.setter + def updated_code(self, val: bool): + self._updated_code = val + + def triggers_all(self) -> List[Trigger]: + return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] + + def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: + try: + return self._triggers[trigger_type] + except KeyError: + return [] + + def add_trigger(self, trigger: Trigger): + if trigger.trigger_type() not in self._triggers: + self._triggers[trigger.trigger_type()] = [trigger] + else: + self._triggers[trigger.trigger_type()].append(trigger) + + def serialize(self) -> dict: + return { + "name": self._name, + "hash": self._code_package_hash, + "code_package": self._code_package, + "triggers": [ + obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers + ], + } + + @staticmethod + @abstractmethod + def deserialize(cached_config: dict) -> "Function": + pass + + +class Function(Benchmark): + pass + +class Workflow(Benchmark): + pass \ No newline at end of file diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 6c0414ec..6a6e41b2 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -8,8 +8,7 @@ from sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig -from sebs.faas.function import Function, Trigger, ExecutionResult -from sebs.faas.workflow import Workflow +from sebs.faas.benchmark import Function, Trigger, ExecutionResult, Workflow from sebs.faas.storage import PersistentStorage from sebs.utils import LoggingBase from .config import Config @@ -150,7 +149,7 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non ) if not func_name: - func_name = self.default_function_name(code_package) + func_name = self.default_benchmark_name(code_package) rebuilt, _ = code_package.build(self.package_code, False) """ @@ -169,11 +168,11 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non ) self.logging.info("Creating new function! 
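# A hedged usage sketch of the shared Benchmark base class defined above: platform
# classes such as LambdaFunction or SFNWorkflow inherit this trigger bookkeeping;
# the subclass and values here are illustrative only.
class ExampleFunction(Function):
    @staticmethod
    def deserialize(cached_config: dict) -> "ExampleFunction":
        return ExampleFunction(
            cached_config["code_package"], cached_config["name"], cached_config["hash"]
        )

func = ExampleFunction("110.dynamic-html", "110-dynamic-html-python-256", "0123456789abcdef")
# add_trigger() groups triggers by their TriggerType, triggers(t) filters by type,
# and serialize() emits the name/hash/code_package/triggers dict cached on disk.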
Reason: " + msg) function = self.create_function(code_package, func_name) - self.cache_client.add_function( + self.cache_client.add_benchmark( deployment_name=self.name(), language_name=code_package.language_name, code_package=code_package, - function=function, + benchmark=function, ) code_package.query_cache() return function @@ -198,11 +197,11 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non self.update_function(function, code_package) function.code_package_hash = code_package.hash function.updated_code = True - self.cache_client.add_function( + self.cache_client.add_benchmark( deployment_name=self.name(), language_name=code_package.language_name, code_package=code_package, - function=function, + benchmark=function, ) code_package.query_cache() return function @@ -220,7 +219,7 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = ) # if not workflow_name: - # workflow_name = self.default_function_name(code_package) + # workflow_name = self.default_benchmark_name(code_package) rebuilt, _ = code_package.build(self.package_code, True) # FIXME: cache workflows @@ -242,11 +241,11 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = ) self.logging.info("Creating new function! Reason: " + msg) function = self.create_function(code_package, func_name) - self.cache_client.add_function( + self.cache_client.add_benchmark( deployment_name=self.name(), language_name=code_package.language_name, code_package=code_package, - function=function, + benchmark=function, ) code_package.query_cache() return function @@ -271,17 +270,17 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = self.update_function(function, code_package) function.code_package_hash = code_package.hash function.updated_code = True - self.cache_client.add_function( + self.cache_client.add_benchmark( deployment_name=self.name(), language_name=code_package.language_name, code_package=code_package, - function=function, + benchmark=function, ) code_package.query_cache() return function @abstractmethod - def default_function_name(self, code_package: CodePackage) -> str: + def default_benchmark_name(self, code_package: CodePackage) -> str: pass @abstractmethod diff --git a/sebs/faas/workflow.py b/sebs/faas/workflow.py deleted file mode 100644 index 86a7b5c6..00000000 --- a/sebs/faas/workflow.py +++ /dev/null @@ -1,75 +0,0 @@ -from abc import abstractmethod -from typing import Callable, Dict, List # noqa - -from sebs.utils import LoggingBase -from .function import Trigger - -""" - Abstraction base class for FaaS function. Contains a list of associated triggers - and might implement non-trigger execution if supported by the SDK. - Example: direct function invocation through AWS boto3 SDK. 
-""" - - -class Workflow(LoggingBase): - def __init__(self, benchmark: str, name: str, code_hash: str): - super().__init__() - self._benchmark = benchmark - self._name = name - self._code_package_hash = code_hash - self._updated_code = False - self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} - - @property - def name(self): - return self._name - - @property - def benchmark(self): - return self._benchmark - - @property - def code_package_hash(self): - return self._code_package_hash - - @code_package_hash.setter - def code_package_hash(self, new_hash: str): - self._code_package_hash = new_hash - - @property - def updated_code(self) -> bool: - return self._updated_code - - @updated_code.setter - def updated_code(self, val: bool): - self._updated_code = val - - def triggers_all(self) -> List[Trigger]: - return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] - - def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: - try: - return self._triggers[trigger_type] - except KeyError: - return [] - - def add_trigger(self, trigger: Trigger): - if trigger.trigger_type() not in self._triggers: - self._triggers[trigger.trigger_type()] = [trigger] - else: - self._triggers[trigger.trigger_type()].append(trigger) - - def serialize(self) -> dict: - return { - "name": self._name, - "hash": self._code_package_hash, - "benchmark": self._benchmark, - "triggers": [ - obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers - ], - } - - @staticmethod - @abstractmethod - def deserialize(cached_config: dict) -> "Workflow": - pass diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 80d32096..fe1f981e 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -1,6 +1,6 @@ from typing import cast, Optional -from sebs.faas.function import Function +from sebs.faas.benchmark import Function from sebs.gcp.storage import GCPStorage @@ -33,12 +33,12 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": - from sebs.faas.function import Trigger + from sebs.faas.benchmark import Trigger from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger ret = GCPFunction( cached_config["name"], - cached_config["benchmark"], + cached_config["code_package"], cached_config["hash"], cached_config["timeout"], cached_config["memory"], diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 20563bd6..3d421713 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -16,8 +16,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.code_package import CodePackage -from ..faas.function import Function, Trigger -from ..faas.workflow import Workflow +from ..faas.benchmark import Function, Trigger, Workflow from .storage import PersistentStorage from ..faas.system import System from sebs.gcp.config import GCPConfig @@ -108,7 +107,7 @@ def get_storage( return self.storage @staticmethod - def default_function_name(code_package: CodePackage) -> str: + def default_benchmark_name(code_package: CodePackage) -> str: # Create function name func_name = "{}-{}-{}".format( code_package.name, @@ -314,12 +313,12 @@ def create_function_trigger(self, function: Function, trigger.logging_handlers = self.logging_handlers function.add_trigger(trigger) - self.cache_client.update_function(function) + self.cache_client.update_benchmark(function) return trigger def cached_function(self, function: Function): - from sebs.faas.function import Trigger + from sebs.faas.benchmark import Trigger from 
sebs.gcp.triggers import LibraryTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): @@ -446,7 +445,7 @@ def create_workflow_trigger(self, workflow: Workflow, trigger.logging_handlers = self.logging_handlers workflow.add_trigger(trigger) - # self.cache_client.update_workflow(workflow) + self.cache_client.update_benchmark(workflow) return trigger def update_workflow(self, workflow: Workflow, code_package: CodePackage): diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 3424234f..88dc9e12 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -8,7 +8,7 @@ from google.cloud.workflows.executions_v1beta.types import Execution from sebs.gcp.gcp import GCP -from sebs.faas.function import ExecutionResult, Trigger +from sebs.faas.benchmark import ExecutionResult, Trigger class LibraryTrigger(Trigger): diff --git a/sebs/gcp/workflow.py b/sebs/gcp/workflow.py index ed8e10dc..3f8be029 100644 --- a/sebs/gcp/workflow.py +++ b/sebs/gcp/workflow.py @@ -1,6 +1,6 @@ from typing import cast, Optional -from sebs.faas.workflow import Workflow +from sebs.faas.benchmark import Workflow from sebs.gcp.storage import GCPStorage @@ -33,12 +33,12 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPWorkflow": - from sebs.faas.function import Trigger + from sebs.faas.benchmark import Trigger from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger ret = GCPWorkflow( cached_config["name"], - cached_config["benchmark"], + cached_config["code_package"], cached_config["hash"], cached_config["timeout"], cached_config["memory"], diff --git a/sebs/local/function.py b/sebs/local/function.py index cb6e19b5..1516390b 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -2,7 +2,7 @@ import docker import json -from sebs.faas.function import ExecutionResult, Function, Trigger +from sebs.faas.benchmark import ExecutionResult, Function, Trigger class HTTPTrigger(Trigger): diff --git a/sebs/local/local.py b/sebs/local/local.py index c89bd01a..8e5ea4f6 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -11,7 +11,7 @@ from sebs.local.config import LocalConfig from sebs.local.storage import Minio from sebs.local.function import LocalFunction -from sebs.faas.function import Function, ExecutionResult, Trigger +from sebs.faas.benchmark import Function, ExecutionResult, Trigger from sebs.faas.storage import PersistentStorage from sebs.faas.system import System from sebs.code_package import CodePackage @@ -226,7 +226,7 @@ def enforce_cold_start(self, functions: List[Function], code_package: CodePackag raise NotImplementedError() @staticmethod - def default_function_name(code_package: CodePackage) -> str: + def default_benchmark_name(code_package: CodePackage) -> str: # Create function name func_name = "{}-{}-{}".format( code_package.name, diff --git a/sebs/regression.py b/sebs/regression.py index 37be04aa..9c7e5bf8 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -4,7 +4,7 @@ from time import sleep from typing import cast, Dict, Optional, Set, TYPE_CHECKING -from sebs.faas.function import Trigger +from sebs.faas.benchmark import Trigger if TYPE_CHECKING: from sebs import SeBS @@ -48,7 +48,7 @@ def test(self): replace_existing=experiment_config.update_storage ) func = deployment_client.get_function( - benchmark, deployment_client.default_function_name(benchmark) + benchmark, deployment_client.default_benchmark_name(benchmark) ) input_config = benchmark.prepare_input(storage=storage, size="test") diff --git 
a/tests/aws/create_function.py b/tests/aws/create_function.py index 4b8b1f3f..3fc20dbb 100644 --- a/tests/aws/create_function.py +++ b/tests/aws/create_function.py @@ -68,7 +68,7 @@ def setUpClass(cls): benchmark = cls.client.get_benchmark( cls.benchmark, cls.tmp_dir.name, deployment_client, experiment_config ) - func_name = deployment_client.default_function_name(benchmark) + func_name = deployment_client.default_benchmark_name(benchmark) for suffix in cls.function_name_suffixes: deployment_client.delete_function(func_name + suffix) @@ -91,7 +91,7 @@ def tearDownClass(cls): benchmark = cls.client.get_benchmark( cls.benchmark, cls.tmp_dir.name, deployment_client, experiment_config ) - func_name = deployment_client.default_function_name(benchmark) + func_name = deployment_client.default_benchmark_name(benchmark) for suffix in cls.function_name_suffixes: deployment_client.delete_function(func_name + suffix) @@ -131,7 +131,7 @@ def test_create_function(self): for language in ["python", "nodejs"]: benchmark, deployment_client, experiment_config = self.generate_benchmark(tmp_dir, language) - func_name = deployment_client.default_function_name(benchmark) + self.function_name_suffixes[0] + func_name = deployment_client.default_benchmark_name(benchmark) + self.function_name_suffixes[0] func = deployment_client.get_function(benchmark, func_name) self.assertIsInstance(func, sebs.aws.LambdaFunction) self.assertEqual(func.name, func_name) @@ -143,7 +143,7 @@ def test_retrieve_cache(self): benchmark, deployment_client, experiment_config = self.generate_benchmark(tmp_dir, language) # generate default variant - func_name = deployment_client.default_function_name(benchmark) + self.function_name_suffixes[1] + func_name = deployment_client.default_benchmark_name(benchmark) + self.function_name_suffixes[1] func = deployment_client.get_function(benchmark, func_name) timestamp = os.path.getmtime(benchmark.code_location) self.assertIsInstance(func, sebs.aws.LambdaFunction) @@ -175,7 +175,7 @@ def test_rebuild_function(self): benchmark, deployment_client, experiment_config = self.generate_benchmark(tmp_dir, language) # generate default variant - func_name = deployment_client.default_function_name(benchmark) + self.function_name_suffixes[2] + func_name = deployment_client.default_benchmark_name(benchmark) + self.function_name_suffixes[2] func = deployment_client.get_function(benchmark, func_name) timestamp = os.path.getmtime(benchmark.code_location) self.assertIsInstance(func, sebs.aws.LambdaFunction) @@ -209,7 +209,7 @@ def test_update_function(self): benchmark, deployment_client, experiment_config = self.generate_benchmark(tmp_dir, language) # generate default variant - func_name = deployment_client.default_function_name(benchmark) + self.function_name_suffixes[3] + func_name = deployment_client.default_benchmark_name(benchmark) + self.function_name_suffixes[3] func = deployment_client.get_function(benchmark, func_name) timestamp = os.path.getmtime(benchmark.code_location) self.assertIsInstance(func, sebs.aws.LambdaFunction) diff --git a/tests/aws/invoke_function_http.py b/tests/aws/invoke_function_http.py index c603fe88..39f0776a 100644 --- a/tests/aws/invoke_function_http.py +++ b/tests/aws/invoke_function_http.py @@ -42,8 +42,8 @@ def test_invoke_sync_python(self): bench_input = benchmark.prepare_input( storage=deployment_client.get_storage(), size="test" ) - func = deployment_client.get_function(benchmark, '{}-http'.format(sebs.aws.AWS.default_function_name(benchmark))) - from sebs.faas.function import 
Trigger + func = deployment_client.get_function(benchmark, '{}-http'.format(sebs.aws.AWS.default_benchmark_name(benchmark))) + from sebs.faas.benchmark import Trigger deployment_client.create_trigger(func, Trigger.TriggerType.HTTP) self.invoke_sync(func, bench_input) @@ -70,8 +70,8 @@ def test_invoke_sync_nodejs(self): bench_input = benchmark.prepare_input( storage=deployment_client.get_storage(), size="test" ) - func = deployment_client.get_function(benchmark, '{}-http'.format(sebs.aws.AWS.default_function_name(benchmark))) - from sebs.faas.function import Trigger + func = deployment_client.get_function(benchmark, '{}-http'.format(sebs.aws.AWS.default_benchmark_name(benchmark))) + from sebs.faas.benchmark import Trigger deployment_client.create_trigger(func, Trigger.TriggerType.HTTP) self.invoke_sync(func, bench_input) diff --git a/tests/regression.py b/tests/regression.py index 4ec40426..14446502 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -63,7 +63,7 @@ def test(self): input_config = benchmark.prepare_input(storage=storage, size="test") func = deployment_client.get_function( benchmark, - deployment_client.default_function_name(benchmark) + deployment_client.default_benchmark_name(benchmark) ) ret = func.triggers[0].sync_invoke(input_config) if ret.stats.failure: From 8d1a9366f338f367960bccf4222ee972e840bd78 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 25 Mar 2022 13:15:19 +0100 Subject: [PATCH 28/68] Cache workflows --- sebs/aws/aws.py | 29 +++++++++++++-------- sebs/aws/workflow.py | 8 +++--- sebs/azure/azure.py | 22 ++++++---------- sebs/faas/system.py | 60 +++++++++++++++++++++++--------------------- sebs/gcp/gcp.py | 10 +++++--- sebs/local/local.py | 8 ++++-- 6 files changed, 75 insertions(+), 62 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index e11a7bf0..f8a345e9 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -19,7 +19,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers -from sebs.faas.benchmark import Function, ExecutionResult, Trigger, Workflow +from sebs.faas.benchmark import Benchmark, Function, ExecutionResult, Trigger, Workflow from sebs.faas.storage import PersistentStorage from sebs.faas.system import System @@ -41,6 +41,10 @@ def typename(): def function_type() -> "Type[Function]": return LambdaFunction + @staticmethod + def workflow_type() -> "Type[Workflow]": + return SFNWorkflow + @property def config(self) -> AWSConfig: return self._config @@ -289,14 +293,14 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaF return lambda_function - def cached_function(self, function: Function): + def cached_benchmark(self, benchmark: Benchmark): from sebs.aws.triggers import LibraryTrigger - for trigger in function.triggers(Trigger.TriggerType.LIBRARY): + for trigger in benchmark.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).deployment_client = self - for trigger in function.triggers(Trigger.TriggerType.HTTP): + for trigger in benchmark.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers """ @@ -451,16 +455,21 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN return workflow - def update_workflow(self, workflow: Workflow, definition: str, code_package: CodePackage): + def update_workflow(self, workflow: Workflow, code_package: CodePackage): workflow = cast(SFNWorkflow, workflow) + for func in 
workflow.functions: + print(func) + self.update_function(func, code_package) + + # Todo: update workflow definition # and update config - self.sfn_client.update_state_machine( - stateMachineArn=workflow.arn, - definition=json.dumps(definition), - roleArn=self.config.resources.lambda_role(self.session), - ) + # self.sfn_client.update_state_machine( + # stateMachineArn=workflow.arn, + # definition=json.dumps(definition), + # roleArn=self.config.resources.lambda_role(self.session), + # ) self.logging.info("Published new workflow code") def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index 7d73acaa..599d7e26 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -27,7 +27,7 @@ def typename() -> str: def serialize(self) -> dict: return { **super().serialize(), - "functions": self.functions, + "functions": [f.serialize() for f in self.functions], "arn": self.arn, "role": self.role } @@ -37,11 +37,13 @@ def deserialize(cached_config: dict) -> "SFNWorkflow": from sebs.faas.benchmark import Trigger from sebs.aws.triggers import WorkflowLibraryTrigger, HTTPTrigger + funcs = [LambdaFunction.deserialize(f) for f in cached_config["functions"]] ret = SFNWorkflow( cached_config["name"], - cached_config["functions"], - cached_config["hash"], + funcs, + cached_config["code_package"], cached_config["arn"], + cached_config["hash"], cached_config["role"] ) for trigger in cached_config["triggers"]: diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index aee7b752..8eb3dd7b 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -18,7 +18,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers, execute -from ..faas.benchmark import Function, ExecutionResult, Workflow, Trigger +from ..faas.benchmark import Benchmark, Function, ExecutionResult, Workflow, Trigger from ..faas.storage import PersistentStorage from ..faas.system import System @@ -44,6 +44,10 @@ def config(self) -> AzureConfig: def function_type() -> Type[Function]: return AzureFunction + @staticmethod + def workflow_type() -> Type[Workflow]: + return AzureWorkflow + def __init__( self, sebs_config: SeBSConfig, @@ -363,19 +367,13 @@ def create_function(self, code_package: CodePackage, func_name: str) -> AzureFun # update existing function app self.update_function(function, code_package) - self.cache_client.add_benchmark( - deployment_name=self.name(), - language_name=language, - code_package=code_package, - benchmark=function, - ) return function - def cached_function(self, function: Function): + def cached_benchmark(self, benchmark: Benchmark): data_storage_account = self.config.resources.data_storage_account( self.cli_instance) - for trigger in function.triggers_all(): + for trigger in benchmark.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account @@ -456,12 +454,6 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Azur # update existing function app self.update_function(workflow, code_package) - self.cache_client.add_benchmark( - deployment_name=self.name(), - language_name=language, - code_package=code_package, - benchmark=workflow, - ) return workflow """ diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 6a6e41b2..60ef2024 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -8,7 +8,7 @@ from 
sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig -from sebs.faas.benchmark import Function, Trigger, ExecutionResult, Workflow +from sebs.faas.benchmark import Benchmark, Function, Trigger, ExecutionResult, Workflow from sebs.faas.storage import PersistentStorage from sebs.utils import LoggingBase from .config import Config @@ -65,6 +65,11 @@ def config(self) -> Config: def function_type() -> "Type[Function]": pass + @staticmethod + @abstractmethod + def workflow_type() -> "Type[Workflow]": + pass + """ Initialize the system. After the call the local or remot FaaS system should be ready to allocate functions, manage @@ -116,7 +121,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Work pass @abstractmethod - def cached_function(self, function: Function): + def cached_benchmark(self, benchmark: Benchmark): pass @abstractmethod @@ -181,7 +186,7 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non cached_function = benchmarks[func_name] code_location = code_package.code_location function = self.function_type().deserialize(cached_function) - self.cached_function(function) + self.cached_benchmark(function) self.logging.info( "Using cached function {fname} in {loc}".format( fname=func_name, loc=code_location) @@ -218,13 +223,10 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = ) ) - # if not workflow_name: - # workflow_name = self.default_benchmark_name(code_package) + if not workflow_name: + workflow_name = self.default_benchmark_name(code_package) rebuilt, _ = code_package.build(self.package_code, True) - # FIXME: cache workflows - return self.create_workflow(code_package, workflow_name) - """ There's no function with that name? a) yes -> create new function. Implementation might check if a function @@ -233,51 +235,51 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = be updated if the local version is different. """ benchmarks = code_package.benchmarks - if not benchmarks or func_name not in benchmarks: + if not benchmarks or workflow_name not in benchmarks: msg = ( - "function name not provided." - if not func_name - else "function {} not found in cache.".format(func_name) + "workflow name not provided." + if not workflow_name + else "workflow {} not found in cache.".format(workflow_name) ) - self.logging.info("Creating new function! Reason: " + msg) - function = self.create_function(code_package, func_name) + self.logging.info("Creating new workflow! Reason: " + msg) + workflow = self.create_workflow(code_package, workflow_name) self.cache_client.add_benchmark( deployment_name=self.name(), language_name=code_package.language_name, code_package=code_package, - benchmark=function, + benchmark=workflow, ) code_package.query_cache() - return function + return workflow else: # retrieve function - cached_function = benchmarks[func_name] + cached_workflow = benchmarks[workflow_name] code_location = code_package.code_location - function = self.function_type().deserialize(cached_function) - self.cached_function(function) + workflow = self.workflow_type().deserialize(cached_workflow) + self.cached_benchmark(workflow) self.logging.info( - "Using cached function {fname} in {loc}".format( - fname=func_name, loc=code_location) + "Using cached workflow {workflow_name} in {loc}".format( + workflow_name=workflow_name, loc=code_location) ) # is the function up-to-date? 
- if function.code_package_hash != code_package.hash or rebuilt: + if workflow.code_package_hash != code_package.hash or rebuilt: self.logging.info( - f"Cached function {func_name} with hash " - f"{function.code_package_hash} is not up to date with " + f"Cached workflow {workflow_name} with hash " + f"{workflow.code_package_hash} is not up to date with " f"current build {code_package.hash} in " f"{code_location}, updating cloud version!" ) - self.update_function(function, code_package) - function.code_package_hash = code_package.hash - function.updated_code = True + self.update_workflow(workflow, code_package) + workflow.code_package_hash = code_package.hash + workflow.updated_code = True self.cache_client.add_benchmark( deployment_name=self.name(), language_name=code_package.language_name, code_package=code_package, - benchmark=function, + benchmark=workflow, ) code_package.query_cache() - return function + return workflow @abstractmethod def default_benchmark_name(self, code_package: CodePackage) -> str: diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 3d421713..a197196a 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -16,7 +16,7 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.code_package import CodePackage -from ..faas.benchmark import Function, Trigger, Workflow +from ..faas.benchmark import Benchmark, Function, Trigger, Workflow from .storage import PersistentStorage from ..faas.system import System from sebs.gcp.config import GCPConfig @@ -64,6 +64,10 @@ def typename(): def function_type() -> "Type[Function]": return GCPFunction + @staticmethod + def workflow_type() -> "Type[Workflow]": + return GCPWorkflow + """ Initialize the system. After the call the local or remote FaaS system should be ready to allocate functions, manage @@ -316,12 +320,12 @@ def create_function_trigger(self, function: Function, self.cache_client.update_benchmark(function) return trigger - def cached_function(self, function: Function): + def cached_benchmark(self, benchmark: Benchmark): from sebs.faas.benchmark import Trigger from sebs.gcp.triggers import LibraryTrigger - for trigger in function.triggers(Trigger.TriggerType.LIBRARY): + for trigger in benchmark.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) gcp_trigger.logging_handlers = self.logging_handlers gcp_trigger.deployment_client = self diff --git a/sebs/local/local.py b/sebs/local/local.py index 8e5ea4f6..92472a10 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -11,7 +11,7 @@ from sebs.local.config import LocalConfig from sebs.local.storage import Minio from sebs.local.function import LocalFunction -from sebs.faas.benchmark import Function, ExecutionResult, Trigger +from sebs.faas.benchmark import Benchmark, Function, ExecutionResult, Trigger from sebs.faas.storage import PersistentStorage from sebs.faas.system import System from sebs.code_package import CodePackage @@ -33,6 +33,10 @@ def typename(): def function_type() -> "Type[Function]": return LocalFunction + @staticmethod + def workflow_type() -> "Type[Workflow]": + raise NotImplementedError() + @property def config(self) -> LocalConfig: return self._config @@ -209,7 +213,7 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T self.cache_client.update_function(function) return trigger - def cached_function(self, function: Function): + def cached_benchmark(self, benchmark: Benchmark): pass def download_metrics( From 3fd886d0a73fb268653297394dca8ddf0ba9e69a Mon Sep 17 00:00:00 2001 
From: Laurin Brandner
Date: Fri, 25 Mar 2022 13:59:36 +0100
Subject: [PATCH 29/68] Delete faas.function

---
 docs/design.md        |   6 +-
 sebs/faas/function.py | 330 ------------------------------------------
 2 files changed, 3 insertions(+), 333 deletions(-)
 delete mode 100644 sebs/faas/function.py

diff --git a/docs/design.md b/docs/design.md
index 021fe4ec..f39518d9 100644
--- a/docs/design.md
+++ b/docs/design.md
@@ -47,7 +47,7 @@ on a given platform, in parallel.
 configuration.
 
 `sebs/statistics.py` - implements common statistics routines.
- 
+
 `sebs/utils.py` - implements serialization and logging configuration used by SeBS.
 
 `sebs/faas/` - the abstract interface for all FaaS platforms, see [below](#faas-interface) for details.
@@ -87,8 +87,8 @@ used for microarchitectural analysis of local invocations.
 for the selected platform. Then, an instance of `sebs.Benchmark` is created, and both objects
 are used to create or update function code package and upload or update input data in the cloud
 storage with the help of `sebs.faas.PersistentStorage` implementation.
-In the end, an object of `sebs.faas.function.Function` is created with exposes a list of triggers
-encapsulated in `sebs.faas.function.Trigger`. The function is invoked via a selected trigger,
+In the end, an object of `sebs.faas.benchmark.Function` is created which exposes a list of triggers
+encapsulated in `sebs.faas.benchmark.Trigger`. The function is invoked via a selected trigger,
 and the output includes a JSON file with invocation ID and results.
 
 `sebs.py benchmark process` - the JSON result from benchmark invocation is read, deserialized,
diff --git a/sebs/faas/function.py b/sebs/faas/function.py
deleted file mode 100644
index 46e9a5fc..00000000
--- a/sebs/faas/function.py
+++ /dev/null
@@ -1,330 +0,0 @@
-import json
-from abc import ABC
-from abc import abstractmethod
-import concurrent.futures
-from datetime import datetime, timedelta
-from enum import Enum
-from typing import Callable, Dict, List, Optional  # noqa
-
-from google.cloud.workflows.executions_v1beta.types import Execution
-
-from sebs.utils import LoggingBase
-
-"""
-    Times are reported in microseconds.
-""" - - -class ExecutionTimes: - - client: int - client_begin: datetime - client_end: datetime - benchmark: int - initialization: int - http_startup: int - http_first_byte_return: int - - def __init__(self): - self.client = 0 - self.initialization = 0 - self.benchmark = 0 - - @staticmethod - def deserialize(cached_obj: dict) -> "ExecutionTimes": - ret = ExecutionTimes() - ret.__dict__.update(cached_obj) - return ret - - -class ProviderTimes: - - initialization: int - execution: int - - def __init__(self): - self.execution = 0 - self.initialization = 0 - - @staticmethod - def deserialize(cached_obj: dict) -> "ProviderTimes": - ret = ProviderTimes() - ret.__dict__.update(cached_obj) - return ret - - -class ExecutionStats: - - memory_used: Optional[float] - cold_start: bool - failure: bool - - def __init__(self): - self.memory_used = None - self.cold_start = False - self.failure = False - - @staticmethod - def deserialize(cached_obj: dict) -> "ExecutionStats": - ret = ExecutionStats() - ret.__dict__.update(cached_obj) - return ret - - -class ExecutionBilling: - - _memory: Optional[int] - _billed_time: Optional[int] - _gb_seconds: int - - def __init__(self): - self.memory = None - self.billed_time = None - self.gb_seconds = 0 - - @property - def memory(self) -> Optional[int]: - return self._memory - - @memory.setter - def memory(self, val: int): - self._memory = val - - @property - def billed_time(self) -> Optional[int]: - return self._billed_time - - @billed_time.setter - def billed_time(self, val: int): - self._billed_time = val - - @property - def gb_seconds(self) -> int: - return self._gb_seconds - - @gb_seconds.setter - def gb_seconds(self, val: int): - self._gb_seconds = val - - @staticmethod - def deserialize(cached_obj: dict) -> "ExecutionBilling": - ret = ExecutionBilling() - ret.__dict__.update(cached_obj) - return ret - - -class ExecutionResult: - - output: dict - request_id: str - times: ExecutionTimes - provider_times: ProviderTimes - stats: ExecutionStats - billing: ExecutionBilling - - def __init__(self): - self.output = {} - self.request_id = "" - self.times = ExecutionTimes() - self.provider_times = ProviderTimes() - self.stats = ExecutionStats() - self.billing = ExecutionBilling() - - @staticmethod - def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": - ret = ExecutionResult() - ret.times.client_begin = client_time_begin - ret.times.client_end = client_time_end - ret.times.client = int( - (client_time_end - client_time_begin) / timedelta(microseconds=1)) - return ret - - def parse_benchmark_output(self, output: dict): - self.output = output - self.stats.cold_start = self.output["is_cold"] - self.times.benchmark = int( - ( - datetime.fromtimestamp(float(self.output["end"])) - - datetime.fromtimestamp(float(self.output["begin"])) - ) - / timedelta(microseconds=1) - ) - - def parse_benchmark_execution(self, execution: Execution): - self.output = json.loads(execution.result) - self.times.benchmark = int( - (execution.start_time - execution.end_time) - / timedelta(microseconds=1) - ) - - @staticmethod - def deserialize(cached_config: dict) -> "ExecutionResult": - ret = ExecutionResult() - ret.times = ExecutionTimes.deserialize(cached_config["times"]) - ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) - ret.provider_times = ProviderTimes.deserialize( - cached_config["provider_times"]) - ret.stats = ExecutionStats.deserialize(cached_config["stats"]) - ret.request_id = cached_config["request_id"] - ret.output = 
cached_config["output"] - return ret - - -""" - Function trigger and implementation of invocation. - - FIXME: implement a generic HTTP invocation and specialize input and output - processing in classes. -""" - - -class Trigger(ABC, LoggingBase): - class TriggerType(Enum): - HTTP = "http" - LIBRARY = "library" - STORAGE = "storage" - - @staticmethod - def get(name: str) -> "Trigger.TriggerType": - for member in Trigger.TriggerType: - if member.value.lower() == name.lower(): - return member - raise Exception("Unknown trigger type {}".format(member)) - - def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: - import pycurl - from io import BytesIO - - c = pycurl.Curl() - c.setopt(pycurl.HTTPHEADER, ["Content-Type: application/json"]) - c.setopt(pycurl.POST, 1) - c.setopt(pycurl.URL, url) - data = BytesIO() - c.setopt(pycurl.WRITEFUNCTION, data.write) - - c.setopt(pycurl.POSTFIELDS, json.dumps(payload)) - begin = datetime.now() - c.perform() - end = datetime.now() - status_code = c.getinfo(pycurl.RESPONSE_CODE) - conn_time = c.getinfo(pycurl.PRETRANSFER_TIME) - receive_time = c.getinfo(pycurl.STARTTRANSFER_TIME) - - try: - output = json.loads(data.getvalue()) - - if status_code != 200: - self.logging.error( - "Invocation on URL {} failed with status code {}!".format(url, status_code)) - self.logging.error("Output: {}".format(output)) - raise RuntimeError( - f"Failed invocation of function! Output: {output}") - - self.logging.debug("Invoke of function was successful") - result = ExecutionResult.from_times(begin, end) - result.times.http_startup = conn_time - result.times.http_first_byte_return = receive_time - result.request_id = output["request_id"] - # General benchmark output parsing - result.parse_benchmark_output(output) - return result - except json.decoder.JSONDecodeError: - self.logging.error( - "Invocation on URL {} failed with status code {}!".format(url, status_code)) - self.logging.error("Output: {}".format(data.getvalue().decode())) - raise RuntimeError( - f"Failed invocation of function! Output: {data.getvalue().decode()}") - - # FIXME: 3.7+, future annotations - @staticmethod - @abstractmethod - def trigger_type() -> "Trigger.TriggerType": - pass - - @abstractmethod - def sync_invoke(self, payload: dict) -> ExecutionResult: - pass - - @abstractmethod - def async_invoke(self, payload: dict) -> concurrent.futures.Future: - pass - - @abstractmethod - def serialize(self) -> dict: - pass - - @classmethod - @abstractmethod - def deserialize(cls, cached_config: dict) -> "Trigger": - pass - - -""" - Abstraction base class for FaaS function. Contains a list of associated triggers - and might implement non-trigger execution if supported by the SDK. - Example: direct function invocation through AWS boto3 SDK. 
-""" - - -class Function(LoggingBase): - def __init__(self, benchmark: str, name: str, code_hash: str): - super().__init__() - self._benchmark = benchmark - self._name = name - self._code_package_hash = code_hash - self._updated_code = False - self._triggers: Dict[Trigger.TriggerType, List[Trigger]] = {} - - @property - def name(self): - return self._name - - @property - def benchmark(self): - return self._benchmark - - @property - def code_package_hash(self): - return self._code_package_hash - - @code_package_hash.setter - def code_package_hash(self, new_hash: str): - self._code_package_hash = new_hash - - @property - def updated_code(self) -> bool: - return self._updated_code - - @updated_code.setter - def updated_code(self, val: bool): - self._updated_code = val - - def triggers_all(self) -> List[Trigger]: - return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] - - def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: - try: - return self._triggers[trigger_type] - except KeyError: - return [] - - def add_trigger(self, trigger: Trigger): - if trigger.trigger_type() not in self._triggers: - self._triggers[trigger.trigger_type()] = [trigger] - else: - self._triggers[trigger.trigger_type()].append(trigger) - - def serialize(self) -> dict: - return { - "name": self._name, - "hash": self._code_package_hash, - "benchmark": self._benchmark, - "triggers": [ - obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers - ], - } - - @staticmethod - @abstractmethod - def deserialize(cached_config: dict) -> "Function": - pass From 0ee923c1b39ca50a29e064f7e705ac41615920af Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 29 Mar 2022 13:14:14 +0200 Subject: [PATCH 30/68] Generate workflow definitions --- .../{handler.py => handler_function.py} | 0 benchmarks/wrappers/aws/python/handler_sfm.py | 2 +- .../{handler.py => handler_function.py} | 0 .../wrappers/gcp/python/handler_workflow.py | 57 ++++++++++ config/systems.json | 4 +- sebs.py | 10 +- sebs/aws/aws.py | 79 ++++++++------ sebs/aws/generator.py | 34 ++++++ sebs/aws/workflow.py | 10 +- sebs/cache.py | 4 +- sebs/code_package.py | 2 +- sebs/faas/generator.py | 101 ++++++++++++++++++ sebs/faas/system.py | 6 +- sebs/gcp/function.py | 4 +- sebs/gcp/gcp.py | 81 +++++++++++--- sebs/gcp/generator.py | 31 ++++++ sebs/gcp/workflow.py | 12 ++- 17 files changed, 365 insertions(+), 72 deletions(-) rename benchmarks/wrappers/aws/python/{handler.py => handler_function.py} (100%) rename benchmarks/wrappers/gcp/python/{handler.py => handler_function.py} (100%) create mode 100644 benchmarks/wrappers/gcp/python/handler_workflow.py create mode 100644 sebs/aws/generator.py create mode 100644 sebs/faas/generator.py create mode 100644 sebs/gcp/generator.py diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler_function.py similarity index 100% rename from benchmarks/wrappers/aws/python/handler.py rename to benchmarks/wrappers/aws/python/handler_function.py diff --git a/benchmarks/wrappers/aws/python/handler_sfm.py b/benchmarks/wrappers/aws/python/handler_sfm.py index 75a5f1a1..aa6852ee 100644 --- a/benchmarks/wrappers/aws/python/handler_sfm.py +++ b/benchmarks/wrappers/aws/python/handler_sfm.py @@ -49,6 +49,6 @@ def handler(event, context): path = os.path.join(workflow_name, func_name+".json") s3 = boto3.client("s3") - s3.upload_fileobj(data, "workflow-experiments", path) + s3.upload_fileobj(data, "sebs-experiments", path) return res diff --git 
a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler_function.py similarity index 100% rename from benchmarks/wrappers/gcp/python/handler.py rename to benchmarks/wrappers/gcp/python/handler_function.py diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py new file mode 100644 index 00000000..91a51373 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -0,0 +1,57 @@ + +import datetime +import io +import json +import os +import sys +import uuid +import importlib + +from google.cloud import storage + +# Add current directory to allow location of packages +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + + +def probe_cold_start(): + is_cold = False + fname = os.path.join("/tmp", "cold_run") + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, "a") as f: + f.write(container_id) + else: + with open(fname, "r") as f: + container_id = f.read() + + return is_cold, container_id + + +def handler(req): + start = datetime.datetime.now().timestamp() + + full_function_name = os.getenv("FUNCTION_NAME") + workflow_name, func_name = full_function_name.split("___") + function = importlib.import_module(f"function.{func_name}") + res = function.handler(req) + + end = datetime.datetime.now().timestamp() + + is_cold, container_id = probe_cold_start() + payload = { + "start": start, + "end": end, + "is_cold": is_cold, + "container_id": container_id + } + + data = io.BytesIO(json.dumps(payload).encode("utf-8")) + path = os.path.join(workflow_name, func_name+".json") + + client = storage.Client() + bucket = client.bucket("sebs-experiments") + blob = bucket.blob(path) + blob.upload_from_file(data) + + return res diff --git a/config/systems.json b/config/systems.json index 3e7ed392..df4e8c7a 100644 --- a/config/systems.json +++ b/config/systems.json @@ -44,7 +44,7 @@ "images": ["build"], "username": "docker_user", "deployment": { - "files": ["handler.py", "handler_sfm.py", "storage.py"], + "files": ["handler_function.py", "handler_workflow.py", "storage.py"], "packages": [] } }, @@ -110,7 +110,7 @@ "images": ["build"], "username": "docker_user", "deployment": { - "files": [ "handler.py", "storage.py"], + "files": ["handler_function.py", "handler_workflow.py", "storage.py"], "packages": ["google-cloud-storage"] } }, diff --git a/sebs.py b/sebs.py index 576a9a5b..3d2c1030 100755 --- a/sebs.py +++ b/sebs.py @@ -220,7 +220,7 @@ def function(benchmark, benchmark_input_size, repetitions, trigger, function_nam ) else: trigger = triggers[0] - + for i in range(repetitions): sebs_client.logging.info(f"Beginning repetition {i+1}/{repetitions}") ret = trigger.sync_invoke(input_config) @@ -234,7 +234,7 @@ def function(benchmark, benchmark_input_size, repetitions, trigger, function_nam with open("experiments.json", "w") as out_f: out_f.write(sebs.utils.serialize(result)) sebs_client.logging.info("Save results to {}".format(os.path.abspath("experiments.json"))) - + @benchmark.command() @click.argument("benchmark", type=str) # , help="Benchmark to be used.") @click.argument( @@ -254,10 +254,10 @@ def function(benchmark, benchmark_input_size, repetitions, trigger, function_nam default=None, type=str, help="Override workflow name for random generation.", -) +) @common_params def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_name, **kwargs): - + ( config, output_dir, @@ -265,7 +265,7 @@ def 
workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam sebs_client, deployment_client, ) = parse_common_params(**kwargs) - + experiment_config = sebs_client.get_experiment_config(config["experiments"]) benchmark_obj = sebs_client.get_benchmark( benchmark, diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index f8a345e9..0ab2dfe6 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -13,6 +13,7 @@ from sebs.aws.s3 import S3 from sebs.aws.function import LambdaFunction from sebs.aws.workflow import SFNWorkflow +from sebs.aws.generator import SFNGenerator from sebs.aws.config import AWSConfig from sebs.utils import execute from sebs.code_package import CodePackage @@ -139,22 +140,24 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: """ def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: - CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } + package_config = CONFIG_FILES[language_name] # Todo: sfm support for nodejs - # rename handler_sfm.py to handler.py if necessary + # rename handler_workflow.py to handler.py if necessary handler_path = os.path.join(directory, "handler.py") - handler_sfm_path = os.path.join(directory, "handler_sfm.py") + handler_function_path = os.path.join(directory, "handler_function.py") + handler_workflow_path = os.path.join(directory, "handler_workflow.py") if is_workflow: - os.rename(handler_sfm_path, handler_path) + os.rename(handler_workflow_path, handler_path) + os.remove(handler_function_path) else: - os.remove(handler_sfm_path) + os.rename(handler_function_path, handler_path) + os.remove(handler_workflow_path) - package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") os.makedirs(function_dir) # move all files to 'function' except handler.py @@ -383,10 +386,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN # Make sure we have a valid workflow benchmark definition_path = os.path.join( code_package.path, "definition.json") - if os.path.exists(definition_path): - with open(definition_path) as json_file: - definition = json.load(json_file) - else: + if not os.path.exists(definition_path): raise ValueError( f"No workflow definition found for {workflow_name}") @@ -395,13 +395,10 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] funcs = [self.create_function(code_package, workflow_name+"___"+fn) for fn in func_names] - # Set the ARN to the corresponding states in the workflow definition - for name, func in zip(func_names, funcs): - try: - definition["States"][name]["Resource"] = func.arn - except KeyError: - raise ValueError( - f"Workflow definition for {workflow_name} missing state {func.name}") + # Generate workflow definition.json + gen = SFNGenerator({n: f.arn for (n, f) in zip(func_names, funcs)}) + gen.parse(definition_path) + definition = gen.generate() package = code_package.code_location @@ -410,7 +407,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN try: ret = self.sfn_client.create_state_machine( name=workflow_name, - definition=json.dumps(definition), + definition=definition, roleArn=self.config.resources.lambda_role(self.session), ) @@ -422,8 +419,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN funcs, 
code_package.name, ret["stateMachineArn"], - code_package.hash, - self.config.resources.lambda_role(self.session), + code_package.hash ) except self.sfn_client.exceptions.StateMachineAlreadyExists as e: arn = re.search("'([^']*)'", str(e)).group()[1:-1] @@ -439,11 +435,10 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN funcs, code_package.name, arn, - code_package.hash, - self.config.resources.lambda_role(self.session), + code_package.hash ) - self.update_workflow(workflow, definition, code_package) + self.update_workflow(workflow, definition, code_package, False) workflow.updated_code = True # Add LibraryTrigger to a new function @@ -455,21 +450,35 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN return workflow - def update_workflow(self, workflow: Workflow, code_package: CodePackage): - + def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_functions: bool): workflow = cast(SFNWorkflow, workflow) - for func in workflow.functions: - print(func) - self.update_function(func, code_package) + # Make sure we have a valid workflow benchmark + definition_path = os.path.join( + code_package.path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError( + f"No workflow definition found for {workflow.name}") - # Todo: update workflow definition - # and update config - # self.sfn_client.update_state_machine( - # stateMachineArn=workflow.arn, - # definition=json.dumps(definition), - # roleArn=self.config.resources.lambda_role(self.session), - # ) + # Create or update lambda function for each code file + if update_functions: + code_files = list(code_package.get_code_files(include_config=False)) + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + funcs = [self.create_function(code_package, workflow.name+"___"+fn) for fn in func_names] + else: + funcs = workflow.functions + + # Generate workflow definition.json + gen = SFNGenerator({n: f.arn for (n, f) in zip(func_names, funcs)}) + gen.parse(definition_path) + definition = gen.generate() + + self.sfn_client.update_state_machine( + stateMachineArn=workflow.arn, + definition=definition, + roleArn=self.config.resources.lambda_role(self.session), + ) + workflow.functions = funcs self.logging.info("Published new workflow code") def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py new file mode 100644 index 00000000..cfd7ab55 --- /dev/null +++ b/sebs/aws/generator.py @@ -0,0 +1,34 @@ +import json +from typing import Dict + +from sebs.faas.generator import * + +class SFNGenerator(Generator): + + def __init__(self, func_arns: Dict[str, str]): + super().__init__() + self._func_arns = func_arns + + + def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + payloads = super().postprocess(states, payloads) + definition = { + "Comment": "SeBS auto-generated benchmark", + "StartAt": states[0].name, + "States": payloads + } + + return definition + + def encode_task(self, state: Task) -> dict: + payload = { + "Type": "Task", + "Resource": self._func_arns[state.name] + } + + if state.next: + payload["Next"] = state.next + else: + payload["End"] = True + + return payload diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index 599d7e26..09debdce 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -12,13 +12,11 @@ def __init__( functions: List[LambdaFunction], benchmark: str, arn: 
str, - code_package_hash: str, - role: str + code_package_hash: str ): super().__init__(benchmark, name, code_package_hash) self.functions = functions self.arn = arn - self.role = role @staticmethod def typename() -> str: @@ -28,8 +26,7 @@ def serialize(self) -> dict: return { **super().serialize(), "functions": [f.serialize() for f in self.functions], - "arn": self.arn, - "role": self.role + "arn": self.arn } @staticmethod @@ -43,8 +40,7 @@ def deserialize(cached_config: dict) -> "SFNWorkflow": funcs, cached_config["code_package"], cached_config["arn"], - cached_config["hash"], - cached_config["role"] + cached_config["hash"] ) for trigger in cached_config["triggers"]: trigger_type = cast( diff --git a/sebs/cache.py b/sebs/cache.py index eba3d735..0a7dbaa9 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -309,7 +309,7 @@ def update_benchmark(self, benchmark: "Benchmark"): if "benchmarks" not in cfg2: continue for name, func in cfg2["benchmarks"].items(): - if name == function.name: + if name == benchmark.name: cached_config[deployment][language]["benchmarks"][ name ] = benchmark.serialize() @@ -317,5 +317,5 @@ def update_benchmark(self, benchmark: "Benchmark"): json.dump(cached_config, fp, indent=2) else: raise RuntimeError( - "Can't cache benchmark {} for a non-existing code package!".format(function.name) + "Can't cache benchmark {} for a non-existing code package!".format(benchmark.name) ) diff --git a/sebs/code_package.py b/sebs/code_package.py index ea62ba0e..887d4959 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -248,7 +248,7 @@ def get_code_files(self, include_config=True): for f in glob.glob(os.path.join(path, file_type)): yield os.path.join(path, f) - def copy_code(self, output_dir): + def copy_code(self, output_dir: str): for path in self.get_code_files(): shutil.copy2(path, output_dir) diff --git a/sebs/faas/generator.py b/sebs/faas/generator.py new file mode 100644 index 00000000..3fda506a --- /dev/null +++ b/sebs/faas/generator.py @@ -0,0 +1,101 @@ +from abc import ABC +from abc import abstractmethod +from typing import Optional, List, Callable +from enum import Enum +import json + + +class State(ABC): + + def __init__(self, name: str): + self.name = name + + @staticmethod + def deserialize(payload: dict) -> "State": + cls = _STATE_TYPES[payload["type"]] + return cls.deserialize(payload) + + +class Task(State): + + def __init__(self, name: str, func_name: str, next: Optional[str], parameters: Optional[List[str]]): + self.name = name + self.func_name = func_name + self.next = next + self.parameters = parameters + + @staticmethod + def deserialize(payload: dict) -> State: + return Task( + name=payload["name"], + func_name=payload["func_name"], + next=payload.get("next"), + parameters=payload.get("parameters") + ) + + +# class Switch(State): +# +# class Operator(Enum): +# less = "less" +# less_equal = "less_equal" +# equal = "equal" +# greater_equal = "greater_equal" +# greater = "greater" +# +# class ConditionType(Enum): +# numeric = "numeric" +# string = "string" +# +# class Condition: +# pass +# +# def __init__(self, name: str, condition: Condition, condition_type: ConditionType): +# self.name = name +# self.condition = condition +# self.condition_type = condition_type +# +# @staticmethod +# def deserialize(payload: dict) -> Switch: +# return Switch( +# payload["name"], +# payload["condition"], +# payload["condition_type"] +# ) + + +_STATE_TYPES = { + "task": Task +} + + +class Generator(ABC): + + def __init__(self): + self._states: List[State] = [] + + def 
parse(self, path: str): + with open(path) as f: + states = json.load(f) + + self._states = [State.deserialize(s) for s in states] + + if len(states) == 0: + raise RuntimeError("A workflow definition must have at least one state.") + + def generate(self) -> str: + payloads = [self.encode_state(s) for s in self._states] + definition = self.postprocess(self._states, payloads) + + return json.dumps(definition) + + def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + return {s.name: p for (s, p) in zip(states, payloads)} + + def encode_state(self, state: State) -> dict: + if isinstance(state, Task): + return self.encode_task(state) + + @abstractmethod + def encode_task(self, state: Task) -> dict: + pass \ No newline at end of file diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 60ef2024..03da8bb1 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -211,6 +211,10 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non code_package.query_cache() return function + @abstractmethod + def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_functions: bool): + pass + def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = None): if code_package.language_version not in self.system_config.supported_language_versions( self.name(), code_package.language_name @@ -269,7 +273,7 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = f"current build {code_package.hash} in " f"{code_location}, updating cloud version!" ) - self.update_workflow(workflow, code_package) + self.update_workflow(workflow, code_package, True) workflow.code_package_hash = code_package.hash workflow.updated_code = True self.cache_client.add_benchmark( diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index fe1f981e..317781cf 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -34,7 +34,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": from sebs.faas.benchmark import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + from sebs.gcp.triggers import FunctionLibraryTrigger, HTTPTrigger ret = GCPFunction( cached_config["name"], @@ -47,7 +47,7 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index a197196a..44870e66 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -16,13 +16,14 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.code_package import CodePackage -from ..faas.benchmark import Benchmark, Function, Trigger, Workflow +from sebs.faas.benchmark import Benchmark, Function, Trigger, Workflow from .storage import PersistentStorage from ..faas.system import System from sebs.gcp.config import GCPConfig from sebs.gcp.storage import GCPStorage from sebs.gcp.function import GCPFunction from sebs.gcp.workflow import GCPWorkflow +from sebs.gcp.generator import GCPGenerator from sebs.utils import LoggingHandlers """ @@ -153,6 +154,19 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo "nodejs": ("handler.js", "index.js"), } package_config = 
CONFIG_FILES[language_name] + + # Todo: sfm support for nodejs + # rename handler_workflow.py to handler.py if necessary + handler_path = os.path.join(directory, "handler.py") + handler_function_path = os.path.join(directory, "handler_function.py") + handler_workflow_path = os.path.join(directory, "handler_workflow.py") + if is_workflow: + os.rename(handler_workflow_path, handler_path) + os.remove(handler_function_path) + else: + os.rename(handler_function_path, handler_path) + os.remove(handler_workflow_path) + function_dir = os.path.join(directory, "function") os.makedirs(function_dir) for file in os.listdir(directory): @@ -301,6 +315,7 @@ def create_function_trigger(self, function: Function, our_function_req = ( self.function_client.projects().locations().functions().get(name=full_func_name) ) + deployed = False while not deployed: status_res = our_function_req.execute() @@ -382,14 +397,31 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP location = self.config.region project_name = self.config.project_name + # Make sure we have a valid workflow benchmark + definition_path = os.path.join( + code_package.path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError( + f"No workflow definition found for {workflow_name}") + + # First we create a function for each code file + prefix = workflow_name+"___" + code_files = list(code_package.get_code_files(include_config=False)) + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + funcs = [self.create_function(code_package, prefix+fn) for fn in func_names] + + # generate workflow definition.json + urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] + func_triggers = {n: u for (n, u) in zip(func_names, urls)} + gen = GCPGenerator(func_triggers) + gen.parse(definition_path) + definition = gen.generate() + full_workflow_name = GCP.get_full_workflow_name( project_name, location, workflow_name) get_req = self.workflow_client.projects().locations( ).workflows().get(name=full_workflow_name) - with open('cache/test.yml') as f: - code = f.read() - try: get_req.execute() except HttpError: @@ -403,15 +435,15 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP workflowId=workflow_name, body={ "name": full_workflow_name, - "sourceContents": code, + "sourceContents": definition, }, ) ) - create_req.execute() + ret = create_req.execute() self.logging.info(f"Workflow {workflow_name} has been created!") workflow = GCPWorkflow( - workflow_name, benchmark, code_package.hash, timeout, memory, code_bucket + workflow_name, funcs, benchmark, code_package.hash, timeout, memory, code_bucket ) else: # if result is not empty, then function does exists @@ -420,13 +452,14 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP workflow = GCPWorkflow( name=workflow_name, + functions=funcs, benchmark=benchmark, code_package_hash=code_package.hash, timeout=timeout, memory=memory, bucket=code_bucket, ) - self.update_workflow(workflow, code_package) + self.update_workflow(workflow, code_package, False) # Add LibraryTrigger to a new function from sebs.gcp.triggers import WorkflowLibraryTrigger @@ -452,11 +485,32 @@ def create_workflow_trigger(self, workflow: Workflow, self.cache_client.update_benchmark(workflow) return trigger - def update_workflow(self, workflow: Workflow, code_package: CodePackage): - with open('cache/test.yml') as f: - code = f.read() - + def update_workflow(self, workflow: Workflow, 
code_package: CodePackage, update_functions: bool): workflow = cast(GCPWorkflow, workflow) + + # Make sure we have a valid workflow benchmark + definition_path = os.path.join( + code_package.path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError( + f"No workflow definition found for {workflow.name}") + + # First we create a function for each code file + if update_functions: + prefix = workflow.name+"___" + code_files = list(code_package.get_code_files(include_config=False)) + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + funcs = [self.create_function(code_package, prefix+fn) for fn in func_names] + else: + funcs = workflow.functions + + # Generate workflow definition.json + urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] + func_triggers = {n: u for (n, u) in zip(func_names, urls)} + gen = GCPGenerator(func_triggers) + gen.parse(definition_path) + definition = gen.generate() + full_workflow_name = GCP.get_full_workflow_name( self.config.project_name, self.config.region, workflow.name ) @@ -468,11 +522,12 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): name=full_workflow_name, body={ "name": full_workflow_name, - "sourceContents": code + "sourceContents": definition }, ) ) req.execute() + workflow.functions = funcs self.logging.info("Published new workflow code and configuration.") @staticmethod diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py new file mode 100644 index 00000000..e2832849 --- /dev/null +++ b/sebs/gcp/generator.py @@ -0,0 +1,31 @@ +import json +from typing import Dict + +from sebs.faas.generator import * + +class GCPGenerator(Generator): + + def __init__(self, func_triggers: Dict[str, str]): + super().__init__() + self._func_triggers = func_triggers + + def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + definition = { + "main" : { + "steps": payloads + } + } + + return definition + + def encode_task(self, state: Task) -> dict: + url = self._func_triggers[state.name] + + return { + state.name: { + "call": "http.get", + "args": { + "url": url + } + } + } diff --git a/sebs/gcp/workflow.py b/sebs/gcp/workflow.py index 3f8be029..f1846bc4 100644 --- a/sebs/gcp/workflow.py +++ b/sebs/gcp/workflow.py @@ -1,6 +1,7 @@ -from typing import cast, Optional +from typing import List, cast, Optional from sebs.faas.benchmark import Workflow +from sebs.gcp.function import GCPFunction from sebs.gcp.storage import GCPStorage @@ -8,6 +9,7 @@ class GCPWorkflow(Workflow): def __init__( self, name: str, + functions: List[GCPFunction], benchmark: str, code_package_hash: str, timeout: int, @@ -15,6 +17,7 @@ def __init__( bucket: Optional[str] = None, ): super().__init__(benchmark, name, code_package_hash) + self.functions = functions self.timeout = timeout self.memory = memory self.bucket = bucket @@ -26,6 +29,7 @@ def typename() -> str: def serialize(self) -> dict: return { **super().serialize(), + "functions": [f.serialize() for f in self.functions], "timeout": self.timeout, "memory": self.memory, "bucket": self.bucket, @@ -34,10 +38,12 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPWorkflow": from sebs.faas.benchmark import Trigger - from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger + from sebs.gcp.triggers import WorkflowLibraryTrigger, HTTPTrigger + funcs = [GCPFunction.deserialize(f) for f in cached_config["functions"]] ret = GCPWorkflow( cached_config["name"], + funcs, 
cached_config["code_package"], cached_config["hash"], cached_config["timeout"], @@ -47,7 +53,7 @@ def deserialize(cached_config: dict) -> "GCPWorkflow": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": LibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) From 0b82a1a1262ad30f65f4d234dbb24055c71191af Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 31 Mar 2022 14:36:37 +0200 Subject: [PATCH 31/68] Generate azure workflows --- .../{handler_sfm.py => handler_workflow.py} | 0 .../{handler.py => handler_function.py} | 0 .../wrappers/azure/python/handler_workflow.py | 44 ++++++ .../wrappers/azure/python/main_workflow.py | 49 +++++++ config/systems.json | 4 +- sebs/aws/aws.py | 38 ++--- sebs/azure/azure.py | 133 ++++++++++++------ sebs/azure/generator.py | 32 +++++ sebs/code_package.py | 23 ++- sebs/faas/generator.py | 5 +- sebs/faas/system.py | 6 +- sebs/gcp/gcp.py | 38 ++--- sebs/local/local.py | 4 +- 13 files changed, 263 insertions(+), 113 deletions(-) rename benchmarks/wrappers/aws/python/{handler_sfm.py => handler_workflow.py} (100%) rename benchmarks/wrappers/azure/python/{handler.py => handler_function.py} (100%) create mode 100644 benchmarks/wrappers/azure/python/handler_workflow.py create mode 100644 benchmarks/wrappers/azure/python/main_workflow.py create mode 100644 sebs/azure/generator.py diff --git a/benchmarks/wrappers/aws/python/handler_sfm.py b/benchmarks/wrappers/aws/python/handler_workflow.py similarity index 100% rename from benchmarks/wrappers/aws/python/handler_sfm.py rename to benchmarks/wrappers/aws/python/handler_workflow.py diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler_function.py similarity index 100% rename from benchmarks/wrappers/azure/python/handler.py rename to benchmarks/wrappers/azure/python/handler_function.py diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py new file mode 100644 index 00000000..e91e7f25 --- /dev/null +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -0,0 +1,44 @@ +import datetime +import io +import json +import os +import uuid + +def probe_cold_start(): + is_cold = False + fname = os.path.join("/tmp", "cold_run") + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, "a") as f: + f.write(container_id) + else: + with open(fname, "r") as f: + container_id = f.read() + + return is_cold, container_id + + +def main(event): + start = datetime.datetime.now().timestamp() + + workflow_name, func_name = context.function_name.split("___") + res = func(event) + + end = datetime.datetime.now().timestamp() + + is_cold, container_id = probe_cold_start() + payload = { + "start": start, + "end": end, + "is_cold": is_cold, + "container_id": container_id + } + + data = io.BytesIO(json.dumps(payload).encode("utf-8")) + path = os.path.join(workflow_name, func_name+".json") + + # s3 = boto3.client("s3") + # s3.upload_fileobj(data, "sebs-experiments", path) + + return res diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py new file mode 100644 index 00000000..36185a5c --- /dev/null +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -0,0 +1,49 @@ +import os 
+import json +import datetime +import uuid + +import azure.functions as func +import azure.durable_functions as df + +def probe_cold_start(): + is_cold = False + fname = os.path.join("/tmp", "cold_run") + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, "a") as f: + f.write(container_id) + else: + with open(fname, "r") as f: + container_id = f.read() + + return is_cold, container_id + + +async def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.HttpResponse: + event = req.get_json() + + begin = datetime.datetime.now() + + client = df.DurableOrchestrationClient(starter) + instance_id = await client.start_new("run_workflow", None, event) + + end = datetime.datetime.now() + + is_cold, container_id = probe_cold_start() + res = client.create_check_status_response(req, instance_id) + status_body = json.loads(res.get_body()) + body = { + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'environ_container_id': os.environ['CONTAINER_NAME'], + 'request_id': context.invocation_id, + "is_cold": is_cold, + "container_id": container_id, + **status_body + } + return func.HttpResponse( + json.dumps(body), + mimetype="application/json" + ) diff --git a/config/systems.json b/config/systems.json index df4e8c7a..38f086a8 100644 --- a/config/systems.json +++ b/config/systems.json @@ -75,8 +75,8 @@ "images": ["build"], "username": "docker_user", "deployment": { - "files": [ "handler.py", "storage.py"], - "packages": ["azure-storage-blob"] + "files": ["handler_function.py", "main_workflow.py", "handler_workflow.py", "storage.py"], + "packages": ["azure-storage-blob", "azure-functions", "azure-functions-durable"] } }, "nodejs": { diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 0ab2dfe6..649d7f72 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -139,25 +139,12 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: benchmark: benchmark name """ - def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: + def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } - package_config = CONFIG_FILES[language_name] - - # Todo: sfm support for nodejs - # rename handler_workflow.py to handler.py if necessary - handler_path = os.path.join(directory, "handler.py") - handler_function_path = os.path.join(directory, "handler_function.py") - handler_workflow_path = os.path.join(directory, "handler_workflow.py") - if is_workflow: - os.rename(handler_workflow_path, handler_path) - os.remove(handler_function_path) - else: - os.rename(handler_function_path, handler_path) - os.remove(handler_workflow_path) - + package_config = CONFIG_FILES[code_package.language_name] function_dir = os.path.join(directory, "function") os.makedirs(function_dir) # move all files to 'function' except handler.py @@ -167,16 +154,16 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo shutil.move(file, function_dir) # For python, add an __init__ file - if language_name == "python": + if code_package.language_name == "python": path = os.path.join(function_dir, "__init__.py") with open(path, "a"): os.utime(path, None) # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(benchmark), + 
execute("zip -qu -r9 {}.zip * .".format(code_package.name), shell=True, cwd=directory) - benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) + benchmark_archive = "{}.zip".format(os.path.join(directory, code_package.name)) self.logging.info("Created {} archive".format(benchmark_archive)) bytes_size = os.path.getsize( @@ -184,7 +171,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) - return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size + return os.path.join(directory, "{}.zip".format(code_package.name)), bytes_size def wait_for_function(self, func_name: str): ready = False @@ -438,7 +425,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN code_package.hash ) - self.update_workflow(workflow, definition, code_package, False) + self.update_workflow(workflow, definition, code_package) workflow.updated_code = True # Add LibraryTrigger to a new function @@ -450,7 +437,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN return workflow - def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_functions: bool): + def update_workflow(self, workflow: Workflow, code_package: CodePackage): workflow = cast(SFNWorkflow, workflow) # Make sure we have a valid workflow benchmark @@ -461,12 +448,9 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_ f"No workflow definition found for {workflow.name}") # Create or update lambda function for each code file - if update_functions: - code_files = list(code_package.get_code_files(include_config=False)) - func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, workflow.name+"___"+fn) for fn in func_names] - else: - funcs = workflow.functions + code_files = list(code_package.get_code_files(include_config=False)) + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + funcs = [self.create_function(code_package, workflow.name+"___"+fn) for fn in func_names] # Generate workflow definition.json gen = SFNGenerator({n: f.arn for (n, f) in zip(func_names, funcs)}) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 8eb3dd7b..714c794f 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -10,6 +10,7 @@ from sebs.azure.blob_storage import BlobStorage from sebs.azure.cli import AzureCLI +from sebs.azure.generator import AzureGenerator from sebs.azure.function import AzureFunction from sebs.azure.workflow import AzureWorkflow from sebs.azure.config import AzureConfig, AzureResources @@ -18,9 +19,9 @@ from sebs.cache import Cache from sebs.config import SeBSConfig from sebs.utils import LoggingHandlers, execute -from ..faas.benchmark import Benchmark, Function, ExecutionResult, Workflow, Trigger -from ..faas.storage import PersistentStorage -from ..faas.system import System +from sebs.faas.benchmark import Benchmark, Function, ExecutionResult, Workflow, Trigger +from sebs.faas.storage import PersistentStorage +from sebs.faas.system import System class Azure(System): @@ -120,7 +121,7 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: # - function.json # host.json # requirements.txt/package.json - def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: + def package_code(self, code_package: 
CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure @@ -129,72 +130,108 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo "python": ["requirements.txt", ".python_packages"], "nodejs": ["package.json", "node_modules"], } - SUPPORTING_FILES = ["function", "storage"] - WRAPPER_FILES = ["handler"] + SUPPORTING_FILES - file_type = FILES[language_name] - package_config = CONFIG_FILES[language_name] + WRAPPER_FILES = { + "python": ["handler.py", "storage.py"], + "nodejs": ["handler.js", "storage.js"] + } + file_type = FILES[code_package.language_name] + package_config = CONFIG_FILES[code_package.language_name] + wrapper_files = WRAPPER_FILES[code_package.language_name] + + main_path = os.path.join(directory, "main_workflow.py") + if is_workflow: + os.rename(main_path, os.path.join(directory, "main.py")) + + # Make sure we have a valid workflow benchmark + definition_path = os.path.join( + code_package.path, "definition.json") + if not os.path.exists(definition_path): + raise ValueError( + f"No workflow definition found for {workflow_name}") + + # Generate workflow code and append it to handler.py + gen = AzureGenerator() + gen.parse(definition_path) + code = gen.generate() + + orchestrator_path = os.path.join(directory, "run_workflow.py") + with open(orchestrator_path, "w") as f: + f.writelines(code) + else: + os.remove(main_path) # TODO: extension to other triggers than HTTP - default_function_json = { - "bindings": [ - { - "authLevel": "function", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["get", "post"], - }, - {"name": "starter", "type": "durableClient", "direction": "in"}, - {"name": "$return", "type": "http", "direction": "out"}, - ], - } + main_bindings = [ + {"name": "req", "type": "httpTrigger", "direction": "in", + "authLevel": "function", "methods": ["post"]}, + {"name": "starter", "type": "durableClient", "direction": "in"}, + {"name": "$return", "type": "http", "direction": "out"} + ] + + activity_bindings = [ + {"name": "event", "type": "activityTrigger", "direction": "in"}, + {"name": "$return", "type": "blob", "direction": "out"}, + ] + orchestrator_bindings = [ + {"name": "context", "type": "orchestrationTrigger", "direction": "in"} + ] + + if is_workflow: + bindings = { + "main": main_bindings, + "run_workflow": orchestrator_bindings + } + else: + bindings = {"function": main_bindings} + func_dirs = [] for file_path in glob.glob(os.path.join(directory, file_type)): file = os.path.basename(file_path) - if file in package_config: + if file in package_config or file in wrapper_files: continue # move file directory/f.py to directory/f/f.py name, ext = os.path.splitext(file) - if name in WRAPPER_FILES: - func_dir = os.path.join(directory, "handler") - else: - func_dir = os.path.join(directory, name) + func_dir = os.path.join(directory, name) + func_dirs.append(func_dir) dst_file = os.path.join(func_dir, file) src_file = os.path.join(directory, file) - if not os.path.exists(func_dir): - os.makedirs(func_dir) + os.makedirs(func_dir) shutil.move(src_file, dst_file) - # generate function.json if none provided - # we don't do this for supporting files - if name in SUPPORTING_FILES: - continue - - src_json = os.path.join(directory, name+".json") + # generate function.json + script_file = file if (name in bindings and is_workflow) else "handler.py" + payload = { + 
"bindings": bindings.get(name, activity_bindings), + "scriptFile": script_file, + "disabled": False + } dst_json = os.path.join(os.path.dirname(dst_file), "function.json") + json.dump(payload, open(dst_json, "w"), indent=2) - if os.path.exists(src_json): - shutil.move(src_json, dst_json) - else: - default_function_json["scriptFile"] = file - json.dump(default_function_json, open(dst_json, "w"), indent=2) + # copy every wrapper file to respective function dirs + for wrapper_file in wrapper_files: + src_path = os.path.join(directory, wrapper_file) + for func_dir in func_dirs: + dst_path = os.path.join(func_dir, wrapper_file) + shutil.copyfile(src_path, dst_path) + os.remove(src_path) # generate host.json - default_host_json = { + host_json = { "version": "2.0", "extensionBundle": { "id": "Microsoft.Azure.Functions.ExtensionBundle", "version": "[2.*, 3.0.0)" }, } - json.dump(default_host_json, open( + json.dump(host_json, open( os.path.join(directory, "host.json"), "w"), indent=2) code_size = CodePackage.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(benchmark), + execute("zip -qu -r9 {}.zip * .".format(code_package.name), shell=True, cwd=directory) return directory, code_size @@ -379,7 +416,6 @@ def cached_benchmark(self, benchmark: Benchmark): azure_trigger.data_storage_account = data_storage_account def create_workflow(self, code_package: CodePackage, workflow_name: str) -> AzureFunction: - language = code_package.language_name language_runtime = code_package.language_version resource_group = self.config.resources.resource_group( @@ -456,6 +492,17 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Azur return workflow + def update_workflow(self, workflow: Workflow, code_package: CodePackage): + # Mount code package in Docker instance + self._mount_function_code(code_package) + url = self.publish_function(workflow, code_package, True) + + trigger = HTTPTrigger( + url, self.config.resources.data_storage_account(self.cli_instance)) + trigger.logging_handlers = self.logging_handlers + function.add_trigger(trigger) + + """ Prepare Azure resources to store experiment results. Allocate one container. 
diff --git a/sebs/azure/generator.py b/sebs/azure/generator.py new file mode 100644 index 00000000..4c6d6158 --- /dev/null +++ b/sebs/azure/generator.py @@ -0,0 +1,32 @@ +from sebs.faas.generator import * + + +class AzureGenerator(Generator): + + def __init__(self): + def _export(payload: dict): + return payload["src"] + + super().__init__(_export) + + def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + code = ("import azure.durable_functions as df\n\n" + "def run_workflow(context: df.DurableOrchestrationContext):\n" + "\tres = context.get_input()") + + for payload in payloads: + code += "\n\t" + payload["src"] + + code += ("\n\treturn res" + "\n\nmain = df.Orchestrator.create(run_workflow)") + + return { + "src": code + } + + def encode_task(self, state: Task) -> dict: + code = f"res = yield context.call_activity(\"{state.func_name}\", res)" + + return { + "src": code + } \ No newline at end of file diff --git a/sebs/code_package.py b/sebs/code_package.py index 887d4959..23a85b3a 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -268,7 +268,7 @@ def add_benchmark_data(self, output_dir): ) self.logging.debug(out.stdout.decode("utf-8")) - def add_deployment_files(self, output_dir): + def add_deployment_files(self, output_dir: str, is_workflow: bool): handlers_dir = project_absolute_path( "benchmarks", "wrappers", self._deployment_name, self.language_name ) @@ -281,6 +281,17 @@ def add_deployment_files(self, output_dir): for file in handlers: shutil.copy2(file, os.path.join(output_dir)) + if self.language_name == "python": + handler_path = os.path.join(output_dir, "handler.py") + handler_function_path = os.path.join(output_dir, "handler_function.py") + handler_workflow_path = os.path.join(output_dir, "handler_workflow.py") + if is_workflow: + os.rename(handler_workflow_path, handler_path) + os.remove(handler_function_path) + else: + os.rename(handler_function_path, handler_path) + os.remove(handler_workflow_path) + def add_deployment_package_python(self, output_dir): # append to the end of requirements file packages = self._system_config.deployment_packages( @@ -288,10 +299,8 @@ def add_deployment_package_python(self, output_dir): ) if len(packages): with open(os.path.join(output_dir, "requirements.txt"), "a") as out: - # make sure to start with a newline - out.write("\n") for package in packages: - out.write(package) + out.write(package+"\n") def add_deployment_package_nodejs(self, output_dir): # modify package.json @@ -458,7 +467,7 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[[str, str, str, bool], Tuple[str, int]], + self, deployment_build_step: Callable[["CodePackage", str, bool], Tuple[str, int]], is_workflow: bool ) -> Tuple[bool, str]: @@ -485,11 +494,11 @@ def build( self.copy_code(self._output_dir) self.add_benchmark_data(self._output_dir) - self.add_deployment_files(self._output_dir) + self.add_deployment_files(self._output_dir, is_workflow) self.add_deployment_package(self._output_dir) self.install_dependencies(self._output_dir) self._code_location, self._code_size = deployment_build_step( - os.path.abspath(self._output_dir), self.language_name, self.name, is_workflow + self, os.path.abspath(self._output_dir), is_workflow ) self.logging.info( ( diff --git a/sebs/faas/generator.py b/sebs/faas/generator.py index 3fda506a..06e08aa2 100644 --- a/sebs/faas/generator.py +++ b/sebs/faas/generator.py @@ -71,8 +71,9 @@ def deserialize(payload: dict) -> State: class Generator(ABC): - def 
__init__(self): + def __init__(self, export_func: Callable[[dict], str] = json.dumps): self._states: List[State] = [] + self._export_func = export_func def parse(self, path: str): with open(path) as f: @@ -87,7 +88,7 @@ def generate(self) -> str: payloads = [self.encode_state(s) for s in self._states] definition = self.postprocess(self._states, payloads) - return json.dumps(definition) + return self._export_func(definition) def postprocess(self, states: List[State], payloads: List[dict]) -> dict: return {s.name: p for (s, p) in zip(states, payloads)} diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 03da8bb1..6b345208 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -109,7 +109,7 @@ def get_storage(self, replace_existing: bool) -> PersistentStorage: """ @abstractmethod - def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: + def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: pass @abstractmethod @@ -212,7 +212,7 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non return function @abstractmethod - def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_functions: bool): + def update_workflow(self, workflow: Workflow, code_package: CodePackage): pass def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = None): @@ -273,7 +273,7 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = f"current build {code_package.hash} in " f"{code_location}, updating cloud version!" ) - self.update_workflow(workflow, code_package, True) + self.update_workflow(workflow, code_package) workflow.code_package_hash = code_package.hash workflow.updated_code = True self.cache_client.add_benchmark( diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 44870e66..d3126e6d 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -143,8 +143,7 @@ def format_function_name(func_name: str) -> str: :return: path to packaged code and its size """ - def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: - + def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", ".python_packages"], "nodejs": ["handler.js", "node_modules"], @@ -153,19 +152,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo "python": ("handler.py", "main.py"), "nodejs": ("handler.js", "index.js"), } - package_config = CONFIG_FILES[language_name] - - # Todo: sfm support for nodejs - # rename handler_workflow.py to handler.py if necessary - handler_path = os.path.join(directory, "handler.py") - handler_function_path = os.path.join(directory, "handler_function.py") - handler_workflow_path = os.path.join(directory, "handler_workflow.py") - if is_workflow: - os.rename(handler_workflow_path, handler_path) - os.remove(handler_function_path) - else: - os.rename(handler_function_path, handler_path) - os.remove(handler_workflow_path) + package_config = CONFIG_FILES[code_package.language_name] function_dir = os.path.join(directory, "function") os.makedirs(function_dir) @@ -179,7 +166,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo requirements.close() # rename handler function.py since in gcp it has to be caled main.py - old_name, new_name = HANDLER[language_name] + old_name, new_name = 
HANDLER[code_package.language_name] old_path = os.path.join(directory, old_name) new_path = os.path.join(directory, new_name) shutil.move(old_path, new_path) @@ -195,7 +182,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo which leads to a "race condition" when running several benchmarks in parallel, since a change of the current directory is NOT Thread specfic. """ - benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark)) + benchmark_archive = "{}.zip".format(os.path.join(directory, code_package.name)) GCP.recursive_zip(directory, benchmark_archive) logging.info("Created {} archive".format(benchmark_archive)) @@ -206,7 +193,7 @@ def package_code(self, directory: str, language_name: str, benchmark: str, is_wo # rename the main.py back to handler.py shutil.move(new_path, old_path) - return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size + return os.path.join(directory, "{}.zip".format(code_package.name)), bytes_size def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunction": @@ -459,7 +446,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP memory=memory, bucket=code_bucket, ) - self.update_workflow(workflow, code_package, False) + self.update_workflow(workflow, code_package) # Add LibraryTrigger to a new function from sebs.gcp.triggers import WorkflowLibraryTrigger @@ -485,7 +472,7 @@ def create_workflow_trigger(self, workflow: Workflow, self.cache_client.update_benchmark(workflow) return trigger - def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_functions: bool): + def update_workflow(self, workflow: Workflow, code_package: CodePackage): workflow = cast(GCPWorkflow, workflow) # Make sure we have a valid workflow benchmark @@ -496,13 +483,10 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage, update_ f"No workflow definition found for {workflow.name}") # First we create a function for each code file - if update_functions: - prefix = workflow.name+"___" - code_files = list(code_package.get_code_files(include_config=False)) - func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, prefix+fn) for fn in func_names] - else: - funcs = workflow.functions + prefix = workflow.name+"___" + code_files = list(code_package.get_code_files(include_config=False)) + func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] + funcs = [self.create_function(code_package, prefix+fn) for fn in func_names] # Generate workflow definition.json urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] diff --git a/sebs/local/local.py b/sebs/local/local.py index 92472a10..e8b06c5c 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -119,13 +119,13 @@ def shutdown(self): benchmark: benchmark name """ - def package_code(self, directory: str, language_name: str, benchmark: str, is_workflow: bool) -> Tuple[str, int]: + def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } - package_config = CONFIG_FILES[language_name] + package_config = CONFIG_FILES[code_package.language_name] function_dir = os.path.join(directory, "function") os.makedirs(function_dir) # move all files to 'function' except handler.py From 
a4b2fa596ff2c1d9ddcfea9c7aa2fec491ab041c Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 4 Apr 2022 11:38:17 +0200 Subject: [PATCH 32/68] Basic azure workflow generation --- .../wrappers/azure/python/handler_function.py | 10 ++------ .../wrappers/azure/python/handler_workflow.py | 25 +++++++++++++------ sebs/azure/azure.py | 20 ++++++++++++--- sebs/azure/generator.py | 6 +++-- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler_function.py b/benchmarks/wrappers/azure/python/handler_function.py index 422bfa1d..ca17294d 100644 --- a/benchmarks/wrappers/azure/python/handler_function.py +++ b/benchmarks/wrappers/azure/python/handler_function.py @@ -6,7 +6,7 @@ # TODO: usual trigger # implement support for blob and others -def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.HttpResponse: +def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() if 'connection_string' in req_json: @@ -16,13 +16,7 @@ def main(req: func.HttpRequest, starter: str, context: func.Context) -> func.Htt begin = datetime.datetime.now() # We are deployed in the same directory from . import function - kwargs = { - 'event': req_json, - 'starter': starter, - 'context': context - } - kwargs = {k:v for (k,v) in kwargs.items() if k in function.handler.__code__.co_varnames} - ret = function.handler(**kwargs) + ret = function.handler(req_json) end = datetime.datetime.now() log_data = { diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index e91e7f25..bd15cf96 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -1,8 +1,11 @@ import datetime -import io import json import os import uuid +import importlib + +from azure.storage.blob import BlobServiceClient +import azure.functions as func def probe_cold_start(): is_cold = False @@ -19,11 +22,20 @@ def probe_cold_start(): return is_cold, container_id +# def main(event, measurements: func.Out[bytes]): def main(event): start = datetime.datetime.now().timestamp() - workflow_name, func_name = context.function_name.split("___") - res = func(event) + workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") + func_name = os.path.basename(os.path.dirname(__file__)) + + module_name = f"{func_name}.{func_name}" + module_path = f"{func_name}/{func_name}.py" + spec = importlib.util.spec_from_file_location(module_name, module_path) + function = importlib.util.module_from_spec(spec) + spec.loader.exec_module(function) + + res = function.handler(event) end = datetime.datetime.now().timestamp() @@ -35,10 +47,7 @@ def main(event): "container_id": container_id } - data = io.BytesIO(json.dumps(payload).encode("utf-8")) - path = os.path.join(workflow_name, func_name+".json") - - # s3 = boto3.client("s3") - # s3.upload_fileobj(data, "sebs-experiments", path) + # data = json.dumps(payload).encode("utf-8") + # measurements.set(data) return res diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 714c794f..0cfcd89e 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -2,11 +2,14 @@ import json import glob import os +import io import shutil import time from typing import cast, Dict, List, Optional, Set, Tuple, Type # noqa import docker +import pandas as pd +from azure.storage.blob import BlobServiceClient from sebs.azure.blob_storage import 
BlobStorage from sebs.azure.cli import AzureCLI @@ -167,11 +170,13 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b {"name": "starter", "type": "durableClient", "direction": "in"}, {"name": "$return", "type": "http", "direction": "out"} ] - activity_bindings = [ {"name": "event", "type": "activityTrigger", "direction": "in"}, - {"name": "$return", "type": "blob", "direction": "out"}, + # {"name": "$return", "type": "http", "direction": "out"}, ] + blob_binding = {"name": "measurements", "type": "blob", + "dataType": "binary", "direction": "out", + "connection": "AzureWebJobsStorage"} orchestrator_bindings = [ {"name": "context", "type": "orchestrationTrigger", "direction": "in"} ] @@ -203,8 +208,15 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b # generate function.json script_file = file if (name in bindings and is_workflow) else "handler.py" + func_blob_binding = { + "path": f"sebs-experiments/{name}", + **blob_binding + } + # default_bindings = activity_bindings + [func_blob_binding] + default_bindings = activity_bindings + payload = { - "bindings": bindings.get(name, activity_bindings), + "bindings": bindings.get(name, default_bindings), "scriptFile": script_file, "disabled": False } @@ -500,7 +512,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): trigger = HTTPTrigger( url, self.config.resources.data_storage_account(self.cli_instance)) trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) + workflow.add_trigger(trigger) """ diff --git a/sebs/azure/generator.py b/sebs/azure/generator.py index 4c6d6158..f82c20f5 100644 --- a/sebs/azure/generator.py +++ b/sebs/azure/generator.py @@ -15,7 +15,9 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: "\tres = context.get_input()") for payload in payloads: - code += "\n\t" + payload["src"] + src = payload["src"].splitlines() + src = "\n\t".join(src) + code += "\n\t" + src code += ("\n\treturn res" "\n\nmain = df.Orchestrator.create(run_workflow)") @@ -25,7 +27,7 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: } def encode_task(self, state: Task) -> dict: - code = f"res = yield context.call_activity(\"{state.func_name}\", res)" + code = (f"res = yield context.call_activity(\"{state.func_name}\", res)\n") return { "src": code From 62f5b5a755014cc7f06486ca5d191892549384a9 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 5 Apr 2022 13:34:59 +0200 Subject: [PATCH 33/68] Basic workflow switch statements --- benchmarks/wrappers/azure/python/fsm.py | 1 + .../wrappers/azure/python/main_workflow.py | 1 + .../wrappers/azure/python/run_workflow.py | 85 +++++++++++++ config/systems.json | 2 +- sebs/aws/generator.py | 28 ++++- sebs/azure/azure.py | 19 ++- sebs/azure/generator.py | 34 ----- sebs/faas/fsm.py | 119 ++++++++++++++++++ sebs/faas/generator.py | 102 --------------- sebs/gcp/generator.py | 19 ++- 10 files changed, 258 insertions(+), 152 deletions(-) create mode 120000 benchmarks/wrappers/azure/python/fsm.py create mode 100644 benchmarks/wrappers/azure/python/run_workflow.py delete mode 100644 sebs/azure/generator.py create mode 100644 sebs/faas/fsm.py delete mode 100644 sebs/faas/generator.py diff --git a/benchmarks/wrappers/azure/python/fsm.py b/benchmarks/wrappers/azure/python/fsm.py new file mode 120000 index 00000000..315f6590 --- /dev/null +++ b/benchmarks/wrappers/azure/python/fsm.py @@ -0,0 +1 @@ 
+/Users/Laurin/Documents/ETH/MSc_Thesis/serverless-benchmarks/sebs/faas/fsm.py \ No newline at end of file diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 36185a5c..fe69d746 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -6,6 +6,7 @@ import azure.functions as func import azure.durable_functions as df + def probe_cold_start(): is_cold = False fname = os.path.join("/tmp", "cold_run") diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py new file mode 100644 index 00000000..59ee1010 --- /dev/null +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -0,0 +1,85 @@ +import json +import sys +import os + +import azure.durable_functions as df + +dir_path = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(dir_path, os.path.pardir)) + +from .fsm import * + + +def resolve_var(obj, vars: str): + vars = vars.split(".") + for var in vars: + obj = getattr(obj, var) + + return obj + + +class Executor(): + + def __init__(self, path: str, context: df.DurableOrchestrationContext): + with open(path) as f: + definition = json.load(f) + + self.states = {n: State.deserialize(n, s) + for n, s in definition["states"].items()} + self.root = self.states[definition["root"]] + self.context = context + self.res = None + + def _execute_task(self, state: Task): + self.res = yield context.call_activity(state.func_name, self.res) + + if state.next: + next = self.states[state.next] + self.execute_state(next) + + def _execute_switch(self, state: Switch): + import operator as op + ops = { + "<": op.lt, + "<=": op.le, + "==": op.eq, + ">=": op.ge, + ">": op.gt + } + + for case in state.cases: + var = resolve_var(res, case.var) + op = ops[case.op] + if op(var, case.val): + next = self.states[case.next] + self.execute_state(next) + return + + if state.default: + default = self.state[state.default] + self.execute_state(default) + + def execute_state(self, state: State): + funcs = { + Task: self._execute_task, + Switch: self._execute_switch, + } + + func = funcs[type(state)] + func(state) + + def start_state_machine(self, input): + self.res = input + self.execute_state(self.root) + return self.res + + +def run_workflow(context: df.DurableOrchestrationContext): + input = context.get_input() + executor = Executor("definition.json", context) + res = executor.start_state_machine(input) + + return res + + +main = df.Orchestrator.create(run_workflow) \ No newline at end of file diff --git a/config/systems.json b/config/systems.json index 38f086a8..9bd5ecf8 100644 --- a/config/systems.json +++ b/config/systems.json @@ -75,7 +75,7 @@ "images": ["build"], "username": "docker_user", "deployment": { - "files": ["handler_function.py", "main_workflow.py", "handler_workflow.py", "storage.py"], + "files": ["handler_function.py", "main_workflow.py", "handler_workflow.py", "storage.py", "fsm.py", "run_workflow.py"], "packages": ["azure-storage-blob", "azure-functions", "azure-functions-durable"] } }, diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index cfd7ab55..c104cb88 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -1,7 +1,8 @@ import json from typing import Dict +import numbers -from sebs.faas.generator import * +from sebs.faas.fsm import * class SFNGenerator(Generator): @@ -32,3 +33,28 @@ def encode_task(self, state: Task) -> dict: payload["End"] = True return payload + + def 
encode_switch(self, state: Switch) -> dict: + choises = [self._encode_case(c) for c in state.cases] + return { + "Type": "Choice", + "Choices": choises, + "Default": state.default + } + + def _encode_case(self, case: Switch.Case) -> dict: + type = "Numeric" if isinstance(case.val, numbers.Number) else "String" + comp = { + "<": "LessThan", + "<=": "LessThanEquals", + "==": "Equals", + ">=": "GreaterThanEquals", + ">": "GreaterThan" + } + cond = type + comp[case.op] + + return { + "Variable": "$." + case.var, + cond: case.val, + "Next": case.next + } diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 0cfcd89e..ba2e3f1e 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -13,7 +13,6 @@ from sebs.azure.blob_storage import BlobStorage from sebs.azure.cli import AzureCLI -from sebs.azure.generator import AzureGenerator from sebs.azure.function import AzureFunction from sebs.azure.workflow import AzureWorkflow from sebs.azure.config import AzureConfig, AzureResources @@ -134,7 +133,7 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b "nodejs": ["package.json", "node_modules"], } WRAPPER_FILES = { - "python": ["handler.py", "storage.py"], + "python": ["handler.py", "storage.py", "fsm.py"], "nodejs": ["handler.js", "storage.js"] } file_type = FILES[code_package.language_name] @@ -146,20 +145,14 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b os.rename(main_path, os.path.join(directory, "main.py")) # Make sure we have a valid workflow benchmark - definition_path = os.path.join( + src_path = os.path.join( code_package.path, "definition.json") - if not os.path.exists(definition_path): + if not os.path.exists(src_path): raise ValueError( f"No workflow definition found for {workflow_name}") - # Generate workflow code and append it to handler.py - gen = AzureGenerator() - gen.parse(definition_path) - code = gen.generate() - - orchestrator_path = os.path.join(directory, "run_workflow.py") - with open(orchestrator_path, "w") as f: - f.writelines(code) + dst_path = os.path.join(directory, "definition.json") + shutil.copy2(src_path, dst_path) else: os.remove(main_path) @@ -452,6 +445,8 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Azur " --name {workflow_name} " ).format(**config) ) + print(json.loads(ret.decode())) + exit() for setting in json.loads(ret.decode()): if setting["name"] == "AzureWebJobsStorage": connection_string = setting["value"] diff --git a/sebs/azure/generator.py b/sebs/azure/generator.py deleted file mode 100644 index f82c20f5..00000000 --- a/sebs/azure/generator.py +++ /dev/null @@ -1,34 +0,0 @@ -from sebs.faas.generator import * - - -class AzureGenerator(Generator): - - def __init__(self): - def _export(payload: dict): - return payload["src"] - - super().__init__(_export) - - def postprocess(self, states: List[State], payloads: List[dict]) -> dict: - code = ("import azure.durable_functions as df\n\n" - "def run_workflow(context: df.DurableOrchestrationContext):\n" - "\tres = context.get_input()") - - for payload in payloads: - src = payload["src"].splitlines() - src = "\n\t".join(src) - code += "\n\t" + src - - code += ("\n\treturn res" - "\n\nmain = df.Orchestrator.create(run_workflow)") - - return { - "src": code - } - - def encode_task(self, state: Task) -> dict: - code = (f"res = yield context.call_activity(\"{state.func_name}\", res)\n") - - return { - "src": code - } \ No newline at end of file diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py new file mode 
100644 index 00000000..31cd9b12 --- /dev/null +++ b/sebs/faas/fsm.py @@ -0,0 +1,119 @@ +from abc import ABC +from abc import abstractmethod +from typing import Iterator, Optional, List, Callable +from enum import Enum +import json + + +class State(ABC): + + def __init__(self, name: str): + self.name = name + + @staticmethod + def deserialize(name: str, payload: dict) -> "State": + cls = _STATE_TYPES[payload["type"]] + return cls.deserialize(name, payload) + + +class Task(State): + + def __init__(self, + name: str, + func_name: str, + next: Optional[str]): + self.name = name + self.func_name = func_name + self.next = next + + @classmethod + def deserialize(cls, name: str, payload: dict) -> "Task": + return cls( + name=name, + func_name=payload["func_name"], + next=payload.get("next") + ) + + +class Switch(State): + + class Case: + def __init__(self, + var: str, + op: str, + val: str, + next: str): + self.var = var + self.op = op + self.val = val + self.next = next + + @staticmethod + def deserialize(payload: dict) -> "Case": + return Switch.Case( + var=payload["var"], + op=payload["op"], + val=payload.get("val"), + next=payload.get("next") + ) + + def __init__(self, name: str, cases: List[Case], default: Optional[str]): + self.name = name + self.cases = cases + self.default = default + + @classmethod + def deserialize(cls, name: str, payload: dict) -> "Switch": + cases = [Switch.Case.deserialize(c) for c in payload["cases"]] + + return cls( + name=name, + cases=cases, + default=payload["default"] + ) + + +_STATE_TYPES = { + "task": Task, + "switch": Switch +} + + +class Generator(ABC): + + def __init__(self, export_func: Callable[[dict], str] = json.dumps): + self._export_func = export_func + + def parse(self, path: str): + with open(path) as f: + definition = json.load(f) + + self.states = {n: State.deserialize(n, s) + for n, s in definition["states"].items()} + self.root = self.states[definition["root"]] + + def iterate_states() -> Iterator[State]: + return iter(self._states.values()) + + def generate(self) -> str: + payloads = [self.encode_state(s) for s in self._states] + definition = self.postprocess(self._states, payloads) + + return self._export_func(definition) + + def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + return {s.name: p for (s, p) in zip(states, payloads)} + + def encode_state(self, state: State) -> dict: + if isinstance(state, Task): + return self.encode_task(state) + elif isinstance(state, Switch): + return self.encode_switch(state) + + @abstractmethod + def encode_task(self, state: Task) -> dict: + pass + + @abstractmethod + def encode_switch(self, state: Switch) -> dict: + pass \ No newline at end of file diff --git a/sebs/faas/generator.py b/sebs/faas/generator.py deleted file mode 100644 index 06e08aa2..00000000 --- a/sebs/faas/generator.py +++ /dev/null @@ -1,102 +0,0 @@ -from abc import ABC -from abc import abstractmethod -from typing import Optional, List, Callable -from enum import Enum -import json - - -class State(ABC): - - def __init__(self, name: str): - self.name = name - - @staticmethod - def deserialize(payload: dict) -> "State": - cls = _STATE_TYPES[payload["type"]] - return cls.deserialize(payload) - - -class Task(State): - - def __init__(self, name: str, func_name: str, next: Optional[str], parameters: Optional[List[str]]): - self.name = name - self.func_name = func_name - self.next = next - self.parameters = parameters - - @staticmethod - def deserialize(payload: dict) -> State: - return Task( - name=payload["name"], - 
func_name=payload["func_name"], - next=payload.get("next"), - parameters=payload.get("parameters") - ) - - -# class Switch(State): -# -# class Operator(Enum): -# less = "less" -# less_equal = "less_equal" -# equal = "equal" -# greater_equal = "greater_equal" -# greater = "greater" -# -# class ConditionType(Enum): -# numeric = "numeric" -# string = "string" -# -# class Condition: -# pass -# -# def __init__(self, name: str, condition: Condition, condition_type: ConditionType): -# self.name = name -# self.condition = condition -# self.condition_type = condition_type -# -# @staticmethod -# def deserialize(payload: dict) -> Switch: -# return Switch( -# payload["name"], -# payload["condition"], -# payload["condition_type"] -# ) - - -_STATE_TYPES = { - "task": Task -} - - -class Generator(ABC): - - def __init__(self, export_func: Callable[[dict], str] = json.dumps): - self._states: List[State] = [] - self._export_func = export_func - - def parse(self, path: str): - with open(path) as f: - states = json.load(f) - - self._states = [State.deserialize(s) for s in states] - - if len(states) == 0: - raise RuntimeError("A workflow definition must have at least one state.") - - def generate(self) -> str: - payloads = [self.encode_state(s) for s in self._states] - definition = self.postprocess(self._states, payloads) - - return self._export_func(definition) - - def postprocess(self, states: List[State], payloads: List[dict]) -> dict: - return {s.name: p for (s, p) in zip(states, payloads)} - - def encode_state(self, state: State) -> dict: - if isinstance(state, Task): - return self.encode_task(state) - - @abstractmethod - def encode_task(self, state: Task) -> dict: - pass \ No newline at end of file diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index e2832849..c2c7d1f8 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -1,7 +1,7 @@ import json from typing import Dict -from sebs.faas.generator import * +from sebs.faas.fsm import * class GCPGenerator(Generator): @@ -26,6 +26,21 @@ def encode_task(self, state: Task) -> dict: "call": "http.get", "args": { "url": url - } + }, + "result": "res" } } + + def encode_switch(self, state: Switch) -> dict: + return { + "switch": [self._encode_case(c) for c in state.cases], + "next": state.default + } + + def _encode_case(self, case: Switch.Case) -> dict: + cond = "res." 
+ case.var + " " + case.op + " " + str(case.val) + return { + "condition": "${"+cond+"}", + "next": case.next + } + From 3259fc1ac758c0527d1d1cdfc1493bad28fc58ba Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 5 Apr 2022 15:56:55 +0200 Subject: [PATCH 34/68] Fix workflow def generation --- sebs/aws/generator.py | 2 +- sebs/faas/fsm.py | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index c104cb88..52112df5 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -15,7 +15,7 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: payloads = super().postprocess(states, payloads) definition = { "Comment": "SeBS auto-generated benchmark", - "StartAt": states[0].name, + "StartAt": self.root.name, "States": payloads } diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 31cd9b12..97b992a0 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -89,15 +89,13 @@ def parse(self, path: str): definition = json.load(f) self.states = {n: State.deserialize(n, s) - for n, s in definition["states"].items()} + for n, s in definition["states"].items()} self.root = self.states[definition["root"]] - def iterate_states() -> Iterator[State]: - return iter(self._states.values()) - def generate(self) -> str: - payloads = [self.encode_state(s) for s in self._states] - definition = self.postprocess(self._states, payloads) + states = self.states.values() + payloads = [self.encode_state(s) for s in states] + definition = self.postprocess(states, payloads) return self._export_func(definition) From 75d2dbca837b17191c49f47c2b6480997a1d8371 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 5 Apr 2022 15:57:31 +0200 Subject: [PATCH 35/68] Fix azure blob storage --- benchmarks/wrappers/azure/python/handler_workflow.py | 7 +++---- sebs/azure/azure.py | 4 +--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index bd15cf96..43f75e35 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -22,8 +22,7 @@ def probe_cold_start(): return is_cold, container_id -# def main(event, measurements: func.Out[bytes]): -def main(event): +def main(event, measurements: func.Out[bytes]): start = datetime.datetime.now().timestamp() workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") @@ -47,7 +46,7 @@ def main(event): "container_id": container_id } - # data = json.dumps(payload).encode("utf-8") - # measurements.set(data) + data = json.dumps(payload).encode("utf-8") + measurements.set(data) return res diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index ba2e3f1e..5e193397 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -165,7 +165,6 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b ] activity_bindings = [ {"name": "event", "type": "activityTrigger", "direction": "in"}, - # {"name": "$return", "type": "http", "direction": "out"}, ] blob_binding = {"name": "measurements", "type": "blob", "dataType": "binary", "direction": "out", @@ -205,8 +204,7 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b "path": f"sebs-experiments/{name}", **blob_binding } - # default_bindings = activity_bindings + [func_blob_binding] - default_bindings = activity_bindings + default_bindings = activity_bindings + [func_blob_binding] payload = { "bindings": 
bindings.get(name, default_bindings), From d5dfc44af4ad71f26020b17a65edb9b5c74ad38b Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 6 Apr 2022 11:53:32 +0200 Subject: [PATCH 36/68] Fix azure credential script --- tools/create_azure_credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/create_azure_credentials.py b/tools/create_azure_credentials.py index 50260745..8c93a60a 100755 --- a/tools/create_azure_credentials.py +++ b/tools/create_azure_credentials.py @@ -53,7 +53,7 @@ print(out.decode()) else: credentials = json.loads(out.decode()) - print('Created service principal {}'.format(credentials['name'])) + print('Created service principal {}'.format(credentials['displayName'])) print('AZURE_SECRET_APPLICATION_ID = {}'.format(credentials['appId'])) print('AZURE_SECRET_TENANT = {}'.format(credentials['tenant'])) print('AZURE_SECRET_PASSWORD = {}'.format(credentials['password'])) From 496134900235dc22da8bdcb97d5c21d728013bed Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 18 Apr 2022 15:53:24 +0200 Subject: [PATCH 37/68] Send measurements to redis cache --- .../wrappers/aws/python/handler_workflow.py | 17 ++-- .../wrappers/azure/python/handler_workflow.py | 16 +++- .../wrappers/azure/python/main_workflow.py | 4 +- .../wrappers/azure/python/run_workflow.py | 96 +++++++------------ .../wrappers/gcp/python/handler_workflow.py | 20 ++-- config/systems.json | 6 +- sebs.py | 8 +- sebs/aws/aws.py | 5 +- sebs/aws/config.py | 6 ++ sebs/aws/function.py | 4 +- sebs/azure/azure.py | 18 +--- sebs/azure/config.py | 6 ++ sebs/azure/triggers.py | 1 - sebs/gcp/config.py | 6 ++ sebs/gcp/gcp.py | 10 +- sebs/gcp/generator.py | 8 +- sebs/utils.py | 38 ++++++++ 17 files changed, 151 insertions(+), 118 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler_workflow.py b/benchmarks/wrappers/aws/python/handler_workflow.py index aa6852ee..c3af6f77 100644 --- a/benchmarks/wrappers/aws/python/handler_workflow.py +++ b/benchmarks/wrappers/aws/python/handler_workflow.py @@ -7,11 +7,10 @@ import uuid import importlib -import boto3 - # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +from redis import Redis def probe_cold_start(): is_cold = False @@ -38,17 +37,19 @@ def handler(event, context): end = datetime.datetime.now().timestamp() is_cold, container_id = probe_cold_start() - payload = { + payload = json.dumps({ "start": start, "end": end, "is_cold": is_cold, "container_id": container_id - } + }) - data = io.BytesIO(json.dumps(payload).encode("utf-8")) - path = os.path.join(workflow_name, func_name+".json") + redis = Redis(host={{REDIS_HOST}}, + port=6379, + decode_responses=True, + socket_connect_timeout=10) - s3 = boto3.client("s3") - s3.upload_fileobj(data, "sebs-experiments", path) + key = os.path.join(workflow_name, func_name) + redis.set(key, payload) return res diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index 43f75e35..4f598366 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -6,6 +6,7 @@ from azure.storage.blob import BlobServiceClient import azure.functions as func +from redis import Redis def probe_cold_start(): is_cold = False @@ -22,7 +23,7 @@ def probe_cold_start(): return is_cold, container_id -def main(event, measurements: func.Out[bytes]): +def main(event): start = 
datetime.datetime.now().timestamp() workflow_name = os.getenv("APPSETTING_WEBSITE_SITE_NAME") @@ -39,14 +40,19 @@ def main(event, measurements: func.Out[bytes]): end = datetime.datetime.now().timestamp() is_cold, container_id = probe_cold_start() - payload = { + payload = json.dumps({ "start": start, "end": end, "is_cold": is_cold, "container_id": container_id - } + }) - data = json.dumps(payload).encode("utf-8") - measurements.set(data) + redis = Redis(host={{REDIS_HOST}}, + port=6379, + decode_responses=True, + socket_connect_timeout=10) + + key = os.path.join(workflow_name, func_name) + redis.set(key, payload) return res diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index fe69d746..016f6311 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -33,8 +33,7 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu end = datetime.datetime.now() is_cold, container_id = probe_cold_start() - res = client.create_check_status_response(req, instance_id) - status_body = json.loads(res.get_body()) + client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) body = { 'begin': begin.strftime('%s.%f'), 'end': end.strftime('%s.%f'), @@ -42,7 +41,6 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu 'request_id': context.invocation_id, "is_cold": is_cold, "container_id": container_id, - **status_body } return func.HttpResponse( json.dumps(body), diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 59ee1010..93a4bcab 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -1,6 +1,7 @@ import json import sys import os +import operator import azure.durable_functions as df @@ -13,71 +14,48 @@ def resolve_var(obj, vars: str): vars = vars.split(".") for var in vars: - obj = getattr(obj, var) + obj = obj[var] return obj -class Executor(): - - def __init__(self, path: str, context: df.DurableOrchestrationContext): - with open(path) as f: - definition = json.load(f) - - self.states = {n: State.deserialize(n, s) - for n, s in definition["states"].items()} - self.root = self.states[definition["root"]] - self.context = context - self.res = None - - def _execute_task(self, state: Task): - self.res = yield context.call_activity(state.func_name, self.res) - - if state.next: - next = self.states[state.next] - self.execute_state(next) - - def _execute_switch(self, state: Switch): - import operator as op - ops = { - "<": op.lt, - "<=": op.le, - "==": op.eq, - ">=": op.ge, - ">": op.gt - } - - for case in state.cases: - var = resolve_var(res, case.var) - op = ops[case.op] - if op(var, case.val): - next = self.states[case.next] - self.execute_state(next) - return - - if state.default: - default = self.state[state.default] - self.execute_state(default) - - def execute_state(self, state: State): - funcs = { - Task: self._execute_task, - Switch: self._execute_switch, - } - - func = funcs[type(state)] - func(state) - - def start_state_machine(self, input): - self.res = input - self.execute_state(self.root) - return self.res - - def run_workflow(context: df.DurableOrchestrationContext): input = context.get_input() - executor = Executor("definition.json", context) - res = executor.start_state_machine(input) + + with open("definition.json") as f: + definition = json.load(f) + + 
states = {n: State.deserialize(n, s) + for n, s in definition["states"].items()} + current = states[definition["root"]] + res = None + + while current: + if isinstance(current, Task): + res = yield context.call_activity(current.func_name, res) + current = states.get(current.next, None) + elif isinstance(current, Switch): + ops = { + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + ">=": operator.ge, + ">": operator.gt + } + + next = None + for case in current.cases: + var = resolve_var(res, case.var) + op = ops[case.op] + if op(var, case.val): + next = states[case.next] + break + + if not next and current.default: + next = states[current.default] + current = next + else: + raise ValueError(f"Undefined state: {current}") return res diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py index 91a51373..7480a001 100644 --- a/benchmarks/wrappers/gcp/python/handler_workflow.py +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -7,11 +7,11 @@ import uuid import importlib -from google.cloud import storage - # Add current directory to allow location of packages sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +from redis import Redis + def probe_cold_start(): is_cold = False @@ -39,19 +39,19 @@ def handler(req): end = datetime.datetime.now().timestamp() is_cold, container_id = probe_cold_start() - payload = { + payload = json.dumps({ "start": start, "end": end, "is_cold": is_cold, "container_id": container_id - } + }) - data = io.BytesIO(json.dumps(payload).encode("utf-8")) - path = os.path.join(workflow_name, func_name+".json") + redis = Redis(host={{REDIS_HOST}}, + port=6379, + decode_responses=True, + socket_connect_timeout=10) - client = storage.Client() - bucket = client.bucket("sebs-experiments") - blob = bucket.blob(path) - blob.upload_from_file(data) + key = os.path.join(workflow_name, func_name) + redis.set(key, payload) return res diff --git a/config/systems.json b/config/systems.json index 9bd5ecf8..a43d1513 100644 --- a/config/systems.json +++ b/config/systems.json @@ -45,7 +45,7 @@ "username": "docker_user", "deployment": { "files": ["handler_function.py", "handler_workflow.py", "storage.py"], - "packages": [] + "packages": ["redis"] } }, "nodejs": { @@ -76,7 +76,7 @@ "username": "docker_user", "deployment": { "files": ["handler_function.py", "main_workflow.py", "handler_workflow.py", "storage.py", "fsm.py", "run_workflow.py"], - "packages": ["azure-storage-blob", "azure-functions", "azure-functions-durable"] + "packages": ["azure-storage-blob", "azure-functions", "azure-functions-durable", "redis"] } }, "nodejs": { @@ -111,7 +111,7 @@ "username": "docker_user", "deployment": { "files": ["handler_function.py", "handler_workflow.py", "storage.py"], - "packages": ["google-cloud-storage"] + "packages": ["google-cloud-storage", "redis"] } }, "nodejs": { diff --git a/sebs.py b/sebs.py index 3d2c1030..082bcbfe 100755 --- a/sebs.py +++ b/sebs.py @@ -14,7 +14,7 @@ import sebs from sebs import SeBS from sebs.regression import regression_suite -from sebs.utils import update_nested_dict +from sebs.utils import update_nested_dict, download_metrics from sebs.faas import System as FaaSSystem from sebs.faas.benchmark import Trigger @@ -301,9 +301,9 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam ret = trigger.sync_invoke(input_config) if ret.stats.failure: sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - 
#deployment_client.get_invocation_error( - # function_name=func.name, start_time=start_time, end_time=end_time - #) + + results_dir = "cache/results" + download_metrics(deployment_client.config.redis_host, workflow.name, results_dir, rep=i) result.add_invocation(workflow, ret) result.end() with open("experiments.json", "w") as out_f: diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 649d7f72..461d16ef 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -15,7 +15,7 @@ from sebs.aws.workflow import SFNWorkflow from sebs.aws.generator import SFNGenerator from sebs.aws.config import AWSConfig -from sebs.utils import execute +from sebs.utils import execute, replace_string_in_file from sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig @@ -153,6 +153,9 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b file = os.path.join(directory, file) shutil.move(file, function_dir) + handler_path = os.path.join(directory, CONFIG_FILES[code_package.language_name][0]) + replace_string_in_file(handler_path, "{{REDIS_HOST}}", f"\"{self.config.redis_host}\"") + # For python, add an __init__ file if code_package.language_name == "python": path = os.path.join(function_dir, "__init__.py") diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 13fa49e3..40c359d5 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -271,11 +271,16 @@ def credentials(self) -> AWSCredentials: def resources(self) -> AWSResources: return self._resources + @property + def redis_host(self) -> str: + return self._redis_host + # FIXME: use future annotations (see sebs/faas/system) @staticmethod def initialize(cfg: Config, dct: dict): config = cast(AWSConfig, cfg) config._region = dct["region"] + config._redis_host = dct["redis_host"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: @@ -317,5 +322,6 @@ def serialize(self) -> dict: "region": self._region, "credentials": self._credentials.serialize(), "resources": self._resources.serialize(), + "redis_host": self._redis_host } return out diff --git a/sebs/aws/function.py b/sebs/aws/function.py index e8797a58..20816745 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -1,10 +1,10 @@ from typing import cast, Optional from sebs.aws.s3 import S3 -from sebs.faas.benchmark import Benchmark +from sebs.faas.benchmark import Function -class LambdaFunction(Benchmark): +class LambdaFunction(Function): def __init__( self, name: str, diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 5e193397..663f3715 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -20,7 +20,7 @@ from sebs.code_package import CodePackage from sebs.cache import Cache from sebs.config import SeBSConfig -from sebs.utils import LoggingHandlers, execute +from sebs.utils import LoggingHandlers, execute, replace_string_in_file from sebs.faas.benchmark import Benchmark, Function, ExecutionResult, Workflow, Trigger from sebs.faas.storage import PersistentStorage from sebs.faas.system import System @@ -166,9 +166,6 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b activity_bindings = [ {"name": "event", "type": "activityTrigger", "direction": "in"}, ] - blob_binding = {"name": "measurements", "type": "blob", - "dataType": "binary", "direction": "out", - "connection": "AzureWebJobsStorage"} orchestrator_bindings = [ {"name": "context", "type": "orchestrationTrigger", "direction": "in"} ] @@ -200,20 +197,17 @@ def package_code(self, 
code_package: CodePackage, directory: str, is_workflow: b # generate function.json script_file = file if (name in bindings and is_workflow) else "handler.py" - func_blob_binding = { - "path": f"sebs-experiments/{name}", - **blob_binding - } - default_bindings = activity_bindings + [func_blob_binding] - payload = { - "bindings": bindings.get(name, default_bindings), + "bindings": bindings.get(name, activity_bindings), "scriptFile": script_file, "disabled": False } dst_json = os.path.join(os.path.dirname(dst_file), "function.json") json.dump(payload, open(dst_json, "w"), indent=2) + handler_path = os.path.join(directory, WRAPPER_FILES[code_package.language_name][0]) + replace_string_in_file(handler_path, "{{REDIS_HOST}}", f"\"{self.config.redis_host}\"") + # copy every wrapper file to respective function dirs for wrapper_file in wrapper_files: src_path = os.path.join(directory, wrapper_file) @@ -443,8 +437,6 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Azur " --name {workflow_name} " ).format(**config) ) - print(json.loads(ret.decode())) - exit() for setting in json.loads(ret.decode()): if setting["name"] == "AzureWebJobsStorage": connection_string = setting["value"] diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 20591595..23b5936d 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -287,11 +287,16 @@ def resources(self) -> AzureResources: def resources_id(self) -> str: return self._resources_id + @property + def redis_host(self) -> str: + return self._redis_host + # FIXME: use future annotations (see sebs/faas/system) @staticmethod def initialize(cfg: Config, dct: dict): config = cast(AzureConfig, cfg) config._region = dct["region"] + config._redis_host = dct["redis_host"] if "resources_id" in dct: config._resources_id = dct["resources_id"] else: @@ -341,5 +346,6 @@ def serialize(self) -> dict: "resources_id": self.resources_id, "credentials": self._credentials.serialize(), "resources": self._resources.serialize(), + "redis_host": self._redis_host } return out diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index a8f9a180..a0c8bfdc 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -30,7 +30,6 @@ def trigger_type() -> Trigger.TriggerType: return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: - payload["connection_string"] = self.data_storage_account.connection_string return self._http_invoke(payload, self.url) diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index c4624ad3..11453376 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -165,6 +165,10 @@ def credentials(self) -> GCPCredentials: def resources(self) -> GCPResources: return self._resources + @property + def redis_host(self) -> str: + return self._redis_host + @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": cached_config = cache.get_config("gcp") @@ -219,6 +223,7 @@ def initialize(cfg: Config, dct: dict): config = cast(GCPConfig, cfg) config._project_name = dct["project_name"] config._region = dct["region"] + config._redis_host = dct["redis_host"] def serialize(self) -> dict: out = { @@ -227,6 +232,7 @@ def serialize(self) -> dict: "region": self._region, "credentials": self._credentials.serialize(), "resources": self._resources.serialize(), + "redis_host": self._redis_host } return out diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index d3126e6d..ec3520f8 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -24,7 +24,7 @@ from 
sebs.gcp.function import GCPFunction from sebs.gcp.workflow import GCPWorkflow from sebs.gcp.generator import GCPGenerator -from sebs.utils import LoggingHandlers +from sebs.utils import LoggingHandlers, replace_string_in_file """ This class provides basic abstractions for the FaaS system. @@ -161,16 +161,14 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b file = os.path.join(directory, file) shutil.move(file, function_dir) - requirements = open(os.path.join(directory, "requirements.txt"), "w") - requirements.write("google-cloud-storage") - requirements.close() - # rename handler function.py since in gcp it has to be caled main.py old_name, new_name = HANDLER[code_package.language_name] old_path = os.path.join(directory, old_name) new_path = os.path.join(directory, new_name) shutil.move(old_path, new_path) + replace_string_in_file(new_path, "{{REDIS_HOST}}", f"\"{self.config.redis_host}\"") + """ zip the whole directroy (the zip-file gets uploaded to gcp later) @@ -423,7 +421,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP body={ "name": full_workflow_name, "sourceContents": definition, - }, + } ) ) ret = create_req.execute() diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index c2c7d1f8..e4c7d9d1 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -33,12 +33,14 @@ def encode_task(self, state: Task) -> dict: def encode_switch(self, state: Switch) -> dict: return { - "switch": [self._encode_case(c) for c in state.cases], - "next": state.default + state.name: { + "switch": [self._encode_case(c) for c in state.cases], + "next": state.default + } } def _encode_case(self, case: Switch.Case) -> dict: - cond = "res." + case.var + " " + case.op + " " + str(case.val) + cond = "res.body." 
+ case.var + " " + case.op + " " + str(case.val) return { "condition": "${"+cond+"}", "next": case.next diff --git a/sebs/utils.py b/sebs/utils.py index 33ee7b9a..f3bb28ef 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -7,6 +7,9 @@ import uuid from typing import List, Optional, TextIO, Union +from redis import Redis +import pandas as pd + PROJECT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir) PACK_CODE_APP = "pack_code_{}.sh" @@ -85,6 +88,41 @@ def configure_logging(): logging.getLogger(name).setLevel(logging.ERROR) +def replace_string_in_file(path: str, from_str: str, to_str: str): + with open(path, "rt") as f: + data = f.read() + + data = data.replace(from_str, to_str) + + with open(path, "wt") as f: + f.write(data) + + +def download_metrics(host: str, workflow_name: str, dst_dir: str, **static_args): + redis = Redis(host=host, + port=6379, + decode_responses=True, + socket_connect_timeout=10) + redis.ping() + + df = pd.DataFrame(columns=["func", "rep", "start", "end"]) + for func_name in redis.scan_iter(pattern=f"{workflow_name}/*"): + payload = redis.get(func_name) + payload = json.loads(payload) + payload = {**payload, **static_args} + + payload["func"] = func_name + payload = pd.DataFrame([payload]) + + df = pd.concat([df, payload]) + redis.delete(func_name) + + if df.shape[0] == 0: + raise RuntimeError(f"Did not find any measurements for {workflow_name}") + + path = os.path.join(dst_dir, workflow_name+".csv") + df.to_csv(path, index=False) + # def configure_logging(verbose: bool = False, output_dir: Optional[str] = None): # logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" # logging_date_format = "%H:%M:%S" From 8a6c404a0e480eb3b796edbecda0de2831c583dd Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 20 Apr 2022 11:02:24 +0200 Subject: [PATCH 38/68] Fix minor workflow measurement issues --- .../wrappers/aws/python/handler_workflow.py | 1 + .../wrappers/azure/python/handler_workflow.py | 3 +- .../wrappers/azure/python/main_workflow.py | 12 ++++--- .../wrappers/azure/python/run_workflow.py | 7 ++-- .../wrappers/gcp/python/handler_workflow.py | 4 ++- sebs.py | 16 +++++++-- sebs/gcp/generator.py | 8 +++-- sebs/utils.py | 36 ++++++++++--------- 8 files changed, 54 insertions(+), 33 deletions(-) diff --git a/benchmarks/wrappers/aws/python/handler_workflow.py b/benchmarks/wrappers/aws/python/handler_workflow.py index c3af6f77..31c2980c 100644 --- a/benchmarks/wrappers/aws/python/handler_workflow.py +++ b/benchmarks/wrappers/aws/python/handler_workflow.py @@ -38,6 +38,7 @@ def handler(event, context): is_cold, container_id = probe_cold_start() payload = json.dumps({ + "func": func_name, "start": start, "end": end, "is_cold": is_cold, diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index 4f598366..0436de88 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -41,10 +41,11 @@ def main(event): is_cold, container_id = probe_cold_start() payload = json.dumps({ + "func": func_name, "start": start, "end": end, "is_cold": is_cold, - "container_id": container_id + "container_id": container_id, }) redis = Redis(host={{REDIS_HOST}}, diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 016f6311..68e2afbf 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ 
-29,19 +29,21 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu client = df.DurableOrchestrationClient(starter) instance_id = await client.start_new("run_workflow", None, event) + res = await client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) end = datetime.datetime.now() is_cold, container_id = probe_cold_start() - client.wait_for_completion_or_create_check_status_response(req, instance_id, 1000000) + status_body = json.loads(res.get_body()) body = { - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id, + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), "is_cold": is_cold, "container_id": container_id, + "request_id": context.invocation_id, + **status_body } + return func.HttpResponse( json.dumps(body), mimetype="application/json" diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 93a4bcab..3b28f6f1 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -20,19 +20,18 @@ def resolve_var(obj, vars: str): def run_workflow(context: df.DurableOrchestrationContext): - input = context.get_input() - with open("definition.json") as f: definition = json.load(f) states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} current = states[definition["root"]] - res = None + res = context.get_input() while current: if isinstance(current, Task): - res = yield context.call_activity(current.func_name, res) + payload = yield context.call_activity(current.func_name, res) + res = {**res, **payload} current = states.get(current.next, None) elif isinstance(current, Switch): ops = { diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py index 7480a001..da7175c1 100644 --- a/benchmarks/wrappers/gcp/python/handler_workflow.py +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -30,16 +30,18 @@ def probe_cold_start(): def handler(req): start = datetime.datetime.now().timestamp() + event = req.get_json() full_function_name = os.getenv("FUNCTION_NAME") workflow_name, func_name = full_function_name.split("___") function = importlib.import_module(f"function.{func_name}") - res = function.handler(req) + res = function.handler(event) end = datetime.datetime.now().timestamp() is_cold, container_id = probe_cold_start() payload = json.dumps({ + "func": func_name, "start": start, "end": end, "is_cold": is_cold, diff --git a/sebs.py b/sebs.py index 082bcbfe..64c3d877 100755 --- a/sebs.py +++ b/sebs.py @@ -7,14 +7,16 @@ import os import sys import traceback +from time import sleep from typing import cast, Optional import click +import pandas as pd import sebs from sebs import SeBS from sebs.regression import regression_suite -from sebs.utils import update_nested_dict, download_metrics +from sebs.utils import update_nested_dict, download_measurements, connect_to_redis_cache from sebs.faas import System as FaaSSystem from sebs.faas.benchmark import Trigger @@ -265,6 +267,7 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam sebs_client, deployment_client, ) = parse_common_params(**kwargs) + redis = connect_to_redis_cache(deployment_client.config.redis_host) experiment_config = sebs_client.get_experiment_config(config["experiments"]) benchmark_obj = sebs_client.get_benchmark( @@ -283,6 
+286,7 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam storage=storage, size=benchmark_input_size ) + df = pd.DataFrame(columns=["func", "rep", "start", "end"]) result = sebs.experiments.ExperimentResult( experiment_config, deployment_client.config ) @@ -302,10 +306,16 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam if ret.stats.failure: sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - results_dir = "cache/results" - download_metrics(deployment_client.config.redis_host, workflow.name, results_dir, rep=i) + df_i = download_measurements(redis, workflow.name, rep=i) + df = pd.concat([df, df_i]) + result.add_invocation(workflow, ret) result.end() + + path = os.path.join(output_dir, "results", workflow.name, deployment_client.name()+".csv") + os.makedirs(os.path.dirname(path), exist_ok=True) + df.to_csv(path, index=False) + with open("experiments.json", "w") as out_f: out_f.write(sebs.utils.serialize(result)) sebs_client.logging.info("Save results to {}".format(os.path.abspath("experiments.json"))) diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index e4c7d9d1..c828dd1a 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -12,6 +12,9 @@ def __init__(self, func_triggers: Dict[str, str]): def postprocess(self, states: List[State], payloads: List[dict]) -> dict: definition = { "main" : { + "params": [ + "res" + ], "steps": payloads } } @@ -23,9 +26,10 @@ def encode_task(self, state: Task) -> dict: return { state.name: { - "call": "http.get", + "call": "http.post", "args": { - "url": url + "url": url, + "body": "${res}" }, "result": "res" } diff --git a/sebs/utils.py b/sebs/utils.py index f3bb28ef..cd65436c 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -98,30 +98,32 @@ def replace_string_in_file(path: str, from_str: str, to_str: str): f.write(data) -def download_metrics(host: str, workflow_name: str, dst_dir: str, **static_args): +def connect_to_redis_cache(host: str): redis = Redis(host=host, - port=6379, - decode_responses=True, - socket_connect_timeout=10) + port=6379, + decode_responses=True, + socket_connect_timeout=10) redis.ping() - df = pd.DataFrame(columns=["func", "rep", "start", "end"]) - for func_name in redis.scan_iter(pattern=f"{workflow_name}/*"): - payload = redis.get(func_name) - payload = json.loads(payload) - payload = {**payload, **static_args} + return redis - payload["func"] = func_name - payload = pd.DataFrame([payload]) - df = pd.concat([df, payload]) - redis.delete(func_name) +def download_measurements(redis: Redis, workflow_name: str, **static_args): + for key in redis.scan_iter(pattern=f"{workflow_name}/*"): + payload = redis.get(key) - if df.shape[0] == 0: - raise RuntimeError(f"Did not find any measurements for {workflow_name}") + try: + payload = json.loads(payload) + payload = {**payload, **static_args} + + df = pd.DataFrame([payload]) + except json.decoder.JSONDecodeError: + print(f"Failed to decode payload: {payload}") + finally: + redis.delete(key) + + return df - path = os.path.join(dst_dir, workflow_name+".csv") - df.to_csv(path, index=False) # def configure_logging(verbose: bool = False, output_dir: Optional[str] = None): # logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" From 60b9ddc88bfada112f8a276034714a5965dd6a5b Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 20 Apr 2022 11:02:42 +0200 Subject: [PATCH 39/68] Add generic workflow benchmark --- benchmarks/600.workflows/610.gen/config.json | 5 +++ 
.../600.workflows/610.gen/definition.json | 37 +++++++++++++++++++ benchmarks/600.workflows/610.gen/input.py | 12 ++++++ .../610.gen/python/process_one.py | 7 ++++ .../610.gen/python/process_two.py | 5 +++ .../610.gen/python/process_zero.py | 11 ++++++ 6 files changed, 77 insertions(+) create mode 100644 benchmarks/600.workflows/610.gen/config.json create mode 100644 benchmarks/600.workflows/610.gen/definition.json create mode 100644 benchmarks/600.workflows/610.gen/input.py create mode 100644 benchmarks/600.workflows/610.gen/python/process_one.py create mode 100644 benchmarks/600.workflows/610.gen/python/process_two.py create mode 100644 benchmarks/600.workflows/610.gen/python/process_zero.py diff --git a/benchmarks/600.workflows/610.gen/config.json b/benchmarks/600.workflows/610.gen/config.json new file mode 100644 index 00000000..8eae0824 --- /dev/null +++ b/benchmarks/600.workflows/610.gen/config.json @@ -0,0 +1,5 @@ +{ + "timeout": 120, + "memory": 128, + "languages": ["python"] +} diff --git a/benchmarks/600.workflows/610.gen/definition.json b/benchmarks/600.workflows/610.gen/definition.json new file mode 100644 index 00000000..50933c59 --- /dev/null +++ b/benchmarks/600.workflows/610.gen/definition.json @@ -0,0 +1,37 @@ +{ + "root": "process_zero", + "states": { + "process_zero": { + "type": "task", + "func_name": "process_zero", + "next": "select_astros_number" + }, + "select_astros_number": { + "type": "switch", + "cases": [ + { + "var": "astros.number", + "op": "<", + "val": 10, + "next": "process_one" + }, + { + "var": "astros.number", + "op": ">=", + "val": 10, + "next": "process_two" + } + ], + "default": "process_one" + }, + "process_one": { + "type": "task", + "func_name": "process_one", + "next": "process_two" + }, + "process_two": { + "type": "task", + "func_name": "process_two" + } + } +} \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/input.py b/benchmarks/600.workflows/610.gen/input.py new file mode 100644 index 00000000..477cf27a --- /dev/null +++ b/benchmarks/600.workflows/610.gen/input.py @@ -0,0 +1,12 @@ + +size_generators = { + 'test' : 1, + 'small' : 100, + 'large': 1000 +} + +def buckets_count(): + return (0, 0) + +def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): + return { "idx": 0 } diff --git a/benchmarks/600.workflows/610.gen/python/process_one.py b/benchmarks/600.workflows/610.gen/python/process_one.py new file mode 100644 index 00000000..7f85f64a --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/process_one.py @@ -0,0 +1,7 @@ +from time import sleep + +def handler(event): + print(event) + + data = (str(i % 255) for i in range(2**4)) + return {"buffer": "".join(data)} \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/process_two.py b/benchmarks/600.workflows/610.gen/python/process_two.py new file mode 100644 index 00000000..afbc46aa --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/process_two.py @@ -0,0 +1,5 @@ +from time import sleep + +def handler(event): + data = (str(i % 255) for i in range(2**4)) + return {"buffer": "".join(data)} \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/process_zero.py b/benchmarks/600.workflows/610.gen/python/process_zero.py new file mode 100644 index 00000000..c4e71760 --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/process_zero.py @@ -0,0 +1,11 @@ +from time import sleep +import requests + +def handler(event): + res = requests.get("http://api.open-notify.org/astros.json") + 
+ data = (str(i % 255) for i in range(2**4)) + return { + "buffer": "".join(data), + "astros": res.json() + } \ No newline at end of file From 2a253d5e28e5bf78610ff0063191721404d94963 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 20 Apr 2022 14:42:36 +0200 Subject: [PATCH 40/68] Add aws map state --- .../600.workflows/610.gen/definition.json | 7 +++ .../610.gen/python/process_astros.py | 7 +++ sebs/aws/generator.py | 29 ++++++++++++- sebs/faas/fsm.py | 43 ++++++++++++++++--- 4 files changed, 78 insertions(+), 8 deletions(-) create mode 100644 benchmarks/600.workflows/610.gen/python/process_astros.py diff --git a/benchmarks/600.workflows/610.gen/definition.json b/benchmarks/600.workflows/610.gen/definition.json index 50933c59..e9b9e1b8 100644 --- a/benchmarks/600.workflows/610.gen/definition.json +++ b/benchmarks/600.workflows/610.gen/definition.json @@ -4,6 +4,13 @@ "process_zero": { "type": "task", "func_name": "process_zero", + "next": "process_astros" + }, + "process_astros": { + "type": "map", + "func_name": "process_astros", + "array": "astros.people", + "max_concurrency": 5, "next": "select_astros_number" }, "select_astros_number": { diff --git a/benchmarks/600.workflows/610.gen/python/process_astros.py b/benchmarks/600.workflows/610.gen/python/process_astros.py new file mode 100644 index 00000000..b98b5e9d --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/process_astros.py @@ -0,0 +1,7 @@ +def handler(elem): + name = elem["name"] + fn, ln = name.split(" ") + name = " ".join([ln, fn]) + elem["name_rev"] = name + + return elem \ No newline at end of file diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 52112df5..db853311 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -24,7 +24,7 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: def encode_task(self, state: Task) -> dict: payload = { "Type": "Task", - "Resource": self._func_arns[state.name] + "Resource": self._func_arns[state.func_name] } if state.next: @@ -58,3 +58,30 @@ def _encode_case(self, case: Switch.Case) -> dict: cond: case.val, "Next": case.next } + + def encode_map(self, state: Map) -> dict: + payload = { + "Type": "Map", + "ItemsPath": "$."+state.array, + "Iterator": { + "StartAt": "func", + "States": { + "func": { + "Type": "Task", + "Resource": self._func_arns[state.func_name], + "End": True + } + } + }, + "ResultPath": "$."+state.array + } + + if state.next: + payload["Next"] = state.next + else: + payload["End"] = True + + if state.max_concurrency: + payload["MaxConcurrency"] = state.max_concurrency + + return payload diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 97b992a0..256a36c2 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -50,12 +50,7 @@ def __init__(self, @staticmethod def deserialize(payload: dict) -> "Case": - return Switch.Case( - var=payload["var"], - op=payload["op"], - val=payload.get("val"), - next=payload.get("next") - ) + return Switch.Case(**payload) def __init__(self, name: str, cases: List[Case], default: Optional[str]): self.name = name @@ -73,9 +68,35 @@ def deserialize(cls, name: str, payload: dict) -> "Switch": ) +class Map(State): + + def __init__(self, + name: str, + func_name: str, + array: str, + max_concurrency: Optional[int], + next: Optional[str]): + self.name = name + self.func_name = func_name + self.array = array + self.max_concurrency = max_concurrency + self.next = next + + @classmethod + def deserialize(cls, name: str, payload: dict) -> "Map": + return cls( + name=name, + 
func_name=payload["func_name"], + array=payload["array"], + max_concurrency=payload.get("max_concurrency"), + next=payload.get("next") + ) + + _STATE_TYPES = { "task": Task, - "switch": Switch + "switch": Switch, + "map": Map } @@ -107,6 +128,10 @@ def encode_state(self, state: State) -> dict: return self.encode_task(state) elif isinstance(state, Switch): return self.encode_switch(state) + elif isinstance(state, Map): + return self.encode_map(state) + else: + raise ValueError(f"Unknown state of type {type(state)}.") @abstractmethod def encode_task(self, state: Task) -> dict: @@ -114,4 +139,8 @@ def encode_task(self, state: Task) -> dict: @abstractmethod def encode_switch(self, state: Switch) -> dict: + pass + + @abstractmethod + def encode_map(self, state: Map) -> dict: pass \ No newline at end of file From c4db88599fa87812f79794013362281372b17104 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 20 Apr 2022 15:51:37 +0200 Subject: [PATCH 41/68] Add azure map support --- .../wrappers/azure/python/main_workflow.py | 4 +- .../wrappers/azure/python/run_workflow.py | 41 ++++++++++++++++--- sebs.py | 8 ++-- sebs/faas/fsm.py | 15 ++++--- sebs/utils.py | 8 ++-- 5 files changed, 56 insertions(+), 20 deletions(-) diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 68e2afbf..5b68d47d 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -35,6 +35,7 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu is_cold, container_id = probe_cold_start() status_body = json.loads(res.get_body()) + code = 500 if status_body.get("runtimeStatus") == "Failed" else 200 body = { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), @@ -45,6 +46,7 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu } return func.HttpResponse( - json.dumps(body), + status_code=code, + body=json.dumps(body), mimetype="application/json" ) diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 3b28f6f1..e4c92db4 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -11,14 +11,31 @@ from .fsm import * -def resolve_var(obj, vars: str): - vars = vars.split(".") - for var in vars: - obj = obj[var] +def get_var(obj, path: str): + names = path.split(".") + assert(len(names) > 0) + + for n in names: + obj = obj[n] return obj +def set_var(obj, var, path: str): + names = path.split(".") + assert(len(names) > 0) + + for n in names[:-1]: + obj = obj[n] + + obj[names[-1]] = var + + +def chunks(lst, n): + for i in range(0, len(lst), n): + yield lst[i:i + n] + + def run_workflow(context: df.DurableOrchestrationContext): with open("definition.json") as f: definition = json.load(f) @@ -44,7 +61,7 @@ def run_workflow(context: df.DurableOrchestrationContext): next = None for case in current.cases: - var = resolve_var(res, case.var) + var = get_var(res, case.var) op = ops[case.op] if op(var, case.val): next = states[case.next] @@ -53,6 +70,20 @@ def run_workflow(context: df.DurableOrchestrationContext): if not next and current.default: next = states[current.default] current = next + elif isinstance(current, Map): + array = get_var(res, current.array) + array_res = [] + + if current.max_concurrency: + for c in chunks(array, current.max_concurrency): + tasks = [context.call_activity(current.func_name, e) for e in c] + 
array_res += yield context.task_all(tasks) + else: + tasks = [context.call_activity(current.func_name, e) for e in array] + array_res = yield context.task_all(tasks) + + set_var(res, array_res, current.array) + current = states.get(current.next, None) else: raise ValueError(f"Undefined state: {current}") diff --git a/sebs.py b/sebs.py index 64c3d877..6a2c4bb2 100755 --- a/sebs.py +++ b/sebs.py @@ -286,7 +286,7 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam storage=storage, size=benchmark_input_size ) - df = pd.DataFrame(columns=["func", "rep", "start", "end"]) + measurements = [] result = sebs.experiments.ExperimentResult( experiment_config, deployment_client.config ) @@ -306,14 +306,14 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam if ret.stats.failure: sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - df_i = download_measurements(redis, workflow.name, rep=i) - df = pd.concat([df, df_i]) - + measurements += download_measurements(redis, workflow.name, rep=i) result.add_invocation(workflow, ret) result.end() path = os.path.join(output_dir, "results", workflow.name, deployment_client.name()+".csv") os.makedirs(os.path.dirname(path), exist_ok=True) + + df = pd.DataFrame(measurements) df.to_csv(path, index=False) with open("experiments.json", "w") as out_f: diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 256a36c2..449fc433 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -52,7 +52,10 @@ def __init__(self, def deserialize(payload: dict) -> "Case": return Switch.Case(**payload) - def __init__(self, name: str, cases: List[Case], default: Optional[str]): + def __init__(self, + name: str, + cases: List[Case], + default: Optional[str]): self.name = name self.cases = cases self.default = default @@ -71,11 +74,11 @@ def deserialize(cls, name: str, payload: dict) -> "Switch": class Map(State): def __init__(self, - name: str, - func_name: str, - array: str, - max_concurrency: Optional[int], - next: Optional[str]): + name: str, + func_name: str, + array: str, + max_concurrency: Optional[int], + next: Optional[str]): self.name = name self.func_name = func_name self.array = array diff --git a/sebs/utils.py b/sebs/utils.py index cd65436c..627f692a 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -8,7 +8,6 @@ from typing import List, Optional, TextIO, Union from redis import Redis -import pandas as pd PROJECT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir) PACK_CODE_APP = "pack_code_{}.sh" @@ -109,20 +108,21 @@ def connect_to_redis_cache(host: str): def download_measurements(redis: Redis, workflow_name: str, **static_args): + payloads = [] + for key in redis.scan_iter(pattern=f"{workflow_name}/*"): payload = redis.get(key) try: payload = json.loads(payload) payload = {**payload, **static_args} - - df = pd.DataFrame([payload]) + payloads.append(payload) except json.decoder.JSONDecodeError: print(f"Failed to decode payload: {payload}") finally: redis.delete(key) - return df + return payloads # def configure_logging(verbose: bool = False, output_dir: Optional[str] = None): From afcdf5dd1b7b84b44198a2e9276e6f610f395386 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 20 Apr 2022 15:52:58 +0200 Subject: [PATCH 42/68] Make sure keys are unique --- benchmarks/wrappers/aws/python/handler_workflow.py | 2 +- benchmarks/wrappers/azure/python/handler_workflow.py | 2 +- benchmarks/wrappers/gcp/python/handler_workflow.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) 
diff --git a/benchmarks/wrappers/aws/python/handler_workflow.py b/benchmarks/wrappers/aws/python/handler_workflow.py index 31c2980c..f95d01ff 100644 --- a/benchmarks/wrappers/aws/python/handler_workflow.py +++ b/benchmarks/wrappers/aws/python/handler_workflow.py @@ -50,7 +50,7 @@ def handler(event, context): decode_responses=True, socket_connect_timeout=10) - key = os.path.join(workflow_name, func_name) + key = os.path.join(workflow_name, func_name, str(uuid.uuid4())[0:8]) redis.set(key, payload) return res diff --git a/benchmarks/wrappers/azure/python/handler_workflow.py b/benchmarks/wrappers/azure/python/handler_workflow.py index 0436de88..e54c6a89 100644 --- a/benchmarks/wrappers/azure/python/handler_workflow.py +++ b/benchmarks/wrappers/azure/python/handler_workflow.py @@ -53,7 +53,7 @@ def main(event): decode_responses=True, socket_connect_timeout=10) - key = os.path.join(workflow_name, func_name) + key = os.path.join(workflow_name, func_name, str(uuid.uuid4())[0:8]) redis.set(key, payload) return res diff --git a/benchmarks/wrappers/gcp/python/handler_workflow.py b/benchmarks/wrappers/gcp/python/handler_workflow.py index da7175c1..e0dbaa56 100644 --- a/benchmarks/wrappers/gcp/python/handler_workflow.py +++ b/benchmarks/wrappers/gcp/python/handler_workflow.py @@ -53,7 +53,7 @@ def handler(req): decode_responses=True, socket_connect_timeout=10) - key = os.path.join(workflow_name, func_name) + key = os.path.join(workflow_name, func_name, str(uuid.uuid4())[0:8]) redis.set(key, payload) return res From 57d1fe8eb3e1ba436485141ac3d968e6dd81994a Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 21 Apr 2022 10:24:44 +0200 Subject: [PATCH 43/68] Remove max_concurrency --- benchmarks/600.workflows/610.gen/definition.json | 1 - benchmarks/wrappers/azure/python/run_workflow.py | 11 ++--------- sebs/aws/generator.py | 3 --- sebs/faas/fsm.py | 3 --- 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/benchmarks/600.workflows/610.gen/definition.json b/benchmarks/600.workflows/610.gen/definition.json index e9b9e1b8..3d6ba112 100644 --- a/benchmarks/600.workflows/610.gen/definition.json +++ b/benchmarks/600.workflows/610.gen/definition.json @@ -10,7 +10,6 @@ "type": "map", "func_name": "process_astros", "array": "astros.people", - "max_concurrency": 5, "next": "select_astros_number" }, "select_astros_number": { diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index e4c92db4..3bcbacaa 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -72,15 +72,8 @@ def run_workflow(context: df.DurableOrchestrationContext): current = next elif isinstance(current, Map): array = get_var(res, current.array) - array_res = [] - - if current.max_concurrency: - for c in chunks(array, current.max_concurrency): - tasks = [context.call_activity(current.func_name, e) for e in c] - array_res += yield context.task_all(tasks) - else: - tasks = [context.call_activity(current.func_name, e) for e in array] - array_res = yield context.task_all(tasks) + tasks = [context.call_activity(current.func_name, e) for e in array] + array_res = yield context.task_all(tasks) set_var(res, array_res, current.array) current = states.get(current.next, None) diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index db853311..9555d927 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -81,7 +81,4 @@ def encode_map(self, state: Map) -> dict: else: payload["End"] = True - if 
state.max_concurrency: - payload["MaxConcurrency"] = state.max_concurrency - return payload diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 449fc433..771ff107 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -77,12 +77,10 @@ def __init__(self, name: str, func_name: str, array: str, - max_concurrency: Optional[int], next: Optional[str]): self.name = name self.func_name = func_name self.array = array - self.max_concurrency = max_concurrency self.next = next @classmethod @@ -91,7 +89,6 @@ def deserialize(cls, name: str, payload: dict) -> "Map": name=name, func_name=payload["func_name"], array=payload["array"], - max_concurrency=payload.get("max_concurrency"), next=payload.get("next") ) From 22d10eac0454b32cae429f910f386392b985b38f Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 21 Apr 2022 15:26:36 +0200 Subject: [PATCH 44/68] Fix workflow running variable --- .../600.workflows/610.gen/definition.json | 39 +++++---- benchmarks/600.workflows/610.gen/input.py | 9 +- .../610.gen/python/few_people.py | 5 ++ .../python/{process_zero.py => get_astros.py} | 3 - .../610.gen/python/many_people.py | 5 ++ .../610.gen/python/map_astros.py | 7 ++ .../610.gen/python/process_astros.py | 12 ++- .../610.gen/python/process_one.py | 7 -- .../610.gen/python/process_two.py | 5 -- .../wrappers/azure/python/run_workflow.py | 21 +---- sebs/aws/generator.py | 11 ++- sebs/faas/fsm.py | 21 +++-- sebs/gcp/gcp.py | 46 +++++++++- sebs/gcp/generator.py | 85 +++++++++++++++---- 14 files changed, 180 insertions(+), 96 deletions(-) create mode 100644 benchmarks/600.workflows/610.gen/python/few_people.py rename benchmarks/600.workflows/610.gen/python/{process_zero.py => get_astros.py} (59%) create mode 100644 benchmarks/600.workflows/610.gen/python/many_people.py create mode 100644 benchmarks/600.workflows/610.gen/python/map_astros.py delete mode 100644 benchmarks/600.workflows/610.gen/python/process_one.py delete mode 100644 benchmarks/600.workflows/610.gen/python/process_two.py diff --git a/benchmarks/600.workflows/610.gen/definition.json b/benchmarks/600.workflows/610.gen/definition.json index 3d6ba112..d788231d 100644 --- a/benchmarks/600.workflows/610.gen/definition.json +++ b/benchmarks/600.workflows/610.gen/definition.json @@ -1,15 +1,9 @@ { - "root": "process_zero", + "root": "get_astros", "states": { - "process_zero": { + "get_astros": { "type": "task", - "func_name": "process_zero", - "next": "process_astros" - }, - "process_astros": { - "type": "map", - "func_name": "process_astros", - "array": "astros.people", + "func_name": "get_astros", "next": "select_astros_number" }, "select_astros_number": { @@ -19,25 +13,36 @@ "var": "astros.number", "op": "<", "val": 10, - "next": "process_one" + "next": "few_people" }, { "var": "astros.number", "op": ">=", "val": 10, - "next": "process_two" + "next": "many_people" } ], - "default": "process_one" + "default": "few_people" + }, + "few_people": { + "type": "task", + "func_name": "few_people", + "next": "map_astros" }, - "process_one": { + "many_people": { "type": "task", - "func_name": "process_one", - "next": "process_two" + "func_name": "many_people", + "next": "map_astros" + }, + "map_astros": { + "type": "map", + "array": "astros.people", + "func_name": "map_astros", + "next": "process_astros" }, - "process_two": { + "process_astros": { "type": "task", - "func_name": "process_two" + "func_name": "process_astros" } } } \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/input.py b/benchmarks/600.workflows/610.gen/input.py index 
477cf27a..68f82e81 100644 --- a/benchmarks/600.workflows/610.gen/input.py +++ b/benchmarks/600.workflows/610.gen/input.py @@ -1,12 +1,5 @@ - -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} - def buckets_count(): return (0, 0) def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): - return { "idx": 0 } + return dict() \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/few_people.py b/benchmarks/600.workflows/610.gen/python/few_people.py new file mode 100644 index 00000000..9c70d9fb --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/few_people.py @@ -0,0 +1,5 @@ +def handler(event): + return { + "many_astros": False, + **event + } \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/process_zero.py b/benchmarks/600.workflows/610.gen/python/get_astros.py similarity index 59% rename from benchmarks/600.workflows/610.gen/python/process_zero.py rename to benchmarks/600.workflows/610.gen/python/get_astros.py index c4e71760..627c6523 100644 --- a/benchmarks/600.workflows/610.gen/python/process_zero.py +++ b/benchmarks/600.workflows/610.gen/python/get_astros.py @@ -1,11 +1,8 @@ -from time import sleep import requests def handler(event): res = requests.get("http://api.open-notify.org/astros.json") - data = (str(i % 255) for i in range(2**4)) return { - "buffer": "".join(data), "astros": res.json() } \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/many_people.py b/benchmarks/600.workflows/610.gen/python/many_people.py new file mode 100644 index 00000000..2d339f32 --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/many_people.py @@ -0,0 +1,5 @@ +def handler(event): + return { + "many_astros": True, + **event + } \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/map_astros.py b/benchmarks/600.workflows/610.gen/python/map_astros.py new file mode 100644 index 00000000..b98b5e9d --- /dev/null +++ b/benchmarks/600.workflows/610.gen/python/map_astros.py @@ -0,0 +1,7 @@ +def handler(elem): + name = elem["name"] + fn, ln = name.split(" ") + name = " ".join([ln, fn]) + elem["name_rev"] = name + + return elem \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/process_astros.py b/benchmarks/600.workflows/610.gen/python/process_astros.py index b98b5e9d..a981660e 100644 --- a/benchmarks/600.workflows/610.gen/python/process_astros.py +++ b/benchmarks/600.workflows/610.gen/python/process_astros.py @@ -1,7 +1,5 @@ -def handler(elem): - name = elem["name"] - fn, ln = name.split(" ") - name = " ".join([ln, fn]) - elem["name_rev"] = name - - return elem \ No newline at end of file +def handler(arr): + return { + "astros": arr, + "done": True + } \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/process_one.py b/benchmarks/600.workflows/610.gen/python/process_one.py deleted file mode 100644 index 7f85f64a..00000000 --- a/benchmarks/600.workflows/610.gen/python/process_one.py +++ /dev/null @@ -1,7 +0,0 @@ -from time import sleep - -def handler(event): - print(event) - - data = (str(i % 255) for i in range(2**4)) - return {"buffer": "".join(data)} \ No newline at end of file diff --git a/benchmarks/600.workflows/610.gen/python/process_two.py b/benchmarks/600.workflows/610.gen/python/process_two.py deleted file mode 100644 index afbc46aa..00000000 --- a/benchmarks/600.workflows/610.gen/python/process_two.py +++ /dev/null @@ -1,5 +0,0 @@ -from time import sleep - -def 
handler(event): - data = (str(i % 255) for i in range(2**4)) - return {"buffer": "".join(data)} \ No newline at end of file diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 3bcbacaa..4e21986d 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -21,21 +21,6 @@ def get_var(obj, path: str): return obj -def set_var(obj, var, path: str): - names = path.split(".") - assert(len(names) > 0) - - for n in names[:-1]: - obj = obj[n] - - obj[names[-1]] = var - - -def chunks(lst, n): - for i in range(0, len(lst), n): - yield lst[i:i + n] - - def run_workflow(context: df.DurableOrchestrationContext): with open("definition.json") as f: definition = json.load(f) @@ -47,8 +32,7 @@ def run_workflow(context: df.DurableOrchestrationContext): while current: if isinstance(current, Task): - payload = yield context.call_activity(current.func_name, res) - res = {**res, **payload} + res = yield context.call_activity(current.func_name, res) current = states.get(current.next, None) elif isinstance(current, Switch): ops = { @@ -73,9 +57,8 @@ def run_workflow(context: df.DurableOrchestrationContext): elif isinstance(current, Map): array = get_var(res, current.array) tasks = [context.call_activity(current.func_name, e) for e in array] - array_res = yield context.task_all(tasks) + res = yield context.task_all(tasks) - set_var(res, array_res, current.array) current = states.get(current.next, None) else: raise ValueError(f"Undefined state: {current}") diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 9555d927..579fa16d 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -1,5 +1,5 @@ import json -from typing import Dict +from typing import Dict, List, Union import numbers from sebs.faas.fsm import * @@ -21,7 +21,7 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: return definition - def encode_task(self, state: Task) -> dict: + def encode_task(self, state: Task) -> Union[dict, List[dict]]: payload = { "Type": "Task", "Resource": self._func_arns[state.func_name] @@ -34,7 +34,7 @@ def encode_task(self, state: Task) -> dict: return payload - def encode_switch(self, state: Switch) -> dict: + def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: choises = [self._encode_case(c) for c in state.cases] return { "Type": "Choice", @@ -59,7 +59,7 @@ def _encode_case(self, case: Switch.Case) -> dict: "Next": case.next } - def encode_map(self, state: Map) -> dict: + def encode_map(self, state: Map) -> Union[dict, List[dict]]: payload = { "Type": "Map", "ItemsPath": "$."+state.array, @@ -72,8 +72,7 @@ def encode_map(self, state: Map) -> dict: "End": True } } - }, - "ResultPath": "$."+state.array + } } if state.next: diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 771ff107..b3d183be 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from typing import Iterator, Optional, List, Callable +from typing import Iterator, Optional, List, Callable, Union from enum import Enum import json @@ -115,7 +115,16 @@ def parse(self, path: str): def generate(self) -> str: states = self.states.values() - payloads = [self.encode_state(s) for s in states] + payloads = [] + for s in states: + obj = self.encode_state(s) + if isinstance(obj, dict): + payloads.append(obj) + elif isinstance(obj, list): + payloads += obj + else: + raise ValueError("Unknown encoded state returned.") + 
definition = self.postprocess(states, payloads) return self._export_func(definition) @@ -123,7 +132,7 @@ def generate(self) -> str: def postprocess(self, states: List[State], payloads: List[dict]) -> dict: return {s.name: p for (s, p) in zip(states, payloads)} - def encode_state(self, state: State) -> dict: + def encode_state(self, state: State) -> Union[dict, List[dict]]: if isinstance(state, Task): return self.encode_task(state) elif isinstance(state, Switch): @@ -134,13 +143,13 @@ def encode_state(self, state: State) -> dict: raise ValueError(f"Unknown state of type {type(state)}.") @abstractmethod - def encode_task(self, state: Task) -> dict: + def encode_task(self, state: Task) -> Union[dict, List[dict]]: pass @abstractmethod - def encode_switch(self, state: Switch) -> dict: + def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: pass @abstractmethod - def encode_map(self, state: Map) -> dict: + def encode_map(self, state: Map) -> Union[dict, List[dict]]: pass \ No newline at end of file diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index ec3520f8..27640e00 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -398,10 +398,32 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP # generate workflow definition.json urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] func_triggers = {n: u for (n, u) in zip(func_names, urls)} - gen = GCPGenerator(func_triggers) + + gen = GCPGenerator(workflow_name, func_triggers) gen.parse(definition_path) definition = gen.generate() + # map functions require their own workflows + parent = GCP.get_location(project_name, location) + for map_id, map_def in gen.generate_maps(): + full_workflow_name = GCP.get_full_workflow_name( + project_name, location, map_id) + create_req = ( + self.workflow_client.projects() + .locations() + .workflows() + .create( + parent=parent, + workflowId=map_id, + body={ + "name": full_workflow_name, + "sourceContents": map_def, + } + ) + ) + ret = create_req.execute() + self.logging.info(f"Map workflow {map_id} has been created!") + full_workflow_name = GCP.get_full_workflow_name( project_name, location, workflow_name) get_req = self.workflow_client.projects().locations( @@ -410,7 +432,6 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP try: get_req.execute() except HttpError: - parent = GCP.get_location(project_name, location) create_req = ( self.workflow_client.projects() .locations() @@ -489,10 +510,29 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): # Generate workflow definition.json urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] func_triggers = {n: u for (n, u) in zip(func_names, urls)} - gen = GCPGenerator(func_triggers) + gen = GCPGenerator(workflow.name, func_triggers) gen.parse(definition_path) definition = gen.generate() + for map_id, map_def in gen.generate_maps(): + full_workflow_name = GCP.get_full_workflow_name( + self.config.project_name, self.config.region, map_id + ) + patch_req = ( + self.workflow_client.projects() + .locations() + .workflows() + .patch( + name=full_workflow_name, + body={ + "name": full_workflow_name, + "sourceContents": map_def, + } + ) + ) + ret = patch_req.execute() + self.logging.info("Published new map workflow code.") + full_workflow_name = GCP.get_full_workflow_name( self.config.project_name, self.config.region, workflow.name ) diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index c828dd1a..3935a223 
100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -1,13 +1,16 @@ import json -from typing import Dict +import uuid +from typing import Dict, Union, List from sebs.faas.fsm import * class GCPGenerator(Generator): - def __init__(self, func_triggers: Dict[str, str]): + def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): super().__init__() + self._workflow_name = workflow_name self._func_triggers = func_triggers + self._map_funcs = dict() def postprocess(self, states: List[State], payloads: List[dict]) -> dict: definition = { @@ -21,21 +24,32 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: return definition - def encode_task(self, state: Task) -> dict: - url = self._func_triggers[state.name] + def encode_task(self, state: Task) -> Union[dict, List[dict]]: + url = self._func_triggers[state.func_name] - return { - state.name: { - "call": "http.post", - "args": { - "url": url, - "body": "${res}" - }, - "result": "res" + return [ + { + state.name: { + "call": "http.post", + "args": { + "url": url, + "body": "${res}" + }, + "result": "res" + } + }, + { + "assign_res_"+state.name: { + "assign": [ + { + "res": "${res.body}" + } + ] + } } - } + ] - def encode_switch(self, state: Switch) -> dict: + def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: return { state.name: { "switch": [self._encode_case(c) for c in state.cases], @@ -44,9 +58,50 @@ def encode_switch(self, state: Switch) -> dict: } def _encode_case(self, case: Switch.Case) -> dict: - cond = "res.body." + case.var + " " + case.op + " " + str(case.val) + cond = "res." + case.var + " " + case.op + " " + str(case.val) return { "condition": "${"+cond+"}", "next": case.next } + def encode_map(self, state: Map) -> Union[dict, List[dict]]: + id = self._workflow_name + "_" + "map" + str(uuid.uuid4())[0:8] + self._map_funcs[id] = self._func_triggers[state.func_name] + + return { + state.name: { + "call": "experimental.executions.map", + "args": { + "workflow_id": id, + "arguments": "${res." 
+ state.array + "}" + }, + "result": "res" + } + } + + + def generate_maps(self): + for workflow_id, url in self._map_funcs.items(): + yield (workflow_id, self._export_func({ + "main" : { + "params": ["elem"], + "steps": [ + { + "map": { + "call": "http.post", + "args": { + "url": url, + "body": "${elem}" + }, + "result": "elem" + } + }, + { + "ret": { + "return": "${elem.body}" + } + } + ] + } + })) + From af75dc34c43ac6e2494fc5f642f864768e4d906f Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 21 Apr 2022 15:26:44 +0200 Subject: [PATCH 45/68] Add benchmarks --- .../600.workflows/620.func_invo/config.json | 5 ++++ .../620.func_invo/definition.json | 29 +++++++++++++++++++ .../600.workflows/620.func_invo/input.py | 11 +++++++ .../620.func_invo/python/gen_buffer.py | 6 ++++ .../600.workflows/630.parallel/config.json | 5 ++++ .../630.parallel/definition.json | 15 ++++++++++ .../600.workflows/630.parallel/input.py | 11 +++++++ .../630.parallel/python/generate.py | 2 ++ .../630.parallel/python/process.py | 7 +++++ 9 files changed, 91 insertions(+) create mode 100644 benchmarks/600.workflows/620.func_invo/config.json create mode 100644 benchmarks/600.workflows/620.func_invo/definition.json create mode 100644 benchmarks/600.workflows/620.func_invo/input.py create mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer.py create mode 100644 benchmarks/600.workflows/630.parallel/config.json create mode 100644 benchmarks/600.workflows/630.parallel/definition.json create mode 100644 benchmarks/600.workflows/630.parallel/input.py create mode 100644 benchmarks/600.workflows/630.parallel/python/generate.py create mode 100644 benchmarks/600.workflows/630.parallel/python/process.py diff --git a/benchmarks/600.workflows/620.func_invo/config.json b/benchmarks/600.workflows/620.func_invo/config.json new file mode 100644 index 00000000..8eae0824 --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/config.json @@ -0,0 +1,5 @@ +{ + "timeout": 120, + "memory": 128, + "languages": ["python"] +} diff --git a/benchmarks/600.workflows/620.func_invo/definition.json b/benchmarks/600.workflows/620.func_invo/definition.json new file mode 100644 index 00000000..bd50b477 --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/definition.json @@ -0,0 +1,29 @@ +{ + "root": "gen_one", + "states": { + "gen_one": { + "type": "task", + "func_name": "gen_buffer", + "next": "gen_two" + }, + "gen_two": { + "type": "task", + "func_name": "gen_buffer", + "next": "gen_three" + }, + "gen_three": { + "type": "task", + "func_name": "gen_buffer", + "next": "gen_four" + }, + "gen_four": { + "type": "task", + "func_name": "gen_buffer", + "next": "gen_five" + }, + "gen_five": { + "type": "task", + "func_name": "gen_buffer" + } + } +} \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/input.py b/benchmarks/600.workflows/620.func_invo/input.py new file mode 100644 index 00000000..661c056d --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/input.py @@ -0,0 +1,11 @@ +size_generators = { + 'test' : 10, + 'small' : 2**10, + 'large': 2**20 +} + +def buckets_count(): + return (0, 0) + +def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): + return { 'size': size_generators[size] } \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer.py new file mode 100644 index 00000000..bbbea63c --- /dev/null +++ 
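A sketch of how this generator is meant to be driven end to end (assuming the GCPGenerator API shown above, i.e. parse(), generate() and generate_maps(), with a placeholder workflow name and trigger URL; in SeBS the URLs come from the functions' HTTP triggers):

    from sebs.gcp.generator import GCPGenerator

    # Placeholder trigger URL; SeBS obtains these from create_function_trigger().
    func_triggers = {
        "gen_buffer": "https://<region>-<project>.cloudfunctions.net/gen_buffer",
    }

    gen = GCPGenerator("620_func_invo", func_triggers)
    gen.parse("benchmarks/600.workflows/620.func_invo/definition.json")

    main_source = gen.generate()                  # source of the main workflow
    for map_id, map_source in gen.generate_maps():
        # One auxiliary workflow per map state (none for this benchmark),
        # each invoked through experimental.executions.map.
        print(map_id, map_source)
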
b/benchmarks/600.workflows/620.func_invo/python/gen_buffer.py @@ -0,0 +1,6 @@ +def handler(event): + size = int(event["size"]) if isinstance(event, dict) else len(event) + data = (str(i % 255) for i in range(size)) + data = "".join(data) + + return data \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/config.json b/benchmarks/600.workflows/630.parallel/config.json new file mode 100644 index 00000000..8eae0824 --- /dev/null +++ b/benchmarks/600.workflows/630.parallel/config.json @@ -0,0 +1,5 @@ +{ + "timeout": 120, + "memory": 128, + "languages": ["python"] +} diff --git a/benchmarks/600.workflows/630.parallel/definition.json b/benchmarks/600.workflows/630.parallel/definition.json new file mode 100644 index 00000000..6f9f7def --- /dev/null +++ b/benchmarks/600.workflows/630.parallel/definition.json @@ -0,0 +1,15 @@ +{ + "root": "generate", + "states": { + "generate": { + "type": "task", + "func_name": "generate", + "next": "process" + }, + "process": { + "type": "map", + "func_name": "process", + "array": "buffer", + } + } +} \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/input.py b/benchmarks/600.workflows/630.parallel/input.py new file mode 100644 index 00000000..f80fafd1 --- /dev/null +++ b/benchmarks/600.workflows/630.parallel/input.py @@ -0,0 +1,11 @@ +size_generators = { + 'test' : 5, + 'small' : 100, + 'large': 1000 +} + +def buckets_count(): + return (0, 0) + +def generate_input(data_dir, size, input_buckets, output_buckets, upload_func): + return { 'size': size_generators[size] } \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/python/generate.py b/benchmarks/600.workflows/630.parallel/python/generate.py new file mode 100644 index 00000000..41712b35 --- /dev/null +++ b/benchmarks/600.workflows/630.parallel/python/generate.py @@ -0,0 +1,2 @@ +def handler(elem): + return elem[::-1] \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/python/process.py b/benchmarks/600.workflows/630.parallel/python/process.py new file mode 100644 index 00000000..79a27dda --- /dev/null +++ b/benchmarks/600.workflows/630.parallel/python/process.py @@ -0,0 +1,7 @@ +def handler(event): + size = int(event["size"]) + buffer = size * ["asdf"] + + return { + "buffer": buffer + } \ No newline at end of file From 56356086631633cfd6c95dae6f21772f17f5d237 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 21 Apr 2022 16:24:51 +0200 Subject: [PATCH 46/68] Return correct result --- benchmarks/wrappers/azure/python/main_workflow.py | 9 +++++++-- sebs/gcp/generator.py | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 5b68d47d..3863d89c 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -35,16 +35,21 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu is_cold, container_id = probe_cold_start() status_body = json.loads(res.get_body()) - code = 500 if status_body.get("runtimeStatus") == "Failed" else 200 + failed = status_body.get("runtimeStatus") == "Failed" + code = 500 if failed else 200 body = { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), "is_cold": is_cold, "container_id": container_id, "request_id": context.invocation_id, - **status_body } + if failed: + body = {**body, **status_body} + else: + body["res"] = status_body + return 
func.HttpResponse( status_code=code, body=json.dumps(body), diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 3935a223..4e792262 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -13,6 +13,14 @@ def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): self._map_funcs = dict() def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + payloads.append({ + "final": { + "return": [ + "${res}" + ] + } + }) + definition = { "main" : { "params": [ From 98f38a971ad302a98470c96d5715f8d521040216 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 22 Apr 2022 12:03:16 +0200 Subject: [PATCH 47/68] Fix benchmarks --- benchmarks/600.workflows/620.func_invo/definition.json | 10 +++++----- .../python/{gen_buffer.py => gen_buffer_five.py} | 0 .../620.func_invo/python/gen_buffer_four.py | 6 ++++++ .../620.func_invo/python/gen_buffer_one.py | 6 ++++++ .../620.func_invo/python/gen_buffer_three.py | 6 ++++++ .../620.func_invo/python/gen_buffer_two.py | 6 ++++++ benchmarks/600.workflows/630.parallel/definition.json | 2 +- .../600.workflows/630.parallel/python/generate.py | 9 +++++++-- .../600.workflows/630.parallel/python/process.py | 9 ++------- 9 files changed, 39 insertions(+), 15 deletions(-) rename benchmarks/600.workflows/620.func_invo/python/{gen_buffer.py => gen_buffer_five.py} (100%) create mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py create mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py create mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py create mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py diff --git a/benchmarks/600.workflows/620.func_invo/definition.json b/benchmarks/600.workflows/620.func_invo/definition.json index bd50b477..52437e0d 100644 --- a/benchmarks/600.workflows/620.func_invo/definition.json +++ b/benchmarks/600.workflows/620.func_invo/definition.json @@ -3,27 +3,27 @@ "states": { "gen_one": { "type": "task", - "func_name": "gen_buffer", + "func_name": "gen_buffer_one", "next": "gen_two" }, "gen_two": { "type": "task", - "func_name": "gen_buffer", + "func_name": "gen_buffer_two", "next": "gen_three" }, "gen_three": { "type": "task", - "func_name": "gen_buffer", + "func_name": "gen_buffer_three", "next": "gen_four" }, "gen_four": { "type": "task", - "func_name": "gen_buffer", + "func_name": "gen_buffer_four", "next": "gen_five" }, "gen_five": { "type": "task", - "func_name": "gen_buffer" + "func_name": "gen_buffer_five" } } } \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_five.py similarity index 100% rename from benchmarks/600.workflows/620.func_invo/python/gen_buffer.py rename to benchmarks/600.workflows/620.func_invo/python/gen_buffer_five.py diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py new file mode 100644 index 00000000..bbbea63c --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py @@ -0,0 +1,6 @@ +def handler(event): + size = int(event["size"]) if isinstance(event, dict) else len(event) + data = (str(i % 255) for i in range(size)) + data = "".join(data) + + return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py new file mode 
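With this final step appended, a single task state such as gen_one is emitted roughly as the following GCP Workflows program (a sketch assembled from encode_task() and postprocess() as shown in the diffs; the parameter name and exact step layout are assumptions, and the URL is a placeholder):

    {
        "main": {
            "params": ["res"],
            "steps": [
                {"gen_one": {
                    "call": "http.post",
                    "args": {"url": "https://<region>-<project>.cloudfunctions.net/gen_buffer_one",
                             "body": "${res}"},
                    "result": "res"}},
                {"assign_res_gen_one": {"assign": [{"res": "${res.body}"}]}},
                {"final": {"return": ["${res}"]}}
            ]
        }
    }
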
100644 index 00000000..bbbea63c --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py @@ -0,0 +1,6 @@ +def handler(event): + size = int(event["size"]) if isinstance(event, dict) else len(event) + data = (str(i % 255) for i in range(size)) + data = "".join(data) + + return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py new file mode 100644 index 00000000..bbbea63c --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py @@ -0,0 +1,6 @@ +def handler(event): + size = int(event["size"]) if isinstance(event, dict) else len(event) + data = (str(i % 255) for i in range(size)) + data = "".join(data) + + return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py new file mode 100644 index 00000000..bbbea63c --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py @@ -0,0 +1,6 @@ +def handler(event): + size = int(event["size"]) if isinstance(event, dict) else len(event) + data = (str(i % 255) for i in range(size)) + data = "".join(data) + + return data \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/definition.json b/benchmarks/600.workflows/630.parallel/definition.json index 6f9f7def..cf5664b9 100644 --- a/benchmarks/600.workflows/630.parallel/definition.json +++ b/benchmarks/600.workflows/630.parallel/definition.json @@ -9,7 +9,7 @@ "process": { "type": "map", "func_name": "process", - "array": "buffer", + "array": "buffer" } } } \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/python/generate.py b/benchmarks/600.workflows/630.parallel/python/generate.py index 41712b35..79a27dda 100644 --- a/benchmarks/600.workflows/630.parallel/python/generate.py +++ b/benchmarks/600.workflows/630.parallel/python/generate.py @@ -1,2 +1,7 @@ -def handler(elem): - return elem[::-1] \ No newline at end of file +def handler(event): + size = int(event["size"]) + buffer = size * ["asdf"] + + return { + "buffer": buffer + } \ No newline at end of file diff --git a/benchmarks/600.workflows/630.parallel/python/process.py b/benchmarks/600.workflows/630.parallel/python/process.py index 79a27dda..41712b35 100644 --- a/benchmarks/600.workflows/630.parallel/python/process.py +++ b/benchmarks/600.workflows/630.parallel/python/process.py @@ -1,7 +1,2 @@ -def handler(event): - size = int(event["size"]) - buffer = size * ["asdf"] - - return { - "buffer": buffer - } \ No newline at end of file +def handler(elem): + return elem[::-1] \ No newline at end of file From 25ddf371124448dd521303fd3cba4f8954064289 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 22 Apr 2022 12:03:37 +0200 Subject: [PATCH 48/68] Improve measurement download --- sebs.py | 2 +- sebs/aws/aws.py | 2 +- sebs/utils.py | 25 +++++++++++++++---------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/sebs.py b/sebs.py index 6a2c4bb2..36aae3c0 100755 --- a/sebs.py +++ b/sebs.py @@ -306,7 +306,7 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam if ret.stats.failure: sebs_client.logging.info(f"Failure on repetition {i+1}/{repetitions}") - measurements += download_measurements(redis, workflow.name, rep=i) + measurements += download_measurements(redis, workflow.name, result.begin_time, rep=i) 
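The download path used here can also be exercised on its own, as in this sketch (assuming the helpers shown in sebs/utils.py, connect_to_redis_cache() and download_measurements(), with a placeholder Redis host and workflow name, and epoch-second timestamps matching the payloads' "start" field):

    import time
    from sebs.utils import connect_to_redis_cache, download_measurements

    redis = connect_to_redis_cache("10.0.0.1")      # placeholder cache host
    begin = time.time()
    # ... trigger one repetition of the workflow here ...
    measurements = download_measurements(redis, "620_func_invo", begin, rep=0)
    # Only payloads with payload["start"] > begin are kept; the keyword
    # argument rep=0 is merged into each returned measurement.
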
result.add_invocation(workflow, ret) result.end() diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 461d16ef..61aaeaea 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -428,7 +428,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN code_package.hash ) - self.update_workflow(workflow, definition, code_package) + self.update_workflow(workflow, code_package) workflow.updated_code = True # Add LibraryTrigger to a new function diff --git a/sebs/utils.py b/sebs/utils.py index 627f692a..da059869 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -107,20 +107,25 @@ def connect_to_redis_cache(host: str): return redis -def download_measurements(redis: Redis, workflow_name: str, **static_args): +def download_measurements(redis: Redis, workflow_name: str, after: float, **static_args): payloads = [] - for key in redis.scan_iter(pattern=f"{workflow_name}/*"): + for key in redis.scan_iter(match=f"{workflow_name}/*"): + assert key[:len(workflow_name)] == workflow_name + payload = redis.get(key) + redis.delete(key) - try: - payload = json.loads(payload) - payload = {**payload, **static_args} - payloads.append(payload) - except json.decoder.JSONDecodeError: - print(f"Failed to decode payload: {payload}") - finally: - redis.delete(key) + if payload: + try: + payload = json.loads(payload) + + # make sure only measurements from our benchmark are saved + if payload["start"] > after: + payload = {**payload, **static_args} + payloads.append(payload) + except json.decoder.JSONDecodeError: + print(f"Failed to decode payload: {payload}") return payloads From 7efe689f8bf83f6fb3c4fa60b1f89019d4e5b216 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 11:25:42 +0200 Subject: [PATCH 49/68] Clean up azure platform --- sebs/azure/__init__.py | 2 +- sebs/azure/azure.py | 151 +++++--------------- sebs/azure/{function.py => function_app.py} | 8 +- sebs/azure/workflow.py | 36 ----- 4 files changed, 41 insertions(+), 156 deletions(-) rename sebs/azure/{function.py => function_app.py} (90%) delete mode 100644 sebs/azure/workflow.py diff --git a/sebs/azure/__init__.py b/sebs/azure/__init__.py index 499b1372..5736abdc 100644 --- a/sebs/azure/__init__.py +++ b/sebs/azure/__init__.py @@ -1,4 +1,4 @@ from .azure import Azure # noqa -from .function import AzureFunction # noqa +from .function_app import AzureFunction, AzureWorkflow # noqa from .config import AzureConfig # noqa from .blob_storage import BlobStorage # noqa diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 663f3715..659f9439 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -2,19 +2,15 @@ import json import glob import os -import io import shutil import time -from typing import cast, Dict, List, Optional, Set, Tuple, Type # noqa +from typing import cast, Dict, List, Optional, Set, Tuple, Type, TypeVar # noqa import docker -import pandas as pd -from azure.storage.blob import BlobServiceClient from sebs.azure.blob_storage import BlobStorage from sebs.azure.cli import AzureCLI -from sebs.azure.function import AzureFunction -from sebs.azure.workflow import AzureWorkflow +from sebs.azure.function_app import FunctionApp, AzureFunction, AzureWorkflow from sebs.azure.config import AzureConfig, AzureResources from sebs.azure.triggers import AzureTrigger, HTTPTrigger from sebs.code_package import CodePackage @@ -232,32 +228,25 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b shell=True, cwd=directory) return directory, code_size - def publish_function( + def 
publish_benchmark( self, - function: Function, + benchmark: Benchmark, code_package: CodePackage, repeat_on_failure: bool = False, ) -> str: success = False url = "" self.logging.info( - "Attempting publish of function {}".format(function.name)) + "Attempting publish of {}".format(benchmark.name)) while not success: try: ret = self.cli_instance.execute( "bash -c 'cd /mnt/function " "&& func azure functionapp publish {} --{} --no-build'".format( - function.name, self.AZURE_RUNTIMES[code_package.language_name] + benchmark.name, self.AZURE_RUNTIMES[code_package.language_name] ) ) - # ret = self.cli_instance.execute( - # "bash -c 'cd /mnt/function " - # "&& az functionapp deployment source config-zip " - # "--src {}.zip -g {} -n {} --build-remote false '".format( - # code_package.name, resource_group, function.name - # ) - # ) - # print(ret) + url = "" for line in ret.split(b"\n"): line = line.decode("utf-8") @@ -277,7 +266,7 @@ def publish_function( time.sleep(30) self.logging.info( "Sleep 30 seconds for Azure to register function app {}".format( - function.name + benchmark.name ) ) # escape loop. we failed! @@ -296,11 +285,11 @@ def publish_function( :return: URL to reach HTTP-triggered function """ - def update_function(self, function: Function, code_package: CodePackage): + def update_benchmark(self, benchmark: Benchmark, code_package: CodePackage): # Mount code package in Docker instance self._mount_function_code(code_package) - url = self.publish_function(function, code_package, True) + url = self.publish_benchmark(benchmark, code_package, True) trigger = HTTPTrigger( url, self.config.resources.data_storage_account(self.cli_instance)) @@ -326,8 +315,8 @@ def default_benchmark_name(self, code_package: CodePackage) -> str: ) return func_name - def create_function(self, code_package: CodePackage, func_name: str) -> AzureFunction: - + B = TypeVar("B", bound=FunctionApp) + def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: B) -> B: language = code_package.language_name language_runtime = code_package.language_version resource_group = self.config.resources.resource_group( @@ -336,7 +325,7 @@ def create_function(self, code_package: CodePackage, func_name: str) -> AzureFun config = { "resource_group": resource_group, - "func_name": func_name, + "name": name, "region": region, "runtime": self.AZURE_RUNTIMES[language], "runtime_version": language_runtime, @@ -349,7 +338,7 @@ def create_function(self, code_package: CodePackage, func_name: str) -> AzureFun ( " az functionapp config appsettings list " " --resource-group {resource_group} " - " --name {func_name} " + " --name {name} " ).format(**config) ) for setting in json.loads(ret.decode()): @@ -362,7 +351,7 @@ def create_function(self, code_package: CodePackage, func_name: str) -> AzureFun account_name, connection_string ) self.logging.info( - "Azure: Selected {} function app".format(func_name)) + "Azure: Selected {} function app".format(name)) except RuntimeError: function_storage_account = self.config.resources.add_storage_account( self.cli_instance) @@ -373,35 +362,36 @@ def create_function(self, code_package: CodePackage, func_name: str) -> AzureFun # create function app self.cli_instance.execute( ( - " az functionapp create --resource-group {resource_group} " - " --os-type Linux --consumption-plan-location {region} " + " az functionapp create --functions-version 3 " + " --resource-group {resource_group} --os-type Linux" + " --consumption-plan-location {region} " " --runtime {runtime} --runtime-version 
{runtime_version} " - " --name {func_name} --storage-account {storage_account}" + " --name {name} --storage-account {storage_account}" ).format(**config) ) self.logging.info( - "Azure: Created function app {}".format(func_name)) + "Azure: Created function app {}".format(name)) break except RuntimeError as e: # Azure does not allow some concurrent operations if "another operation is in progress" in str(e): self.logging.info( - f"Repeat {func_name} creation, another operation in progress" + f"Repeat {name} creation, another operation in progress" ) # Rethrow -> another error else: raise - function = AzureFunction( - name=func_name, + benchmark = benchmark_cls( + name=name, benchmark=code_package.name, code_hash=code_package.hash, function_storage=function_storage_account, ) # update existing function app - self.update_function(function, code_package) + self.update_benchmark(benchmark, code_package) - return function + return benchmark def cached_benchmark(self, benchmark: Benchmark): @@ -412,92 +402,17 @@ def cached_benchmark(self, benchmark: Benchmark): azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account - def create_workflow(self, code_package: CodePackage, workflow_name: str) -> AzureFunction: - language = code_package.language_name - language_runtime = code_package.language_version - resource_group = self.config.resources.resource_group( - self.cli_instance) - region = self.config.region - - config = { - "resource_group": resource_group, - "workflow_name": workflow_name, - "region": region, - "runtime": self.AZURE_RUNTIMES[language], - "runtime_version": language_runtime, - } - - # check if function does not exist - # no API to verify existence - try: - ret = self.cli_instance.execute( - ( - " az functionapp config appsettings list " - " --resource-group {resource_group} " - " --name {workflow_name} " - ).format(**config) - ) - for setting in json.loads(ret.decode()): - if setting["name"] == "AzureWebJobsStorage": - connection_string = setting["value"] - elems = [z for y in connection_string.split( - ";") for z in y.split("=")] - account_name = elems[elems.index("AccountName") + 1] - function_storage_account = AzureResources.Storage.from_cache( - account_name, connection_string - ) - self.logging.info( - "Azure: Selected {} function app".format(workflow_name)) - except RuntimeError: - function_storage_account = self.config.resources.add_storage_account( - self.cli_instance) - config["storage_account"] = function_storage_account.account_name - - # FIXME: only Linux type is supported - while True: - try: - # create function app - self.cli_instance.execute( - ( - " az functionapp create --resource-group {resource_group} " - " --os-type Linux --consumption-plan-location {region} " - " --runtime {runtime} --runtime-version {runtime_version} " - " --name {workflow_name} --storage-account {storage_account}" - ).format(**config) - ) - self.logging.info( - "Azure: Created workflow app {}".format(workflow_name)) - break - except RuntimeError as e: - # Azure does not allow some concurrent operations - if "another operation is in progress" in str(e): - self.logging.info( - f"Repeat {workflow_name} creation, another operation in progress" - ) - # Rethrow -> another error - else: - raise - workflow = AzureWorkflow( - name=workflow_name, - benchmark=code_package.name, - code_hash=code_package.hash, - function_storage=function_storage_account, - ) + def create_function(self, code_package: CodePackage, func_name: str) -> AzureFunction: + return 
self.create_benchmark(code_package, func_name, AzureFunction) - # update existing function app - self.update_function(workflow, code_package) + def update_function(self, function: Function, code_package: CodePackage): + self.update_benchmark(function, code_package) - return workflow + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> AzureWorkflow: + return self.create_benchmark(code_package, workflow_name, AzureWorkflow) def update_workflow(self, workflow: Workflow, code_package: CodePackage): - # Mount code package in Docker instance - self._mount_function_code(code_package) - url = self.publish_function(workflow, code_package, True) - - trigger = HTTPTrigger( - url, self.config.resources.data_storage_account(self.cli_instance)) - trigger.logging_handlers = self.logging_handlers - workflow.add_trigger(trigger) + self.update_benchmark(workflow, code_package) """ @@ -607,7 +522,7 @@ def _enforce_cold_start(self, function: Function, code_package: CodePackage): f" --settings ForceColdStart={self.cold_start_counter}" ) - self.update_function(function, code_package) + self.update_benchmark(function, code_package) def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): self.cold_start_counter += 1 diff --git a/sebs/azure/function.py b/sebs/azure/function_app.py similarity index 90% rename from sebs/azure/function.py rename to sebs/azure/function_app.py index 4f0a9671..bf86df8e 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function_app.py @@ -2,7 +2,7 @@ from sebs.faas.benchmark import Function -class AzureFunction(Function): +class FunctionApp(Function): def __init__( self, name: str, @@ -34,3 +34,9 @@ def deserialize(cached_config: dict) -> Function: assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) return ret + +class AzureFunction(FunctionApp): + pass + +class AzureWorkflow(FunctionApp): + pass \ No newline at end of file diff --git a/sebs/azure/workflow.py b/sebs/azure/workflow.py deleted file mode 100644 index 353fb3c4..00000000 --- a/sebs/azure/workflow.py +++ /dev/null @@ -1,36 +0,0 @@ -from sebs.azure.config import AzureResources -from sebs.faas.benchmark import Workflow - - -class AzureWorkflow(Workflow): - def __init__( - self, - name: str, - benchmark: str, - code_hash: str, - function_storage: AzureResources.Storage, - ): - super().__init__(benchmark, name, code_hash) - self.function_storage = function_storage - - def serialize(self) -> dict: - return { - **super().serialize(), - "function_storage": self.function_storage.serialize(), - } - - @staticmethod - def deserialize(cached_config: dict) -> Workflow: - ret = AzureWorkflow( - cached_config["name"], - cached_config["code_package"], - cached_config["hash"], - AzureResources.Storage.deserialize(cached_config["function_storage"]), - ) - from sebs.azure.triggers import HTTPTrigger - - for trigger in cached_config["triggers"]: - trigger_type = {"HTTP": HTTPTrigger}.get(trigger["type"]) - assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) - ret.add_trigger(trigger_type.deserialize(trigger)) - return ret From 2894b624a954c6598b44c00e62061e3cb8a68645 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 11:49:40 +0200 Subject: [PATCH 50/68] Linting --- sebs/aws/generator.py | 4 ++-- sebs/faas/fsm.py | 5 ++--- sebs/gcp/generator.py | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 579fa16d..ec434b02 
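With functions and workflows folded into one FunctionApp hierarchy, both kinds of app now go through the same create/update path; a minimal sketch of the resulting call surface (given an initialized Azure deployment client, azure, and a built code package, code_package, both placeholders here):

    from sebs.azure import AzureFunction, AzureWorkflow

    # Both calls delegate to create_benchmark()/update_benchmark() internally.
    function = azure.create_function(code_package, "app-python-function")
    workflow = azure.create_workflow(code_package, "app-python-workflow")

    # Equivalent explicit form, selecting the FunctionApp subclass directly.
    function = azure.create_benchmark(code_package, "app-python-function", AzureFunction)
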
100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -1,11 +1,11 @@ -import json from typing import Dict, List, Union import numbers -from sebs.faas.fsm import * +from sebs.faas.fsm import State, Task, Switch, Map class SFNGenerator(Generator): + def __init__(self, func_arns: Dict[str, str]): super().__init__() self._func_arns = func_arns diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index b3d183be..fb46c1ef 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -1,7 +1,6 @@ from abc import ABC from abc import abstractmethod -from typing import Iterator, Optional, List, Callable, Union -from enum import Enum +from typing import Optional, List, Callable, Union import json @@ -49,7 +48,7 @@ def __init__(self, self.next = next @staticmethod - def deserialize(payload: dict) -> "Case": + def deserialize(payload: dict) -> "Switch.Case": return Switch.Case(**payload) def __init__(self, diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 4e792262..2389cf08 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -1,11 +1,11 @@ -import json import uuid from typing import Dict, Union, List -from sebs.faas.fsm import * +from sebs.faas.fsm import State, Task, Switch, Map class GCPGenerator(Generator): + def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): super().__init__() self._workflow_name = workflow_name From 9ea8d39a47506832230900724e324be7726c67d3 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 12:03:34 +0200 Subject: [PATCH 51/68] Linting 2 --- sebs/aws/aws.py | 1 - sebs/aws/generator.py | 4 ++-- sebs/azure/azure.py | 4 ++-- sebs/cache.py | 2 +- sebs/gcp/generator.py | 4 ++-- sebs/utils.py | 6 +++--- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 61aaeaea..65c8dd8d 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -3,7 +3,6 @@ import re import shutil import time -import json import uuid from typing import cast, Dict, List, Optional, Tuple, Type, Union # noqa diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index ec434b02..1a736a98 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -1,10 +1,10 @@ from typing import Dict, List, Union import numbers -from sebs.faas.fsm import State, Task, Switch, Map +from sebs.faas.fsm import Generator, State, Task, Switch, Map -class SFNGenerator(Generator): +class SFNGenerator(Generator): def __init__(self, func_arns: Dict[str, str]): super().__init__() diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 659f9439..e740b2d0 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -145,7 +145,7 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b code_package.path, "definition.json") if not os.path.exists(src_path): raise ValueError( - f"No workflow definition found for {workflow_name}") + f"No workflow definition found in {directory}") dst_path = os.path.join(directory, "definition.json") shutil.copy2(src_path, dst_path) @@ -294,7 +294,7 @@ def update_benchmark(self, benchmark: Benchmark, code_package: CodePackage): trigger = HTTPTrigger( url, self.config.resources.data_storage_account(self.cli_instance)) trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) + benchmark.add_trigger(trigger) def _mount_function_code(self, code_package: CodePackage): self.cli_instance.upload_package( diff --git a/sebs/cache.py b/sebs/cache.py index 0a7dbaa9..42adc8b6 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -290,7 +290,7 @@ def 
add_benchmark( json.dump(config, fp, indent=2) else: raise RuntimeError( - "Can't cache benchmark {} for a non-existing code package!".format(function.name) + "Can't cache benchmark {} for a non-existing code package!".format(benchmark.name) ) def update_benchmark(self, benchmark: "Benchmark"): diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 2389cf08..83ef9f14 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -1,10 +1,10 @@ import uuid from typing import Dict, Union, List -from sebs.faas.fsm import State, Task, Switch, Map +from sebs.faas.fsm import Generator, State, Task, Switch, Map -class GCPGenerator(Generator): +class GCPGenerator(Generator): def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): super().__init__() diff --git a/sebs/utils.py b/sebs/utils.py index da059869..16f6dea2 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -99,9 +99,9 @@ def replace_string_in_file(path: str, from_str: str, to_str: str): def connect_to_redis_cache(host: str): redis = Redis(host=host, - port=6379, - decode_responses=True, - socket_connect_timeout=10) + port=6379, + decode_responses=True, + socket_connect_timeout=10) redis.ping() return redis From 89dafe48166e57fdced845a490769226265276d1 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 15:00:40 +0200 Subject: [PATCH 52/68] Linting 3 --- sebs/aws/aws.py | 113 +++++++------ sebs/aws/config.py | 75 +++++---- sebs/aws/function.py | 4 +- sebs/aws/generator.py | 31 ++-- sebs/aws/s3.py | 12 +- sebs/aws/triggers.py | 27 ++-- sebs/aws/workflow.py | 10 +- sebs/azure/azure.py | 138 +++++++++------- sebs/azure/blob_storage.py | 8 +- sebs/azure/cli.py | 4 +- sebs/azure/config.py | 64 ++++++-- sebs/azure/function_app.py | 4 +- sebs/azure/triggers.py | 4 +- sebs/cache.py | 46 ++++-- sebs/code_package.py | 51 ++++-- sebs/config.py | 28 ++-- sebs/experiments/environment.py | 16 +- sebs/experiments/eviction_model.py | 24 ++- sebs/experiments/invocation_overhead.py | 62 +++++-- sebs/experiments/network_ping_pong.py | 16 +- sebs/experiments/perf_cost.py | 38 +++-- sebs/experiments/result.py | 8 +- sebs/faas/benchmark.py | 42 +++-- sebs/faas/config.py | 8 +- sebs/faas/fsm.py | 54 ++----- sebs/faas/storage.py | 20 ++- sebs/faas/system.py | 44 +++-- sebs/gcp/config.py | 42 +++-- sebs/gcp/function.py | 4 +- sebs/gcp/gcp.py | 204 ++++++++++++++---------- sebs/gcp/generator.py | 89 ++++------- sebs/gcp/storage.py | 12 +- sebs/gcp/triggers.py | 14 +- sebs/gcp/workflow.py | 4 +- sebs/local/config.py | 4 +- sebs/local/deployment.py | 6 +- sebs/local/function.py | 7 +- sebs/local/local.py | 26 ++- sebs/local/storage.py | 21 ++- sebs/regression.py | 30 +++- sebs/sebs.py | 8 +- sebs/utils.py | 21 ++- 42 files changed, 885 insertions(+), 558 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 65c8dd8d..eab47b33 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -138,7 +138,9 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: benchmark: benchmark name """ - def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: + def package_code( + self, code_package: CodePackage, directory: str, is_workflow: bool + ) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], @@ -152,8 +154,12 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b file = os.path.join(directory, file) shutil.move(file, 
function_dir) - handler_path = os.path.join(directory, CONFIG_FILES[code_package.language_name][0]) - replace_string_in_file(handler_path, "{{REDIS_HOST}}", f"\"{self.config.redis_host}\"") + handler_path = os.path.join( + directory, CONFIG_FILES[code_package.language_name][0] + ) + replace_string_in_file( + handler_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"' + ) # For python, add an __init__ file if code_package.language_name == "python": @@ -163,13 +169,15 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b # FIXME: use zipfile # create zip with hidden directory but without parent directory - execute("zip -qu -r9 {}.zip * .".format(code_package.name), - shell=True, cwd=directory) + execute( + "zip -qu -r9 {}.zip * .".format(code_package.name), + shell=True, + cwd=directory, + ) benchmark_archive = "{}.zip".format(os.path.join(directory, code_package.name)) self.logging.info("Created {} archive".format(benchmark_archive)) - bytes_size = os.path.getsize( - os.path.join(directory, benchmark_archive)) + bytes_size = os.path.getsize(os.path.join(directory, benchmark_archive)) mbytes = bytes_size / 1024.0 / 1024.0 self.logging.info("Zip archive size {:2f} MB".format(mbytes)) @@ -195,10 +203,13 @@ def wait_for_function(self, func_name: str): if backoff_delay > 60: self.logging.error( - f"Function {func_name} stuck in state {state} after 60s") + f"Function {func_name} stuck in state {state} after 60s" + ) break - def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaFunction": + def create_function( + self, code_package: CodePackage, func_name: str + ) -> "LambdaFunction": package = code_package.code_location benchmark = code_package.name language = code_package.language_name @@ -215,8 +226,7 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaF try: ret = self.lambda_client.get_function(FunctionName=func_name) self.logging.info( - "Function {} exists on AWS, retrieve configuration.".format( - func_name) + "Function {} exists on AWS, retrieve configuration.".format(func_name) ) # Here we assume a single Lambda role lambda_function = LambdaFunction( @@ -233,8 +243,7 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaF lambda_function.updated_code = True # TODO: get configuration of REST API except self.lambda_client.exceptions.ResourceNotFoundException: - self.logging.info( - "Creating function {} from {}".format(func_name, package)) + self.logging.info("Creating function {} from {}".format(func_name, package)) # AWS Lambda limit on zip deployment size # Limit to 50 MB @@ -250,9 +259,9 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaF code_bucket, idx = storage_client.add_input_bucket(benchmark) storage_client.upload(code_bucket, package, code_package_name) self.logging.info( - "Uploading function {} code to {}".format(func_name, code_bucket)) - code_config = {"S3Bucket": code_bucket, - "S3Key": code_package_name} + "Uploading function {} code to {}".format(func_name, code_bucket) + ) + code_config = {"S3Bucket": code_bucket, "S3Key": code_package_name} ret = self.lambda_client.create_function( FunctionName=func_name, Runtime="{}{}".format(language, language_runtime), @@ -318,7 +327,8 @@ def update_function(self, function: Function, code_package: CodePackage): if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: self.lambda_client.update_function_code( - FunctionName=name, ZipFile=code_body.read()) + FunctionName=name, 
ZipFile=code_body.read() + ) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -338,15 +348,16 @@ def update_function(self, function: Function, code_package: CodePackage): ) self.logging.info("Published new function code") - def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger( + self, func: Function, trigger_type: Trigger.TriggerType + ) -> Trigger: from sebs.aws.triggers import HTTPTrigger function = cast(LambdaFunction, func) if trigger_type == Trigger.TriggerType.HTTP: api_name = "{}-http-api".format(function.name) - http_api = self.config.resources.http_api( - api_name, function, self.session) + http_api = self.config.resources.http_api(api_name, function, self.session) # https://aws.amazon.com/blogs/compute/announcing-http-apis-for-amazon-api-gateway/ # but this is wrong - source arn must be {api-arn}/*/* self.get_lambda_client().add_permission( @@ -368,21 +379,24 @@ def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerT self.cache_client.update_benchmark(function) return trigger - def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFNWorkflow": + def create_workflow( + self, code_package: CodePackage, workflow_name: str + ) -> "SFNWorkflow": workflow_name = AWS.format_resource_name(workflow_name) # Make sure we have a valid workflow benchmark - definition_path = os.path.join( - code_package.path, "definition.json") + definition_path = os.path.join(code_package.path, "definition.json") if not os.path.exists(definition_path): - raise ValueError( - f"No workflow definition found for {workflow_name}") + raise ValueError(f"No workflow definition found for {workflow_name}") # First we create a lambda function for each code file code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, workflow_name+"___"+fn) for fn in func_names] + funcs = [ + self.create_function(code_package, workflow_name + "___" + fn) + for fn in func_names + ] # Generate workflow definition.json gen = SFNGenerator({n: f.arn for (n, f) in zip(func_names, funcs)}) @@ -401,30 +415,28 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN ) self.logging.info( - "Creating workflow {} from {}".format(workflow_name, package)) + "Creating workflow {} from {}".format(workflow_name, package) + ) workflow = SFNWorkflow( workflow_name, funcs, code_package.name, ret["stateMachineArn"], - code_package.hash + code_package.hash, ) except self.sfn_client.exceptions.StateMachineAlreadyExists as e: arn = re.search("'([^']*)'", str(e)).group()[1:-1] self.logging.info( "Workflow {} exists on AWS, retrieve configuration.".format( - workflow_name) + workflow_name + ) ) # Here we assume a single Lambda role workflow = SFNWorkflow( - workflow_name, - funcs, - code_package.name, - arn, - code_package.hash + workflow_name, funcs, code_package.name, arn, code_package.hash ) self.update_workflow(workflow, code_package) @@ -443,16 +455,17 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): workflow = cast(SFNWorkflow, workflow) # Make sure we have a valid workflow benchmark - definition_path = os.path.join( - code_package.path, "definition.json") + definition_path = os.path.join(code_package.path, "definition.json") if not os.path.exists(definition_path): - raise ValueError( - f"No workflow 
definition found for {workflow.name}") + raise ValueError(f"No workflow definition found for {workflow.name}") # Create or update lambda function for each code file code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, workflow.name+"___"+fn) for fn in func_names] + funcs = [ + self.create_function(code_package, workflow.name + "___" + fn) + for fn in func_names + ] # Generate workflow definition.json gen = SFNGenerator({n: f.arn for (n, f) in zip(func_names, funcs)}) @@ -467,7 +480,9 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): workflow.functions = funcs self.logging.info("Published new workflow code") - def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: workflow = cast(SFNWorkflow, workflow) if trigger_type == Trigger.TriggerType.HTTP: @@ -547,12 +562,12 @@ def parse_aws_report( return request_id output = requests[request_id] output.request_id = request_id - output.provider_times.execution = int( - float(aws_vals["Duration"]) * 1000) + output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: output.provider_times.initialization = int( - float(aws_vals["Init Duration"]) * 1000) + float(aws_vals["Init Duration"]) * 1000 + ) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory @@ -586,14 +601,12 @@ def get_invocation_error(self, function_name: str, start_time: int, end_time: in time.sleep(5) response = self.logs_client.get_query_results(queryId=query_id) if len(response["results"]) == 0: - self.logging.info( - "AWS logs are not yet available, repeat after 15s...") + self.logging.info("AWS logs are not yet available, repeat after 15s...") time.sleep(15) response = None else: break - self.logging.error( - f"Invocation error for AWS Lambda function {function_name}") + self.logging.error(f"Invocation error for AWS Lambda function {function_name}") for message in response["results"]: for value in message: if value["field"] == "@message": @@ -640,8 +653,7 @@ def download_metrics( for val in results: for result_part in val: if result_part["field"] == "@message": - request_id = AWS.parse_aws_report( - result_part["value"], requests) + request_id = AWS.parse_aws_report(result_part["value"], requests) if request_id in requests: results_processed += 1 requests_ids.remove(request_id) @@ -656,8 +668,7 @@ def _enforce_cold_start(self, function: Function): FunctionName=func.name, Timeout=func.timeout, MemorySize=func.memory, - Environment={"Variables": { - "ForceColdStart": str(self.cold_start_counter)}}, + Environment={"Variables": {"ForceColdStart": str(self.cold_start_counter)}}, ) def enforce_cold_start(self, functions: List[Function], code_package: CodePackage): diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 40c359d5..53c8bd67 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -39,7 +39,9 @@ def initialize(dct: dict) -> Credentials: return AWSCredentials(dct["access_key"], dct["secret_key"]) @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize( 
+ config: dict, cache: Cache, handlers: LoggingHandlers + ) -> Credentials: # FIXME: update return types of both functions to avoid cast # needs 3.7+ to support annotations @@ -47,15 +49,17 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden ret: AWSCredentials # Load cached values if cached_config and "credentials" in cached_config: - ret = cast(AWSCredentials, AWSCredentials.initialize( - cached_config["credentials"])) + ret = cast( + AWSCredentials, AWSCredentials.initialize(cached_config["credentials"]) + ) ret.logging_handlers = handlers ret.logging.info("Using cached credentials for AWS") else: # Check for new config if "credentials" in config: - ret = cast(AWSCredentials, AWSCredentials.initialize( - config["credentials"])) + ret = cast( + AWSCredentials, AWSCredentials.initialize(config["credentials"]) + ) elif "AWS_ACCESS_KEY_ID" in os.environ: ret = AWSCredentials( os.environ["AWS_ACCESS_KEY_ID"], os.environ["AWS_SECRET_ACCESS_KEY"] @@ -66,16 +70,17 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden "up environmental variables AWS_ACCESS_KEY_ID and " "AWS_SECRET_ACCESS_KEY" ) - ret.logging.info( - "No cached credentials for AWS found, initialize!") + ret.logging.info("No cached credentials for AWS found, initialize!") ret.logging_handlers = handlers return ret def update_cache(self, cache: Cache): - cache.update_config(val=self.access_key, keys=[ - "aws", "credentials", "access_key"]) - cache.update_config(val=self.secret_key, keys=[ - "aws", "credentials", "secret_key"]) + cache.update_config( + val=self.access_key, keys=["aws", "credentials", "access_key"] + ) + cache.update_config( + val=self.secret_key, keys=["aws", "credentials", "secret_key"] + ) def serialize(self) -> dict: out = {"access_key": self.access_key, "secret_key": self.secret_key} @@ -127,10 +132,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: "Sid": "", "Effect": "Allow", "Principal": { - "Service": [ - "lambda.amazonaws.com", - "states.amazonaws.com" - ] + "Service": ["lambda.amazonaws.com", "states.amazonaws.com"] }, "Action": "sts:AssumeRole", } @@ -142,13 +144,12 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess", "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole", "arn:aws:iam::aws:policy/service-role/AWSLambdaRole", - "arn:aws:iam::aws:policy/AWSXRayDaemonWriteAccess" + "arn:aws:iam::aws:policy/AWSXRayDaemonWriteAccess", ] try: out = iam_client.get_role(RoleName=role_name) self._lambda_role = out["Role"]["Arn"] - self.logging.info( - f"AWS: Selected {self._lambda_role} IAM role") + self.logging.info(f"AWS: Selected {self._lambda_role} IAM role") except iam_client.exceptions.NoSuchEntityException: out = iam_client.create_role( RoleName=role_name, @@ -162,8 +163,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: time.sleep(10) # Attach basic AWS Lambda and S3 policies. 
for policy in attached_policies: - iam_client.attach_role_policy( - RoleName=role_name, PolicyArn=policy) + iam_client.attach_role_policy(RoleName=role_name, PolicyArn=policy) return self._lambda_role def http_api( @@ -215,16 +215,20 @@ def initialize(dct: dict) -> Resources: def serialize(self) -> dict: out = { "lambda-role": self._lambda_role, - "http-apis": {key: value.serialize() for (key, value) in self._http_apis.items()}, + "http-apis": { + key: value.serialize() for (key, value) in self._http_apis.items() + }, } return out def update_cache(self, cache: Cache): - cache.update_config(val=self._lambda_role, keys=[ - "aws", "resources", "lambda-role"]) + cache.update_config( + val=self._lambda_role, keys=["aws", "resources", "lambda-role"] + ) for name, api in self._http_apis.items(): - cache.update_config(val=api.serialize(), keys=[ - "aws", "resources", "http-apis", name]) + cache.update_config( + val=api.serialize(), keys=["aws", "resources", "http-apis", name] + ) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -233,18 +237,19 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour ret: AWSResources # Load cached values if cached_config and "resources" in cached_config: - ret = cast(AWSResources, AWSResources.initialize( - cached_config["resources"])) + ret = cast( + AWSResources, AWSResources.initialize(cached_config["resources"]) + ) ret.logging_handlers = handlers ret.logging.info("Using cached resources for AWS") else: # Check for new config if "resources" in config: - ret = cast(AWSResources, AWSResources.initialize( - config["resources"])) + ret = cast(AWSResources, AWSResources.initialize(config["resources"])) ret.logging_handlers = handlers ret.logging.info( - "No cached resources for AWS found, using user configuration.") + "No cached resources for AWS found, using user configuration." 
+ ) else: ret = AWSResources(lambda_role="") ret.logging_handlers = handlers @@ -288,9 +293,11 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) credentials = cast( - AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) - resources = cast(AWSResources, AWSResources.deserialize( - config, cache, handlers)) + AWSCredentials, AWSCredentials.deserialize(config, cache, handlers) + ) + resources = cast( + AWSResources, AWSResources.deserialize(config, cache, handlers) + ) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values @@ -322,6 +329,6 @@ def serialize(self) -> dict: "region": self._region, "credentials": self._credentials.serialize(), "resources": self._resources.serialize(), - "redis_host": self._redis_host + "redis_host": self._redis_host, } return out diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 20816745..a3d77d54 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -59,7 +59,9 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get( + trigger["type"] + ), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 1a736a98..d00cfe2b 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -5,27 +5,22 @@ class SFNGenerator(Generator): - def __init__(self, func_arns: Dict[str, str]): super().__init__() self._func_arns = func_arns - def postprocess(self, states: List[State], payloads: List[dict]) -> dict: payloads = super().postprocess(states, payloads) definition = { "Comment": "SeBS auto-generated benchmark", "StartAt": self.root.name, - "States": payloads + "States": payloads, } return definition def encode_task(self, state: Task) -> Union[dict, List[dict]]: - payload = { - "Type": "Task", - "Resource": self._func_arns[state.func_name] - } + payload = {"Type": "Task", "Resource": self._func_arns[state.func_name]} if state.next: payload["Next"] = state.next @@ -36,11 +31,7 @@ def encode_task(self, state: Task) -> Union[dict, List[dict]]: def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: choises = [self._encode_case(c) for c in state.cases] - return { - "Type": "Choice", - "Choices": choises, - "Default": state.default - } + return {"Type": "Choice", "Choices": choises, "Default": state.default} def _encode_case(self, case: Switch.Case) -> dict: type = "Numeric" if isinstance(case.val, numbers.Number) else "String" @@ -49,30 +40,26 @@ def _encode_case(self, case: Switch.Case) -> dict: "<=": "LessThanEquals", "==": "Equals", ">=": "GreaterThanEquals", - ">": "GreaterThan" + ">": "GreaterThan", } cond = type + comp[case.op] - return { - "Variable": "$." + case.var, - cond: case.val, - "Next": case.next - } + return {"Variable": "$." + case.var, cond: case.val, "Next": case.next} def encode_map(self, state: Map) -> Union[dict, List[dict]]: payload = { "Type": "Map", - "ItemsPath": "$."+state.array, + "ItemsPath": "$." 
+ state.array, "Iterator": { "StartAt": "func", "States": { "func": { "Type": "Task", "Resource": self._func_arns[state.func_name], - "End": True + "End": True, } - } - } + }, + }, } if state.next: diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index e47bd77f..72560717 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -49,7 +49,9 @@ def _create_bucket(self, name: str, buckets: List[str] = []): for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + "Bucket {} for {} already exists, skipping.".format( + bucket_name, name + ) ) return bucket_name random_name = str(uuid.uuid4())[0:16] @@ -66,7 +68,9 @@ def _create_bucket(self, name: str, buckets: List[str] = []): self.client.create_bucket(Bucket=bucket_name) self.logging.info("Created bucket {}".format(bucket_name)) except self.client.exceptions.BucketAlreadyExists as e: - self.logging.error(f"The bucket {bucket_name} exists already in region {self.region}!") + self.logging.error( + f"The bucket {bucket_name} exists already in region {self.region}!" + ) raise e except self.client.exceptions.ClientError as e: self.logging.error( @@ -114,7 +118,9 @@ def list_bucket(self, bucket_name: str): def list_buckets(self, bucket_name: str) -> List[str]: s3_buckets = self.client.list_buckets()["Buckets"] - return [bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"]] + return [ + bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"] + ] def clean_bucket(self, bucket: str): objects = self.client.list_objects_v2(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 53a47db6..e368a641 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -50,22 +50,21 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke(FunctionName=self.name, - Payload=serialized_payload, LogType="Tail") + ret = client.invoke( + FunctionName=self.name, Payload=serialized_payload, LogType="Tail" + ) end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) aws_result.request_id = ret["ResponseMetadata"]["RequestId"] if ret["StatusCode"] != 200: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result if "FunctionError" in ret: self.logging.error("Invocation of {} failed!".format(self.name)) - self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) aws_result.stats.failure = True return aws_result self.logging.debug(f"Invoke of function {self.name} was successful") @@ -79,8 +78,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: if isinstance(function_output["body"], dict): aws_result.parse_benchmark_output(function_output["body"]) else: - aws_result.parse_benchmark_output( - json.loads(function_output["body"])) + aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result def async_invoke(self, payload: dict): @@ -95,10 +93,8 @@ def async_invoke(self, payload: dict): LogType="Tail", ) if ret["StatusCode"] != 202: - self.logging.error( - "Async invocation of {} failed!".format(self.name)) - 
self.logging.error("Input: {}".format( - serialized_payload.decode("utf-8"))) + self.logging.error("Async invocation of {} failed!".format(self.name)) + self.logging.error("Input: {}".format(serialized_payload.decode("utf-8"))) raise RuntimeError() return ret @@ -111,7 +107,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: client = self.deployment_client.get_sfn_client() begin = datetime.datetime.now() ret = client.start_execution( - stateMachineArn=self.name, input=json.dumps(payload)) + stateMachineArn=self.name, input=json.dumps(payload) + ) end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) @@ -121,7 +118,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Wait for execution to finish, then print results. execution_finished = False backoff_delay = 1 # Start wait with delay of 1 second - while (not execution_finished): + while not execution_finished: execution = client.describe_execution(executionArn=execution_arn) status = execution["status"] execution_finished = status != "RUNNING" @@ -141,7 +138,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: def async_invoke(self, payload: dict): - raise NotImplementedError('Async invocation is not implemented') + raise NotImplementedError("Async invocation is not implemented") class HTTPTrigger(Trigger): diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index 09debdce..3489a469 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -12,7 +12,7 @@ def __init__( functions: List[LambdaFunction], benchmark: str, arn: str, - code_package_hash: str + code_package_hash: str, ): super().__init__(benchmark, name, code_package_hash) self.functions = functions @@ -26,7 +26,7 @@ def serialize(self) -> dict: return { **super().serialize(), "functions": [f.serialize() for f in self.functions], - "arn": self.arn + "arn": self.arn, } @staticmethod @@ -40,12 +40,14 @@ def deserialize(cached_config: dict) -> "SFNWorkflow": funcs, cached_config["code_package"], cached_config["arn"], - cached_config["hash"] + cached_config["hash"], ) for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get( + trigger["type"] + ), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index e740b2d0..4ac8fed3 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -103,7 +103,8 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.config.region, self.cache_client, self.config.resources.data_storage_account( - self.cli_instance).connection_string, + self.cli_instance + ).connection_string, replace_existing=replace_existing, ) self.storage.logging_handlers = self.logging_handlers @@ -119,7 +120,9 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: # - function.json # host.json # requirements.txt/package.json - def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: + def package_code( + self, code_package: CodePackage, directory: str, is_workflow: bool + ) -> Tuple[str, int]: # In previous step we ran a Docker container which installed packages # Python packages are in .python_packages because this is expected by Azure @@ -130,7 +133,7 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: 
b } WRAPPER_FILES = { "python": ["handler.py", "storage.py", "fsm.py"], - "nodejs": ["handler.js", "storage.js"] + "nodejs": ["handler.js", "storage.js"], } file_type = FILES[code_package.language_name] package_config = CONFIG_FILES[code_package.language_name] @@ -141,11 +144,9 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b os.rename(main_path, os.path.join(directory, "main.py")) # Make sure we have a valid workflow benchmark - src_path = os.path.join( - code_package.path, "definition.json") + src_path = os.path.join(code_package.path, "definition.json") if not os.path.exists(src_path): - raise ValueError( - f"No workflow definition found in {directory}") + raise ValueError(f"No workflow definition found in {directory}") dst_path = os.path.join(directory, "definition.json") shutil.copy2(src_path, dst_path) @@ -154,10 +155,15 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b # TODO: extension to other triggers than HTTP main_bindings = [ - {"name": "req", "type": "httpTrigger", "direction": "in", - "authLevel": "function", "methods": ["post"]}, + { + "name": "req", + "type": "httpTrigger", + "direction": "in", + "authLevel": "function", + "methods": ["post"], + }, {"name": "starter", "type": "durableClient", "direction": "in"}, - {"name": "$return", "type": "http", "direction": "out"} + {"name": "$return", "type": "http", "direction": "out"}, ] activity_bindings = [ {"name": "event", "type": "activityTrigger", "direction": "in"}, @@ -167,10 +173,7 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b ] if is_workflow: - bindings = { - "main": main_bindings, - "run_workflow": orchestrator_bindings - } + bindings = {"main": main_bindings, "run_workflow": orchestrator_bindings} else: bindings = {"function": main_bindings} @@ -196,13 +199,17 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b payload = { "bindings": bindings.get(name, activity_bindings), "scriptFile": script_file, - "disabled": False + "disabled": False, } dst_json = os.path.join(os.path.dirname(dst_file), "function.json") json.dump(payload, open(dst_json, "w"), indent=2) - handler_path = os.path.join(directory, WRAPPER_FILES[code_package.language_name][0]) - replace_string_in_file(handler_path, "{{REDIS_HOST}}", f"\"{self.config.redis_host}\"") + handler_path = os.path.join( + directory, WRAPPER_FILES[code_package.language_name][0] + ) + replace_string_in_file( + handler_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"' + ) # copy every wrapper file to respective function dirs for wrapper_file in wrapper_files: @@ -217,15 +224,17 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b "version": "2.0", "extensionBundle": { "id": "Microsoft.Azure.Functions.ExtensionBundle", - "version": "[2.*, 3.0.0)" + "version": "[2.*, 3.0.0)", }, } - json.dump(host_json, open( - os.path.join(directory, "host.json"), "w"), indent=2) + json.dump(host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = CodePackage.directory_size(directory) - execute("zip -qu -r9 {}.zip * .".format(code_package.name), - shell=True, cwd=directory) + execute( + "zip -qu -r9 {}.zip * .".format(code_package.name), + shell=True, + cwd=directory, + ) return directory, code_size def publish_benchmark( @@ -236,8 +245,7 @@ def publish_benchmark( ) -> str: success = False url = "" - self.logging.info( - "Attempting publish of {}".format(benchmark.name)) + self.logging.info("Attempting 
publish of {}".format(benchmark.name)) while not success: try: ret = self.cli_instance.execute( @@ -255,7 +263,8 @@ def publish_benchmark( break if url == "": raise RuntimeError( - "Couldnt find URL in {}".format(ret.decode("utf-8"))) + "Couldnt find URL in {}".format(ret.decode("utf-8")) + ) success = True except RuntimeError as e: error = str(e) @@ -292,13 +301,13 @@ def update_benchmark(self, benchmark: Benchmark, code_package: CodePackage): url = self.publish_benchmark(benchmark, code_package, True) trigger = HTTPTrigger( - url, self.config.resources.data_storage_account(self.cli_instance)) + url, self.config.resources.data_storage_account(self.cli_instance) + ) trigger.logging_handlers = self.logging_handlers benchmark.add_trigger(trigger) def _mount_function_code(self, code_package: CodePackage): - self.cli_instance.upload_package( - code_package.code_location, "/mnt/function/") + self.cli_instance.upload_package(code_package.code_location, "/mnt/function/") def default_benchmark_name(self, code_package: CodePackage) -> str: """ @@ -316,11 +325,13 @@ def default_benchmark_name(self, code_package: CodePackage) -> str: return func_name B = TypeVar("B", bound=FunctionApp) - def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: B) -> B: + + def create_benchmark( + self, code_package: CodePackage, name: str, benchmark_cls: B + ) -> B: language = code_package.language_name language_runtime = code_package.language_version - resource_group = self.config.resources.resource_group( - self.cli_instance) + resource_group = self.config.resources.resource_group(self.cli_instance) region = self.config.region config = { @@ -344,17 +355,18 @@ def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: for setting in json.loads(ret.decode()): if setting["name"] == "AzureWebJobsStorage": connection_string = setting["value"] - elems = [z for y in connection_string.split( - ";") for z in y.split("=")] + elems = [ + z for y in connection_string.split(";") for z in y.split("=") + ] account_name = elems[elems.index("AccountName") + 1] function_storage_account = AzureResources.Storage.from_cache( account_name, connection_string ) - self.logging.info( - "Azure: Selected {} function app".format(name)) + self.logging.info("Azure: Selected {} function app".format(name)) except RuntimeError: function_storage_account = self.config.resources.add_storage_account( - self.cli_instance) + self.cli_instance + ) config["storage_account"] = function_storage_account.account_name # FIXME: only Linux type is supported while True: @@ -369,8 +381,7 @@ def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: " --name {name} --storage-account {storage_account}" ).format(**config) ) - self.logging.info( - "Azure: Created function app {}".format(name)) + self.logging.info("Azure: Created function app {}".format(name)) break except RuntimeError as e: # Azure does not allow some concurrent operations @@ -396,25 +407,29 @@ def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: def cached_benchmark(self, benchmark: Benchmark): data_storage_account = self.config.resources.data_storage_account( - self.cli_instance) + self.cli_instance + ) for trigger in benchmark.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account - def create_function(self, code_package: CodePackage, func_name: str) -> AzureFunction: + def create_function( + 
self, code_package: CodePackage, func_name: str + ) -> AzureFunction: return self.create_benchmark(code_package, func_name, AzureFunction) def update_function(self, function: Function, code_package: CodePackage): self.update_benchmark(function, code_package) - def create_workflow(self, code_package: CodePackage, workflow_name: str) -> AzureWorkflow: + def create_workflow( + self, code_package: CodePackage, workflow_name: str + ) -> AzureWorkflow: return self.create_benchmark(code_package, workflow_name, AzureWorkflow) def update_workflow(self, workflow: Workflow, code_package: CodePackage): self.update_benchmark(workflow, code_package) - """ Prepare Azure resources to store experiment results. Allocate one container. @@ -424,8 +439,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): """ def prepare_experiment(self, benchmark: str): - logs_container = self.storage.add_output_bucket( - benchmark, suffix="logs") + logs_container = self.storage.add_output_bucket(benchmark, suffix="logs") return logs_container def download_metrics( @@ -437,16 +451,16 @@ def download_metrics( metrics: Dict[str, dict], ): - resource_group = self.config.resources.resource_group( - self.cli_instance) + resource_group = self.config.resources.resource_group(self.cli_instance) # Avoid warnings in the next step ret = self.cli_instance.execute( - "az feature register --name AIWorkspacePreview " "--namespace microsoft.insights" + "az feature register --name AIWorkspacePreview " + "--namespace microsoft.insights" ) app_id_query = self.cli_instance.execute( - ("az monitor app-insights component show " "--app {} --resource-group {}").format( - function_name, resource_group - ) + ( + "az monitor app-insights component show " "--app {} --resource-group {}" + ).format(function_name, resource_group) ).decode("utf-8") application_id = json.loads(app_id_query)["appId"] @@ -457,8 +471,9 @@ def download_metrics( start_time_str = datetime.datetime.fromtimestamp(start_time).strftime( "%Y-%m-%d %H:%M:%S.%f" ) - end_time_str = datetime.datetime.fromtimestamp( - end_time + 1).strftime("%Y-%m-%d %H:%M:%S") + end_time_str = datetime.datetime.fromtimestamp(end_time + 1).strftime( + "%Y-%m-%d %H:%M:%S" + ) from tzlocal import get_localzone timezone_str = datetime.datetime.now(get_localzone()).strftime("%z") @@ -498,7 +513,8 @@ def download_metrics( func_exec_time = request[-1] invocations_processed.add(invocation_id) requests[invocation_id].provider_times.execution = int( - float(func_exec_time) * 1000) + float(func_exec_time) * 1000 + ) self.logging.info( f"Azure: Found time metrics for {len(invocations_processed)} " f"out of {len(requests.keys())} invocations." @@ -506,15 +522,15 @@ def download_metrics( if len(invocations_processed) < len(requests.keys()): time.sleep(5) self.logging.info( - f"Missing the requests: {invocations_to_process - invocations_processed}") + f"Missing the requests: {invocations_to_process - invocations_processed}" + ) # TODO: query performance counters for mem def _enforce_cold_start(self, function: Function, code_package: CodePackage): fname = function.name - resource_group = self.config.resources.resource_group( - self.cli_instance) + resource_group = self.config.resources.resource_group(self.cli_instance) self.cli_instance.execute( f"az functionapp config appsettings set --name {fname} " @@ -537,10 +553,12 @@ def enforce_cold_start(self, functions: List[Function], code_package: CodePackag It is automatically created for each function. 
""" - def create_function_trigger(self, function: Function, - trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger( + self, function: Function, trigger_type: Trigger.TriggerType + ) -> Trigger: raise NotImplementedError() - def create_workflow_trigger(self, workflow: Workflow, - trigger_type: Trigger.TriggerType) -> Trigger: + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: raise NotImplementedError() diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index cad108a8..e87d8d75 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -16,7 +16,9 @@ def typename() -> str: def deployment_name(): return "azure" - def __init__(self, region: str, cache_client: Cache, conn_string: str, replace_existing: bool): + def __init__( + self, region: str, cache_client: Cache, conn_string: str, replace_existing: bool + ): super().__init__(region, cache_client, replace_existing) self.client = BlobServiceClient.from_connection_string(conn_string) @@ -27,7 +29,9 @@ def __init__(self, region: str, cache_client: Cache, conn_string: str, replace_e def _create_bucket(self, name: str, containers: List[str] = []) -> str: for c in containers: if name in c: - self.logging.info("Container {} for {} already exists, skipping.".format(c, name)) + self.logging.info( + "Container {} for {} already exists, skipping.".format(c, name) + ) return c random_name = str(uuid.uuid4())[0:16] name = "{}-{}".format(name, random_name) diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index f98226e4..9d15eeb0 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -17,7 +17,9 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): except docker.errors.ImageNotFound: try: logging.info( - "Docker pull of image {repo}:{image}".format(repo=repo_name, image=image_name) + "Docker pull of image {repo}:{image}".format( + repo=repo_name, image=image_name + ) ) docker_client.images.pull(repo_name, image_name) except docker.errors.APIError: diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 23b5936d..a5bb3277 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -40,7 +40,9 @@ def initialize(dct: dict) -> Credentials: return AzureCredentials(dct["appId"], dct["tenant"], dct["password"]) @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize( + config: dict, cache: Cache, handlers: LoggingHandlers + ) -> Credentials: # FIXME: update return types of both functions to avoid cast # needs 3.7+ to support annotations @@ -94,12 +96,18 @@ def __init__(self, account_name: str, connection_string: str): # FIXME: 3.7+ migration with future annotations @staticmethod - def from_cache(account_name: str, connection_string: str) -> "AzureResources.Storage": - assert connection_string, "Empty connection string for account {}".format(account_name) + def from_cache( + account_name: str, connection_string: str + ) -> "AzureResources.Storage": + assert connection_string, "Empty connection string for account {}".format( + account_name + ) return AzureResources.Storage(account_name, connection_string) @staticmethod - def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResources.Storage": + def from_allocation( + account_name: str, cli_instance: AzureCLI + ) -> "AzureResources.Storage": connection_string = AzureResources.Storage.query_connection_string( account_name, cli_instance ) @@ -113,7 +121,9 @@ def 
from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResource @staticmethod def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: ret = cli_instance.execute( - "az storage account show-connection-string --name {}".format(account_name) + "az storage account show-connection-string --name {}".format( + account_name + ) ) ret = json.loads(ret.decode("utf-8")) connection_string = ret["connectionString"] @@ -124,7 +134,9 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> "AzureResources.Storage": - return AzureResources.Storage.from_cache(obj["account_name"], obj["connection_string"]) + return AzureResources.Storage.from_cache( + obj["account_name"], obj["connection_string"] + ) # FIXME: 3.7 Python, future annotations def __init__( @@ -195,12 +207,16 @@ def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage does NOT add the account to any resource collection. """ - def _create_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + def _create_storage_account( + self, cli_instance: AzureCLI + ) -> "AzureResources.Storage": sku = "Standard_LRS" # Create account. Only alphanumeric characters are allowed uuid_name = str(uuid.uuid1())[0:8] account_name = "sebsstorage{}".format(uuid_name) - self.logging.info("Starting allocation of storage account {}.".format(account_name)) + self.logging.info( + "Starting allocation of storage account {}.".format(account_name) + ) cli_instance.execute( ( "az storage account create --name {0} --location {1} " @@ -233,7 +249,9 @@ def initialize(dct: dict) -> Resources: storage_accounts=[ AzureResources.Storage.deserialize(x) for x in dct["storage_accounts"] ], - data_storage_account=AzureResources.Storage.deserialize(dct["data_storage_account"]), + data_storage_account=AzureResources.Storage.deserialize( + dct["data_storage_account"] + ), ) def serialize(self) -> dict: @@ -252,15 +270,25 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour cached_config = cache.get_config("azure") ret: AzureResources # Load cached values - if cached_config and "resources" in cached_config and len(cached_config["resources"]) > 0: + if ( + cached_config + and "resources" in cached_config + and len(cached_config["resources"]) > 0 + ): logging.info("Using cached resources for Azure") - ret = cast(AzureResources, AzureResources.initialize(cached_config["resources"])) + ret = cast( + AzureResources, AzureResources.initialize(cached_config["resources"]) + ) else: # Check for new config if "resources" in config: - ret = cast(AzureResources, AzureResources.initialize(config["resources"])) + ret = cast( + AzureResources, AzureResources.initialize(config["resources"]) + ) ret.logging_handlers = handlers - ret.logging.info("No cached resources for Azure found, using user configuration.") + ret.logging.info( + "No cached resources for Azure found, using user configuration." 
+ ) else: ret = AzureResources() ret.logging_handlers = handlers @@ -311,8 +339,12 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("azure") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast(AzureCredentials, AzureCredentials.deserialize(config, cache, handlers)) - resources = cast(AzureResources, AzureResources.deserialize(config, cache, handlers)) + credentials = cast( + AzureCredentials, AzureCredentials.deserialize(config, cache, handlers) + ) + resources = cast( + AzureResources, AzureResources.deserialize(config, cache, handlers) + ) config_obj = AzureConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values @@ -346,6 +378,6 @@ def serialize(self) -> dict: "resources_id": self.resources_id, "credentials": self._credentials.serialize(), "resources": self._resources.serialize(), - "redis_host": self._redis_host + "redis_host": self._redis_host, } return out diff --git a/sebs/azure/function_app.py b/sebs/azure/function_app.py index bf86df8e..7667ca0c 100644 --- a/sebs/azure/function_app.py +++ b/sebs/azure/function_app.py @@ -35,8 +35,10 @@ def deserialize(cached_config: dict) -> Function: ret.add_trigger(trigger_type.deserialize(trigger)) return ret + class AzureFunction(FunctionApp): pass + class AzureWorkflow(FunctionApp): - pass \ No newline at end of file + pass diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index a0c8bfdc..9376a71f 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -21,7 +21,9 @@ def data_storage_account(self, data_storage_account: AzureResources.Storage): class HTTPTrigger(AzureTrigger): - def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): + def __init__( + self, url: str, data_storage_account: Optional[AzureResources.Storage] = None + ): super().__init__(data_storage_account) self.url = url diff --git a/sebs/cache.py b/sebs/cache.py index 42adc8b6..9b17c4b6 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -60,7 +60,9 @@ def typename() -> str: def load_config(self): with self._lock: for cloud in ["azure", "aws", "gcp"]: - cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) + cloud_config_file = os.path.join( + self.cache_dir, "{}.json".format(cloud) + ) if os.path.exists(cloud_config_file): self.cached_config[cloud] = json.load(open(cloud_config_file, "r")) @@ -88,8 +90,12 @@ def shutdown(self): if self.config_updated: for cloud in ["azure", "aws", "gcp"]: if cloud in self.cached_config: - cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) - self.logging.info("Update cached config {}".format(cloud_config_file)) + cloud_config_file = os.path.join( + self.cache_dir, "{}.json".format(cloud) + ) + self.logging.info( + "Update cached config {}".format(cloud_config_file) + ) with open(cloud_config_file, "w") as out: json.dump(self.cached_config[cloud], out, indent=2) @@ -149,7 +155,11 @@ def get_benchmarks( def get_storage_config(self, deployment: str, benchmark: str): cfg = self.get_benchmark_config(deployment, benchmark) - return cfg["storage"] if cfg and "storage" in cfg and not self.ignore_storage else None + return ( + cfg["storage"] + if cfg and "storage" in cfg and not self.ignore_storage + else None + ) def update_storage(self, deployment: str, benchmark: str, config: dict): if self.ignore_storage: @@ -162,7 +172,9 @@ def update_storage(self, deployment: str, benchmark: str, config: dict): with 
open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(cached_config, fp, indent=2) - def add_code_package(self, deployment_name: str, language_name: str, code_package: "CodePackage"): + def add_code_package( + self, deployment_name: str, language_name: str, code_package: "CodePackage" + ): with self._lock: language = code_package.language_name benchmark_dir = os.path.join(self.cache_dir, code_package.name) @@ -242,8 +254,12 @@ def update_code_package( with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: config = json.load(fp) date = str(datetime.datetime.now()) - config[deployment_name][language]["code_package"]["date"]["modified"] = date - config[deployment_name][language]["code_package"]["hash"] = code_package.hash + config[deployment_name][language]["code_package"]["date"][ + "modified" + ] = date + config[deployment_name][language]["code_package"][ + "hash" + ] = code_package.hash with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(config, fp, indent=2) else: @@ -275,12 +291,16 @@ def add_benchmark( cache_config = os.path.join(benchmark_dir, "config.json") if os.path.exists(cache_config): - benchmarks_config: Dict[str, Any] = {benchmark.name: {**benchmark.serialize()}} + benchmarks_config: Dict[str, Any] = { + benchmark.name: {**benchmark.serialize()} + } with open(cache_config, "r") as fp: cached_config = json.load(fp) if "benchmarks" not in cached_config[deployment_name][language]: - cached_config[deployment_name][language]["benchmarks"] = benchmarks_config + cached_config[deployment_name][language][ + "benchmarks" + ] = benchmarks_config else: cached_config[deployment_name][language]["benchmarks"].update( benchmarks_config @@ -290,7 +310,9 @@ def add_benchmark( json.dump(config, fp, indent=2) else: raise RuntimeError( - "Can't cache benchmark {} for a non-existing code package!".format(benchmark.name) + "Can't cache benchmark {} for a non-existing code package!".format( + benchmark.name + ) ) def update_benchmark(self, benchmark: "Benchmark"): @@ -317,5 +339,7 @@ def update_benchmark(self, benchmark: "Benchmark"): json.dump(cached_config, fp, indent=2) else: raise RuntimeError( - "Can't cache benchmark {} for a non-existing code package!".format(benchmark.name) + "Can't cache benchmark {} for a non-existing code package!".format( + benchmark.name + ) ) diff --git a/sebs/code_package.py b/sebs/code_package.py index 23a85b3a..5f5cdb56 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -129,7 +129,9 @@ def language_version(self): @property # noqa: A003 def hash(self): path = os.path.join(self.path, self.language_name) - self._hash_value = CodePackage.hash_directory(path, self._deployment_name, self.language_name) + self._hash_value = CodePackage.hash_directory( + path, self._deployment_name, self.language_name + ) return self._hash_value @hash.setter # noqa: A003 @@ -165,7 +167,9 @@ def __init__( if self.language not in self.config.languages: raise RuntimeError( - "Benchmark {} not available for language {}".format(self.name, self.language) + "Benchmark {} not available for language {}".format( + self.name, self.language + ) ) self._cache_client = cache_client self._docker_client = docker_client @@ -300,7 +304,7 @@ def add_deployment_package_python(self, output_dir): if len(packages): with open(os.path.join(output_dir, "requirements.txt"), "a") as out: for package in packages: - out.write(package+"\n") + out.write(package + "\n") def add_deployment_package_nodejs(self, output_dir): # modify package.json @@ -363,11 
+367,15 @@ def install_dependencies(self, output_dir): ) self._docker_client.images.pull(repo_name, image_name) except docker.errors.APIError: - raise RuntimeError("Docker pull of image {} failed!".format(image_name)) + raise RuntimeError( + "Docker pull of image {} failed!".format(image_name) + ) # Create set of mounted volumes unless Docker volumes are disabled if not self._experiment_config.check_flag("docker_copy_build_files"): - volumes = {os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"}} + volumes = { + os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"} + } package_script = os.path.abspath( os.path.join(self._path, self.language_name, "package.sh") ) @@ -385,11 +393,15 @@ def install_dependencies(self, output_dir): try: self.logging.info( "Docker build of benchmark dependencies in container " - "of image {repo}:{image}".format(repo=repo_name, image=image_name) + "of image {repo}:{image}".format( + repo=repo_name, image=image_name + ) ) uid = os.getuid() # Standard, simplest build - if not self._experiment_config.check_flag("docker_copy_build_files"): + if not self._experiment_config.check_flag( + "docker_copy_build_files" + ): self.logging.info( "Docker mount of benchmark code from path {path}".format( path=os.path.abspath(output_dir) @@ -425,7 +437,9 @@ def install_dependencies(self, output_dir): "Send benchmark code from path {path} to " "Docker instance".format(path=os.path.abspath(output_dir)) ) - tar_archive = os.path.join(output_dir, os.path.pardir, "function.tar") + tar_archive = os.path.join( + output_dir, os.path.pardir, "function.tar" + ) with tarfile.open(tar_archive, "w") as tar: for f in os.listdir(output_dir): tar.add(os.path.join(output_dir, f), arcname=f) @@ -467,8 +481,9 @@ def recalculate_code_size(self): return self._code_size def build( - self, deployment_build_step: Callable[["CodePackage", str, bool], Tuple[str, int]], - is_workflow: bool + self, + deployment_build_step: Callable[["CodePackage", str, bool], Tuple[str, int]], + is_workflow: bool, ) -> Tuple[bool, str]: # Skip build if files are up to date and user didn't enforce rebuild @@ -514,9 +529,13 @@ def build( # package already exists if self.is_cached: - self._cache_client.update_code_package(self._deployment_name, self.language_name, self) + self._cache_client.update_code_package( + self._deployment_name, self.language_name, self + ) else: - self._cache_client.add_code_package(self._deployment_name, self.language_name, self) + self._cache_client.add_code_package( + self._deployment_name, self.language_name, self + ) self.query_cache() return True, self._code_location @@ -556,7 +575,9 @@ def code_package_modify(self, filename: str, data: bytes): if self.is_archive(): self._update_zip(self.code_location, filename, data) new_size = self.recompute_size() / 1024.0 / 1024.0 - self.logging.info(f"Modified zip package {self.code_location}, new size {new_size} MB") + self.logging.info( + f"Modified zip package {self.code_location}, new size {new_size} MB" + ) else: raise NotImplementedError() @@ -629,7 +650,9 @@ def load_benchmark_input(path: str) -> CodePackageModuleInterface: # Look for input generator file in the directory containing benchmark import importlib.machinery - loader = importlib.machinery.SourceFileLoader("input", os.path.join(path, "input.py")) + loader = importlib.machinery.SourceFileLoader( + "input", os.path.join(path, "input.py") + ) spec = importlib.util.spec_from_loader(loader.name, loader) assert spec mod = importlib.util.module_from_spec(spec) diff 
--git a/sebs/config.py b/sebs/config.py index fd7f66aa..d238dedd 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -12,23 +12,31 @@ def __init__(self): def docker_repository(self) -> str: return self._system_config["general"]["docker_repository"] - def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: - return self._system_config[deployment_name]["languages"][language_name]["deployment"][ - "packages" - ] + def deployment_packages( + self, deployment_name: str, language_name: str + ) -> Dict[str, str]: + return self._system_config[deployment_name]["languages"][language_name][ + "deployment" + ]["packages"] def deployment_files(self, deployment_name: str, language_name: str) -> List[str]: - return self._system_config[deployment_name]["languages"][language_name]["deployment"][ - "files" - ] + return self._system_config[deployment_name]["languages"][language_name][ + "deployment" + ]["files"] def docker_image_types(self, deployment_name: str, language_name: str) -> List[str]: - return self._system_config[deployment_name]["languages"][language_name]["images"] + return self._system_config[deployment_name]["languages"][language_name][ + "images" + ] - def supported_language_versions(self, deployment_name: str, language_name: str) -> List[str]: + def supported_language_versions( + self, deployment_name: str, language_name: str + ) -> List[str]: return self._system_config[deployment_name]["languages"][language_name][ "base_images" ].keys() def username(self, deployment_name: str, language_name: str) -> str: - return self._system_config[deployment_name]["languages"][language_name]["username"] + return self._system_config[deployment_name]["languages"][language_name][ + "username" + ] diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index 86576f11..29bf608b 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -13,7 +13,9 @@ class ExperimentEnvironment: def __init__(self): # find CPU mapping - ret = execute('cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True) + ret = execute( + 'cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True + ) # skip empty line at the end mapping = [int(x.split(":")[1]) for x in ret.split("\n") if x] @@ -47,7 +49,9 @@ def __init__(self): raise NotImplementedError() # Assume all CPU use the same - scaling_governor_path = "/sys/devices/system/cpu/cpu{cpu_id}/cpufreq/scaling_driver" + scaling_governor_path = ( + "/sys/devices/system/cpu/cpu{cpu_id}/cpufreq/scaling_driver" + ) governor = execute("cat {path}".format(path=scaling_governor_path)) if governor == "intel_pstate": self._governor = governor @@ -62,7 +66,9 @@ def write_cpu_status(self, cores: List[int], status: int): for logical_core in logical_cores[1:]: path = cpu_status_path.format(cpu_id=logical_core["core"]) execute( - cmd="echo {status} | sudo tee {path}".format(status=status, path=path), + cmd="echo {status} | sudo tee {path}".format( + status=status, path=path + ), shell=True, ) @@ -101,7 +107,9 @@ def set_frequency(self, max_freq: int): def unset_frequency(self): path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" - execute("echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path)) + execute( + "echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path) + ) def setup_benchmarking(self, cores: List[int]): self.disable_boost(cores) diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 4d55c66c..839b6856 100644 
--- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -95,10 +95,14 @@ def accept_replies(port: int, invocations: int): s.close() @staticmethod - def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict): + def execute_instance( + sleep_time: int, pid: int, tid: int, func: Function, payload: dict + ): try: - print(f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!") + print( + f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!" + ) begin = datetime.now() res = func.triggers(Trigger.TriggerType.HTTP)[0].sync_invoke(payload) end = datetime.now() @@ -111,7 +115,9 @@ def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payloa logging.error(f"First Invocation Failed at function {func.name}, {e}") raise RuntimeError() - time_spent = float(datetime.now().strftime("%s.%f")) - float(end.strftime("%s.%f")) + time_spent = float(datetime.now().strftime("%s.%f")) - float( + end.strftime("%s.%f") + ) seconds_sleep = sleep_time - time_spent print(f"PID {pid} TID {tid} with time {time}, sleep {seconds_sleep}") time.sleep(seconds_sleep) @@ -198,7 +204,9 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): # if self._benchmark.functions and fname in self._benchmark.functions: # self.logging.info(f"Skip {fname}, exists already.") # continue - self.functions.append(deployment_client.get_function(self._benchmark, func_name=fname)) + self.functions.append( + deployment_client.get_function(self._benchmark, func_name=fname) + ) def run(self): @@ -215,7 +223,9 @@ def run(self): # function_names = self.functions_names[invocation_idx :: self.function_copies_per_time] # flake8 issue # https://github.com/PyCQA/pycodestyle/issues/373 - functions = self.functions[invocation_idx :: self.function_copies_per_time] # noqa + functions = self.functions[ + invocation_idx :: self.function_copies_per_time + ] # noqa results = {} # Disable logging - otherwise we have RLock that can't get be pickled @@ -257,7 +267,9 @@ def run(self): """ for j in range(0, threads): servers_results.append( - pool.apply_async(EvictionModel.accept_replies, args=(port + j, invocations)) + pool.apply_async( + EvictionModel.accept_replies, args=(port + j, invocations) + ) ) """ diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index 11bbe403..0cbbdd8c 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -15,7 +15,9 @@ class CodePackageSize: - def __init__(self, deployment_client: FaaSSystem, benchmark: CodePackage, settings: dict): + def __init__( + self, deployment_client: FaaSSystem, benchmark: CodePackage, settings: dict + ): import math from numpy import linspace @@ -26,7 +28,9 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: CodePackage, settin ) from sebs.utils import find_package_code - self._benchmark_path = find_package_code("030.clock-synchronization", "benchmarks") + self._benchmark_path = find_package_code( + "030.clock-synchronization", "benchmarks" + ) self._benchmark = benchmark random.seed(1410) @@ -89,7 +93,9 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self._trigger = triggers[0] self._storage = deployment_client.get_storage(replace_existing=True) - self.benchmark_input = self._benchmark.prepare_input(storage=self._storage, size="test") + self.benchmark_input = self._benchmark.prepare_input( + storage=self._storage, size="test" + ) self._out_dir = 
os.path.join( sebs_client.output_dir, "invocation-overhead", self.settings["type"] ) @@ -107,7 +113,9 @@ def run(self): N = self.settings["N"] if self.settings["type"] == "code": - experiment = CodePackageSize(self._deployment_client, self._benchmark, self.settings) + experiment = CodePackageSize( + self._deployment_client, self._benchmark, self.settings + ) else: experiment = PayloadSize(self.settings) @@ -143,9 +151,13 @@ def run(self): for i in range(repetitions): succesful = False while not succesful: - self.logging.info(f"Starting with {size} bytes, repetition {i}") + self.logging.info( + f"Starting with {size} bytes, repetition {i}" + ) if result_type == "cold": - self._deployment_client.enforce_cold_start([self._function]) + self._deployment_client.enforce_cold_start( + [self._function] + ) time.sleep(1) row = self.receive_datagrams(input_benchmark, N, 12000, ip) if result_type == "cold": @@ -160,7 +172,9 @@ def run(self): succesful = True time.sleep(5) - self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) + self._storage.download_bucket( + self.benchmark_input["output-bucket"], self._out_dir + ) def process( self, @@ -175,7 +189,9 @@ def process( full_data: Dict[str, pd.Dataframe] = {} for f in glob.glob( - os.path.join(directory, "invocation-overhead", self.settings["type"], "*.csv") + os.path.join( + directory, "invocation-overhead", self.settings["type"], "*.csv" + ) ): if "result.csv" in f or "result-processed.csv" in f: @@ -188,13 +204,18 @@ def process( else: full_data[request_id] = data df = pd.concat(full_data.values()).reset_index(drop=True) - df["rtt"] = (df["server_rcv"] - df["client_send"]) + (df["client_rcv"] - df["server_send"]) + df["rtt"] = (df["server_rcv"] - df["client_send"]) + ( + df["client_rcv"] - df["server_send"] + ) df["clock_drift"] = ( - (df["client_send"] - df["server_rcv"]) + (df["client_rcv"] - df["server_send"]) + (df["client_send"] - df["server_rcv"]) + + (df["client_rcv"] - df["server_send"]) ) / 2 with open( - os.path.join(directory, "invocation-overhead", self.settings["type"], "result.csv") + os.path.join( + directory, "invocation-overhead", self.settings["type"], "result.csv" + ) ) as csvfile: with open( os.path.join( @@ -226,15 +247,23 @@ def process( request_id = row[-1] clock_drift = df[df["id"] == request_id]["clock_drift"].mean() clock_drift_std = df[df["id"] == request_id]["clock_drift"].std() - invocation_time = float(row[5]) - float(row[4]) - float(row[3]) + clock_drift - writer.writerow(row + [clock_drift, clock_drift_std, invocation_time]) + invocation_time = ( + float(row[5]) - float(row[4]) - float(row[3]) + clock_drift + ) + writer.writerow( + row + [clock_drift, clock_drift_std, invocation_time] + ) - def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str): + def receive_datagrams( + self, input_benchmark: dict, repetitions: int, port: int, ip: str + ): import socket input_benchmark["server-port"] = port - self.logging.info(f"Starting invocation with {repetitions} repetitions on port {port}") + self.logging.info( + f"Starting invocation with {repetitions} repetitions on port {port}" + ) socket.setdefaulttimeout(4) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.bind(("", port)) @@ -260,7 +289,8 @@ def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, # stop after 5 attempts if j == 5: self.logging.error( - "Failing after 5 unsuccesfull attempts to " "communicate with the function!" 
+ "Failing after 5 unsuccesfull attempts to " + "communicate with the function!" ) break # check if function invocation failed, and if yes: raise the exception diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index a95506de..b9a767d3 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -30,7 +30,9 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): ) self._function = deployment_client.get_function(benchmark) self._storage = deployment_client.get_storage(replace_existing=True) - self.benchmark_input = benchmark.prepare_input(storage=self._storage, size="test") + self.benchmark_input = benchmark.prepare_input( + storage=self._storage, size="test" + ) self._out_dir = os.path.join(sebs_client.output_dir, "network-ping-pong") if not os.path.exists(self._out_dir): # shutil.rmtree(self._out_dir) @@ -59,7 +61,9 @@ def run(self): # give functions time to finish and upload result time.sleep(5) - self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) + self._storage.download_bucket( + self.benchmark_input["output-bucket"], self._out_dir + ) def process(self, directory: str): @@ -73,7 +77,9 @@ def process(self, directory: str): else: full_data[request_id] = data df = pd.concat(full_data.values()).reset_index(drop=True) - df["rtt"] = (df["server_rcv"] - df["client_send"]) + (df["client_rcv"] - df["server_send"]) + df["rtt"] = (df["server_rcv"] - df["client_send"]) + ( + df["client_rcv"] - df["server_send"] + ) print("Rows: ", df.shape[0]) print("Mean: ", df["rtt"].mean()) print("STD: ", df["rtt"].std()) @@ -101,7 +107,9 @@ def receive_datagrams(self, repetitions: int, port: int, ip: str): "repetitions": repetitions, **self.benchmark_input, } - self._function.triggers(Trigger.TriggerType.HTTP)[0].async_invoke(input_benchmark) + self._function.triggers(Trigger.TriggerType.HTTP)[0].async_invoke( + input_benchmark + ) begin = datetime.now() times = [] diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 2457c0e0..7c41003e 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -48,7 +48,9 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): ) self._function = deployment_client.get_function(self._benchmark) # prepare benchmark input - self._storage = deployment_client.get_storage(replace_existing=self.config.update_storage) + self._storage = deployment_client.get_storage( + replace_existing=self.config.update_storage + ) self._benchmark_input = self._benchmark.prepare_input( storage=self._storage, size=settings["input-size"] ) @@ -82,7 +84,9 @@ def run(self): self._function.memory = memory self._deployment_client.update_function(self._function, self._benchmark) self._sebs_client.cache_client.update_function(self._function) - self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) + self.run_configuration( + settings, settings["repetitions"], suffix=str(memory) + ) def compute_statistics(self, times: List[float]): @@ -152,7 +156,10 @@ def _run_configuration( first_iteration = True while samples_gathered < repetitions: - if run_type == PerfCost.RunType.COLD or run_type == PerfCost.RunType.BURST: + if ( + run_type == PerfCost.RunType.COLD + or run_type == PerfCost.RunType.BURST + ): self._deployment_client.enforce_cold_start( [self._function], self._benchmark ) @@ -173,8 +180,12 @@ def _run_configuration( ret = res.get() if first_iteration: continue - if (run_type == 
PerfCost.RunType.COLD and not ret.stats.cold_start) or ( - run_type == PerfCost.RunType.WARM and ret.stats.cold_start + if ( + run_type == PerfCost.RunType.COLD + and not ret.stats.cold_start + ) or ( + run_type == PerfCost.RunType.WARM + and ret.stats.cold_start ): self.logging.info( f"Invocation {ret.request_id} " @@ -258,7 +269,9 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): PerfCost.RunType.SEQUENTIAL, settings, 1, repetitions, suffix ) else: - raise RuntimeError(f"Unknown experiment type {experiment_type} for Perf-Cost!") + raise RuntimeError( + f"Unknown experiment type {experiment_type} for Perf-Cost!" + ) def process( self, @@ -305,7 +318,9 @@ def process( else: if os.path.exists( - os.path.join(directory, "perf-cost", f"{name}-processed{extension}") + os.path.join( + directory, "perf-cost", f"{name}-processed{extension}" + ) ): self.logging.info(f"Skipping already processed {f}") continue @@ -349,12 +364,17 @@ def process( name, extension = os.path.splitext(f) with open( - os.path.join(directory, "perf-cost", f"{name}-processed{extension}"), + os.path.join( + directory, "perf-cost", f"{name}-processed{extension}" + ), "w", ) as out_f: out_f.write( serialize( - {**json.loads(serialize(experiments)), "statistics": statistics} + { + **json.loads(serialize(experiments)), + "statistics": statistics, + } ) ) for func in experiments.functions(): diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index 5087b904..3357ace5 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -61,7 +61,9 @@ def metrics(self, func: str) -> dict: return self._metrics[func] @staticmethod - def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> "Result": + def deserialize( + cached_config: dict, cache: Cache, handlers: LoggingHandlers + ) -> "Result": invocations: Dict[str, dict] = {} for func, func_invocations in cached_config["_invocations"].items(): invocations[func] = {} @@ -69,7 +71,9 @@ def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> invocations[func][invoc_id] = ExecutionResult.deserialize(invoc) ret = Result( ExperimentConfig.deserialize(cached_config["config"]["experiments"]), - DeploymentConfig.deserialize(cached_config["config"]["deployment"], cache, handlers), + DeploymentConfig.deserialize( + cached_config["config"]["deployment"], cache, handlers + ), invocations, # FIXME: compatibility with old results cached_config["metrics"] if "metrics" in cached_config else {}, diff --git a/sebs/faas/benchmark.py b/sebs/faas/benchmark.py index 891a9924..60458495 100644 --- a/sebs/faas/benchmark.py +++ b/sebs/faas/benchmark.py @@ -131,12 +131,15 @@ def __init__(self): self.billing = ExecutionBilling() @staticmethod - def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": + def from_times( + client_time_begin: datetime, client_time_end: datetime + ) -> "ExecutionResult": ret = ExecutionResult() ret.times.client_begin = client_time_begin ret.times.client_end = client_time_end ret.times.client = int( - (client_time_end - client_time_begin) / timedelta(microseconds=1)) + (client_time_end - client_time_begin) / timedelta(microseconds=1) + ) return ret def parse_benchmark_output(self, output: dict): @@ -153,8 +156,7 @@ def parse_benchmark_output(self, output: dict): def parse_benchmark_execution(self, execution: Execution): self.output = json.loads(execution.result) self.times.benchmark = int( - (execution.start_time - execution.end_time) - / 
timedelta(microseconds=1) + (execution.start_time - execution.end_time) / timedelta(microseconds=1) ) @staticmethod @@ -162,8 +164,7 @@ def deserialize(cached_config: dict) -> "ExecutionResult": ret = ExecutionResult() ret.times = ExecutionTimes.deserialize(cached_config["times"]) ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) - ret.provider_times = ProviderTimes.deserialize( - cached_config["provider_times"]) + ret.provider_times = ProviderTimes.deserialize(cached_config["provider_times"]) ret.stats = ExecutionStats.deserialize(cached_config["stats"]) ret.request_id = cached_config["request_id"] ret.output = cached_config["output"] @@ -215,10 +216,12 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: if status_code != 200: self.logging.error( - "Invocation on URL {} failed with status code {}!".format(url, status_code)) + "Invocation on URL {} failed with status code {}!".format( + url, status_code + ) + ) self.logging.error("Output: {}".format(output)) - raise RuntimeError( - f"Failed invocation of function! Output: {output}") + raise RuntimeError(f"Failed invocation of function! Output: {output}") self.logging.debug("Invoke of function was successful") result = ExecutionResult.from_times(begin, end) @@ -230,10 +233,14 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: return result except json.decoder.JSONDecodeError: self.logging.error( - "Invocation on URL {} failed with status code {}!".format(url, status_code)) + "Invocation on URL {} failed with status code {}!".format( + url, status_code + ) + ) self.logging.error("Output: {}".format(data.getvalue().decode())) raise RuntimeError( - f"Failed invocation of function! Output: {data.getvalue().decode()}") + f"Failed invocation of function! 
Output: {data.getvalue().decode()}" + ) # FIXME: 3.7+, future annotations @staticmethod @@ -300,7 +307,11 @@ def updated_code(self, val: bool): self._updated_code = val def triggers_all(self) -> List[Trigger]: - return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] + return [ + trig + for trigger_type, triggers in self._triggers.items() + for trig in triggers + ] def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: try: @@ -320,7 +331,9 @@ def serialize(self) -> dict: "hash": self._code_package_hash, "code_package": self._code_package, "triggers": [ - obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers + obj.serialize() + for t_type, triggers in self._triggers.items() + for obj in triggers ], } @@ -333,5 +346,6 @@ def deserialize(cached_config: dict) -> "Function": class Function(Benchmark): pass + class Workflow(Benchmark): - pass \ No newline at end of file + pass diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 55730e88..4eb349fe 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -29,7 +29,9 @@ def __init__(self): @staticmethod @abstractmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Credentials": + def deserialize( + config: dict, cache: Cache, handlers: LoggingHandlers + ) -> "Credentials": pass """ @@ -60,7 +62,9 @@ def __init__(self): @staticmethod @abstractmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + def deserialize( + config: dict, cache: Cache, handlers: LoggingHandlers + ) -> "Resources": pass """ diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index fb46c1ef..896b1f36 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -5,7 +5,6 @@ class State(ABC): - def __init__(self, name: str): self.name = name @@ -16,32 +15,19 @@ def deserialize(name: str, payload: dict) -> "State": class Task(State): - - def __init__(self, - name: str, - func_name: str, - next: Optional[str]): + def __init__(self, name: str, func_name: str, next: Optional[str]): self.name = name self.func_name = func_name self.next = next @classmethod def deserialize(cls, name: str, payload: dict) -> "Task": - return cls( - name=name, - func_name=payload["func_name"], - next=payload.get("next") - ) + return cls(name=name, func_name=payload["func_name"], next=payload.get("next")) class Switch(State): - class Case: - def __init__(self, - var: str, - op: str, - val: str, - next: str): + def __init__(self, var: str, op: str, val: str, next: str): self.var = var self.op = op self.val = val @@ -51,10 +37,7 @@ def __init__(self, def deserialize(payload: dict) -> "Switch.Case": return Switch.Case(**payload) - def __init__(self, - name: str, - cases: List[Case], - default: Optional[str]): + def __init__(self, name: str, cases: List[Case], default: Optional[str]): self.name = name self.cases = cases self.default = default @@ -63,20 +46,11 @@ def __init__(self, def deserialize(cls, name: str, payload: dict) -> "Switch": cases = [Switch.Case.deserialize(c) for c in payload["cases"]] - return cls( - name=name, - cases=cases, - default=payload["default"] - ) + return cls(name=name, cases=cases, default=payload["default"]) class Map(State): - - def __init__(self, - name: str, - func_name: str, - array: str, - next: Optional[str]): + def __init__(self, name: str, func_name: str, array: str, next: Optional[str]): self.name = name self.func_name = func_name self.array = array @@ -88,19 +62,14 @@ def deserialize(cls, name: str, payload: dict) 
-> "Map": name=name, func_name=payload["func_name"], array=payload["array"], - next=payload.get("next") + next=payload.get("next"), ) -_STATE_TYPES = { - "task": Task, - "switch": Switch, - "map": Map -} +_STATE_TYPES = {"task": Task, "switch": Switch, "map": Map} class Generator(ABC): - def __init__(self, export_func: Callable[[dict], str] = json.dumps): self._export_func = export_func @@ -108,8 +77,9 @@ def parse(self, path: str): with open(path) as f: definition = json.load(f) - self.states = {n: State.deserialize(n, s) - for n, s in definition["states"].items()} + self.states = { + n: State.deserialize(n, s) for n, s in definition["states"].items() + } self.root = self.states[definition["root"]] def generate(self) -> str: @@ -151,4 +121,4 @@ def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: @abstractmethod def encode_map(self, state: Map) -> Union[dict, List[dict]]: - pass \ No newline at end of file + pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index e54812e2..77f8fd80 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -168,7 +168,9 @@ def clean_bucket(self, bucket_name: str): def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): # Load cached information - cached_buckets = self.cache_client.get_storage_config(self.deployment_name(), benchmark) + cached_buckets = self.cache_client.get_storage_config( + self.deployment_name(), benchmark + ) if cached_buckets: self.input_buckets = cached_buckets["buckets"]["input"] for bucket in self.input_buckets: @@ -177,19 +179,27 @@ def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): # for bucket in self.output_buckets: # self.clean_bucket(bucket) self.cached = True - self.logging.info("Using cached storage input buckets {}".format(self.input_buckets)) - self.logging.info("Using cached storage output buckets {}".format(self.output_buckets)) + self.logging.info( + "Using cached storage input buckets {}".format(self.input_buckets) + ) + self.logging.info( + "Using cached storage output buckets {}".format(self.output_buckets) + ) return buckets = self.list_buckets(self.correct_name(benchmark)) for i in range(0, requested_buckets[0]): self.input_buckets.append( - self._create_bucket(self.correct_name("{}-{}-input".format(benchmark, i)), buckets) + self._create_bucket( + self.correct_name("{}-{}-input".format(benchmark, i)), buckets + ) ) self.input_buckets_files.append(self.list_bucket(self.input_buckets[-1])) for i in range(0, requested_buckets[1]): self.output_buckets.append( - self._create_bucket(self.correct_name("{}-{}-output".format(benchmark, i)), buckets) + self._create_bucket( + self.correct_name("{}-{}-output".format(benchmark, i)), buckets + ) ) self.save_storage(benchmark) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 6b345208..24274fba 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -109,7 +109,9 @@ def get_storage(self, replace_existing: bool) -> PersistentStorage: """ @abstractmethod - def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: + def package_code( + self, code_package: CodePackage, directory: str, is_workflow: bool + ) -> Tuple[str, int]: pass @abstractmethod @@ -117,7 +119,9 @@ def create_function(self, code_package: CodePackage, func_name: str) -> Function pass @abstractmethod - def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Workflow: + def create_workflow( + self, code_package: CodePackage, workflow_name: str + ) -> 
Workflow: pass @abstractmethod @@ -141,9 +145,14 @@ def update_function(self, function: Function, code_package: CodePackage): """ - def get_function(self, code_package: CodePackage, func_name: Optional[str] = None) -> Function: - if code_package.language_version not in self.system_config.supported_language_versions( - self.name(), code_package.language_name + def get_function( + self, code_package: CodePackage, func_name: Optional[str] = None + ) -> Function: + if ( + code_package.language_version + not in self.system_config.supported_language_versions( + self.name(), code_package.language_name + ) ): raise Exception( "Unsupported {language} version {version} in {system}!".format( @@ -189,7 +198,8 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non self.cached_benchmark(function) self.logging.info( "Using cached function {fname} in {loc}".format( - fname=func_name, loc=code_location) + fname=func_name, loc=code_location + ) ) # is the function up-to-date? if function.code_package_hash != code_package.hash or rebuilt: @@ -215,9 +225,14 @@ def get_function(self, code_package: CodePackage, func_name: Optional[str] = Non def update_workflow(self, workflow: Workflow, code_package: CodePackage): pass - def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = None): - if code_package.language_version not in self.system_config.supported_language_versions( - self.name(), code_package.language_name + def get_workflow( + self, code_package: CodePackage, workflow_name: Optional[str] = None + ): + if ( + code_package.language_version + not in self.system_config.supported_language_versions( + self.name(), code_package.language_name + ) ): raise Exception( "Unsupported {language} version {version} in {system}!".format( @@ -263,7 +278,8 @@ def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = self.cached_benchmark(workflow) self.logging.info( "Using cached workflow {workflow_name} in {loc}".format( - workflow_name=workflow_name, loc=code_location) + workflow_name=workflow_name, loc=code_location + ) ) # is the function up-to-date? 
if workflow.code_package_hash != code_package.hash or rebuilt: @@ -313,11 +329,15 @@ def create_trigger(self, obj, trigger_type: Trigger.TriggerType) -> Trigger: raise TypeError("Cannot create trigger for {obj}") @abstractmethod - def create_function_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger( + self, function: Function, trigger_type: Trigger.TriggerType + ) -> Trigger: pass @abstractmethod - def create_workflow_trigger(self, workflow: Workflow, trigger_type: Trigger.TriggerType) -> Trigger: + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: pass # @abstractmethod diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 11453376..ed448e02 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -36,7 +36,9 @@ def initialize(gcp_credentials: str) -> Credentials: return GCPCredentials(gcp_credentials) @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize( + config: dict, cache: Cache, handlers: LoggingHandlers + ) -> Credentials: cached_config = cache.get_config("gcp") ret: GCPCredentials # Load cached values but only if they are non-empty @@ -55,7 +57,9 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden else: # Check for new config if "credentials" in config and config["credentials"]: - ret = cast(GCPCredentials, GCPCredentials.initialize(config["credentials"])) + ret = cast( + GCPCredentials, GCPCredentials.initialize(config["credentials"]) + ) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ret.gcp_credentials # Look for default GCP credentials elif "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: @@ -89,7 +93,9 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): - cache.update_config(val=self.gcp_credentials, keys=["gcp", "credentials", "keys_json"]) + cache.update_config( + val=self.gcp_credentials, keys=["gcp", "credentials", "keys_json"] + ) """ @@ -117,17 +123,23 @@ def serialize(self) -> dict: return {} @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + def deserialize( + config: dict, cache: Cache, handlers: LoggingHandlers + ) -> "Resources": cached_config = cache.get_config("gcp") ret: GCPResources if cached_config and "resources" in cached_config: - ret = cast(GCPResources, GCPResources.initialize(cached_config["resources"])) + ret = cast( + GCPResources, GCPResources.initialize(cached_config["resources"]) + ) ret.logging_handlers = handlers ret.logging.info("Using cached resources for GCP") else: ret = cast(GCPResources, GCPResources.initialize(config)) ret.logging_handlers = handlers - ret.logging.info("No cached resources for GCP found, using user configuration.") + ret.logging.info( + "No cached resources for GCP found, using user configuration." 
+ ) return ret def update_cache(self, cache: Cache): @@ -172,8 +184,12 @@ def redis_host(self) -> str: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": cached_config = cache.get_config("gcp") - credentials = cast(GCPCredentials, GCPCredentials.deserialize(config, cache, handlers)) - resources = cast(GCPResources, GCPResources.deserialize(config, cache, handlers)) + credentials = cast( + GCPCredentials, GCPCredentials.deserialize(config, cache, handlers) + ) + resources = cast( + GCPResources, GCPResources.deserialize(config, cache, handlers) + ) config_obj = GCPConfig(credentials, resources) config_obj.logging_handlers = handlers if cached_config: @@ -186,7 +202,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi if "project_name" not in config or not config["project_name"]: if "GCP_PROJECT_NAME" in os.environ: GCPConfig.initialize( - config_obj, {**config, "project_name": os.environ["GCP_PROJECT_NAME"]} + config_obj, + {**config, "project_name": os.environ["GCP_PROJECT_NAME"]}, ) else: raise RuntimeError( @@ -208,7 +225,10 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi old_value = getattr(config_obj, config_key) # ignore empty values - if getattr(config_obj, config_key) != config[config_key] and config[config_key]: + if ( + getattr(config_obj, config_key) != config[config_key] + and config[config_key] + ): config_obj.logging.info( f"Updating cached key {config_key} with {old_value} " f"to user-provided value {config[config_key]}." @@ -232,7 +252,7 @@ def serialize(self) -> dict: "region": self._region, "credentials": self._credentials.serialize(), "resources": self._resources.serialize(), - "redis_host": self._redis_host + "redis_host": self._redis_host, } return out diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 317781cf..1a70fe4c 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -47,7 +47,9 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get( + trigger["type"] + ), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 27640e00..76a76dca 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -78,8 +78,7 @@ def workflow_type() -> "Type[Workflow]": """ def initialize(self, config: Dict[str, str] = {}): - self.function_client = build( - "cloudfunctions", "v1", cache_discovery=False) + self.function_client = build("cloudfunctions", "v1", cache_discovery=False) self.workflow_client = build("workflows", "v1", cache_discovery=False) self.get_storage() @@ -105,7 +104,8 @@ def get_storage( ) -> PersistentStorage: if not self.storage: self.storage = GCPStorage( - self.config.region, self.cache_client, replace_existing) + self.config.region, self.cache_client, replace_existing + ) self.storage.logging_handlers = self.logging_handlers else: self.storage.replace_existing = replace_existing @@ -143,7 +143,9 @@ def format_function_name(func_name: str) -> str: :return: path to packaged code and its size """ - def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: + def package_code( + self, code_package: CodePackage, directory: str, is_workflow: bool + ) -> Tuple[str, 
int]: CONFIG_FILES = { "python": ["handler.py", ".python_packages"], "nodejs": ["handler.js", "node_modules"], @@ -167,7 +169,9 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b new_path = os.path.join(directory, new_name) shutil.move(old_path, new_path) - replace_string_in_file(new_path, "{{REDIS_HOST}}", f"\"{self.config.redis_host}\"") + replace_string_in_file( + new_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"' + ) """ zip the whole directroy (the zip-file gets uploaded to gcp later) @@ -193,7 +197,9 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b return os.path.join(directory, "{}.zip".format(code_package.name)), bytes_size - def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunction": + def create_function( + self, code_package: CodePackage, func_name: str + ) -> "GCPFunction": package = code_package.code_location benchmark = code_package.name @@ -209,12 +215,16 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc code_bucket, idx = storage_client.add_input_bucket(benchmark) storage_client.upload(code_bucket, package, code_package_name) self.logging.info( - "Uploading function {} code to {}".format(func_name, code_bucket)) + "Uploading function {} code to {}".format(func_name, code_bucket) + ) - full_func_name = GCP.get_full_function_name( - project_name, location, func_name) - get_req = self.function_client.projects( - ).locations().functions().get(name=full_func_name) + full_func_name = GCP.get_full_function_name(project_name, location, func_name) + get_req = ( + self.function_client.projects() + .locations() + .functions() + .get(name=full_func_name) + ) try: get_req.execute() except HttpError: @@ -227,12 +237,16 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc body={ "name": full_func_name, "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), + "runtime": code_package.language_name + + language_runtime.replace(".", ""), "availableMemoryMb": memory, "timeout": str(timeout) + "s", "httpsTrigger": {}, "ingressSettings": "ALLOW_ALL", - "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_package_name, + "sourceArchiveUrl": "gs://" + + code_bucket + + "/" + + code_package_name, }, ) ) @@ -247,8 +261,10 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc body={ "policy": { "bindings": [ - {"role": "roles/cloudfunctions.invoker", - "members": ["allUsers"]} + { + "role": "roles/cloudfunctions.invoker", + "members": ["allUsers"], + } ] } }, @@ -256,7 +272,8 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc ) allow_unauthenticated_req.execute() self.logging.info( - f"Function {func_name} accepts now unauthenticated invocations!") + f"Function {func_name} accepts now unauthenticated invocations!" 
+ ) function = GCPFunction( func_name, benchmark, code_package.hash, timeout, memory, code_bucket @@ -264,7 +281,8 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc else: # if result is not empty, then function does exists self.logging.info( - "Function {} exists on GCP, update the instance.".format(func_name)) + "Function {} exists on GCP, update the instance.".format(func_name) + ) function = GCPFunction( name=func_name, @@ -285,8 +303,9 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc return function - def create_function_trigger(self, function: Function, - trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger( + self, function: Function, trigger_type: Trigger.TriggerType + ) -> Trigger: from sebs.gcp.triggers import HTTPTrigger if trigger_type == Trigger.TriggerType.HTTP: @@ -294,11 +313,14 @@ def create_function_trigger(self, function: Function, location = self.config.region project_name = self.config.project_name full_func_name = GCP.get_full_function_name( - project_name, location, function.name) - self.logging.info( - f"Function {function.name} - waiting for deployment...") + project_name, location, function.name + ) + self.logging.info(f"Function {function.name} - waiting for deployment...") our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) + self.function_client.projects() + .locations() + .functions() + .get(name=full_func_name) ) deployed = False @@ -339,8 +361,7 @@ def update_function(self, function: Function, code_package: CodePackage): bucket = function.code_bucket(code_package.name, storage) storage.upload(bucket, code_package.code_location, code_package_name) - self.logging.info( - f"Uploaded new code package to {bucket}/{code_package_name}") + self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") full_func_name = GCP.get_full_function_name( self.config.project_name, self.config.region, function.name ) @@ -353,7 +374,8 @@ def update_function(self, function: Function, code_package: CodePackage): body={ "name": full_func_name, "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), + "runtime": code_package.language_name + + language_runtime.replace(".", ""), "availableMemoryMb": function.memory, "timeout": str(function.timeout) + "s", "httpsTrigger": {}, @@ -374,7 +396,9 @@ def update_function(self, function: Function, code_package: CodePackage): def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" - def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCPWorkflow": + def create_workflow( + self, code_package: CodePackage, workflow_name: str + ) -> "GCPWorkflow": benchmark = code_package.name timeout = code_package.config.timeout memory = code_package.config.memory @@ -383,20 +407,20 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP project_name = self.config.project_name # Make sure we have a valid workflow benchmark - definition_path = os.path.join( - code_package.path, "definition.json") + definition_path = os.path.join(code_package.path, "definition.json") if not os.path.exists(definition_path): - raise ValueError( - f"No workflow definition found for {workflow_name}") + raise ValueError(f"No workflow definition found for {workflow_name}") # First we create a function for each code file - prefix = 
workflow_name+"___" + prefix = workflow_name + "___" code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, prefix+fn) for fn in func_names] + funcs = [self.create_function(code_package, prefix + fn) for fn in func_names] # generate workflow definition.json - urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] + urls = [ + self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs + ] func_triggers = {n: u for (n, u) in zip(func_names, urls)} gen = GCPGenerator(workflow_name, func_triggers) @@ -407,7 +431,8 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP parent = GCP.get_location(project_name, location) for map_id, map_def in gen.generate_maps(): full_workflow_name = GCP.get_full_workflow_name( - project_name, location, map_id) + project_name, location, map_id + ) create_req = ( self.workflow_client.projects() .locations() @@ -418,16 +443,21 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP body={ "name": full_workflow_name, "sourceContents": map_def, - } + }, ) ) ret = create_req.execute() self.logging.info(f"Map workflow {map_id} has been created!") full_workflow_name = GCP.get_full_workflow_name( - project_name, location, workflow_name) - get_req = self.workflow_client.projects().locations( - ).workflows().get(name=full_workflow_name) + project_name, location, workflow_name + ) + get_req = ( + self.workflow_client.projects() + .locations() + .workflows() + .get(name=full_workflow_name) + ) try: get_req.execute() @@ -442,19 +472,26 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP body={ "name": full_workflow_name, "sourceContents": definition, - } + }, ) ) ret = create_req.execute() self.logging.info(f"Workflow {workflow_name} has been created!") workflow = GCPWorkflow( - workflow_name, funcs, benchmark, code_package.hash, timeout, memory, code_bucket + workflow_name, + funcs, + benchmark, + code_package.hash, + timeout, + memory, + code_bucket, ) else: # if result is not empty, then function does exists self.logging.info( - "Workflow {} exists on GCP, update the instance.".format(workflow_name)) + "Workflow {} exists on GCP, update the instance.".format(workflow_name) + ) workflow = GCPWorkflow( name=workflow_name, @@ -476,13 +513,13 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP return workflow - def create_workflow_trigger(self, workflow: Workflow, - trigger_type: Trigger.TriggerType) -> Trigger: + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: from sebs.gcp.triggers import WorkflowLibraryTrigger if trigger_type == Trigger.TriggerType.HTTP: - raise NotImplementedError( - 'Cannot create http triggers for workflows.') + raise NotImplementedError("Cannot create http triggers for workflows.") else: trigger = WorkflowLibraryTrigger(workflow.name, self) @@ -495,20 +532,20 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): workflow = cast(GCPWorkflow, workflow) # Make sure we have a valid workflow benchmark - definition_path = os.path.join( - code_package.path, "definition.json") + definition_path = os.path.join(code_package.path, "definition.json") if not os.path.exists(definition_path): - raise ValueError( - f"No workflow definition found for {workflow.name}") + raise 
ValueError(f"No workflow definition found for {workflow.name}") # First we create a function for each code file - prefix = workflow.name+"___" + prefix = workflow.name + "___" code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] - funcs = [self.create_function(code_package, prefix+fn) for fn in func_names] + funcs = [self.create_function(code_package, prefix + fn) for fn in func_names] # Generate workflow definition.json - urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] + urls = [ + self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs + ] func_triggers = {n: u for (n, u) in zip(func_names, urls)} gen = GCPGenerator(workflow.name, func_triggers) gen.parse(definition_path) @@ -527,7 +564,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): body={ "name": full_workflow_name, "sourceContents": map_def, - } + }, ) ) ret = patch_req.execute() @@ -542,10 +579,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): .workflows() .patch( name=full_workflow_name, - body={ - "name": full_workflow_name, - "sourceContents": definition - }, + body={"name": full_workflow_name, "sourceContents": definition}, ) ) req.execute() @@ -564,7 +598,12 @@ def shutdown(self) -> None: super().shutdown() def download_metrics( - self, function_name: str, start_time: int, end_time: int, requests: dict, metrics: dict + self, + function_name: str, + start_time: int, + end_time: int, + requests: dict, + metrics: dict, ): from google.api_core import exceptions @@ -577,8 +616,7 @@ def wrapper(gen): except StopIteration: break except exceptions.ResourceExhausted: - self.logging.info( - "Google Cloud resources exhausted, sleeping 30s") + self.logging.info("Google Cloud resources exhausted, sleeping 30s") sleep(30) """ @@ -591,7 +629,8 @@ def wrapper(gen): logging_client = gcp_logging.Client() logger = logging_client.logger( - "cloudfunctions.googleapis.com%2Fcloud-functions") + "cloudfunctions.googleapis.com%2Fcloud-functions" + ) """ GCP accepts only single date format: 'YYYY-MM-DDTHH:MM:SSZ'. 
@@ -633,8 +672,9 @@ def wrapper(gen): assert regex_result exec_time = regex_result.group().split()[0] # convert into microseconds - requests[execution_id].provider_times.execution = int( - exec_time) * 1000 + requests[execution_id].provider_times.execution = ( + int(exec_time) * 1000 + ) invocations_processed += 1 self.logging.info( f"GCP: Received {entries} entries, found time metrics for {invocations_processed} " @@ -648,8 +688,7 @@ def wrapper(gen): """ # Set expected metrics here - available_metrics = ["execution_times", - "user_memory_bytes", "network_egress"] + available_metrics = ["execution_times", "user_memory_bytes", "network_egress"] client = monitoring_v3.MetricServiceClient() project_name = client.common_project_path(self.config.project_name) @@ -671,7 +710,8 @@ def wrapper(gen): list_request = monitoring_v3.ListTimeSeriesRequest( name=project_name, filter='metric.type = "cloudfunctions.googleapis.com/function/{}"'.format( - metric), + metric + ), interval=interval, ) @@ -699,8 +739,9 @@ def _enforce_cold_start(self, function: Function): .patch( name=name, updateMask="environmentVariables", - body={"environmentVariables": { - "cold_start": str(self.cold_start_counter)}}, + body={ + "environmentVariables": {"cold_start": str(self.cold_start_counter)} + }, ) ) res = req.execute() @@ -723,8 +764,7 @@ def enforce_cold_start(self, functions: List[Function], code_package: CodePackag if not self.is_deployed(func.name, versionId): undeployed_functions.append((versionId, func)) deployed = len(new_versions) - len(undeployed_functions) - self.logging.info( - f"Redeployed {deployed} out of {len(new_versions)}") + self.logging.info(f"Redeployed {deployed} out of {len(new_versions)}") if deployed == len(new_versions): deployment_done = True break @@ -734,7 +774,9 @@ def enforce_cold_start(self, functions: List[Function], code_package: CodePackag self.cold_start_counter += 1 - def get_functions(self, code_package: CodePackage, function_names: List[str]) -> List["Function"]: + def get_functions( + self, code_package: CodePackage, function_names: List[str] + ) -> List["Function"]: functions: List["Function"] = [] undeployed_functions_before = [] @@ -750,10 +792,10 @@ def get_functions(self, code_package: CodePackage, function_names: List[str]) -> for func in undeployed_functions_before: if not self.is_deployed(func.name): undeployed_functions.append(func) - deployed = len(undeployed_functions_before) - \ - len(undeployed_functions) + deployed = len(undeployed_functions_before) - len(undeployed_functions) self.logging.info( - f"Deployed {deployed} out of {len(undeployed_functions_before)}") + f"Deployed {deployed} out of {len(undeployed_functions_before)}" + ) if deployed == len(undeployed_functions_before): deployment_done = True break @@ -766,7 +808,8 @@ def get_functions(self, code_package: CodePackage, function_names: List[str]) -> def is_deployed(self, func_name: str, versionId: int = -1) -> bool: name = GCP.get_full_function_name( - self.config.project_name, self.config.region, func_name) + self.config.project_name, self.config.region, func_name + ) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() @@ -777,7 +820,8 @@ def is_deployed(self, func_name: str, versionId: int = -1) -> bool: def deployment_version(self, func: Function) -> int: name = GCP.get_full_function_name( - self.config.project_name, self.config.region, func.name) + self.config.project_name, self.config.region, 
func.name + ) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() @@ -813,8 +857,7 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): GCP.helper_zip(base_directory, directory, archive) else: if directory != archive.filename: # prevent form including itself - archive.write(directory, os.path.relpath( - directory, base_directory)) + archive.write(directory, os.path.relpath(directory, base_directory)) """ https://gist.github.com/felixSchl/d38b455df8bf83a78d3d @@ -829,8 +872,7 @@ def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): @staticmethod def recursive_zip(directory: str, archname: str): - archive = zipfile.ZipFile( - archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) + archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9) if os.path.isdir(directory): GCP.helper_zip(directory, directory, archive) else: diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 83ef9f14..0dde4a6a 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -5,7 +5,6 @@ class GCPGenerator(Generator): - def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): super().__init__() self._workflow_name = workflow_name @@ -13,22 +12,9 @@ def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): self._map_funcs = dict() def postprocess(self, states: List[State], payloads: List[dict]) -> dict: - payloads.append({ - "final": { - "return": [ - "${res}" - ] - } - }) + payloads.append({"final": {"return": ["${res}"]}}) - definition = { - "main" : { - "params": [ - "res" - ], - "steps": payloads - } - } + definition = {"main": {"params": ["res"], "steps": payloads}} return definition @@ -39,38 +25,24 @@ def encode_task(self, state: Task) -> Union[dict, List[dict]]: { state.name: { "call": "http.post", - "args": { - "url": url, - "body": "${res}" - }, - "result": "res" + "args": {"url": url, "body": "${res}"}, + "result": "res", } }, - { - "assign_res_"+state.name: { - "assign": [ - { - "res": "${res.body}" - } - ] - } - } + {"assign_res_" + state.name: {"assign": [{"res": "${res.body}"}]}}, ] def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: return { state.name: { "switch": [self._encode_case(c) for c in state.cases], - "next": state.default + "next": state.default, } } def _encode_case(self, case: Switch.Case) -> dict: cond = "res." + case.var + " " + case.op + " " + str(case.val) - return { - "condition": "${"+cond+"}", - "next": case.next - } + return {"condition": "${" + cond + "}", "next": case.next} def encode_map(self, state: Map) -> Union[dict, List[dict]]: id = self._workflow_name + "_" + "map" + str(uuid.uuid4())[0:8] @@ -79,37 +51,30 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: return { state.name: { "call": "experimental.executions.map", - "args": { - "workflow_id": id, - "arguments": "${res." + state.array + "}" - }, - "result": "res" + "args": {"workflow_id": id, "arguments": "${res." 
+ state.array + "}"}, + "result": "res", } } - def generate_maps(self): for workflow_id, url in self._map_funcs.items(): - yield (workflow_id, self._export_func({ - "main" : { - "params": ["elem"], - "steps": [ - { - "map": { - "call": "http.post", - "args": { - "url": url, - "body": "${elem}" + yield ( + workflow_id, + self._export_func( + { + "main": { + "params": ["elem"], + "steps": [ + { + "map": { + "call": "http.post", + "args": {"url": url, "body": "${elem}"}, + "result": "elem", + } }, - "result": "elem" - } - }, - { - "ret": { - "return": "${elem.body}" - } + {"ret": {"return": "${elem.body}"}}, + ], } - ] - } - })) - + } + ), + ) diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 8202cd0e..9b8503da 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -48,7 +48,9 @@ def _create_bucket(self, name, buckets: List[str] = []): logging.info("Created bucket {}".format(bucket_name)) return bucket_name else: - logging.info("Bucket {} for {} already exists, skipping.".format(bucket_name, name)) + logging.info( + "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + ) return bucket_name def download(self, bucket_name: str, key: str, filepath: str) -> None: @@ -61,7 +63,9 @@ def upload(self, bucket_name: str, filepath: str, key: str): logging.info("Upload {} to {}".format(filepath, bucket_name)) bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key, chunk_size=4 * 1024 * 1024) - gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # workaround for connection timeout + gcp_storage.blob._MAX_MULTIPART_SIZE = ( + 5 * 1024 * 1024 + ) # workaround for connection timeout blob.upload_from_filename(filepath) def list_bucket(self, bucket_name: str) -> List[str]: @@ -96,7 +100,9 @@ def uploader_func(self, bucket_idx: int, key: str, filepath: str) -> None: if not self.replace_existing: for blob in self.input_buckets_files[bucket_idx]: if key == blob: - logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) + logging.info( + "Skipping upload of {} to {}".format(filepath, bucket_name) + ) return bucket_name = self.input_buckets[bucket_idx] self.upload(bucket_name, filepath, key) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 88dc9e12..fe000b8e 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -61,7 +61,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # GCP's fixed style for a function name config = self.deployment_client.config full_func_name = ( - f"projects/{config.project_name}/locations/" f"{config.region}/functions/{self.name}" + f"projects/{config.project_name}/locations/" + f"{config.region}/functions/{self.name}" ) function_client = self.deployment_client.get_function_client() req = ( @@ -103,14 +104,16 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # GCP's fixed style for a function name config = self.deployment_client.config full_workflow_name = GCP.get_full_workflow_name( - config.project_name, config.region, self.name) + config.project_name, config.region, self.name + ) execution_client = ExecutionsClient() execution = Execution(argument=json.dumps(payload)) begin = datetime.datetime.now() res = execution_client.create_execution( - parent=full_workflow_name, execution=execution) + parent=full_workflow_name, execution=execution + ) end = datetime.datetime.now() gcp_result = ExecutionResult.from_times(begin, end) @@ -118,9 +121,8 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Wait for execution to finish, then print results. 
execution_finished = False backoff_delay = 1 # Start wait with delay of 1 second - while (not execution_finished): - execution = execution_client.get_execution( - request={"name": res.name}) + while not execution_finished: + execution = execution_client.get_execution(request={"name": res.name}) execution_finished = execution.state != Execution.State.ACTIVE # If we haven't seen the result yet, wait a second. diff --git a/sebs/gcp/workflow.py b/sebs/gcp/workflow.py index f1846bc4..d598a16e 100644 --- a/sebs/gcp/workflow.py +++ b/sebs/gcp/workflow.py @@ -53,7 +53,9 @@ def deserialize(cached_config: dict) -> "GCPWorkflow": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), + {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get( + trigger["type"] + ), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/local/config.py b/sebs/local/config.py index 3c5e18ec..9a33c93a 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -8,7 +8,9 @@ def serialize(self) -> dict: return {} @staticmethod - def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + def deserialize( + config: dict, cache: Cache, handlers: LoggingHandlers + ) -> Credentials: return LocalCredentials() diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index d3f0e4b7..66a1d50c 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -26,7 +26,11 @@ def serialize(self, path: str): with open(path, "w") as out: out.write( serialize( - {"functions": self._functions, "storage": self._storage, "inputs": self._inputs} + { + "functions": self._functions, + "storage": self._storage, + "inputs": self._inputs, + } ) ) diff --git a/sebs/local/function.py b/sebs/local/function.py index 1516390b..397efabb 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -37,7 +37,12 @@ def deserialize(cls, obj: dict) -> Trigger: class LocalFunction(Function): def __init__( - self, docker_container, port: int, name: str, benchmark: str, code_package_hash: str + self, + docker_container, + port: int, + name: str, + benchmark: str, + code_package_hash: str, ): super().__init__(benchmark, name, code_package_hash) self._instance = docker_container diff --git a/sebs/local/local.py b/sebs/local/local.py index e8b06c5c..5f38ffc0 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -119,7 +119,9 @@ def shutdown(self): benchmark: benchmark name """ - def package_code(self, code_package: CodePackage, directory: str, is_workflow: bool) -> Tuple[str, int]: + def package_code( + self, code_package: CodePackage, directory: str, is_workflow: bool + ) -> Tuple[str, int]: CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], @@ -140,10 +142,13 @@ def package_code(self, code_package: CodePackage, directory: str, is_workflow: b return directory, bytes_size - def create_function(self, code_package: CodePackage, func_name: str) -> "LocalFunction": + def create_function( + self, code_package: CodePackage, func_name: str + ) -> "LocalFunction": home_dir = os.path.join( - "/home", self._system_config.username(self.name(), code_package.language_name) + "/home", + self._system_config.username(self.name(), code_package.language_name), ) container_name = "{}:run.local.{}.{}".format( self._system_config.docker_repository(), @@ -161,7 +166,10 @@ def create_function(self, 
code_package: CodePackage, func_name: str) -> "LocalFu image=container_name, command=f"python3 server.py {self.DEFAULT_PORT}", volumes={ - code_package.code_location: {"bind": os.path.join(home_dir, "code"), "mode": "ro"} + code_package.code_location: { + "bind": os.path.join(home_dir, "code"), + "mode": "ro", + } }, environment=environment, # FIXME: make CPUs configurable @@ -180,7 +188,11 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "LocalFu # tty=True, ) func = LocalFunction( - container, self.DEFAULT_PORT, func_name, code_package.name, code_package.hash + container, + self.DEFAULT_PORT, + func_name, + code_package.name, + code_package.hash, ) self.logging.info( f"Started {func_name} function at container {container.id} , running on {func._url}" @@ -199,7 +211,9 @@ def update_function(self, function: Function, code_package: CodePackage): There's only one trigger - HTTP. """ - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_trigger( + self, func: Function, trigger_type: Trigger.TriggerType + ) -> Trigger: from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) diff --git a/sebs/local/storage.py b/sebs/local/storage.py index c34f4c0d..2caaab3c 100644 --- a/sebs/local/storage.py +++ b/sebs/local/storage.py @@ -23,7 +23,9 @@ def deployment_name(): # the location does not matter MINIO_REGION = "us-east-1" - def __init__(self, docker_client: docker.client, cache_client: Cache, replace_existing: bool): + def __init__( + self, docker_client: docker.client, cache_client: Cache, replace_existing: bool + ): super().__init__(self.MINIO_REGION, cache_client, replace_existing) self._docker_client = docker_client self._port = 9000 @@ -54,7 +56,9 @@ def start(self): self.logging.error("Starting Minio storage failed! Reason: {}".format(e)) raise RuntimeError("Starting Minio storage unsuccesful") except Exception as e: - self.logging.error("Starting Minio storage failed! Unknown error: {}".format(e)) + self.logging.error( + "Starting Minio storage failed! Unknown error: {}".format(e) + ) raise RuntimeError("Starting Minio storage unsuccesful") def configure_connection(self): @@ -83,14 +87,19 @@ def stop(self): def get_connection(self): return minio.Minio( - self._url, access_key=self._access_key, secret_key=self._secret_key, secure=False + self._url, + access_key=self._access_key, + secret_key=self._secret_key, + secure=False, ) def _create_bucket(self, name: str, buckets: List[str] = []): for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + "Bucket {} for {} already exists, skipping.".format( + bucket_name, name + ) ) return bucket_name # minio has limit of bucket name to 16 characters @@ -137,7 +146,9 @@ def clean_bucket(self, bucket: str): ) errors = self.connection.remove_objects(bucket, delete_object_list) for error in errors: - self.logging.error("Error when deleting object from bucket {}: {}!", bucket, error) + self.logging.error( + "Error when deleting object from bucket {}: {}!", bucket, error + ) def correct_name(self, name: str) -> str: return name diff --git a/sebs/regression.py b/sebs/regression.py index 9c7e5bf8..e75b002e 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -40,7 +40,9 @@ def test(self): f"Begin regression test of {benchmark_name} on {deployment_client.name()}, " f"region: {deployment_client.config.region}." 
) - experiment_config = self.client.get_experiment_config(self.experiment_config) + experiment_config = self.client.get_experiment_config( + self.experiment_config + ) benchmark = self.client.get_benchmark( benchmark_name, deployment_client, experiment_config ) @@ -72,7 +74,9 @@ def test(self): failure = True print(f"{benchmark_name} fail on trigger: {trigger_type}") else: - print(f"{benchmark_name} success on trigger: {trigger_type}") + print( + f"{benchmark_name} success on trigger: {trigger_type}" + ) except RuntimeError: failure = True print(f"{benchmark_name} fail on trigger: {trigger_type}") @@ -162,7 +166,9 @@ def __init__(self): # no way to directly access test instance from here def status(self, *args, **kwargs): - self.all_correct = self.all_correct and (kwargs["test_status"] in ["inprogress", "success"]) + self.all_correct = self.all_correct and ( + kwargs["test_status"] in ["inprogress", "success"] + ) test_name = kwargs["test_id"].split("_")[-1] if not kwargs["test_status"]: test_id = kwargs["test_id"] @@ -172,7 +178,11 @@ def status(self, *args, **kwargs): elif kwargs["test_status"] == "fail": print("\n-------------\n") print("{0[test_id]}: {0[test_status]}".format(kwargs)) - print("{0[test_id]}: {1}".format(kwargs, self.output[kwargs["test_id"]].decode())) + print( + "{0[test_id]}: {1}".format( + kwargs, self.output[kwargs["test_id"]].decode() + ) + ) print("\n-------------\n") self.failures.add(test_name) elif kwargs["test_status"] == "success": @@ -194,7 +204,9 @@ def regression_suite( suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequence)) if "azure" in providers: assert "azure" in cloud_config - suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequence)) + suite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequence) + ) if "gcp" in providers: assert "gcp" in cloud_config suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequence)) @@ -211,7 +223,9 @@ def regression_suite( print(f"Select test {test_name}") else: print(f"Skip test {test_name}") - concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((test, None) for test in tests)) + concurrent_suite = testtools.ConcurrentStreamTestSuite( + lambda: ((test, None) for test in tests) + ) result = TracingStreamResult() result.startTestRun() concurrent_suite.run(result) @@ -220,7 +234,9 @@ def regression_suite( for suc in result.success: print(f"- {suc}") if len(result.failures): - print(f"Failures when executing {len(result.failures)} out of {len(tests)} functions") + print( + f"Failures when executing {len(result.failures)} out of {len(tests)} functions" + ) for failure in result.failures: print(f"- {failure}") return not result.all_correct diff --git a/sebs/sebs.py b/sebs/sebs.py index e19ae9cd..37edcb09 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -35,7 +35,9 @@ def verbose(self) -> bool: def logging_filename(self) -> Optional[str]: return self._logging_filename - def generate_logging_handlers(self, logging_filename: Optional[str] = None) -> LoggingHandlers: + def generate_logging_handlers( + self, logging_filename: Optional[str] = None + ) -> LoggingHandlers: filename = logging_filename if logging_filename else self.logging_filename if filename in self._handlers: return self._handlers[filename] @@ -137,7 +139,9 @@ def get_experiment( } if experiment_type not in implementations: raise RuntimeError(f"Experiment {experiment_type} not supported!") - experiment = 
implementations[experiment_type](self.get_experiment_config(config)) + experiment = implementations[experiment_type]( + self.get_experiment_config(config) + ) experiment.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename ) diff --git a/sebs/utils.py b/sebs/utils.py index 16f6dea2..cf3b9cf5 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -98,20 +98,21 @@ def replace_string_in_file(path: str, from_str: str, to_str: str): def connect_to_redis_cache(host: str): - redis = Redis(host=host, - port=6379, - decode_responses=True, - socket_connect_timeout=10) + redis = Redis( + host=host, port=6379, decode_responses=True, socket_connect_timeout=10 + ) redis.ping() return redis -def download_measurements(redis: Redis, workflow_name: str, after: float, **static_args): +def download_measurements( + redis: Redis, workflow_name: str, after: float, **static_args +): payloads = [] for key in redis.scan_iter(match=f"{workflow_name}/*"): - assert key[:len(workflow_name)] == workflow_name + assert key[: len(workflow_name)] == workflow_name payload = redis.get(key) redis.delete(key) @@ -183,7 +184,9 @@ def find_package_code(benchmark: str, path: str): def global_logging(): logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" - logging.basicConfig(format=logging_format, datefmt=logging_date_format, level=logging.INFO) + logging.basicConfig( + format=logging_format, datefmt=logging_date_format, level=logging.INFO + ) class LoggingHandlers: @@ -191,7 +194,9 @@ def __init__(self, verbose: bool = False, filename: Optional[str] = None): logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" formatter = logging.Formatter(logging_format, logging_date_format) - self.handlers: List[Union[logging.FileHandler, logging.StreamHandler[TextIO]]] = [] + self.handlers: List[ + Union[logging.FileHandler, logging.StreamHandler[TextIO]] + ] = [] # Add stdout output if verbose: From 02cbce78fd49b215a8a3ed889a0092521a2a292a Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 15:05:06 +0200 Subject: [PATCH 53/68] Linting 4 --- sebs/gcp/gcp.py | 6 +++--- sebs/local/local.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 76a76dca..587e03e7 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -446,7 +446,7 @@ def create_workflow( }, ) ) - ret = create_req.execute() + create_req.execute() self.logging.info(f"Map workflow {map_id} has been created!") full_workflow_name = GCP.get_full_workflow_name( @@ -475,7 +475,7 @@ def create_workflow( }, ) ) - ret = create_req.execute() + create_req.execute() self.logging.info(f"Workflow {workflow_name} has been created!") workflow = GCPWorkflow( @@ -567,7 +567,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): }, ) ) - ret = patch_req.execute() + patch_req.execute() self.logging.info("Published new map workflow code.") full_workflow_name = GCP.get_full_workflow_name( diff --git a/sebs/local/local.py b/sebs/local/local.py index 5f38ffc0..6d452072 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -11,7 +11,7 @@ from sebs.local.config import LocalConfig from sebs.local.storage import Minio from sebs.local.function import LocalFunction -from sebs.faas.benchmark import Benchmark, Function, ExecutionResult, Trigger +from sebs.faas.benchmark import Benchmark, Function, Workflow, ExecutionResult, Trigger from sebs.faas.storage import 
PersistentStorage from sebs.faas.system import System from sebs.code_package import CodePackage From 50464034b24fa31f80ce996e03bc23ab49909c4f Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 15:50:22 +0200 Subject: [PATCH 54/68] Linting 5 --- sebs/aws/aws.py | 54 ++++-------- sebs/aws/config.py | 52 +++--------- sebs/aws/function.py | 4 +- sebs/aws/s3.py | 12 +-- sebs/aws/triggers.py | 8 +- sebs/aws/workflow.py | 4 +- sebs/azure/azure.py | 69 +++++----------- sebs/azure/blob_storage.py | 8 +- sebs/azure/cli.py | 4 +- sebs/azure/config.py | 62 ++++---------- sebs/azure/triggers.py | 4 +- sebs/cache.py | 34 ++------ sebs/code_package.py | 44 +++------- sebs/config.py | 28 +++---- sebs/experiments/environment.py | 16 +--- sebs/experiments/eviction_model.py | 24 ++---- sebs/experiments/invocation_overhead.py | 62 ++++---------- sebs/experiments/network_ping_pong.py | 16 +--- sebs/experiments/perf_cost.py | 33 ++------ sebs/experiments/result.py | 8 +- sebs/faas/benchmark.py | 30 ++----- sebs/faas/config.py | 8 +- sebs/faas/fsm.py | 4 +- sebs/faas/storage.py | 20 ++--- sebs/faas/system.py | 30 ++----- sebs/gcp/config.py | 37 ++------- sebs/gcp/function.py | 4 +- sebs/gcp/gcp.py | 104 ++++++------------------ sebs/gcp/storage.py | 12 +-- sebs/gcp/triggers.py | 7 +- sebs/gcp/workflow.py | 4 +- sebs/local/config.py | 4 +- sebs/local/local.py | 8 +- sebs/local/storage.py | 16 +--- sebs/regression.py | 30 ++----- sebs/sebs.py | 8 +- sebs/utils.py | 16 +--- 37 files changed, 225 insertions(+), 663 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index eab47b33..8774072b 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -154,12 +154,8 @@ def package_code( file = os.path.join(directory, file) shutil.move(file, function_dir) - handler_path = os.path.join( - directory, CONFIG_FILES[code_package.language_name][0] - ) - replace_string_in_file( - handler_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"' - ) + handler_path = os.path.join(directory, CONFIG_FILES[code_package.language_name][0]) + replace_string_in_file(handler_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"') # For python, add an __init__ file if code_package.language_name == "python": @@ -202,14 +198,10 @@ def wait_for_function(self, func_name: str): break if backoff_delay > 60: - self.logging.error( - f"Function {func_name} stuck in state {state} after 60s" - ) + self.logging.error(f"Function {func_name} stuck in state {state} after 60s") break - def create_function( - self, code_package: CodePackage, func_name: str - ) -> "LambdaFunction": + def create_function(self, code_package: CodePackage, func_name: str) -> "LambdaFunction": package = code_package.code_location benchmark = code_package.name language = code_package.language_name @@ -258,9 +250,7 @@ def create_function( code_package_name = cast(str, os.path.basename(package)) code_bucket, idx = storage_client.add_input_bucket(benchmark) storage_client.upload(code_bucket, package, code_package_name) - self.logging.info( - "Uploading function {} code to {}".format(func_name, code_bucket) - ) + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) code_config = {"S3Bucket": code_bucket, "S3Key": code_package_name} ret = self.lambda_client.create_function( FunctionName=func_name, @@ -326,9 +316,7 @@ def update_function(self, function: Function, code_package: CodePackage): # AWS Lambda limit on zip deployment if code_size < 50 * 1024 * 1024: with open(package, "rb") as code_body: - 
self.lambda_client.update_function_code( - FunctionName=name, ZipFile=code_body.read() - ) + self.lambda_client.update_function_code(FunctionName=name, ZipFile=code_body.read()) # Upload code package to S3, then update else: code_package_name = os.path.basename(package) @@ -348,9 +336,7 @@ def update_function(self, function: Function, code_package: CodePackage): ) self.logging.info("Published new function code") - def create_function_trigger( - self, func: Function, trigger_type: Trigger.TriggerType - ) -> Trigger: + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.aws.triggers import HTTPTrigger function = cast(LambdaFunction, func) @@ -379,9 +365,7 @@ def create_function_trigger( self.cache_client.update_benchmark(function) return trigger - def create_workflow( - self, code_package: CodePackage, workflow_name: str - ) -> "SFNWorkflow": + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFNWorkflow": workflow_name = AWS.format_resource_name(workflow_name) @@ -394,8 +378,7 @@ def create_workflow( code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] funcs = [ - self.create_function(code_package, workflow_name + "___" + fn) - for fn in func_names + self.create_function(code_package, workflow_name + "___" + fn) for fn in func_names ] # Generate workflow definition.json @@ -414,9 +397,7 @@ def create_workflow( roleArn=self.config.resources.lambda_role(self.session), ) - self.logging.info( - "Creating workflow {} from {}".format(workflow_name, package) - ) + self.logging.info("Creating workflow {} from {}".format(workflow_name, package)) workflow = SFNWorkflow( workflow_name, @@ -429,15 +410,11 @@ def create_workflow( arn = re.search("'([^']*)'", str(e)).group()[1:-1] self.logging.info( - "Workflow {} exists on AWS, retrieve configuration.".format( - workflow_name - ) + "Workflow {} exists on AWS, retrieve configuration.".format(workflow_name) ) # Here we assume a single Lambda role - workflow = SFNWorkflow( - workflow_name, funcs, code_package.name, arn, code_package.hash - ) + workflow = SFNWorkflow(workflow_name, funcs, code_package.name, arn, code_package.hash) self.update_workflow(workflow, code_package) workflow.updated_code = True @@ -463,8 +440,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): code_files = list(code_package.get_code_files(include_config=False)) func_names = [os.path.splitext(os.path.basename(p))[0] for p in code_files] funcs = [ - self.create_function(code_package, workflow.name + "___" + fn) - for fn in func_names + self.create_function(code_package, workflow.name + "___" + fn) for fn in func_names ] # Generate workflow definition.json @@ -565,9 +541,7 @@ def parse_aws_report( output.provider_times.execution = int(float(aws_vals["Duration"]) * 1000) output.stats.memory_used = float(aws_vals["Max Memory Used"]) if "Init Duration" in aws_vals: - output.provider_times.initialization = int( - float(aws_vals["Init Duration"]) * 1000 - ) + output.provider_times.initialization = int(float(aws_vals["Init Duration"]) * 1000) output.billing.billed_time = int(aws_vals["Billed Duration"]) output.billing.memory = int(aws_vals["Memory Size"]) output.billing.gb_seconds = output.billing.billed_time * output.billing.memory diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 53c8bd67..c2be7f40 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -39,9 +39,7 @@ def 
initialize(dct: dict) -> Credentials: return AWSCredentials(dct["access_key"], dct["secret_key"]) @staticmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> Credentials: + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: # FIXME: update return types of both functions to avoid cast # needs 3.7+ to support annotations @@ -49,17 +47,13 @@ def deserialize( ret: AWSCredentials # Load cached values if cached_config and "credentials" in cached_config: - ret = cast( - AWSCredentials, AWSCredentials.initialize(cached_config["credentials"]) - ) + ret = cast(AWSCredentials, AWSCredentials.initialize(cached_config["credentials"])) ret.logging_handlers = handlers ret.logging.info("Using cached credentials for AWS") else: # Check for new config if "credentials" in config: - ret = cast( - AWSCredentials, AWSCredentials.initialize(config["credentials"]) - ) + ret = cast(AWSCredentials, AWSCredentials.initialize(config["credentials"])) elif "AWS_ACCESS_KEY_ID" in os.environ: ret = AWSCredentials( os.environ["AWS_ACCESS_KEY_ID"], os.environ["AWS_SECRET_ACCESS_KEY"] @@ -75,12 +69,8 @@ def deserialize( return ret def update_cache(self, cache: Cache): - cache.update_config( - val=self.access_key, keys=["aws", "credentials", "access_key"] - ) - cache.update_config( - val=self.secret_key, keys=["aws", "credentials", "secret_key"] - ) + cache.update_config(val=self.access_key, keys=["aws", "credentials", "access_key"]) + cache.update_config(val=self.secret_key, keys=["aws", "credentials", "secret_key"]) def serialize(self) -> dict: out = {"access_key": self.access_key, "secret_key": self.secret_key} @@ -131,9 +121,7 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: { "Sid": "", "Effect": "Allow", - "Principal": { - "Service": ["lambda.amazonaws.com", "states.amazonaws.com"] - }, + "Principal": {"Service": ["lambda.amazonaws.com", "states.amazonaws.com"]}, "Action": "sts:AssumeRole", } ], @@ -215,20 +203,14 @@ def initialize(dct: dict) -> Resources: def serialize(self) -> dict: out = { "lambda-role": self._lambda_role, - "http-apis": { - key: value.serialize() for (key, value) in self._http_apis.items() - }, + "http-apis": {key: value.serialize() for (key, value) in self._http_apis.items()}, } return out def update_cache(self, cache: Cache): - cache.update_config( - val=self._lambda_role, keys=["aws", "resources", "lambda-role"] - ) + cache.update_config(val=self._lambda_role, keys=["aws", "resources", "lambda-role"]) for name, api in self._http_apis.items(): - cache.update_config( - val=api.serialize(), keys=["aws", "resources", "http-apis", name] - ) + cache.update_config(val=api.serialize(), keys=["aws", "resources", "http-apis", name]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: @@ -237,9 +219,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour ret: AWSResources # Load cached values if cached_config and "resources" in cached_config: - ret = cast( - AWSResources, AWSResources.initialize(cached_config["resources"]) - ) + ret = cast(AWSResources, AWSResources.initialize(cached_config["resources"])) ret.logging_handlers = handlers ret.logging.info("Using cached resources for AWS") else: @@ -247,9 +227,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour if "resources" in config: ret = cast(AWSResources, AWSResources.initialize(config["resources"])) ret.logging_handlers = handlers - ret.logging.info( 
- "No cached resources for AWS found, using user configuration." - ) + ret.logging.info("No cached resources for AWS found, using user configuration.") else: ret = AWSResources(lambda_role="") ret.logging_handlers = handlers @@ -292,12 +270,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast( - AWSCredentials, AWSCredentials.deserialize(config, cache, handlers) - ) - resources = cast( - AWSResources, AWSResources.deserialize(config, cache, handlers) - ) + credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) + resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) config_obj = AWSConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/aws/function.py b/sebs/aws/function.py index a3d77d54..20816745 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -59,9 +59,7 @@ def deserialize(cached_config: dict) -> "LambdaFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get( - trigger["type"] - ), + {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 72560717..e47bd77f 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -49,9 +49,7 @@ def _create_bucket(self, name: str, buckets: List[str] = []): for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format( - bucket_name, name - ) + "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name random_name = str(uuid.uuid4())[0:16] @@ -68,9 +66,7 @@ def _create_bucket(self, name: str, buckets: List[str] = []): self.client.create_bucket(Bucket=bucket_name) self.logging.info("Created bucket {}".format(bucket_name)) except self.client.exceptions.BucketAlreadyExists as e: - self.logging.error( - f"The bucket {bucket_name} exists already in region {self.region}!" 
- ) + self.logging.error(f"The bucket {bucket_name} exists already in region {self.region}!") raise e except self.client.exceptions.ClientError as e: self.logging.error( @@ -118,9 +114,7 @@ def list_bucket(self, bucket_name: str): def list_buckets(self, bucket_name: str) -> List[str]: s3_buckets = self.client.list_buckets()["Buckets"] - return [ - bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"] - ] + return [bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"]] def clean_bucket(self, bucket: str): objects = self.client.list_objects_v2(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index e368a641..0cadcd9b 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -50,9 +50,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() begin = datetime.datetime.now() - ret = client.invoke( - FunctionName=self.name, Payload=serialized_payload, LogType="Tail" - ) + ret = client.invoke(FunctionName=self.name, Payload=serialized_payload, LogType="Tail") end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) @@ -106,9 +104,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: client = self.deployment_client.get_sfn_client() begin = datetime.datetime.now() - ret = client.start_execution( - stateMachineArn=self.name, input=json.dumps(payload) - ) + ret = client.start_execution(stateMachineArn=self.name, input=json.dumps(payload)) end = datetime.datetime.now() aws_result = ExecutionResult.from_times(begin, end) diff --git a/sebs/aws/workflow.py b/sebs/aws/workflow.py index 3489a469..ac48dc4e 100644 --- a/sebs/aws/workflow.py +++ b/sebs/aws/workflow.py @@ -45,9 +45,7 @@ def deserialize(cached_config: dict) -> "SFNWorkflow": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get( - trigger["type"] - ), + {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 4ac8fed3..7a24a95c 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -102,9 +102,7 @@ def get_storage(self, replace_existing: bool = False) -> PersistentStorage: self.storage = BlobStorage( self.config.region, self.cache_client, - self.config.resources.data_storage_account( - self.cli_instance - ).connection_string, + self.config.resources.data_storage_account(self.cli_instance).connection_string, replace_existing=replace_existing, ) self.storage.logging_handlers = self.logging_handlers @@ -204,12 +202,8 @@ def package_code( dst_json = os.path.join(os.path.dirname(dst_file), "function.json") json.dump(payload, open(dst_json, "w"), indent=2) - handler_path = os.path.join( - directory, WRAPPER_FILES[code_package.language_name][0] - ) - replace_string_in_file( - handler_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"' - ) + handler_path = os.path.join(directory, WRAPPER_FILES[code_package.language_name][0]) + replace_string_in_file(handler_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"') # copy every wrapper file to respective function dirs for wrapper_file in wrapper_files: @@ -262,9 +256,7 @@ def publish_benchmark( url = line.split("Invoke url:")[1].strip() break if url == "": - raise RuntimeError( - "Couldnt find URL in 
{}".format(ret.decode("utf-8")) - ) + raise RuntimeError("Couldnt find URL in {}".format(ret.decode("utf-8"))) success = True except RuntimeError as e: error = str(e) @@ -300,9 +292,7 @@ def update_benchmark(self, benchmark: Benchmark, code_package: CodePackage): self._mount_function_code(code_package) url = self.publish_benchmark(benchmark, code_package, True) - trigger = HTTPTrigger( - url, self.config.resources.data_storage_account(self.cli_instance) - ) + trigger = HTTPTrigger(url, self.config.resources.data_storage_account(self.cli_instance)) trigger.logging_handlers = self.logging_handlers benchmark.add_trigger(trigger) @@ -326,9 +316,7 @@ def default_benchmark_name(self, code_package: CodePackage) -> str: B = TypeVar("B", bound=FunctionApp) - def create_benchmark( - self, code_package: CodePackage, name: str, benchmark_cls: B - ) -> B: + def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: B) -> B: language = code_package.language_name language_runtime = code_package.language_version resource_group = self.config.resources.resource_group(self.cli_instance) @@ -355,18 +343,14 @@ def create_benchmark( for setting in json.loads(ret.decode()): if setting["name"] == "AzureWebJobsStorage": connection_string = setting["value"] - elems = [ - z for y in connection_string.split(";") for z in y.split("=") - ] + elems = [z for y in connection_string.split(";") for z in y.split("=")] account_name = elems[elems.index("AccountName") + 1] function_storage_account = AzureResources.Storage.from_cache( account_name, connection_string ) self.logging.info("Azure: Selected {} function app".format(name)) except RuntimeError: - function_storage_account = self.config.resources.add_storage_account( - self.cli_instance - ) + function_storage_account = self.config.resources.add_storage_account(self.cli_instance) config["storage_account"] = function_storage_account.account_name # FIXME: only Linux type is supported while True: @@ -386,9 +370,7 @@ def create_benchmark( except RuntimeError as e: # Azure does not allow some concurrent operations if "another operation is in progress" in str(e): - self.logging.info( - f"Repeat {name} creation, another operation in progress" - ) + self.logging.info(f"Repeat {name} creation, another operation in progress") # Rethrow -> another error else: raise @@ -406,25 +388,19 @@ def create_benchmark( def cached_benchmark(self, benchmark: Benchmark): - data_storage_account = self.config.resources.data_storage_account( - self.cli_instance - ) + data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in benchmark.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) azure_trigger.logging_handlers = self.logging_handlers azure_trigger.data_storage_account = data_storage_account - def create_function( - self, code_package: CodePackage, func_name: str - ) -> AzureFunction: + def create_function(self, code_package: CodePackage, func_name: str) -> AzureFunction: return self.create_benchmark(code_package, func_name, AzureFunction) def update_function(self, function: Function, code_package: CodePackage): self.update_benchmark(function, code_package) - def create_workflow( - self, code_package: CodePackage, workflow_name: str - ) -> AzureWorkflow: + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> AzureWorkflow: return self.create_benchmark(code_package, workflow_name, AzureWorkflow) def update_workflow(self, workflow: Workflow, code_package: CodePackage): @@ -454,13 +430,12 @@ def 
download_metrics( resource_group = self.config.resources.resource_group(self.cli_instance) # Avoid warnings in the next step ret = self.cli_instance.execute( - "az feature register --name AIWorkspacePreview " - "--namespace microsoft.insights" + "az feature register --name AIWorkspacePreview " "--namespace microsoft.insights" ) app_id_query = self.cli_instance.execute( - ( - "az monitor app-insights component show " "--app {} --resource-group {}" - ).format(function_name, resource_group) + ("az monitor app-insights component show " "--app {} --resource-group {}").format( + function_name, resource_group + ) ).decode("utf-8") application_id = json.loads(app_id_query)["appId"] @@ -471,9 +446,7 @@ def download_metrics( start_time_str = datetime.datetime.fromtimestamp(start_time).strftime( "%Y-%m-%d %H:%M:%S.%f" ) - end_time_str = datetime.datetime.fromtimestamp(end_time + 1).strftime( - "%Y-%m-%d %H:%M:%S" - ) + end_time_str = datetime.datetime.fromtimestamp(end_time + 1).strftime("%Y-%m-%d %H:%M:%S") from tzlocal import get_localzone timezone_str = datetime.datetime.now(get_localzone()).strftime("%z") @@ -512,18 +485,14 @@ def download_metrics( # duration = request[4] func_exec_time = request[-1] invocations_processed.add(invocation_id) - requests[invocation_id].provider_times.execution = int( - float(func_exec_time) * 1000 - ) + requests[invocation_id].provider_times.execution = int(float(func_exec_time) * 1000) self.logging.info( f"Azure: Found time metrics for {len(invocations_processed)} " f"out of {len(requests.keys())} invocations." ) if len(invocations_processed) < len(requests.keys()): time.sleep(5) - self.logging.info( - f"Missing the requests: {invocations_to_process - invocations_processed}" - ) + self.logging.info(f"Missing the requests: {invocations_to_process - invocations_processed}") # TODO: query performance counters for mem diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index e87d8d75..cad108a8 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -16,9 +16,7 @@ def typename() -> str: def deployment_name(): return "azure" - def __init__( - self, region: str, cache_client: Cache, conn_string: str, replace_existing: bool - ): + def __init__(self, region: str, cache_client: Cache, conn_string: str, replace_existing: bool): super().__init__(region, cache_client, replace_existing) self.client = BlobServiceClient.from_connection_string(conn_string) @@ -29,9 +27,7 @@ def __init__( def _create_bucket(self, name: str, containers: List[str] = []) -> str: for c in containers: if name in c: - self.logging.info( - "Container {} for {} already exists, skipping.".format(c, name) - ) + self.logging.info("Container {} for {} already exists, skipping.".format(c, name)) return c random_name = str(uuid.uuid4())[0:16] name = "{}-{}".format(name, random_name) diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index 9d15eeb0..f98226e4 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -17,9 +17,7 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): except docker.errors.ImageNotFound: try: logging.info( - "Docker pull of image {repo}:{image}".format( - repo=repo_name, image=image_name - ) + "Docker pull of image {repo}:{image}".format(repo=repo_name, image=image_name) ) docker_client.images.pull(repo_name, image_name) except docker.errors.APIError: diff --git a/sebs/azure/config.py b/sebs/azure/config.py index a5bb3277..ea673fc2 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -40,9 +40,7 @@ def 
initialize(dct: dict) -> Credentials: return AzureCredentials(dct["appId"], dct["tenant"], dct["password"]) @staticmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> Credentials: + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: # FIXME: update return types of both functions to avoid cast # needs 3.7+ to support annotations @@ -96,18 +94,12 @@ def __init__(self, account_name: str, connection_string: str): # FIXME: 3.7+ migration with future annotations @staticmethod - def from_cache( - account_name: str, connection_string: str - ) -> "AzureResources.Storage": - assert connection_string, "Empty connection string for account {}".format( - account_name - ) + def from_cache(account_name: str, connection_string: str) -> "AzureResources.Storage": + assert connection_string, "Empty connection string for account {}".format(account_name) return AzureResources.Storage(account_name, connection_string) @staticmethod - def from_allocation( - account_name: str, cli_instance: AzureCLI - ) -> "AzureResources.Storage": + def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResources.Storage": connection_string = AzureResources.Storage.query_connection_string( account_name, cli_instance ) @@ -121,9 +113,7 @@ def from_allocation( @staticmethod def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: ret = cli_instance.execute( - "az storage account show-connection-string --name {}".format( - account_name - ) + "az storage account show-connection-string --name {}".format(account_name) ) ret = json.loads(ret.decode("utf-8")) connection_string = ret["connectionString"] @@ -134,9 +124,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> "AzureResources.Storage": - return AzureResources.Storage.from_cache( - obj["account_name"], obj["connection_string"] - ) + return AzureResources.Storage.from_cache(obj["account_name"], obj["connection_string"]) # FIXME: 3.7 Python, future annotations def __init__( @@ -207,16 +195,12 @@ def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage does NOT add the account to any resource collection. """ - def _create_storage_account( - self, cli_instance: AzureCLI - ) -> "AzureResources.Storage": + def _create_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": sku = "Standard_LRS" # Create account. 
Only alphanumeric characters are allowed uuid_name = str(uuid.uuid1())[0:8] account_name = "sebsstorage{}".format(uuid_name) - self.logging.info( - "Starting allocation of storage account {}.".format(account_name) - ) + self.logging.info("Starting allocation of storage account {}.".format(account_name)) cli_instance.execute( ( "az storage account create --name {0} --location {1} " @@ -249,9 +233,7 @@ def initialize(dct: dict) -> Resources: storage_accounts=[ AzureResources.Storage.deserialize(x) for x in dct["storage_accounts"] ], - data_storage_account=AzureResources.Storage.deserialize( - dct["data_storage_account"] - ), + data_storage_account=AzureResources.Storage.deserialize(dct["data_storage_account"]), ) def serialize(self) -> dict: @@ -270,25 +252,15 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour cached_config = cache.get_config("azure") ret: AzureResources # Load cached values - if ( - cached_config - and "resources" in cached_config - and len(cached_config["resources"]) > 0 - ): + if cached_config and "resources" in cached_config and len(cached_config["resources"]) > 0: logging.info("Using cached resources for Azure") - ret = cast( - AzureResources, AzureResources.initialize(cached_config["resources"]) - ) + ret = cast(AzureResources, AzureResources.initialize(cached_config["resources"])) else: # Check for new config if "resources" in config: - ret = cast( - AzureResources, AzureResources.initialize(config["resources"]) - ) + ret = cast(AzureResources, AzureResources.initialize(config["resources"])) ret.logging_handlers = handlers - ret.logging.info( - "No cached resources for Azure found, using user configuration." - ) + ret.logging.info("No cached resources for Azure found, using user configuration.") else: ret = AzureResources() ret.logging_handlers = handlers @@ -339,12 +311,8 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config cached_config = cache.get_config("azure") # FIXME: use future annotations (see sebs/faas/system) - credentials = cast( - AzureCredentials, AzureCredentials.deserialize(config, cache, handlers) - ) - resources = cast( - AzureResources, AzureResources.deserialize(config, cache, handlers) - ) + credentials = cast(AzureCredentials, AzureCredentials.deserialize(config, cache, handlers)) + resources = cast(AzureResources, AzureResources.deserialize(config, cache, handlers)) config_obj = AzureConfig(credentials, resources) config_obj.logging_handlers = handlers # Load cached values diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 9376a71f..a0c8bfdc 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -21,9 +21,7 @@ def data_storage_account(self, data_storage_account: AzureResources.Storage): class HTTPTrigger(AzureTrigger): - def __init__( - self, url: str, data_storage_account: Optional[AzureResources.Storage] = None - ): + def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): super().__init__(data_storage_account) self.url = url diff --git a/sebs/cache.py b/sebs/cache.py index 9b17c4b6..dc6fe536 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -60,9 +60,7 @@ def typename() -> str: def load_config(self): with self._lock: for cloud in ["azure", "aws", "gcp"]: - cloud_config_file = os.path.join( - self.cache_dir, "{}.json".format(cloud) - ) + cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) if os.path.exists(cloud_config_file): self.cached_config[cloud] = json.load(open(cloud_config_file, "r")) 
@@ -90,12 +88,8 @@ def shutdown(self): if self.config_updated: for cloud in ["azure", "aws", "gcp"]: if cloud in self.cached_config: - cloud_config_file = os.path.join( - self.cache_dir, "{}.json".format(cloud) - ) - self.logging.info( - "Update cached config {}".format(cloud_config_file) - ) + cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) + self.logging.info("Update cached config {}".format(cloud_config_file)) with open(cloud_config_file, "w") as out: json.dump(self.cached_config[cloud], out, indent=2) @@ -155,11 +149,7 @@ def get_benchmarks( def get_storage_config(self, deployment: str, benchmark: str): cfg = self.get_benchmark_config(deployment, benchmark) - return ( - cfg["storage"] - if cfg and "storage" in cfg and not self.ignore_storage - else None - ) + return cfg["storage"] if cfg and "storage" in cfg and not self.ignore_storage else None def update_storage(self, deployment: str, benchmark: str, config: dict): if self.ignore_storage: @@ -254,12 +244,8 @@ def update_code_package( with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: config = json.load(fp) date = str(datetime.datetime.now()) - config[deployment_name][language]["code_package"]["date"][ - "modified" - ] = date - config[deployment_name][language]["code_package"][ - "hash" - ] = code_package.hash + config[deployment_name][language]["code_package"]["date"]["modified"] = date + config[deployment_name][language]["code_package"]["hash"] = code_package.hash with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: json.dump(config, fp, indent=2) else: @@ -291,16 +277,12 @@ def add_benchmark( cache_config = os.path.join(benchmark_dir, "config.json") if os.path.exists(cache_config): - benchmarks_config: Dict[str, Any] = { - benchmark.name: {**benchmark.serialize()} - } + benchmarks_config: Dict[str, Any] = {benchmark.name: {**benchmark.serialize()}} with open(cache_config, "r") as fp: cached_config = json.load(fp) if "benchmarks" not in cached_config[deployment_name][language]: - cached_config[deployment_name][language][ - "benchmarks" - ] = benchmarks_config + cached_config[deployment_name][language]["benchmarks"] = benchmarks_config else: cached_config[deployment_name][language]["benchmarks"].update( benchmarks_config diff --git a/sebs/code_package.py b/sebs/code_package.py index 5f5cdb56..8b384b07 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -161,15 +161,11 @@ def __init__( if not self._path: raise RuntimeError("Benchmark {name} not found!".format(name=self._name)) with open(os.path.join(self.path, "config.json")) as json_file: - self._config: CodePackageConfig = CodePackageConfig.deserialize( - json.load(json_file) - ) + self._config: CodePackageConfig = CodePackageConfig.deserialize(json.load(json_file)) if self.language not in self.config.languages: raise RuntimeError( - "Benchmark {} not available for language {}".format( - self.name, self.language - ) + "Benchmark {} not available for language {}".format(self.name, self.language) ) self._cache_client = cache_client self._docker_client = docker_client @@ -367,15 +363,11 @@ def install_dependencies(self, output_dir): ) self._docker_client.images.pull(repo_name, image_name) except docker.errors.APIError: - raise RuntimeError( - "Docker pull of image {} failed!".format(image_name) - ) + raise RuntimeError("Docker pull of image {} failed!".format(image_name)) # Create set of mounted volumes unless Docker volumes are disabled if not self._experiment_config.check_flag("docker_copy_build_files"): - volumes = { 
- os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"} - } + volumes = {os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"}} package_script = os.path.abspath( os.path.join(self._path, self.language_name, "package.sh") ) @@ -393,15 +385,11 @@ def install_dependencies(self, output_dir): try: self.logging.info( "Docker build of benchmark dependencies in container " - "of image {repo}:{image}".format( - repo=repo_name, image=image_name - ) + "of image {repo}:{image}".format(repo=repo_name, image=image_name) ) uid = os.getuid() # Standard, simplest build - if not self._experiment_config.check_flag( - "docker_copy_build_files" - ): + if not self._experiment_config.check_flag("docker_copy_build_files"): self.logging.info( "Docker mount of benchmark code from path {path}".format( path=os.path.abspath(output_dir) @@ -437,9 +425,7 @@ def install_dependencies(self, output_dir): "Send benchmark code from path {path} to " "Docker instance".format(path=os.path.abspath(output_dir)) ) - tar_archive = os.path.join( - output_dir, os.path.pardir, "function.tar" - ) + tar_archive = os.path.join(output_dir, os.path.pardir, "function.tar") with tarfile.open(tar_archive, "w") as tar: for f in os.listdir(output_dir): tar.add(os.path.join(output_dir, f), arcname=f) @@ -529,13 +515,9 @@ def build( # package already exists if self.is_cached: - self._cache_client.update_code_package( - self._deployment_name, self.language_name, self - ) + self._cache_client.update_code_package(self._deployment_name, self.language_name, self) else: - self._cache_client.add_code_package( - self._deployment_name, self.language_name, self - ) + self._cache_client.add_code_package(self._deployment_name, self.language_name, self) self.query_cache() return True, self._code_location @@ -575,9 +557,7 @@ def code_package_modify(self, filename: str, data: bytes): if self.is_archive(): self._update_zip(self.code_location, filename, data) new_size = self.recompute_size() / 1024.0 / 1024.0 - self.logging.info( - f"Modified zip package {self.code_location}, new size {new_size} MB" - ) + self.logging.info(f"Modified zip package {self.code_location}, new size {new_size} MB") else: raise NotImplementedError() @@ -650,9 +630,7 @@ def load_benchmark_input(path: str) -> CodePackageModuleInterface: # Look for input generator file in the directory containing benchmark import importlib.machinery - loader = importlib.machinery.SourceFileLoader( - "input", os.path.join(path, "input.py") - ) + loader = importlib.machinery.SourceFileLoader("input", os.path.join(path, "input.py")) spec = importlib.util.spec_from_loader(loader.name, loader) assert spec mod = importlib.util.module_from_spec(spec) diff --git a/sebs/config.py b/sebs/config.py index d238dedd..fd7f66aa 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -12,31 +12,23 @@ def __init__(self): def docker_repository(self) -> str: return self._system_config["general"]["docker_repository"] - def deployment_packages( - self, deployment_name: str, language_name: str - ) -> Dict[str, str]: - return self._system_config[deployment_name]["languages"][language_name][ - "deployment" - ]["packages"] + def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: + return self._system_config[deployment_name]["languages"][language_name]["deployment"][ + "packages" + ] def deployment_files(self, deployment_name: str, language_name: str) -> List[str]: - return self._system_config[deployment_name]["languages"][language_name][ - "deployment" - ]["files"] + 
return self._system_config[deployment_name]["languages"][language_name]["deployment"][ + "files" + ] def docker_image_types(self, deployment_name: str, language_name: str) -> List[str]: - return self._system_config[deployment_name]["languages"][language_name][ - "images" - ] + return self._system_config[deployment_name]["languages"][language_name]["images"] - def supported_language_versions( - self, deployment_name: str, language_name: str - ) -> List[str]: + def supported_language_versions(self, deployment_name: str, language_name: str) -> List[str]: return self._system_config[deployment_name]["languages"][language_name][ "base_images" ].keys() def username(self, deployment_name: str, language_name: str) -> str: - return self._system_config[deployment_name]["languages"][language_name][ - "username" - ] + return self._system_config[deployment_name]["languages"][language_name]["username"] diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index 29bf608b..86576f11 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -13,9 +13,7 @@ class ExperimentEnvironment: def __init__(self): # find CPU mapping - ret = execute( - 'cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True - ) + ret = execute('cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True) # skip empty line at the end mapping = [int(x.split(":")[1]) for x in ret.split("\n") if x] @@ -49,9 +47,7 @@ def __init__(self): raise NotImplementedError() # Assume all CPU use the same - scaling_governor_path = ( - "/sys/devices/system/cpu/cpu{cpu_id}/cpufreq/scaling_driver" - ) + scaling_governor_path = "/sys/devices/system/cpu/cpu{cpu_id}/cpufreq/scaling_driver" governor = execute("cat {path}".format(path=scaling_governor_path)) if governor == "intel_pstate": self._governor = governor @@ -66,9 +62,7 @@ def write_cpu_status(self, cores: List[int], status: int): for logical_core in logical_cores[1:]: path = cpu_status_path.format(cpu_id=logical_core["core"]) execute( - cmd="echo {status} | sudo tee {path}".format( - status=status, path=path - ), + cmd="echo {status} | sudo tee {path}".format(status=status, path=path), shell=True, ) @@ -107,9 +101,7 @@ def set_frequency(self, max_freq: int): def unset_frequency(self): path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" - execute( - "echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path) - ) + execute("echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path)) def setup_benchmarking(self, cores: List[int]): self.disable_boost(cores) diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 839b6856..4d55c66c 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -95,14 +95,10 @@ def accept_replies(port: int, invocations: int): s.close() @staticmethod - def execute_instance( - sleep_time: int, pid: int, tid: int, func: Function, payload: dict - ): + def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict): try: - print( - f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!" 
- ) + print(f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!") begin = datetime.now() res = func.triggers(Trigger.TriggerType.HTTP)[0].sync_invoke(payload) end = datetime.now() @@ -115,9 +111,7 @@ def execute_instance( logging.error(f"First Invocation Failed at function {func.name}, {e}") raise RuntimeError() - time_spent = float(datetime.now().strftime("%s.%f")) - float( - end.strftime("%s.%f") - ) + time_spent = float(datetime.now().strftime("%s.%f")) - float(end.strftime("%s.%f")) seconds_sleep = sleep_time - time_spent print(f"PID {pid} TID {tid} with time {time}, sleep {seconds_sleep}") time.sleep(seconds_sleep) @@ -204,9 +198,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): # if self._benchmark.functions and fname in self._benchmark.functions: # self.logging.info(f"Skip {fname}, exists already.") # continue - self.functions.append( - deployment_client.get_function(self._benchmark, func_name=fname) - ) + self.functions.append(deployment_client.get_function(self._benchmark, func_name=fname)) def run(self): @@ -223,9 +215,7 @@ def run(self): # function_names = self.functions_names[invocation_idx :: self.function_copies_per_time] # flake8 issue # https://github.com/PyCQA/pycodestyle/issues/373 - functions = self.functions[ - invocation_idx :: self.function_copies_per_time - ] # noqa + functions = self.functions[invocation_idx :: self.function_copies_per_time] # noqa results = {} # Disable logging - otherwise we have RLock that can't get be pickled @@ -267,9 +257,7 @@ def run(self): """ for j in range(0, threads): servers_results.append( - pool.apply_async( - EvictionModel.accept_replies, args=(port + j, invocations) - ) + pool.apply_async(EvictionModel.accept_replies, args=(port + j, invocations)) ) """ diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index 0cbbdd8c..11bbe403 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -15,9 +15,7 @@ class CodePackageSize: - def __init__( - self, deployment_client: FaaSSystem, benchmark: CodePackage, settings: dict - ): + def __init__(self, deployment_client: FaaSSystem, benchmark: CodePackage, settings: dict): import math from numpy import linspace @@ -28,9 +26,7 @@ def __init__( ) from sebs.utils import find_package_code - self._benchmark_path = find_package_code( - "030.clock-synchronization", "benchmarks" - ) + self._benchmark_path = find_package_code("030.clock-synchronization", "benchmarks") self._benchmark = benchmark random.seed(1410) @@ -93,9 +89,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self._trigger = triggers[0] self._storage = deployment_client.get_storage(replace_existing=True) - self.benchmark_input = self._benchmark.prepare_input( - storage=self._storage, size="test" - ) + self.benchmark_input = self._benchmark.prepare_input(storage=self._storage, size="test") self._out_dir = os.path.join( sebs_client.output_dir, "invocation-overhead", self.settings["type"] ) @@ -113,9 +107,7 @@ def run(self): N = self.settings["N"] if self.settings["type"] == "code": - experiment = CodePackageSize( - self._deployment_client, self._benchmark, self.settings - ) + experiment = CodePackageSize(self._deployment_client, self._benchmark, self.settings) else: experiment = PayloadSize(self.settings) @@ -151,13 +143,9 @@ def run(self): for i in range(repetitions): succesful = False while not succesful: - self.logging.info( - f"Starting with {size} bytes, repetition {i}" - ) 
+ self.logging.info(f"Starting with {size} bytes, repetition {i}") if result_type == "cold": - self._deployment_client.enforce_cold_start( - [self._function] - ) + self._deployment_client.enforce_cold_start([self._function]) time.sleep(1) row = self.receive_datagrams(input_benchmark, N, 12000, ip) if result_type == "cold": @@ -172,9 +160,7 @@ def run(self): succesful = True time.sleep(5) - self._storage.download_bucket( - self.benchmark_input["output-bucket"], self._out_dir - ) + self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) def process( self, @@ -189,9 +175,7 @@ def process( full_data: Dict[str, pd.Dataframe] = {} for f in glob.glob( - os.path.join( - directory, "invocation-overhead", self.settings["type"], "*.csv" - ) + os.path.join(directory, "invocation-overhead", self.settings["type"], "*.csv") ): if "result.csv" in f or "result-processed.csv" in f: @@ -204,18 +188,13 @@ def process( else: full_data[request_id] = data df = pd.concat(full_data.values()).reset_index(drop=True) - df["rtt"] = (df["server_rcv"] - df["client_send"]) + ( - df["client_rcv"] - df["server_send"] - ) + df["rtt"] = (df["server_rcv"] - df["client_send"]) + (df["client_rcv"] - df["server_send"]) df["clock_drift"] = ( - (df["client_send"] - df["server_rcv"]) - + (df["client_rcv"] - df["server_send"]) + (df["client_send"] - df["server_rcv"]) + (df["client_rcv"] - df["server_send"]) ) / 2 with open( - os.path.join( - directory, "invocation-overhead", self.settings["type"], "result.csv" - ) + os.path.join(directory, "invocation-overhead", self.settings["type"], "result.csv") ) as csvfile: with open( os.path.join( @@ -247,23 +226,15 @@ def process( request_id = row[-1] clock_drift = df[df["id"] == request_id]["clock_drift"].mean() clock_drift_std = df[df["id"] == request_id]["clock_drift"].std() - invocation_time = ( - float(row[5]) - float(row[4]) - float(row[3]) + clock_drift - ) - writer.writerow( - row + [clock_drift, clock_drift_std, invocation_time] - ) + invocation_time = float(row[5]) - float(row[4]) - float(row[3]) + clock_drift + writer.writerow(row + [clock_drift, clock_drift_std, invocation_time]) - def receive_datagrams( - self, input_benchmark: dict, repetitions: int, port: int, ip: str - ): + def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str): import socket input_benchmark["server-port"] = port - self.logging.info( - f"Starting invocation with {repetitions} repetitions on port {port}" - ) + self.logging.info(f"Starting invocation with {repetitions} repetitions on port {port}") socket.setdefaulttimeout(4) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.bind(("", port)) @@ -289,8 +260,7 @@ def receive_datagrams( # stop after 5 attempts if j == 5: self.logging.error( - "Failing after 5 unsuccesfull attempts to " - "communicate with the function!" + "Failing after 5 unsuccesfull attempts to " "communicate with the function!" 
) break # check if function invocation failed, and if yes: raise the exception diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index b9a767d3..a95506de 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -30,9 +30,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): ) self._function = deployment_client.get_function(benchmark) self._storage = deployment_client.get_storage(replace_existing=True) - self.benchmark_input = benchmark.prepare_input( - storage=self._storage, size="test" - ) + self.benchmark_input = benchmark.prepare_input(storage=self._storage, size="test") self._out_dir = os.path.join(sebs_client.output_dir, "network-ping-pong") if not os.path.exists(self._out_dir): # shutil.rmtree(self._out_dir) @@ -61,9 +59,7 @@ def run(self): # give functions time to finish and upload result time.sleep(5) - self._storage.download_bucket( - self.benchmark_input["output-bucket"], self._out_dir - ) + self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) def process(self, directory: str): @@ -77,9 +73,7 @@ def process(self, directory: str): else: full_data[request_id] = data df = pd.concat(full_data.values()).reset_index(drop=True) - df["rtt"] = (df["server_rcv"] - df["client_send"]) + ( - df["client_rcv"] - df["server_send"] - ) + df["rtt"] = (df["server_rcv"] - df["client_send"]) + (df["client_rcv"] - df["server_send"]) print("Rows: ", df.shape[0]) print("Mean: ", df["rtt"].mean()) print("STD: ", df["rtt"].std()) @@ -107,9 +101,7 @@ def receive_datagrams(self, repetitions: int, port: int, ip: str): "repetitions": repetitions, **self.benchmark_input, } - self._function.triggers(Trigger.TriggerType.HTTP)[0].async_invoke( - input_benchmark - ) + self._function.triggers(Trigger.TriggerType.HTTP)[0].async_invoke(input_benchmark) begin = datetime.now() times = [] diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7c41003e..80514db4 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -48,9 +48,7 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): ) self._function = deployment_client.get_function(self._benchmark) # prepare benchmark input - self._storage = deployment_client.get_storage( - replace_existing=self.config.update_storage - ) + self._storage = deployment_client.get_storage(replace_existing=self.config.update_storage) self._benchmark_input = self._benchmark.prepare_input( storage=self._storage, size=settings["input-size"] ) @@ -84,9 +82,7 @@ def run(self): self._function.memory = memory self._deployment_client.update_function(self._function, self._benchmark) self._sebs_client.cache_client.update_function(self._function) - self.run_configuration( - settings, settings["repetitions"], suffix=str(memory) - ) + self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) def compute_statistics(self, times: List[float]): @@ -156,10 +152,7 @@ def _run_configuration( first_iteration = True while samples_gathered < repetitions: - if ( - run_type == PerfCost.RunType.COLD - or run_type == PerfCost.RunType.BURST - ): + if run_type == PerfCost.RunType.COLD or run_type == PerfCost.RunType.BURST: self._deployment_client.enforce_cold_start( [self._function], self._benchmark ) @@ -180,12 +173,8 @@ def _run_configuration( ret = res.get() if first_iteration: continue - if ( - run_type == PerfCost.RunType.COLD - and not ret.stats.cold_start - ) or ( - run_type == 
PerfCost.RunType.WARM - and ret.stats.cold_start + if (run_type == PerfCost.RunType.COLD and not ret.stats.cold_start) or ( + run_type == PerfCost.RunType.WARM and ret.stats.cold_start ): self.logging.info( f"Invocation {ret.request_id} " @@ -269,9 +258,7 @@ def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): PerfCost.RunType.SEQUENTIAL, settings, 1, repetitions, suffix ) else: - raise RuntimeError( - f"Unknown experiment type {experiment_type} for Perf-Cost!" - ) + raise RuntimeError(f"Unknown experiment type {experiment_type} for Perf-Cost!") def process( self, @@ -318,9 +305,7 @@ def process( else: if os.path.exists( - os.path.join( - directory, "perf-cost", f"{name}-processed{extension}" - ) + os.path.join(directory, "perf-cost", f"{name}-processed{extension}") ): self.logging.info(f"Skipping already processed {f}") continue @@ -364,9 +349,7 @@ def process( name, extension = os.path.splitext(f) with open( - os.path.join( - directory, "perf-cost", f"{name}-processed{extension}" - ), + os.path.join(directory, "perf-cost", f"{name}-processed{extension}"), "w", ) as out_f: out_f.write( diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index 3357ace5..5087b904 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -61,9 +61,7 @@ def metrics(self, func: str) -> dict: return self._metrics[func] @staticmethod - def deserialize( - cached_config: dict, cache: Cache, handlers: LoggingHandlers - ) -> "Result": + def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> "Result": invocations: Dict[str, dict] = {} for func, func_invocations in cached_config["_invocations"].items(): invocations[func] = {} @@ -71,9 +69,7 @@ def deserialize( invocations[func][invoc_id] = ExecutionResult.deserialize(invoc) ret = Result( ExperimentConfig.deserialize(cached_config["config"]["experiments"]), - DeploymentConfig.deserialize( - cached_config["config"]["deployment"], cache, handlers - ), + DeploymentConfig.deserialize(cached_config["config"]["deployment"], cache, handlers), invocations, # FIXME: compatibility with old results cached_config["metrics"] if "metrics" in cached_config else {}, diff --git a/sebs/faas/benchmark.py b/sebs/faas/benchmark.py index 60458495..1df12f33 100644 --- a/sebs/faas/benchmark.py +++ b/sebs/faas/benchmark.py @@ -131,15 +131,11 @@ def __init__(self): self.billing = ExecutionBilling() @staticmethod - def from_times( - client_time_begin: datetime, client_time_end: datetime - ) -> "ExecutionResult": + def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": ret = ExecutionResult() ret.times.client_begin = client_time_begin ret.times.client_end = client_time_end - ret.times.client = int( - (client_time_end - client_time_begin) / timedelta(microseconds=1) - ) + ret.times.client = int((client_time_end - client_time_begin) / timedelta(microseconds=1)) return ret def parse_benchmark_output(self, output: dict): @@ -216,9 +212,7 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: if status_code != 200: self.logging.error( - "Invocation on URL {} failed with status code {}!".format( - url, status_code - ) + "Invocation on URL {} failed with status code {}!".format(url, status_code) ) self.logging.error("Output: {}".format(output)) raise RuntimeError(f"Failed invocation of function! 
Output: {output}") @@ -233,14 +227,10 @@ def _http_invoke(self, payload: dict, url: str) -> ExecutionResult: return result except json.decoder.JSONDecodeError: self.logging.error( - "Invocation on URL {} failed with status code {}!".format( - url, status_code - ) + "Invocation on URL {} failed with status code {}!".format(url, status_code) ) self.logging.error("Output: {}".format(data.getvalue().decode())) - raise RuntimeError( - f"Failed invocation of function! Output: {data.getvalue().decode()}" - ) + raise RuntimeError(f"Failed invocation of function! Output: {data.getvalue().decode()}") # FIXME: 3.7+, future annotations @staticmethod @@ -307,11 +297,7 @@ def updated_code(self, val: bool): self._updated_code = val def triggers_all(self) -> List[Trigger]: - return [ - trig - for trigger_type, triggers in self._triggers.items() - for trig in triggers - ] + return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: try: @@ -331,9 +317,7 @@ def serialize(self) -> dict: "hash": self._code_package_hash, "code_package": self._code_package, "triggers": [ - obj.serialize() - for t_type, triggers in self._triggers.items() - for obj in triggers + obj.serialize() for t_type, triggers in self._triggers.items() for obj in triggers ], } diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 4eb349fe..55730e88 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -29,9 +29,7 @@ def __init__(self): @staticmethod @abstractmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> "Credentials": + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Credentials": pass """ @@ -62,9 +60,7 @@ def __init__(self): @staticmethod @abstractmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> "Resources": + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": pass """ diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 896b1f36..2b752047 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -77,9 +77,7 @@ def parse(self, path: str): with open(path) as f: definition = json.load(f) - self.states = { - n: State.deserialize(n, s) for n, s in definition["states"].items() - } + self.states = {n: State.deserialize(n, s) for n, s in definition["states"].items()} self.root = self.states[definition["root"]] def generate(self) -> str: diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 77f8fd80..e54812e2 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -168,9 +168,7 @@ def clean_bucket(self, bucket_name: str): def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): # Load cached information - cached_buckets = self.cache_client.get_storage_config( - self.deployment_name(), benchmark - ) + cached_buckets = self.cache_client.get_storage_config(self.deployment_name(), benchmark) if cached_buckets: self.input_buckets = cached_buckets["buckets"]["input"] for bucket in self.input_buckets: @@ -179,27 +177,19 @@ def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): # for bucket in self.output_buckets: # self.clean_bucket(bucket) self.cached = True - self.logging.info( - "Using cached storage input buckets {}".format(self.input_buckets) - ) - self.logging.info( - "Using cached storage output buckets {}".format(self.output_buckets) - ) + self.logging.info("Using cached storage input buckets {}".format(self.input_buckets)) + 
self.logging.info("Using cached storage output buckets {}".format(self.output_buckets)) return buckets = self.list_buckets(self.correct_name(benchmark)) for i in range(0, requested_buckets[0]): self.input_buckets.append( - self._create_bucket( - self.correct_name("{}-{}-input".format(benchmark, i)), buckets - ) + self._create_bucket(self.correct_name("{}-{}-input".format(benchmark, i)), buckets) ) self.input_buckets_files.append(self.list_bucket(self.input_buckets[-1])) for i in range(0, requested_buckets[1]): self.output_buckets.append( - self._create_bucket( - self.correct_name("{}-{}-output".format(benchmark, i)), buckets - ) + self._create_bucket(self.correct_name("{}-{}-output".format(benchmark, i)), buckets) ) self.save_storage(benchmark) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 24274fba..0bfc410a 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -119,9 +119,7 @@ def create_function(self, code_package: CodePackage, func_name: str) -> Function pass @abstractmethod - def create_workflow( - self, code_package: CodePackage, workflow_name: str - ) -> Workflow: + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Workflow: pass @abstractmethod @@ -145,14 +143,9 @@ def update_function(self, function: Function, code_package: CodePackage): """ - def get_function( - self, code_package: CodePackage, func_name: Optional[str] = None - ) -> Function: - if ( - code_package.language_version - not in self.system_config.supported_language_versions( - self.name(), code_package.language_name - ) + def get_function(self, code_package: CodePackage, func_name: Optional[str] = None) -> Function: + if code_package.language_version not in self.system_config.supported_language_versions( + self.name(), code_package.language_name ): raise Exception( "Unsupported {language} version {version} in {system}!".format( @@ -197,9 +190,7 @@ def get_function( function = self.function_type().deserialize(cached_function) self.cached_benchmark(function) self.logging.info( - "Using cached function {fname} in {loc}".format( - fname=func_name, loc=code_location - ) + "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location) ) # is the function up-to-date? 
if function.code_package_hash != code_package.hash or rebuilt: @@ -225,14 +216,9 @@ def get_function( def update_workflow(self, workflow: Workflow, code_package: CodePackage): pass - def get_workflow( - self, code_package: CodePackage, workflow_name: Optional[str] = None - ): - if ( - code_package.language_version - not in self.system_config.supported_language_versions( - self.name(), code_package.language_name - ) + def get_workflow(self, code_package: CodePackage, workflow_name: Optional[str] = None): + if code_package.language_version not in self.system_config.supported_language_versions( + self.name(), code_package.language_name ): raise Exception( "Unsupported {language} version {version} in {system}!".format( diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index ed448e02..2a68e007 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -36,9 +36,7 @@ def initialize(gcp_credentials: str) -> Credentials: return GCPCredentials(gcp_credentials) @staticmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> Credentials: + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: cached_config = cache.get_config("gcp") ret: GCPCredentials # Load cached values but only if they are non-empty @@ -57,9 +55,7 @@ def deserialize( else: # Check for new config if "credentials" in config and config["credentials"]: - ret = cast( - GCPCredentials, GCPCredentials.initialize(config["credentials"]) - ) + ret = cast(GCPCredentials, GCPCredentials.initialize(config["credentials"])) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ret.gcp_credentials # Look for default GCP credentials elif "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: @@ -93,9 +89,7 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): - cache.update_config( - val=self.gcp_credentials, keys=["gcp", "credentials", "keys_json"] - ) + cache.update_config(val=self.gcp_credentials, keys=["gcp", "credentials", "keys_json"]) """ @@ -123,23 +117,17 @@ def serialize(self) -> dict: return {} @staticmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> "Resources": + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": cached_config = cache.get_config("gcp") ret: GCPResources if cached_config and "resources" in cached_config: - ret = cast( - GCPResources, GCPResources.initialize(cached_config["resources"]) - ) + ret = cast(GCPResources, GCPResources.initialize(cached_config["resources"])) ret.logging_handlers = handlers ret.logging.info("Using cached resources for GCP") else: ret = cast(GCPResources, GCPResources.initialize(config)) ret.logging_handlers = handlers - ret.logging.info( - "No cached resources for GCP found, using user configuration." 
- ) + ret.logging.info("No cached resources for GCP found, using user configuration.") return ret def update_cache(self, cache: Cache): @@ -184,12 +172,8 @@ def redis_host(self) -> str: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": cached_config = cache.get_config("gcp") - credentials = cast( - GCPCredentials, GCPCredentials.deserialize(config, cache, handlers) - ) - resources = cast( - GCPResources, GCPResources.deserialize(config, cache, handlers) - ) + credentials = cast(GCPCredentials, GCPCredentials.deserialize(config, cache, handlers)) + resources = cast(GCPResources, GCPResources.deserialize(config, cache, handlers)) config_obj = GCPConfig(credentials, resources) config_obj.logging_handlers = handlers if cached_config: @@ -225,10 +209,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi old_value = getattr(config_obj, config_key) # ignore empty values - if ( - getattr(config_obj, config_key) != config[config_key] - and config[config_key] - ): + if getattr(config_obj, config_key) != config[config_key] and config[config_key]: config_obj.logging.info( f"Updating cached key {config_key} with {old_value} " f"to user-provided value {config[config_key]}." diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 1a70fe4c..317781cf 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -47,9 +47,7 @@ def deserialize(cached_config: dict) -> "GCPFunction": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get( - trigger["type"] - ), + {"Library": FunctionLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 587e03e7..fc780b90 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -103,9 +103,7 @@ def get_storage( buckets=None, ) -> PersistentStorage: if not self.storage: - self.storage = GCPStorage( - self.config.region, self.cache_client, replace_existing - ) + self.storage = GCPStorage(self.config.region, self.cache_client, replace_existing) self.storage.logging_handlers = self.logging_handlers else: self.storage.replace_existing = replace_existing @@ -169,9 +167,7 @@ def package_code( new_path = os.path.join(directory, new_name) shutil.move(old_path, new_path) - replace_string_in_file( - new_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"' - ) + replace_string_in_file(new_path, "{{REDIS_HOST}}", f'"{self.config.redis_host}"') """ zip the whole directroy (the zip-file gets uploaded to gcp later) @@ -197,9 +193,7 @@ def package_code( return os.path.join(directory, "{}.zip".format(code_package.name)), bytes_size - def create_function( - self, code_package: CodePackage, func_name: str - ) -> "GCPFunction": + def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunction": package = code_package.code_location benchmark = code_package.name @@ -214,17 +208,10 @@ def create_function( code_package_name = cast(str, os.path.basename(package)) code_bucket, idx = storage_client.add_input_bucket(benchmark) storage_client.upload(code_bucket, package, code_package_name) - self.logging.info( - "Uploading function {} code to {}".format(func_name, code_bucket) - ) + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) full_func_name = GCP.get_full_function_name(project_name, location, func_name) 
- get_req = ( - self.function_client.projects() - .locations() - .functions() - .get(name=full_func_name) - ) + get_req = self.function_client.projects().locations().functions().get(name=full_func_name) try: get_req.execute() except HttpError: @@ -237,16 +224,12 @@ def create_function( body={ "name": full_func_name, "entryPoint": "handler", - "runtime": code_package.language_name - + language_runtime.replace(".", ""), + "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": memory, "timeout": str(timeout) + "s", "httpsTrigger": {}, "ingressSettings": "ALLOW_ALL", - "sourceArchiveUrl": "gs://" - + code_bucket - + "/" - + code_package_name, + "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_package_name, }, ) ) @@ -271,18 +254,14 @@ def create_function( ) ) allow_unauthenticated_req.execute() - self.logging.info( - f"Function {func_name} accepts now unauthenticated invocations!" - ) + self.logging.info(f"Function {func_name} accepts now unauthenticated invocations!") function = GCPFunction( func_name, benchmark, code_package.hash, timeout, memory, code_bucket ) else: # if result is not empty, then function does exists - self.logging.info( - "Function {} exists on GCP, update the instance.".format(func_name) - ) + self.logging.info("Function {} exists on GCP, update the instance.".format(func_name)) function = GCPFunction( name=func_name, @@ -312,15 +291,10 @@ def create_function_trigger( location = self.config.region project_name = self.config.project_name - full_func_name = GCP.get_full_function_name( - project_name, location, function.name - ) + full_func_name = GCP.get_full_function_name(project_name, location, function.name) self.logging.info(f"Function {function.name} - waiting for deployment...") our_function_req = ( - self.function_client.projects() - .locations() - .functions() - .get(name=full_func_name) + self.function_client.projects().locations().functions().get(name=full_func_name) ) deployed = False @@ -374,8 +348,7 @@ def update_function(self, function: Function, code_package: CodePackage): body={ "name": full_func_name, "entryPoint": "handler", - "runtime": code_package.language_name - + language_runtime.replace(".", ""), + "runtime": code_package.language_name + language_runtime.replace(".", ""), "availableMemoryMb": function.memory, "timeout": str(function.timeout) + "s", "httpsTrigger": {}, @@ -396,9 +369,7 @@ def update_function(self, function: Function, code_package: CodePackage): def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" - def create_workflow( - self, code_package: CodePackage, workflow_name: str - ) -> "GCPWorkflow": + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCPWorkflow": benchmark = code_package.name timeout = code_package.config.timeout memory = code_package.config.memory @@ -418,9 +389,7 @@ def create_workflow( funcs = [self.create_function(code_package, prefix + fn) for fn in func_names] # generate workflow definition.json - urls = [ - self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs - ] + urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] func_triggers = {n: u for (n, u) in zip(func_names, urls)} gen = GCPGenerator(workflow_name, func_triggers) @@ -430,9 +399,7 @@ def create_workflow( # map functions require their own workflows parent = GCP.get_location(project_name, location) for map_id, map_def in 
gen.generate_maps(): - full_workflow_name = GCP.get_full_workflow_name( - project_name, location, map_id - ) + full_workflow_name = GCP.get_full_workflow_name(project_name, location, map_id) create_req = ( self.workflow_client.projects() .locations() @@ -449,14 +416,9 @@ def create_workflow( create_req.execute() self.logging.info(f"Map workflow {map_id} has been created!") - full_workflow_name = GCP.get_full_workflow_name( - project_name, location, workflow_name - ) + full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) get_req = ( - self.workflow_client.projects() - .locations() - .workflows() - .get(name=full_workflow_name) + self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) ) try: @@ -543,9 +505,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): funcs = [self.create_function(code_package, prefix + fn) for fn in func_names] # Generate workflow definition.json - urls = [ - self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs - ] + urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] func_triggers = {n: u for (n, u) in zip(func_names, urls)} gen = GCPGenerator(workflow.name, func_triggers) gen.parse(definition_path) @@ -628,9 +588,7 @@ def wrapper(gen): from google.cloud import logging as gcp_logging logging_client = gcp_logging.Client() - logger = logging_client.logger( - "cloudfunctions.googleapis.com%2Fcloud-functions" - ) + logger = logging_client.logger("cloudfunctions.googleapis.com%2Fcloud-functions") """ GCP accepts only single date format: 'YYYY-MM-DDTHH:MM:SSZ'. @@ -672,9 +630,7 @@ def wrapper(gen): assert regex_result exec_time = regex_result.group().split()[0] # convert into microseconds - requests[execution_id].provider_times.execution = ( - int(exec_time) * 1000 - ) + requests[execution_id].provider_times.execution = int(exec_time) * 1000 invocations_processed += 1 self.logging.info( f"GCP: Received {entries} entries, found time metrics for {invocations_processed} " @@ -709,9 +665,7 @@ def wrapper(gen): list_request = monitoring_v3.ListTimeSeriesRequest( name=project_name, - filter='metric.type = "cloudfunctions.googleapis.com/function/{}"'.format( - metric - ), + filter='metric.type = "cloudfunctions.googleapis.com/function/{}"'.format(metric), interval=interval, ) @@ -739,9 +693,7 @@ def _enforce_cold_start(self, function: Function): .patch( name=name, updateMask="environmentVariables", - body={ - "environmentVariables": {"cold_start": str(self.cold_start_counter)} - }, + body={"environmentVariables": {"cold_start": str(self.cold_start_counter)}}, ) ) res = req.execute() @@ -793,9 +745,7 @@ def get_functions( if not self.is_deployed(func.name): undeployed_functions.append(func) deployed = len(undeployed_functions_before) - len(undeployed_functions) - self.logging.info( - f"Deployed {deployed} out of {len(undeployed_functions_before)}" - ) + self.logging.info(f"Deployed {deployed} out of {len(undeployed_functions_before)}") if deployed == len(undeployed_functions_before): deployment_done = True break @@ -807,9 +757,7 @@ def get_functions( return functions def is_deployed(self, func_name: str, versionId: int = -1) -> bool: - name = GCP.get_full_function_name( - self.config.project_name, self.config.region, func_name - ) + name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) function_client = self.get_function_client() status_req = 
function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() @@ -819,9 +767,7 @@ def is_deployed(self, func_name: str, versionId: int = -1) -> bool: return status_res["versionId"] == versionId def deployment_version(self, func: Function) -> int: - name = GCP.get_full_function_name( - self.config.project_name, self.config.region, func.name - ) + name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) function_client = self.get_function_client() status_req = function_client.projects().locations().functions().get(name=name) status_res = status_req.execute() diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 9b8503da..8202cd0e 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -48,9 +48,7 @@ def _create_bucket(self, name, buckets: List[str] = []): logging.info("Created bucket {}".format(bucket_name)) return bucket_name else: - logging.info( - "Bucket {} for {} already exists, skipping.".format(bucket_name, name) - ) + logging.info("Bucket {} for {} already exists, skipping.".format(bucket_name, name)) return bucket_name def download(self, bucket_name: str, key: str, filepath: str) -> None: @@ -63,9 +61,7 @@ def upload(self, bucket_name: str, filepath: str, key: str): logging.info("Upload {} to {}".format(filepath, bucket_name)) bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key, chunk_size=4 * 1024 * 1024) - gcp_storage.blob._MAX_MULTIPART_SIZE = ( - 5 * 1024 * 1024 - ) # workaround for connection timeout + gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # workaround for connection timeout blob.upload_from_filename(filepath) def list_bucket(self, bucket_name: str) -> List[str]: @@ -100,9 +96,7 @@ def uploader_func(self, bucket_idx: int, key: str, filepath: str) -> None: if not self.replace_existing: for blob in self.input_buckets_files[bucket_idx]: if key == blob: - logging.info( - "Skipping upload of {} to {}".format(filepath, bucket_name) - ) + logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) return bucket_name = self.input_buckets[bucket_idx] self.upload(bucket_name, filepath, key) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index fe000b8e..f1be549a 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -61,8 +61,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # GCP's fixed style for a function name config = self.deployment_client.config full_func_name = ( - f"projects/{config.project_name}/locations/" - f"{config.region}/functions/{self.name}" + f"projects/{config.project_name}/locations/" f"{config.region}/functions/{self.name}" ) function_client = self.deployment_client.get_function_client() req = ( @@ -111,9 +110,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: execution = Execution(argument=json.dumps(payload)) begin = datetime.datetime.now() - res = execution_client.create_execution( - parent=full_workflow_name, execution=execution - ) + res = execution_client.create_execution(parent=full_workflow_name, execution=execution) end = datetime.datetime.now() gcp_result = ExecutionResult.from_times(begin, end) diff --git a/sebs/gcp/workflow.py b/sebs/gcp/workflow.py index d598a16e..f1846bc4 100644 --- a/sebs/gcp/workflow.py +++ b/sebs/gcp/workflow.py @@ -53,9 +53,7 @@ def deserialize(cached_config: dict) -> "GCPWorkflow": for trigger in cached_config["triggers"]: trigger_type = cast( Trigger, - {"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get( - trigger["type"] - ), + 
{"Library": WorkflowLibraryTrigger, "HTTP": HTTPTrigger}.get(trigger["type"]), ) assert trigger_type, "Unknown trigger type {}".format(trigger["type"]) ret.add_trigger(trigger_type.deserialize(trigger)) diff --git a/sebs/local/config.py b/sebs/local/config.py index 9a33c93a..3c5e18ec 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -8,9 +8,7 @@ def serialize(self) -> dict: return {} @staticmethod - def deserialize( - config: dict, cache: Cache, handlers: LoggingHandlers - ) -> Credentials: + def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: return LocalCredentials() diff --git a/sebs/local/local.py b/sebs/local/local.py index 6d452072..1695c1cd 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -142,9 +142,7 @@ def package_code( return directory, bytes_size - def create_function( - self, code_package: CodePackage, func_name: str - ) -> "LocalFunction": + def create_function(self, code_package: CodePackage, func_name: str) -> "LocalFunction": home_dir = os.path.join( "/home", @@ -211,9 +209,7 @@ def update_function(self, function: Function, code_package: CodePackage): There's only one trigger - HTTP. """ - def create_trigger( - self, func: Function, trigger_type: Trigger.TriggerType - ) -> Trigger: + def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) diff --git a/sebs/local/storage.py b/sebs/local/storage.py index 2caaab3c..9af64149 100644 --- a/sebs/local/storage.py +++ b/sebs/local/storage.py @@ -23,9 +23,7 @@ def deployment_name(): # the location does not matter MINIO_REGION = "us-east-1" - def __init__( - self, docker_client: docker.client, cache_client: Cache, replace_existing: bool - ): + def __init__(self, docker_client: docker.client, cache_client: Cache, replace_existing: bool): super().__init__(self.MINIO_REGION, cache_client, replace_existing) self._docker_client = docker_client self._port = 9000 @@ -56,9 +54,7 @@ def start(self): self.logging.error("Starting Minio storage failed! Reason: {}".format(e)) raise RuntimeError("Starting Minio storage unsuccesful") except Exception as e: - self.logging.error( - "Starting Minio storage failed! Unknown error: {}".format(e) - ) + self.logging.error("Starting Minio storage failed! Unknown error: {}".format(e)) raise RuntimeError("Starting Minio storage unsuccesful") def configure_connection(self): @@ -97,9 +93,7 @@ def _create_bucket(self, name: str, buckets: List[str] = []): for bucket_name in buckets: if name in bucket_name: self.logging.info( - "Bucket {} for {} already exists, skipping.".format( - bucket_name, name - ) + "Bucket {} for {} already exists, skipping.".format(bucket_name, name) ) return bucket_name # minio has limit of bucket name to 16 characters @@ -146,9 +140,7 @@ def clean_bucket(self, bucket: str): ) errors = self.connection.remove_objects(bucket, delete_object_list) for error in errors: - self.logging.error( - "Error when deleting object from bucket {}: {}!", bucket, error - ) + self.logging.error("Error when deleting object from bucket {}: {}!", bucket, error) def correct_name(self, name: str) -> str: return name diff --git a/sebs/regression.py b/sebs/regression.py index e75b002e..9c7e5bf8 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -40,9 +40,7 @@ def test(self): f"Begin regression test of {benchmark_name} on {deployment_client.name()}, " f"region: {deployment_client.config.region}." 
) - experiment_config = self.client.get_experiment_config( - self.experiment_config - ) + experiment_config = self.client.get_experiment_config(self.experiment_config) benchmark = self.client.get_benchmark( benchmark_name, deployment_client, experiment_config ) @@ -74,9 +72,7 @@ def test(self): failure = True print(f"{benchmark_name} fail on trigger: {trigger_type}") else: - print( - f"{benchmark_name} success on trigger: {trigger_type}" - ) + print(f"{benchmark_name} success on trigger: {trigger_type}") except RuntimeError: failure = True print(f"{benchmark_name} fail on trigger: {trigger_type}") @@ -166,9 +162,7 @@ def __init__(self): # no way to directly access test instance from here def status(self, *args, **kwargs): - self.all_correct = self.all_correct and ( - kwargs["test_status"] in ["inprogress", "success"] - ) + self.all_correct = self.all_correct and (kwargs["test_status"] in ["inprogress", "success"]) test_name = kwargs["test_id"].split("_")[-1] if not kwargs["test_status"]: test_id = kwargs["test_id"] @@ -178,11 +172,7 @@ def status(self, *args, **kwargs): elif kwargs["test_status"] == "fail": print("\n-------------\n") print("{0[test_id]}: {0[test_status]}".format(kwargs)) - print( - "{0[test_id]}: {1}".format( - kwargs, self.output[kwargs["test_id"]].decode() - ) - ) + print("{0[test_id]}: {1}".format(kwargs, self.output[kwargs["test_id"]].decode())) print("\n-------------\n") self.failures.add(test_name) elif kwargs["test_status"] == "success": @@ -204,9 +194,7 @@ def regression_suite( suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequence)) if "azure" in providers: assert "azure" in cloud_config - suite.addTest( - unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequence) - ) + suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequence)) if "gcp" in providers: assert "gcp" in cloud_config suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequence)) @@ -223,9 +211,7 @@ def regression_suite( print(f"Select test {test_name}") else: print(f"Skip test {test_name}") - concurrent_suite = testtools.ConcurrentStreamTestSuite( - lambda: ((test, None) for test in tests) - ) + concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((test, None) for test in tests)) result = TracingStreamResult() result.startTestRun() concurrent_suite.run(result) @@ -234,9 +220,7 @@ def regression_suite( for suc in result.success: print(f"- {suc}") if len(result.failures): - print( - f"Failures when executing {len(result.failures)} out of {len(tests)} functions" - ) + print(f"Failures when executing {len(result.failures)} out of {len(tests)} functions") for failure in result.failures: print(f"- {failure}") return not result.all_correct diff --git a/sebs/sebs.py b/sebs/sebs.py index 37edcb09..e19ae9cd 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -35,9 +35,7 @@ def verbose(self) -> bool: def logging_filename(self) -> Optional[str]: return self._logging_filename - def generate_logging_handlers( - self, logging_filename: Optional[str] = None - ) -> LoggingHandlers: + def generate_logging_handlers(self, logging_filename: Optional[str] = None) -> LoggingHandlers: filename = logging_filename if logging_filename else self.logging_filename if filename in self._handlers: return self._handlers[filename] @@ -139,9 +137,7 @@ def get_experiment( } if experiment_type not in implementations: raise RuntimeError(f"Experiment {experiment_type} not supported!") - experiment = implementations[experiment_type]( - 
self.get_experiment_config(config) - ) + experiment = implementations[experiment_type](self.get_experiment_config(config)) experiment.logging_handlers = self.generate_logging_handlers( logging_filename=logging_filename ) diff --git a/sebs/utils.py b/sebs/utils.py index cf3b9cf5..cef1f6cf 100644 --- a/sebs/utils.py +++ b/sebs/utils.py @@ -98,17 +98,13 @@ def replace_string_in_file(path: str, from_str: str, to_str: str): def connect_to_redis_cache(host: str): - redis = Redis( - host=host, port=6379, decode_responses=True, socket_connect_timeout=10 - ) + redis = Redis(host=host, port=6379, decode_responses=True, socket_connect_timeout=10) redis.ping() return redis -def download_measurements( - redis: Redis, workflow_name: str, after: float, **static_args -): +def download_measurements(redis: Redis, workflow_name: str, after: float, **static_args): payloads = [] for key in redis.scan_iter(match=f"{workflow_name}/*"): @@ -184,9 +180,7 @@ def find_package_code(benchmark: str, path: str): def global_logging(): logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" - logging.basicConfig( - format=logging_format, datefmt=logging_date_format, level=logging.INFO - ) + logging.basicConfig(format=logging_format, datefmt=logging_date_format, level=logging.INFO) class LoggingHandlers: @@ -194,9 +188,7 @@ def __init__(self, verbose: bool = False, filename: Optional[str] = None): logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" formatter = logging.Formatter(logging_format, logging_date_format) - self.handlers: List[ - Union[logging.FileHandler, logging.StreamHandler[TextIO]] - ] = [] + self.handlers: List[Union[logging.FileHandler, logging.StreamHandler[TextIO]]] = [] # Add stdout output if verbose: From 7ab52da44d7729e89c9c02ecb73c4800c3a99a58 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Tue, 26 Apr 2022 16:38:19 +0200 Subject: [PATCH 55/68] Fix azure main --- .../wrappers/azure/python/main_workflow.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/benchmarks/wrappers/azure/python/main_workflow.py b/benchmarks/wrappers/azure/python/main_workflow.py index 3863d89c..154baa93 100644 --- a/benchmarks/wrappers/azure/python/main_workflow.py +++ b/benchmarks/wrappers/azure/python/main_workflow.py @@ -34,22 +34,23 @@ async def main(req: func.HttpRequest, starter: str, context: func.Context) -> fu end = datetime.datetime.now() is_cold, container_id = probe_cold_start() - status_body = json.loads(res.get_body()) - failed = status_body.get("runtimeStatus") == "Failed" - code = 500 if failed else 200 + status = await client.get_status(instance_id) + code = 500 if status.runtime_status == "Failed" else 200 + + try: + result = json.loads(res.get_body()) + except json.decoder.JSONDecodeError: + result = res.get_body().decode() + body = { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), "is_cold": is_cold, "container_id": container_id, "request_id": context.invocation_id, + "result": result } - if failed: - body = {**body, **status_body} - else: - body["res"] = status_body - return func.HttpResponse( status_code=code, body=json.dumps(body), From f22e41b3dbd035c1847ad989665ef42c31527aec Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 27 Apr 2022 14:19:24 +0200 Subject: [PATCH 56/68] Linting 6 --- .mypy.ini | 3 +++ sebs/aws/aws.py | 5 ++++- sebs/aws/config.py | 5 +++-- sebs/aws/generator.py | 10 +++++----- sebs/azure/azure.py | 2 +- 
sebs/azure/config.py | 5 +++-- sebs/azure/function_app.py | 22 ++++++++++++++-------- sebs/faas/benchmark.py | 12 +++++++++--- sebs/faas/fsm.py | 6 +++--- sebs/gcp/config.py | 5 +++-- sebs/gcp/gcp.py | 24 +++++++++++++----------- sebs/gcp/generator.py | 2 +- sebs/gcp/storage.py | 2 +- sebs/local/local.py | 2 +- 14 files changed, 64 insertions(+), 41 deletions(-) diff --git a/.mypy.ini b/.mypy.ini index fece12c6..fd7e432c 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -32,3 +32,6 @@ ignore_missing_imports = True [mypy-testtools] ignore_missing_imports = True + +[mypy-redis] +ignore_missing_imports = True \ No newline at end of file diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 8774072b..5429c9ff 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -407,8 +407,11 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "SFN code_package.hash, ) except self.sfn_client.exceptions.StateMachineAlreadyExists as e: - arn = re.search("'([^']*)'", str(e)).group()[1:-1] + match = re.search("'([^']*)'", str(e)) + if not match: + raise + arn = match.group()[1:-1] self.logging.info( "Workflow {} exists on AWS, retrieve configuration.".format(workflow_name) ) diff --git a/sebs/aws/config.py b/sebs/aws/config.py index c2be7f40..fa9d0887 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -237,10 +237,11 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AWSConfig(Config): - def __init__(self, credentials: AWSCredentials, resources: AWSResources): + def __init__(self, credentials: AWSCredentials, resources: AWSResources, redis_host: str): super().__init__() self._credentials = credentials self._resources = resources + self._redis_host = redis_host @staticmethod def typename() -> str: @@ -272,7 +273,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) resources = cast(AWSResources, AWSResources.deserialize(config, cache, handlers)) - config_obj = AWSConfig(credentials, resources) + config_obj = AWSConfig(credentials, resources, cached_config["redis_host"]) config_obj.logging_handlers = handlers # Load cached values if cached_config: diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index d00cfe2b..229c6392 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Union +from typing import Dict, List, Union, Any import numbers from sebs.faas.fsm import Generator, State, Task, Switch, Map @@ -10,17 +10,17 @@ def __init__(self, func_arns: Dict[str, str]): self._func_arns = func_arns def postprocess(self, states: List[State], payloads: List[dict]) -> dict: - payloads = super().postprocess(states, payloads) + state_payloads = super().postprocess(states, payloads) definition = { "Comment": "SeBS auto-generated benchmark", "StartAt": self.root.name, - "States": payloads, + "States": state_payloads, } return definition def encode_task(self, state: Task) -> Union[dict, List[dict]]: - payload = {"Type": "Task", "Resource": self._func_arns[state.func_name]} + payload: Dict[str, Any] = {"Type": "Task", "Resource": self._func_arns[state.func_name]} if state.next: payload["Next"] = state.next @@ -47,7 +47,7 @@ def _encode_case(self, case: Switch.Case) -> dict: return {"Variable": "$." 
+ case.var, cond: case.val, "Next": case.next} def encode_map(self, state: Map) -> Union[dict, List[dict]]: - payload = { + payload: Dict[str, Any] = { "Type": "Map", "ItemsPath": "$." + state.array, "Iterator": { diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index 7a24a95c..3fb3fcbd 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -316,7 +316,7 @@ def default_benchmark_name(self, code_package: CodePackage) -> str: B = TypeVar("B", bound=FunctionApp) - def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: B) -> B: + def create_benchmark(self, code_package: CodePackage, name: str, benchmark_cls: Type[B]) -> B: language = code_package.language_name language_runtime = code_package.language_version resource_group = self.config.resources.resource_group(self.cli_instance) diff --git a/sebs/azure/config.py b/sebs/azure/config.py index ea673fc2..ebdd2e87 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -269,11 +269,12 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AzureConfig(Config): - def __init__(self, credentials: AzureCredentials, resources: AzureResources): + def __init__(self, credentials: AzureCredentials, resources: AzureResources, redis_host: str): super().__init__() self._resources_id = "" self._credentials = credentials self._resources = resources + self._redis_host = redis_host @property def credentials(self) -> AzureCredentials: @@ -313,7 +314,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AzureCredentials, AzureCredentials.deserialize(config, cache, handlers)) resources = cast(AzureResources, AzureResources.deserialize(config, cache, handlers)) - config_obj = AzureConfig(credentials, resources) + config_obj = AzureConfig(credentials, resources, cached_config["redis_host"]) config_obj.logging_handlers = handlers # Load cached values if cached_config: diff --git a/sebs/azure/function_app.py b/sebs/azure/function_app.py index 7667ca0c..fbe51bee 100644 --- a/sebs/azure/function_app.py +++ b/sebs/azure/function_app.py @@ -1,8 +1,10 @@ from sebs.azure.config import AzureResources -from sebs.faas.benchmark import Function +from sebs.faas.benchmark import Benchmark, Function, Workflow +from typing import cast -class FunctionApp(Function): + +class FunctionApp(Benchmark): def __init__( self, name: str, @@ -20,8 +22,8 @@ def serialize(self) -> dict: } @staticmethod - def deserialize(cached_config: dict) -> Function: - ret = AzureFunction( + def deserialize(cached_config: dict) -> FunctionApp: + ret = FunctionApp( cached_config["name"], cached_config["code_package"], cached_config["hash"], @@ -36,9 +38,13 @@ def deserialize(cached_config: dict) -> Function: return ret -class AzureFunction(FunctionApp): - pass +class AzureFunction(Function, FunctionApp): + @staticmethod + def deserialize(cached_config: dict) -> AzureFunction: + return cast(AzureFunction, FunctionApp.deserialize(cached_config)) -class AzureWorkflow(FunctionApp): - pass +class AzureWorkflow(Workflow, FunctionApp): + @staticmethod + def deserialize(cached_config: dict) -> AzureWorkflow: + return cast(AzureWorkflow, FunctionApp.deserialize(cached_config)) diff --git a/sebs/faas/benchmark.py b/sebs/faas/benchmark.py index 1df12f33..a96e0cd2 100644 --- a/sebs/faas/benchmark.py +++ b/sebs/faas/benchmark.py @@ -323,13 +323,19 @@ def serialize(self) -> dict: @staticmethod @abstractmethod - def deserialize(cached_config: 
dict) -> "Function": + def deserialize(cached_config: dict) -> "Benchmark": pass class Function(Benchmark): - pass + @staticmethod + @abstractmethod + def deserialize(cached_config: dict) -> "Function": + pass class Workflow(Benchmark): - pass + @staticmethod + @abstractmethod + def deserialize(cached_config: dict) -> "Workflow": + pass diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 2b752047..e3fa98da 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from typing import Optional, List, Callable, Union +from typing import Optional, List, Callable, Union, Dict, Type import json @@ -66,7 +66,7 @@ def deserialize(cls, name: str, payload: dict) -> "Map": ) -_STATE_TYPES = {"task": Task, "switch": Switch, "map": Map} +_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map} class Generator(ABC): @@ -81,7 +81,7 @@ def parse(self, path: str): self.root = self.states[definition["root"]] def generate(self) -> str: - states = self.states.values() + states = list(self.states.values()) payloads = [] for s in states: obj = self.encode_state(s) diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 2a68e007..52f234ba 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -144,10 +144,11 @@ class GCPConfig(Config): _project_name: str - def __init__(self, credentials: GCPCredentials, resources: GCPResources): + def __init__(self, credentials: GCPCredentials, resources: GCPResources, redis_host: str): super().__init__() self._credentials = credentials self._resources = resources + self._redis_host = redis_host @property def region(self) -> str: @@ -174,7 +175,7 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi cached_config = cache.get_config("gcp") credentials = cast(GCPCredentials, GCPCredentials.deserialize(config, cache, handlers)) resources = cast(GCPResources, GCPResources.deserialize(config, cache, handlers)) - config_obj = GCPConfig(credentials, resources) + config_obj = GCPConfig(credentials, resources, cached_config["redis_host"]) config_obj.logging_handlers = handlers if cached_config: config_obj.logging.info("Loading cached config for GCP") diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index fc780b90..d46fb974 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -11,7 +11,7 @@ from googleapiclient.discovery import build from googleapiclient.errors import HttpError -from google.cloud import monitoring_v3 +from google.cloud import monitoring_v3 # type: ignore from sebs.cache import Cache from sebs.config import SeBSConfig @@ -21,6 +21,7 @@ from ..faas.system import System from sebs.gcp.config import GCPConfig from sebs.gcp.storage import GCPStorage +from sebs.gcp.triggers import HTTPTrigger from sebs.gcp.function import GCPFunction from sebs.gcp.workflow import GCPWorkflow from sebs.gcp.generator import GCPGenerator @@ -285,8 +286,6 @@ def create_function(self, code_package: CodePackage, func_name: str) -> "GCPFunc def create_function_trigger( self, function: Function, trigger_type: Trigger.TriggerType ) -> Trigger: - from sebs.gcp.triggers import HTTPTrigger - if trigger_type == Trigger.TriggerType.HTTP: location = self.config.region @@ -389,7 +388,8 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP funcs = [self.create_function(code_package, prefix + fn) for fn in func_names] # generate workflow definition.json - urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] + triggers = 
[self.create_function_trigger(f, Trigger.TriggerType.HTTP) for f in funcs] + urls = [cast(HTTPTrigger, t).url for t in triggers] func_triggers = {n: u for (n, u) in zip(func_names, urls)} gen = GCPGenerator(workflow_name, func_triggers) @@ -401,7 +401,7 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP for map_id, map_def in gen.generate_maps(): full_workflow_name = GCP.get_full_workflow_name(project_name, location, map_id) create_req = ( - self.workflow_client.projects() + self.workflow_client.projects() # type: ignore .locations() .workflows() .create( @@ -418,14 +418,14 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) get_req = ( - self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) + self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) # type: ignore ) try: get_req.execute() except HttpError: create_req = ( - self.workflow_client.projects() + self.workflow_client.projects() # type: ignore .locations() .workflows() .create( @@ -505,7 +505,8 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): funcs = [self.create_function(code_package, prefix + fn) for fn in func_names] # Generate workflow definition.json - urls = [self.create_function_trigger(f, Trigger.TriggerType.HTTP).url for f in funcs] + triggers = [self.create_function_trigger(f, Trigger.TriggerType.HTTP) for f in funcs] + urls = [cast(HTTPTrigger, t).url for t in triggers] func_triggers = {n: u for (n, u) in zip(func_names, urls)} gen = GCPGenerator(workflow.name, func_triggers) gen.parse(definition_path) @@ -516,7 +517,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): self.config.project_name, self.config.region, map_id ) patch_req = ( - self.workflow_client.projects() + self.workflow_client.projects() # type: ignore .locations() .workflows() .patch( @@ -534,7 +535,7 @@ def update_workflow(self, workflow: Workflow, code_package: CodePackage): self.config.project_name, self.config.region, workflow.name ) req = ( - self.workflow_client.projects() + self.workflow_client.projects() # type: ignore .locations() .workflows() .patch( @@ -585,7 +586,8 @@ def wrapper(gen): There shouldn't be problem of waiting for complete results, since logs appear very quickly here. 
""" - from google.cloud import logging as gcp_logging + + from google.cloud import logging as gcp_logging # type: ignore logging_client = gcp_logging.Client() logger = logging_client.logger("cloudfunctions.googleapis.com%2Fcloud-functions") diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 0dde4a6a..7694a766 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -9,7 +9,7 @@ def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): super().__init__() self._workflow_name = workflow_name self._func_triggers = func_triggers - self._map_funcs = dict() + self._map_funcs: Dict[str, str] = dict() def postprocess(self, states: List[State], payloads: List[dict]) -> dict: payloads.append({"final": {"return": ["${res}"]}}) diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index 8202cd0e..8c170a90 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -2,7 +2,7 @@ import uuid from typing import List -from google.cloud import storage as gcp_storage +from google.cloud import storage as gcp_storage # type: ignore from sebs.cache import Cache from ..faas.storage import PersistentStorage diff --git a/sebs/local/local.py b/sebs/local/local.py index 1695c1cd..8d8ffc8b 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -220,7 +220,7 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T raise RuntimeError("Not supported!") function.add_trigger(trigger) - self.cache_client.update_function(function) + self.cache_client.update_benchmark(function) return trigger def cached_benchmark(self, benchmark: Benchmark): From 33e1ee24100050bc0a271fa90f5be62671928952 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 27 Apr 2022 14:24:31 +0200 Subject: [PATCH 57/68] Linting 7 --- .mypy.ini | 5 +---- sebs/azure/function_app.py | 6 +++--- sebs/gcp/gcp.py | 5 ++++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.mypy.ini b/.mypy.ini index fd7e432c..0c41d4cb 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -24,10 +24,7 @@ ignore_missing_imports = True [mypy-minio] ignore_missing_imports = True -[mypy-google.cloud] -ignore_missing_imports = True - -[mypy-google.api_core] +[mypy-google.*] ignore_missing_imports = True [mypy-testtools] diff --git a/sebs/azure/function_app.py b/sebs/azure/function_app.py index fbe51bee..0f62c400 100644 --- a/sebs/azure/function_app.py +++ b/sebs/azure/function_app.py @@ -22,7 +22,7 @@ def serialize(self) -> dict: } @staticmethod - def deserialize(cached_config: dict) -> FunctionApp: + def deserialize(cached_config: dict) -> "FunctionApp": ret = FunctionApp( cached_config["name"], cached_config["code_package"], @@ -40,11 +40,11 @@ def deserialize(cached_config: dict) -> FunctionApp: class AzureFunction(Function, FunctionApp): @staticmethod - def deserialize(cached_config: dict) -> AzureFunction: + def deserialize(cached_config: dict) -> "AzureFunction": return cast(AzureFunction, FunctionApp.deserialize(cached_config)) class AzureWorkflow(Workflow, FunctionApp): @staticmethod - def deserialize(cached_config: dict) -> AzureWorkflow: + def deserialize(cached_config: dict) -> "AzureWorkflow": return cast(AzureWorkflow, FunctionApp.deserialize(cached_config)) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index d46fb974..8434f4ec 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -418,7 +418,10 @@ def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCP full_workflow_name = GCP.get_full_workflow_name(project_name, location, workflow_name) get_req = ( - 
self.workflow_client.projects().locations().workflows().get(name=full_workflow_name) # type: ignore + self.workflow_client.projects() # type: ignore + .locations() + .workflows() + .get(name=full_workflow_name) ) try: From ad6262096044620d4de1833ea82fefd110c2dd8f Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 27 Apr 2022 14:40:58 +0200 Subject: [PATCH 58/68] Relative soft link --- benchmarks/wrappers/azure/python/fsm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/wrappers/azure/python/fsm.py b/benchmarks/wrappers/azure/python/fsm.py index 315f6590..b3891312 120000 --- a/benchmarks/wrappers/azure/python/fsm.py +++ b/benchmarks/wrappers/azure/python/fsm.py @@ -1 +1 @@ -/Users/Laurin/Documents/ETH/MSc_Thesis/serverless-benchmarks/sebs/faas/fsm.py \ No newline at end of file +../../../../../serverless-benchmarks/sebs/faas/fsm.py \ No newline at end of file From 43a8a6e05f469ec3687db0f024c76e446416beb2 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 27 Apr 2022 15:00:52 +0200 Subject: [PATCH 59/68] Error message workflow local deployment --- sebs.py | 4 ++++ sebs/gcp/gcp.py | 6 +++++- sebs/local/local.py | 13 ++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sebs.py b/sebs.py index 36aae3c0..740d485f 100755 --- a/sebs.py +++ b/sebs.py @@ -19,6 +19,7 @@ from sebs.utils import update_nested_dict, download_measurements, connect_to_redis_cache from sebs.faas import System as FaaSSystem from sebs.faas.benchmark import Trigger +from sebs.local import Local PROJECT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -267,6 +268,9 @@ def workflow(benchmark, benchmark_input_size, repetitions, trigger, workflow_nam sebs_client, deployment_client, ) = parse_common_params(**kwargs) + if isinstance(deployment_client, Local): + raise NotImplementedError("Local workflow deployment is currently not supported.") + redis = connect_to_redis_cache(deployment_client.config.redis_host) experiment_config = sebs_client.get_experiment_config(config["experiments"]) diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 8434f4ec..09906f34 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -21,7 +21,6 @@ from ..faas.system import System from sebs.gcp.config import GCPConfig from sebs.gcp.storage import GCPStorage -from sebs.gcp.triggers import HTTPTrigger from sebs.gcp.function import GCPFunction from sebs.gcp.workflow import GCPWorkflow from sebs.gcp.generator import GCPGenerator @@ -287,6 +286,7 @@ def create_function_trigger( self, function: Function, trigger_type: Trigger.TriggerType ) -> Trigger: if trigger_type == Trigger.TriggerType.HTTP: + from sebs.gcp.triggers import HTTPTrigger location = self.config.region project_name = self.config.project_name @@ -369,6 +369,8 @@ def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" def create_workflow(self, code_package: CodePackage, workflow_name: str) -> "GCPWorkflow": + from sebs.gcp.triggers import HTTPTrigger + benchmark = code_package.name timeout = code_package.config.timeout memory = code_package.config.memory @@ -494,6 +496,8 @@ def create_workflow_trigger( return trigger def update_workflow(self, workflow: Workflow, code_package: CodePackage): + from sebs.gcp.triggers import HTTPTrigger + workflow = cast(GCPWorkflow, workflow) # Make sure we have a valid workflow benchmark diff --git a/sebs/local/local.py b/sebs/local/local.py index 8d8ffc8b..90f4125b 100644 --- a/sebs/local/local.py 
+++ b/sebs/local/local.py @@ -209,7 +209,7 @@ def update_function(self, function: Function, code_package: CodePackage): There's only one trigger - HTTP. """ - def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + def create_function_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: from sebs.local.function import HTTPTrigger function = cast(LocalFunction, func) @@ -252,3 +252,14 @@ def default_benchmark_name(code_package: CodePackage) -> str: @staticmethod def format_function_name(func_name: str) -> str: return func_name + + def create_workflow(self, code_package: CodePackage, workflow_name: str) -> Workflow: + raise NotImplementedError() + + def create_workflow_trigger( + self, workflow: Workflow, trigger_type: Trigger.TriggerType + ) -> Trigger: + raise NotImplementedError() + + def update_workflow(self, workflow: Workflow, code_package: CodePackage): + raise NotImplementedError() From 6355ea508e411265e9f7cc844b95c8b3eddb9838 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Wed, 27 Apr 2022 16:59:28 +0200 Subject: [PATCH 60/68] Basic workflow docs --- docs/workflows.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 docs/workflows.md diff --git a/docs/workflows.md b/docs/workflows.md new file mode 100644 index 00000000..963d6d41 --- /dev/null +++ b/docs/workflows.md @@ -0,0 +1,88 @@ +## Workflows + +### Installation + +SeBS makes use of [redis](https://redis.io) in order to make reliable and accurate measurements during the execution of workflows. Ideally, the redis instance should be deployed in the same cloud region such that the write latency is minimal. +Because not all platforms allow connections from a workflow execution to a VPC cache, it proved to be easiest to just deploy a VM and have that machine host redis. Make sure to open port `6379` and admit connections in your VPC accordingly. Redis can be hosted as follows: +```bash +docker run --network=host --name redis -d redis redis-server --save 60 1 --loglevel warning +``` + +### Definition + +All platforms accept different scheduling schemes which makes it cumbersome to run the same tests on different platforms. SeBS defines a workflow scheduling language that is transcribed to the desired platform's scheme. +The schedule is represented by a state machine and is encoded in a JSON file. It starts with the following keys: + +```json +{ + "root": "first_state", + "states": { + } +} +``` + +`root` defines the initial state to start the workflow from, while `states` holds a dictionary of `(name, state)` tuples. The following state types are supported. + +#### Task + +A task state is the most basic state: it executes a serverless function. + +```json +{ + "type": "task", + "func_name": "a_very_useful_func", + "next": "postprocess_the_useful_func" +}, +``` + +`func_name` is the name of the file in the benchmark directory, `next` sets the state with which to follow. + +#### Switch + +A switch state makes it possible to encode basic control flow. 
+ +```json +{ + "type": "switch", + "cases": [ + { + "var": "people.number", + "op": "<", + "val": 10, + "next": "few_people" + }, + { + "var": "people.number", + "op": ">=", + "val": 10, + "next": "many_people" + } + ], + "default": "few_people" +} +``` + +This state transcribes to the following Python expression: +```python +if people.number < 10: + few_people() +elif people.number >= 10: + many_people() +else: + few_people() +``` + +#### Map + +A map state takes a list as input and processes each element in parallel using the given function: + +```json +{ + "type": "map", + "array": "people", + "func_name": "rename_person", + "next": "save" +} +``` + +`array` defines the list to be processed, while `func_name` is the name of the file in the benchmark directory. Note that in contrast to a `task`'s function, this one receives only an element of the given array, not the entire running variable. From f49290cdeea87ee03819ae17ad5152857ad1a7f9 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 28 Apr 2022 14:01:49 +0200 Subject: [PATCH 61/68] Write new line first --- sebs/code_package.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sebs/code_package.py b/sebs/code_package.py index 8b384b07..36c311d6 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -299,6 +299,7 @@ def add_deployment_package_python(self, output_dir): ) if len(packages): with open(os.path.join(output_dir, "requirements.txt"), "a") as out: + out.write("\n") for package in packages: out.write(package + "\n") From 2d90c5418df44f9f1f7f059910b7af9759597e17 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 5 May 2022 13:19:11 +0200 Subject: [PATCH 62/68] Hash definition.json too --- sebs/code_package.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sebs/code_package.py b/sebs/code_package.py index 36c311d6..e349a610 100644 --- a/sebs/code_package.py +++ b/sebs/code_package.py @@ -198,6 +198,13 @@ def hash_directory(directory: str, deployment: str, language: str): path = os.path.join(directory, f) with open(path, "rb") as opened_file: hash_sum.update(opened_file.read()) + + # workflow definition + definition_path = os.path.join(directory, os.path.pardir, "definition.json") + if os.path.exists(definition_path): + with open(definition_path, "rb") as opened_file: + hash_sum.update(opened_file.read()) + # wrappers wrappers = project_absolute_path( "benchmarks", "wrappers", deployment, language, WRAPPERS[language] From dad2b4b3a4b460c971bb973dbe2702e64d532a7d Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 6 May 2022 10:56:41 +0200 Subject: [PATCH 63/68] Remove exponential backoff --- sebs/aws/aws.py | 9 ++++----- sebs/aws/triggers.py | 5 +---- sebs/gcp/triggers.py | 5 +---- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 5429c9ff..44018154 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -181,7 +181,7 @@ def package_code( def wait_for_function(self, func_name: str): ready = False - backoff_delay = 1 # Start wait with delay of 1 second + count = 0 while not ready: ret = self.lambda_client.get_function(FunctionName=func_name) state = ret["Configuration"]["State"] @@ -190,14 +190,13 @@ def wait_for_function(self, func_name: str): # If we haven't seen the result yet, wait a second. if not ready: - time.sleep(backoff_delay) - # Double the delay to provide exponential backoff. 
- backoff_delay *= 2 + count += 1 + time.sleep(10) elif "Failed" in (state, update_status): self.logging.error(f"Cannot wait for failed {func_name}") break - if backoff_delay > 60: + if count > 6: self.logging.error(f"Function {func_name} stuck in state {state} after 60s") break diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index 0cadcd9b..0a5332f0 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -113,7 +113,6 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Wait for execution to finish, then print results. execution_finished = False - backoff_delay = 1 # Start wait with delay of 1 second while not execution_finished: execution = client.describe_execution(executionArn=execution_arn) status = execution["status"] @@ -121,9 +120,7 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # If we haven't seen the result yet, wait a second. if not execution_finished: - time.sleep(backoff_delay) - # Double the delay to provide exponential backoff. - backoff_delay *= 2 + time.sleep(10) elif status == "FAILED": self.logging.error(f"Invocation of {self.name} failed") self.logging.error(f"Input: {payload}") diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index f1be549a..9135512c 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -117,16 +117,13 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: # Wait for execution to finish, then print results. execution_finished = False - backoff_delay = 1 # Start wait with delay of 1 second while not execution_finished: execution = execution_client.get_execution(request={"name": res.name}) execution_finished = execution.state != Execution.State.ACTIVE # If we haven't seen the result yet, wait a second. if not execution_finished: - time.sleep(backoff_delay) - # Double the delay to provide exponential backoff. - backoff_delay *= 2 + time.sleep(10) elif execution.state == Execution.State.FAILED: self.logging.error(f"Invocation of {self.name} failed") self.logging.error(f"Input: {payload}") From fff3c11787f50a6487eba4aa24f0b574ea91290c Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Thu, 19 May 2022 10:55:43 +0200 Subject: [PATCH 64/68] Unique aws map func name --- sebs/aws/generator.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 229c6392..0be11202 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -1,5 +1,6 @@ from typing import Dict, List, Union, Any import numbers +import uuid from sebs.faas.fsm import Generator, State, Task, Switch, Map @@ -47,13 +48,15 @@ def _encode_case(self, case: Switch.Case) -> dict: return {"Variable": "$." + case.var, cond: case.val, "Next": case.next} def encode_map(self, state: Map) -> Union[dict, List[dict]]: + map_func_name = "func_" + str(uuid.uuid4())[:8] + payload: Dict[str, Any] = { "Type": "Map", "ItemsPath": "$." 
+ state.array, "Iterator": { - "StartAt": "func", + "StartAt": map_func_name, "States": { - "func": { + map_func_name: { "Type": "Task", "Resource": self._func_arns[state.func_name], "End": True, From e47dedbd6e2319a87fccb3b67915ae4130325bf9 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 20 May 2022 12:16:25 +0200 Subject: [PATCH 65/68] Add loop state --- .../620.func_invo/definition.json | 29 +++------------- .../600.workflows/620.func_invo/input.py | 2 +- .../620.func_invo/python/gen_buffer_five.py | 6 ---- .../620.func_invo/python/gen_buffer_four.py | 6 ---- .../620.func_invo/python/gen_buffer_one.py | 6 ---- .../620.func_invo/python/gen_buffer_three.py | 6 ---- .../620.func_invo/python/gen_buffer_two.py | 6 ---- .../620.func_invo/python/process.py | 14 ++++++++ .../wrappers/azure/python/run_workflow.py | 4 +++ sebs/aws/generator.py | 24 ++++++++++--- sebs/faas/fsm.py | 34 ++++++++++++++++--- sebs/gcp/generator.py | 4 +-- 12 files changed, 74 insertions(+), 67 deletions(-) delete mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_five.py delete mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py delete mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py delete mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py delete mode 100644 benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py create mode 100644 benchmarks/600.workflows/620.func_invo/python/process.py diff --git a/benchmarks/600.workflows/620.func_invo/definition.json b/benchmarks/600.workflows/620.func_invo/definition.json index 52437e0d..bd4ef736 100644 --- a/benchmarks/600.workflows/620.func_invo/definition.json +++ b/benchmarks/600.workflows/620.func_invo/definition.json @@ -1,29 +1,10 @@ { - "root": "gen_one", + "root": "process", "states": { - "gen_one": { - "type": "task", - "func_name": "gen_buffer_one", - "next": "gen_two" - }, - "gen_two": { - "type": "task", - "func_name": "gen_buffer_two", - "next": "gen_three" - }, - "gen_three": { - "type": "task", - "func_name": "gen_buffer_three", - "next": "gen_four" - }, - "gen_four": { - "type": "task", - "func_name": "gen_buffer_four", - "next": "gen_five" - }, - "gen_five": { - "type": "task", - "func_name": "gen_buffer_five" + "process": { + "type": "loop", + "func_name": "process", + "count": 10 } } } \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/input.py b/benchmarks/600.workflows/620.func_invo/input.py index 661c056d..237cc3f7 100644 --- a/benchmarks/600.workflows/620.func_invo/input.py +++ b/benchmarks/600.workflows/620.func_invo/input.py @@ -1,7 +1,7 @@ size_generators = { 'test' : 10, 'small' : 2**10, - 'large': 2**20 + 'large': 2**15 } def buckets_count(): diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_five.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_five.py deleted file mode 100644 index bbbea63c..00000000 --- a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_five.py +++ /dev/null @@ -1,6 +0,0 @@ -def handler(event): - size = int(event["size"]) if isinstance(event, dict) else len(event) - data = (str(i % 255) for i in range(size)) - data = "".join(data) - - return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py deleted file mode 100644 index bbbea63c..00000000 --- a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_four.py +++ /dev/null @@ 
-1,6 +0,0 @@ -def handler(event): - size = int(event["size"]) if isinstance(event, dict) else len(event) - data = (str(i % 255) for i in range(size)) - data = "".join(data) - - return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py deleted file mode 100644 index bbbea63c..00000000 --- a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_one.py +++ /dev/null @@ -1,6 +0,0 @@ -def handler(event): - size = int(event["size"]) if isinstance(event, dict) else len(event) - data = (str(i % 255) for i in range(size)) - data = "".join(data) - - return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py deleted file mode 100644 index bbbea63c..00000000 --- a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_three.py +++ /dev/null @@ -1,6 +0,0 @@ -def handler(event): - size = int(event["size"]) if isinstance(event, dict) else len(event) - data = (str(i % 255) for i in range(size)) - data = "".join(data) - - return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py b/benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py deleted file mode 100644 index bbbea63c..00000000 --- a/benchmarks/600.workflows/620.func_invo/python/gen_buffer_two.py +++ /dev/null @@ -1,6 +0,0 @@ -def handler(event): - size = int(event["size"]) if isinstance(event, dict) else len(event) - data = (str(i % 255) for i in range(size)) - data = "".join(data) - - return data \ No newline at end of file diff --git a/benchmarks/600.workflows/620.func_invo/python/process.py b/benchmarks/600.workflows/620.func_invo/python/process.py new file mode 100644 index 00000000..807a9fb2 --- /dev/null +++ b/benchmarks/600.workflows/620.func_invo/python/process.py @@ -0,0 +1,14 @@ +from random import shuffle + +def handler(event): + size = int(event["size"]) if isinstance(event, dict) else len(event) + elems = list(range(size)) + shuffle(elems) + + data = "" + for i in elems: + data += str(i % 255) + if len(data) > size: + break + + return data[:size] \ No newline at end of file diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 4e21986d..84142517 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -60,6 +60,10 @@ def run_workflow(context: df.DurableOrchestrationContext): res = yield context.task_all(tasks) current = states.get(current.next, None) + elif isinstance(current, Loop): + for i in range(current.count): + res = yield context.call_activity(current.func_name, res) + current = states.get(current.next, None) else: raise ValueError(f"Undefined state: {current}") diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 0be11202..8361d31f 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -2,7 +2,7 @@ import numbers import uuid -from sebs.faas.fsm import Generator, State, Task, Switch, Map +from sebs.faas.fsm import Generator, State, Task, Switch, Map, Loop class SFNGenerator(Generator): @@ -10,8 +10,12 @@ def __init__(self, func_arns: Dict[str, str]): super().__init__() self._func_arns = func_arns - def postprocess(self, states: List[State], payloads: List[dict]) -> dict: - state_payloads = super().postprocess(states, payloads) + def postprocess(self, payloads: 
List[dict]) -> dict: + def _nameless(p: dict) -> dict: + del p["Name"] + return p + + state_payloads = {p["Name"]: _nameless(p) for p in payloads} definition = { "Comment": "SeBS auto-generated benchmark", "StartAt": self.root.name, @@ -21,7 +25,11 @@ def postprocess(self, states: List[State], payloads: List[dict]) -> dict: return definition def encode_task(self, state: Task) -> Union[dict, List[dict]]: - payload: Dict[str, Any] = {"Type": "Task", "Resource": self._func_arns[state.func_name]} + payload: Dict[str, Any] = { + "Name": state.name, + "Type": "Task", + "Resource": self._func_arns[state.func_name] + } if state.next: payload["Next"] = state.next @@ -32,7 +40,12 @@ def encode_task(self, state: Task) -> Union[dict, List[dict]]: def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: choises = [self._encode_case(c) for c in state.cases] - return {"Type": "Choice", "Choices": choises, "Default": state.default} + return { + "Name": state.name, + "Type": "Choice", + "Choices": choises, + "Default": state.default + } def _encode_case(self, case: Switch.Case) -> dict: type = "Numeric" if isinstance(case.val, numbers.Number) else "String" @@ -51,6 +64,7 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: map_func_name = "func_" + str(uuid.uuid4())[:8] payload: Dict[str, Any] = { + "Name": state.name, "Type": "Map", "ItemsPath": "$." + state.array, "Iterator": { diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index e3fa98da..20718f1f 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -1,6 +1,6 @@ from abc import ABC from abc import abstractmethod -from typing import Optional, List, Callable, Union, Dict, Type +from typing import Optional, List, Callable, Union, Dict, Type, Tuple import json @@ -66,7 +66,19 @@ def deserialize(cls, name: str, payload: dict) -> "Map": ) -_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map} +class Loop(State): + def __init__(self, name: str, func_name: str, count: int, next: Optional[str]): + self.name = name + self.func_name = func_name + self.count = count + self.next = next + + @classmethod + def deserialize(cls, name: str, payload: dict) -> "Task": + return cls(name=name, func_name=payload["func_name"], count=payload["count"], next=payload.get("next")) + + +_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map, "loop": Loop} class Generator(ABC): @@ -92,12 +104,12 @@ def generate(self) -> str: else: raise ValueError("Unknown encoded state returned.") - definition = self.postprocess(states, payloads) + definition = self.postprocess(payloads) return self._export_func(definition) - def postprocess(self, states: List[State], payloads: List[dict]) -> dict: - return {s.name: p for (s, p) in zip(states, payloads)} + def postprocess(self, payloads: List[dict]) -> dict: + pass def encode_state(self, state: State) -> Union[dict, List[dict]]: if isinstance(state, Task): @@ -106,6 +118,8 @@ def encode_state(self, state: State) -> Union[dict, List[dict]]: return self.encode_switch(state) elif isinstance(state, Map): return self.encode_map(state) + elif isinstance(state, Loop): + return self.encode_loop(state) else: raise ValueError(f"Unknown state of type {type(state)}.") @@ -120,3 +134,13 @@ def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: @abstractmethod def encode_map(self, state: Map) -> Union[dict, List[dict]]: pass + + def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: + tasks = [] + for i in range(state.count): + name = state.name if i == 0 else 
f"{state.name}_{i}" + next = state.next if i == state.count-1 else f"{state.name}_{i+1}" + task = Task(name, state.func_name, next) + tasks.append(self.encode_task(task)) + + return tasks diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 7694a766..c0aa7643 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -1,7 +1,7 @@ import uuid from typing import Dict, Union, List -from sebs.faas.fsm import Generator, State, Task, Switch, Map +from sebs.faas.fsm import Generator, State, Task, Switch, Map, Loop class GCPGenerator(Generator): @@ -11,7 +11,7 @@ def __init__(self, workflow_name: str, func_triggers: Dict[str, str]): self._func_triggers = func_triggers self._map_funcs: Dict[str, str] = dict() - def postprocess(self, states: List[State], payloads: List[dict]) -> dict: + def postprocess(self, payloads: List[dict]) -> dict: payloads.append({"final": {"return": ["${res}"]}}) definition = {"main": {"params": ["res"], "steps": payloads}} From c92ceabed32131fa9a6286e8adb5f86e403ebec2 Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Fri, 20 May 2022 14:19:13 +0200 Subject: [PATCH 66/68] Rename loop to repeat --- benchmarks/wrappers/azure/python/run_workflow.py | 2 +- sebs/aws/generator.py | 2 +- sebs/faas/fsm.py | 10 +++++----- sebs/gcp/generator.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py index 84142517..345f85e9 100644 --- a/benchmarks/wrappers/azure/python/run_workflow.py +++ b/benchmarks/wrappers/azure/python/run_workflow.py @@ -60,7 +60,7 @@ def run_workflow(context: df.DurableOrchestrationContext): res = yield context.task_all(tasks) current = states.get(current.next, None) - elif isinstance(current, Loop): + elif isinstance(current, Repeat): for i in range(current.count): res = yield context.call_activity(current.func_name, res) current = states.get(current.next, None) diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py index 8361d31f..337a8687 100644 --- a/sebs/aws/generator.py +++ b/sebs/aws/generator.py @@ -2,7 +2,7 @@ import numbers import uuid -from sebs.faas.fsm import Generator, State, Task, Switch, Map, Loop +from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat class SFNGenerator(Generator): diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 20718f1f..0119db2f 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -66,7 +66,7 @@ def deserialize(cls, name: str, payload: dict) -> "Map": ) -class Loop(State): +class Repeat(State): def __init__(self, name: str, func_name: str, count: int, next: Optional[str]): self.name = name self.func_name = func_name @@ -78,7 +78,7 @@ def deserialize(cls, name: str, payload: dict) -> "Task": return cls(name=name, func_name=payload["func_name"], count=payload["count"], next=payload.get("next")) -_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map, "loop": Loop} +_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map, "repeat": Repeat} class Generator(ABC): @@ -118,8 +118,8 @@ def encode_state(self, state: State) -> Union[dict, List[dict]]: return self.encode_switch(state) elif isinstance(state, Map): return self.encode_map(state) - elif isinstance(state, Loop): - return self.encode_loop(state) + elif isinstance(state, Repeat): + return self.encode_repeat(state) else: raise ValueError(f"Unknown state of type {type(state)}.") @@ -135,7 +135,7 @@ def encode_switch(self, state: Switch) -> Union[dict, List[dict]]: 
def encode_map(self, state: Map) -> Union[dict, List[dict]]:
         pass
 
-    def encode_loop(self, state: Loop) -> Union[dict, List[dict]]:
+    def encode_repeat(self, state: Repeat) -> Union[dict, List[dict]]:
         tasks = []
         for i in range(state.count):
             name = state.name if i == 0 else f"{state.name}_{i}"
diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py
index c0aa7643..9f62705c 100644
--- a/sebs/gcp/generator.py
+++ b/sebs/gcp/generator.py
@@ -1,7 +1,7 @@
 import uuid
 from typing import Dict, Union, List
 
-from sebs.faas.fsm import Generator, State, Task, Switch, Map, Loop
+from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat
 
 
 class GCPGenerator(Generator):

From d1b1baad9222e9be7a5dcc2b3d0a8a70925d0bfc Mon Sep 17 00:00:00 2001
From: Laurin Brandner
Date: Fri, 20 May 2022 15:17:27 +0200
Subject: [PATCH 67/68] Add loop state

---
 .../wrappers/azure/python/run_workflow.py |  5 ++++
 sebs/aws/generator.py                      | 12 ++++++++-
 sebs/faas/fsm.py                           | 25 +++++++++++++++++--
 sebs/gcp/generator.py                      | 23 ++++++++++++++++-
 4 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/benchmarks/wrappers/azure/python/run_workflow.py b/benchmarks/wrappers/azure/python/run_workflow.py
index 345f85e9..c9e5fa7e 100644
--- a/benchmarks/wrappers/azure/python/run_workflow.py
+++ b/benchmarks/wrappers/azure/python/run_workflow.py
@@ -64,6 +64,11 @@ def run_workflow(context: df.DurableOrchestrationContext):
             for i in range(current.count):
                 res = yield context.call_activity(current.func_name, res)
             current = states.get(current.next, None)
+        elif isinstance(current, Loop):
+            array = get_var(res, current.array)
+            for elem in array:
+                yield context.call_activity(current.func_name, elem)
+            current = states.get(current.next, None)
         else:
             raise ValueError(f"Undefined state: {current}")
 
diff --git a/sebs/aws/generator.py b/sebs/aws/generator.py
index 337a8687..4bc38ee7 100644
--- a/sebs/aws/generator.py
+++ b/sebs/aws/generator.py
@@ -2,7 +2,7 @@
 import numbers
 import uuid
 
-from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat
+from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat, Loop
 
 
 class SFNGenerator(Generator):
@@ -85,3 +85,13 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]:
             payload["End"] = True
 
         return payload
+
+    def encode_loop(self, state: Loop) -> Union[dict, List[dict]]:
+        map_state = Map(state.name, state.func_name, state.array, state.next)
+        payload = self.encode_map(map_state)
+        payload["MaxConcurrency"] = 1
+        payload["ResultSelector"] = dict()
+        payload["ResultPath"] = "$."
+ str(uuid.uuid4())[:8] + + return payload + diff --git a/sebs/faas/fsm.py b/sebs/faas/fsm.py index 0119db2f..8e59be83 100644 --- a/sebs/faas/fsm.py +++ b/sebs/faas/fsm.py @@ -74,11 +74,28 @@ def __init__(self, name: str, func_name: str, count: int, next: Optional[str]): self.next = next @classmethod - def deserialize(cls, name: str, payload: dict) -> "Task": + def deserialize(cls, name: str, payload: dict) -> "Repeat": return cls(name=name, func_name=payload["func_name"], count=payload["count"], next=payload.get("next")) -_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map, "repeat": Repeat} +class Loop(State): + def __init__(self, name: str, func_name: str, array: str, next: Optional[str]): + self.name = name + self.func_name = func_name + self.array = array + self.next = next + + @classmethod + def deserialize(cls, name: str, payload: dict) -> "Loop": + return cls( + name=name, + func_name=payload["func_name"], + array=payload["array"], + next=payload.get("next"), + ) + + +_STATE_TYPES: Dict[str, Type[State]] = {"task": Task, "switch": Switch, "map": Map, "repeat": Repeat, "loop": Loop} class Generator(ABC): @@ -144,3 +161,7 @@ def encode_repeat(self, state: Repeat) -> Union[dict, List[dict]]: tasks.append(self.encode_task(task)) return tasks + + @abstractmethod + def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: + pass diff --git a/sebs/gcp/generator.py b/sebs/gcp/generator.py index 9f62705c..223c0c4b 100644 --- a/sebs/gcp/generator.py +++ b/sebs/gcp/generator.py @@ -1,7 +1,7 @@ import uuid from typing import Dict, Union, List -from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat +from sebs.faas.fsm import Generator, State, Task, Switch, Map, Repeat, Loop class GCPGenerator(Generator): @@ -56,6 +56,27 @@ def encode_map(self, state: Map) -> Union[dict, List[dict]]: } } + def encode_loop(self, state: Loop) -> Union[dict, List[dict]]: + url = self._func_triggers[state.func_name] + + return { + state.name: { + "for": { + "value": "val", + "index": "idx", + "in": "${"+state.array+"}", + "steps": [ + { + "body": { + "call": "http.post", + "args": {"url": url, "body": "${val}"} + } + } + ] + } + } + } + def generate_maps(self): for workflow_id, url in self._map_funcs.items(): yield ( From cc5e1a08bdda57f53f8cd43cb34f66fa570b74bc Mon Sep 17 00:00:00 2001 From: Laurin Brandner Date: Mon, 11 Dec 2023 15:16:45 +0100 Subject: [PATCH 68/68] Update benchmarks-data --- benchmarks-data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks-data b/benchmarks-data index 6a17a460..9166d3f8 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb +Subproject commit 9166d3f89621ad01919c8dd47bacdf04e36b890d
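
For readers following the FSM changes above: a minimal sketch of how the two new state types are described and deserialized, based only on the payload keys read by Repeat.deserialize and Loop.deserialize in [PATCH 67/68]. The state names, function names, and the surrounding definition layout are illustrative assumptions rather than part of the patches; only the keys func_name, count, array, and next come from the code, and the snippet assumes the patched sebs package is importable.

    # Sketch under the assumptions stated above; all names are hypothetical.
    from sebs.faas.fsm import Loop, Repeat

    repeat_payload = {
        "func_name": "process",   # activity invoked on every iteration
        "count": 3,               # fixed number of sequential invocations
        "next": "fan_out",        # state entered after the last iteration
    }
    loop_payload = {
        "func_name": "process_item",  # activity invoked once per array element
        "array": "items",             # name of the array variable to iterate over
        # no "next": terminal state
    }

    repeat_state = Repeat.deserialize("warm_up", repeat_payload)
    loop_state = Loop.deserialize("fan_out", loop_payload)
    assert repeat_state.count == 3 and repeat_state.next == "fan_out"
    assert loop_state.array == "items" and loop_state.next is None

Each backend then lowers these states differently, as the patches show: the AWS generator turns a loop into a sequential Map state whose result is discarded, the GCP generator emits a Workflows "for" step that posts every element to the function's HTTP trigger, and the Azure wrapper iterates the array with call_activity.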