From 8450742086c4b002f5946a36e2d5b87f8849dc3f Mon Sep 17 00:00:00 2001 From: Salvador Girones Gil Date: Mon, 22 Jan 2024 14:01:28 +0100 Subject: [PATCH 1/6] Bump surrealml-core to v0.0.7 (#17) --- modules/utils/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/utils/Cargo.toml b/modules/utils/Cargo.toml index 75d1a90..a6a7294 100644 --- a/modules/utils/Cargo.toml +++ b/modules/utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "surrealml-core" -version = "0.0.6" +version = "0.0.7" edition = "2021" build = "./build.rs" description = "The core machine learning library for SurrealML that enables SurrealDB to store and load ML models" From 4e0c3d4487e96045fee04d2c4f81b1f495a30afd Mon Sep 17 00:00:00 2001 From: maxwellflitton Date: Mon, 22 Jan 2024 17:34:58 +0000 Subject: [PATCH 2/6] updating interfaces for different packages --- .../workflows/surrealml_core_deployment.yml | 4 + .gitignore | 2 + setup.py | 1 + surrealml/__init__.py | 2 +- surrealml/engine/__init__.py | 18 ++ surrealml/engine/sklearn.py | 38 ++++ surrealml/engine/torch.py | 35 ++++ surrealml/engine/utils.py | 17 ++ surrealml/engine_enum.py | 18 -- surrealml/model_cache.py | 76 ------- surrealml/rust_adapter.py | 187 ++++++++++++++++++ surrealml/surml_file.py | 93 ++++----- tests/integration_tests/__init__.py | 0 tests/unit_tests/__init__.py | 0 tests/unit_tests/engine/__init__.py | 0 tests/unit_tests/engine/test_sklearn.py | 57 ++++++ tests/unit_tests/engine/test_torch.py | 46 +++++ tests/unit_tests/test_rust_adapter.py | 24 +++ tests/utils/__init__.py | 0 tests/utils/torch_linear.py | 54 +++++ 20 files changed, 526 insertions(+), 146 deletions(-) create mode 100644 surrealml/engine/__init__.py create mode 100644 surrealml/engine/sklearn.py create mode 100644 surrealml/engine/torch.py create mode 100644 surrealml/engine/utils.py delete mode 100644 surrealml/engine_enum.py delete mode 100644 surrealml/model_cache.py create mode 100644 surrealml/rust_adapter.py create mode 
100644 tests/integration_tests/__init__.py create mode 100644 tests/unit_tests/__init__.py create mode 100644 tests/unit_tests/engine/__init__.py create mode 100644 tests/unit_tests/engine/test_sklearn.py create mode 100644 tests/unit_tests/engine/test_torch.py create mode 100644 tests/unit_tests/test_rust_adapter.py create mode 100644 tests/utils/__init__.py create mode 100644 tests/utils/torch_linear.py diff --git a/.github/workflows/surrealml_core_deployment.yml b/.github/workflows/surrealml_core_deployment.yml index a22702c..3a62cb8 100644 --- a/.github/workflows/surrealml_core_deployment.yml +++ b/.github/workflows/surrealml_core_deployment.yml @@ -4,6 +4,10 @@ on: push: branches: - main + paths: + - 'src/**' + - 'build.rs' + - 'Cargo.toml' jobs: post_merge_job: diff --git a/.gitignore b/.gitignore index 3859ddc..8d24aae 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,5 @@ modules/utils/target/ modules/onnx_driver/target/ surrealdb_build/ modules/utils/onnx_driver/ +*.so +surrealml/rust_surrealml.cpython-310-darwin.so diff --git a/setup.py b/setup.py index 95ffc0a..c1e6323 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ rust_extensions=[RustExtension("surrealml.rust_surrealml", binding=Binding.PyO3)], packages=[ "surrealml", + "surrealml.engine", # "surrealdb.execution_mixins" ], package_data={ diff --git a/surrealml/__init__.py b/surrealml/__init__.py index 38722ac..83492be 100644 --- a/surrealml/__init__.py +++ b/surrealml/__init__.py @@ -1 +1 @@ -from surrealml.surml_file import SurMlFile \ No newline at end of file +# from surrealml.surml_file import SurMlFile \ No newline at end of file diff --git a/surrealml/engine/__init__.py b/surrealml/engine/__init__.py new file mode 100644 index 0000000..7dfa704 --- /dev/null +++ b/surrealml/engine/__init__.py @@ -0,0 +1,18 @@ +from enum import Enum + +from surrealml.engine.sklearn import SklearnOnnxAdapter +from surrealml.engine.torch import TorchOnnxAdapter + + +class Engine(Enum): + """ + The Engine 
enum is used to specify the engine to use for a given model. + + Attributes: + PYTORCH: The PyTorch engine which will be PyTorch and ONNX. + NATIVE: The native engine which will be native rust and linfa. + SKLEARN: The sklearn engine which will be sklearn and ONNX + """ + PYTORCH = "pytorch" + NATIVE = "native" + SKLEARN = "sklearn" diff --git a/surrealml/engine/sklearn.py b/surrealml/engine/sklearn.py new file mode 100644 index 0000000..ed5671b --- /dev/null +++ b/surrealml/engine/sklearn.py @@ -0,0 +1,38 @@ +try: + import skl2onnx +except ImportError: + skl2onnx = None + +from surrealml.engine.utils import create_file_cache_path + + +class SklearnOnnxAdapter: + + @staticmethod + def check_dependency() -> None: + """ + Checks if the sklearn dependency is installed raising an error if not. + Please call this function when performing any sklearn related operations. + """ + if skl2onnx is None: + raise ImportError("sklearn feature needs to be installed to use sklearn features") + + @staticmethod + def save_model_to_onnx(model, inputs) -> str: + """ + Saves a sklearn model to an onnx file. + + :param model: the sklearn model to convert. + :param inputs: the inputs to the model needed to trace the model + :return: the path to the cache created with a unique id to prevent collisions. 
+ """ + SklearnOnnxAdapter.check_dependency() + file_path = create_file_cache_path() + # the below check is to satisfy type checkers + if skl2onnx is not None: + onnx = skl2onnx.to_onnx(model, inputs) + + with open(file_path, "wb") as f: + f.write(onnx.SerializeToString()) + + return file_path diff --git a/surrealml/engine/torch.py b/surrealml/engine/torch.py new file mode 100644 index 0000000..87fd9f4 --- /dev/null +++ b/surrealml/engine/torch.py @@ -0,0 +1,35 @@ +try: + import torch +except ImportError: + torch = None + +from surrealml.engine.utils import create_file_cache_path + + +class TorchOnnxAdapter: + + @staticmethod + def check_dependency() -> None: + """ + Checks if the torch dependency is installed raising an error if not. + Please call this function when performing any torch related operations. + """ + if torch is None: + raise ImportError("torch feature needs to be installed to use torch features") + + @staticmethod + def save_model_to_onnx(model, inputs) -> str: + """ + Saves a torch model to an onnx file. + + :param model: the torch model to convert. + :param inputs: the inputs to the model needed to trace the model + :return: the path to the cache created with a unique id to prevent collisions. + """ + # torch is imported in a guarded try/except at module level to prevent the torch dependency from being required for the whole package. + file_path = create_file_cache_path() + # below is to satisfy type checkers + if torch is not None: + traced_script_module = torch.jit.trace(model, inputs) + torch.onnx.export(traced_script_module, inputs, file_path) + return file_path \ No newline at end of file diff --git a/surrealml/engine/utils.py b/surrealml/engine/utils.py new file mode 100644 index 0000000..ff69a36 --- /dev/null +++ b/surrealml/engine/utils.py @@ -0,0 +1,17 @@ +import os +import uuid + + +def create_file_cache_path(): + """ + Creates a file cache path for the model (creating the file cache if not there). 
+ + :return: the path to the cache created with a unique id to prevent collisions. + """ + cache_folder = '.surmlcache' + + if not os.path.exists(cache_folder): + os.makedirs(cache_folder) + unique_id = str(uuid.uuid4()) + file_name = f"{unique_id}.surml" + return os.path.join(cache_folder, file_name) diff --git a/surrealml/engine_enum.py b/surrealml/engine_enum.py deleted file mode 100644 index 629944b..0000000 --- a/surrealml/engine_enum.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -This file contains the Engine enum, which is used to specify the engine to use for a given model. -""" -from enum import Enum - - -class Engine(Enum): - """ - The Engine enum is used to specify the engine to use for a given model. - - Attributes: - PYTORCH: The PyTorch engine which will be PyTorch and tch-rs. - NATIVE: The native engine which will be native rust and linfa. - UNDEFINED: The undefined engine which will be used when the engine is not defined. - """ - PYTORCH = "pytorch" - NATIVE = "native" - UNDEFINED = "" diff --git a/surrealml/model_cache.py b/surrealml/model_cache.py deleted file mode 100644 index 5aa1135..0000000 --- a/surrealml/model_cache.py +++ /dev/null @@ -1,76 +0,0 @@ -""" -Defines the functionality of caching and processing a sklearn model. -""" -import os -import shutil -import uuid -import zipfile - -import torch -from hummingbird.ml import convert - - -class SkLearnModelCache: - """ - This class is responsible for caching and converting an sklearn model to a torchscript model. - """ - - @staticmethod - def create_file_cache(): - """ - Creates a file cache for the model. - - :return: the path to the cache created with a unique id to prevent collisions. 
- """ - cache_folder = '.surmlcache' - - if not os.path.exists(cache_folder): - os.makedirs(cache_folder) - unique_id = str(uuid.uuid4()) - file_name = f"{unique_id}.surml" - return os.path.join(cache_folder, file_name) - - # @staticmethod - # def cache_model(model, inputs, name=None): - # """ - # Caches a model and returns the file id. - # - # :param model: - # :param inputs: - # :param name: - # :return: - # """ - # file_path = SkLearnModelCache.create_file_cache() - # - # traced_script_module = torch.jit.trace(model, inputs) - # traced_script_module.save(file_path) - # file_id = load_cached_raw_model(str(file_path)) - # os.remove(file_path) - # if name is not None: - # add_name(file_id, name) - # return file_id - - @staticmethod - def convert_sklearn_model(model, inputs): - """ - Converts the sklearn model to a torchscript model. - - :param model: the sklearn model to convert. - :param inputs: the inputs to the model needed to trace the model - :return: the converted model. - """ - file_path = SkLearnModelCache.create_file_cache() - model = convert(model, 'torch.jit', inputs) - file_path = str(file_path).replace(".surml", "") - model.save(file_path) - zip_path = str(file_path) + ".zip" - - # Open the zip archive - with zipfile.ZipFile(zip_path, 'r') as zip_ref: - # Extract all the contents to the specified directory - zip_ref.extractall(file_path) - - model = torch.jit.load(os.path.join(file_path, "deploy_model.zip")) - shutil.rmtree(file_path) - os.remove(zip_path) - return model diff --git a/surrealml/rust_adapter.py b/surrealml/rust_adapter.py new file mode 100644 index 0000000..0c6a3fe --- /dev/null +++ b/surrealml/rust_adapter.py @@ -0,0 +1,187 @@ +try: + from surrealml.rust_surrealml import load_cached_raw_model, add_column, add_output, add_normaliser, save_model, \ + add_name, load_model, add_description, add_version, to_bytes, add_engine, add_author, add_origin + from surrealml.rust_surrealml import raw_compute, buffered_compute, upload_model +except 
ImportError: + load_cached_raw_model = None + add_column = None + add_output = None + add_normaliser = None + save_model = None + add_name = None + load_model = None + add_description = None + add_version = None + to_bytes = None + add_engine = None + add_author = None + add_origin = None + raw_compute = None + buffered_compute = None + upload_model = None + +from typing import Optional + +from surrealml.engine import Engine + + +class RustAdapter: + + def __init__(self, file_id: str, engine: Engine) -> None: + self.file_id: str = file_id + self.engine: Engine = engine + + @staticmethod + def pass_raw_model_into_rust(file_path: str) -> str: + """ + Points to a raw ONNX file and passes it into the rust library so it can be loaded + and tagged with a unique id so the Rust library can reference this model again + from within the rust library. + + :param file_path: the path to the raw ONNX file. + + :return: the unique id of the model. + """ + return load_cached_raw_model(file_path) + + def add_column(self, name: str) -> None: + """ + Adds a column to the model to the metadata (this needs to be called in order of the columns). + + :param name: the name of the column. + :return: None + """ + add_column(self.file_id, name) + + def add_output(self, output_name, normaliser_type, one, two): + """ + Adds an output to the model to the metadata. + :param output_name: the name of the output. + :param normaliser_type: the type of normaliser to use. + :param one: the first parameter of the normaliser. + :param two: the second parameter of the normaliser. + :return: None + """ + add_output(self.file_id, output_name, normaliser_type, one, two) + + def add_description(self, description): + """ + Adds a description to the model to the metadata. + + :param description: the description of the model. + :return: None + """ + add_description(self.file_id, description) + + def add_version(self, version): + """ + Adds a version to the model to the metadata. 
+ + :param version: the version of the model. + :return: None + """ + add_version(self.file_id, version) + + def add_normaliser(self, column_name, normaliser_type, one, two): + """ + Adds a normaliser to the model to the metadata for a column. + + :param column_name: the name of the column (column already needs to be in the metadata to create mapping) + :param normaliser_type: the type of normaliser to use. + :param one: the first parameter of the normaliser. + :param two: the second parameter of the normaliser. + :return: None + """ + add_normaliser(self.file_id, column_name, normaliser_type, one, two) + + def add_author(self, author): + """ + Adds an author to the model to the metadata. + + :param author: the author of the model. + :return: None + """ + add_author(self.file_id, author) + + def save(self, path): + """ + Saves the model to a file. + + :param path: the path to save the model to. + :return: None + """ + # the engine written into the metadata is the one this adapter was constructed with, and the origin is + # always tagged as "local" before the model is written to the given path + add_engine(self.file_id, self.engine.value) + add_origin(self.file_id, "local") + save_model(path, self.file_id) + + def to_bytes(self): + """ + Converts the model to bytes. + + :return: the model as bytes. + """ + return to_bytes(self.file_id) + + @staticmethod + def load(path): + """ + Loads a model from a file. + + :param path: the path to load the model from. + :return: the file id of the loaded model. + """ + return load_model(path) + + @staticmethod + def upload( + path: str, + url: str, + chunk_size: int, + namespace: str, + database: str, + username: Optional[str] = None, + password: Optional[str] = None + ) -> None: + """ + Uploads a model to a remote server. + + :param path: the path to load the model from. + :param url: the url of the remote server. + :param chunk_size: the size of each chunk to upload. + :param namespace: the namespace of the remote server. 
+ :param database: the database of the remote server. + :param username: the username of the remote server. + :param password: the password of the remote server. + + :return: None + """ + upload_model( + path, + url, + chunk_size, + namespace, + database, + username, + password + ) + + def raw_compute(self, input_vector, dims=None): + """ + Calculates an output from the model given an input vector. + + :param input_vector: a 1D vector of inputs to the model. + :param dims: the dimensions of the input vector to be sliced into + :return: the output of the model. + """ + return raw_compute(self.file_id, input_vector, dims) + + def buffered_compute(self, value_map): + """ + Calculates an output from the model given a value map. + + :param value_map: a dictionary of inputs to the model with the column names as keys and floats as values. + :return: the output of the model. + """ + return buffered_compute(self.file_id, value_map) diff --git a/surrealml/surml_file.py b/surrealml/surml_file.py index 08c620e..891ef13 100644 --- a/surrealml/surml_file.py +++ b/surrealml/surml_file.py @@ -5,18 +5,14 @@ import uuid from typing import Optional -import torch -from surrealml.rust_surrealml import load_cached_raw_model, add_column, add_output, add_normaliser, save_model, \ - add_name, load_model, add_description, add_version, to_bytes, add_engine, add_author, add_origin -from surrealml.rust_surrealml import raw_compute, buffered_compute, upload_model -from surrealml.model_cache import SkLearnModelCache -from surrealml.engine_enum import Engine +from surrealml.engine import Engine, SklearnOnnxAdapter, TorchOnnxAdapter +from surrealml.rust_adapter import RustAdapter class SurMlFile: - def __init__(self, model=None, name=None, inputs=None, sklearn=False): + def __init__(self, model=None, name=None, inputs=None, engine=None): """ The constructor for the SurMlFile class. 
@@ -28,40 +24,33 @@ def __init__(self, model=None, name=None, inputs=None, sklearn=False): self.model = model self.name = name self.inputs = inputs - self.sklearn = sklearn - if self.model is not None: - if sklearn is True: - self.model = SkLearnModelCache.convert_sklearn_model(model=self.model, inputs=self.inputs) - self.file_id = self._cache_model() - else: - self.file_id = None + self.engine = engine + self.file_id = self._cache_model() + self.rust_adapter = RustAdapter(self.file_id, self.engine) - def _cache_model(self): + def _cache_model(self) -> Optional[str]: """ Caches a model, so it can be loaded as raw bytes to be fused with the header. :return: the file id of the model so it can be retrieved from the cache. """ - cache_folder = '.surmlcache' - - if not os.path.exists(cache_folder): - os.makedirs(cache_folder) - - unique_id = str(uuid.uuid4()) - file_name = f"{unique_id}.surml" - file_path = os.path.join(cache_folder, file_name) - - if self.sklearn is True: - traced_script_module = self.model + # This is triggered when the model is loaded from a file as we are not passing in a model + if self.model is None and self.name is None and self.inputs is None and self.engine is None: + return None + + if self.engine == Engine.SKLEARN: + raw_file_path: str = SklearnOnnxAdapter.save_model_to_onnx( + model=self.model, + inputs=self.inputs + ) + elif self.engine == Engine.PYTORCH: + raw_file_path: str = TorchOnnxAdapter.save_model_to_onnx( + model=self.model, + inputs=self.inputs + ) else: - traced_script_module = torch.jit.trace(self.model, self.inputs) - - torch.onnx.export(traced_script_module, self.inputs, file_path) - file_id = load_cached_raw_model(str(file_path)) - os.remove(file_path) - if self.name is not None: - add_name(file_id, self.name) - return file_id + raise ValueError(f"Engine {self.engine} not supported") + return RustAdapter.pass_raw_model_into_rust(raw_file_path) def add_column(self, name): """ @@ -70,7 +59,7 @@ def add_column(self, name): 
:param name: the name of the column. :return: None """ - add_column(self.file_id, name) + self.rust_adapter.add_column(name=name) def add_output(self, output_name, normaliser_type, one, two): """ @@ -81,7 +70,7 @@ def add_output(self, output_name, normaliser_type, one, two): :param two: the second parameter of the normaliser. :return: None """ - add_output(self.file_id, output_name, normaliser_type, one, two) + self.rust_adapter.add_output(output_name, normaliser_type, one, two) def add_description(self, description): """ @@ -90,7 +79,7 @@ def add_description(self, description): :param description: the description of the model. :return: None """ - add_description(self.file_id, description) + self.rust_adapter.add_description(description) def add_version(self, version): """ @@ -99,7 +88,7 @@ def add_version(self, version): :param version: the version of the model. :return: None """ - add_version(self.file_id, version) + self.rust_adapter.add_version(version) def add_normaliser(self, column_name, normaliser_type, one, two): """ @@ -111,7 +100,7 @@ def add_normaliser(self, column_name, normaliser_type, one, two): :param two: the second parameter of the normaliser. :return: None """ - add_normaliser(self.file_id, column_name, normaliser_type, one, two) + self.rust_adapter.add_normaliser(column_name, normaliser_type, one, two) def add_author(self, author): """ @@ -120,7 +109,7 @@ def add_author(self, author): :param author: the author of the model. 
:return: None """ - add_author(self.file_id, author) + self.rust_adapter.add_author(author) def save(self, path): """ @@ -131,9 +120,7 @@ def save(self, path): """ # right now the only engine is pytorch so we can hardcode it but when we add more engines we will need to # add a parameter to the save function to specify the engine - add_engine(self.file_id, Engine.PYTORCH.value) - add_origin(self.file_id, "local") - save_model(path, self.file_id) + self.rust_adapter.save(path=path) def to_bytes(self): """ @@ -141,18 +128,22 @@ def to_bytes(self): :return: the model as bytes. """ - return to_bytes(self.file_id) + return self.rust_adapter.to_bytes() @staticmethod - def load(path): + def load(path, engine: Engine): """ - Loads a model from a file. + Loads a model from a file so compute operations can be done. :param path: the path to load the model from. - :return: + :param engine: the engine to use to load the model. + + :return: The SurMlFile with loaded model and engine definition """ self = SurMlFile() - self.file_id = load_model(path) + self.file_id = self.rust_adapter.load(path) + self.engine = engine + self.rust_adapter = RustAdapter(self.file_id, self.engine) return self @staticmethod @@ -178,7 +169,7 @@ def upload( :return: None """ - upload_model( + RustAdapter.upload( path, url, chunk_size, @@ -196,7 +187,7 @@ def raw_compute(self, input_vector, dims=None): :param dims: the dimensions of the input vector to be sliced into :return: the output of the model. """ - return raw_compute(self.file_id, input_vector, dims) + return self.rust_adapter.raw_compute(input_vector, dims) def buffered_compute(self, value_map): """ @@ -205,4 +196,4 @@ def buffered_compute(self, value_map): :param value_map: a dictionary of inputs to the model with the column names as keys and floats as values. :return: the output of the model. 
""" - return buffered_compute(self.file_id, value_map) + return self.rust_adapter.buffered_compute(value_map) diff --git a/tests/integration_tests/__init__.py b/tests/integration_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/engine/__init__.py b/tests/unit_tests/engine/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit_tests/engine/test_sklearn.py b/tests/unit_tests/engine/test_sklearn.py new file mode 100644 index 0000000..38b67d8 --- /dev/null +++ b/tests/unit_tests/engine/test_sklearn.py @@ -0,0 +1,57 @@ +""" +This test purely tests the storage of sklearn models in ONNX, we will test indiviudal sklearn models +in the integrations tests +""" +import shutil +from unittest import main, TestCase + +import numpy as np +import onnxruntime as ort +from sklearn.linear_model import LinearRegression + +from surrealml.engine.sklearn import SklearnOnnxAdapter + + +class TestSklearn(TestCase): + + def setUp(self): + np.random.seed(0) # For reproducibility + self.x = np.random.rand(100, 1) * 10 # 100 random numbers between 0 and 10 + noise = np.random.randn(100, 1) # 100 random noise values + self.y = 3 * self.x + 4 + noise # Linear relationship with noise + # Create and train the model + self.model = LinearRegression() + self.model.fit(self.x, self.y) + + def tearDown(self): + try: + shutil.rmtree(".surmlcache") + except OSError as e: + print(f"Error: surmlcache : {e.strerror}") + + def test_store_and_run(self): + file_path = SklearnOnnxAdapter.save_model_to_onnx(self.model, self.x[:1]) + + # Load the ONNX model + session = ort.InferenceSession(file_path) + + # Prepare input data (adjust the shape according to your model's requirements) + # For a linear regression model, it usually expects a single feature vector. 
+ # Example: Predicting for a single value + input_data = np.array([[5]], dtype=np.float64) # Replace with your input data + + # Get the name of the input node + input_name = session.get_inputs()[0].name + + # Run the model (make a prediction) + result = session.run(None, {input_name: input_data}) + + # The result is a list of outputs (since a model can have multiple outputs) + # For a simple linear regression model, it typically has a single output. + predicted_value = result[0][0][0] + + self.assertEqual(19.190618588148247, predicted_value) + + +if __name__ == '__main__': + main() diff --git a/tests/unit_tests/engine/test_torch.py b/tests/unit_tests/engine/test_torch.py new file mode 100644 index 0000000..dc32789 --- /dev/null +++ b/tests/unit_tests/engine/test_torch.py @@ -0,0 +1,46 @@ +""" +This test is just testing the storage of the model in ONNX, we will test individual torch models +in the integration tests. +""" +import shutil +from unittest import main, TestCase + +import numpy as np +import onnxruntime as ort + +from surrealml.engine.torch import TorchOnnxAdapter +from tests.utils.torch_linear import train_model + + +class TestTorch(TestCase): + + def setUp(self): + self.model, self.x = train_model() + + def tearDown(self): + try: + shutil.rmtree(".surmlcache") + except OSError as e: + print(f"Error: surmlcache : {e.strerror}") + + def test_store_and_run(self): + file_path = TorchOnnxAdapter.save_model_to_onnx(self.model, self.x[:1]) + + # Load the ONNX model + session = ort.InferenceSession(file_path) + + # Prepare input data (adjust the shape according to your model's requirements) + # For a linear regression model, it usually expects a single feature vector. 
+ # Example: Predicting for a single value + input_data = np.array([[2800, 3200]], dtype=np.float32) # Replace with your input data + + # Get the name of the input node + input_name = session.get_inputs()[0].name + + # Run the model (make a prediction) + result = session.run(None, {input_name: input_data})[0][0][0] + self.assertEqual(np.float32, type(result)) + + +if __name__ == '__main__': + main() diff --git a/tests/unit_tests/test_rust_adapter.py b/tests/unit_tests/test_rust_adapter.py new file mode 100644 index 0000000..cdfe4c6 --- /dev/null +++ b/tests/unit_tests/test_rust_adapter.py @@ -0,0 +1,24 @@ +from unittest import TestCase, main +from tests.utils.torch_linear import train_model +from surrealml.rust_adapter import RustAdapter +from surrealml.surml_file import SurMlFile +from surrealml.engine import Engine + + +class TestRustAdapter(TestCase): + + def setUp(self): + self.model, self.x = train_model() + self.file = SurMlFile(model=self.model, name="linear", inputs=self.x, engine=Engine.PYTORCH) + + def tearDown(self): + pass + + def test_basic_store(self): + pass + # self.file.add_column(name="x") + # self.file.save(path="./unit_test.surml") + + +if __name__ == '__main__': + main() diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/torch_linear.py b/tests/utils/torch_linear.py new file mode 100644 index 0000000..96e0dc6 --- /dev/null +++ b/tests/utils/torch_linear.py @@ -0,0 +1,54 @@ +""" +Trains a basic torch model that can be used for testing. 
+""" +import numpy as np +import torch +import torch.nn as nn +import torch.optim as optim + + +class LinearRegressionModel(nn.Module): + def __init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(2, 1) # 2 input features, 1 output + + def forward(self, x): + return self.linear(x) + +def train_model(): + squarefoot = np.array([1000, 1200, 1500, 1800, 2000, 2200, 2500, 2800, 3000, 3200], dtype=np.float32) + num_floors = np.array([1, 1, 1.5, 1.5, 2, 2, 2.5, 2.5, 3, 3], dtype=np.float32) + house_price = np.array([200000, 230000, 280000, 320000, 350000, 380000, 420000, 470000, 500000, 520000], + dtype=np.float32) + squarefoot = (squarefoot - squarefoot.mean()) / squarefoot.std() + num_floors = (num_floors - num_floors.mean()) / num_floors.std() + house_price = (house_price - house_price.mean()) / house_price.std() + squarefoot_tensor = torch.from_numpy(squarefoot) + num_floors_tensor = torch.from_numpy(num_floors) + house_price_tensor = torch.from_numpy(house_price) + + X = torch.stack([squarefoot_tensor, num_floors_tensor], dim=1) + # Initialize the model + model = LinearRegressionModel() + + # Define the loss function and optimizer + criterion = nn.MSELoss() + optimizer = optim.SGD(model.parameters(), lr=0.01) + + num_epochs = 1000 + for epoch in range(num_epochs): + # Forward pass + y_pred = model(X) + + # Compute the loss + loss = criterion(y_pred.squeeze(), house_price_tensor) + + # Backward pass and optimization + optimizer.zero_grad() + loss.backward() + optimizer.step() + + test_squarefoot = torch.tensor([2800, 3200], dtype=torch.float32) + test_num_floors = torch.tensor([2.5, 3], dtype=torch.float32) + x = torch.stack([test_squarefoot, test_num_floors], dim=1) + return model, x From 83d4d18f4c7abd5be310222f3407ce7367fb533f Mon Sep 17 00:00:00 2001 From: maxwellflitton Date: Tue, 23 Jan 2024 17:02:14 +0000 Subject: [PATCH 3/6] updating testing suite so surml loading and execution can be tested in python unit tests --- 
.../workflows/surrealml_core_deployment.yml | 2 +- .github/workflows/surrealml_core_test.yml | 2 +- .gitignore | 5 +- modules/{utils => core}/.dockerignore | 0 modules/{utils => core}/.gitignore | 0 modules/{utils => core}/Cargo.toml | 0 modules/{utils => core}/LICENSE | 0 modules/{utils => core}/README.md | 0 modules/{utils => core}/build.rs | 0 .../{utils => core}/builds/Dockerfile.linux | 0 .../{utils => core}/builds/Dockerfile.macos | 0 modules/{utils => core}/builds/Dockerfile.nix | 0 .../{utils => core}/builds/Dockerfile.windows | 0 .../builds/docker_configs/linux.yml | 0 .../builds/docker_configs/macos.yml | 0 .../builds/docker_configs/nix.yml | 0 .../builds/docker_configs/windows.yml | 0 modules/{utils => core}/docker-compose.yml | 0 .../{utils => core}/scripts/linux_compose.sh | 0 .../{utils => core}/scripts/nix_compose.sh | 0 .../scripts/windows_compose.sh | 0 modules/{utils => core}/src/error.rs | 0 .../{utils => core}/src/execution/compute.rs | 0 modules/{utils => core}/src/execution/mod.rs | 0 .../src/execution/onnx_environment.rs | 0 modules/{utils => core}/src/lib.rs | 0 .../src/storage/header/engine.rs | 0 .../src/storage/header/input_dims.rs | 0 .../src/storage/header/keys.rs | 0 .../{utils => core}/src/storage/header/mod.rs | 0 .../storage/header/normalisers/clipping.rs | 0 .../header/normalisers/linear_scaling.rs | 0 .../storage/header/normalisers/log_scale.rs | 0 .../src/storage/header/normalisers/mod.rs | 0 .../src/storage/header/normalisers/traits.rs | 0 .../src/storage/header/normalisers/utils.rs | 0 .../src/storage/header/normalisers/wrapper.rs | 0 .../src/storage/header/normalisers/z_score.rs | 0 .../src/storage/header/origin.rs | 0 .../src/storage/header/output.rs | 0 .../src/storage/header/string_value.rs | 0 .../src/storage/header/version.rs | 0 modules/{utils => core}/src/storage/mod.rs | 0 .../src/storage/stream_adapter.rs | 0 .../{utils => core}/src/storage/surml_file.rs | 0 modules/{utils => core}/stash/forrest.surml | Bin .../{utils 
=> core}/stash/forrest_test.onnx | Bin .../{utils => core}/stash/linear_test.onnx | 0 modules/{utils => core}/stash/test.surml | Bin scripts/local_build.sh | 39 ++++++++ scripts/run_tests.sh | 10 ++ src/python_apis/execution.rs | 20 +++- src/python_apis/storage.rs | 4 +- surrealml/__init__.py | 3 +- surrealml/model_templates/__init__.py | 0 surrealml/rust_adapter.py | 29 ++---- test.surml | Bin 365 -> 0 bytes test_forrest.surml | Bin 218624 -> 0 bytes tests/README.md | 0 tests/__init__.py | 0 tests/scripts/local_build.py | 94 ++++++++++++++++++ tests/test.surml | Bin 3606 -> 0 bytes tests/unit_tests/test_rust_adapter.py | 7 +- tests/unit_tests/test_surml_file.py | 59 +++++++++++ 64 files changed, 237 insertions(+), 37 deletions(-) rename modules/{utils => core}/.dockerignore (100%) rename modules/{utils => core}/.gitignore (100%) rename modules/{utils => core}/Cargo.toml (100%) rename modules/{utils => core}/LICENSE (100%) rename modules/{utils => core}/README.md (100%) rename modules/{utils => core}/build.rs (100%) rename modules/{utils => core}/builds/Dockerfile.linux (100%) rename modules/{utils => core}/builds/Dockerfile.macos (100%) rename modules/{utils => core}/builds/Dockerfile.nix (100%) rename modules/{utils => core}/builds/Dockerfile.windows (100%) rename modules/{utils => core}/builds/docker_configs/linux.yml (100%) rename modules/{utils => core}/builds/docker_configs/macos.yml (100%) rename modules/{utils => core}/builds/docker_configs/nix.yml (100%) rename modules/{utils => core}/builds/docker_configs/windows.yml (100%) rename modules/{utils => core}/docker-compose.yml (100%) rename modules/{utils => core}/scripts/linux_compose.sh (100%) rename modules/{utils => core}/scripts/nix_compose.sh (100%) rename modules/{utils => core}/scripts/windows_compose.sh (100%) rename modules/{utils => core}/src/error.rs (100%) rename modules/{utils => core}/src/execution/compute.rs (100%) rename modules/{utils => core}/src/execution/mod.rs (100%) rename 
modules/{utils => core}/src/execution/onnx_environment.rs (100%) rename modules/{utils => core}/src/lib.rs (100%) rename modules/{utils => core}/src/storage/header/engine.rs (100%) rename modules/{utils => core}/src/storage/header/input_dims.rs (100%) rename modules/{utils => core}/src/storage/header/keys.rs (100%) rename modules/{utils => core}/src/storage/header/mod.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/clipping.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/linear_scaling.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/log_scale.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/mod.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/traits.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/utils.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/wrapper.rs (100%) rename modules/{utils => core}/src/storage/header/normalisers/z_score.rs (100%) rename modules/{utils => core}/src/storage/header/origin.rs (100%) rename modules/{utils => core}/src/storage/header/output.rs (100%) rename modules/{utils => core}/src/storage/header/string_value.rs (100%) rename modules/{utils => core}/src/storage/header/version.rs (100%) rename modules/{utils => core}/src/storage/mod.rs (100%) rename modules/{utils => core}/src/storage/stream_adapter.rs (100%) rename modules/{utils => core}/src/storage/surml_file.rs (100%) rename modules/{utils => core}/stash/forrest.surml (100%) rename modules/{utils => core}/stash/forrest_test.onnx (100%) rename modules/{utils => core}/stash/linear_test.onnx (100%) rename modules/{utils => core}/stash/test.surml (100%) create mode 100644 scripts/local_build.sh create mode 100644 scripts/run_tests.sh create mode 100644 surrealml/model_templates/__init__.py delete mode 100644 test.surml delete mode 100644 test_forrest.surml create mode 100644 tests/README.md create mode 100644 
tests/__init__.py create mode 100644 tests/scripts/local_build.py delete mode 100644 tests/test.surml create mode 100644 tests/unit_tests/test_surml_file.py diff --git a/.github/workflows/surrealml_core_deployment.yml b/.github/workflows/surrealml_core_deployment.yml index 3a62cb8..5b1bf63 100644 --- a/.github/workflows/surrealml_core_deployment.yml +++ b/.github/workflows/surrealml_core_deployment.yml @@ -19,6 +19,6 @@ jobs: - uses: katyo/publish-crates@v2 with: - path: './modules/utils' + path: './modules/core' args: --no-verify registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.github/workflows/surrealml_core_test.yml b/.github/workflows/surrealml_core_test.yml index 41acdd9..1ab3630 100644 --- a/.github/workflows/surrealml_core_test.yml +++ b/.github/workflows/surrealml_core_test.yml @@ -21,4 +21,4 @@ jobs: override: true - name: Run Unit Tests - run: cd modules/utils && cargo test + run: cd modules/core && cargo test diff --git a/.gitignore b/.gitignore index 8d24aae..352f693 100644 --- a/.gitignore +++ b/.gitignore @@ -11,10 +11,11 @@ venv/ surrealml.egg-info/ .vscode/ ./modules/utils/target/ -modules/utils/target/ +modules/core/target/ ./modules/onnx_driver/target/ modules/onnx_driver/target/ surrealdb_build/ -modules/utils/onnx_driver/ +modules/core/onnx_driver/ *.so surrealml/rust_surrealml.cpython-310-darwin.so +.surmlcache diff --git a/modules/utils/.dockerignore b/modules/core/.dockerignore similarity index 100% rename from modules/utils/.dockerignore rename to modules/core/.dockerignore diff --git a/modules/utils/.gitignore b/modules/core/.gitignore similarity index 100% rename from modules/utils/.gitignore rename to modules/core/.gitignore diff --git a/modules/utils/Cargo.toml b/modules/core/Cargo.toml similarity index 100% rename from modules/utils/Cargo.toml rename to modules/core/Cargo.toml diff --git a/modules/utils/LICENSE b/modules/core/LICENSE similarity index 100% rename from modules/utils/LICENSE rename to modules/core/LICENSE 
diff --git a/modules/utils/README.md b/modules/core/README.md similarity index 100% rename from modules/utils/README.md rename to modules/core/README.md diff --git a/modules/utils/build.rs b/modules/core/build.rs similarity index 100% rename from modules/utils/build.rs rename to modules/core/build.rs diff --git a/modules/utils/builds/Dockerfile.linux b/modules/core/builds/Dockerfile.linux similarity index 100% rename from modules/utils/builds/Dockerfile.linux rename to modules/core/builds/Dockerfile.linux diff --git a/modules/utils/builds/Dockerfile.macos b/modules/core/builds/Dockerfile.macos similarity index 100% rename from modules/utils/builds/Dockerfile.macos rename to modules/core/builds/Dockerfile.macos diff --git a/modules/utils/builds/Dockerfile.nix b/modules/core/builds/Dockerfile.nix similarity index 100% rename from modules/utils/builds/Dockerfile.nix rename to modules/core/builds/Dockerfile.nix diff --git a/modules/utils/builds/Dockerfile.windows b/modules/core/builds/Dockerfile.windows similarity index 100% rename from modules/utils/builds/Dockerfile.windows rename to modules/core/builds/Dockerfile.windows diff --git a/modules/utils/builds/docker_configs/linux.yml b/modules/core/builds/docker_configs/linux.yml similarity index 100% rename from modules/utils/builds/docker_configs/linux.yml rename to modules/core/builds/docker_configs/linux.yml diff --git a/modules/utils/builds/docker_configs/macos.yml b/modules/core/builds/docker_configs/macos.yml similarity index 100% rename from modules/utils/builds/docker_configs/macos.yml rename to modules/core/builds/docker_configs/macos.yml diff --git a/modules/utils/builds/docker_configs/nix.yml b/modules/core/builds/docker_configs/nix.yml similarity index 100% rename from modules/utils/builds/docker_configs/nix.yml rename to modules/core/builds/docker_configs/nix.yml diff --git a/modules/utils/builds/docker_configs/windows.yml b/modules/core/builds/docker_configs/windows.yml similarity index 100% rename from 
modules/utils/builds/docker_configs/windows.yml rename to modules/core/builds/docker_configs/windows.yml diff --git a/modules/utils/docker-compose.yml b/modules/core/docker-compose.yml similarity index 100% rename from modules/utils/docker-compose.yml rename to modules/core/docker-compose.yml diff --git a/modules/utils/scripts/linux_compose.sh b/modules/core/scripts/linux_compose.sh similarity index 100% rename from modules/utils/scripts/linux_compose.sh rename to modules/core/scripts/linux_compose.sh diff --git a/modules/utils/scripts/nix_compose.sh b/modules/core/scripts/nix_compose.sh similarity index 100% rename from modules/utils/scripts/nix_compose.sh rename to modules/core/scripts/nix_compose.sh diff --git a/modules/utils/scripts/windows_compose.sh b/modules/core/scripts/windows_compose.sh similarity index 100% rename from modules/utils/scripts/windows_compose.sh rename to modules/core/scripts/windows_compose.sh diff --git a/modules/utils/src/error.rs b/modules/core/src/error.rs similarity index 100% rename from modules/utils/src/error.rs rename to modules/core/src/error.rs diff --git a/modules/utils/src/execution/compute.rs b/modules/core/src/execution/compute.rs similarity index 100% rename from modules/utils/src/execution/compute.rs rename to modules/core/src/execution/compute.rs diff --git a/modules/utils/src/execution/mod.rs b/modules/core/src/execution/mod.rs similarity index 100% rename from modules/utils/src/execution/mod.rs rename to modules/core/src/execution/mod.rs diff --git a/modules/utils/src/execution/onnx_environment.rs b/modules/core/src/execution/onnx_environment.rs similarity index 100% rename from modules/utils/src/execution/onnx_environment.rs rename to modules/core/src/execution/onnx_environment.rs diff --git a/modules/utils/src/lib.rs b/modules/core/src/lib.rs similarity index 100% rename from modules/utils/src/lib.rs rename to modules/core/src/lib.rs diff --git a/modules/utils/src/storage/header/engine.rs 
b/modules/core/src/storage/header/engine.rs similarity index 100% rename from modules/utils/src/storage/header/engine.rs rename to modules/core/src/storage/header/engine.rs diff --git a/modules/utils/src/storage/header/input_dims.rs b/modules/core/src/storage/header/input_dims.rs similarity index 100% rename from modules/utils/src/storage/header/input_dims.rs rename to modules/core/src/storage/header/input_dims.rs diff --git a/modules/utils/src/storage/header/keys.rs b/modules/core/src/storage/header/keys.rs similarity index 100% rename from modules/utils/src/storage/header/keys.rs rename to modules/core/src/storage/header/keys.rs diff --git a/modules/utils/src/storage/header/mod.rs b/modules/core/src/storage/header/mod.rs similarity index 100% rename from modules/utils/src/storage/header/mod.rs rename to modules/core/src/storage/header/mod.rs diff --git a/modules/utils/src/storage/header/normalisers/clipping.rs b/modules/core/src/storage/header/normalisers/clipping.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/clipping.rs rename to modules/core/src/storage/header/normalisers/clipping.rs diff --git a/modules/utils/src/storage/header/normalisers/linear_scaling.rs b/modules/core/src/storage/header/normalisers/linear_scaling.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/linear_scaling.rs rename to modules/core/src/storage/header/normalisers/linear_scaling.rs diff --git a/modules/utils/src/storage/header/normalisers/log_scale.rs b/modules/core/src/storage/header/normalisers/log_scale.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/log_scale.rs rename to modules/core/src/storage/header/normalisers/log_scale.rs diff --git a/modules/utils/src/storage/header/normalisers/mod.rs b/modules/core/src/storage/header/normalisers/mod.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/mod.rs rename to 
modules/core/src/storage/header/normalisers/mod.rs diff --git a/modules/utils/src/storage/header/normalisers/traits.rs b/modules/core/src/storage/header/normalisers/traits.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/traits.rs rename to modules/core/src/storage/header/normalisers/traits.rs diff --git a/modules/utils/src/storage/header/normalisers/utils.rs b/modules/core/src/storage/header/normalisers/utils.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/utils.rs rename to modules/core/src/storage/header/normalisers/utils.rs diff --git a/modules/utils/src/storage/header/normalisers/wrapper.rs b/modules/core/src/storage/header/normalisers/wrapper.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/wrapper.rs rename to modules/core/src/storage/header/normalisers/wrapper.rs diff --git a/modules/utils/src/storage/header/normalisers/z_score.rs b/modules/core/src/storage/header/normalisers/z_score.rs similarity index 100% rename from modules/utils/src/storage/header/normalisers/z_score.rs rename to modules/core/src/storage/header/normalisers/z_score.rs diff --git a/modules/utils/src/storage/header/origin.rs b/modules/core/src/storage/header/origin.rs similarity index 100% rename from modules/utils/src/storage/header/origin.rs rename to modules/core/src/storage/header/origin.rs diff --git a/modules/utils/src/storage/header/output.rs b/modules/core/src/storage/header/output.rs similarity index 100% rename from modules/utils/src/storage/header/output.rs rename to modules/core/src/storage/header/output.rs diff --git a/modules/utils/src/storage/header/string_value.rs b/modules/core/src/storage/header/string_value.rs similarity index 100% rename from modules/utils/src/storage/header/string_value.rs rename to modules/core/src/storage/header/string_value.rs diff --git a/modules/utils/src/storage/header/version.rs b/modules/core/src/storage/header/version.rs similarity index 100% 
rename from modules/utils/src/storage/header/version.rs rename to modules/core/src/storage/header/version.rs diff --git a/modules/utils/src/storage/mod.rs b/modules/core/src/storage/mod.rs similarity index 100% rename from modules/utils/src/storage/mod.rs rename to modules/core/src/storage/mod.rs diff --git a/modules/utils/src/storage/stream_adapter.rs b/modules/core/src/storage/stream_adapter.rs similarity index 100% rename from modules/utils/src/storage/stream_adapter.rs rename to modules/core/src/storage/stream_adapter.rs diff --git a/modules/utils/src/storage/surml_file.rs b/modules/core/src/storage/surml_file.rs similarity index 100% rename from modules/utils/src/storage/surml_file.rs rename to modules/core/src/storage/surml_file.rs diff --git a/modules/utils/stash/forrest.surml b/modules/core/stash/forrest.surml similarity index 100% rename from modules/utils/stash/forrest.surml rename to modules/core/stash/forrest.surml diff --git a/modules/utils/stash/forrest_test.onnx b/modules/core/stash/forrest_test.onnx similarity index 100% rename from modules/utils/stash/forrest_test.onnx rename to modules/core/stash/forrest_test.onnx diff --git a/modules/utils/stash/linear_test.onnx b/modules/core/stash/linear_test.onnx similarity index 100% rename from modules/utils/stash/linear_test.onnx rename to modules/core/stash/linear_test.onnx diff --git a/modules/utils/stash/test.surml b/modules/core/stash/test.surml similarity index 100% rename from modules/utils/stash/test.surml rename to modules/core/stash/test.surml diff --git a/scripts/local_build.sh b/scripts/local_build.sh new file mode 100644 index 0000000..bf1c130 --- /dev/null +++ b/scripts/local_build.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# navigate to directory +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +cd $SCRIPTPATH + + +delete_directory() { + dir_path="$1" + + if [ -d "$dir_path" ]; then + rm -rf "$dir_path" + echo "Directory '$dir_path' has been deleted." 
+ else + echo "Directory '$dir_path' does not exist." + fi +} + +delete_file() { + file_path="$1" + + if [ -f "$file_path" ]; then + rm "$file_path" + echo "File '$file_path' has been deleted." + else + echo "File '$file_path' does not exist." + fi +} + + +cd .. + +delete_directory ./build +delete_directory ./tests/venv +cd tests +python3 -m venv venv +source venv/bin/activate +cd .. +pip install --no-cache-dir . diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh new file mode 100644 index 0000000..cf05c54 --- /dev/null +++ b/scripts/run_tests.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# navigate to directory +SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" +cd $SCRIPTPATH + +cd .. + + +python -m unittest discover diff --git a/src/python_apis/execution.rs b/src/python_apis/execution.rs index 1f17173..ddc55fd 100644 --- a/src/python_apis/execution.rs +++ b/src/python_apis/execution.rs @@ -16,8 +16,14 @@ use crate::python_state::PYTHON_STATE; /// The computed output vector from the loaded model. #[pyfunction] pub fn raw_compute(file_id: String, input_vector: Vec, dims: Option<(i32, i32)>) -> Vec { - let mut python_state = PYTHON_STATE.lock().unwrap(); - let mut file = python_state.get_mut(&file_id).unwrap(); + let mut python_state = match PYTHON_STATE.lock() { + Ok(state) => state, + Err(error) => panic!("{}", format!("Error getting python state: {}", error)) + }; + let mut file = match python_state.get_mut(&file_id) { + Some(file) => file, + None => panic!("File not found for id: {}, here is the state: {:?}", file_id, python_state.keys()) + }; let tensor = ndarray::arr1(&input_vector).into_dyn(); let compute_unit = ModelComputation { surml_file: &mut file @@ -36,8 +42,14 @@ pub fn raw_compute(file_id: String, input_vector: Vec, dims: Option<(i32, i /// The computed output vector from the loaded model. 
#[pyfunction] pub fn buffered_compute(file_id: String, mut input_values_map: HashMap) -> Vec { - let mut python_state = PYTHON_STATE.lock().unwrap(); - let mut file = python_state.get_mut(&file_id).unwrap(); + let mut python_state = match PYTHON_STATE.lock() { + Ok(state) => state, + Err(error) => panic!("{}", format!("Error getting python state: {}", error)) + }; + let mut file = match python_state.get_mut(&file_id) { + Some(file) => file, + None => panic!("File not found for id: {}, here is the state: {:?}", file_id, python_state.keys()) + }; let compute_unit = ModelComputation { surml_file: &mut file diff --git a/src/python_apis/storage.rs b/src/python_apis/storage.rs index 46f6784..5a9e579 100644 --- a/src/python_apis/storage.rs +++ b/src/python_apis/storage.rs @@ -17,7 +17,7 @@ use hyper::header::CONTENT_TYPE; use hyper::{Client, Uri}; use hyper::header::AUTHORIZATION; use hyper::header::HeaderValue; -use base64::{encode}; +use base64::encode; use crate::python_state::{PYTHON_STATE, generate_unique_id}; use surrealml_core::storage::stream_adapter::StreamAdapter; @@ -40,7 +40,7 @@ pub fn load_model(file_path: String) -> String { } -/// Saves a model to a file. +/// Saves a model to a file, deleting the file from the `PYTHON_STATE` in the process. /// /// # Arguments /// * `file_path` - The path to the file to save to. 
diff --git a/surrealml/__init__.py b/surrealml/__init__.py index 83492be..fcd35cc 100644 --- a/surrealml/__init__.py +++ b/surrealml/__init__.py @@ -1 +1,2 @@ -# from surrealml.surml_file import SurMlFile \ No newline at end of file +from surrealml.surml_file import SurMlFile +from surrealml.engine import Engine diff --git a/surrealml/model_templates/__init__.py b/surrealml/model_templates/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/rust_adapter.py b/surrealml/rust_adapter.py index 0c6a3fe..7e5582b 100644 --- a/surrealml/rust_adapter.py +++ b/surrealml/rust_adapter.py @@ -1,24 +1,7 @@ -try: - from surrealml.rust_surrealml import load_cached_raw_model, add_column, add_output, add_normaliser, save_model, \ - add_name, load_model, add_description, add_version, to_bytes, add_engine, add_author, add_origin - from surrealml.rust_surrealml import raw_compute, buffered_compute, upload_model -except ImportError: - load_cached_raw_model = None - add_column = None - add_output = None - add_normaliser = None - save_model = None - add_name = None - load_model = None - add_description = None - add_version = None - to_bytes = None - add_engine = None - add_author = None - add_origin = None - raw_compute = None - buffered_compute = None - upload_model = None +# try: +from surrealml.rust_surrealml import load_cached_raw_model, add_column, add_output, add_normaliser, save_model, \ + add_name, load_model, add_description, add_version, to_bytes, add_engine, add_author, add_origin +from surrealml.rust_surrealml import raw_compute, buffered_compute, upload_model from typing import Optional @@ -125,12 +108,12 @@ def to_bytes(self): return to_bytes(self.file_id) @staticmethod - def load(path): + def load(path) -> str: """ Loads a model from a file. :param path: the path to load the model from. - :return: + :return: the id of the model being loaded. 
""" return load_model(path) diff --git a/test.surml b/test.surml deleted file mode 100644 index 5afc2175e8e3c96720e9e602823c5933a7dcdd0c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 365 zcmZQzVDQ%0x3w!SEKMv*P0PoyC1*&n8tED68Ctb6aRqYm<>%#9SXucdmiU(D#2cXT&4k4Db29T% z6N~hrD)LK93Q9}j4W#%`C6w483b?GfxFI_9k}?yExp3(hVl6B!oD@ z4vf#rEYT9@U=(0`Qn=LJp$W~0!u2s!YN(*T(i80ksOv_ADl(LhoVM0YB*+VFWtYu4z z{K%4}e$`(}_6R@!_ojJirWvWE=#0<%-1pu+_uO;8=iKw|y!RS798On5!-UXyqm%u; z1N;Wh44~5~L5#s(RB}*|?@V6`y|Iv;jgT-7Cnkn#@{z&y!g-PW0=y|yk{8*_hoa$4 zW{@>BEleeF?hPLWb~`zCDakHH*kvDfDbFqi*=2WjiM7Yt+1ZGSg_|BsutVxu+Yjth z=VhOkh3Uvf_B9yFXXlN=Q$nKsQ9}9bC4{qAQV32m|C!(d6^K0(C`9e(T?Dkg*utKG z+7-3aT&R~5oewbq<*ynq&%Uq}P&CPr6_-IbPrS;tU;kawZjvhbs|L4!4j*emIzxC@tVApzfkh>sv!TkBN z=MT(bn8PrK|Li&37XP)SE#wZ!9S}c%cJ6>V409Of@Si=0+v2~rw1wR9=jRUa;h*nA zn8RH+f5ZHR`P-KH-*z< zq5rPEegOZ2|2un*{G09jo5u#Z1#%0-H>|HA4?rG(JOFusS03PXt}De#R~joXU-LTG z)tSKfyH2hIUw0i}L#~5$FwA$D?~U_7<9&z?hz*DbxJLZWJn%dFYLD1%tIoELzped4 zdzkNSiNUtCMPsk6@z>G)x7B`V5A(0BF%0ct-41yK@(APs$ODiEAP+zufII+s0P+Cj z0muW82OtkX9)LUmc>wYNuROr(9K=f3iIuMIYlwfiHg@$n0ORjE`JOx9f$zJn@6lL5 zKM(_5HwM7>UDx-#=05ki=Hi1}zNN>_>4$oV&z8m%$ip=q^2X2Q4VWu1S75IE+*|=p zzzOmt8D6XF@J(U1oq4?rIHxjX=K1?CFOm7kj{zzH}(zJz=Uc>waj z-;xI)mLZn^mRN>yz_q!h`HVYexZ7j(*m>-DXRq1DpH>}d_V=qWd91|rKnZyliwP(p zZ*^2YfY^iB>!{d+emnje2tMd|A3zR<91QcYYtF;w=6-W=(OmoH>YLLK^<6VA!S7wu z@7(#Kz2ktpKRAZ9PMMEfW>zfkb5Baz>JG)T|IAf zG=J#7WAjf}kAE0{>(&jx4Y)z>?%3Q7{DFU4^Dx8^#86vfs4eZ=()L%x?yu0%(PM!A zVJ!x640#;#K+E$$S7+AId84iT+iHJX`-k=|pL1>X8MOa(*Shv*+1~S`t@gLKeVtu@ zTlqozUpE&!d#r$cf8VtZeERqKv?V#YC9)7Je_yPCPyarj!kmUR8O&>#*N_Jw4?rG( zJOFtB@&La)0I>nF!7n!8{NI1=u(J^rb27^bz>O8ap_`qJkT4D>CWht(&W{X^fb$~x z1$a}aBrmd;4@JY9%phxwwlFoqxwrb$lif*Qc8PS(`t)O$z1gJ*yOd{_f?UV!LtGE` z4%V(e`&g1)N@0TR5|fu*tF@f5^$fTDSl-00 z7g+u8nty2ijTm|}4Tls`v||93-&$vSL^-T_5JR3 zBft9`Kg${W{>e}NZ_8)s`qLpcE&+#L=#Z_&>rcb>U4QcP19qO@{VuE?N+8e?1o*9o zI>Hv{69@nSAOHlKCxAT%``PD4*!^WWba 
zV+I0100;nq&O!j*6YngZ06YF90_gjPN%!gEI6PuCvbFsEL#;ph{lm`UD|ipGvv>mR z00KY&2mk>f00e*l5C8%|00;m9AOHk_01yBIKmZ5;0U!VbfB+Bx0zd!=00AHX1b_e# z00KY&2mk>f00e*l5a`SV;CJ*ovvf00e*l5C8%|00;m9AOHk_01#*&0_g7#YUXZK z!Pz1Dk*(#wKd24AKiEF50j)p)2mk>f00e*l5C8%|00;m9AOHk_01yBIKmZ5;0U!Vb zfB+Bx0zd!=00AHX1b_e#00KY&2mk>f00e*l5C8%|00;m9AOHk_01yBIKmZ5;0U!Vb zfB+Bx0zd!=00AHX1b_e#00KY&2mk>f00e*l5C8%|00;m9AOHk_01yBIKmZ5;0U!Vb zfB+Bx0zd!=00AHX1b_e#00KY&2mk>f00e*l5C8&y00D8fjXnMVTfsyi00e*l5C8%| z00;m9AOHk_01yBIKmZ5;0U!VbfB+Bx0zd!=00AHX1b_e#00KY&2mk>f00e*l5C8%| z00;m9AOHk_01yBIKmZ5;0U!Vb+LwTxji{JR!rVu=M5G^Nv$GKr#^J=on)WJ-Gr@V0 z`~tiwRFW6j%ZH-jO=gfav~(;?4RP+R{m37eCxirUA{;IMMws{V2w~4APggXAfV%! z>R0y44yj{pKNyc1&+&}5Fje{a@!b44hh^WCydB23#|sc89c&0M-#8Fg2G!g5&Y&OB zGOFCAlVAu;^!G?oq5_dhjO9Mqk-+-OQYvjaz1JQ70__9P-Y z(CtV59mtc1u7#;Qe;#Wts1YCMX*AelvFs3QzaVr4p?utM;`phCEL(PMoIs2)dToN3 zfICXL*&weJG@0n7o6>NM)}x=fPst7EM41+Zk>U}ow)M3*+*%0&@x5H%2ANM+7MaM% zb27tZ8MoH4@7U%Mwd z`!&=BN&d^zjvwjfuA6*UfUl02=bIbGbI6`R%pEbO%jPO(pZ4GxO~!=Pq2-;8-8ky27_Gk+HrCtRKAL~sN)OB6x*?Lt*7E#o z@JsVAx4oyH6}Bpdxb?s>4p+TM23Y=Kfsino!I2mKF!O(#x#8;SM42(dP2xMzuBXY7 zlJ3CpCq3aq?g6ZRf|cHpfjWU<0OzCkgetGPs4Uye>OL66@lS=ebroBs0RY=Nnn5C`M)Ml zv?nLf4FuXkpqF55r?@xOmN7tEAOHk_05<`>v%0W`;D!RnttPPRbpUaRLH#>Dtyb~} zH0;5wf5)e-raBA|f$x`Xg$V6|KnD^~Nmhk9+=2Xm)HQnl@tpVd6*w703fWqG|501B zL+?NSXly@Q=b-ft)_6Y)3*>+R5C8%|00;m9AOHk_01yBIKmZ5;0U!VbfB+Bx0>6pC zvc;|Yd$iwV9&`f)fIt@}kVAP!&`YGSo_{)A)r9vXyKpSD$I#!(8Nsvl_5ceyfdCKy z0zd!=00AHX1lolF`u?FEb9)31kElnsmcM_f^~b+|*e+l}DG&exKmZ5;0U!VbfB+Bx z0zd!=00AHX1b_e#00KY&2mk>f00e*l5C8%|00;m9AOHk_01yBIKmZ5;0U!VbfB+Bx z0zd!=00AHX1b_e#00KaulMt{ejy8#W><+)<-bwrcW&i;o00e*l5C8%|00;m9AOHk_ z01yBIKmZ5;0U!VbfB+Bx0zjan2%x_|XenTxg0n*`KsNrrKiC&%jPoM-1$a}aBrmd; z4@JY9%pj`=&Y@bE8sOYp`B^0BMItc*=w3b~lDbc@pP#S)EKgs$x4IuyokW>O@d`#N zCCsrM8ct<#1l~;yF_u9m`}!l&G=V6$zQ{4jH;6Pdm`WvilD+24CDXl0J^=x9Xo9wG z5@>BeAcamwbOq6b8uiHPS>4}X9T`_Jl@g@x72wZ6^it>&lUQda`UWv1Xu@c(J_1ob zh&BO4n?rh^6fcGo zQs+b`BeFqc>UfHmZxB+8p0q;bC1@h7A)|w!2Lv!k{)h*HXrf;ArjvphzSJNI5}o21 
z>`V0~F)02)0d!J`wggQKk9QMAS_L7Jn0H$+Q4;C_u9 z{DUa5Bh$%#6vW5%fg;Zelx`emkEZoU%$7m=RzS^`sYlE1R+2+hmo@+q#Ur8vQBgdH zs5FPD9!(JuMMpFU6}96K?aLvmPaBMgCL*FkP|-vV(f%Bw2DG6FsIdZw>@Y+Y-D3p; z+u8vf!iF>@M0k-PBCL!GBYTa76&r*F5vp36wBe`^x*^gdP-$eZvGib0X)PKal|}RRP}iVtAA?HsaeKYAwkFLO zl}6oe!jk6gcDA$@jets{ZYQ#&dAdE=6a7aUtBzjlaxzbhq#|*EZs@g61}7$lUQIYZ z1~_%DYn>O8bv3YHAZav8`5JyyjfN|gdSIx9>Btsb4V~9(1uWH+TweQSmE-Ve57p`d z0hQuYX_Xs$nvW2gaMY8^NOxYaVC{DNbK$U>w^_AeNoOMSW9s@1aDC#xaz*7#(M1_} z7oBdN>%}JcxtKK{J=0xT4j6>GsMa=T?bs+je^Rw^`y3rcfUA zyixJU^pL}`$?BeU&qs`pJ$KvPu!1oC!_5^;(@{2;G^)+Q2nAwErE0t0cweqiaNJgL z=Rl$H)%%vaA6kSCQoSvJ^V`zz=wj(9H5n_m^ft)Jv70JP)44fm&yG4pvJJC$GA`2F z1HVr)BW#}FhscQSQMv*YQlPVy+^gxv;j)VYhW3$FoE^F&T>?*VTuI#&n;BLsiu$H} zgjz2#7>dm&sWq)|y3_<)yrsNdbkL{jL=HzeuN+4vZRhg`Vi| zt0!V=X?AAP@WC-d>Em2V=B*68L%+CAF zxM(DG=3(WD#k=Aj*@lN#^^5GvHQJDuY#jb%{I=S>OXI0rEyRX#R=ePdL zx%`*1Bf{-6Z<#Uu(SH^3x+}}x;Fi!0V7MYi_D&^mowV85hXA({>Kmt-!EHQb_U=<|8 zFGey3{{S_@Y5X`qS<49xe}4`3qEn+`(aB05sx3$#?%Ox!&Ay~5xvVBsVuNG%oYCKg z6in4vzPbEqym+qr^m{{(dAra**Zng*Z%FdEg51I73M-1uA9#vY=IJLTyGoZ7e+oSp zkME{A*lY3z1Khq5&I4`zP137lWaZcDyfmwJs?Lgu7B5(L*K}>&$K{X4R(#cZT|1K>$-A4gugCx7B)6t{xIEgm6rg8QVVLNG&pj$sgvBhMEN#mSIu2xQ#Bu6I) zE87Vqr*Wne&1pC{v|pOQa+;tGj=g4|?aNRn1^Rl;p;9F3SM9$33@Yo3h7f)lp2Bdj z!P(#lX~Wjqg41J~RkZZwWwPN|(q))&v7d#S*uDt*FuqQ%A$3%;bd*o4?jqj-vC z`~PFOxAFz;S35sLVO$FxPIs(<|k9p|dMbIXY+Et1cXQwC3@fLumnHUfG4F<PvGe$)w2x`} z;bR8%a@9Z1(CLw|H0AJvQ}V7Usi&(%GG3fa^bOwZ8XCMT?c^!-i3=VdxRi;%Gd2D5 zC)GJ0DzgqRy!bX?Z?AsdIjdDh`>ma69UOdozU%DbZ|4pl2)-{g``+nOm4nCXzo;3O zyzp(h(Cf6#0$9-&^U+ z8#_Mjtw0U&PZ_0GF6(b_tKTIC6SzkY9#kvEex#RCDXJ3@k z7$ryzz+D^>j;k_okm%a{QHOy{DYu0Ar3`d3RMKW0i& zzueVl&h%ThAzp4+=mu28eo5K#>C*)-7UbDPpD94JY&sq$d3sL7hWR&p>)yPb;!PKP z!FmFb_>(RS%-OK}zs;M8u5j0MEKD@jC)*#!dF8BWF{&`nH_p~XQerm3ZU&TCg zj;pTiUVCk}cE*u4hp(^q_!ejRY;@?S0a@yi#cN)r;y#aDv{2gdpyABo)Bb0k*A0I4 zqFTV(VEh2a-R<$AJ0pBkL{>OHDLXMKBxXy6r*?1rx4IASuYAZq@^H|LX$BGox^lXk zR|%hT^!ldiwW>xszDj!ZoY^@(s!_H>&&jUn)2m{o@^XXlXqv&&0h^-bdl~MuNE+^F 
zA0|ilxvFeHAtVimf53RBOh^bc4$BH$Lm$2O%uf7--VW>bFsF=nD92MS@4L0{noQ|j z6|>b(S2^}|yng6NR-MEy*_V!e`%(0gZU~#lnpno>;cYe^JFjx(j?W$MRmyW!;vE;| z7*Gt8Mose5uG=$S_2S-9TGzvE4U_S2MOEI)m-V!ovWsaLTr4(QJLmqi>qkux-RovC z_4kArM;Qd0S?-{(vG1#MeVZdQXDKs>sV%2e)^G2`X@(o)CcgftP9a~l+D6w-zVZ3N zL5*PZY(klE(x?ii^r~aw(Z{yg+ge>0w~akT_nfV*yz4YGPvyM6K^enBo^X4Hew~Bs z_!383t?RGX9Gf9i_Ho?x*UV#2mC9t2N9|J`v2(bi>EmSTS$(m^j*lm*bPqbU^0kuG z9oY?aO#Oak-nNE&@FRLV*v@21Z+s(y+M}WyD;0m5DQ%{nqcY)Hk06q9f18qZ8DC?r ze3KyFpv5k9-F%WX!9_sf)t!o$jF*RFsxv)2ZUmSQJ8N-C*je)iQ}>GY=~0Ko*33}) z9QNt?LNxm;oJ_A>g5>;HGC#_To-JI}wy_qZKtu)mCpZI2aV+d4(2-oHJ5<@^PZ^>RG9!CnjCpqqxk;*>bFVjFk11ix2ct zkjrgiiQH=Z&XGOs7L0v(LgkB@m=Zql@_&Lg=7OI#oxk89ll!(Z^jvMi+lY!AYbNKf zb#`@fs;YV}J@s>`=;^w-S88i(gR6^WYo0u~pEZnRuTV%gUtn7m`gIsR(MNXGhKZhE zGGc58P1BfJ@J>;%Qi_x|bI}?)K}Wu4&)pJB`}F+R;*jYs zrfT-4%&M)<_?ThYHr;Yp?bROUd(gwk-n#5q&Z!O3X_Nf&=G;AE9H%;9vW&>uFOQ0r z$L6N!45ui?ul1%cP4fME@lHPd&NR|Q>cT-9Mrk|rOO%6RC)(}}%KLZXf02w^ zc>n!l^xizNb{?fPO!?rx9R?eickk#rsyqKzeR+6HzM##ZZ3$c7pHqG4u;Aia$Gx_Z z8)`2T^i}=K&IEio6XA2=;br1{`bp21=MP;ge!M7i>G2VBhn4rrOL$~|V_>@M_=3;*Y;Mx4o<$Lr>totOUaI=!Ns%u0s^Rw+|9Yzu$rvG>X9 zyEzjatRj3Bgp&5Zn_(QImh5Q$bp_eCdV~AHJF{H2%8n%Id@}nc?W#(dqE)dqC@-}9 z%e8NETVrO%JlEX5hkR<{$>&V9j4HKk7nRhV)=LV9A1<`^9B9ob+Lj)@DZt2fR$5_7wo%NyXt!^SqM*2G z=7)A>Cy)G;pYm1v-v9Kp=_;?CN7uQ>EbISd@Wb;AYD{|8cDIvK#AnCmO1`i!v5pbm z`y_DtOQpL9$HxdM-h3AQ>XC@Gzv%JHv$fx-JTo7*!_4#(?!L%gug~^=Q*92BqS8D< zdZr2#SM?GX%D6RToOCa7=P=FiBYunP>>dOp^|DJZU>u8>IP3K4qC}}uj}W!gA<8$$ zC>fYs3_cZKbX1B&xAe)d=_UR^KlAL1mDBcRug{O=J!h5w5L8VFA>W(F@ zw&F>QVwG{9)$W~)Eh?+48hup7c!jXv@(b_$hJ-Aa>gkalQ#6(NKW!z0Sdn!OCnr73 zO}TARWvzU<$IRT5((?~Wt$*lJEpE2tgY4m&Fi+R#^Y1Mx&-9!@%360@L1f+Sc>FY7 zrGEcioU!hbh-ZLfNpFvkVW}roQ_n9?a(cco(fPEJ)0E03uXNSJ|1HC(9JN=Sc4O_6 z@l^w@sX@>1?=zHqK4+XAnEiNvZ!I}J^IOW}#^k>*o>(1XG;bHb8B z&)(}C(k4xjHKEd;z>g#CN zC52WR>u#&Whr~^#5K|T2RhViVFZ&!-yf3>q_237yBf`~chie2p^;NX)iRS6#M@FRK zl1C0&w^3U8(ufNM)n4l^nPi9HQ{!?Kl|qi6wU02c_9KWtP~5R>WO2XgMT(iC$j}Q1;8a`UeJ0c&@WR{6WwHac6a}k8;%Y#*1&; 
z=vB%RgG!~dLsVYd5ZB$Fj|c=GuEC|we>bG>`8|Q%lzegpCq7Y1-6PdUeT*gbq||9a zscokO4Xo!=-StndGCx#6^6$1)Z6UKL%R2me`fNEtsS`FHPlkBTXMUvN;wsW`AO4YL zPVfB`pSAdQzMxc(BO-_wqpF=tYCZJKs|XQEA~a^XsIt%MB~k~RM2CFYIG8~1X`M^G zvubcq>H0no_h#Q6yl$hIbwOCh*-P0k??t*v#t$9DOx#{pw`TE@>xaZ z(|1(K3C|7VLt>{Y+f>Ls4gQo3+-SoVNOeiprK-^?pf5o2pD`nmcUj2buQ+Iy36RMRIRWS@lY zdbtGW2y(Dwgw`U77ZT6mb4-pj7f41zt%?lUNs#(f+ zozM5HD2yLfLA_ELsNnC`&)(D8)JjEOd9MtUZuQpq*3C<=?k`+Bb`ImIQsp%d%6~)QM31P->=e>M=v9#>u7$FCn2(s*)KC*t6xsg*;9-HbvzVPnLDg*Te z+x{oEFHTYKf)L7V-s14J2a%fx4a|l??EZ>xIM_$UjHz?Mfd-Z=jZiLCx7rD z7{~P>Sb*1qU_thSZJ~z8!NSPXQF~f9@i|H4fVCa_;_z#(4` zxjs$bXwcB)))2MRa7*;;$9d8>9kcr4cgEqGYC$pRY+*m{y z**I-@%3FgDw}#U>FKSSG(T!7sov8pX8%A?$m{Cd4V4J|DA<&Gc23>9qWfzweE<(J6 zZk#qqn)B44$E_h^=*=`7`jj5oxHMSu)S%C;!67y-0l5u~LpM$h(UW*;FyPiuk`b@P zYKk=H)F6pWA|Bo}J-? zgt?Dc;gE>KajJ+eObpN46MXk;$ndI*PZ7CZXw8dRPVvO)eE{c&lgOi} zPwLV4yS{%UM+)%>E%f49KhM5D({KDl+2FeZWbcb6w~n8G!q44Ht(}dC$S8K7 yib84)y$@yYVdW-tS%Q3SBZ|-u(hhxr6Gw5bKSox1$hmIlYizio$UjuRUiSYkWTl1x diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/scripts/local_build.py b/tests/scripts/local_build.py new file mode 100644 index 0000000..c75adc4 --- /dev/null +++ b/tests/scripts/local_build.py @@ -0,0 +1,94 @@ +""" +This script compiles the Rust library and injects the .so rust python lib into the surrealml +directory so we can run python unit tests against the Rust library. +""" +import os +import shutil +import fnmatch + + +def delete_directory(dir_path: os.path) -> None: + """ + Checks to see if a directory exists and deletes it if it does. + + :param dir_path: the path to the directory. 
+ """ + if os.path.exists(dir_path): + shutil.rmtree(dir_path) + print(f"Directory '{dir_path}' has been deleted.") + else: + print(f"Directory '{dir_path}' does not exist.") + + +def delete_file(file_path: os.path) -> None: + """ + Checks to see if a file exists and deletes it if it does. + + :param file_path: the path to the file. + """ + if os.path.isfile(file_path): + os.remove(file_path) + print(f"File '{file_path}' has been deleted.") + else: + print(f"File '{file_path}' does not exist.") + + +def find_and_move_rust_surrealml_file(start_path: os.path, destination_path: os.path, new_name: str) -> None: + """ + Finds the rust_surrealml.so file and moves it to the surrealml directory. + + :param start_path: the path to start the search from for the built .so rust lib. + :param destination_path: the path to move the rust lib to. + :param new_name: the new name of the rust lib .so file. + """ + for root, dirs, files in os.walk(start_path): + if 'lib' in root: + for filename in fnmatch.filter(files, 'rust_surrealml*.so'): + source_file = os.path.join(root, filename) + destination_file = os.path.join(destination_path, new_name) + shutil.move(source_file, destination_file) + return destination_file + return None + + +script_path = os.path.abspath(__file__) +script_directory = os.path.dirname(script_path) + +tests_directory = os.path.join(script_directory, "..") +main_directory = os.path.join(script_directory, "..", "..") +target_directory = os.path.join(main_directory, "target") +egg_info_dir = os.path.join(main_directory, "surrealml.egg-info") +build_dir = os.path.join(main_directory, "build") +surrealml_dir = os.path.join(main_directory, "surrealml") +embedded_rust_lib_dir = os.path.join(main_directory, "surrealml", "rust_surrealml.so") +test_venv_dir = os.path.join(tests_directory, "venv") +source_venv = os.path.join(test_venv_dir, "bin", "activate") + + +def main(): + # delete the old dirs and embedded rust lib if present + 
delete_directory(dir_path=test_venv_dir) + delete_directory(dir_path=build_dir) + delete_directory(dir_path=egg_info_dir) + delete_directory(dir_path=target_directory) + delete_file(file_path=embedded_rust_lib_dir) + + # setup venv and build the rust lib + os.system(f"python3 -m venv {test_venv_dir}") + os.system(f"source {source_venv} && pip install --no-cache-dir {main_directory}") + + # move the rust lib into the surrealml directory + find_and_move_rust_surrealml_file( + start_path=build_dir, + destination_path=surrealml_dir, + new_name="rust_surrealml.so" + ) + + # cleanup + # delete_directory(dir_path=test_venv_dir) + # delete_directory(dir_path=build_dir) + # delete_directory(dir_path=egg_info_dir) + + +if __name__ == '__main__': + main() diff --git a/tests/test.surml b/tests/test.surml deleted file mode 100644 index 8d8b851364894537156ec18363734af404a169c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3606 zcmbVO3se(V8lLbF2ndLXSauiz@!eXJdgJ^}v zvL!yQtDU?5vb9hmX}J_%>QNK0Kay4Ovi`2q-QUCoSAVFElsF#iBL(DOj&DQefK++4 z{%h5LFE}%buASO9P5ZL<%cQ|=HeI3L&h(x>*nL;yrfpGp+l%V5@a>}Sx6SQaQJM3> zp`nl`Ct+$!SN*~qYzCo;8r{;@=LBEKd$KKMr2>Ve!@48oIr--1nTcHitsp)jHarnd zicVY%FH1~_j*Eg5!y}eN;*c1Ofo$2Q-RR+YDSf$0wsXP%$X~h z2$>*+gpd}u(7iw}mP?aUBSL6w%u+u_)5Z#D@-ag#K1Hij*!kqb>=HmsJ#SVRAEY|t z6ltt5w(jh)l}h&Lab@d|t4?_qy3L8*5nkNHv5Oq~tm%H$?ghaMy?ZSJq-X8RnOT2+ zXPUI9;=?;7L6%=_9U1v`Fl_B~uiqQ;3U7Y!Y0{2c|2F?|Sb3v^;~7;garm+~|3PiV zzqD!O+}-l(q}yno8QmCSYB|1^SZ zeS9_~nBH=LF78M_zIu!86~UA2^FDKKqnDDuI~*b6+c@nmO{r=bc#P%UdtPk6abul& z=a!TPt+XoU-PmeI?)X!1q4|JcY+1PV(&oedG+&EuA+k1S-;qPt zPi|CP>Fhf0W3hPghKNS1&Xr%^x03IT`6TMun)zXEzw>@R(c=zvwVvxw#RR8+Y3sZc zf2{w7wvPPI(61wzQ-_;R=0Dg8cb2t%;x_?Pu(P6JSkv+Fqtykk5?RU;74)YUlIPcOiyL4|*WK6kV zd`v&PLiE|5vY>u#_oFjco(;N#QtnlrLC@j7ljO=O?Q89booc4juu=A%pZf)^TyM{kXxn`q%@7~^audtdEc5uPp+Gn}cz4Z9VeSKN) zlAE;)PlZ=qO$eFg5MN%_Ip=guac|1Qn3CuPGppm=x4*x;*%w=H^VH?Huhtz^B@#=^ zH-F)LKg<90qQPRT(4UU2@NO$F?0K$n-7s8uh0iV8J2QV~!b00=9_u`#4({Aiv%REg 
zK_&LoVYnlf`)%3fI=4I5dm??RL+Ur3Tr;(Xzup1siv6abj$7%)2;AMc=}Zq?({w9C z{bO|7?RmL%^cUuo-c|;(Pc;Fp`GK47&9}cB#Ct_=N`jOqNkU%&aR0cH^2~g72_uVb zlQen#F#`%VnL#$9rYZ$N1n-Y?Q(6w7Q|rI9KYh6wIB(P=64I#GSWRbGl{r%bM$I^V z=_@=_1jE|O7=cR8uR-9NA{buSRNXV)+CleK1S%YQjPa}8@P_gj2LQb@V7{=67EsFlP|ONx;UFkLer@^B&&_*2{8Y*a@x*QxZe5hZ~Vdf&3?$`vfGWYZ}*& zp|ahK=q`hr%aqRGJelsfp-5*K0VUd)0*xLEk#Ncszfn_n^LNLbfW5B$|9wHHcnG>Y e`Umi%Dq3j2o!$Gcsk#`l0Uyd Date: Wed, 24 Jan 2024 16:38:06 +0000 Subject: [PATCH 4/6] adding test model build pipeline and unit tests --- .gitignore | 1 + modules/core/src/execution/compute.rs | 21 ++-- modules/core/stash/test.surml | Bin 433 -> 369 bytes setup.py | 2 +- surrealml/engine/torch.py | 2 +- .../model_templates/datasets}/__init__.py | 0 .../model_templates/datasets/house_linear.py | 41 +++++++ surrealml/model_templates/sklearn/__init__.py | 0 .../model_templates/sklearn/sklearn_linear.py | 43 +++++++ surrealml/model_templates/torch/__init__.py | 0 .../model_templates/torch/torch_linear.py | 90 +++++++++++++++ tests/README.md | 34 ++++++ tests/scripts/build_assets.py | 105 ++++++++++++++++++ tests/scripts/local_build.py | 2 +- tests/unit_tests/engine/test_sklearn.py | 17 +-- tests/unit_tests/engine/test_torch.py | 2 +- tests/unit_tests/test_rust_adapter.py | 2 +- tests/unit_tests/test_surml_file.py | 10 +- tests/utils/torch_linear.py | 54 --------- 19 files changed, 338 insertions(+), 88 deletions(-) rename {tests/utils => surrealml/model_templates/datasets}/__init__.py (100%) create mode 100644 surrealml/model_templates/datasets/house_linear.py create mode 100644 surrealml/model_templates/sklearn/__init__.py create mode 100644 surrealml/model_templates/sklearn/sklearn_linear.py create mode 100644 surrealml/model_templates/torch/__init__.py create mode 100644 surrealml/model_templates/torch/torch_linear.py create mode 100644 tests/scripts/build_assets.py delete mode 100644 tests/utils/torch_linear.py diff --git a/.gitignore b/.gitignore index 352f693..34ddfbe 100644 --- 
a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ modules/core/onnx_driver/ *.so surrealml/rust_surrealml.cpython-310-darwin.so .surmlcache +modules/core/model_stash/ diff --git a/modules/core/src/execution/compute.rs b/modules/core/src/execution/compute.rs index 5f2cbd4..e0c9303 100644 --- a/modules/core/src/execution/compute.rs +++ b/modules/core/src/execution/compute.rs @@ -64,12 +64,6 @@ impl <'a>ModelComputation<'a> { tensor_placeholder = tensor; } - // let environment = Arc::new( - // Environment::builder() - // .with_execution_providers([ExecutionProvider::CPU(Default::default())]) - // .build() - // .map_err(|e| e.to_string())? - // ); let session = SessionBuilder::new(&ENVIRONMENT).map_err(|e| e.to_string())? .with_model_from_memory(&self.surml_file.model) .map_err(|e| e.to_string())?; @@ -148,9 +142,8 @@ mod tests { use super::*; #[test] - fn test_raw_compute() { - - let mut file = SurMlFile::from_file("./stash/test.surml").unwrap(); + fn test_raw_compute_linear_sklearn() { + let mut file = SurMlFile::from_file("./model_stash/sklearn/surml/linear.surml").unwrap(); let model_computation = ModelComputation { surml_file: &mut file, }; @@ -159,15 +152,17 @@ mod tests { input_values.insert(String::from("squarefoot"), 1000.0); input_values.insert(String::from("num_floors"), 2.0); - let output = model_computation.raw_compute(model_computation.input_tensor_from_key_bindings(input_values), None).unwrap(); + let raw_input = model_computation.input_tensor_from_key_bindings(input_values); + + let output = model_computation.raw_compute(raw_input, Some((1, 2))).unwrap(); assert_eq!(output.len(), 1); - assert_eq!(output[0], 725.42053); + assert_eq!(output[0], 985.57745); } #[test] - fn test_buffered_compute() { - let mut file = SurMlFile::from_file("./stash/test.surml").unwrap(); + fn test_buffered_compute_linear_sklearn() { + let mut file = SurMlFile::from_file("./model_stash/sklearn/surml/linear.surml").unwrap(); let model_computation = ModelComputation { surml_file: 
&mut file, }; diff --git a/modules/core/stash/test.surml b/modules/core/stash/test.surml index 61da29a225240cea28e447c93c4f81df8bbdf875..45bcd456f17700db03c0f62c8c6901d3c45c2484 100644 GIT binary patch delta 43 wcmdnU{E str: if torch is not None: traced_script_module = torch.jit.trace(model, inputs) torch.onnx.export(traced_script_module, inputs, file_path) - return file_path \ No newline at end of file + return file_path diff --git a/tests/utils/__init__.py b/surrealml/model_templates/datasets/__init__.py similarity index 100% rename from tests/utils/__init__.py rename to surrealml/model_templates/datasets/__init__.py diff --git a/surrealml/model_templates/datasets/house_linear.py b/surrealml/model_templates/datasets/house_linear.py new file mode 100644 index 0000000..e592c28 --- /dev/null +++ b/surrealml/model_templates/datasets/house_linear.py @@ -0,0 +1,41 @@ +import numpy as np + + +raw_squarefoot = np.array([1000, 1200, 1500, 1800, 2000, 2200, 2500, 2800, 3000, 3200], dtype=np.float32) +raw_num_floors = np.array([1, 1, 1.5, 1.5, 2, 2, 2.5, 2.5, 3, 3], dtype=np.float32) +raw_house_price = np.array([200000, 230000, 280000, 320000, 350000, 380000, 420000, 470000, 500000, 520000], + dtype=np.float32) +squarefoot = (raw_squarefoot - raw_squarefoot.mean()) / raw_squarefoot.std() +num_floors = (raw_num_floors - raw_num_floors.mean()) / raw_num_floors.std() +house_price = (raw_house_price - raw_house_price.mean()) / raw_house_price.std() +inputs = np.column_stack((squarefoot, num_floors)) + + +HOUSE_LINEAR = { + "inputs": inputs, + "outputs": house_price, + + "squarefoot": squarefoot, + "num_floors": num_floors, + "input order": ["squarefoot", "num_floors"], + "raw_inputs": { + "squarefoot": raw_squarefoot, + "num_floors": raw_num_floors, + }, + "normalised_inputs": { + "squarefoot": squarefoot, + "num_floors": num_floors, + }, + "normalisers": { + "squarefoot": { + "type": "z_score", + "mean": squarefoot.mean(), + "std": squarefoot.std() + }, + "num_floors": { + 
"type": "z_score", + "mean": num_floors.mean(), + "std": num_floors.std() + } + }, +} diff --git a/surrealml/model_templates/sklearn/__init__.py b/surrealml/model_templates/sklearn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/model_templates/sklearn/sklearn_linear.py b/surrealml/model_templates/sklearn/sklearn_linear.py new file mode 100644 index 0000000..e5f6896 --- /dev/null +++ b/surrealml/model_templates/sklearn/sklearn_linear.py @@ -0,0 +1,43 @@ +""" +Trains a linear regression model using sklearn. This is a basic model that can be used for testing. +""" +from sklearn.linear_model import LinearRegression + +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR + + +def train_model(): + """ + Trains a linear regression model using sklearn. This is a basic model that can be used for testing. + """ + model = LinearRegression() + model.fit(HOUSE_LINEAR["inputs"], HOUSE_LINEAR["outputs"]) + return model + + +def export_model_onnx(model): + """ + Exports the model to ONNX format. + + :param model: the model to export. + :return: the path to the exported model. + """ + import skl2onnx + return skl2onnx.to_onnx(model, HOUSE_LINEAR["inputs"]) + + +def export_model_surml(model): + """ + Exports the model to SURML format. + + :param model: the model to export. + :return: the path to the exported model. 
+ """ + from surrealml import SurMlFile, Engine + file = SurMlFile(model=model, name="linear", inputs=HOUSE_LINEAR["inputs"], engine=Engine.SKLEARN) + file.add_column("squarefoot") + file.add_column("num_floors") + file.add_normaliser("squarefoot", "z_score", HOUSE_LINEAR["squarefoot"].mean(), HOUSE_LINEAR["squarefoot"].std()) + file.add_normaliser("num_floors", "z_score", HOUSE_LINEAR["num_floors"].mean(), HOUSE_LINEAR["num_floors"].std()) + file.add_output("house_price", "z_score", HOUSE_LINEAR["outputs"].mean(), HOUSE_LINEAR["outputs"].std()) + return file diff --git a/surrealml/model_templates/torch/__init__.py b/surrealml/model_templates/torch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/surrealml/model_templates/torch/torch_linear.py b/surrealml/model_templates/torch/torch_linear.py new file mode 100644 index 0000000..4abf7d2 --- /dev/null +++ b/surrealml/model_templates/torch/torch_linear.py @@ -0,0 +1,90 @@ +""" +Trains a linear regression model in torch. Should be used for testing certain processes +for linear regression and torch. +""" +import torch +import torch.nn as nn +import torch.optim as optim + +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR + + +class LinearRegressionModel(nn.Module): + def __init__(self): + super(LinearRegressionModel, self).__init__() + self.linear = nn.Linear(2, 1) # 2 input features, 1 output + + def forward(self, x): + return self.linear(x) + + +def train_model(): + """ + Trains a linear regression model in torch. Should be used for testing certain processes. 
+ """ + tensor = [ + torch.from_numpy(HOUSE_LINEAR["squarefoot"]), + torch.from_numpy(HOUSE_LINEAR["num_floors"]) + ] + X = torch.stack(tensor, dim=1) + + # Initialize the model + model = LinearRegressionModel() + + # Define the loss function and optimizer + criterion = nn.MSELoss() + optimizer = optim.SGD(model.parameters(), lr=0.01) + + num_epochs = 1000 + for epoch in range(num_epochs): + # Forward pass + y_pred = model(X) + + # Compute the loss + loss = criterion(y_pred.squeeze(), torch.from_numpy(HOUSE_LINEAR["outputs"])) + + # Backward pass and optimization + optimizer.zero_grad() + loss.backward() + optimizer.step() + + test_squarefoot = torch.tensor([2800, 3200], dtype=torch.float32) + test_num_floors = torch.tensor([2.5, 3], dtype=torch.float32) + x = torch.stack([test_squarefoot, test_num_floors], dim=1) + return model, x + + +def export_model_onnx(model): + """ + Exports the model to ONNX format. + """ + tensor = [ + torch.from_numpy(HOUSE_LINEAR["squarefoot"]), + torch.from_numpy(HOUSE_LINEAR["num_floors"]) + ] + inputs = torch.stack(tensor, dim=1) + return torch.jit.trace(model, inputs) + + +def export_model_surml(model): + """ + Exports the model to SURML format. + + :param model: the model to export. + :return: the path to the exported model. 
+ """ + from surrealml import SurMlFile, Engine + + tensor = [ + torch.from_numpy(HOUSE_LINEAR["squarefoot"]), + torch.from_numpy(HOUSE_LINEAR["num_floors"]) + ] + inputs = torch.stack(tensor, dim=1) + + file = SurMlFile(model=model, name="linear", inputs=inputs, engine=Engine.PYTORCH) + file.add_column("squarefoot") + file.add_column("num_floors") + file.add_normaliser("squarefoot", "z_score", HOUSE_LINEAR["squarefoot"].mean(), HOUSE_LINEAR["squarefoot"].std()) + file.add_normaliser("num_floors", "z_score", HOUSE_LINEAR["num_floors"].mean(), HOUSE_LINEAR["num_floors"].std()) + file.add_output("house_price", "z_score", HOUSE_LINEAR["outputs"].mean(), HOUSE_LINEAR["outputs"].std()) + return file diff --git a/tests/README.md b/tests/README.md index e69de29..abe4b7b 100644 --- a/tests/README.md +++ b/tests/README.md @@ -0,0 +1,34 @@ +# Tests +This section houses the functionality of testing the repo in terms of unit tests and integration tests. + +## Library Setup + +There has to be a little bit of setup to run unit tests for this repo. This is because a large part of the code +is written in Rust. Therefore, the Rust binary has to be compiled and put into the correct place for the rest of the +python repo to reference it. If the Rust binary is not compiled, then the unit tests will fail as they are trying to +reference a binary that does not exist. Storage and execution of machine learning models is done in Rust so we can +ensure that if the package runs locally in Python, it will run in production in Rust in the same way in the database. +There is a script that will compile the Rust binary and put it in the correct place. 
To run this script, run the +following command ensuring that you are in the root directory of the repo and that you have not activated a virtual +environment as the script will build a temporary virtual environment for the build and then delete the virtual +environment after the build is complete: + +```bash +python tests/scripts/local_build.py +``` + +## Model Setup + +Surml aims to support a range of different machine learning models as long as we can convert those models to ONNX. +To keep the feedback loop tight and to ensure that the models are working as expected, we have a set of tests that +run against trained models in the core library and the surrealml library. These tests are run against models that are +freshly trained using the approaches that we advocate for. We can train our models and deploy them in the testing +environment using the following command: + +```bash +python tests/scripts/build_assets.py +``` + +The trained models will be stored in the `modules/core/model_stash/` directory. This directory is ignored by git +so if you have recently cloned the repo or you are adding a GitHub action that involves the models, you will need +to ensure that the `build_assets.py` file is run at some point before you rely on those models. 
diff --git a/tests/scripts/build_assets.py b/tests/scripts/build_assets.py new file mode 100644 index 0000000..88aa257 --- /dev/null +++ b/tests/scripts/build_assets.py @@ -0,0 +1,105 @@ +import sys +import os +import onnx + +script_path = os.path.abspath(__file__) +script_directory = os.path.dirname(script_path) + +tests_directory = os.path.join(script_directory, "..") +main_directory = os.path.join(script_directory, "..", "..") + +# Add a directory to the PYTHONPATH +sys.path.append(main_directory) + + +import shutil +from surrealml.model_templates.sklearn.sklearn_linear import train_model as linear_sklearn_train_model +from surrealml.model_templates.sklearn.sklearn_linear import export_model_onnx as linear_sklearn_export_model_onnx +from surrealml.model_templates.sklearn.sklearn_linear import export_model_surml as linear_sklearn_export_model_surml + +from surrealml.model_templates.torch.torch_linear import train_model as linear_torch_train_model + + +def delete_directory(dir_path: os.path) -> None: + """ + Checks to see if a directory exists and deletes it if it does. + + :param dir_path: the path to the directory. + """ + if os.path.exists(dir_path): + shutil.rmtree(dir_path) + print(f"Directory '{dir_path}' has been deleted.") + else: + print(f"Directory '{dir_path}' does not exist.") + + +def delete_file(file_path: os.path) -> None: + """ + Checks to see if a file exists and deletes it if it does. + + :param file_path: the path to the file. + """ + if os.path.isfile(file_path): + os.remove(file_path) + print(f"File '{file_path}' has been deleted.") + else: + print(f"File '{file_path}' does not exist.") + + +def write_file(file_path: os.path, model, file_name) -> None: + """ + Writes a file to the specified path. + + :param file_path: the path to write the file to. + :param model: the model to write to the file. + :param file_name: the name of the file to write. 
+ """ + with open(os.path.join(file_path, file_name), "wb") as f: + f.write(model) + +core_directory = os.path.join(main_directory, "modules", "core") + +model_stash_directory = os.path.join(core_directory, "model_stash") +sklearn_stash_directory = os.path.join(model_stash_directory, "sklearn") +sklearn_surml_stash_directory = os.path.join(sklearn_stash_directory, "surml") +sklearn_onnx_stash_directory = os.path.join(sklearn_stash_directory, "onnx") +torch_stash_directory = os.path.join(model_stash_directory, "torch") +torch_surml_stash_directory = os.path.join(torch_stash_directory, "surml") +torch_onnx_stash_directory = os.path.join(torch_stash_directory, "onnx") + + +target_directory = os.path.join(main_directory, "target") +egg_info_dir = os.path.join(main_directory, "surrealml.egg-info") + + +def main(): + print("main running") + # wipe and create directories for model stashes + delete_directory(model_stash_directory) + + os.mkdir(model_stash_directory) + os.mkdir(sklearn_stash_directory) + os.mkdir(sklearn_surml_stash_directory) + os.mkdir(sklearn_onnx_stash_directory) + os.mkdir(torch_stash_directory) + os.mkdir(torch_surml_stash_directory) + os.mkdir(torch_onnx_stash_directory) + + # train and stash sklearn models + sklearn_linear_model = linear_sklearn_train_model() + sklearn_linear_surml_file = linear_sklearn_export_model_surml(sklearn_linear_model) + sklearn_linear_onnx_file = linear_sklearn_export_model_onnx(sklearn_linear_model) + + sklearn_linear_surml_file.save( + path=str(os.path.join(sklearn_surml_stash_directory, "linear.surml")) + ) + onnx.save( + sklearn_linear_onnx_file, + os.path.join(sklearn_onnx_stash_directory, "linear.onnx") + ) + os.system(f"cd {model_stash_directory} && tree") + # write_file(sklearn_onnx_stash_directory, sklearn_linear_onnx_file, "linear.onnx") + + +if __name__ == '__main__': + main() diff --git a/tests/scripts/local_build.py b/tests/scripts/local_build.py index c75adc4..50b4b50 100644 --- a/tests/scripts/local_build.py 
+++ b/tests/scripts/local_build.py @@ -2,9 +2,9 @@ This script compiles the Rust library and injects the .so rust python lib into the surrealml directory so we can run python unit tests against the Rust library. """ +import fnmatch import os import shutil -import fnmatch def delete_directory(dir_path: os.path) -> None: diff --git a/tests/unit_tests/engine/test_sklearn.py b/tests/unit_tests/engine/test_sklearn.py index 38b67d8..5468eaf 100644 --- a/tests/unit_tests/engine/test_sklearn.py +++ b/tests/unit_tests/engine/test_sklearn.py @@ -7,21 +7,16 @@ import numpy as np import onnxruntime as ort -from sklearn.linear_model import LinearRegression from surrealml.engine.sklearn import SklearnOnnxAdapter +from surrealml.model_templates.sklearn.sklearn_linear import train_model +from surrealml.model_templates.datasets.house_linear import HOUSE_LINEAR class TestSklearn(TestCase): def setUp(self): - np.random.seed(0) # For reproducibility - self.x = np.random.rand(100, 1) * 10 # 100 random numbers between 0 and 10 - noise = np.random.randn(100, 1) # 100 random noise values - self.y = 3 * self.x + 4 + noise # Linear relationship with noise - # Create and train the model - self.model = LinearRegression() - self.model.fit(self.x, self.y) + self.model = train_model() def tearDown(self): try: @@ -30,7 +25,7 @@ def tearDown(self): print(f"Error: surmlcache : {e.strerror}") def test_store_and_run(self): - file_path = SklearnOnnxAdapter.save_model_to_onnx(self.model, self.x[:1]) + file_path = SklearnOnnxAdapter.save_model_to_onnx(self.model, HOUSE_LINEAR["inputs"][:1]) # Load the ONNX model session = ort.InferenceSession(file_path) @@ -38,7 +33,7 @@ def test_store_and_run(self): # Prepare input data (adjust the shape according to your model's requirements) # For a linear regression model, it usually expects a single feature vector. 
# Example: Predicting for a single value - input_data = np.array([[5]], dtype=np.float64) # Replace with your input data + input_data = np.array([[5, 6]], dtype=np.float32) # Replace with your input data # Get the name of the input node input_name = session.get_inputs()[0].name @@ -50,7 +45,7 @@ def test_store_and_run(self): # For a simple linear regression model, it typically has a single output. predicted_value = result[0][0][0] - self.assertEqual(19.190618588148247, predicted_value) + self.assertEqual(5.013289451599121, float(predicted_value)) if __name__ == '__main__': diff --git a/tests/unit_tests/engine/test_torch.py b/tests/unit_tests/engine/test_torch.py index dc32789..136b68d 100644 --- a/tests/unit_tests/engine/test_torch.py +++ b/tests/unit_tests/engine/test_torch.py @@ -9,7 +9,7 @@ import onnxruntime as ort from surrealml.engine.torch import TorchOnnxAdapter -from tests.utils.torch_linear import train_model +from surrealml.model_templates.torch.torch_linear import train_model class TestTorch(TestCase): diff --git a/tests/unit_tests/test_rust_adapter.py b/tests/unit_tests/test_rust_adapter.py index 5147aa6..b2af7aa 100644 --- a/tests/unit_tests/test_rust_adapter.py +++ b/tests/unit_tests/test_rust_adapter.py @@ -1,5 +1,5 @@ from unittest import TestCase, main -from tests.utils.torch_linear import train_model +from surrealml.model_templates.torch.torch_linear import train_model from surrealml.rust_adapter import RustAdapter from surrealml.surml_file import SurMlFile from surrealml.engine import Engine diff --git a/tests/unit_tests/test_surml_file.py b/tests/unit_tests/test_surml_file.py index a023930..b34948a 100644 --- a/tests/unit_tests/test_surml_file.py +++ b/tests/unit_tests/test_surml_file.py @@ -3,7 +3,7 @@ from unittest import TestCase from surrealml import Engine, SurMlFile -from tests.utils.torch_linear import train_model +from surrealml.model_templates.torch.torch_linear import train_model import numpy as np @@ -53,7 +53,7 @@ def 
test_full_torch_run(self): # print(new_file.raw_compute([1.0, 2.0])) - print(new_file.buffered_compute({ - "squarefoot": 3200.0, - "num_floors": 2.0 - })) + # print(new_file.buffered_compute({ + # "squarefoot": 3200.0, + # "num_floors": 2.0 + # })) diff --git a/tests/utils/torch_linear.py b/tests/utils/torch_linear.py deleted file mode 100644 index 96e0dc6..0000000 --- a/tests/utils/torch_linear.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -Trains a basic torch model that can be used for testing. -""" -import numpy as np -import torch -import torch.nn as nn -import torch.optim as optim - - -class LinearRegressionModel(nn.Module): - def __init__(self): - super(LinearRegressionModel, self).__init__() - self.linear = nn.Linear(2, 1) # 2 input features, 1 output - - def forward(self, x): - return self.linear(x) - -def train_model(): - squarefoot = np.array([1000, 1200, 1500, 1800, 2000, 2200, 2500, 2800, 3000, 3200], dtype=np.float32) - num_floors = np.array([1, 1, 1.5, 1.5, 2, 2, 2.5, 2.5, 3, 3], dtype=np.float32) - house_price = np.array([200000, 230000, 280000, 320000, 350000, 380000, 420000, 470000, 500000, 520000], - dtype=np.float32) - squarefoot = (squarefoot - squarefoot.mean()) / squarefoot.std() - num_floors = (num_floors - num_floors.mean()) / num_floors.std() - house_price = (house_price - house_price.mean()) / house_price.std() - squarefoot_tensor = torch.from_numpy(squarefoot) - num_floors_tensor = torch.from_numpy(num_floors) - house_price_tensor = torch.from_numpy(house_price) - - X = torch.stack([squarefoot_tensor, num_floors_tensor], dim=1) - # Initialize the model - model = LinearRegressionModel() - - # Define the loss function and optimizer - criterion = nn.MSELoss() - optimizer = optim.SGD(model.parameters(), lr=0.01) - - num_epochs = 1000 - for epoch in range(num_epochs): - # Forward pass - y_pred = model(X) - - # Compute the loss - loss = criterion(y_pred.squeeze(), house_price_tensor) - - # Backward pass and optimization - optimizer.zero_grad() - 
loss.backward() - optimizer.step() - - test_squarefoot = torch.tensor([2800, 3200], dtype=torch.float32) - test_num_floors = torch.tensor([2.5, 3], dtype=torch.float32) - x = torch.stack([test_squarefoot, test_num_floors], dim=1) - return model, x From 637e5e3167e8f8a788fafca0428cefaa4a4225a8 Mon Sep 17 00:00:00 2001 From: maxwellflitton Date: Wed, 24 Jan 2024 22:17:02 +0000 Subject: [PATCH 5/6] sklearn linear now tested and passing for buffered compute and raw compute --- modules/core/src/execution/compute.rs | 55 +++++++++++++-------------- tests/scripts/build_assets.py | 25 +++++++++++- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/modules/core/src/execution/compute.rs b/modules/core/src/execution/compute.rs index e0c9303..fcb4337 100644 --- a/modules/core/src/execution/compute.rs +++ b/modules/core/src/execution/compute.rs @@ -2,7 +2,7 @@ use crate::storage::surml_file::SurMlFile; use std::collections::HashMap; use ndarray::{ArrayD, CowArray}; -use ort::{SessionBuilder, Value}; +use ort::{SessionBuilder, Value, session::Input}; use super::onnx_environment::ENVIRONMENT; @@ -29,6 +29,24 @@ impl <'a>ModelComputation<'a> { ndarray::arr1::(&buffer).into_dyn() } + /// Creates a vector of dimensions for the input tensor from the loaded model. + /// + /// # Arguments + /// * `input_dims` - The input dimensions from the loaded model. + /// + /// # Returns + /// A vector of dimensions for the input tensor to be reshaped into from the loaded model. + fn process_input_dims(input_dims: &Input) -> Vec { + let mut buffer = Vec::new(); + for dim in input_dims.dimensions() { + match dim { + Some(dim) => buffer.push(dim as usize), + None => buffer.push(1) + } + } + buffer + } + /// Creates a Vector that can be used manipulated with other operations such as normalisation from a hashmap of keys and values. /// /// # Arguments @@ -52,22 +70,14 @@ impl <'a>ModelComputation<'a> { /// /// # Returns /// The computed output tensor from the loaded model. 
- pub fn raw_compute(&self, tensor: ArrayD, dims: Option<(i32, i32)>) -> Result, String> { - - let tensor_placeholder: ArrayD; - if dims.is_some() { - let dims = dims.unwrap(); - let tensor = tensor.into_shape((dims.0 as usize, dims.1 as usize)).unwrap(); - tensor_placeholder = tensor.into_dyn(); - } - else { - tensor_placeholder = tensor; - } - + pub fn raw_compute(&self, tensor: ArrayD, _dims: Option<(i32, i32)>) -> Result, String> { let session = SessionBuilder::new(&ENVIRONMENT).map_err(|e| e.to_string())? .with_model_from_memory(&self.surml_file.model) .map_err(|e| e.to_string())?; - let x = CowArray::from(tensor_placeholder); + let unwrapped_dims = ModelComputation::process_input_dims(&session.inputs[0]); + let tensor = tensor.into_shape(unwrapped_dims).unwrap(); + + let x = CowArray::from(tensor).into_dyn(); let outputs = session.run(vec![Value::from_array(session.allocator(), &x).unwrap()]).map_err(|e| e.to_string())?; let mut buffer: Vec = Vec::new(); @@ -173,22 +183,11 @@ mod tests { let output = model_computation.buffered_compute(&mut input_values).unwrap(); assert_eq!(output.len(), 1); - assert_eq!(output[0], 725.42053); + assert_eq!(output[0], 985.57745); } #[test] - fn test_raw_compute_trees() { - let mut file = SurMlFile::from_file("./stash/forrest.surml").unwrap(); - let model_computation = ModelComputation { - surml_file: &mut file, - }; - - let x = vec![0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]; - let data: ArrayD = ndarray::arr1(&x).into_dyn(); - let data: ArrayD = data.into_shape((1, 28)).unwrap().into_dyn(); - - let output = model_computation.raw_compute(data, None).unwrap(); - assert_eq!(output.len(), 1); - assert_eq!(output[0], 0.0); + fn test_raw_compute_linear_torch() { + } } diff --git a/tests/scripts/build_assets.py b/tests/scripts/build_assets.py index 88aa257..039c008 100644 --- a/tests/scripts/build_assets.py +++ 
b/tests/scripts/build_assets.py @@ -1,5 +1,9 @@ -import sys +""" +Trains and exports models to be used for testing. +""" import os +import sys + import onnx script_path = os.path.abspath(__file__) @@ -18,6 +22,8 @@ from surrealml.model_templates.sklearn.sklearn_linear import export_model_surml as linear_sklearn_export_model_surml from surrealml.model_templates.torch.torch_linear import train_model as linear_torch_train_model +from surrealml.model_templates.torch.torch_linear import export_model_onnx as linear_torch_export_model_onnx +from surrealml.model_templates.torch.torch_linear import export_model_surml as linear_torch_export_model_surml def delete_directory(dir_path: os.path) -> None: @@ -97,8 +103,23 @@ def main(): sklearn_linear_onnx_file, os.path.join(sklearn_onnx_stash_directory, "linear.onnx") ) + + # train and stash torch models + torch_linear_model, x = linear_torch_train_model() + torch_linear_surml_file = linear_torch_export_model_surml(torch_linear_model) + torch_linear_onnx_file = linear_torch_export_model_onnx(torch_linear_model) + + torch_linear_surml_file.save( + path=str(os.path.join(torch_surml_stash_directory, "linear.surml")) + ) + onnx.save( + torch_linear_onnx_file, + os.path.join(torch_onnx_stash_directory, "linear.onnx") + ) + os.system(f"cd {model_stash_directory} && tree") - # write_file(sklearn_onnx_stash_directory, sklearn_linear_onnx_file, "linear.onnx") + + shutil.rmtree(".surmlcache") if __name__ == '__main__': From 189eab608d944945d4337a7cf603ff4974ffdf97 Mon Sep 17 00:00:00 2001 From: maxwellflitton Date: Thu, 25 Jan 2024 11:52:10 +0000 Subject: [PATCH 6/6] updating tests --- .github/workflows/surrealml_core_test.yml | 30 ++++++++++++++++-- .github/workflows/surrealml_test.yml | 24 -------------- .gitignore | 1 + Cargo.toml | 2 +- build.rs | 8 ++--- modules/core/Cargo.toml | 2 +- modules/core/src/execution/compute.rs | 15 ++++++++- modules/core/stash/test.surml | Bin 369 -> 433 bytes requirements.txt | 4 +++ setup.py | 16 
+++++++--- src/transport.rs | 25 ++++++++++++--- .../model_templates/torch/torch_linear.py | 2 +- surrealml/surml_file.py | 3 -- 13 files changed, 85 insertions(+), 47 deletions(-) delete mode 100644 .github/workflows/surrealml_test.yml create mode 100644 requirements.txt diff --git a/.github/workflows/surrealml_core_test.yml b/.github/workflows/surrealml_core_test.yml index 1ab3630..693abb1 100644 --- a/.github/workflows/surrealml_core_test.yml +++ b/.github/workflows/surrealml_core_test.yml @@ -1,4 +1,4 @@ -name: Rust Test for surrealml-core on Pull Request +name: Run tests on Pull Request on: pull_request: @@ -20,5 +20,31 @@ jobs: toolchain: stable override: true - - name: Run Unit Tests + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' # Replace '3.x' with the specific version if needed + + - name: Pre-test Setup + run: | + python3 -m venv venv + source venv/bin/activate + pip install -r requirements.txt + + # build the local version of the core module to be loaded into python + python tests/scripts/local_build.py + + # train the models for the tests + python tests/scripts/build_assets.py + + - name: Run Python Unit Tests + run: | + source venv/bin/activate + python -m unittest discover + deactivate + + - name: Run Core Unit Tests run: cd modules/core && cargo test + + - name: Run HTTP Transfer Tests + run: cargo test diff --git a/.github/workflows/surrealml_test.yml b/.github/workflows/surrealml_test.yml deleted file mode 100644 index e7e7b5a..0000000 --- a/.github/workflows/surrealml_test.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Rust Test for surrealml on Pull Request - -on: - pull_request: - types: [opened, reopened, synchronize] - -jobs: - test_transport: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Set up Rust - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true - - - name: Run Unit Tests - run: cargo test diff --git 
a/.gitignore b/.gitignore index 34ddfbe..5fdb7e2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ Cargo.lock build/ dist/ +__pycache__/ venv/ .idea/ surrealml.egg-info/ diff --git a/Cargo.toml b/Cargo.toml index 1ed07cd..f99bdbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ ndarray = "0.15.6" hyper = { version = "0.14.27", features = ["full"] } tokio = { version = "1.34.0", features = ["full"] } base64 = "0.13" -surrealml-core = { path = "./modules/utils" } +surrealml-core = { path = "./modules/core" } [dev-dependencies] axum = "0.6.20" diff --git a/build.rs b/build.rs index 8b0011b..b7a38fd 100644 --- a/build.rs +++ b/build.rs @@ -22,16 +22,16 @@ fn main() { // remove ./modules/utils/target folder if there let _ = - std::fs::remove_dir_all(Path::new("modules").join("utils").join("target")).unwrap_or(()); + std::fs::remove_dir_all(Path::new("modules").join("core").join("target")).unwrap_or(()); // create the target module folder for the utils module - let _ = std::fs::create_dir(Path::new("modules").join("utils").join("target")); - let _ = std::fs::create_dir(Path::new("modules").join("utils").join("target").join(profile)); + let _ = std::fs::create_dir(Path::new("modules").join("core").join("target")); + let _ = std::fs::create_dir(Path::new("modules").join("core").join("target").join(profile)); // copy target folder to modules/utils/target profile for the utils modules std::fs::copy( Path::new("target").join(profile).join(target_lib), - Path::new("modules").join("utils").join("target").join(profile).join(target_lib), + Path::new("modules").join("core").join("target").join(profile).join(target_lib), ) .unwrap(); } diff --git a/modules/core/Cargo.toml b/modules/core/Cargo.toml index a6a7294..9416342 100644 --- a/modules/core/Cargo.toml +++ b/modules/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "surrealml-core" -version = "0.0.7" +version = "0.0.8" edition = "2021" build = "./build.rs" description = "The core machine learning library for 
SurrealML that enables SurrealDB to store and load ML models" diff --git a/modules/core/src/execution/compute.rs b/modules/core/src/execution/compute.rs index fcb4337..186d80f 100644 --- a/modules/core/src/execution/compute.rs +++ b/modules/core/src/execution/compute.rs @@ -75,7 +75,7 @@ impl <'a>ModelComputation<'a> { .with_model_from_memory(&self.surml_file.model) .map_err(|e| e.to_string())?; let unwrapped_dims = ModelComputation::process_input_dims(&session.inputs[0]); - let tensor = tensor.into_shape(unwrapped_dims).unwrap(); + let tensor = tensor.into_shape(unwrapped_dims).map_err(|e| e.to_string())?; let x = CowArray::from(tensor).into_dyn(); let outputs = session.run(vec![Value::from_array(session.allocator(), &x).unwrap()]).map_err(|e| e.to_string())?; @@ -188,6 +188,19 @@ mod tests { #[test] fn test_raw_compute_linear_torch() { + let mut file = SurMlFile::from_file("./model_stash/torch/surml/linear.surml").unwrap(); + let model_computation = ModelComputation { + surml_file: &mut file, + }; + + let mut input_values = HashMap::new(); + input_values.insert(String::from("squarefoot"), 1000.0); + input_values.insert(String::from("num_floors"), 2.0); + let raw_input = model_computation.input_tensor_from_key_bindings(input_values); + + let output = model_computation.raw_compute(raw_input, None).unwrap(); + assert_eq!(output.len(), 1); + assert_eq!(output[0], 378.237); } } diff --git a/modules/core/stash/test.surml b/modules/core/stash/test.surml index 45bcd456f17700db03c0f62c8c6901d3c45c2484..61da29a225240cea28e447c93c4f81df8bbdf875 100644 GIT binary patch delta 136 zcmey!w2`@=3.13", - "numpy", - "torch==2.0.0", - "hummingbird-ml==0.4.9" - ] + "numpy==1.26.3", + ], + extras_require={ + "sklearn": [ + "skl2onnx==1.16.0", + "scikit-learn==1.4.0" + ], + "torch": [ + "torch==2.1.2" + ] + } ) diff --git a/src/transport.rs b/src/transport.rs index 8602309..b764fbc 100644 --- a/src/transport.rs +++ b/src/transport.rs @@ -38,7 +38,7 @@ async fn root(mut stream: 
BodyStream) -> &'static str { surml_file: &mut file }; let result = computert_unit.buffered_compute(&mut input_values).unwrap(); - assert_eq!(result[0], 1.2747419); + println!("Result: {:?}", result); return "Hello root" } @@ -54,10 +54,10 @@ async fn run_server() { } -async fn send_request() { +async fn send_request(path: &str) { let client = Client::new(); let uri: Uri = "http://0.0.0.0:4000".parse().unwrap(); - let generator = StreamAdapter::new(5, "./test.surml".to_string()); + let generator = StreamAdapter::new(5, path.to_string()); let body = Body::wrap_stream(generator); let req = Request::post(uri).body(body).unwrap(); let response = client.request(req).await.unwrap(); @@ -71,7 +71,7 @@ mod tests { use std::thread; #[test] - fn test_server() { + fn test_server_sklearn() { let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); let _server_task = tokio_runtime.spawn( async { run_server().await; @@ -79,7 +79,22 @@ mod tests { let sleep_time = std::time::Duration::from_secs(1); tokio_runtime.block_on( async { - send_request().await; + send_request("./modules/core/model_stash/sklearn/surml/linear.surml").await; + }); + + thread::sleep(sleep_time); + } + + #[test] + fn test_server_torch() { + let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); + let _server_task = tokio_runtime.spawn( async { + run_server().await; + }); + + let sleep_time = std::time::Duration::from_secs(1); + tokio_runtime.block_on( async { + send_request("./modules/core/model_stash/torch/surml/linear.surml").await; }); thread::sleep(sleep_time); diff --git a/surrealml/model_templates/torch/torch_linear.py b/surrealml/model_templates/torch/torch_linear.py index 4abf7d2..e38c514 100644 --- a/surrealml/model_templates/torch/torch_linear.py +++ b/surrealml/model_templates/torch/torch_linear.py @@ -81,7 +81,7 @@ def export_model_surml(model): ] inputs = torch.stack(tensor, dim=1) - file = SurMlFile(model=model, name="linear", inputs=inputs, engine=Engine.PYTORCH) + file = 
SurMlFile(model=model, name="linear", inputs=inputs[:1], engine=Engine.PYTORCH) file.add_column("squarefoot") file.add_column("num_floors") file.add_normaliser("squarefoot", "z_score", HOUSE_LINEAR["squarefoot"].mean(), HOUSE_LINEAR["squarefoot"].std()) diff --git a/surrealml/surml_file.py b/surrealml/surml_file.py index 891ef13..5cf5c94 100644 --- a/surrealml/surml_file.py +++ b/surrealml/surml_file.py @@ -1,11 +1,8 @@ """ Defines the SurMlFile class which is used to save/load models and perform computations based on those models. """ -import os -import uuid from typing import Optional - from surrealml.engine import Engine, SklearnOnnxAdapter, TorchOnnxAdapter from surrealml.rust_adapter import RustAdapter