diff --git a/.github/workflows/onpush.yml b/.github/workflows/onpush.yml index 7c4a902..12f4442 100644 --- a/.github/workflows/onpush.yml +++ b/.github/workflows/onpush.yml @@ -27,11 +27,11 @@ jobs: poetry install --all-extras - name: Lint with ruff run: | - poetry run ruff auto_zkml + poetry run ruff giza - name: Pre-commit check run: | poetry run pre-commit run --all-files # Bring back when tests area working # - name: Testing # run: | - # poetry run pytest --cov=auto_zkml --cov-report term-missing + # poetry run pytest --cov=giza.zkcook --cov-report term-missing diff --git a/.github/workflows/onrelease.yml b/.github/workflows/onrelease.yml index 82ade6c..034bd5a 100644 --- a/.github/workflows/onrelease.yml +++ b/.github/workflows/onrelease.yml @@ -30,11 +30,11 @@ jobs: poetry install - name: Lint with ruff run: | - poetry run ruff auto_zkml + poetry run ruff giza - name: Build dist run: poetry build - name: Publish a Python distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ - password: ${{ secrets.GIZA_AUTOZKML_PYPI_TOKEN }} + password: ${{ secrets.GIZA_ZKCOOK_PYPI_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcf6496..c831f9d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,8 +30,10 @@ repos: entry: isort language: system files: "py$" + args: ["--line-length=145"] - id: ruff name: ruff entry: ruff language: system files: "py$" + args: ["--line-length=145"] diff --git a/README.md b/README.md index 0043cdf..4ba9098 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# auto-zkml +# zkcook This package is designed to provide functionality that facilitates the transition from ML algorithms to ZKML. Its two main functionalities are: @@ -6,7 +6,7 @@ This package is designed to provide functionality that facilitates the transitio - [**model-complexity-reducer (mcr)**](#mcr): Given a model and a training dataset, transform the model and the data to obtain a lighter representation that maximizes the tradeoff between performance and complexity. -It's important to note that although the main goal is the transition from ML to ZKML, auto-zkml can be useful in other contexts, such as: +It's important to note that although the main goal is the transition from ML to ZKML, mcr can be useful in other contexts, such as: - The model's weight needs to be minimal, for example for mobile applications. - Minimal inference times are required for low latency applications. @@ -20,7 +20,7 @@ It's important to note that although the main goal is the transition from ML to For the latest release: ```bash -pip install auto-zkml +pip install giza-zkcook ``` ### Installing from source @@ -29,8 +29,8 @@ Clone the repository and install it with `pip`: ```bash - git clone git@github.com:gizatechxyz/auto-zkml.git - cd auto-zkml + git clone git@github.com:gizatechxyz/zkcook.git + cd zkcook pip install . ``` @@ -41,7 +41,7 @@ To see in more detail how this tool works, check out this [tutorial](tutorials/s To run it: ```python -from auto_zkml import serialize_model +from giza.zkcook import serialize_model serialize_model(YOUR_TRAINED_MODEL, "OUTPUT_PATH/MODEL_NAME.json") ``` diff --git a/auto_zkml/__init__.py b/auto_zkml/__init__.py deleted file mode 100644 index 4f5a4a9..0000000 --- a/auto_zkml/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from auto_zkml.model_reducer import mcr -from auto_zkml.serializer.serialize import serialize_model - -__all__ = ["mcr", "serialize_model"] - -__version__ = "0.1.0" diff --git a/auto_zkml/serializer/xg.py b/auto_zkml/serializer/xg.py deleted file mode 100644 index 9e37260..0000000 --- a/auto_zkml/serializer/xg.py +++ /dev/null @@ -1,2 +0,0 @@ -def serialize(model, output_path): - model.save_model(output_path) diff --git a/giza/zkcook/__init__.py b/giza/zkcook/__init__.py new file mode 100644 index 0000000..fa8b4e2 --- /dev/null +++ b/giza/zkcook/__init__.py @@ -0,0 +1,6 @@ +from giza.zkcook.model_reducer import mcr +from giza.zkcook.serializer.serialize import serialize_model + +__all__ = ["mcr", "serialize_model"] + +__version__ = "0.1.0" diff --git a/auto_zkml/model_reducer.py b/giza/zkcook/model_reducer.py similarity index 93% rename from auto_zkml/model_reducer.py rename to giza/zkcook/model_reducer.py index 7d6d898..a190ce6 100644 --- a/auto_zkml/model_reducer.py +++ b/giza/zkcook/model_reducer.py @@ -1,12 +1,12 @@ from skopt import gp_minimize from skopt.utils import use_named_args -from auto_zkml.model_toolkit.data_transformer import DataTransformer -from auto_zkml.model_toolkit.feature_models_space import FeatureSpaceConstants -from auto_zkml.model_toolkit.metrics import check_metric_optimization -from auto_zkml.model_toolkit.model_evaluator import ModelEvaluator -from auto_zkml.model_toolkit.model_info import ModelParameterExtractor -from auto_zkml.model_toolkit.model_trainer import ModelTrainer +from giza.zkcook.model_toolkit.data_transformer import DataTransformer +from giza.zkcook.model_toolkit.feature_models_space import FeatureSpaceConstants +from giza.zkcook.model_toolkit.metrics import check_metric_optimization +from giza.zkcook.model_toolkit.model_evaluator import ModelEvaluator +from giza.zkcook.model_toolkit.model_info import ModelParameterExtractor +from giza.zkcook.model_toolkit.model_trainer import ModelTrainer def mcr(model, X_train, y_train, X_eval, y_eval, eval_metric, transform_features=False): diff --git a/auto_zkml/model_toolkit/__init__.py b/giza/zkcook/model_toolkit/__init__.py similarity index 100% rename from auto_zkml/model_toolkit/__init__.py rename to giza/zkcook/model_toolkit/__init__.py diff --git a/auto_zkml/model_toolkit/custom_transformers/__init__.py b/giza/zkcook/model_toolkit/custom_transformers/__init__.py similarity index 100% rename from auto_zkml/model_toolkit/custom_transformers/__init__.py rename to giza/zkcook/model_toolkit/custom_transformers/__init__.py diff --git a/auto_zkml/model_toolkit/custom_transformers/customPCA.py b/giza/zkcook/model_toolkit/custom_transformers/customPCA.py similarity index 100% rename from auto_zkml/model_toolkit/custom_transformers/customPCA.py rename to giza/zkcook/model_toolkit/custom_transformers/customPCA.py diff --git a/auto_zkml/model_toolkit/custom_transformers/customRFE.py b/giza/zkcook/model_toolkit/custom_transformers/customRFE.py similarity index 100% rename from auto_zkml/model_toolkit/custom_transformers/customRFE.py rename to giza/zkcook/model_toolkit/custom_transformers/customRFE.py diff --git a/auto_zkml/model_toolkit/data_transformer.py b/giza/zkcook/model_toolkit/data_transformer.py similarity index 97% rename from auto_zkml/model_toolkit/data_transformer.py rename to giza/zkcook/model_toolkit/data_transformer.py index 60971ba..111d949 100644 --- a/auto_zkml/model_toolkit/data_transformer.py +++ b/giza/zkcook/model_toolkit/data_transformer.py @@ -2,8 +2,8 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from auto_zkml.model_toolkit.custom_transformers.customPCA import CustomPCA -from auto_zkml.model_toolkit.custom_transformers.customRFE import CustomRFE +from giza.zkcook.model_toolkit.custom_transformers.customPCA import CustomPCA +from giza.zkcook.model_toolkit.custom_transformers.customRFE import CustomRFE class DataTransformer(BaseEstimator, TransformerMixin): diff --git a/auto_zkml/model_toolkit/feature_models_space.py b/giza/zkcook/model_toolkit/feature_models_space.py similarity index 100% rename from auto_zkml/model_toolkit/feature_models_space.py rename to giza/zkcook/model_toolkit/feature_models_space.py diff --git a/auto_zkml/model_toolkit/metrics.py b/giza/zkcook/model_toolkit/metrics.py similarity index 100% rename from auto_zkml/model_toolkit/metrics.py rename to giza/zkcook/model_toolkit/metrics.py diff --git a/auto_zkml/model_toolkit/model_evaluator.py b/giza/zkcook/model_toolkit/model_evaluator.py similarity index 100% rename from auto_zkml/model_toolkit/model_evaluator.py rename to giza/zkcook/model_toolkit/model_evaluator.py diff --git a/auto_zkml/model_toolkit/model_info.py b/giza/zkcook/model_toolkit/model_info.py similarity index 100% rename from auto_zkml/model_toolkit/model_info.py rename to giza/zkcook/model_toolkit/model_info.py diff --git a/auto_zkml/model_toolkit/model_penalicer.py b/giza/zkcook/model_toolkit/model_penalicer.py similarity index 100% rename from auto_zkml/model_toolkit/model_penalicer.py rename to giza/zkcook/model_toolkit/model_penalicer.py diff --git a/auto_zkml/model_toolkit/model_trainer.py b/giza/zkcook/model_toolkit/model_trainer.py similarity index 100% rename from auto_zkml/model_toolkit/model_trainer.py rename to giza/zkcook/model_toolkit/model_trainer.py diff --git a/auto_zkml/serializer/__init__py b/giza/zkcook/serializer/__init__py similarity index 100% rename from auto_zkml/serializer/__init__py rename to giza/zkcook/serializer/__init__py diff --git a/auto_zkml/serializer/lgbm.py b/giza/zkcook/serializer/lgbm.py similarity index 90% rename from auto_zkml/serializer/lgbm.py rename to giza/zkcook/serializer/lgbm.py index e05efea..61d4998 100644 --- a/auto_zkml/serializer/lgbm.py +++ b/giza/zkcook/serializer/lgbm.py @@ -24,6 +24,12 @@ def serialize(model, output_path): with open("./model_tmp.txt") as file: model_text = file.read() + if "binary" in model_text: + opt_type = 1 + elif "regression" in model_text: + opt_type = 0 + else: + raise ValueError("The objective needs to be classification or regression.") tree_blocks = model_text.split("Tree=")[1:] trees = [] @@ -60,8 +66,9 @@ def serialize(model, output_path): ) json_transformed = { + "model_type": "lightgbm", + "opt_type": opt_type, "base_score": 0, - "opt_type": 1, # TODO: review this value "trees_number": len(trees), "trees": trees, } diff --git a/auto_zkml/serializer/serialize.py b/giza/zkcook/serializer/serialize.py similarity index 90% rename from auto_zkml/serializer/serialize.py rename to giza/zkcook/serializer/serialize.py index 4938cf1..73e4f5f 100644 --- a/auto_zkml/serializer/serialize.py +++ b/giza/zkcook/serializer/serialize.py @@ -1,5 +1,5 @@ -from auto_zkml.model_toolkit.model_info import ModelParameterExtractor -from auto_zkml.serializer import lgbm, xg +from giza.zkcook.model_toolkit.model_info import ModelParameterExtractor +from giza.zkcook.serializer import lgbm, xg def serialize_model(model, output_path): diff --git a/giza/zkcook/serializer/xg.py b/giza/zkcook/serializer/xg.py new file mode 100644 index 0000000..44d24ee --- /dev/null +++ b/giza/zkcook/serializer/xg.py @@ -0,0 +1,24 @@ +import json + + +def serialize(model, output_path): + booster = model.get_booster() + model_bytes = booster.save_raw(raw_format="json") + model_json_str = model_bytes.decode("utf-8") + model_json = json.loads(model_json_str) + opt_type = model_json["learner"]["objective"]["name"].lower() + + if "binary" in opt_type: + opt_type = 1 + elif "reg" in opt_type: + opt_type = 0 + else: + raise ValueError("The model should be a classifier or regressor model.") + + new_fields = {"model_type": "xgboost", "opt_type": opt_type} + combined_json = {**new_fields, **model_json} + + combined_json_str = json.dumps(combined_json) + + with open(output_path, "w") as file: + file.write(combined_json_str) diff --git a/pyproject.toml b/pyproject.toml index 794b8ed..a995bde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,11 @@ [tool.poetry] -name = "auto-zkml" +name = "giza-zkcook" version = "0.1.0" description = "" authors = ["Alejandro Martinez "] readme = "README.md" license = "MIT" +packages = [{include = "giza"}] [tool.poetry.dependencies] diff --git a/tutorials/end_to_end_example.ipynb b/tutorials/end_to_end_example.ipynb index ff69f79..b64d914 100644 --- a/tutorials/end_to_end_example.ipynb +++ b/tutorials/end_to_end_example.ipynb @@ -25,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "# For this example, it is necessary to have lightgbm installed, but it is not necessary to have all packages installed to use auto_zkml. \n", + "# For this example, it is necessary to have lightgbm installed, but it is not necessary to have all packages installed to use zkcook. \n", "# For this reason, we include this cell to ensure the notebook works correctly.\n", "\n", "!pip install lightgbm" @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -42,8 +42,8 @@ "import lightgbm as lgb\n", "import pandas as pd\n", "from sklearn.metrics import roc_auc_score\n", - "from auto_zkml import mcr\n", - "from auto_zkml import serialize_model" + "from giza.zkcook import mcr\n", + "from giza.zkcook import serialize_model" ] }, { @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -185,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -233,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -296,7 +296,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -324,7 +324,7 @@ " 'verbose': -1}" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -345,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ diff --git a/tutorials/reduce_model_complexity.ipynb b/tutorials/reduce_model_complexity.ipynb index 8a795d1..0a4bc8e 100644 --- a/tutorials/reduce_model_complexity.ipynb +++ b/tutorials/reduce_model_complexity.ipynb @@ -45,7 +45,7 @@ "metadata": {}, "outputs": [], "source": [ - "# For this example, it is necessary to have both xgboost and lightgbm installed, but it is not necessary to have all packages installed to use auto_zkml. \n", + "# For this example, it is necessary to have both xgboost and lightgbm installed, but it is not necessary to have all packages installed to use zkcook. \n", "# For this reason, we include this cell to ensure the notebook works correctly.\n", "\n", "!pip install xgboost\n", @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -111,7 +111,7 @@ " 'subsample_freq': 0}" ] }, - "execution_count": 2, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -122,11 +122,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from auto_zkml import mcr" + "from giza.zkcook import mcr" ] }, { @@ -146,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -178,7 +178,7 @@ " 'early_stopping_rounds': 10}" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } diff --git a/tutorials/serialize_my_model.ipynb b/tutorials/serialize_my_model.ipynb index 284986c..1ae8ce8 100644 --- a/tutorials/serialize_my_model.ipynb +++ b/tutorials/serialize_my_model.ipynb @@ -6,7 +6,7 @@ "source": [ "## How to serialize my model\n", "\n", - "auto_zkml offers various functionalities that help us have a model with the necessary characteristics to be transpilable, and therefore, able to generate proofs of its inferences.\n", + "zkcook offers various functionalities that help us have a model with the necessary characteristics to be transpilable, and therefore, able to generate proofs of its inferences.\n", "In this case, we will talk about the serialization process, which involves saving your model in a format that can be interpreted by other Giza tools.\n", "\n", "Currently, the two supported models are XGBoost and LightGBM for both classification and regression. It is preferable that the training is done using the scikit-learn API.\n", @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "# For this example, it is necessary to have both xgboost and lightgbm installed, but it is not necessary to have all packages installed to use auto_zkml. \n", + "# For this example, it is necessary to have both xgboost and lightgbm installed, but it is not necessary to have all packages installed to use zkcook. \n", "# For this reason, we include this cell to ensure the notebook works correctly.\n", "\n", "!pip install xgboost\n", @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -104,18 +104,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Change \"./MODEL.json\" for your output_path\n", - "\n", - "from auto_zkml import serialize_model\n", + "# Change \"./MODEL*.json\" for your output_path\n", "\n", - "serialize_model(xgb_reg, \"./MODEL.json\")\n", - "serialize_model(lgbm_reg, \"./MODEL.json\")\n", - "serialize_model(xgb_clf, \"./MODEL.json\")\n", - "serialize_model(lgbm_clf, \"./MODEL.json\")" + "from giza.zkcook import serialize_model\n", + "serialize_model(xgb_clf, \"./MODEL_XG_CLF.json\")\n", + "serialize_model(xgb_reg, \"./MODEL_XG.json\")\n", + "serialize_model(lgbm_reg, \"./MODEL_LGBM.json\")\n", + "serialize_model(lgbm_clf, \"./MODEL_LGBM_CLF.json\")" ] }, { @@ -123,7 +122,7 @@ "metadata": {}, "source": [ "That simple! We now have our models saved in the correct format to use the rest of the Giza stack! But not so fast...\n", - "In this example, the models are very simple (few trees and shallow depth), but for other problems, the optimal architecture might be much more complex and not compatible with our current technology. In this case, we will have to use another of the functionalities offered by auto_zkml beforehand: our model_complexity_reducer.\n", + "In this example, the models are very simple (few trees and shallow depth), but for other problems, the optimal architecture might be much more complex and not compatible with our current technology. In this case, we will have to use another of the functionalities offered by zkcook beforehand: our model_complexity_reducer.\n", "\n", "To understand how the model_complexity_reducer (mcr) works, in this same folder you will find the notebook reduce_model_complexity.ipynb with a detailed explanation of its operation and how to run it before serializing your model." ]