Merge pull request #4 from gizatechxyz/feature/giza-zkcook

auto-zkml -> giza-zkcook
gizatechxyz · May 22, 2024 · 3437710 · 3437710
2 parents f53d67a + 4d477bd
commit 3437710
Show file tree

Hide file tree

Showing 27 changed files with 90 additions and 59 deletions.
diff --git a/.github/workflows/onpush.yml b/.github/workflows/onpush.yml
@@ -27,11 +27,11 @@ jobs:
           poetry install --all-extras
       - name: Lint with ruff
         run: |
-          poetry run ruff auto_zkml
+          poetry run ruff giza
       - name: Pre-commit check
         run: |
           poetry run pre-commit run --all-files
       # Bring back when tests are working
       # - name: Testing
       #   run: |
-      #     poetry run pytest --cov=auto_zkml --cov-report term-missing
+      #     poetry run pytest --cov=giza.zkcook --cov-report term-missing
diff --git a/.github/workflows/onrelease.yml b/.github/workflows/onrelease.yml
@@ -30,11 +30,11 @@ jobs:
             poetry install
       - name: Lint with ruff
         run: |
-            poetry run ruff auto_zkml
+            poetry run ruff giza
       - name: Build dist
         run: poetry build
       - name: Publish a Python distribution to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
-          password: ${{ secrets.GIZA_AUTOZKML_PYPI_TOKEN }}
+          password: ${{ secrets.GIZA_ZKCOOK_PYPI_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -30,8 +30,10 @@ repos:
         entry: isort
         language: system
         files: "py$"
+        args: ["--line-length=145"]
       - id: ruff
         name: ruff
         entry: ruff
         language: system
         files: "py$"
+        args: ["--line-length=145"]
diff --git a/README.md b/README.md
@@ -1,12 +1,12 @@
-# auto-zkml
+# zkcook
 
 This package is designed to provide functionality that facilitates the transition from ML algorithms to ZKML. Its two main functionalities are:
 
 - [**Serialization**](#serialization): saving a trained ML model in a specific format to be interpretable by other programs.
 
 - [**model-complexity-reducer (mcr)**](#mcr): Given a model and a training dataset, transform the model and the data to obtain a lighter representation that optimizes the tradeoff between performance and complexity.
 
-It's important to note that although the main goal is the transition from ML to ZKML, auto-zkml can be useful in other contexts, such as:
+It's important to note that although the main goal is the transition from ML to ZKML, mcr can be useful in other contexts, such as:
 
 - The model's weight needs to be minimal, for example for mobile applications.
 - Minimal inference times are required for low latency applications.
@@ -20,7 +20,7 @@ It's important to note that although the main goal is the transition from ML to
 For the latest release:
 
 ```bash
-pip install auto-zkml
+pip install giza-zkcook
 ```
 
 ### Installing from source
@@ -29,8 +29,8 @@ Clone the repository and install it with `pip`:
 
 
 ```bash
-    git clone [email protected]:gizatechxyz/auto-zkml.git
-    cd auto-zkml
+    git clone [email protected]:gizatechxyz/zkcook.git
+    cd zkcook
     pip install .
 ```
 
@@ -41,7 +41,7 @@ To see in more detail how this tool works, check out this [tutorial](tutorials/s
 To run it:
 
 ```python
-from auto_zkml import serialize_model
+from giza.zkcook import serialize_model
 
 serialize_model(YOUR_TRAINED_MODEL, "OUTPUT_PATH/MODEL_NAME.json")
 ```

diff --git a/auto_zkml/__init__.py b/auto_zkml/__init__.py
diff --git a/auto_zkml/serializer/xg.py b/auto_zkml/serializer/xg.py
diff --git a/giza/zkcook/__init__.py b/giza/zkcook/__init__.py
@@ -0,0 +1,6 @@
+from giza.zkcook.model_reducer import mcr
+from giza.zkcook.serializer.serialize import serialize_model
+
+__all__ = ["mcr", "serialize_model"]  # public API of the package: exactly the two names imported above
+
+__version__ = "0.1.0"  # same value as `version` under [tool.poetry] in pyproject.toml in this commit
diff --git a/auto_zkml/model_reducer.py → giza/zkcook/model_reducer.py b/auto_zkml/model_reducer.py → giza/zkcook/model_reducer.py
@@ -1,12 +1,12 @@
 from skopt import gp_minimize
 from skopt.utils import use_named_args
 
-from auto_zkml.model_toolkit.data_transformer import DataTransformer
-from auto_zkml.model_toolkit.feature_models_space import FeatureSpaceConstants
-from auto_zkml.model_toolkit.metrics import check_metric_optimization
-from auto_zkml.model_toolkit.model_evaluator import ModelEvaluator
-from auto_zkml.model_toolkit.model_info import ModelParameterExtractor
-from auto_zkml.model_toolkit.model_trainer import ModelTrainer
+from giza.zkcook.model_toolkit.data_transformer import DataTransformer
+from giza.zkcook.model_toolkit.feature_models_space import FeatureSpaceConstants
+from giza.zkcook.model_toolkit.metrics import check_metric_optimization
+from giza.zkcook.model_toolkit.model_evaluator import ModelEvaluator
+from giza.zkcook.model_toolkit.model_info import ModelParameterExtractor
+from giza.zkcook.model_toolkit.model_trainer import ModelTrainer
 
 
 def mcr(model, X_train, y_train, X_eval, y_eval, eval_metric, transform_features=False):

diff --git a/auto_zkml/model_toolkit/__init__.py → giza/zkcook/model_toolkit/__init__.py b/auto_zkml/model_toolkit/__init__.py → giza/zkcook/model_toolkit/__init__.py
diff --git a/...l_toolkit/custom_transformers/__init__.py → ...l_toolkit/custom_transformers/__init__.py b/...l_toolkit/custom_transformers/__init__.py → ...l_toolkit/custom_transformers/__init__.py
diff --git a/..._toolkit/custom_transformers/customPCA.py → ..._toolkit/custom_transformers/customPCA.py b/..._toolkit/custom_transformers/customPCA.py → ..._toolkit/custom_transformers/customPCA.py
diff --git a/..._toolkit/custom_transformers/customRFE.py → ..._toolkit/custom_transformers/customRFE.py b/..._toolkit/custom_transformers/customRFE.py → ..._toolkit/custom_transformers/customRFE.py
diff --git a/auto_zkml/model_toolkit/data_transformer.py → .../zkcook/model_toolkit/data_transformer.py b/auto_zkml/model_toolkit/data_transformer.py → .../zkcook/model_toolkit/data_transformer.py
@@ -2,8 +2,8 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
 
-from auto_zkml.model_toolkit.custom_transformers.customPCA import CustomPCA
-from auto_zkml.model_toolkit.custom_transformers.customRFE import CustomRFE
+from giza.zkcook.model_toolkit.custom_transformers.customPCA import CustomPCA
+from giza.zkcook.model_toolkit.custom_transformers.customRFE import CustomRFE
 
 
 class DataTransformer(BaseEstimator, TransformerMixin):

diff --git a/...kml/model_toolkit/feature_models_space.py → ...ook/model_toolkit/feature_models_space.py b/...kml/model_toolkit/feature_models_space.py → ...ook/model_toolkit/feature_models_space.py
diff --git a/auto_zkml/model_toolkit/metrics.py → giza/zkcook/model_toolkit/metrics.py b/auto_zkml/model_toolkit/metrics.py → giza/zkcook/model_toolkit/metrics.py
diff --git a/auto_zkml/model_toolkit/model_evaluator.py → giza/zkcook/model_toolkit/model_evaluator.py b/auto_zkml/model_toolkit/model_evaluator.py → giza/zkcook/model_toolkit/model_evaluator.py
diff --git a/auto_zkml/model_toolkit/model_info.py → giza/zkcook/model_toolkit/model_info.py b/auto_zkml/model_toolkit/model_info.py → giza/zkcook/model_toolkit/model_info.py
diff --git a/auto_zkml/model_toolkit/model_penalicer.py → giza/zkcook/model_toolkit/model_penalicer.py b/auto_zkml/model_toolkit/model_penalicer.py → giza/zkcook/model_toolkit/model_penalicer.py
diff --git a/auto_zkml/model_toolkit/model_trainer.py → giza/zkcook/model_toolkit/model_trainer.py b/auto_zkml/model_toolkit/model_trainer.py → giza/zkcook/model_toolkit/model_trainer.py
diff --git a/auto_zkml/serializer/__init__py → giza/zkcook/serializer/__init__py b/auto_zkml/serializer/__init__py → giza/zkcook/serializer/__init__py
diff --git a/auto_zkml/serializer/lgbm.py → giza/zkcook/serializer/lgbm.py b/auto_zkml/serializer/lgbm.py → giza/zkcook/serializer/lgbm.py
@@ -24,6 +24,12 @@ def serialize(model, output_path):
     with open("./model_tmp.txt") as file:
         model_text = file.read()
 
+    if "binary" in model_text:
+        opt_type = 1
+    elif "regression" in model_text:
+        opt_type = 0
+    else:
+        raise ValueError("The objective needs to be classification or regression.")
     tree_blocks = model_text.split("Tree=")[1:]
     trees = []
 
@@ -60,8 +66,9 @@ def serialize(model, output_path):
         )
 
     json_transformed = {
+        "model_type": "lightgbm",
+        "opt_type": opt_type,
         "base_score": 0,
-        "opt_type": 1,  # TODO: review this value
         "trees_number": len(trees),
         "trees": trees,
     }

diff --git a/auto_zkml/serializer/serialize.py → giza/zkcook/serializer/serialize.py b/auto_zkml/serializer/serialize.py → giza/zkcook/serializer/serialize.py
@@ -1,5 +1,5 @@
-from auto_zkml.model_toolkit.model_info import ModelParameterExtractor
-from auto_zkml.serializer import lgbm, xg
+from giza.zkcook.model_toolkit.model_info import ModelParameterExtractor
+from giza.zkcook.serializer import lgbm, xg
 
 
 def serialize_model(model, output_path):

diff --git a/giza/zkcook/serializer/xg.py b/giza/zkcook/serializer/xg.py
@@ -0,0 +1,24 @@
+import json
+
+
+def serialize(model, output_path):  # Write the model's booster as JSON to output_path, tagged with "model_type" and "opt_type".
+    booster = model.get_booster()  # model must expose get_booster(); presumably an xgboost sklearn-API estimator — confirm
+    model_bytes = booster.save_raw(raw_format="json")  # raw dump in JSON format; bytes-like, decoded as UTF-8 on the next line
+    model_json_str = model_bytes.decode("utf-8")
+    model_json = json.loads(model_json_str)
+    opt_type = model_json["learner"]["objective"]["name"].lower()  # objective name string; KeyError if the dump lacks this path
+
+    if "binary" in opt_type:  # substring test on the objective name; presumably matches e.g. "binary:logistic" — confirm
+        opt_type = 1  # NOTE: opt_type is rebound from the name string to an int code; 1 = binary objective
+    elif "reg" in opt_type:  # presumably matches e.g. "reg:squarederror" — confirm
+        opt_type = 0  # 0 = regression objective
+    else:
+        raise ValueError("The model should be a classifier or regressor model.")  # every other objective name is rejected
+
+    new_fields = {"model_type": "xgboost", "opt_type": opt_type}
+    combined_json = {**new_fields, **model_json}  # new fields lead the key order; on a key clash the value from model_json wins
+
+    combined_json_str = json.dumps(combined_json)
+
+    with open(output_path, "w") as file:  # text mode with no explicit encoding; truncates any existing file at output_path
+        file.write(combined_json_str)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,10 +1,11 @@
 [tool.poetry]
-name = "auto-zkml"
+name = "giza-zkcook"
 version = "0.1.0"
 description = ""
 authors = ["Alejandro Martinez <[email protected]>"]
 readme = "README.md"
 license = "MIT"
+packages = [{include = "giza"}]
 
 
 [tool.poetry.dependencies]

diff --git a/tutorials/end_to_end_example.ipynb b/tutorials/end_to_end_example.ipynb
@@ -25,15 +25,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# For this example, it is necessary to have lightgbm installed, but it is not necessary to have all packages installed to use auto_zkml. \n",
+    "# For this example, it is necessary to have lightgbm installed, but it is not necessary to have all packages installed to use zkcook. \n",
     "# For this reason, we include this cell to ensure the notebook works correctly.\n",
     "\n",
     "!pip install lightgbm"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -42,8 +42,8 @@
     "import lightgbm as lgb\n",
     "import pandas as pd\n",
     "from sklearn.metrics import roc_auc_score\n",
-    "from auto_zkml import mcr\n",
-    "from auto_zkml import serialize_model"
+    "from giza.zkcook import mcr\n",
+    "from giza.zkcook import serialize_model"
    ]
   },
   {
@@ -61,7 +61,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -185,7 +185,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -233,7 +233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -296,7 +296,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -324,7 +324,7 @@
        " 'verbose': -1}"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -345,7 +345,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [

diff --git a/tutorials/reduce_model_complexity.ipynb b/tutorials/reduce_model_complexity.ipynb
@@ -45,7 +45,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# For this example, it is necessary to have both xgboost and lightgbm installed, but it is not necessary to have all packages installed to use auto_zkml. \n",
+    "# For this example, it is necessary to have both xgboost and lightgbm installed, but it is not necessary to have all packages installed to use zkcook. \n",
     "# For this reason, we include this cell to ensure the notebook works correctly.\n",
     "\n",
     "!pip install xgboost\n",
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -111,7 +111,7 @@
        " 'subsample_freq': 0}"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -122,11 +122,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "from auto_zkml import mcr"
+    "from giza.zkcook import mcr"
    ]
   },
   {
@@ -146,7 +146,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -178,7 +178,7 @@
        " 'early_stopping_rounds': 10}"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }