Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stable hashing, batching for offline opt #459

Merged
merged 1 commit into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion offline_optimization_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def main(
cache_dir: Path = DEFAULT_CACHE_DIR,
out_dir=None,
loglevel: Literal["debug", "info", "warning", "error"] = "info",
index_from: int = None,
index_to: int = None,
):
cache_dir = Path(cache_dir)
cache_dir.mkdir(exist_ok=True)
Expand All @@ -121,6 +123,11 @@ def main(

param_sets = generate_param_sets(api)

# filter for batch
index_from = index_from or 0
index_to = index_to or len(param_sets)
param_sets = param_sets[index_from:index_to]

results = [] # save results
for params in progress.bar.Bar(
suffix=(
Expand Down Expand Up @@ -186,6 +193,18 @@ def main(
choices=["debug", "info", "warning", "error"],
help="Log level for the console.",
)
parser.add_argument(
"-f",
"--index_from",
type=int,
help="starting index for prallel runs",
)
parser.add_argument(
"-t",
"--index_to",
type=int,
help="final index (exlusive) for prallel runs",
)

args = parser.parse_args()
main(cache_dir=args.cache_dir, out_dir=args.out_dir, loglevel=args.loglevel)
main(**vars(args))
4 changes: 2 additions & 2 deletions ptxboa/api_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from flh_opt.api_opt import optimize
from ptxboa import logger
from ptxboa.static._types import CalculateDataType
from ptxboa.utils import SingletonMeta, annuity
from ptxboa.utils import SingletonMeta, annuity, serialize_for_hashing


def get_data_hash_md5(key: object) -> str:
Expand All @@ -36,7 +36,7 @@ def get_data_hash_md5(key: object) -> str:
md5 hash of a standardized byte representation of the input data
"""
# serialize to str, make sure to sort keys
sdata = json.dumps(key, sort_keys=True, ensure_ascii=False, indent=0)
sdata = serialize_for_hashing(key)
# to bytes (only bytes can be hashed)
bdata = sdata.encode()
# create hash
Expand Down
49 changes: 48 additions & 1 deletion ptxboa/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
"""Utilities."""

import json
import os
from types import NoneType
from typing import Union


def annuity(rate: float, periods: int, value: float) -> float:
Expand Down Expand Up @@ -47,3 +49,48 @@ def is_test():
"PYTEST_CURRENT_TEST" in os.environ
or "STREAMLIT_GLOBAL_UNIT_TEST" in os.environ
)


def serialize_for_hashing(
obj: Union[NoneType, int, float, str, bool, dict, list], float_sig_digits=6
) -> str:
"""Serialize data for hashing.

- custom function to ensure same results for differrent python versions
(json dumps changes sometimes?)
-

Parameters
----------
obj : Union[NoneType, int, float, str, dict, list]
data
float_sig_digits : int, optional
number of significat digits (in scientific notation)

Returns
-------
str
string serialization
"""
if isinstance(obj, list):
return "[" + ",".join(serialize_for_hashing(x) for x in obj) + "]"
elif isinstance(obj, dict):
# map keys to sorted
obj_ = {
serialize_for_hashing(k): serialize_for_hashing(v) for k, v in obj.items()
}
return "{" + ",".join(k + ":" + v for k, v in sorted(obj_.items())) + "}"
elif isinstance(obj, bool):
# NOTE: MUST come before test for
return "true" if obj is True else "false"
elif isinstance(obj, str):
# use json to take care of line breaks and other escaping
return json.dumps(obj, ensure_ascii=False)
elif isinstance(obj, int):
return str(obj)
elif isinstance(obj, float):
return f"%.{float_sig_digits}e" % obj
elif obj is None:
return "null"
else:
raise NotImplementedError(type(obj))
24 changes: 24 additions & 0 deletions tests/test_utills.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# -*- coding: utf-8 -*-
"""Tests for utils module."""
import json
import unittest

from ptxboa.utils import serialize_for_hashing

from .utils import assert_deep_equal


Expand All @@ -25,3 +28,24 @@ def test_assert_deep_equal(self):
self.assertRaises(ValueError, assert_deep_equal, [], [1])
self.assertRaises(ValueError, assert_deep_equal, {"a": 1}, {"b": 1})
self.assertRaises(ValueError, assert_deep_equal, {"a": 1}, {"a": 2})

def test_serialize_for_hashing(self):
    """Check serialize_for_hashing against known expected strings."""
    # pairs of (input object, expected serialization)
    cases = [
        ("text", '"text"'),
        (123, "123"),
        (123.0, "1.230000e+02"),
        (-123.0, "-1.230000e+02"),
        (-123.4567, "-1.234567e+02"),
        (0.0000001234567, "1.234567e-07"),
        (True, "true"),
        (False, "false"),
        ([], "[]"),
        ({}, "{}"),
        ([1, {"b": 2, "a": [None]}], '[1,{"a":[null],"b":2}]'),
    ]
    for value, expected in cases:
        serialized = serialize_for_hashing(value)
        # the serialization has to be parseable as JSON
        json.loads(serialized)
        self.assertEqual(serialized, expected)
Loading