diff --git a/workspace/suphx-reward-shaping/evaluate.py b/workspace/suphx-reward-shaping/evaluate.py
index 6d95a963..a284aab5 100644
--- a/workspace/suphx-reward-shaping/evaluate.py
+++ b/workspace/suphx-reward-shaping/evaluate.py
@@ -15,8 +15,13 @@
 def evaluate_abs(
-    params: optax.Params, X, score, batch_size, use_logistic=False, use_clip=False
-) -> float:  # absolute error on the scale before preprocessing
+    params: optax.Params,
+    X,
+    score,
+    batch_size,
+    use_logistic=False,
+    use_clip=False,
+) -> float:  # absolute error on the scale before preprocessing
     dataset = tf.data.Dataset.from_tensor_slices((X, score))
     batched_dataset = dataset.batch(batch_size, drop_remainder=True)
     cum_loss = 0
@@ -37,6 +42,9 @@ def eval_abs_loss(meth, _type, result_dir):
     use_clip = False
     if _type == "_use_clip_":
         use_clip = True
+    if _type == "_no_logistic_after":
+        _type = "_no_logistic_"
+        use_clip = True
     params_list: List = (
         [
             jnp.load(
@@ -75,15 +83,19 @@ def eval_abs_loss(meth, _type, result_dir):
                 32,
                 use_logistic=use_logistic,
                 use_clip=use_clip,
-            )
+            )  # absolute error on the test data
             abs_losses.append(float(np.array(abs_loss).item(0)))
     return abs_losses


 if __name__ == "__main__":
-    train_meth = ["suphx", "TD"]
-    types = ["_no_logistic_", "_use_logistic_", "_use_clip_"]
+    types = [
+        "_no_logistic_",
+        "_use_logistic_",
+        "_no_logistic__use_clip_",
+        "_no_logistic_after",
+    ]
     result_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "result")
diff --git a/workspace/suphx-reward-shaping/evaluate.sh b/workspace/suphx-reward-shaping/evaluate.sh
new file mode 100755
index 00000000..1cd65a20
--- /dev/null
+++ b/workspace/suphx-reward-shaping/evaluate.sh
@@ -0,0 +1 @@
+python evaluate.py
\ No newline at end of file
diff --git a/workspace/suphx-reward-shaping/inference.py b/workspace/suphx-reward-shaping/inference.py
new file mode 100644
index 00000000..0ad9fd11
--- /dev/null
+++ b/workspace/suphx-reward-shaping/inference.py
@@ -0,0 +1,24 @@
+import os
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+
+"""
+There is one model per round.
+The model architecture is as follows.
+Input dimension: 19
+Layer1: 19 * 32
+Activation: relu
+Layer2: 32 * 32
+Activation: relu
+Layer3: 32 * 4
+Clip(0, 1), because the game reward is normalized to [0, 1].
+"""
+
+
+def predict(x: np.ndarray, W1: np.ndarray, W2: np.ndarray, W3: np.ndarray):
+    x = np.maximum(0, np.dot(x, W1))
+    x = np.maximum(0, np.dot(x, W2))
+    x = np.clip(np.dot(x, W3), a_min=0, a_max=1)
+    return x
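A minimal usage sketch for predict (not part of the patch): the weight directory and the all-ones input are illustrative assumptions; the file naming follows to_numpy.py further down.

import os

import numpy as np

from inference import predict

weight_dir = "weights/numpy"  # assumed location of the exported .npy files
W1, W2, W3 = (
    np.load(os.path.join(weight_dir, "weights_no_logistic_TD_0_layer_" + str(layer) + ".npy"))
    for layer in range(3)
)
x = np.ones(19)  # 19-dimensional feature vector (placeholder)
y = predict(x, W1, W2, W3)  # four values in [0, 1], one per player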
+""" + +import os +import sys + +import jax.numpy as jnp +import numpy as np + +sys.path.append("../") +from inference import predict +from train_helper import net + +numpy_dir = os.path.join(os.pardir, "weights/numpy") +jax_dir = os.path.join(os.pardir, "weights/jax") + +jax_params = [ + jnp.load( + os.path.join(jax_dir, "params_no_logistic_" + str(round) + ".pickle"), allow_pickle=True + ) + for round in range(8) +] +numpy_params = [ + [ + np.load( + os.path.join( + numpy_dir, "weights_no_logistic_TD_" + str(round) + "_layer_" + str(layer) + ".npy" + ), + allow_pickle=True, + ) + for layer in range(3) + ] + for round in range(8) +] + + +x_j: jnp.ndarray = jnp.array([1] * 19) +x_n: np.ndarray = np.array([1] * 19) + +delta = 0.0001 + + +def test_inference(): + for i in range(8): + jax_param = jax_params[i] + numpy_param = numpy_params[i] + out_j = net(x_j, jax_param, use_clip=True) + out_n = predict(x_n, numpy_param[0], numpy_param[1], numpy_param[2]) + assert out_j[0] - out_n[0] < delta + assert out_j[1] - out_n[1] < delta + assert out_j[2] - out_n[2] < delta + assert out_j[3] - out_n[3] < delta + + +if __name__ == "__main__": + test_inference() diff --git a/workspace/suphx-reward-shaping/to_numpy.py b/workspace/suphx-reward-shaping/to_numpy.py new file mode 100644 index 00000000..6065a93d --- /dev/null +++ b/workspace/suphx-reward-shaping/to_numpy.py @@ -0,0 +1,21 @@ +import os + +import jax +import jax.numpy as jnp +import numpy as np + + +def save_as_numpy(param_dir, round): # no logistic TDが一番性能良かったので, パラメータを保存する. + params = jnp.load(param_dir, allow_pickle=True) + for i, param in enumerate(params.values()): + jnp.save( + "numpy/weights_no_logistic_TD_" + str(round) + "_layer_" + str(i) + ".npy", + jnp.array(param), + ) + + +if __name__ == "__main__": + result_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "result") + for round in range(8): + param_dir = os.path.join(result_dir, "params/params_no_logistic_" + str(round) + ".pickle") + save_as_numpy(param_dir, round) diff --git a/workspace/suphx-reward-shaping/train.py b/workspace/suphx-reward-shaping/train.py index 3e5c8ec6..f1a8d894 100644 --- a/workspace/suphx-reward-shaping/train.py +++ b/workspace/suphx-reward-shaping/train.py @@ -194,27 +194,26 @@ def save_params(params, opt, result_dir): args = parser.parse_args() mjxproto_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.data_path) result_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.result_path) - if args.train: - for lr in [0.01, 0.001]: - print( - "start_training, round_wise: {}, use_logistic: {}, target_round: {}".format( - args.round_wise, args.use_logistic, args.target_round - ) + for lr in [0.01, 0.001]: + print( + "start_training, round_wise: {}, use_logistic: {}, target_round: {}".format( + args.round_wise, args.use_logistic, args.target_round ) - if args.round_wise: - X, Y, scores = set_dataset_round_wise(mjxproto_dir, result_dir, args) - else: - X, Y, scores = set_dataset_whole(mjxproto_dir, result_dir, args) - params, train_log, val_log = run_training(X, Y, scores, args, lr) - save_params(params, args, result_dir) - save_learning_log(train_log, val_log, args, result_dir, lr) - plot_learning_log(train_log, val_log, args, result_dir, lr) - """ - if args.round_wise == 0: - for round in range(8): - for i in range(4): - plot_result(params, i, round, args, result_dir) - else: + ) + if args.round_wise: + X, Y, scores = set_dataset_round_wise(mjxproto_dir, result_dir, args) + else: + X, Y, scores = 
diff --git a/workspace/suphx-reward-shaping/train.py b/workspace/suphx-reward-shaping/train.py
index 3e5c8ec6..f1a8d894 100644
--- a/workspace/suphx-reward-shaping/train.py
+++ b/workspace/suphx-reward-shaping/train.py
@@ -194,27 +194,26 @@ def save_params(params, opt, result_dir):
     args = parser.parse_args()
     mjxproto_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.data_path)
     result_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), args.result_path)
-    if args.train:
-        for lr in [0.01, 0.001]:
-            print(
-                "start_training, round_wise: {}, use_logistic: {}, target_round: {}".format(
-                    args.round_wise, args.use_logistic, args.target_round
-                )
-            )
-            if args.round_wise:
-                X, Y, scores = set_dataset_round_wise(mjxproto_dir, result_dir, args)
-            else:
-                X, Y, scores = set_dataset_whole(mjxproto_dir, result_dir, args)
-            params, train_log, val_log = run_training(X, Y, scores, args, lr)
-            save_params(params, args, result_dir)
-            save_learning_log(train_log, val_log, args, result_dir, lr)
-            plot_learning_log(train_log, val_log, args, result_dir, lr)
-            """
-            if args.round_wise == 0:
-                for round in range(8):
-                    for i in range(4):
-                        plot_result(params, i, round, args, result_dir)
-            else:
-                for i in range(4):
-                    plot_result(params, i, args.target_round, args, result_dir)
-            """
+    for lr in [0.01, 0.001]:
+        print(
+            "start_training, round_wise: {}, use_logistic: {}, target_round: {}".format(
+                args.round_wise, args.use_logistic, args.target_round
+            )
+        )
+        if args.round_wise:
+            X, Y, scores = set_dataset_round_wise(mjxproto_dir, result_dir, args)
+        else:
+            X, Y, scores = set_dataset_whole(mjxproto_dir, result_dir, args)
+        params, train_log, val_log = run_training(X, Y, scores, args, lr)
+        save_params(params, args, result_dir)
+        save_learning_log(train_log, val_log, args, result_dir, lr)
+        plot_learning_log(train_log, val_log, args, result_dir, lr)
+        """
+        if args.round_wise == 0:
+            for round in range(8):
+                for i in range(4):
+                    plot_result(params, i, round, args, result_dir)
+        else:
+            for i in range(4):
+                plot_result(params, i, args.target_round, args, result_dir)
+        """
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_0.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_0.pickle
new file mode 100644
index 00000000..4697587c
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_0.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_1.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_1.pickle
new file mode 100644
index 00000000..900fd99a
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_1.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_2.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_2.pickle
new file mode 100644
index 00000000..691c0ec2
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_2.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_3.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_3.pickle
new file mode 100644
index 00000000..e19bf53f
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_3.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_4.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_4.pickle
new file mode 100644
index 00000000..2484ef4f
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_4.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_5.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_5.pickle
new file mode 100644
index 00000000..e76ba083
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_5.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_6.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_6.pickle
new file mode 100644
index 00000000..4905c293
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_6.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_7.pickle b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_7.pickle
new file mode 100644
index 00000000..0f2819c9
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/jax/params_no_logistic_7.pickle differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_0.npy
new file mode 100644
index 00000000..9ad5b362
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_1.npy
new file mode 100644
index 00000000..60df6ea8
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_2.npy
new file mode 100644
index 00000000..da1c2d54
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_0_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_0.npy
new file mode 100644
index 00000000..36f3d8f5
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_1.npy
new file mode 100644
index 00000000..3052a66d
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_2.npy
new file mode 100644
index 00000000..a913d450
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_1_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_0.npy
new file mode 100644
index 00000000..5dbe74df
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_1.npy
new file mode 100644
index 00000000..9bf65f51
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_2.npy
new file mode 100644
index 00000000..0d9918fd
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_2_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_0.npy
new file mode 100644
index 00000000..7b9edd52
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_1.npy
new file mode 100644
index 00000000..e1ca59e2
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_2.npy
new file mode 100644
index 00000000..e4c12d60
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_3_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_0.npy
new file mode 100644
index 00000000..615dc009
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_1.npy
new file mode 100644
index 00000000..d272e81b
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_2.npy
new file mode 100644
index 00000000..91bc6c33
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_4_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_0.npy
new file mode 100644
index 00000000..c72c30e5
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_1.npy
new file mode 100644
index 00000000..273a6843
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_2.npy
new file mode 100644
index 00000000..fde3aab1
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_5_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_0.npy
new file mode 100644
index 00000000..b055a03c
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_1.npy
new file mode 100644
index 00000000..30c4d136
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_2.npy
new file mode 100644
index 00000000..79dd6c2c
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_6_layer_2.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_0.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_0.npy
new file mode 100644
index 00000000..82ad04dc
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_0.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_1.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_1.npy
new file mode 100644
index 00000000..251551a3
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_1.npy differ
diff --git a/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_2.npy b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_2.npy
new file mode 100644
index 00000000..dfecbec7
Binary files /dev/null and b/workspace/suphx-reward-shaping/weights/numpy/weights_no_logistic_TD_7_layer_2.npy differ
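At play time there is one model per round (0-7), so reward shaping amounts to picking the weights for the current round and evaluating the 19-dimensional state features; a sketch under those assumptions (load_round_weights is a hypothetical helper, not part of this diff):

import os

import numpy as np

from inference import predict


def load_round_weights(round: int, weight_dir: str = "weights/numpy"):
    # Hypothetical helper: three weight matrices per round, as written by to_numpy.py.
    return [
        np.load(
            os.path.join(
                weight_dir, "weights_no_logistic_TD_" + str(round) + "_layer_" + str(layer) + ".npy"
            )
        )
        for layer in range(3)
    ]


features = np.ones(19)  # placeholder for the real 19-dim state features
W1, W2, W3 = load_round_weights(round=3)
reward_estimates = predict(features, W1, W2, W3)  # per-player game reward, normalized to [0, 1]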