You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Reinforcement Learning Using Learned Reward Function
Mountaincar RL Training
# coding: utf-8
import random

from language_conditioned_rl.models.reward_model import (
    LGROmniChannelPureContrastiveRewardLearner,
)
from language_conditioned_rl.models.rl_trainer import (
    MOUNTAINCAR_DEFAULT_EXPERIMENT_NAME,
    MOUNTAINCAR_DEFAULT_LOGGER_PROJECT_NAME,
    MountainCarTrainer,
)

# Identifiers handed to `from_neptune` below to locate the reward model:
# the project name, the experiment name and the checkpoint path.
REWARD_FN_PROJECT_NAME = 'valay/Language-Grounded-Rewards'
REWARD_FN_EXPERIMENT_NAME = 'LAN-114'
CHECKPOINT_PATH = 'checkpoints/epoch=01-val_loss=0.00.ckpt'

# Neptune API Token
API_TOKEN = None

# NOTE(review): CHOSEN_TEXT is not referenced anywhere in the visible part of
# this script; kept so that any later use keeps working.
CHOSEN_TEXT = "The car swings around at the bottom of the valley"

# Instantiate Reward Function
# `from_neptune` returns a pair; the second element (`config`) is not used in
# the visible part of this script.
REWARD_FN, config = LGROmniChannelPureContrastiveRewardLearner.from_neptune(
    REWARD_FN_PROJECT_NAME,
    REWARD_FN_EXPERIMENT_NAME,
    CHECKPOINT_PATH,
    api_token=API_TOKEN,
)
# Run RL training with Text bound reward function.
# A single trainer instance is built here with fixed hyper-parameters.
# NOTE(review): the exact meaning of each keyword (e.g. whether
# reward_min/reward_max clip or normalise the reward) is defined by
# MountainCarTrainer and is not visible here — confirm against that class.
trainer = MountainCarTrainer(
    num_eps=400,
    model_hidden=256,
    num_timesteps=200,
    reward_scaleup=100,
    project_name=MOUNTAINCAR_DEFAULT_LOGGER_PROJECT_NAME,
    experiment_name=MOUNTAINCAR_DEFAULT_EXPERIMENT_NAME,
    api_token=API_TOKEN,
    video_save_freq=20,
    video_save_dir='./video',
    log_every=100,
    reward_min=-12,
    reward_max=12,
)
# Number of repeated runs for every entry of SENTENCES.
NUM_EXPERIMENT_PER_TYPE = 10

# Text contexts to train against. `None` selects `trainer.run_native`, i.e.
# the run that is not given the learned reward function.
# The sentences are passed verbatim to the reward function as `text_context`,
# so their wording is intentionally left untouched.
SENTENCES = [
    None,
    "The car swings around at the bottom of the valley.",
    "The car is able swing beyond the bottom of the valley but does not reach the top of the mountain",
    "The car is able to reach the top of the mountain",
]

# One random integer in [0, 1000] per repetition.
RANDOM_SEEDS = [random.randint(0, 1000) for _ in range(NUM_EXPERIMENT_PER_TYPE)]

for sentence in SENTENCES:
    # NOTE(review): `seed` is never handed to the trainer or to any RNG, so
    # RANDOM_SEEDS currently only determines how many times each setting is
    # repeated. Wire it into the trainer/RNGs if reproducible runs are wanted.
    for seed in RANDOM_SEEDS:
        if sentence is None:
            trainer.run_native(render=False)
        else:
            trainer.run(REWARD_FN, render=False, text_context=sentence)