-
Notifications
You must be signed in to change notification settings - Fork 85
/
config_utils.py
53 lines (40 loc) · 1.84 KB
/
config_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Copyright (C) 2017 Tiancheng Zhao, Carnegie Mellon University
class KgCVAEConfig(object):
    """Default hyper-parameters for the kgCVAE / CVAE model.

    Every setting is a plain class attribute, so it can be read directly
    from the class or overridden on an individual instance without
    affecting the class-level defaults.
    """

    description = None  # unset by default
    use_hcf = True  # use dialog act in training (if turned off: kgCVAE -> CVAE)
    update_limit = 3000  # the number of mini-batches before evaluating the model

    # How to encode an utterance:
    #   bow:    add word embeddings together
    #   rnn:    RNN utterance encoder
    #   bi_rnn: bi-directional RNN utterance encoder
    sent_type = "bi_rnn"

    # Latent variable (Gaussian variable)
    latent_size = 200  # the dimension of the latent variable
    full_kl_step = 10000  # how many batches before the KL cost weight reaches 1.0
    # Word-drop decoder [Bowman et al. 2015]; presumably a keep probability,
    # so 1.0 would mean no word drop -- confirm against the decoder code.
    dec_keep_prob = 1.0

    # Network general
    cell_type = "gru"  # gru or lstm
    embed_size = 200  # word embedding size
    topic_embed_size = 30  # topic embedding size
    da_embed_size = 30  # dialog act embedding size
    cxt_cell_size = 600  # context encoder hidden size
    sent_cell_size = 300  # utterance encoder hidden size
    dec_cell_size = 400  # response decoder hidden size
    backward_size = 10  # how many utterances are kept in the context window
    step_size = 1  # internal usage
    max_utt_len = 40  # max number of words in an utterance
    num_layer = 1  # number of context RNN layers

    # Optimization parameters
    op = "adam"
    grad_clip = 5.0  # gradient abs max cut
    init_w = 0.08  # uniform random from [-init_w, init_w]
    batch_size = 30  # mini-batch size
    init_lr = 0.001  # initial learning rate
    lr_hold = 1  # only used by SGD
    lr_decay = 0.6  # only used by SGD
    # NOTE(review): the original comment called this the "drop out rate", but
    # the name suggests a keep probability (1.0 = keep everything) -- confirm.
    keep_prob = 1.0
    improve_threshold = 0.996  # for early stopping
    patient_increase = 2.0  # for early stopping
    early_stop = True
    max_epoch = 60  # max number of epochs of training
    grad_noise = 0.0  # inject gradient noise?