-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_generator.py
104 lines (85 loc) · 4.38 KB
/
data_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import argparse
from datetime import datetime, timedelta
from pathlib import Path
from pprint import pprint
import numpy as np
import random
# Per-user model coefficients, keyed by user id. Filled in by the
# ``__main__`` section below and read by ``_uploadGenerator``.
user_parameters = dict()

# Command-line interface. NOTE: the arguments are parsed when this module is
# loaded, not only when it is run as a script.
parser = argparse.ArgumentParser(description='Generate RL test data')
parser.add_argument(
    '--path',
    default='.',
)
parser.add_argument(
    '--start_date',
    help='Start date (YYYYMMDD)',
)
parser.add_argument(
    '--end_date',
    help='End date (YYYYMMDD)',
)
parser.add_argument(
    '--user',
    default='user',
    help='User base name',
)
parser.add_argument(
    '--user_count',
    default=1,
    type=int,
    help='Number of users',
)
args = parser.parse_args()
def gaussian(mean, std):
    """Draw one sample from a normal distribution.

    Args:
        mean: Centre of the distribution.
        std: Standard deviation of the distribution.

    Returns:
        A single value drawn with NumPy's global random generator.
    """
    sample = np.random.normal(loc=mean, scale=std)
    return sample
def random_30_min_step_count():
    """Return a random, log-uniformly distributed step count.

    An exponent is drawn uniformly from [-0.69, 6.9) using NumPy's global
    random generator, exponentiated, and truncated to an ``int``. The result
    therefore lies between 0 and 992 inclusive.
    """
    log_low, log_high = -0.69, 6.9
    exponent = np.random.uniform(low=log_low, high=log_high)
    return int(np.exp(exponent))
def time_8601(time=None) -> str:
    """Return ``time`` formatted as an ISO 8601 string with a UTC offset.

    Args:
        time: The ``datetime`` to format. When omitted (``None``), the
            current time at the moment of the call is used.

    Returns:
        The result of ``time.astimezone().isoformat()``. ``astimezone()``
        with no argument converts to the system local timezone; a naive
        ``datetime`` is presumed to already be in local time.
    """
    # A ``datetime.now()`` default in the signature would be evaluated only
    # once, when the function is defined, so every later call without an
    # argument would report that same stale instant. Resolve it per call.
    if time is None:
        time = datetime.now()
    return time.astimezone().isoformat()
def _uploadGenerator(path, user_list, start_date, end_date):
    """Write ``upload.csv`` with simulated decisions and proximal outcomes.

    For every day from ``start_date`` (inclusive) to ``end_date`` (exclusive),
    every user in ``user_list`` and every prompt time, one row is written with
    a random step count, a random binary decision (0 with probability 0.4,
    otherwise 1) and a proximal outcome computed as::

        int(exp(p[0] * log(step_count) + p[1] + p[2] * decision))

    where ``p`` is the entry of the module-level ``user_parameters`` for that
    user. The proximal outcome is timestamped 30 minutes after the decision,
    and the row's ``timestamp`` column 5 minutes after that.

    Args:
        path: Directory in which ``upload.csv`` is created (overwritten).
        user_list: User ids; each must be a key of ``user_parameters``.
        start_date: First day to generate (``datetime``), inclusive.
        end_date: Day at which generation stops (``datetime``), exclusive.

    Raises:
        KeyError: If a user id has no entry in ``user_parameters``.
    """
    prompt_times = [(8, 30), (12, 0), (15, 0), (17, 30), (20, 0)]
    with open(Path(path).joinpath('upload.csv'), 'w') as output_file:
        output_file.write(
            'user_id,timestamp,decision_timestamp,decision,proximal_outcome_timestamp,proximal_outcome,step_count\n')
        current_date = start_date
        while current_date < end_date:
            for user_id in user_list:
                # Look the coefficients up once per user instead of three
                # times per generated row.
                log_step_coef, intercept, decision_coef = \
                    user_parameters[user_id][:3]
                for (h, m) in prompt_times:
                    step_count = random_30_min_step_count()
                    decision_time = datetime(year=current_date.year,
                                             month=current_date.month,
                                             day=current_date.day,
                                             hour=h,
                                             minute=m,
                                             second=0)
                    decision = 0 if random.random() < 0.4 else 1
                    proximal_outcome_time = decision_time + \
                        timedelta(minutes=30)
                    if step_count > 0:
                        proximal_outcome = int(np.exp(
                            np.log(step_count) * log_step_coef
                            + intercept
                            + decision * decision_coef))
                    else:
                        # The step count can be 0. log(0) is -inf and makes
                        # NumPy emit a divide-by-zero RuntimeWarning; with a
                        # positive coefficient the formula collapses to 0, so
                        # write that value directly.
                        proximal_outcome = 0
                    upload_time = proximal_outcome_time + timedelta(minutes=5)
                    output_file.write(
                        f'{user_id},{time_8601(upload_time)},{time_8601(decision_time)},{decision},{time_8601(proximal_outcome_time)},{proximal_outcome},{step_count}\n')
            current_date += timedelta(days=1)
def _decisionGenerator(path, user_list, start_date, end_date):
    """Write ``decision.csv`` with one row per user, day and prompt time.

    Days run from ``start_date`` (inclusive) to ``end_date`` (exclusive).
    Each row holds the user id, the decision timestamp formatted by
    ``time_8601`` and a step count from ``random_30_min_step_count``.

    Args:
        path: Directory in which ``decision.csv`` is created (overwritten).
        user_list: User ids to generate rows for.
        start_date: First day to generate (``datetime``), inclusive.
        end_date: Day at which generation stops (``datetime``), exclusive.
    """
    prompt_times = [(8, 30), (12, 0), (15, 0), (17, 30), (20, 0)]
    one_day = timedelta(days=1)
    csv_path = Path(path) / 'decision.csv'
    with open(csv_path, 'w') as output_file:
        output_file.write('user_id,timestamp,step_count\n')
        day = start_date
        while day < end_date:
            for user_id in user_list:
                for hour, minute in prompt_times:
                    # Only the calendar date of ``day`` is used; the time of
                    # day comes from the prompt schedule.
                    decision_time = datetime(
                        day.year, day.month, day.day, hour, minute, 0)
                    step_count = random_30_min_step_count()
                    row = f'{user_id},{time_8601(decision_time)},{step_count}\n'
                    output_file.write(row)
            day = day + one_day
def _updateGenerator(path, user_list, start_date, end_date):
    """Write ``update.csv`` with one row per user and day.

    Days run from ``start_date`` (inclusive) to ``end_date`` (exclusive).
    Each row holds the user id and that day's ``datetime`` formatted by
    ``time_8601``.

    Args:
        path: Directory in which ``update.csv`` is created (overwritten).
        user_list: User ids to generate rows for.
        start_date: First day to generate (``datetime``), inclusive.
        end_date: Day at which generation stops (``datetime``), exclusive.
    """
    one_day = timedelta(days=1)
    csv_path = Path(path) / 'update.csv'
    with open(csv_path, 'w') as output_file:
        output_file.write('user_id,timestamp\n')
        day = start_date
        while day < end_date:
            output_file.writelines(
                f'{user_id},{time_8601(day)}\n' for user_id in user_list
            )
            day = day + one_day
if __name__ == '__main__':
    # argparse treats --start_date/--end_date as optional, so a missing value
    # arrives here as None. Report that as a usage error instead of letting
    # strptime() fail with an opaque TypeError.
    if args.start_date is None or args.end_date is None:
        parser.error('--start_date and --end_date are required (YYYYMMDD)')
    try:
        start_date = datetime.strptime(args.start_date, '%Y%m%d')
        end_date = datetime.strptime(args.end_date, '%Y%m%d')
    except ValueError as err:
        # Malformed dates become a usage message rather than a traceback.
        parser.error(f'dates must be given as YYYYMMDD: {err}')

    user = args.user
    user_list = [f'{user}_{i}' for i in range(args.user_count)]

    # Draw the three per-user coefficients consumed by _uploadGenerator. A
    # separate loop name keeps the base name in ``user`` from being
    # overwritten.
    for user_id in user_list:
        user_parameters[user_id] = (
            gaussian(0.41, 0.03),
            gaussian(1.53, 0.13),
            gaussian(0.13, 0.07),
        )
    pprint(user_parameters)

    # Each generator writes its own CSV file into args.path.
    _updateGenerator(args.path, user_list, start_date, end_date)
    _decisionGenerator(args.path, user_list, start_date, end_date)
    _uploadGenerator(args.path, user_list, start_date, end_date)