-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathConfig.py
139 lines (127 loc) · 8.09 KB
/
Config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
class Config:
    """Global hyper-parameter store for the RL training runs.

    All settings are class-level attributes read via ``Config.X``; nothing is
    instantiated.  A "scenario" is one row of :data:`SCENARIOS`; calling
    :meth:`load_scenario` copies that row's fields into the corresponding
    class attributes and rebuilds the data/log file paths.
    """

    NUM_EPISODE = 100
    NUM_BATCH = 1000
    MEMORY_SIZE = 512
    TRAINING_BATCH_SIZE = 64  # max size
    LOG = False
    BACKGROUND = True
    SCREEN_SHAPE = [105, 80]
    SCREEN_H = SCREEN_SHAPE[0]
    SCREEN_W = SCREEN_SHAPE[1]
    FRAME_PER_ROW = 4

    # This maintains versions and settings.  Each row is:
    # [DATA_PROFILE, GAME, MAX_EPISODE, EPSILON_FLOOR, EPSILON_START, MOTIVATED, HYBRID_MOT]
    # NOTE(review): the original index comments were stale; the ranges below
    # reflect the actual list positions (66 rows total).
    SCENARIOS = [
        # asyn.v0 sweep over EPSILON_START, indices 0-4
        ["asyn.v0", "pacman", 2, 0.2, 0.7, True, True],
        ["asyn.v0", "pacman", 2, 0.2, 0.6, True, True],
        ["asyn.v0", "pacman", 2, 0.2, 0.5, True, True],
        ["asyn.v0", "pacman", 2, 0.2, 0.4, True, True],
        ["asyn.v0", "pacman", 2, 0.2, 0.3, True, True],
        ["acn.v5", "pacman", 10, 0.2, 0.4, True, True],    # 5  R = Ri + Re; 500+ ESP TRAINED
        ["acn.v0", "pacman", 10, 0.2, 0.4, True, True],    # 6  R = Ri only
        ["acn.v7", "pacman", 10, 0.2, 0.4, False, False],  # 7  R = Re only
        # Sort-of A3C with mixed motivation, indices 8-17
        ["asyn.v1", "pacman", 2, 0.2, 0.8, True, True],
        ["asyn.v1", "pacman", 2, 0.2, 0.8, True, False],
        ["asyn.v1", "pacman", 2, 0.2, 0.8, False, False],
        ["asyn.v1", "pacman", 2, 0.2, 0.5, True, True],
        ["asyn.v1", "pacman", 2, 0.2, 0.5, True, False],
        ["asyn.v1", "pacman", 2, 0.2, 0.5, False, False],
        ["asyn.v1", "pacman", 2, 0.2, 0.3, True, True],
        ["asyn.v1", "pacman", 2, 0.2, 0.3, True, False],
        ["asyn.v1", "pacman", 2, 0.2, 0.3, False, False],
        ["asyn.v1", "pacman", 2, 0.1, 0.2, True, True],
        # Sort-of A3C with hybrid motivation, indices 18-49
        # (the same 8-row epsilon sweep repeated 4 times, presumably for
        # repeated trials -- TODO confirm the duplication is intentional)
        ["asyn.v2", "pacman", 2, 0.2, 0.8, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.7, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.6, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.5, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.4, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.3, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.2, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.1, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.8, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.7, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.6, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.5, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.4, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.3, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.2, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.1, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.8, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.7, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.6, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.5, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.4, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.3, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.2, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.1, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.8, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.7, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.6, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.5, True, True],
        ["asyn.v2", "pacman", 2, 0.2, 0.4, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.3, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.2, True, True],
        ["asyn.v2", "pacman", 2, 0.1, 0.1, True, True],
        # Sort-of A3C with only intrinsic motivation, indices 50-57
        ["asyn.v3", "pacman", 2, 0.2, 0.8, True, False],
        ["asyn.v3", "pacman", 2, 0.2, 0.7, True, False],
        ["asyn.v3", "pacman", 2, 0.2, 0.6, True, False],
        ["asyn.v3", "pacman", 2, 0.2, 0.5, True, False],
        ["asyn.v3", "pacman", 2, 0.2, 0.4, True, False],
        ["asyn.v3", "pacman", 2, 0.1, 0.3, True, False],
        ["asyn.v3", "pacman", 2, 0.1, 0.2, True, False],
        ["asyn.v3", "pacman", 2, 0.1, 0.1, True, False],
        # Sort-of A3C with only extrinsic motivation, indices 58-65
        ["asyn.v4", "pacman", 2, 0.2, 0.8, False, False],
        ["asyn.v4", "pacman", 2, 0.2, 0.7, False, False],
        ["asyn.v4", "pacman", 2, 0.2, 0.6, False, False],
        ["asyn.v4", "pacman", 2, 0.2, 0.5, False, False],
        ["asyn.v4", "pacman", 2, 0.2, 0.4, False, False],
        ["asyn.v4", "pacman", 2, 0.1, 0.3, False, False],
        ["asyn.v4", "pacman", 2, 0.1, 0.2, False, False],
        ["asyn.v4", "pacman", 2, 0.1, 0.1, False, False]
    ]

    CURRENT_SCENARIO = 0  # default
    FRAME_PER_ACTION = 1
    LEARNING_RATE = 1e-5
    EPSILON_DECAY = 0.9999
    EPSILON_FLOOR = SCENARIOS[CURRENT_SCENARIO][3]
    EPSILON = SCENARIOS[CURRENT_SCENARIO][4]
    GAMMA = 0.95  # discount factor

    # ACNetwork
    ALPHA = 0.5  # coefficient for loss_value
    BETA = 0.05  # coefficient for self entropy

    # ICM
    AGENT_SELF_MOTIVATED = SCENARIOS[CURRENT_SCENARIO][5]  # flag this to enable intrinsic reward
    MOTIVATED_BY_HYBRID_MODE = SCENARIOS[CURRENT_SCENARIO][6]  # when agent is self-motivated, flag this enable r = r_i + r_e
    ICM_LAMDA = 2  # coefficient to weight against intrinsic reward
    ICM_BETA = 0.6  # weight between ICM-FORWARD & ICM-INVERSE
    ICM_LEARNING_RATE = 1e-3
    ICM_ETA = 10  # coefficient to scale R_e as R_i
    LEARNING_DATA_PATH = "./data/{0}.{1}.data".format(SCENARIOS[CURRENT_SCENARIO][1], SCENARIOS[CURRENT_SCENARIO][0])
    TRAINING_LOG_PATH = "./data/{0}.{1}.log".format(SCENARIOS[CURRENT_SCENARIO][1], SCENARIOS[CURRENT_SCENARIO][0])
    LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(SCENARIOS[CURRENT_SCENARIO][1], SCENARIOS[CURRENT_SCENARIO][0])

    # @staticmethod added: the original def had no `self`, so it only worked
    # when called on the class (Config.load_scenario(i)); an instance call
    # would have bound the instance to `idx`.  Class-level calls are unchanged.
    @staticmethod
    def load_scenario(idx = 0):
        """Activate scenario ``idx``: copy its row from :data:`SCENARIOS` into
        the matching ``Config`` class attributes, rebuild the data/log file
        paths, and print a summary of everything that changed.

        :param idx: index into ``Config.SCENARIOS`` (default 0).
        :raises IndexError: if ``idx`` is out of range of ``SCENARIOS``.
        """
        Config.CURRENT_SCENARIO = idx
        Config.NUM_EPISODE = Config.SCENARIOS[idx][2]
        Config.EPSILON_FLOOR = Config.SCENARIOS[idx][3]
        Config.EPSILON = Config.SCENARIOS[idx][4]
        Config.AGENT_SELF_MOTIVATED = Config.SCENARIOS[idx][5]
        Config.MOTIVATED_BY_HYBRID_MODE = Config.SCENARIOS[idx][6]
        Config.LEARNING_DATA_PATH = "./data/{0}.{1}.data".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
        Config.TRAINING_LOG_PATH = "./data/{0}.{1}.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
        Config.LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
        print("========================================================")
        print("Loading Profile[{0}] {1}.{2}:".format(Config.CURRENT_SCENARIO, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
        print("Config Updated with the following...")
        print("Config.NUM_EPISODE: {0}".format(Config.NUM_EPISODE))
        print("Config.EPSILON_FLOOR: {0}".format(Config.EPSILON_FLOOR))
        print("Config.EPSILON: {0}".format(Config.EPSILON))
        print("Config.AGENT_SELF_MOTIVATED: {0}".format(Config.AGENT_SELF_MOTIVATED))
        print("Config.MOTIVATED_BY_HYBRID_MODE: {0}".format(Config.MOTIVATED_BY_HYBRID_MODE))
        print("Config.LEARNING_DATA_PATH: {0}".format(Config.LEARNING_DATA_PATH))
        print("Config.TRAINING_LOG_PATH: {0}".format(Config.TRAINING_LOG_PATH))
        print("Config.LOSSES_LOG_PATH: {0}".format(Config.LOSSES_LOG_PATH))
        print("========================================================")