diff --git a/Agent_cac.py b/Agent_cac.py
index c8b8d3d..a630162 100644
--- a/Agent_cac.py
+++ b/Agent_cac.py
@@ -41,7 +41,7 @@ def __init__(self, action_size, presist_learning=True):
         self.presist_learning = presist_learning
 
-        input_shape = [-1, Config.SCREEN_W, Config.SCREEN_H, Config.FRAME_PER_ROW]
+        input_shape = [-1, Config.SCREEN_H, Config.SCREEN_W, Config.FRAME_PER_ROW]
         output_shape = [self.action_size]
 
         # Networks
@@ -133,7 +133,7 @@ def play(self, sess, env, learning=True):
 
             if done:
                 time_diff = agent.reportTimerDiff()
-                str = "[Episode {0}] Steps: {1} Reward: {2:.5g}, Avg: {3:.5g}, Intrinsic/Step: {4:.5g}, Time: {5}".format(e,agent.timestep,running_reward,reward_sum / (e+1), running_int_reward/self.timestep, time_diff)
+                str = "[Profile {0}][Episode {1}] Steps: {2} Reward: {3:.5g}, Avg: {4:.5g}, Intrinsic/Step: {5:.5g}, Time: {6}".format(Config.CURRENT_SCENARIO, e,agent.timestep,running_reward,reward_sum / (e+1), running_int_reward/self.timestep, time_diff)
                 print(str)
                 self.writeLog(str)
@@ -232,10 +232,11 @@ def reportTimerDiff(self):
 
 if __name__ == "__main__":
     # this allows us to loop through different profile setting to play around with settings
-    max_batch = 1
+    max_batch = Config.NUM_BATCH
     for i in range(0,max_batch):
         # sc_list = list(range(0,len(Config.SCENARIOS)))
-        sc_list = list(range(13, 45)) # running A3C Hybrid only
+        sc_list = list(range(0, 4)) # running A3C Hybrid only
+        # sc_list = [1]
         sc_len = len(sc_list)
         j = 0
         random.shuffle(sc_list)
diff --git a/Config.py b/Config.py
index a3982a8..3f5e37e 100644
--- a/Config.py
+++ b/Config.py
@@ -1,21 +1,29 @@
 class Config:
     NUM_EPISODE = 100
+    NUM_BATCH = 1000
     MEMORY_SIZE = 512
     TRAINING_BATCH_SIZE = 64 # max size
     LOG = False
     BACKGROUND = True
-    SCREEN_W = SCREEN_H = 64
-    SCREEN_SHAPE = [SCREEN_W, SCREEN_H]
+    SCREEN_SHAPE = [105, 80]
+    SCREEN_H = SCREEN_SHAPE[0]
+    SCREEN_W = SCREEN_SHAPE[1]
     FRAME_PER_ROW = 4
 
     # this maintains versions and setting
     # [DATA_PROFILE, GAME, MAX_EPISODE, EPSILON_FLOOR, EPSILON_START, MOTIVTED, HYBRID_MOT]
     SCENARIOS = [
+        ["asyn.v0", "pacman", 2, 0.2, 0.7, True, True],
+        ["asyn.v0", "pacman", 2, 0.2, 0.6, True, True],
+        ["asyn.v0", "pacman", 2, 0.2, 0.5, True, True],
+        ["asyn.v0", "pacman", 2, 0.2, 0.4, True, True],
+        ["asyn.v0", "pacman", 2, 0.2, 0.3, True, True],
+
         ["acn.v5", "pacman", 10, 0.2, 0.4, True, True], #0 # R = Ri + Re; 500+ ESP TRAINED
-        ["acn.v6", "pacman", 10, 0.2, 0.4, True, False], #1 # R = Ri only
+        ["acn.v0", "pacman", 10, 0.2, 0.4, True, True], #1 # R = Ri only
         ["acn.v7", "pacman", 10, 0.2, 0.4, False, False], #2 # R = Re only
         # Sort-of A3c with mixed motivation #3-12
         ["asyn.v1", "pacman", 2, 0.2, 0.8, True, True],
@@ -108,6 +116,7 @@ class Config:
     LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(SCENARIOS[CURRENT_SCENARIO][1], SCENARIOS[CURRENT_SCENARIO][0])
 
     def load_scenario(idx = 0):
+        Config.CURRENT_SCENARIO = idx
         Config.NUM_EPISODE = Config.SCENARIOS[idx][2]
         Config.EPSILON_FLOOR = Config.SCENARIOS[idx][3]
         Config.EPSILON = Config.SCENARIOS[idx][4]
@@ -117,7 +126,7 @@ def load_scenario(idx = 0):
         Config.TRAINING_LOG_PATH = "./data/{0}.{1}.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
         Config.LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
         print("========================================================")
-        print("Loading Profile[{0}] {1}.{2}:".format(idx, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
+        print("Loading Profile[{0}] {1}.{2}:".format(Config.CURRENT_SCENARIO, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
         print("Config Updated with the following...")
         print("Config.NUM_EPISODE: {0}".format(Config.NUM_EPISODE))
         print("Config.EPSILON_FLOOR: {0}".format(Config.EPSILON_FLOOR))
diff --git a/Model.py b/Model.py
index 0af9dc4..a69b0d4 100644
--- a/Model.py
+++ b/Model.py
@@ -51,8 +51,8 @@ class CNN(Model):
     def __init__(self, input_tensor_shape, output_tensor_shape):
         Model.__init__(self, input_tensor_shape, output_tensor_shape)
         self.input_shape_batch = -1
-        self.input_shape_width = input_tensor_shape[1]
-        self.input_shape_height = input_tensor_shape[2]
+        self.input_shape_height = input_tensor_shape[1]
+        self.input_shape_width = input_tensor_shape[2]
         self.input_shape_channels = input_tensor_shape[3]
 
         # assumed to be 1D only
@@ -110,8 +110,8 @@ def __init__(self, input_tensor_shape, output_tensor_shape):
         self.filter3_stride_d = 3
 
     def build_model(self):
-        # obs_input = tf.placeholder(shape=[None, Config.SCREEN_W, Config.SCREEN_H, Config.FRAME_PER_ROW], dtype=tf.float32)
-        self.obs_input = tf.placeholder(shape=[None, self.input_shape_width, self.input_shape_height, self.input_shape_channels], dtype=tf.float32)
+        # obs_input = tf.placeholder(shape=[None, Config.SCREEN_H, Config.SCREEN_W, Config.FRAME_PER_ROW], dtype=tf.float32)
+        self.obs_input = tf.placeholder(shape=[None, self.input_shape_height, self.input_shape_width, self.input_shape_channels], dtype=tf.float32)
 
         self.cnn1, self.cnn1_w, self.cnn1_b = self.new_cnn_layer(input=self.obs_input,
                                                                  num_filters=self.num_filter1,
@@ -145,13 +145,20 @@ def build_model(self):
                                           use_relu=True,
                                           name="FC_1")
 
+        # print(self.obs_input)
+        # print(self.cnn1)
+        # print(self.cnn2)
+        # print(self.cnn3)
+        # print(self.fc_out)
+
+    # an utility to fix input with batch_size = 1
     def reshape_for_batch(self, input):
         if len(np.shape(input)) == 3: # if 3D only... i.e. it's a single state
             state = list(np.transpose(input, (1,2,0)))
             input_s = np.reshape(input, (self.input_shape_batch,
-                                         self.input_shape_width, self.input_shape_height,
+                                         self.input_shape_height, self.input_shape_width,
                                          self.input_shape_channels))
         else:
             input_s = input