Add files via upload

skelneko · Jun 21, 2017 · 4f991a9 · 4f991a9
1 parent 5b27da3
commit 4f991a9
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 13 deletions.
diff --git a/Agent_cac.py b/Agent_cac.py
@@ -41,7 +41,7 @@ def __init__(self, action_size, presist_learning=True):
 
         self. presist_learning = presist_learning
 
-        input_shape = [-1, Config.SCREEN_W, Config.SCREEN_H, Config.FRAME_PER_ROW]
+        input_shape = [-1, Config.SCREEN_H, Config.SCREEN_W, Config.FRAME_PER_ROW]
         output_shape = [self.action_size]
 
         # Networks
@@ -133,7 +133,7 @@ def play(self, sess, env, learning=True):
 
                 if done:
                     time_diff = agent.reportTimerDiff()
-                    str = "[Episode {0}] Steps: {1} Reward: {2:.5g}, Avg: {3:.5g}, Intrinsic/Step: {4:.5g}, Time: {5}".format(e,agent.timestep,running_reward,reward_sum / (e+1), running_int_reward/self.timestep, time_diff)
+                    str = "[Profile {0}][Episode {1}] Steps: {2} Reward: {3:.5g}, Avg: {4:.5g}, Intrinsic/Step: {5:.5g}, Time: {6}".format(Config.CURRENT_SCENARIO, e,agent.timestep,running_reward,reward_sum / (e+1), running_int_reward/self.timestep, time_diff)
 
                     print(str)
                     self.writeLog(str)
@@ -232,10 +232,11 @@ def reportTimerDiff(self):
 if __name__ == "__main__":
 
     # this allows us to loop through different profile setting to play around with settings
-    max_batch = 1
+    max_batch = Config.NUM_BATCH
     for i in range(0,max_batch):
         # sc_list = list(range(0,len(Config.SCENARIOS)))
-        sc_list = list(range(13, 45))                       # running A3C Hybrid only
+        sc_list = list(range(0, 4))                       # running A3C Hybrid only
+        # sc_list = [1]
         sc_len = len(sc_list)
         j = 0
         random.shuffle(sc_list)

diff --git a/Config.py b/Config.py
@@ -1,21 +1,29 @@
 
 class Config:
     NUM_EPISODE = 100
+    NUM_BATCH = 1000
     MEMORY_SIZE = 512
     TRAINING_BATCH_SIZE = 64    # max size
 
     LOG = False
     BACKGROUND = True
 
-    SCREEN_W = SCREEN_H = 64
-    SCREEN_SHAPE = [SCREEN_W, SCREEN_H]
+    SCREEN_SHAPE = [105, 80]
+    SCREEN_H = SCREEN_SHAPE[0]
+    SCREEN_W = SCREEN_SHAPE[1]
     FRAME_PER_ROW = 4
 
     # this maintains versions and setting
     # [DATA_PROFILE, GAME, MAX_EPISODE, EPSILON_FLOOR, EPSILON_START, MOTIVATED, HYBRID_MOT]
     SCENARIOS = [
+                    ["asyn.v0", "pacman", 2, 0.2, 0.7, True, True],
+                    ["asyn.v0", "pacman", 2, 0.2, 0.6, True, True],
+                    ["asyn.v0", "pacman", 2, 0.2, 0.5, True, True],
+                    ["asyn.v0", "pacman", 2, 0.2, 0.4, True, True],
+                    ["asyn.v0", "pacman", 2, 0.2, 0.3, True, True],
+
                     ["acn.v5", "pacman", 10, 0.2, 0.4, True, True],           #0    # R = Ri + Re; 500+ ESP TRAINED
-                    ["acn.v6", "pacman", 10, 0.2, 0.4, True, False],          #1    # R = Ri only
+                    ["acn.v0", "pacman", 10, 0.2, 0.4, True, True],          #1    # R = Ri only
                     ["acn.v7", "pacman", 10, 0.2, 0.4, False, False],         #2    # R = Re only
                     # Sort-of A3c with mixed motivation                       #3-12
                     ["asyn.v1", "pacman", 2, 0.2, 0.8, True, True],
@@ -108,6 +116,7 @@ class Config:
     LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(SCENARIOS[CURRENT_SCENARIO][1], SCENARIOS[CURRENT_SCENARIO][0])
 
     def load_scenario(idx = 0):
+        Config.CURRENT_SCENARIO = idx
         Config.NUM_EPISODE = Config.SCENARIOS[idx][2]
         Config.EPSILON_FLOOR = Config.SCENARIOS[idx][3]
         Config.EPSILON = Config.SCENARIOS[idx][4]
@@ -117,7 +126,7 @@ def load_scenario(idx = 0):
         Config.TRAINING_LOG_PATH = "./data/{0}.{1}.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
         Config.LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
         print("========================================================")
-        print("Loading Profile[{0}] {1}.{2}:".format(idx, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
+        print("Loading Profile[{0}] {1}.{2}:".format(Config.CURRENT_SCENARIO, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
         print("Config Updated with the following...")
         print("Config.NUM_EPISODE: {0}".format(Config.NUM_EPISODE))
         print("Config.EPSILON_FLOOR: {0}".format(Config.EPSILON_FLOOR))

diff --git a/Model.py b/Model.py
@@ -51,8 +51,8 @@ class CNN(Model):
     def __init__(self, input_tensor_shape, output_tensor_shape):
         Model.__init__(self, input_tensor_shape, output_tensor_shape)
         self.input_shape_batch = -1
-        self.input_shape_width = input_tensor_shape[1]
-        self.input_shape_height = input_tensor_shape[2]
+        self.input_shape_height = input_tensor_shape[1]
+        self.input_shape_width = input_tensor_shape[2]
         self.input_shape_channels = input_tensor_shape[3]
 
         # assumed to be 1D only
@@ -110,8 +110,8 @@ def __init__(self, input_tensor_shape, output_tensor_shape):
         self.filter3_stride_d = 3
 
     def build_model(self):
-        # obs_input = tf.placeholder(shape=[None, Config.SCREEN_W, Config.SCREEN_H, Config.FRAME_PER_ROW], dtype=tf.float32)
-        self.obs_input = tf.placeholder(shape=[None, self.input_shape_width, self.input_shape_height, self.input_shape_channels], dtype=tf.float32)
+        # obs_input = tf.placeholder(shape=[None, Config.SCREEN_H, Config.SCREEN_W, Config.FRAME_PER_ROW], dtype=tf.float32)
+        self.obs_input = tf.placeholder(shape=[None, self.input_shape_height, self.input_shape_width, self.input_shape_channels], dtype=tf.float32)
 
         self.cnn1, self.cnn1_w, self.cnn1_b = self.new_cnn_layer(input=self.obs_input,
                                                 num_filters=self.num_filter1,
@@ -145,13 +145,20 @@ def build_model(self):
                                                     use_relu=True,
                                                     name="FC_1")
 
+        # print(self.obs_input)
+        # print(self.cnn1)
+        # print(self.cnn2)
+        # print(self.cnn3)
+        # print(self.fc_out)
+
+
     # a utility to fix input with batch_size = 1
     def reshape_for_batch(self, input):
         if len(np.shape(input)) == 3:    # if 3D only... i.e. it's a single state
             state = list(np.transpose(input, (1,2,0)))
             input_s = np.reshape(input, (self.input_shape_batch,
-                                            self.input_shape_width,
                                             self.input_shape_height,
+                                            self.input_shape_width,
                                             self.input_shape_channels))
         else:
             input_s = input