Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
skelneko authored Jun 21, 2017
1 parent 5b27da3 commit 4f991a9
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 13 deletions.
9 changes: 5 additions & 4 deletions Agent_cac.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, action_size, presist_learning=True):

self. presist_learning = presist_learning

input_shape = [-1, Config.SCREEN_W, Config.SCREEN_H, Config.FRAME_PER_ROW]
input_shape = [-1, Config.SCREEN_H, Config.SCREEN_W, Config.FRAME_PER_ROW]
output_shape = [self.action_size]

# Networks
Expand Down Expand Up @@ -133,7 +133,7 @@ def play(self, sess, env, learning=True):

if done:
time_diff = agent.reportTimerDiff()
str = "[Episode {0}] Steps: {1} Reward: {2:.5g}, Avg: {3:.5g}, Intrinsic/Step: {4:.5g}, Time: {5}".format(e,agent.timestep,running_reward,reward_sum / (e+1), running_int_reward/self.timestep, time_diff)
str = "[Profile {0}][Episode {1}] Steps: {2} Reward: {3:.5g}, Avg: {4:.5g}, Intrinsic/Step: {5:.5g}, Time: {6}".format(Config.CURRENT_SCENARIO, e,agent.timestep,running_reward,reward_sum / (e+1), running_int_reward/self.timestep, time_diff)

print(str)
self.writeLog(str)
Expand Down Expand Up @@ -232,10 +232,11 @@ def reportTimerDiff(self):
if __name__ == "__main__":

# this allows us to loop through different profile settings to experiment with them
max_batch = 1
max_batch = Config.NUM_BATCH
for i in range(0,max_batch):
# sc_list = list(range(0,len(Config.SCENARIOS)))
sc_list = list(range(13, 45)) # running A3C Hybrid only
sc_list = list(range(0, 4)) # running A3C Hybrid only
# sc_list = [1]
sc_len = len(sc_list)
j = 0
random.shuffle(sc_list)
Expand Down
17 changes: 13 additions & 4 deletions Config.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@

class Config:
NUM_EPISODE = 100
NUM_BATCH = 1000
MEMORY_SIZE = 512
TRAINING_BATCH_SIZE = 64 # max size

LOG = False
BACKGROUND = True

SCREEN_W = SCREEN_H = 64
SCREEN_SHAPE = [SCREEN_W, SCREEN_H]
SCREEN_SHAPE = [105, 80]
SCREEN_H = SCREEN_SHAPE[0]
SCREEN_W = SCREEN_SHAPE[1]
FRAME_PER_ROW = 4

# this maintains versions and settings
# [DATA_PROFILE, GAME, MAX_EPISODE, EPSILON_FLOOR, EPSILON_START, MOTIVATED, HYBRID_MOT]
SCENARIOS = [
["asyn.v0", "pacman", 2, 0.2, 0.7, True, True],
["asyn.v0", "pacman", 2, 0.2, 0.6, True, True],
["asyn.v0", "pacman", 2, 0.2, 0.5, True, True],
["asyn.v0", "pacman", 2, 0.2, 0.4, True, True],
["asyn.v0", "pacman", 2, 0.2, 0.3, True, True],

["acn.v5", "pacman", 10, 0.2, 0.4, True, True], #0 # R = Ri + Re; 500+ ESP TRAINED
["acn.v6", "pacman", 10, 0.2, 0.4, True, False], #1 # R = Ri only
["acn.v0", "pacman", 10, 0.2, 0.4, True, True], #1 # R = Ri only
["acn.v7", "pacman", 10, 0.2, 0.4, False, False], #2 # R = Re only
# Sort-of A3c with mixed motivation #3-12
["asyn.v1", "pacman", 2, 0.2, 0.8, True, True],
Expand Down Expand Up @@ -108,6 +116,7 @@ class Config:
LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(SCENARIOS[CURRENT_SCENARIO][1], SCENARIOS[CURRENT_SCENARIO][0])

def load_scenario(idx = 0):
Config.CURRENT_SCENARIO = idx
Config.NUM_EPISODE = Config.SCENARIOS[idx][2]
Config.EPSILON_FLOOR = Config.SCENARIOS[idx][3]
Config.EPSILON = Config.SCENARIOS[idx][4]
Expand All @@ -117,7 +126,7 @@ def load_scenario(idx = 0):
Config.TRAINING_LOG_PATH = "./data/{0}.{1}.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
Config.LOSSES_LOG_PATH = "./data/{0}.{1}.losses.log".format(Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0])
print("========================================================")
print("Loading Profile[{0}] {1}.{2}:".format(idx, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
print("Loading Profile[{0}] {1}.{2}:".format(Config.CURRENT_SCENARIO, Config.SCENARIOS[idx][1], Config.SCENARIOS[idx][0]))
print("Config Updated with the following...")
print("Config.NUM_EPISODE: {0}".format(Config.NUM_EPISODE))
print("Config.EPSILON_FLOOR: {0}".format(Config.EPSILON_FLOOR))
Expand Down
17 changes: 12 additions & 5 deletions Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ class CNN(Model):
def __init__(self, input_tensor_shape, output_tensor_shape):
Model.__init__(self, input_tensor_shape, output_tensor_shape)
self.input_shape_batch = -1
self.input_shape_width = input_tensor_shape[1]
self.input_shape_height = input_tensor_shape[2]
self.input_shape_height = input_tensor_shape[1]
self.input_shape_width = input_tensor_shape[2]
self.input_shape_channels = input_tensor_shape[3]

# assumed to be 1D only
Expand Down Expand Up @@ -110,8 +110,8 @@ def __init__(self, input_tensor_shape, output_tensor_shape):
self.filter3_stride_d = 3

def build_model(self):
# obs_input = tf.placeholder(shape=[None, Config.SCREEN_W, Config.SCREEN_H, Config.FRAME_PER_ROW], dtype=tf.float32)
self.obs_input = tf.placeholder(shape=[None, self.input_shape_width, self.input_shape_height, self.input_shape_channels], dtype=tf.float32)
# obs_input = tf.placeholder(shape=[None, Config.SCREEN_H, Config.SCREEN_W, Config.FRAME_PER_ROW], dtype=tf.float32)
self.obs_input = tf.placeholder(shape=[None, self.input_shape_height, self.input_shape_width, self.input_shape_channels], dtype=tf.float32)

self.cnn1, self.cnn1_w, self.cnn1_b = self.new_cnn_layer(input=self.obs_input,
num_filters=self.num_filter1,
Expand Down Expand Up @@ -145,13 +145,20 @@ def build_model(self):
use_relu=True,
name="FC_1")

# print(self.obs_input)
# print(self.cnn1)
# print(self.cnn2)
# print(self.cnn3)
# print(self.fc_out)


# a utility to fix input with batch_size = 1
def reshape_for_batch(self, input):
if len(np.shape(input)) == 3: # if 3D only... i.e. it's a single state
state = list(np.transpose(input, (1,2,0)))
input_s = np.reshape(input, (self.input_shape_batch,
self.input_shape_width,
self.input_shape_height,
self.input_shape_width,
self.input_shape_channels))
else:
input_s = input
Expand Down

0 comments on commit 4f991a9

Please sign in to comment.