From 89d6a60ba25114c55ffcd046565decb7267a3b86 Mon Sep 17 00:00:00 2001
From: hvy <hiroyuki.vincent.yamazaki@gmail.com>
Date: Wed, 13 Jul 2016 22:21:13 +0000
Subject: [PATCH] Save activation images for each layer

---
 VGGVisualizer.py      | 244 ++++++++++++++----------------------------
 activations/README.md |   3 +
 imgutil.py            |  45 ++++++++
 max_pooling_2d.py     |   7 +-
 test.py               |  64 -----------
 unpooling_2d.py       |   3 +
 visualize.py          |  84 +++++++++++++++
 7 files changed, 223 insertions(+), 227 deletions(-)
 create mode 100644 activations/README.md
 create mode 100644 imgutil.py
 delete mode 100644 test.py
 create mode 100644 visualize.py

diff --git a/VGGVisualizer.py b/VGGVisualizer.py
index 3737679..6020413 100644
--- a/VGGVisualizer.py
+++ b/VGGVisualizer.py
@@ -2,7 +2,6 @@
 import chainer.links as L
 import chainer.functions as F
 from chainer import Variable
-# from lib.chainer.functions.pooling.max_pooling_2d import max_pooling_2d
 from max_pooling_2d import max_pooling_2d
 from unpooling_2d import unpooling_2d
 
@@ -12,205 +11,128 @@
 input_dimensions = (244, 244)
 
 
-def from_VGG(vgg):
-    vgg_visualizer = VGGVisualizer()
-
-    vgg_visualizer.conv1_1 = vgg.conv1_1
-    vgg_visualizer.conv1_2 = vgg.conv1_2
-
-    vgg_visualizer.deconv1_2.W = vgg.conv1_2.W
-    # vgg_visualizer.deconv1_2.b = vgg.conv1_2.b
-    vgg_visualizer.deconv1_1.W = vgg.conv1_1.W
-    # vgg_visualizer.deconv1_1.b = vgg.conv1_1.b
-
-    vgg_visualizer.conv2_1 = vgg.conv2_1
-    vgg_visualizer.conv2_2 = vgg.conv2_2
-
-    vgg_visualizer.deconv2_2.W = vgg.conv2_2.W
-    # vgg_visualizer.deconv2_2.b = vgg.conv2_2.b
-    vgg_visualizer.deconv2_1.W = vgg.conv2_1.W
-    # vgg_visualizer.deconv2_1.b = vgg.conv2_1.b
-
-    vgg_visualizer.conv3_1 = vgg.conv3_1
-    vgg_visualizer.conv3_2 = vgg.conv3_2
-    vgg_visualizer.conv3_3 = vgg.conv3_3
-
-    vgg_visualizer.deconv3_3.W = vgg.conv3_3.W
-    # vgg_visualizer.deconv3_3.b = vgg.conv3_3.b
-    vgg_visualizer.deconv3_2.W = vgg.conv3_2.W
-    # vgg_visualizer.deconv3_2.b = vgg.conv3_2.b
-    vgg_visualizer.deconv3_1.W = vgg.conv3_1.W
-    # vgg_visualizer.deconv3_1.b = vgg.conv3_1.b
-
-    vgg_visualizer.conv4_1 = vgg.conv4_1
-    vgg_visualizer.conv4_2 = vgg.conv4_2
-    vgg_visualizer.conv4_3 = vgg.conv4_3
-
-    vgg_visualizer.deconv4_3.W = vgg.conv4_3.W
-    # vgg_visualizer.deconv4_3.b = vgg.conv4_3.b
-    vgg_visualizer.deconv4_2.W = vgg.conv4_2.W
-    # vgg_visualizer.deconv4_2.b = vgg.conv4_2.b
-    vgg_visualizer.deconv4_1.W = vgg.conv4_1.W
-    # vgg_visualizer.deconv4_1.b = vgg.conv4_1.b
-
-    vgg_visualizer.conv5_1 = vgg.conv5_1
-    vgg_visualizer.conv5_2 = vgg.conv5_2
-    vgg_visualizer.conv5_3 = vgg.conv5_3
-
-    vgg_visualizer.deconv5_3.W = vgg.conv5_3.W
-    # vgg_visualizer.deconv5_3.b = vgg.conv5_3.b
-    vgg_visualizer.deconv5_2.W = vgg.conv5_2.W
-    # vgg_visualizer.deconv5_2.b = vgg.conv5_2.b
-    vgg_visualizer.deconv5_1.W = vgg.conv5_1.W
-    # vgg_visualizer.deconv5_1.b = vgg.conv5_1.b
-
-    print '=================================='
-    print vgg_visualizer.deconv1_1.W.data.shape
-    print vgg.conv1_1.W.data.shape
-    print '=================================='
-    print '=================================='
-    print '------------------------------------------------------------'
-    print vgg.conv1_1.W.data[0][0][0][0]
-    print vgg_visualizer.conv1_1.W.data[0][0][0][0]
-    print vgg_visualizer.deconv1_1.W.data[0][0][0][0]
-    print '------------------------------------------------------------'
-    return vgg_visualizer
-
-
-class VGGVisualizer(chainer.Chain):
+class VGG(chainer.Chain):
     def __init__(self):
-        super(VGGVisualizer, self).__init__(
+        super(VGG, self).__init__(
             conv1_1=L.Convolution2D(3, 64, 3, stride=1, pad=1),
             conv1_2=L.Convolution2D(64, 64, 3, stride=1, pad=1),
-            deconv1_2=L.Deconvolution2D(64, 64, 3, stride=1, pad=1, nobias=True),
-            deconv1_1=L.Deconvolution2D(64, 3, 3, stride=1, pad=1, nobias=True),
 
             conv2_1=L.Convolution2D(64, 128, 3, stride=1, pad=1),
             conv2_2=L.Convolution2D(128, 128, 3, stride=1, pad=1),
-            deconv2_2=L.Deconvolution2D(128, 128, 3, stride=1, pad=1, nobias=True),
-            deconv2_1=L.Deconvolution2D(128, 64, 3, stride=1, pad=1, nobias=True),
 
             conv3_1=L.Convolution2D(128, 256, 3, stride=1, pad=1),
             conv3_2=L.Convolution2D(256, 256, 3, stride=1, pad=1),
             conv3_3=L.Convolution2D(256, 256, 3, stride=1, pad=1),
-            deconv3_3=L.Deconvolution2D(256, 256, 3, stride=1, pad=1, nobias=True),
-            deconv3_2=L.Deconvolution2D(256, 256, 3, stride=1, pad=1, nobias=True),
-            deconv3_1=L.Deconvolution2D(256, 128, 3, stride=1, pad=1, nobias=True),
 
             conv4_1=L.Convolution2D(256, 512, 3, stride=1, pad=1),
             conv4_2=L.Convolution2D(512, 512, 3, stride=1, pad=1),
             conv4_3=L.Convolution2D(512, 512, 3, stride=1, pad=1),
-            deconv4_3=L.Deconvolution2D(512, 512, 3, stride=1, pad=1, nobias=True),
-            deconv4_2=L.Deconvolution2D(512, 512, 3, stride=1, pad=1, nobias=True),
-            deconv4_1=L.Deconvolution2D(512, 256, 3, stride=1, pad=1, nobias=True),
 
             conv5_1=L.Convolution2D(512, 512, 3, stride=1, pad=1),
             conv5_2=L.Convolution2D(512, 512, 3, stride=1, pad=1),
             conv5_3=L.Convolution2D(512, 512, 3, stride=1, pad=1),
-            deconv5_3=L.Deconvolution2D(512, 512, 3, stride=1, pad=1, nobias=True),
-            deconv5_2=L.Deconvolution2D(512, 512, 3, stride=1, pad=1, nobias=True),
-            deconv5_1=L.Deconvolution2D(512, 512, 3, stride=1, pad=1, nobias=True)
+
+            fc6=L.Linear(25088, 4096),
+            fc7=L.Linear(4096, 4096),
+            fc8=L.Linear(4096, 1000)
         )
-        self.visualize = 1
-        """
-        fc6=L.Linear(25088, 4096),
-        fc7=L.Linear(4096, 4096),
-        fc8=L.Linear(4096, 1000)
-        """
+        self.convs = [
+            ['conv1_1', 'conv1_2'],
+            ['conv2_1', 'conv2_2'],
+            ['conv3_1', 'conv3_2', 'conv3_3'],
+            ['conv4_1', 'conv4_2', 'conv4_3'],
+            ['conv5_1', 'conv5_2', 'conv5_3']]
 
-    def __call__(self, x, t):
-        h = F.relu(self.conv1_1(x))
-        h = F.relu(self.conv1_2(h))
+        self.train = False
+        self.switches = []
+        self.unpooling_outsizes = []
+        self.added_deconv = False
 
-        """
-        print('--- Before pooling (subset) ---')
-        print (h.data[0, 0, :6, :6])
-        """
-        outsize1 = h.data.shape[2:]
-        h, indexes1 = F.max_pooling_2d(h, 2, stride=2)
+    def __call__(self, x, t=None, stop_layer=None):
+        self.switches = []
+        self.unpooling_outsizes = []
 
-        h = F.relu(self.conv2_1(h))
-        h = F.relu(self.conv2_2(h))
-        outsize2 = h.data.shape[2:]
-        h, indexes2 = F.max_pooling_2d(h, 2, stride=2)
+        # Forward pass through convolutional layers with ReLU and pooling
+        h = x
+        for i, layer in enumerate(self.convs):
+            for conv in layer:
+                h = F.relu(getattr(self, conv)(h))
 
+            prepooling_size = h.data.shape[2:]
+            self.unpooling_outsizes.append(prepooling_size)
 
-        h = F.relu(self.conv3_1(h))
-        h = F.relu(self.conv3_2(h))
-        h = F.relu(self.conv3_3(h))
-        outsize3 = h.data.shape[2:]
-        h, indexes3 = F.max_pooling_2d(h, 2, stride=2)
+            h, switches = F.max_pooling_2d(h, 2, stride=2)
+            self.switches.append(switches)
 
-        h = F.relu(self.conv4_1(h))
-        h = F.relu(self.conv4_2(h))
-        h = F.relu(self.conv4_3(h))
-        outsize4 = h.data.shape[2:]
-        h, indexes4 = F.max_pooling_2d(h, 2, stride=2)
+            if stop_layer == i + 1:
+                return h
 
-        h = F.relu(self.conv5_1(h))
-        h = F.relu(self.conv5_2(h))
-        h = F.relu(self.conv5_3(h))
-        outsize5 = h.data.shape[2:]
-        h, indexes5 = F.max_pooling_2d(h, 2, stride=2)
+        h = F.dropout(F.relu(self.fc6(h)), train=self.train, ratio=0.5)
+        h = F.dropout(F.relu(self.fc7(h)), train=self.train, ratio=0.5)
+        h = self.fc8(h)
 
-        h_tmp = h.data.copy()
-        print 'h.shape'
-        print h_tmp.shape
+        if self.train:
+            self.loss = F.softmax_cross_entropy(h, t)
+            self.acc = F.accuracy(h, t)
+            return self.loss
+        else:
+            self.pred = F.softmax(h)
+            return self.pred
 
+    def activations(self, x, layer):
+        if x.data.shape[0] != 1:
+            raise TypeError('Visualization is only supported for a single \
+                            image at a time')
 
-        # Reconstruction
-        i = 10
-        import numpy as np
-        h = np.zeros(h_tmp.shape)
-        h[0][i] = h_tmp[0][i]
-        h = Variable(h)
+        self.add_deconv_layers()
 
-        h = F.unpooling_2d(h, indexes5, 2, stride=2, outsize=outsize5)
-        h = self.deconv5_3(F.relu(h))
-        h = self.deconv5_2(F.relu(h))
-        h = self.deconv5_1(F.relu(h))
-        # return h
+        # Forward pass
+        h = self(x, stop_layer=layer)
 
-        h = F.unpooling_2d(h, indexes4, 2, stride=2, outsize=outsize4)
-        h = self.deconv4_3(F.relu(h))
-        h = self.deconv4_2(F.relu(h))
-        h = self.deconv4_1(F.relu(h))
+        # Compute the activations for each feature map
+        h_data = h.data.copy()
+        xp = chainer.cuda.get_array_module(h.data)
+        zeros = xp.zeros_like(h.data)
+        convs = self.convs[:layer]
+        deconvs = [['de{}'.format(c) for c in conv] for conv in convs]
 
+        feat_maps = []
 
-        h = F.unpooling_2d(h, indexes3, 2, stride=2, outsize=outsize3)
-        h = self.deconv3_3(F.relu(h))
-        h = self.deconv3_2(F.relu(h))
-        h = self.deconv3_1(F.relu(h))
+        for fm in range(h.data.shape[1]):  # For each feature map
 
+            print('Feature map {}'.format(fm))
 
-        h = F.unpooling_2d(h, indexes2, 2, stride=2, outsize=outsize2)
-        h = self.deconv2_2(F.relu(h))
-        h = self.deconv2_1(F.relu(h))
+            condition = zeros.copy()
+            condition[0][fm] = 1  # Keep one feature map and zero all other
+            h = Variable(xp.where(condition, h_data, zeros))
 
-        h = F.unpooling_2d(h, indexes1, 2, stride=2, outsize=outsize1)
-        h = self.deconv1_2(F.relu(h))
-        h = self.deconv1_1(F.relu(h))
+            for i, deconv in enumerate(reversed(deconvs)):
+                h = F.unpooling_2d(h, self.switches[layer-i-1], 2, stride=2,
+                                   outsize=self.unpooling_outsizes[layer-i-1])
+                for d in reversed(deconv):
+                    h = getattr(self, d)(F.relu(h))
 
-        print(h.data)
-        print(h.data.shape)
+            feat_maps.append(h.data)
 
+        feat_maps = xp.array(feat_maps)
+        feat_maps = xp.rollaxis(feat_maps, 0, 2)  # Batch to first axis
 
-        # Return first layer visualizations
-        return h
+        return Variable(feat_maps)
 
-        print('--- After pooling (subset) ---')
-        print h_prim.data[0, 0, :6, :6]
+    def add_deconv_layers(self, nobias=True):
+        """Add a deconvolutional layer for each convolutional layer already
+        defined in the network."""
+        if self.added_deconv:
+            return
 
-        """
-        h = F.dropout(F.relu(self.fc6(h)), train=self.train, ratio=0.5)
-        h = F.dropout(F.relu(self.fc7(h)), train=self.train, ratio=0.5)
-        h = self.fc8(h)
+        for layer in self.children():
+            if isinstance(layer, F.Convolution2D):
+                out_channels, in_channels, kh, kw = layer.W.data.shape
+                deconv = L.Deconvolution2D(out_channels, in_channels,
+                                           (kh, kw), stride=layer.stride,
+                                           pad=layer.pad,
+                                           initialW=layer.W.data,
+                                           nobias=nobias)
+                self.add_link('de{}'.format(layer.name), deconv)
 
-        if self.train:
-            self.loss = F.softmax_cross_entropy(h, t)
-            self.acc = F.accuracy(h, t)
-            return self.loss
-        else:
-            self.pred = F.softmax(h)
-            return self.pred
-        """
+        self.added_deconv = True
diff --git a/activations/README.md b/activations/README.md
new file mode 100644
index 0000000..cd23338
--- /dev/null
+++ b/activations/README.md
@@ -0,0 +1,3 @@
+# Activations
+
+Feature map activations are stored in this directory.
diff --git a/imgutil.py b/imgutil.py
new file mode 100644
index 0000000..9b6f732
--- /dev/null
+++ b/imgutil.py
@@ -0,0 +1,45 @@
+import os
+import math
+import numpy as np
+import cv2 as cv
+import matplotlib
+matplotlib.use('Agg')  # Workaround to save images when running over ssh sessions
+import matplotlib.pyplot as plt
+import matplotlib.image as mpimg
+
+
+def tile_ims(filename, directory):
+    """Load all images in the given directory and tile them into one."""
+    ims = [mpimg.imread(os.path.join(directory, f)) for f in
+           sorted(os.listdir(directory))]
+    save_ims(filename, np.array(ims))
+
+
+def save_im(filename, im):
+    # h, w, c = im.shape
+    cv.imwrite(filename, im)
+
+
+def save_ims(filename, ims):
+    n, h, w, c = ims.shape
+
+    # Plot the images on a grid
+    rows = int(math.ceil(math.sqrt(n)))
+    cols = int(round(math.sqrt(n)))
+
+    # Each subplot should have the same resolutions as the image dimensions
+
+    # TODO: Consider proper heights and widths for the subplots
+    h = 64
+    w = 64
+
+    fig, axes = plt.subplots(rows, cols, figsize=(h, w))
+    fig.subplots_adjust(hspace=0, wspace=0)
+
+    for i, ax in enumerate(axes.flat):
+        ax.axis('off')  # Hide x, y axes completely
+        if i < n:
+            ax.imshow(ims[i])
+
+    plt.savefig(filename, bbox_inches='tight')
+    plt.clf()
diff --git a/max_pooling_2d.py b/max_pooling_2d.py
index 7769c04..236a7eb 100644
--- a/max_pooling_2d.py
+++ b/max_pooling_2d.py
@@ -27,9 +27,12 @@ def forward_cpu(self, x):
         return y, self.indexes
 
     def forward_gpu(self, x):
-        if cuda.cudnn_enabled and self.use_cudnn:
-            return super(MaxPooling2D, self).forward_gpu(x)
+        """
+        Commented away since we need the indexes for the unpooling process.
 
+        if cuda.cudnn_enabled and self.use_cudnn:
+            return super(MaxPooling2D, self).forward_gpu(x), self.indexes
+        """
         n, c, h, w = x[0].shape
         y_h = conv.get_conv_outsize(
             h, self.kh, self.sy, self.ph, self.cover_all)
diff --git a/test.py b/test.py
deleted file mode 100644
index 8b525c8..0000000
--- a/test.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-import cv2 as cv
-import chainer
-from chainer import serializers
-from chainer import Variable
-
-from VGG import VGG, input_dimensions
-import VGGVisualizer
-
-
-if __name__ == '__main__':
-    print('Starting...')
-
-    mean = np.array([103.939, 116.779, 123.68])
-    img = cv.imread('images/cat.jpg').astype(np.float32)
-    img -= mean
-    img = cv.resize(img, (224, 224)).transpose((2, 0, 1))
-    img = img[np.newaxis, :, :, :]
-
-    vgg = VGG()
-    serializers.load_hdf5('VGG.model', vgg)
-
-
-    # Visualize fst filter
-    """
-    imgs = ()
-    for i in range(64):
-        fil = vgg.conv1_1.W.data[i]
-        fil = np.rollaxis(fil, 0, 3)
-        min_val = fil.min()
-        fil -= min_val
-        max_val = fil.max()
-        fil *= ( 255.0 / max_val)
-        imgs += (fil,)
-        imgs += (np.zeros((3, 3, 3)),)
-
-    vis = np.concatenate(imgs, axis=0)
-    cv.imwrite('filters_conv1_1_new.jpg', vis)
-    """
-
-    vgg = VGGVisualizer.from_VGG(vgg)
-
-    reconstruction = vgg(Variable(img), None)
-
-    n, c, h, w = reconstruction.data.shape
-
-    # Assume a single image in batch and get it
-    img = reconstruction.data[0]
-    print('Max: {}'.format(img.max()))
-    print('Min: {}'.format(img.min()))
-    img -= img.min()
-    if img.max() > 0:
-        img *= 255.0 / img.max()
-    else:
-        img *= 255.0
-
-    print('img.shape: {}'.format(img.shape))
-    img = np.rollaxis(img, 0, 3)
-    # img += mean
-
-    # cv.imwrite('cat_reconstructed.jpg', img)
-    cv.imwrite('new_dog.jpg', img)
-
-    print('Done')
diff --git a/unpooling_2d.py b/unpooling_2d.py
index 48ad768..d10e2f7 100644
--- a/unpooling_2d.py
+++ b/unpooling_2d.py
@@ -55,6 +55,9 @@ def forward(self, x):
         # NOTE(hvy): Take indexes(Switches) into account
         # TODO(hvy): Remove the loops and make it efficient
         y = xp.zeros_like(col)
+        if isinstance(x[0], cuda.ndarray):
+            indexes = cuda.cupy.asnumpy(indexes)
+
         for n_i in range(n):
             for c_i in range(c):
                 for r in range(h):
diff --git a/visualize.py b/visualize.py
new file mode 100644
index 0000000..b09e52a
--- /dev/null
+++ b/visualize.py
@@ -0,0 +1,84 @@
+import os
+import numpy as np
+import cv2 as cv
+from chainer import serializers
+from chainer import Variable
+from VGGVisualizer import VGG
+import imgutil
+
+
+"""
+TODO
+- Speed up the unpooling loop with indexes loop
+- Suport GPU
+"""
+
+
+def sample_im():
+    """Return a preprocessed (averaged and resized to VGG) sample image."""
+    mean = np.array([103.939, 116.779, 123.68])
+    im = cv.imread('images/cat.jpg').astype(np.float32)
+    im -= mean
+    im = cv.resize(im, (224, 224)).transpose((2, 0, 1))
+    im = im[np.newaxis, :, :, :]
+    return im
+
+
+def get_activations(model, x, layer):
+    """Compute the activations for each feature map for the given layer for
+    this particular image. Note that the input x should be a mini-batch
+    of size one, i.e. a single image.
+    """
+    a = model.activations(Variable(x), layer=layer+1)  # To 1-indexed
+    a = a.data[0]  # Assume batch with a single image
+    return post_process_activations(a)
+
+
+def post_process_activations(a):
+    a -= a.min()
+    if a.max() > 0:
+        a *= 255.0 / a.max()
+    else:
+        a *= 255.0
+    return a
+
+
+def save_activations(model, x, layer, dst_root):
+    """Save feature map activations for the given image as images on disk."""
+
+    # Create the target directory if it doesn't already exist
+    dst_dir = os.path.join(dst_root, 'layer_{}/'.format(layer+1))
+    dst_dir = os.path.dirname(dst_dir)
+    if not os.path.exists(dst_dir):
+        os.makedirs(dst_dir)
+
+    print('Computing activations for layer {}...'.format(layer+1))
+    activations = get_activations(model, x, layer)
+
+    # Save each activation as its own image to later tile them all into
+    # a single image for a better overview
+    filename_len = len(str(len(activations)))
+    for i, activation in enumerate(activations):
+        im = np.rollaxis(activation, 0, 3)  # c, h, w -> h, w, c
+        filename = os.path.join(dst_dir,
+                                '{num:0{width}}.jpg'  # Pad with zeros
+                                .format(num=i, width=filename_len))
+
+        print('Saving image {}...'.format(filename))
+        imgutil.save_im(filename, im)
+
+    tiled_filename = os.path.join(dst_root, 'layer_{}.jpg'.format(layer+1))
+    print('Saving image {}...'.format(filename))
+    imgutil.tile_ims(tiled_filename, dst_dir)
+
+
+if __name__ == '__main__':
+    print('Preparing the model...')
+    model = VGG()
+    serializers.load_hdf5('VGG.model', model)
+
+    # Visualize each of the 5 convolutional layers in VGG
+    for layer in range(5):
+        save_activations(model, sample_im(), layer, 'activations')
+
+    print('Done')