diff --git a/VGG.py b/VGG.py deleted file mode 100644 index d212988..0000000 --- a/VGG.py +++ /dev/null @@ -1,69 +0,0 @@ -import chainer -import chainer.links as L -import chainer.functions as F -from chainer import Variable - -input_dimensions = (244, 244) - -class VGG(chainer.Chain): - def __init__(self): - super(VGG, self).__init__( - conv1_1=L.Convolution2D(3, 64, 3, stride=1, pad=1), - conv1_2=L.Convolution2D(64, 64, 3, stride=1, pad=1), - - conv2_1=L.Convolution2D(64, 128, 3, stride=1, pad=1), - conv2_2=L.Convolution2D(128, 128, 3, stride=1, pad=1), - - conv3_1=L.Convolution2D(128, 256, 3, stride=1, pad=1), - conv3_2=L.Convolution2D(256, 256, 3, stride=1, pad=1), - conv3_3=L.Convolution2D(256, 256, 3, stride=1, pad=1), - - conv4_1=L.Convolution2D(256, 512, 3, stride=1, pad=1), - conv4_2=L.Convolution2D(512, 512, 3, stride=1, pad=1), - conv4_3=L.Convolution2D(512, 512, 3, stride=1, pad=1), - - conv5_1=L.Convolution2D(512, 512, 3, stride=1, pad=1), - conv5_2=L.Convolution2D(512, 512, 3, stride=1, pad=1), - conv5_3=L.Convolution2D(512, 512, 3, stride=1, pad=1), - - fc6=L.Linear(25088, 4096), - fc7=L.Linear(4096, 4096), - fc8=L.Linear(4096, 1000) - ) - self.train = False - - def __call__(self, x, t): - h = F.relu(self.conv1_1(x)) - h = F.relu(self.conv1_2(h)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.conv2_1(h)) - h = F.relu(self.conv2_2(h)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.conv3_1(h)) - h = F.relu(self.conv3_2(h)) - h = F.relu(self.conv3_3(h)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.conv4_1(h)) - h = F.relu(self.conv4_2(h)) - h = F.relu(self.conv4_3(h)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.conv5_1(h)) - h = F.relu(self.conv5_2(h)) - h = F.relu(self.conv5_3(h)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.dropout(F.relu(self.fc6(h)), train=self.train, ratio=0.5) - h = F.dropout(F.relu(self.fc7(h)), train=self.train, ratio=0.5) - h = self.fc8(h) - - if self.train: - self.loss = F.softmax_cross_entropy(h, t) - self.acc = F.accuracy(h, t) - return self.loss - else: - self.pred = F.softmax(h) - return self.pred diff --git a/images/cat.jpg b/images/cat.jpg new file mode 100644 index 0000000..910527d Binary files /dev/null and b/images/cat.jpg differ diff --git a/images/dog.jpg b/images/dog.jpg new file mode 100644 index 0000000..40cd01c Binary files /dev/null and b/images/dog.jpg differ diff --git a/max_pooling_2d.py b/max_pooling_2d.py deleted file mode 100644 index 236a7eb..0000000 --- a/max_pooling_2d.py +++ /dev/null @@ -1,169 +0,0 @@ -import numpy - -from chainer import cuda -from chainer.functions.pooling import pooling_2d -from chainer.utils import conv - -if cuda.cudnn_enabled: - cudnn = cuda.cudnn - libcudnn = cudnn.cudnn - - -class MaxPooling2D(pooling_2d.Pooling2D): - - """Max pooling over a set of 2d planes.""" - - def forward_cpu(self, x): - col = conv.im2col_cpu( - x[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, - pval=-float('inf'), cover_all=self.cover_all) - n, c, kh, kw, out_h, out_w = col.shape - col = col.reshape(n, c, kh * kw, out_h, out_w) - - # We select maximum twice, since the implementation using numpy.choose - # hits its bug when kh * kw >= 32. - self.indexes = col.argmax(axis=2) - y = col.max(axis=2) - return y, self.indexes - - def forward_gpu(self, x): - """ - Commented away since we need the indexes for the unpooling process. - - if cuda.cudnn_enabled and self.use_cudnn: - return super(MaxPooling2D, self).forward_gpu(x), self.indexes - """ - n, c, h, w = x[0].shape - y_h = conv.get_conv_outsize( - h, self.kh, self.sy, self.ph, self.cover_all) - y_w = conv.get_conv_outsize( - w, self.kw, self.sx, self.pw, self.cover_all) - y = cuda.cupy.empty((n, c, y_h, y_w), dtype=x[0].dtype) - self.indexes = cuda.cupy.empty((n, c, y_h, y_w), dtype=numpy.int32) - - cuda.elementwise( - 'raw T in, int32 h, int32 w, int32 out_h, int32 out_w,' - 'int32 kh, int32 kw, int32 sy, int32 sx, int32 ph, int32 pw', - 'T out, S indexes', - ''' - int c0 = i / (out_h * out_w); - int out_y = i / out_w % out_h; - int out_x = i % out_w; - int in_y_0 = max(0, out_y * sy - ph); - int in_y_1 = min(h, out_y * sy + kh - ph); - int in_x_0 = max(0, out_x * sx - pw); - int in_x_1 = min(w, out_x * sx + kw - pw); - - T maxval = in[in_x_0 + w * (in_y_0 + h * c0)]; - int argmax_y = in_y_0; - int argmax_x = in_x_0; - for (int y = in_y_0; y < in_y_1; ++y) { - int offset_y = w * (y + h * c0); - for (int x = in_x_0; x < in_x_1; ++x) { - float v = in[x + offset_y]; - if (maxval < v) { - maxval = v; - argmax_y = y; - argmax_x = x; - } - } - } - out = maxval; - - int argmax_ky = argmax_y + ph - out_y * sy; - int argmax_kx = argmax_x + pw - out_x * sx; - indexes = argmax_kx + kw * argmax_ky; - ''', 'max_pool_fwd')(x[0].reduced_view(), - h, w, y_h, y_w, self.kh, self.kw, - self.sy, self.sx, self.ph, self.pw, - y, self.indexes) - return y, self.indexes - - def backward_cpu(self, x, gy): - n, c, out_h, out_w = gy[0].shape - h, w = x[0].shape[2:] - gcol = numpy.zeros( - (n, c, self.kh, self.kw, out_h, out_w), dtype=numpy.float32) - - # TODO(beam2d): Make it fast - gcol_r = numpy.rollaxis(gcol.reshape(n, c, -1, out_h, out_w), 2) - for i in numpy.ndindex(n, c, out_h, out_w): - gcol_r[self.indexes[i]][i] = gy[0][i] - - gx = conv.col2im_cpu(gcol, self.sy, self.sx, self.ph, self.pw, h, w) - return gx, - - def backward_gpu(self, x, gy): - if cuda.cudnn_enabled and self.use_cudnn: - return super(MaxPooling2D, self).backward_gpu(x, gy) - - n, c, h, w = x[0].shape - y_h, y_w = gy[0].shape[2:] - gx = cuda.cupy.empty_like(x[0]) - - cuda.elementwise( - 'raw T gy, raw S indexes, int32 h, int32 w,' - 'int32 out_h, int32 out_w, int32 kh, int32 kw,' - 'int32 sy, int32 sx, int32 ph, int32 pw', - 'T gx', - ''' - int c0 = i / (h * w); - int y = i / w % h + ph; - int x = i % w + pw; - int out_y_0 = max(0, (y - kh + sy) / sy); - int out_y_1 = min(out_h, (y + sy) / sy); - int out_x_0 = max(0, (x - kw + sx) / sx); - int out_x_1 = min(out_w, (x + sx) / sx); - - T val = 0; - for (int out_y = out_y_0; out_y < out_y_1; ++out_y) { - int ky = y - out_y * sy; - for (int out_x = out_x_0; out_x < out_x_1; ++out_x) { - int kx = x - out_x * sx; - int offset = out_x + out_w * (out_y + out_h * c0); - if (indexes[offset] == kx + kw * ky) { - val += gy[offset]; - } - } - } - gx = val; - ''', - 'max_pool_bwd')(gy[0].reduced_view(), self.indexes.reduced_view(), - h, w, y_h, y_w, self.kh, self.kw, - self.sy, self.sx, self.ph, self.pw, - gx) - return gx, - - def create_pool_desc(self): - return cudnn.create_pooling_descriptor( - (self.kh, self.kw), (self.sy, self.sx), (self.ph, self.pw), - libcudnn.CUDNN_POOLING_MAX) - - -def max_pooling_2d(x, ksize, stride=None, pad=0, cover_all=True, - use_cudnn=True): - """Spatial max pooling function. - - This function acts similarly to :class:`~functions.Convolution2D`, but - it computes the maximum of input spatial patch for each channel - without any parameter instead of computing the inner products. - - Args: - x (~chainer.Variable): Input variable. - ksize (int or pair of ints): Size of pooling window. ``ksize=k`` and - ``ksize=(k, k)`` are equivalent. - stride (int or pair of ints or None): Stride of pooling applications. - ``stride=s`` and ``stride=(s, s)`` are equivalent. If ``None`` is - specified, then it uses same stride as the pooling window size. - pad (int or pair of ints): Spatial padding width for the input array. - ``pad=p`` and ``pad=(p, p)`` are equivalent. - cover_all (bool): If ``True``, all spatial locations are pooled into - some output pixels. It may make the output size larger. - use_cudnn (bool): If ``True`` and cuDNN is enabled, then this function - uses cuDNN as the core implementation. - - Returns: - ~chainer.Variable: Output variable. - - """ - return MaxPooling2D(ksize, stride, pad, cover_all, use_cudnn)(x) diff --git a/VGGVisualizer.py b/models/VGG.py similarity index 94% rename from VGGVisualizer.py rename to models/VGG.py index 6020413..5963553 100644 --- a/VGGVisualizer.py +++ b/models/VGG.py @@ -2,16 +2,16 @@ import chainer.links as L import chainer.functions as F from chainer import Variable -from max_pooling_2d import max_pooling_2d -from unpooling_2d import unpooling_2d +from lib.chainer.chainer.functions.pooling import max_pooling_2d +from lib.chainer.chainer.functions.pooling import unpooling_2d -F.max_pooling_2d = max_pooling_2d -F.unpooling_2d = unpooling_2d - -input_dimensions = (244, 244) +# Override original Chainer functions +F.max_pooling_2d = max_pooling_2d.max_pooling_2d +F.unpooling_2d = unpooling_2d.unpooling_2d class VGG(chainer.Chain): + """Input dimensions are (244, 244).""" def __init__(self): super(VGG, self).__init__( conv1_1=L.Convolution2D(3, 64, 3, stride=1, pad=1), diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/unpooling_2d.py b/unpooling_2d.py deleted file mode 100644 index d10e2f7..0000000 --- a/unpooling_2d.py +++ /dev/null @@ -1,126 +0,0 @@ -from chainer import cuda -from chainer.functions.pooling import pooling_2d -from chainer.utils import conv -from chainer.utils import type_check - - -class Unpooling2D(pooling_2d.Pooling2D): - - """Unpooling over a set of 2d planes.""" - - def __init__(self, ksize, stride=None, pad=0, - outsize=None, cover_all=True): - super(Unpooling2D, self).__init__(ksize, stride, pad, cover_all) - self.outh, self.outw = (None, None) if outsize is None else outsize - - def check_type_forward(self, in_types): - n_in = in_types.size() - type_check.expect(n_in == 2) - x_type = in_types[0] - indexes_type = in_types[1] - - type_check.expect( - x_type.dtype.kind == 'f', - x_type.ndim == 4, - indexes_type.dtype.kind == 'i', - indexes_type.ndim == 4 - ) - - if self.outh is not None: - expected_h = conv.get_conv_outsize( - self.outh, self.kh, self.sy, self.ph, cover_all=self.cover_all) - type_check.expect(x_type.shape[2] == expected_h) - if self.outw is not None: - expected_w = conv.get_conv_outsize( - self.outw, self.kw, self.sx, self.pw, cover_all=self.cover_all) - type_check.expect(x_type.shape[3] == expected_w) - - def forward(self, x): - h, w = x[0].shape[2:] - n = x[0].shape[0] - c = x[0].shape[1] - indexes = x[1] - - if self.outh is None: - self.outh = conv.get_deconv_outsize( - h, self.kh, self.sy, self.ph, cover_all=self.cover_all) - if self.outw is None: - self.outw = conv.get_deconv_outsize( - w, self.kw, self.sx, self.pw, cover_all=self.cover_all) - xp = cuda.get_array_module(*x) - - col = xp.tile(x[0][:, :, xp.newaxis, xp.newaxis], - (1, 1, self.kh, self.kw, 1, 1)) - - # NOTE(hvy): Take indexes(Switches) into account - # TODO(hvy): Remove the loops and make it efficient - y = xp.zeros_like(col) - if isinstance(x[0], cuda.ndarray): - indexes = cuda.cupy.asnumpy(indexes) - - for n_i in range(n): - for c_i in range(c): - for r in range(h): - for c in range(w): - index = indexes[n_i][c_i][r][c] - if index < self.kw: - y[n_i][c_i].T[c][r][index][0] = col[n_i][c_i].T[c][r][index][0] - else: - y[n_i][c_i].T[c][r][index % self.kw][1] = col[n_i][c_i].T[c][r][index % self.kw][1] - - if isinstance(x[0], cuda.ndarray): - y = conv.col2im_gpu(y, self.sy, self.sx, self.ph, self.pw, - self.outh, self.outw) - else: - y = conv.col2im_cpu(y, self.sy, self.sx, self.ph, self.pw, - self.outh, self.outw) - - return y, - - - def backward(self, x, gy): - if isinstance(gy[0], cuda.ndarray): - gcol = conv.im2col_gpu( - gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, - cover_all=self.cover_all) - else: - gcol = conv.im2col_cpu( - gy[0], self.kh, self.kw, self.sy, self.sx, self.ph, self.pw, - cover_all=self.cover_all) - gx = gcol.sum(axis=(2, 3)) - return gx, - - -def unpooling_2d(x, indexes, ksize, stride=None, pad=0, outsize=None, cover_all=True): - """Inverse operation of pooling for 2d array. - - This function acts similarly to :class:`~functions.Deconvolution2D`, but - it spreads input 2d array's value without any parameter instead of - computing the inner products. - - Args: - x (~chainer.Variable): Input variable. - ksize (int or pair of ints): Size of pooling window. ``ksize=k`` and - ``ksize=(k, k)`` are equivalent. - stride (int, pair of ints or None): Stride of pooling applications. - ``stride=s`` and ``stride=(s, s)`` are equivalent. If ``None`` is - specified, then it uses same stride as the pooling window size. - pad (int or pair of ints): Spatial padding width for the input array. - ``pad=p`` and ``pad=(p, p)`` are equivalent. - outsize (None or pair of ints): Expected output size (height, width) - of array after the operation. If ``None``, the size - (height or width) is estimated from the size of input array - in first batch with - :func:`~chainer.utils.conv.get_deconv_outsize`. - If outsize is not ``None``, the result of outsize applied to - :func:`~chainer.utils.conv.get_conv_outsize` must be equal to - the shape of the 2d array in the input batch ``x``. - cover_all (bool): If ``True``, all spatial locations are pooled - into some output pixels, and the output size is larger than that - when cover_all is ``False``. - - Returns: - ~chainer.Variable: Output variable. - - """ - return Unpooling2D(ksize, stride, pad, outsize, cover_all)(x, indexes) diff --git a/visualize.py b/visualize.py index b09e52a..8b2c297 100644 --- a/visualize.py +++ b/visualize.py @@ -3,7 +3,7 @@ import cv2 as cv from chainer import serializers from chainer import Variable -from VGGVisualizer import VGG +from models.VGG import VGG import imgutil