Wednesday morning: working on the multi-agent assignment.
I'll take a nap first and come back to it in the evening; during class later I'll look into that work by Li Xuelong (李学龙).

Thursday morning: working on the style-transfer assignment. I plan to go through the code properly over the weekend, ideally implementing it once myself.
This afternoon I can also do some reading on decision making with large models.

Implementing the convolution layer

This is the naive implementation of the convolution layer's forward pass. It uses four nested loops, so the time complexity is quite high.

def forward_raw(self, input):
    start_time = time.time()
    self.input = input  # [N, C, H, W]
    """Zero-pad the input so the original image sits in the center."""
    height = self.input.shape[2] + self.padding * 2
    width = self.input.shape[3] + self.padding * 2
    self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
    self.input_pad[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]] = self.input
    """Compute the output shape."""
    height_out = (height - self.kernel_size) // self.stride + 1
    width_out = (width - self.kernel_size) // self.stride + 1
    self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
    """
    Core of the convolution. The output feature map has shape
    (batch, out_channel, height_out, width_out), and we visit it element by element.
    Note how the feature map of one output channel is computed:
    self.weight[:, :, :, idxc] selects the weights of a single kernel, with shape
    (in_channel, kernel_size, kernel_size) -- each output channel corresponds to one kernel.
    Each output element is the inner product of that kernel with the matching window
    of the idxn-th padded input, plus the bias.
    """
    for idxn in range(self.input.shape[0]):
        for idxc in range(self.channel_out):
            for idxh in range(height_out):
                for idxw in range(width_out):
                    # TODO: forward pass of the convolution layer -- inner product of the feature-map window and the kernel, plus the bias
                    self.output[idxn, idxc, idxh, idxw] = np.sum(self.weight[:, :, :, idxc] * self.input_pad[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size]) + self.bias[idxc]
    self.forward_time = time.time() - start_time
    return self.output
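
To convince myself that the four-loop forward really computes a sliding inner product (a cross-correlation, since the kernel is not flipped), I wrote a small standalone sanity check. The helper naive_conv2d and the shapes below are made up for this check and are not part of the assignment code; the reference uses scipy.signal.correlate2d, whose 'same' mode with zero fill matches padding=1 for a 3x3 kernel.

import numpy as np
from scipy.signal import correlate2d

def naive_conv2d(x, w, b, stride=1, pad=0):
    """Standalone copy of the four-loop forward above. x: [N, C, H, W], w: [C, k, k, C_out], b: [C_out]."""
    n, c, h, win = x.shape
    k, cout = w.shape[1], w.shape[3]
    xp = np.zeros((n, c, h + 2 * pad, win + 2 * pad))
    xp[:, :, pad:pad + h, pad:pad + win] = x
    ho = (h + 2 * pad - k) // stride + 1
    wo = (win + 2 * pad - k) // stride + 1
    out = np.zeros((n, cout, ho, wo))
    for idxn in range(n):
        for idxc in range(cout):
            for idxh in range(ho):
                for idxw in range(wo):
                    out[idxn, idxc, idxh, idxw] = np.sum(
                        w[:, :, :, idxc] * xp[idxn, :, idxh * stride:idxh * stride + k,
                                              idxw * stride:idxw * stride + k]) + b[idxc]
    return out

np.random.seed(0)
x = np.random.randn(2, 3, 8, 8)
w = np.random.randn(3, 3, 3, 4)   # [C_in, k, k, C_out], same layout as self.weight above
b = np.random.randn(4)
out = naive_conv2d(x, w, b, stride=1, pad=1)

# scipy's correlate2d does the same sliding inner product, one 2-D slice at a time
ref = np.zeros_like(out)
for idxn in range(2):
    for idxc in range(4):
        ref[idxn, idxc] = sum(correlate2d(x[idxn, cin], w[cin, :, :, idxc], mode='same')
                              for cin in range(3)) + b[idxc]
print(np.allclose(out, ref))   # expect True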

Next comes the backward pass of the convolution layer.

def backward_raw(self, top_diff):
    start_time = time.time()
    """Allocate buffers for d_weight, d_bias and bottom_diff."""
    self.d_weight = np.zeros(self.weight.shape)
    self.d_bias = np.zeros(self.bias.shape)
    bottom_diff = np.zeros(self.input_pad.shape)
    """Compute the gradients of the weights and the bias."""
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    # TODO: backward pass of the convolution layer -- gradients of the weights and bias, plus the gradient passed to the layer below
                    """
                    The bias gradient simply accumulates top_diff at the corresponding positions.
                    The weight gradient accumulates top_diff times the matching window of input_pad.
                    """
                    self.d_weight[:, :, :, idxc] += top_diff[idxn, idxc, idxh, idxw] * self.input_pad[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size]
                    self.d_bias[idxc] += top_diff[idxn, idxc, idxh, idxw]
                    bottom_diff[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size] += top_diff[idxn, idxc, idxh, idxw] * self.weight[:, :, :, idxc]
    # Crop off the padding to recover the gradient w.r.t. the original (unpadded) input.
    bottom_diff = bottom_diff[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]]
    self.backward_time = time.time() - start_time
    return bottom_diff
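
Before moving on I also wanted a way to check backward_raw numerically. The sketch below is self-contained and only mirrors the loops above (conv_forward and conv_dweight are my own throwaway helpers, not the lab's API): it compares the accumulated weight gradient against a finite-difference estimate for one weight entry, with the loss taken as the plain sum of the outputs.

import numpy as np

def conv_forward(x, w, b, stride, pad):
    """Minimal standalone copy of forward_raw, also returning the padded input."""
    n, c, h, wi = x.shape
    k, cout = w.shape[1], w.shape[3]
    xp = np.zeros((n, c, h + 2 * pad, wi + 2 * pad))
    xp[:, :, pad:pad + h, pad:pad + wi] = x
    ho = (h + 2 * pad - k) // stride + 1
    wo = (wi + 2 * pad - k) // stride + 1
    out = np.zeros((n, cout, ho, wo))
    for idxn in range(n):
        for idxc in range(cout):
            for idxh in range(ho):
                for idxw in range(wo):
                    out[idxn, idxc, idxh, idxw] = np.sum(
                        w[:, :, :, idxc] * xp[idxn, :, idxh * stride:idxh * stride + k,
                                              idxw * stride:idxw * stride + k]) + b[idxc]
    return out, xp

def conv_dweight(xp, w, top_diff, stride):
    """Same accumulation as the d_weight update in backward_raw."""
    dw = np.zeros(w.shape)
    k = w.shape[1]
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    dw[:, :, :, idxc] += top_diff[idxn, idxc, idxh, idxw] * \
                        xp[idxn, :, idxh * stride:idxh * stride + k, idxw * stride:idxw * stride + k]
    return dw

np.random.seed(0)
x = np.random.randn(2, 3, 5, 5)
w = np.random.randn(3, 3, 3, 2)
b = np.random.randn(2)
out, xp = conv_forward(x, w, b, stride=1, pad=1)
top_diff = np.ones_like(out)          # loss = sum of all outputs, so dL/d(output) = 1 everywhere
dw = conv_dweight(xp, w, top_diff, stride=1)

# finite-difference estimate for a single weight entry
eps = 1e-6
w_plus = w.copy()
w_plus[0, 1, 2, 1] += eps
out_plus, _ = conv_forward(x, w_plus, b, stride=1, pad=1)
numeric = (out_plus.sum() - out.sum()) / eps
print(dw[0, 1, 2, 1], numeric)        # the two numbers should be nearly identical

The bias gradient can be checked the same way by perturbing b[idxc] instead of a weight entry.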

Implementing the pooling layer

This is the naive implementation of the max-pooling layer.

def forward_raw(self, input):
    start_time = time.time()
    """Compute the output shape and allocate the max-index mask."""
    self.input = input  # [N, C, H, W]
    self.max_index = np.zeros(self.input.shape)
    height_out = (self.input.shape[2] - self.kernel_size) // self.stride + 1
    width_out = (self.input.shape[3] - self.kernel_size) // self.stride + 1
    self.output = np.zeros([self.input.shape[0], self.input.shape[1], height_out, width_out])
    """
    Pooling looks a lot like convolution, except that each input channel is handled
    independently and we take the maximum inside each window.
    We also record the position of each maximum so the backward pass can route the
    gradient back to the right place.
    """
    for idxn in range(self.input.shape[0]):
        for idxc in range(self.input.shape[1]):
            for idxh in range(height_out):
                for idxw in range(width_out):
                    # TODO: forward pass of the max-pooling layer -- take the maximum inside the pooling window
                    self.output[idxn, idxc, idxh, idxw] = np.max(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size])
                    current_max_index = np.argmax(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size])
                    current_max_index = np.unravel_index(current_max_index, [self.kernel_size, self.kernel_size])
                    self.max_index[idxn, idxc, idxh*self.stride+current_max_index[0], idxw*self.stride+current_max_index[1]] = 1
    return self.output
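
A tiny hand-checkable example (numbers made up) of what the forward pass computes for a single 4x4 channel with kernel_size=2 and stride=2; the reshape trick below is just a compact way to take the per-window maximum over non-overlapping 2x2 windows.

import numpy as np

x = np.array([[1.,  2.,  5.,  0.],
              [3.,  4., -1.,  2.],
              [0.,  1.,  7.,  8.],
              [2., -3.,  6.,  9.]])
# kernel_size = 2, stride = 2 -> four non-overlapping 2x2 windows
# expected output:            [[4., 5.],
#                              [2., 9.]]
# expected max_index mask: 1 at the positions of 4, 5, 2 and 9, 0 elsewhere
out = x.reshape(2, 2, 2, 2).transpose(0, 2, 1, 3).reshape(2, 2, 4).max(axis=2)
print(out)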

Backward pass

def backward_raw_book(self, top_diff):
    bottom_diff = np.zeros(self.input.shape)
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    """
                    The gradient is routed back to the position of the maximum in each
                    pooling window. Note that this version recomputes the argmax from
                    self.input instead of reusing the max_index mask recorded in the
                    forward pass, which is why that mask never shows up here.
                    """
                    max_index = np.argmax(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size])
                    max_index = np.unravel_index(max_index, [self.kernel_size, self.kernel_size])
                    bottom_diff[idxn, idxc, idxh*self.stride+max_index[0], idxw*self.stride+max_index[1]] = top_diff[idxn, idxc, idxh, idxw]
    show_matrix(top_diff, 'top_diff--------')
    show_matrix(bottom_diff, 'max pooling d_h ')
    return bottom_diff
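
Since backward_raw_book never touches the max_index mask recorded in the forward pass, here is a sketch of how that mask could be used instead. It assumes non-overlapping windows (stride == kernel_size) and an input whose height and width are divisible by the stride, so that every input position belongs to exactly one window; under those assumptions, upsampling top_diff and keeping it only at the recorded maxima gives the same bottom_diff without recomputing any argmax.

def backward_with_mask(self, top_diff):
    """My sketch of an alternative backward pass that reuses self.max_index.
    Assumes stride == kernel_size and input height/width divisible by the stride."""
    # Broadcast each output gradient over its pooling window...
    upsampled = np.repeat(np.repeat(top_diff, self.stride, axis=2), self.stride, axis=3)
    # ...then keep it only where the forward pass found the maximum.
    bottom_diff = self.max_index * upsampled
    return bottom_diff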

Implementing the flatten layer

It simply flattens the input data.

class FlattenLayer(object):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape
        assert np.prod(self.input_shape) == np.prod(self.output_shape)
        print('\tFlatten layer with input shape %s, output shape %s.' % (str(self.input_shape), str(self.output_shape)))
    def forward(self, input):
        assert list(input.shape[1:]) == list(self.input_shape)
        # matconvnet feature map dim: [N, height, width, channel]
        # ours feature map dim: [N, channel, height, width]
        self.input = np.transpose(input, [0, 2, 3, 1])
        """Flatten each sample in the batch into self.output_shape."""
        self.output = self.input.reshape([self.input.shape[0]] + list(self.output_shape))
        show_matrix(self.output, 'flatten out ')
        return self.output
    def backward(self, top_diff):
        assert list(top_diff.shape[1:]) == list(self.output_shape)
        # Invert the forward pass: first reshape back to the transposed layout
        # [N, height, width, channel], then transpose back to [N, channel, height, width].
        top_diff = top_diff.reshape([top_diff.shape[0]] + list(self.input.shape[1:]))
        bottom_diff = np.transpose(top_diff, [0, 3, 1, 2])
        show_matrix(bottom_diff, 'flatten d_h ')
        return bottom_diff
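
A quick shape check of the layer (the sizes here are made up; show_matrix is assumed to be the debug helper from the lab framework, otherwise comment those lines out):

import numpy as np

flatten = FlattenLayer(input_shape=(2, 3, 3), output_shape=(18,))
x = np.arange(2 * 2 * 3 * 3, dtype=float).reshape(2, 2, 3, 3)  # [N=2, C=2, H=3, W=3]
y = flatten.forward(x)
print(y.shape)   # (2, 18)
d = flatten.backward(np.ones_like(y))
print(d.shape)   # (2, 2, 3, 3), same shape as the input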

Later I'll write up how the convolution operation can be accelerated.