Wednesday morning: working on the multi-agent assignment.
Taking a nap first and picking it back up tonight; later in class I'll do a bit of research on Li Xuelong's work.
Thursday morning: working on the style-transfer assignment. I plan to go through the code carefully over the weekend, ideally implementing it once myself.
This afternoon I can look into decision making with large models.
Implementation of the convolution layer
This is the naive implementation of the convolution layer's forward pass. It uses four nested loops, so the time complexity is quite high.
```python
def forward_raw(self, input):
    start_time = time.time()
    self.input = input
    # pad the input so that the original image sits in the center
    height = self.input.shape[2] + self.padding * 2
    width = self.input.shape[3] + self.padding * 2
    self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
    self.input_pad[:, :, self.padding:self.padding+self.input.shape[2],
                   self.padding:self.padding+self.input.shape[3]] = self.input
    # compute the output shape
    height_out = (height - self.kernel_size) // self.stride + 1
    width_out = (width - self.kernel_size) // self.stride + 1
    self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
    # Core of the convolution: the output feature map has shape
    # (batch, out_channel, height_out, width_out), and we walk over it element by element.
    # Note how the feature map of one output channel is produced:
    # self.weight[:, :, :, idxc] selects one kernel, of shape (in_channel, kernel_size, kernel_size),
    # since each output channel corresponds to exactly one kernel.
    # Each output element is the inner product of that kernel with the matching window of the
    # idxn-th padded input, plus the bias.
    for idxn in range(self.input.shape[0]):
        for idxc in range(self.channel_out):
            for idxh in range(height_out):
                for idxw in range(width_out):
                    self.output[idxn, idxc, idxh, idxw] = np.sum(
                        self.weight[:, :, :, idxc] *
                        self.input_pad[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size,
                                       idxw*self.stride:idxw*self.stride+self.kernel_size]) + self.bias[idxc]
    self.forward_time = time.time() - start_time
    return self.output
```
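To make the indexing concrete, here is a small self-contained sketch (my own example, not part of the lab code) of what one iteration of the innermost loop computes: select one kernel with `weight[:, :, :, idxc]`, take the matching window of the padded input, and sum the elementwise product plus the bias.

```python
import numpy as np

np.random.seed(0)
batch, channel_in, channel_out = 1, 2, 3
kernel_size, stride, padding = 3, 1, 1

x = np.random.randn(batch, channel_in, 4, 4)
weight = np.random.randn(channel_in, kernel_size, kernel_size, channel_out)
bias = np.zeros(channel_out)

# pad only the spatial dimensions, as forward_raw does
x_pad = np.pad(x, ((0, 0), (0, 0), (padding, padding), (padding, padding)))

idxn, idxc, idxh, idxw = 0, 1, 2, 2   # one output element
window = x_pad[idxn, :, idxh*stride:idxh*stride+kernel_size,
               idxw*stride:idxw*stride+kernel_size]          # (channel_in, k, k)
out = np.sum(weight[:, :, :, idxc] * window) + bias[idxc]    # one scalar of the output map
print(out)
```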
Next, the backward pass of the convolution layer.
```python
def backward_raw(self, top_diff):
    start_time = time.time()
    # allocate buffers for d_weight, d_bias and bottom_diff
    self.d_weight = np.zeros(self.weight.shape)
    self.d_bias = np.zeros(self.bias.shape)
    bottom_diff = np.zeros(self.input_pad.shape)
    # compute the gradients of the weights and the bias
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    # The bias gradient is just the sum of top_diff over the matching positions;
                    # the weight gradient accumulates top_diff times the matching window of input_pad;
                    # bottom_diff accumulates top_diff times the kernel weights.
                    self.d_weight[:, :, :, idxc] += top_diff[idxn, idxc, idxh, idxw] * \
                        self.input_pad[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size,
                                       idxw*self.stride:idxw*self.stride+self.kernel_size]
                    self.d_bias[idxc] += top_diff[idxn, idxc, idxh, idxw]
                    bottom_diff[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size,
                                idxw*self.stride:idxw*self.stride+self.kernel_size] += \
                        top_diff[idxn, idxc, idxh, idxw] * self.weight[:, :, :, idxc]
    # strip the padding so bottom_diff matches the original (unpadded) input
    bottom_diff = bottom_diff[:, :, self.padding:self.padding+self.input.shape[2],
                              self.padding:self.padding+self.input.shape[3]]
    self.backward_time = time.time() - start_time
    return bottom_diff
```
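The gradient rules above (d_weight accumulates top_diff times the input window, bottom_diff accumulates top_diff times the kernel) can be sanity-checked numerically. Below is a small self-contained sketch (my own example, not part of the lab code) that compares the analytic weight gradient of a tiny single-channel convolution against a finite-difference estimate.

```python
import numpy as np

def conv2d(x, w, stride=1):
    """Valid convolution of a single-channel image x (H, W) with kernel w (k, k)."""
    k = w.shape[0]
    h_out = (x.shape[0] - k) // stride + 1
    w_out = (x.shape[1] - k) // stride + 1
    out = np.zeros((h_out, w_out))
    for i in range(h_out):
        for j in range(w_out):
            out[i, j] = np.sum(w * x[i*stride:i*stride+k, j*stride:j*stride+k])
    return out

rng = np.random.default_rng(0)
x = rng.standard_normal((5, 5))
w = rng.standard_normal((3, 3))
top_diff = rng.standard_normal((3, 3))          # pretend upstream gradient

# analytic gradient, same accumulation rule as backward_raw
d_w = np.zeros_like(w)
for i in range(top_diff.shape[0]):
    for j in range(top_diff.shape[1]):
        d_w += top_diff[i, j] * x[i:i+3, j:j+3]

# numerical gradient of loss = sum(conv2d(x, w) * top_diff)
d_w_num = np.zeros_like(w)
eps = 1e-6
for a in range(3):
    for b in range(3):
        w_pos, w_neg = w.copy(), w.copy()
        w_pos[a, b] += eps
        w_neg[a, b] -= eps
        d_w_num[a, b] = (np.sum(conv2d(x, w_pos) * top_diff) -
                         np.sum(conv2d(x, w_neg) * top_diff)) / (2 * eps)

print(np.max(np.abs(d_w - d_w_num)))            # should be tiny, ~1e-9 or smaller
```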
Implementation of the pooling layer
This is the naive implementation of the max pooling layer.
```python
def forward_raw(self, input):
    start_time = time.time()
    # compute the output shape and allocate a mask for the max positions
    self.input = input
    self.max_index = np.zeros(self.input.shape)
    height_out = (self.input.shape[2] - self.kernel_size) // self.stride + 1
    width_out = (self.input.shape[3] - self.kernel_size) // self.stride + 1
    self.output = np.zeros([self.input.shape[0], self.input.shape[1], height_out, width_out])
    # Pooling is very similar to convolution, but each input channel is handled independently:
    # take the maximum of every window, and record the position of that maximum in
    # self.max_index so the backward pass can route the gradient back to it.
    for idxn in range(self.input.shape[0]):
        for idxc in range(self.input.shape[1]):
            for idxh in range(height_out):
                for idxw in range(width_out):
                    window = self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size,
                                        idxw*self.stride:idxw*self.stride+self.kernel_size]
                    self.output[idxn, idxc, idxh, idxw] = np.max(window)
                    current_max_index = np.argmax(window)
                    current_max_index = np.unravel_index(current_max_index, [self.kernel_size, self.kernel_size])
                    self.max_index[idxn, idxc, idxh*self.stride+current_max_index[0],
                                   idxw*self.stride+current_max_index[1]] = 1
    return self.output
```
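The only subtle part of the forward pass is recording where each window's maximum lives. Here is a small standalone illustration (my own example, not from the lab code) of the `argmax` + `unravel_index` bookkeeping on a single 4×4 channel with 2×2 non-overlapping windows.

```python
import numpy as np

x = np.array([[1., 3., 2., 0.],
              [0., 5., 1., 1.],
              [2., 0., 4., 2.],
              [1., 1., 0., 3.]])
kernel_size = stride = 2

mask = np.zeros_like(x)
for i in range(2):
    for j in range(2):
        window = x[i*stride:i*stride+kernel_size, j*stride:j*stride+kernel_size]
        # argmax gives a flat index; unravel_index turns it back into (row, col) inside the window
        r, c = np.unravel_index(np.argmax(window), (kernel_size, kernel_size))
        mask[i*stride + r, j*stride + c] = 1

print(mask)
# [[0. 0. 1. 0.]
#  [0. 1. 0. 0.]
#  [1. 0. 1. 0.]
#  [0. 0. 0. 0.]]
```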
The backward pass.
```python
def backward_raw_book(self, top_diff):
    bottom_diff = np.zeros(self.input.shape)
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    # Route the gradient back to the position of each window's maximum.
                    # Note: this textbook version recomputes argmax on the input window here
                    # instead of reusing the max_index mask recorded in the forward pass.
                    max_index = np.argmax(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size,
                                                     idxw*self.stride:idxw*self.stride+self.kernel_size])
                    max_index = np.unravel_index(max_index, [self.kernel_size, self.kernel_size])
                    bottom_diff[idxn, idxc, idxh*self.stride+max_index[0],
                                idxw*self.stride+max_index[1]] = top_diff[idxn, idxc, idxh, idxw]
    show_matrix(top_diff, 'top_diff--------')
    show_matrix(bottom_diff, 'max pooling d_h ')
    return bottom_diff
```
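As for the unused `max_index`: the version above simply recomputes the argmax of each window. A possible alternative (my own sketch, not the textbook's code) is to reuse the mask recorded in `forward_raw`; with non-overlapping windows (stride equal to kernel_size, the usual max-pooling setup) each input position belongs to exactly one window, so routing the gradient through the mask gives the same result.

```python
def backward_with_mask(self, top_diff):
    # sketch only: reuse the self.max_index mask recorded in forward_raw
    # (assumes stride == kernel_size, i.e. non-overlapping pooling windows)
    bottom_diff = np.zeros(self.input.shape)
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    # mask window: 1 at the recorded max position, 0 elsewhere
                    mask = self.max_index[idxn, idxc,
                                          idxh*self.stride:idxh*self.stride+self.kernel_size,
                                          idxw*self.stride:idxw*self.stride+self.kernel_size]
                    bottom_diff[idxn, idxc,
                                idxh*self.stride:idxh*self.stride+self.kernel_size,
                                idxw*self.stride:idxw*self.stride+self.kernel_size] += \
                        top_diff[idxn, idxc, idxh, idxw] * mask
    return bottom_diff
```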
Implementation of the flatten layer
It simply flattens the input data; note that it first transposes from (N, C, H, W) to (N, H, W, C) before reshaping.
```python
class FlattenLayer(object):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape
        assert np.prod(self.input_shape) == np.prod(self.output_shape)
        print('\tFlatten layer with input shape %s, output shape %s.' % (str(self.input_shape), str(self.output_shape)))

    def forward(self, input):
        assert list(input.shape[1:]) == list(self.input_shape)
        self.input = np.transpose(input, [0, 2, 3, 1])
        # flatten each sample in the batch into self.output_shape
        self.output = self.input.reshape([self.input.shape[0]] + list(self.output_shape))
        show_matrix(self.output, 'flatten out ')
        return self.output

    def backward(self, top_diff):
        assert list(top_diff.shape[1:]) == list(self.output_shape)
        top_diff = np.transpose(top_diff, [0, 3, 1, 2])
        bottom_diff = top_diff.reshape([top_diff.shape[0]] + list(self.input_shape))
        show_matrix(bottom_diff, 'flatten d_h ')
        return bottom_diff
```
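A tiny standalone check (my own example, not from the lab code) of what `forward` does to the memory layout: transpose from (N, C, H, W) to (N, H, W, C), then flatten each sample.

```python
import numpy as np

x = np.arange(2 * 3 * 2 * 2).reshape(2, 3, 2, 2)       # (batch, channel, height, width)
flat = np.transpose(x, [0, 2, 3, 1]).reshape(2, -1)    # (batch, height*width*channel)
print(flat.shape)   # (2, 12)
print(flat[0, :3])  # [0 4 8]: the three channel values at spatial position (0, 0)
```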
Later I'll write up how to accelerate the convolution operation.