Wednesday morning: working on the multi-agent assignment.
I'll take a nap first and come back to it in the evening; during class later I'll look into that work by Li Xuelong (李学龙).

Thursday morning: working on the style-transfer assignment. I plan to go through the code properly over the weekend, ideally implementing it once myself.
This afternoon I can also do some reading on decision making with large models.

Implementing the convolution layer

This is the naive implementation of the convolution layer's forward pass. It uses four nested loops, so the time complexity is quite high.

def forward_raw(self, input):
    start_time = time.time()
    self.input = input  # [N, C, H, W]
    """Zero-pad the input so the original image sits in the center."""
    height = self.input.shape[2] + self.padding * 2
    width = self.input.shape[3] + self.padding * 2
    self.input_pad = np.zeros([self.input.shape[0], self.input.shape[1], height, width])
    self.input_pad[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]] = self.input
    """Compute the output shape."""
    height_out = (height - self.kernel_size) // self.stride + 1
    width_out = (width - self.kernel_size) // self.stride + 1
    self.output = np.zeros([self.input.shape[0], self.channel_out, height_out, width_out])
    """
    Core of the convolution. The output feature map has shape
    (batch, out_channel, height_out, width_out), and we visit it element by element.
    Note how the feature map of one output channel is computed:
    self.weight[:, :, :, idxc] selects the weights of a single kernel, with shape
    (in_channel, kernel_size, kernel_size) -- each output channel corresponds to one kernel.
    Each output element is the inner product of that kernel with the matching window
    of the idxn-th padded input, plus the bias.
    """
    for idxn in range(self.input.shape[0]):
        for idxc in range(self.channel_out):
            for idxh in range(height_out):
                for idxw in range(width_out):
                    # TODO: forward pass of the convolution layer -- inner product of the feature-map window and the kernel, plus the bias
                    self.output[idxn, idxc, idxh, idxw] = np.sum(self.weight[:, :, :, idxc] * self.input_pad[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size]) + self.bias[idxc]
    self.forward_time = time.time() - start_time
    return self.output
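
To convince myself that the four-loop forward really computes a sliding inner product (a cross-correlation, since the kernel is not flipped), I wrote a small standalone sanity check. The helper naive_conv2d and the shapes below are made up for this check and are not part of the assignment code; the reference uses scipy.signal.correlate2d, whose 'same' mode with zero fill matches padding=1 for a 3x3 kernel.

import numpy as np
from scipy.signal import correlate2d

def naive_conv2d(x, w, b, stride=1, pad=0):
    """Standalone copy of the four-loop forward above. x: [N, C, H, W], w: [C, k, k, C_out], b: [C_out]."""
    n, c, h, win = x.shape
    k, cout = w.shape[1], w.shape[3]
    xp = np.zeros((n, c, h + 2 * pad, win + 2 * pad))
    xp[:, :, pad:pad + h, pad:pad + win] = x
    ho = (h + 2 * pad - k) // stride + 1
    wo = (win + 2 * pad - k) // stride + 1
    out = np.zeros((n, cout, ho, wo))
    for idxn in range(n):
        for idxc in range(cout):
            for idxh in range(ho):
                for idxw in range(wo):
                    out[idxn, idxc, idxh, idxw] = np.sum(
                        w[:, :, :, idxc] * xp[idxn, :, idxh * stride:idxh * stride + k,
                                              idxw * stride:idxw * stride + k]) + b[idxc]
    return out

np.random.seed(0)
x = np.random.randn(2, 3, 8, 8)
w = np.random.randn(3, 3, 3, 4)   # [C_in, k, k, C_out], same layout as self.weight above
b = np.random.randn(4)
out = naive_conv2d(x, w, b, stride=1, pad=1)

# scipy's correlate2d does the same sliding inner product, one 2-D slice at a time
ref = np.zeros_like(out)
for idxn in range(2):
    for idxc in range(4):
        ref[idxn, idxc] = sum(correlate2d(x[idxn, cin], w[cin, :, :, idxc], mode='same')
                              for cin in range(3)) + b[idxc]
print(np.allclose(out, ref))   # expect True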

Next comes the backward pass of the convolution layer.

def backward_raw(self, top_diff):
    start_time = time.time()
    """Allocate buffers for d_weight, d_bias and bottom_diff."""
    self.d_weight = np.zeros(self.weight.shape)
    self.d_bias = np.zeros(self.bias.shape)
    bottom_diff = np.zeros(self.input_pad.shape)
    """Compute the gradients of the weights and the bias."""
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    # TODO: backward pass of the convolution layer -- gradients of the weights and bias, plus the gradient passed to the layer below
                    """
                    The bias gradient simply accumulates top_diff at the corresponding positions.
                    The weight gradient accumulates top_diff times the matching window of input_pad.
                    """
                    self.d_weight[:, :, :, idxc] += top_diff[idxn, idxc, idxh, idxw] * self.input_pad[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size]
                    self.d_bias[idxc] += top_diff[idxn, idxc, idxh, idxw]
                    bottom_diff[idxn, :, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size] += top_diff[idxn, idxc, idxh, idxw] * self.weight[:, :, :, idxc]
    # Crop off the padding to recover the gradient w.r.t. the original (unpadded) input.
    bottom_diff = bottom_diff[:, :, self.padding:self.padding+self.input.shape[2], self.padding:self.padding+self.input.shape[3]]
    self.backward_time = time.time() - start_time
    return bottom_diff
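
Before moving on I also wanted a way to check backward_raw numerically. The sketch below is self-contained and only mirrors the loops above (conv_forward and conv_dweight are my own throwaway helpers, not the lab's API): it compares the accumulated weight gradient against a finite-difference estimate for one weight entry, with the loss taken as the plain sum of the outputs.

import numpy as np

def conv_forward(x, w, b, stride, pad):
    """Minimal standalone copy of forward_raw, also returning the padded input."""
    n, c, h, wi = x.shape
    k, cout = w.shape[1], w.shape[3]
    xp = np.zeros((n, c, h + 2 * pad, wi + 2 * pad))
    xp[:, :, pad:pad + h, pad:pad + wi] = x
    ho = (h + 2 * pad - k) // stride + 1
    wo = (wi + 2 * pad - k) // stride + 1
    out = np.zeros((n, cout, ho, wo))
    for idxn in range(n):
        for idxc in range(cout):
            for idxh in range(ho):
                for idxw in range(wo):
                    out[idxn, idxc, idxh, idxw] = np.sum(
                        w[:, :, :, idxc] * xp[idxn, :, idxh * stride:idxh * stride + k,
                                              idxw * stride:idxw * stride + k]) + b[idxc]
    return out, xp

def conv_dweight(xp, w, top_diff, stride):
    """Same accumulation as the d_weight update in backward_raw."""
    dw = np.zeros(w.shape)
    k = w.shape[1]
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    dw[:, :, :, idxc] += top_diff[idxn, idxc, idxh, idxw] * \
                        xp[idxn, :, idxh * stride:idxh * stride + k, idxw * stride:idxw * stride + k]
    return dw

np.random.seed(0)
x = np.random.randn(2, 3, 5, 5)
w = np.random.randn(3, 3, 3, 2)
b = np.random.randn(2)
out, xp = conv_forward(x, w, b, stride=1, pad=1)
top_diff = np.ones_like(out)          # loss = sum of all outputs, so dL/d(output) = 1 everywhere
dw = conv_dweight(xp, w, top_diff, stride=1)

# finite-difference estimate for a single weight entry
eps = 1e-6
w_plus = w.copy()
w_plus[0, 1, 2, 1] += eps
out_plus, _ = conv_forward(x, w_plus, b, stride=1, pad=1)
numeric = (out_plus.sum() - out.sum()) / eps
print(dw[0, 1, 2, 1], numeric)        # the two numbers should be nearly identical

The bias gradient can be checked the same way by perturbing b[idxc] instead of a weight entry.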

Implementing the pooling layer

This is the naive implementation of the max-pooling layer.

def forward_raw(self, input):
    start_time = time.time()
    """Compute the output shape and allocate the max-index mask."""
    self.input = input  # [N, C, H, W]
    self.max_index = np.zeros(self.input.shape)
    height_out = (self.input.shape[2] - self.kernel_size) // self.stride + 1
    width_out = (self.input.shape[3] - self.kernel_size) // self.stride + 1
    self.output = np.zeros([self.input.shape[0], self.input.shape[1], height_out, width_out])
    """
    Pooling looks a lot like convolution, except that each input channel is handled
    independently and we take the maximum inside each window.
    We also record the position of each maximum so the backward pass can route the
    gradient back to the right place.
    """
    for idxn in range(self.input.shape[0]):
        for idxc in range(self.input.shape[1]):
            for idxh in range(height_out):
                for idxw in range(width_out):
                    # TODO: forward pass of the max-pooling layer -- take the maximum inside the pooling window
                    self.output[idxn, idxc, idxh, idxw] = np.max(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size])
                    current_max_index = np.argmax(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size])
                    current_max_index = np.unravel_index(current_max_index, [self.kernel_size, self.kernel_size])
                    self.max_index[idxn, idxc, idxh*self.stride+current_max_index[0], idxw*self.stride+current_max_index[1]] = 1
    return self.output
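
A tiny hand-checkable example (numbers made up) of what the forward pass computes for a single 4x4 channel with kernel_size=2 and stride=2; the reshape trick below is just a compact way to take the per-window maximum over non-overlapping 2x2 windows.

import numpy as np

x = np.array([[1.,  2.,  5.,  0.],
              [3.,  4., -1.,  2.],
              [0.,  1.,  7.,  8.],
              [2., -3.,  6.,  9.]])
# kernel_size = 2, stride = 2 -> four non-overlapping 2x2 windows
# expected output:            [[4., 5.],
#                              [2., 9.]]
# expected max_index mask: 1 at the positions of 4, 5, 2 and 9, 0 elsewhere
out = x.reshape(2, 2, 2, 2).transpose(0, 2, 1, 3).reshape(2, 2, 4).max(axis=2)
print(out)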

Backward pass

def backward_raw_book(self, top_diff):
    bottom_diff = np.zeros(self.input.shape)
    for idxn in range(top_diff.shape[0]):
        for idxc in range(top_diff.shape[1]):
            for idxh in range(top_diff.shape[2]):
                for idxw in range(top_diff.shape[3]):
                    """
                    The gradient is routed back to the position of the maximum in each
                    pooling window. Note that this version recomputes the argmax from
                    self.input instead of reusing the max_index mask recorded in the
                    forward pass, which is why that mask never shows up here.
                    """
                    max_index = np.argmax(self.input[idxn, idxc, idxh*self.stride:idxh*self.stride+self.kernel_size, idxw*self.stride:idxw*self.stride+self.kernel_size])
                    max_index = np.unravel_index(max_index, [self.kernel_size, self.kernel_size])
                    bottom_diff[idxn, idxc, idxh*self.stride+max_index[0], idxw*self.stride+max_index[1]] = top_diff[idxn, idxc, idxh, idxw]
    show_matrix(top_diff, 'top_diff--------')
    show_matrix(bottom_diff, 'max pooling d_h ')
    return bottom_diff
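
Since backward_raw_book never touches the max_index mask recorded in the forward pass, here is a sketch of how that mask could be used instead. It assumes non-overlapping windows (stride == kernel_size) and an input whose height and width are divisible by the stride, so that every input position belongs to exactly one window; under those assumptions, upsampling top_diff and keeping it only at the recorded maxima gives the same bottom_diff without recomputing any argmax.

def backward_with_mask(self, top_diff):
    """My sketch of an alternative backward pass that reuses self.max_index.
    Assumes stride == kernel_size and input height/width divisible by the stride."""
    # Broadcast each output gradient over its pooling window...
    upsampled = np.repeat(np.repeat(top_diff, self.stride, axis=2), self.stride, axis=3)
    # ...then keep it only where the forward pass found the maximum.
    bottom_diff = self.max_index * upsampled
    return bottom_diff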

Implementing the flatten layer

It simply flattens the input data.

class FlattenLayer(object):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape
        assert np.prod(self.input_shape) == np.prod(self.output_shape)
        print('\tFlatten layer with input shape %s, output shape %s.' % (str(self.input_shape), str(self.output_shape)))
    def forward(self, input):
        assert list(input.shape[1:]) == list(self.input_shape)
        # matconvnet feature map dim: [N, height, width, channel]
        # ours feature map dim: [N, channel, height, width]
        self.input = np.transpose(input, [0, 2, 3, 1])
        """Flatten each sample in the batch into self.output_shape."""
        self.output = self.input.reshape([self.input.shape[0]] + list(self.output_shape))
        show_matrix(self.output, 'flatten out ')
        return self.output
    def backward(self, top_diff):
        assert list(top_diff.shape[1:]) == list(self.output_shape)
        # Invert the forward pass: first reshape back to the transposed layout
        # [N, height, width, channel], then transpose back to [N, channel, height, width].
        top_diff = top_diff.reshape([top_diff.shape[0]] + list(self.input.shape[1:]))
        bottom_diff = np.transpose(top_diff, [0, 3, 1, 2])
        show_matrix(bottom_diff, 'flatten d_h ')
        return bottom_diff
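
A quick shape check of the layer (the sizes here are made up; show_matrix is assumed to be the debug helper from the lab framework, otherwise comment those lines out):

import numpy as np

flatten = FlattenLayer(input_shape=(2, 3, 3), output_shape=(18,))
x = np.arange(2 * 2 * 3 * 3, dtype=float).reshape(2, 2, 3, 3)  # [N=2, C=2, H=3, W=3]
y = flatten.forward(x)
print(y.shape)   # (2, 18)
d = flatten.backward(np.ones_like(y))
print(d.shape)   # (2, 2, 3, 3), same shape as the input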

Later I'll write up how the convolution operation can be accelerated.