Layers and Blocks

Code for building a simple MLP:
import torch
from torch import nn
from torch.nn import functional as F

net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))

X = torch.rand(2, 20)
net(X)
nn.Sequential defines a special kind of Module.
Custom Blocks
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        return self.out(F.relu(self.hidden(X)))
To use this class, we first instantiate the MLP; its layers are then invoked on every call to the forward propagation function, as in the sketch below.
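A minimal usage sketch, assuming the X defined in the first example:

net = MLP()
net(X)  # X has shape (2, 20), so the output has shape (2, 10)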
Sequential Blocks
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # _modules is an ordered dict with string keys; registering each
            # block here lets PyTorch track it as a submodule
            self._modules[str(idx)] = block

    def forward(self, X):
        for block in self._modules.values():
            X = block(X)
        return X

net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)
Executing Code in the Forward Propagation Function
class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # constant weights that are never updated during training
        self.rand_weight = torch.rand((20, 20), requires_grad=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # reuse the same fully connected layer (shared parameters)
        X = self.linear(X)
        # control flow: keep halving until the L1 norm drops below 1
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

net = FixedHiddenMLP()
net(X)
Blocks can be mixed and matched and nested freely.
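A sketch of such nesting, reusing the FixedHiddenMLP defined above; the NestMLP class here is an illustrative example:

class NestMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),
                                 nn.Linear(64, 32), nn.ReLU())
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        return self.linear(self.net(X))

# a Sequential that nests a custom block, a plain layer, and FixedHiddenMLP
chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())
chimera(X)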
Parameter Management

An MLP with a single hidden layer:
import torch
from torch import nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(size=(2, 4))
net(X)
Parameter Access
print(net[2].state_dict())
Indexing into the network accesses the parameters of the last layer, the output layer.
Accessing specific parameters:
print(type(net[2].bias))
print(net[2].bias)
print(net[2].weight)
print(net[2].bias.data)
print(net[2].weight.grad)
Accessing all parameters at once:
print(*[(name, param.shape) for name, param in net[0].named_parameters()])
print(*[(name, param.shape) for name, param in net.named_parameters()])
Parameters can also be accessed like this:
net.state_dict()['2.bias'].data
Collecting Parameters from Nested Blocks
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                         nn.Linear(8, 4), nn.ReLU())

def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}', block1())
    return net

rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)
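Because the blocks are nested, parameters can be reached by chained indexing; a small sketch (the specific indices are just one example):

print(rgnet)                  # shows the nested structure
rgnet[0][1][0].bias.data      # block2 -> second block1 -> first linear layer's bias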
Built-in Initialization
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)

net.apply(init_normal)
net[0].weight.data[0], net[0].bias.data[0]

def init_constant(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 1)
        nn.init.zeros_(m.bias)

net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]
Applying different initialization methods to different blocks:
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)

net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)
Custom Initialization
def my_init(m):
    if type(m) == nn.Linear:
        print("Init", *[(name, param.shape)
                        for name, param in m.named_parameters()][0])
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5

net.apply(my_init)
net[0].weight[:2]
A more direct (brute-force) way is to set the parameter values in place:
net[0].weight.data[:] += 1
net[0].weight.data[0, 0] = 42
net[0].weight.data[0]
Tied Parameters
shared = nn.Linear(8, 8)
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),
                    shared, nn.ReLU(),
                    shared, nn.ReLU(),
                    nn.Linear(8, 1))
net(X)
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100
# the two layers share the same parameter object, so changing one changes the other
print(net[2].weight.data[0] == net[4].weight.data[0])
Custom Layers

Constructing a custom layer that has no parameters:
import torch
import torch.nn.functional as F
from torch import nn


class CenteredLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, X):
        return X - X.mean()

layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))
Incorporating the layer as a component when building more complex models:
net = nn.Sequential(nn.Linear(8, 128), CenteredLayer())
Y = net(torch.rand(4, 8))
Y.mean()
Layers with Parameters
class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

dense = MyLinear(5, 3)
dense.weight
Using the custom layer to perform forward propagation directly:
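A minimal sketch, reusing the dense instance created above on a random input:

dense(torch.rand(2, 5))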
Building models with custom layers:
net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))
net(torch.rand(2, 64))
Reading and Writing Files

Loading and saving tensors:
import torch
from torch import nn
from torch.nn import functional as F

x = torch.arange(4)
torch.save(x, 'x-file')

x2 = torch.load('x-file')
x2
Store a list of tensors and then read them back into memory:
y = torch.zeros(4)
torch.save([x, y], 'x-files')
x2, y2 = torch.load('x-files')
(x2, y2)
Write and then read a dictionary that maps strings to tensors:
mydict = {'x': x, 'y': y}
torch.save(mydict, 'mydict')
mydict2 = torch.load('mydict')
mydict2
Loading and Saving Model Parameters
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        return self.output(F.relu(self.hidden(x)))

net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)
Store the model's parameters in a file called "mlp.params":
torch.save(net.state_dict(), 'mlp.params')
Instantiate a fresh copy of the original MLP model and read in the parameters stored in the file directly:
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()
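A quick sketch to check that the clone produces the same output as the original model, assuming the X and Y computed above:

Y_clone = clone(X)
Y_clone == Y  # all entries should be True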