https://zh.d2l.ai/chapter_deep-learning-computation/custom-layer.html
Hi. I noticed that the named_parameters() method automatically picks up the weight and bias parameters defined in the MyLinear class. How does PyTorch discover these parameters?
Because you used PyTorch's own building blocks, such as nn.Parameter, nn.Linear, nn.Conv2d, …
My guess: some machinery in Module inspects the instance's attributes, marks every member whose type is Parameter as a parameter of that module, and uses the member name as the key.
As I recall, it happens in nn.Module's __setattr__, which automatically sorts these assignments into the module's OrderedDict.
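A quick way to check this claim (note that _parameters is an internal attribute and may differ across PyTorch versions):

import torch
from torch import nn

class Probe(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(3))  # intercepted by __setattr__ and registered
        self.plain = torch.randn(3)                 # plain tensor: not registered

probe = Probe()
print(type(probe._parameters))         # the dict that __setattr__ fills in
print(list(probe.named_parameters()))  # only 'weight' appears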
Here is my attempt at Exercise 1; not sure whether it's right, corrections welcome.
import torch
from torch import nn

class sumyk(nn.Module):
    def __init__(self, xi, xj):
        super().__init__()
        # weight sized from the two example inputs
        self.weight = nn.Parameter(torch.randn(xi.shape[1], xj.shape[0]))
    def forward(self, xi, xj):
        # use self.weight (not .data) so gradients flow through the layer
        return torch.matmul(torch.matmul(xi, self.weight), xj)
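A hypothetical usage, just to show the shapes involved (all sizes here are made up):

xi = torch.randn(4, 3)
xj = torch.randn(5, 2)
net = sumyk(xi, xj)       # weight has shape (3, 5)
print(net(xi, xj).shape)  # torch.Size([4, 2])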
Asking for help with Exercise 2.
I think it's nn.Module that does the capturing; Parameter just carries the data and the gradient.
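For reference, nn.Parameter is just a torch.Tensor subclass with requires_grad on by default, which is exactly what nn.Module looks for when an attribute is assigned:

import torch
from torch import nn

p = nn.Parameter(torch.randn(2))
print(isinstance(p, torch.Tensor))  # True: Parameter subclasses Tensor
print(p.requires_grad)              # True by default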
A possibly correct implementation.
Exercise 1:
import torch
from torch import nn

class DimensionReduction(nn.Module):
    def __init__(self, i, j, k):
        super(DimensionReduction, self).__init__()
        self.net = nn.Conv2d(in_channels=1, out_channels=k, kernel_size=(i, j))
    def forward(self, X, Y):
        # first build the i*j matrix from X and Y,
        # then let a convolution layer do the weighted sum for us
        matrix = torch.bmm(X, torch.transpose(Y, 1, 2))
        matrix = matrix.unsqueeze(1)  # B*1*i*j
        return self.net(matrix)       # B*k*1*1: one (i, j) kernel per output channel
myNet1 = DimensionReduction(2, 3, 5)
x = torch.ones(1, 2, 1) # B*i*1
y = torch.rand(1, 3, 1) # B*j*1
print(myNet1(x, y))
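To sanity-check the Conv2d trick: on a B*1*i*j input, an (i, j) kernel computes sum_{ij} W_{kij} M_{ij} plus the channel bias b_k, i.e. the target formula up to that bias. A sketch (the shapes here are made up):

import torch
from torch import nn

B, i, j, k = 1, 2, 3, 5
conv = nn.Conv2d(1, k, kernel_size=(i, j))
M = torch.randn(B, 1, i, j)
out = conv(M).reshape(B, k)
ref = torch.einsum('kij,bij->bk', conv.weight[:, 0], M[:, 0]) + conv.bias
print(torch.allclose(out, ref, atol=1e-5))  # True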
Exercise 2:
class HalfFFT(nn.Module):
def __init__(self):
super(HalfFFT, self).__init__()
def forward(self, X):
"""
Compute FFT and return half of it
:param X: size = B*L
:return: size = B*round(L/2)
"""
half_len = round(X.shape[1]/2)
X_f = torch.fft.fft(X)
return X_f[:, :half_len]
myNet2 = HalfFFT()
print(myNet2(torch.rand(2, 3)))
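Side note: for real input, torch.fft.rfft already returns only the non-redundant half of the spectrum (L//2 + 1 bins), so its leading bins match the slice above:

import torch

X = torch.rand(2, 6)
half = torch.fft.fft(X)[:, :round(X.shape[1] / 2)]
print(torch.allclose(half, torch.fft.rfft(X)[:, :half.shape[1]]))  # True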
For Exercise 1, look at the formula again; your implementation doesn't match it at all. It should be y_k = sum_{i,j} W_{ijk} x_i x_j, for example:
y = torch.sum(torch.mm(x.T, x) * w, dim=[1, 2])
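To double-check the one-liner (assuming x is a 1*i row vector, so torch.mm(x.T, x) is the i*i outer product, and w has shape (k, i, i)), compare it against an einsum reference:

import torch

i, k = 4, 3
x = torch.randn(1, i)
w = torch.randn(k, i, i)
y = torch.sum(torch.mm(x.T, x) * w, dim=[1, 2])
y_ref = torch.einsum('kij,i,j->k', w, x[0], x[0])
print(torch.allclose(y, y_ref, atol=1e-6))  # True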
That's because weight and bias are instances of the nn.Parameter class.
# A possible answer for Exercise 1
import torch
from torch import nn

class DemensionReduction(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units * in_units, units))
    def forward(self, X):
        # per sample: flatten the outer product x x^T, then multiply by weight
        Y = torch.matmul(X[0, :].reshape(-1, 1), X[0, :].reshape(1, -1)).reshape(1, -1)
        Z = torch.matmul(Y, self.weight)
        for i in range(1, X.shape[0]):
            Y = torch.matmul(X[i, :].reshape(-1, 1), X[i, :].reshape(1, -1)).reshape(1, -1)
            Z = torch.cat((Z, torch.matmul(Y, self.weight)), 0)
        return Z

dem = DemensionReduction(20, 1)
X = torch.randn(5, 20)
dem(X)
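The per-sample loop can also be collapsed into one einsum over the whole batch; a sketch assuming the same (in_units*in_units, units) weight layout:

import torch

in_units, units, B = 20, 1, 5
W = torch.randn(in_units * in_units, units)
X = torch.randn(B, in_units)

# loop version, as in the post above
Z_loop = torch.cat([(X[b].reshape(-1, 1) @ X[b].reshape(1, -1)).reshape(1, -1) @ W
                    for b in range(B)], dim=0)
# single einsum over the whole batch
Z_vec = torch.einsum('bi,bj,ijk->bk', X, X, W.reshape(in_units, in_units, units))
print(torch.allclose(Z_loop, Z_vec, atol=1e-4))  # True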
import torch
from torch import nn

class Layer(nn.Module):
    def __init__(self, k, i, j):
        super(Layer, self).__init__()
        # use nn.Parameter so the weight is registered and trainable
        self.weight = nn.Parameter(torch.randn(k, i, j))
    def forward(self, xi, xj):
        # element-wise product with the k weight matrices, then sum:
        # y_k = sum_{i,j} W_{kij} (xi xj)_{ij}
        return torch.sum(self.weight * torch.matmul(xi, xj), dim=[1, 2])

k, i, j = 5, 2, 2
net = Layer(k, i, j)
print(net(torch.randn(2).reshape(2, 1), torch.randn(2).reshape(1, 2)))
Not sure whether this is correct.
This touches on Python metaprogramming; without having read the source, my guess is that a Python MetaClass is used.
Exercise 1
import torch
from torch import nn

class DRLayer(nn.Module):
    def __init__(self, i, k):
        super().__init__()
        self.k = k
        self.weight = nn.Parameter(torch.randn((k, i, i)))
    def forward(self, X):
        z = torch.ones((1, self.k))  # placeholder row, dropped at the end
        for i in range(X.shape[0]):
            # outer product of the i-th sample with itself
            y = torch.matmul(X[i, :].reshape(-1, 1), X[i, :].reshape(1, -1))
            # element-wise product (not matmul) matches y_k = sum_{ij} W_{kij} y_{ij}
            tmp_z = (self.weight * y).sum(axis=[1, 2]).reshape(1, -1)
            z = torch.cat([z, tmp_z], 0)
        return z[1:]

net = DRLayer(5, 2)
print(net(torch.rand(4, 5)).shape)
Exercise 1:
import torch
from torch import nn

class MyLinear(nn.Module):
    def __init__(self, in_units, out_units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_units, in_units, in_units))
    def forward(self, X):
        # use self.weight (not .data) so autograd can track it;
        # X is (in_units, 1), so X.T @ W @ X gives one scalar per output unit
        return X.T @ self.weight @ X

net = nn.Sequential(MyLinear(3, 2))
X = torch.randn(3, 1)
net(X)
Not entirely sure this implementation is correct; discussion and corrections welcome!
Q1:
import torch
from torch import nn

class TensorReduction(nn.Module):
    def __init__(self, dim1, dim2):
        super(TensorReduction, self).__init__()
        self.weight = nn.Parameter(torch.rand(dim2, dim1, dim1))
    def forward(self, X):
        Y = torch.zeros(X.shape[0], self.weight.shape[0])
        for k in range(self.weight.shape[0]):
            # X W_k X^T is B*B; only its diagonal (one value per sample) is needed
            temp = torch.matmul(X, self.weight[k]) @ X.T
            Y[:, k] = temp.diagonal()
        return Y
layer = TensorReduction(10, 5)
print(layer(torch.rand(2, 10)).shape)
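One design note: temp is a full B*B matrix of which only the diagonal is kept; the same column can be computed without that intermediate. A sketch with made-up shapes:

import torch

B, d, k = 2, 10, 5
X = torch.rand(B, d)
W = torch.rand(k, d, d)
Y_diag = torch.stack([(X @ W[m] @ X.T).diagonal() for m in range(k)], dim=1)
# row-wise sum of (X W_m) * X is exactly that diagonal, without the B*B product
Y_fast = torch.stack([((X @ W[m]) * X).sum(dim=1) for m in range(k)], dim=1)
print(torch.allclose(Y_diag, Y_fast, atol=1e-5))  # True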
Q2:
class Fourier(nn.Module):
def __init__(self):
super(Fourier, self).__init__()
self.F = torch.fft.fft
def forward(self, X):
return self.F(X)[:, :round(X.shape[1] / 2)]
layer = Fourier()
print(layer(torch.ones(2, 4)))
In the final example, where a model is built with the custom layer, why is the output always tensor([[0.], [0.]])? Thanks, everyone!
At least it achieves the dimensionality reduction:
Q1:
import torch
from torch import nn

class reduction(nn.Module):
    def __init__(self, i, j, k):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(i, j, k))
    def forward(self, X):
        # use self.weight (not .data) so the parameter stays trainable
        return torch.matmul(X, self.weight).sum(axis=[0, 1])

net = reduction(3, 3, 3)
net(torch.ones(3, 3)), torch.ones(3, 3)
After fiddling with matmul's broadcasting rules for quite a while, I finally got this version that works with matrix operations alone, no loops.
import torch
from torch import nn

class DeDim(nn.Module):
    def __init__(self, in_dim1, in_dim2, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim1, in_dim2, out_dim))
    def forward(self, X, Y):
        # (k, j, i) @ (i, B) -> (k, j, B), then back to (B, j, k)
        Xw = torch.matmul(self.weight.transpose(0, -1), X.T).transpose(0, -1)
        # note: matmul broadcasting makes this (B, B, k), pairing every X row with every Y row
        YXw = torch.matmul(Y, Xw)
        return YXw
DoubleLinear = DeDim(3, 4, 5)
X = torch.randn(2, 3)
Y = torch.randn(2, 4)
print(DoubleLinear(X, Y))
Ah, this isn't right. It computes z_k = W_ijk X_i Y_j rather than y_k = W_ijk X_i X_j. Let me think about it some more.
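Follow-up sketch: since the layer computes z_k = W_ijk X_i Y_j, feeding the same tensor as both inputs recovers y_k = W_ijk X_i X_j; the per-sample results then sit on the batch diagonal of the (B, B, k) output (this assumes the DeDim class above with in_dim1 = in_dim2):

net = DeDim(3, 3, 5)              # i must equal j once Y = X
X = torch.randn(2, 3)
Z = net(X, X)                     # shape (B, B, k) due to matmul broadcasting
y = Z.diagonal(dim1=0, dim2=1).T  # (B, k): each sample paired with itself
print(y.shape)                    # torch.Size([2, 5])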