Custom Layers

Could someone check whether this answer is correct? One question about it: if the input X has shape (narray, in_dim), the output comes out as (narray, narray, out_dim). Are the two leading dimensions necessary? (See the per-sample sketch after the code below.)

import torch
from torch import nn

class DeDim(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # X: (narray, in_dim); weight: (in_dim, in_dim, out_dim)
        # Xw: (narray, in_dim, out_dim)
        Xw = torch.matmul(self.weight.transpose(0, -1), X.T).transpose(0, -1)
        # batched matmul broadcasts the 2-D X across the leading narray dim,
        # so XXw ends up with shape (narray, narray, out_dim)
        XXw = torch.matmul(X, Xw)
        return XXw


DoubleLinear = DeDim(3, 5)

X = torch.randn(2, 3)
print(DoubleLinear(X))

class Reduction(nn.Module):
    def __init__(self, k, i, j):
        super().__init__()
        self.k = k
        self.weight = nn.Parameter(torch.rand(k, i, j))

    def forward(self, X):
        y = torch.zeros(self.k)
        # outer product of the row sums and the column sums of X, shape (i, j)
        matrix = torch.mm(X.sum(axis=1).unsqueeze(1), X.sum(axis=0).unsqueeze(1).T)
        for i in range(self.k):
            # weight the outer product elementwise and reduce to a scalar
            y[i] = torch.mul(matrix, self.weight[i]).sum()
        return y
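
On the shape question above: if the exercise is asking for y_k = Σ_ij W_ijk x_i x_j evaluated separately for each row of X, then the two leading (narray, narray) dimensions should not be needed; the off-diagonal entries mix different samples. Here is a minimal per-sample sketch for comparison (the class name TensorReduce and the einsum formulation are my own, just for illustration):

import torch
from torch import nn

class TensorReduce(nn.Module):  # hypothetical per-sample variant, for comparison only
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # X: (narray, in_dim) -> output: (narray, out_dim)
        # out[n, k] = sum over i, j of X[n, i] * X[n, j] * W[i, j, k]
        return torch.einsum('ni, nj, ijk -> nk', X, X, self.weight)

layer = TensorReduce(3, 5)
print(layer(torch.randn(2, 3)).shape)  # torch.Size([2, 5])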

The official documentation for nn.parameter.Parameter states that if you assign a Parameter() as a member attribute of a module, it is automatically added to the module's parameter list. See https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html#torch.nn.parameter.Parameter
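
A quick way to check this registration behavior (a small sketch of my own, not from the docs):

import torch
from torch import nn

class WithParam(nn.Module):  # hypothetical module, only to show automatic registration
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(4, 3))  # assigned as an attribute -> registered
        self.plain = torch.randn(4, 3)                 # a plain tensor is NOT registered

net = WithParam()
print([name for name, _ in net.named_parameters()])  # ['weight']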

class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))
    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

Hi, why does the forward method of the MyLinear class in the book use self.weight.data instead of self.weight? Wouldn't writing it this way prevent backward() from computing a gradient for the weights?
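
A small experiment (my own sketch, not from the book) suggests the concern is real: going through .data detaches the computation from autograd, so no gradient can reach the weight, whereas using self.weight directly keeps the graph intact.

import torch
from torch import nn
from torch.nn import functional as F

w = nn.Parameter(torch.randn(3, 2))
x = torch.randn(4, 3)

out_tracked = F.relu(torch.matmul(x, w)).sum()        # graph goes through w
out_detached = F.relu(torch.matmul(x, w.data)).sum()  # .data cuts the graph

out_tracked.backward()
print(w.grad is not None)          # True: the gradient reached w
print(out_detached.requires_grad)  # False: nothing to backpropagate through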

For problem 1, I think it can be done like this:

class mynet(nn.Module):
    def __init__(self, in_dim, dim):
        super().__init__()
        # one weight column per output unit, over the flattened (in_dim, in_dim) outer product
        self.weight = nn.Parameter(torch.rand(in_dim * in_dim, dim))
    def forward(self, x):
        x1 = x.unsqueeze(1)
        x2 = x.unsqueeze(1).repeat(1, x.shape[1], 1).transpose(1, 2)
        # x3[b] is the flattened outer product of x[b] with itself
        x3 = (x1 * x2).reshape(-1, x.shape[1]*x.shape[1])
        return torch.matmul(x3, self.weight)
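
Assuming the weight is stored with the flattened (in_dim * in_dim, dim) shape as above, a quick shape check (my own):

net = mynet(3, 5)
x = torch.randn(2, 3)
print(net(x).shape)  # torch.Size([2, 5])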

The most concise approach I came up with for problem 1:

class Cal_dim(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, in_units, units))

    def forward(self, X):
        # X is a length-in_units vector; broadcasting builds the outer product X_i * X_j,
        # which is multiplied elementwise with the (in_units, in_units, units) weight
        # and summed over i and j, leaving a length-units output
        return torch.sum(X.reshape(-1, 1, 1) * X.reshape(1, -1, 1) * self.weight, dim=(0, 1))

net = Cal_dim(5, 3)
X = torch.randn(5)  # example input of length in_units
net(X)

My attempt at problem 1:

class my_net(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(dim, dim))

    def forward(self, X):
        # per-row quadratic form x^T W x, i.e. the out_dim = 1 special case
        X = X @ self.weight * X
        return X.sum(axis=1).reshape(-1, 1)
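
A quick check (my own) of what this returns:

net = my_net(3)
X = torch.randn(2, 3)
print(net(X).shape)  # torch.Size([2, 1]); each entry is x^T W x for one row of X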

torch.einsum('i, j, ijk -> k', X, X, W)
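
For completeness, a minimal sketch (the class name EinsumReduce is mine) of wrapping that one-liner into a layer, assuming X is a length-in_units vector and W has shape (in_units, in_units, units):

import torch
from torch import nn

class EinsumReduce(nn.Module):  # hypothetical wrapper around the one-liner above
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, in_units, units))

    def forward(self, X):
        # y_k = sum over i, j of x_i * x_j * W_ijk
        return torch.einsum('i, j, ijk -> k', X, X, self.weight)

layer = EinsumReduce(5, 3)
print(layer(torch.randn(5)).shape)  # torch.Size([3])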

Sharing my implementation of problem 1, written with the Einstein summation convention (otherwise it felt pretty cumbersome to write out).

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum('xyk, bmyc, bnxc->bmnkc', self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer(42, 67, 6)
layer(X).shape

And here is a general version:

class TesnorReduceLayer_General(nn.Module):
    def __init__(self, out_features):
        super().__init__()
        
        # weight parameter (created lazily in initParam)
        self.weight = None
        # output dimension
        self.out_features = out_features
        # einsum expression template; 'c' stands in for the extra trailing dims
        self.einsum_c_str = "xyk, bmyc, bnxc->bmnkc"
        self.einsum_str_lis = "cdefghijlopqrstuvwz"  # x, y, k, m, n, b are reserved
    
    def initParam(self, X):
        # X: (b, x, y, any)
        assert 0 <= len(X.shape) - 3 <= len(self.einsum_str_lis)
        x = X.shape[1]
        y = X.shape[2]
        self.weight = nn.Parameter(
            torch.randn(x, y, self.out_features)
        )
        self.einsum_c_str = self.einsum_c_str.replace('c', self.einsum_str_lis[:len(X.shape) - 3])
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        if self.weight is None:
            self.initParam(X)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum(self.einsum_c_str, self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer_General(6)
print(layer(X).shape)

Y = torch.rand(64, 21, 13, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Y).shape)

Z = torch.rand(32, 12, 11, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Z).shape)

Hmm, I ran it many times and the output was not always all zeros; try running it a few more times.


Building on your answer, I wrote a simplified version:

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        ### parameter init
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    def forward(self, x):
        out = torch.einsum('ijk, i, j -> ijk', self.weight,x,x).sum(axis=[0,1])
        return out
x = torch.rand(42)
layer = TesnorReduceLayer(42, 42, 7)
layer(x)
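
As a sanity check (my own sketch, assuming the target quantity is y_k = Σ_ij W_ijk x_i x_j), the einsum result can be compared with an explicit double loop:

x = torch.rand(42)
layer = TesnorReduceLayer(42, 42, 7)
y_fast = layer(x).detach()
W = layer.weight.detach()
y_loop = torch.zeros(7)
for i in range(42):
    for j in range(42):
        y_loop += W[i, j] * x[i] * x[j]
print(torch.allclose(y_fast, y_loop, atol=1e-4))  # True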