Custom Layers

Could someone check whether this answer is correct? One doubt about it: if the input X has shape (narray, in_dim), the output has shape (narray, narray, out_dim). Are the first two dimensions really necessary?

import torch
from torch import nn

class DeDim(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # weight: (in_dim, in_dim, out_dim); X: (narray, in_dim)
        # Xw[n, j, k] = sum_i W[i, j, k] * X[n, i]    -> (narray, in_dim, out_dim)
        Xw = torch.matmul(self.weight.transpose(0, -1), X.T).transpose(0, -1)
        # XXw[b, m, k] = sum_j X[m, j] * Xw[b, j, k]  -> (narray, narray, out_dim)
        XXw = torch.matmul(X, Xw)
        return XXw


DoubleLinear = DeDim(3, 5)

X = torch.randn(2, 3)
print(DoubleLinear(X))
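
Not an authoritative answer to the question above, but a minimal sanity-check sketch of my own, assuming the exercise wants y[n, k] = sum_{i,j} W[i, j, k] * X[n, i] * X[n, j] per row of X: the per-row result sits on the diagonal of the first two output dimensions, and the off-diagonal entries are cross-terms between different rows.

# Sanity-check sketch (assumption: the target is y[n, k] = sum_{i,j} W[i, j, k] * X[n, i] * X[n, j])
ref = torch.einsum('ni,nj,ijk->nk', X, X, DoubleLinear.weight)
out = DoubleLinear(X)                                   # shape (narray, narray, out_dim)
diag = torch.stack([out[n, n] for n in range(X.shape[0])])
print(torch.allclose(ref, diag, atol=1e-5))             # expected: True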

class Reduction(nn.Module):
    def __init__(self, k, i, j):
        super().__init__()
        self.k = k
        self.weight = nn.Parameter(torch.rand(k, i, j))

    def forward(self, X):
        y = torch.zeros(self.k)
        # outer product of X's row sums and column sums: shape (i, j)
        matrix = torch.mm(X.sum(axis=1).unsqueeze(1), X.sum(axis=0).unsqueeze(1).T)
        for i in range(self.k):
            y[i] = torch.mul(matrix, self.weight[i]).sum()
        return y
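
A hedged usage sketch (my addition, not from the original post): this forward assumes X is a 2-D tensor whose shape matches the last two dimensions (i, j) of the weight.

layer = Reduction(k=4, i=3, j=5)
X = torch.randn(3, 5)
print(layer(X))  # a length-4 vector y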

The official documentation of nn.parameter.Parameter states that if you assign a Parameter() as a module attribute, it is automatically added to the module's parameter list. See https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html#torch.nn.parameter.Parameter
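
A quick illustration of that point (my own minimal sketch; the Tiny class is made up): a tensor assigned as an nn.Parameter is registered automatically, while a plain tensor attribute is not.

import torch
from torch import nn

class Tiny(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(2, 3))  # registered automatically
        self.not_a_param = torch.randn(2, 3)           # plain tensor: not registered

net = Tiny()
print([name for name, _ in net.named_parameters()])    # ['weight']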

from torch.nn import functional as F

class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))
    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

Hi, in the book's MyLinear class, why does the forward method use self.weight.data instead of self.weight? Could writing it this way make backward() unusable?

I think Exercise 1 can be done like this:

class mynet(nn.Module):
    def __init__(self, in_dim, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(in_dim * in_dim, dim))  # one row per flattened (i, j) pair
    def forward(self, x):
        x1 = x.unsqueeze(1)
        x2 = x.unsqueeze(1).repeat(1, x.shape[1], 1).transpose(1, 2)
        x3 = (x1 * x2).reshape(-1, x.shape[1]*x.shape[1])
        return torch.matmul(x3, self.weight)

The most concise approach I came up with for Exercise 1:

class Cal_dim(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, in_units, units))

    def forward(self, X):
        # X: (in_units,); broadcast the outer product X_i * X_j against W_{ijk}
        outer = X.reshape(-1, 1, 1) * X.reshape(1, -1, 1)
        return torch.sum(outer * self.weight, dim=(0, 1))

X = torch.randn(5)
net = Cal_dim(5, 3)
net(X)

My attempt at Exercise 1:

class my_net(nn.Module):
    def __init__(self,dim):
        super().__init__()
        self.weight=nn.Parameter(torch.randn(dim,dim))

    def forward(self, X):
        # single-output case: y = sum_{i,j} x_i * W_{ij} * x_j for each row of X
        X = X @ self.weight * X
        return X.sum(axis=1).reshape(-1, 1)

torch.einsum('i, j, ijk -> k', X, X, W)
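
For completeness, a minimal sketch (my own wrapper; the class name EinsumReduce is made up) that turns this one-liner into a layer, assuming x is a 1-D input of length in_units:

import torch
from torch import nn

class EinsumReduce(nn.Module):
    def __init__(self, in_units, out_units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))

    def forward(self, x):
        # y_k = sum_{i,j} x_i * x_j * W_{ijk}
        return torch.einsum('i,j,ijk->k', x, x, self.weight)

layer = EinsumReduce(4, 3)
print(layer(torch.randn(4)).shape)  # torch.Size([3])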

Sharing my implementation for Exercise 1, written with the Einstein summation convention (it felt rather tedious to write otherwise).

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, h)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum('xyk, bmyc, bnxc->bmnkc', self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer(42, 67, 6)
layer(X).shape

And here is a general version:

class TesnorReduceLayer_General(nn.Module):
    def __init__(self, out_features):
        super().__init__()
        
        # weight parameter (created lazily on the first forward pass)
        self.weight = None
        # output dimension
        self.out_features = out_features
        # einsum expression template; 'c' stands for the extra trailing dimensions
        self.einsum_c_str = "xyk, bmyc, bnxc->bmnkc"
        self.einsum_str_lis = "cdefghijlopqrstuvwz"  # x, y, k, m, n, b are reserved
    
    def initParam(self, X):
        # X: (b, x, y, any)
        assert 0 <= len(X.shape) - 3 <= len(self.einsum_str_lis)
        x = X.shape[1]
        y = X.shape[2]
        self.weight = nn.Parameter(
            torch.randn(x, y, self.out_features)
        )
        self.einsum_c_str = self.einsum_c_str.replace('c', self.einsum_str_lis[:len(X.shape) - 3])
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        if self.weight is None:
            self.initParam(X)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum(self.einsum_c_str, self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer_General(6)
print(layer(X).shape)

Y = torch.rand(64, 21, 13, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Y).shape)

Z = torch.rand(32, 12, 11, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Z).shape)
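
A side note on the manual lazy initialization above (my own sketch, not from the thread): PyTorch also ships a built-in deferred-initialization mechanism, nn.UninitializedParameter together with LazyModuleMixin, which can replace the `self.weight = None` bookkeeping. Shown here for the simple 2-D input case:

import torch
from torch import nn
from torch.nn.modules.lazy import LazyModuleMixin

class LazyReduce(LazyModuleMixin, nn.Module):
    """y[b, k] = sum_{i,j} W[i, j, k] * X[b, i] * X[b, j], with W's shape inferred on first use."""
    def __init__(self, out_features):
        super().__init__()
        self.out_features = out_features
        self.weight = nn.UninitializedParameter()

    def initialize_parameters(self, X):
        # Called automatically before the first forward pass
        if self.has_uninitialized_params():
            with torch.no_grad():
                in_features = X.shape[-1]
                self.weight.materialize((in_features, in_features, self.out_features))
                nn.init.normal_(self.weight)

    def forward(self, X):
        return torch.einsum('bi,bj,ijk->bk', X, X, self.weight)

layer = LazyReduce(6)
print(layer(torch.rand(100, 42)).shape)  # torch.Size([100, 6])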

Hmm, I ran it many times and the results are not all zeros; try running it a few more times.


Building on your answer, I wrote a simplified version:

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        ### parameter init
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    def forward(self, x):
        out = torch.einsum('ijk, i, j -> ijk', self.weight,x,x).sum(axis=[0,1])
        return out
x = torch.rand(42)
layer = TesnorReduceLayer(42, 42, 7)
layer(x)

Question 1

I implemented two versions: one without loops (vectorized) and one with loops.

Code:

import torch
from torch import nn


class Layer_no_loop(nn.Module):
    def __init__(self, in_units, out_units) -> None:
        super().__init__()
        torch.manual_seed(42)
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))
    
    def forward(self, x : torch.Tensor):
        X = x.view(-1, 1) @ x.view(1, -1)
        return (self.weight.permute(2, 0, 1) * X).sum(dim=(1,2))

class Layer_loop(nn.Module):
    def __init__(self, in_units, out_units) -> None:
        super().__init__()
        torch.manual_seed(42)
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))
        
    def forward(self, x : torch.Tensor):
        y = torch.zeros(self.weight.shape[2])
        for k in range(self.weight.shape[2]):
            for i in range(self.weight.shape[0]):
                for j in range(self.weight.shape[1]):
                    y[k] += self.weight[i, j, k] * x[i] * x[j]
        return y

# test

import time

in_units, out_units = 50, 5
layer_no_loop = Layer_no_loop(in_units, out_units)
layer_loop = Layer_loop(in_units, out_units)

torch.manual_seed(42)
x = torch.randn(in_units)

t1 = time.time()
y1 = layer_no_loop(x)
t2 = time.time()
print(f'No loop version took {(t2 - t1):.5f} seconds')

t1 = time.time()
y2 = layer_loop(x)
t2 = time.time()
print(f'Loop version took {(t2 - t1):.5f} seconds')

print(x.shape)
print(y1.shape)
print(y2.shape)
print(y1)
print(y2)

Output results:

No loop version took 0.00033 seconds
Loop version took 0.18843 seconds
torch.Size([50])
torch.Size([5])
torch.Size([5])
tensor([-39.1317,  57.6817, 104.2714, -38.5032, 135.5675],
       grad_fn=<SumBackward1>)
tensor([-39.1317,  57.6818, 104.2715, -38.5032, 135.5675],
       grad_fn=<CopySlices>)
"""Q1"""
class DimReduction(nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        self.dim1 = dim1
        self.dim2 = dim2
        self.weight = nn.Parameter(torch.randn(dim2, dim1, dim1))
        
    def forward(self, X):
        # X is 2D (n, dim1)
        X = X.reshape(-1, 1, self.dim1, 1)
        X_dr = (X @ X.mT * self.weight).sum((-2, -1)).reshape(-1, self.dim2)
        return X_dr

dim1, dim2 = 8, 2

dr = DimReduction(dim1, dim2)
W = dr.weight.data
print(W.shape, W.sum((-2, -1)))

X = torch.tensor([[1] * dim1,[2] * dim1])
print(X)
Y = dr(X)
print(Y)

xw = sum_i(x_i * w_i) = n * avg(x_i * w_i), with i = 1..n
E(xw) = n * E(avg(x_i * w_i)) = n * E(x_i * w_i) = n * E(x) * E(w)
Since E(w) = 0, E(xw) = 0.
By the law of large numbers, the larger n (the dimension) is, the closer xw stays to E(xw).
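
A quick empirical check of this argument (my own sketch): with E(w) = 0 and x, w independent, avg(x_i * w_i) concentrates around 0 as the dimension n grows.

import torch

torch.manual_seed(0)
for n in (10, 1_000, 100_000):
    x = torch.randn(n)
    w = torch.randn(n)                # E(w) = 0
    print(n, (x * w).mean().item())   # avg(x_i * w_i) shrinks toward 0 as n grows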

linear = torch.matmul(X, self.weight.data) + self.bias.data

The .data here should not be needed: accessing the parameters through .data is not recorded in the computational graph, so the parameters never receive gradients and cannot be updated.
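
A small way to see this (my own sketch): when a parameter enters the forward pass only through .data, autograd never records it, and its .grad stays None after backward().

import torch
from torch import nn

w = nn.Parameter(torch.randn(3, 3))
b = nn.Parameter(torch.zeros(3))
x = torch.randn(2, 3)

y = (x @ w.data + b).sum()  # w participates only via .data, b via the Parameter itself
y.backward()
print(w.grad)               # None: .data is detached from the autograd graph
print(b.grad is not None)   # True: b received a gradient as usual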

Exercise 1 (a possibly correct version):
class sumyk(nn.Module):
    def __init__(self, i, k):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(k, i, i))
        self.bias = nn.Parameter(torch.zeros(k,))

    def forward(self, X, k):
        Xk = torch.matmul(X.T, X)
        Xx = Xk.unsqueeze(0).repeat(k, 1, 1)
        y = torch.mul(Xx, self.weight.data)
        yk = torch.sum(y, dim=[1, 2]) + self.bias.data
        return yk

A correct answer to Exercise 1:

class DimReductionLayer(nn.Module):
    def __init__(self,in_units,units):
        super().__init__()
        self.weight=nn.Parameter(torch.rand(units,in_units,in_units))
    def forward(self,X):
        oriShape=X.shape[:-1]
        ret = torch.empty(*oriShape, self.weight.shape[0])  # allocate the reduced output
        for i in range(self.weight.shape[0]):  # compute each output element; each is a sum
            ret[...,i]=(torch.matmul(X.unsqueeze(-1),X.unsqueeze(-2))*self.weight[i]).sum(dim=(-1,-2))
        return ret
X=torch.rand(10,6,8,20)
net=DimReductionLayer(20,5)
net(X).shape
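
As a quick cross-check (my own sketch), the loop version above agrees with an einsum reference that uses the same weight:

# Cross-check against an einsum reference (same net and X as above)
ref = torch.einsum('...i,...j,kij->...k', X, X, net.weight)
print(torch.allclose(net(X), ref, atol=1e-3))  # expected: True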