Custom Layers

Could someone check whether this answer is correct? One question about it: if the input X has shape (in_dim, narray), the output has shape (narray, narray, out_dim); are the first two dimensions necessary?

import torch
from torch import nn

class DeDim(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # X: (n, in_dim); weight transposed to (out_dim, in_dim, in_dim)
        Xw = torch.matmul(self.weight.transpose(0, -1), X.T).transpose(0, -1)  # (n, in_dim, out_dim)
        XXw = torch.matmul(X, Xw)  # (n, n, out_dim)
        return XXw


DoubleLinear = DeDim(3, 5)

X = torch.randn(2, 3)
print(DoubleLinear(X))
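
Regarding the shape question above, one way to read the exercise (a sketch of my own, not the book's solution): with a batch X of shape (n, in_dim), applying $y_k = \sum_{i, j} W_{ijk} x_i x_j$ per sample gives an output of shape (n, out_dim), without the extra leading dimension.

import torch
from torch import nn

class ReduceSketch(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # y[n, k] = sum_{i, j} W[i, j, k] * X[n, i] * X[n, j]
        return torch.einsum('ni,nj,ijk->nk', X, X, self.weight)

print(ReduceSketch(3, 5)(torch.randn(2, 3)).shape)  # torch.Size([2, 5])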

class Reduction(nn.Module):
    def __init__(self, k, i, j):
        super().__init__()
        self.k = k
        self.weight = nn.Parameter(torch.rand(k, i, j))

    def forward(self, X):
        y = torch.zeros(self.k)
        matrix = torch.mm(X.sum(axis=1).unsqueeze(1), X.sum(axis=0).unsqueeze(1).T)
        for i in range(self.k):
            y[i] = torch.mul(matrix, self.weight[i]).sum()
        return y
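
A minimal usage sketch for the class above (my addition), assuming X has shape (i, j) so that it matches the weight:

layer = Reduction(k=4, i=3, j=5)
X = torch.randn(3, 5)
print(layer(X))  # a length-4 vector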

The official documentation for nn.parameter.Parameter states that if you assign a Parameter() as a member attribute, it is automatically added to the module's parameter list. See https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html#torch.nn.parameter.Parameter
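
A quick check of that behaviour (a small sketch of my own; WithParam is just a placeholder name):

import torch
from torch import nn

class WithParam(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(3, 2))  # registered automatically
        self.not_a_param = torch.randn(3, 2)           # a plain tensor is NOT registered

print([name for name, _ in WithParam().named_parameters()])  # ['weight']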

class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))
    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

Hello, in the book's MyLinear class, why does the forward method use self.weight.data instead of self.weight? Could writing it this way prevent backward() from working?

For exercise 1, I think it can be done like this:

class mynet(nn.Module):
    def __init__(self, in_dim, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(in_dim, dim))
    def forward(self, x):
        x1 = x.unsqueeze(1)
        x2 = x.unsqueeze(1).repeat(1, x.shape[1], 1).transpose(1, 2)
        x3 = (x1 * x2).reshape(-1, x.shape[1]*x.shape[1])
        return torch.matmul(x3, self.weight)
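
A usage sketch for the class above (my addition): since x3 is flattened to x.shape[1] * x.shape[1] columns, in_dim has to be the squared feature dimension.

x = torch.randn(2, 3)
net = mynet(3 * 3, 5)   # in_dim = 3 * 3
print(net(x).shape)     # torch.Size([2, 5])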

The most concise approach I could think of for exercise 1:

class Cal_dim(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(units, in_units, in_units))

    def forward(self, X):
        # X: (1, in_units); the outer product X^T X broadcasts against each W[k]
        return torch.sum(X.transpose(0, -1) * X * self.weight, dim=(1, 2))

X = torch.randn(1, 5)
net = Cal_dim(5, 3)
net(X)

My attempt at exercise 1:

class my_net(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(dim, dim))

    def forward(self, X):
        # (X @ W * X).sum(axis=1) computes x^T W x per row, i.e. a single output per sample
        X = X @ self.weight * X
        return X.sum(axis=1).reshape(-1, 1)

torch.einsum('i, j, ijk -> k', X, X, W)
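
A quick shape check of this one-liner (my own sketch, with arbitrary sizes):

import torch

X = torch.randn(4)
W = torch.randn(4, 4, 6)
print(torch.einsum('i, j, ijk -> k', X, X, W).shape)  # torch.Size([6])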

Sharing my implementation of exercise 1, written with the Einstein summation convention (otherwise it feels quite cumbersome to write).

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum('xyk, bmyc, bnxc->bmnkc', self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer(42, 67, 6)
layer(X).shape

And here is a general version:

class TesnorReduceLayer_General(nn.Module):
    def __init__(self, out_features):
        super().__init__()
        
        # weight parameter (initialized lazily from the input shape)
        self.weight = None
        # output dimension
        self.out_features = out_features
        # einsum expression template for the extra dimensions
        self.einsum_c_str = "xyk, bmyc, bnxc->bmnkc"
        self.einsum_str_lis = "cdefghijlopqrstuvwz"  # x, y, k, m, n, b are reserved
    
    def initParam(self, X):
        # X: (b, x, y, any)
        assert 0 <= len(X.shape) - 3 <= len(self.einsum_str_lis)
        x = X.shape[1]
        y = X.shape[2]
        self.weight = nn.Parameter(
            torch.randn(x, y, self.out_features)
        )
        self.einsum_c_str = self.einsum_c_str.replace('c', self.einsum_str_lis[:len(X.shape) - 3])
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        if self.weight is None:
            self.initParam(X)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum(self.einsum_c_str, self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer_General(6)
print(layer(X).shape)

Y = torch.rand(64, 21, 13, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Y).shape)

Z = torch.rand(32, 12, 11, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Z).shape)

Hmm, I ran it many times and the results are not all zeros; try running it a few more times and see.


Building on your answer, I wrote a simplified version:

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        ### parameter init
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    def forward(self, x):
        out = torch.einsum('ijk, i, j -> ijk', self.weight,x,x).sum(axis=[0,1])
        return out
x = torch.rand(42)
layer = TesnorReduceLayer(1, 42, 7)
layer(x)

Question 1

I implemented two versions: one without loops (vectorized), and one with loops.

Code:

import torch
from torch import nn


class Layer_no_loop(nn.Module):
    def __init__(self, in_units, out_units) -> None:
        super().__init__()
        torch.manual_seed(42)
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))
    
    def forward(self, x : torch.Tensor):
        X = x.view(-1, 1) @ x.view(1, -1)
        return (self.weight.permute(2, 0, 1) * X).sum(dim=(1,2))

class Layer_loop(nn.Module):
    def __init__(self, in_units, out_units) -> None:
        super().__init__()
        torch.manual_seed(42)
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))
        
    def forward(self, x : torch.Tensor):
        y = torch.zeros(self.weight.shape[2])
        for k in range(self.weight.shape[2]):
            for i in range(self.weight.shape[0]):
                for j in range(self.weight.shape[1]):
                    y[k] += self.weight[i, j, k] * x[i] * x[j]
        return y

# test

import time

in_units, out_units = 50, 5
layer_no_loop = Layer_no_loop(in_units, out_units)
layer_loop = Layer_loop(in_units, out_units)

torch.manual_seed(42)
x = torch.randn(in_units)

t1 = time.time()
y1 = layer_no_loop(x)
t2 = time.time()
print(f'No loop version took {(t2 - t1):.5f} seconds')

t1 = time.time()
y2 = layer_loop(x)
t2 = time.time()
print(f'Loop version took {(t2 - t1):.5f} seconds')

print(x.shape)
print(y1.shape)
print(y2.shape)
print(y1)
print(y2)

Output results:

No loop version took 0.00033 seconds
Loop version took 0.18843 seconds
torch.Size([50])
torch.Size([5])
torch.Size([5])
tensor([-39.1317,  57.6817, 104.2714, -38.5032, 135.5675],
       grad_fn=<SumBackward1>)
tensor([-39.1317,  57.6818, 104.2715, -38.5032, 135.5675],
       grad_fn=<CopySlices>)
"""Q1"""
class DimReduction(nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        self.dim1 = dim1
        self.dim2 = dim2
        self.weight = nn.Parameter(torch.randn(dim2, dim1, dim1))
        
    def forward(self, X):
        # X is 2D (n, dim1)
        X = X.reshape(-1, 1, self.dim1, 1)
        X_dr = (X @ X.mT * self.weight).sum((-2, -1)).reshape(-1, self.dim2)
        return X_dr

dim1, dim2 = 8, 2

dr = DimReduction(dim1, dim2)
W = dr.weight.data
print(W.shape, W.sum((-2, -1)))

X = torch.tensor([[1] * dim1,[2] * dim1])
print(X)
Y = dr(X)
print(Y)
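
As a quick sanity check (my addition): the first row of X is all ones and the second all twos, so Y[0] should match W.sum((-2, -1)) and Y[1] should be four times that.

print(torch.allclose(Y[0], W.sum((-2, -1))))      # True
print(torch.allclose(Y[1], 4 * W.sum((-2, -1))))  # True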

$xw = \sum_{i=1}^{n} x_i w_i = n \cdot \mathrm{avg}(x_i w_i)$
$E(xw) = n \, E(\mathrm{avg}(x_i w_i)) = n \, E(x_i w_i) = n \, E(x_i) E(w_i)$
$E(w_i) = 0 \Rightarrow E(xw) = 0$
By the law of large numbers, the larger the dimension $n$, the closer $xw$ gets to $E(xw)$.
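
A small numeric illustration of this argument (my own addition): with x and w drawn independently from N(0, 1), the average of x_i * w_i shrinks toward E(x)E(w) = 0 as the dimension n grows.

import torch

torch.manual_seed(0)
for n in (10, 1_000, 100_000):
    x, w = torch.randn(n), torch.randn(n)
    print(n, (x * w).mean().item())  # the sample average of x_i * w_i approaches 0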

linear = torch.matmul(X, self.weight.data) + self.bias.data
The .data here should not be needed.
Accessing the parameters this way bypasses the computation graph, so the parameters never get updated.
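
A small check of this point (my own sketch; TinyLinear is just a placeholder name): when the forward pass goes through .data, the graph never reaches the parameter, so weight.grad stays None after backward().

import torch
from torch import nn

class TinyLinear(nn.Module):
    def __init__(self, use_data):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(3, 2))
        self.use_data = use_data

    def forward(self, X):
        w = self.weight.data if self.use_data else self.weight
        return torch.matmul(X, w)

X = torch.randn(4, 3, requires_grad=True)
for use_data in (True, False):
    net = TinyLinear(use_data)
    net(X).sum().backward()
    print(use_data, net.weight.grad)  # None with .data, a gradient tensor without it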

Exercise 1 (a possibly correct version):

class sumyk(nn.Module):
    def __init__(self, i, k):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(k, i, i))
        self.bias = nn.Parameter(torch.zeros(k,))
    def forward(self, X, k):
        Xk = torch.matmul(X.T, X)
        Xx = Xk.unsqueeze(0).repeat(k, 1, 1)
        y = torch.mul(Xx, self.weight.data)
        yk = torch.sum(y, dim=[1, 2]) + self.bias.data
        return yk
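
A usage sketch for the class above (my addition), assuming X is a batch of shape (n, i):

layer = sumyk(4, 3)
X = torch.randn(2, 4)
print(layer(X, 3))  # a length-3 vector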

A correct answer to exercise 1:

class DimReductionLayer(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(units, in_units, in_units))
    def forward(self, X):
        oriShape = X.shape[:-1]
        ret = torch.empty(*oriShape, self.weight.shape[0])  # allocate the reduced output
        for i in range(self.weight.shape[0]):  # each output element is a weighted sum over (i, j)
            ret[..., i] = (torch.matmul(X.unsqueeze(-1), X.unsqueeze(-2)) * self.weight[i]).sum(dim=(-1, -2))
        return ret
X = torch.rand(10, 6, 8, 20)
net = DimReductionLayer(20, 5)
net(X).shape
  1. Design a layer that takes an input, computes a tensor reduction, and returns $y_k = \sum_{i, j} W_{ijk} x_i x_j$.

    import torch
    from torch import nn
    import torch.nn.functional as F
    
    class QuadraticLayer(nn.Module):
        def __init__(self, input_dim, output_dim):
            super().__init__()
            # Initialize the weight parameter W with shape (input_dim, input_dim, output_dim)
            self.weight = nn.Parameter(torch.randn(input_dim, input_dim, output_dim))
            # Initialize the bias parameter
            self.bias = nn.Parameter(torch.randn(output_dim,))
        
        def forward(self, X):
            batch_size = X.shape[0]  # batch size
            # Expand X so that broadcasting can be used
            X_i = X.unsqueeze(-1)  # (batch_size, input_dim, 1)
            X_j = X.unsqueeze(-2)  # (batch_size, 1, input_dim)
            # Compute all pairwise products x_i * x_j and contract them with the weight W
            quadratic_terms = torch.einsum('bij,ijk->bk', X_i * X_j, self.weight)
            # Add the bias and return
            return quadratic_terms + self.bias
    
    # Test
    input_dim, output_dim = 4, 3
    X = torch.rand(2, input_dim)  # input dimension 4, batch size 2
    layer = QuadraticLayer(input_dim, output_dim)
    output = layer(X)
    print(output.shape)  # the output shape should be (2, 3)
    print(output)
    
  2. Design a layer that returns the first half of the Fourier coefficients of the input data.

I don't fully understand this transform; it maps a signal into magnitudes and phases, but that part isn't really important here.

In any case, just call the built-in FFT function and slice to keep the first half.

For example:

X = torch.tensor([1.0, 2.0, 3.0, 4.0])
fft_result = torch.fft.fft(X)
print(fft_result)

So the layer can be:

class FourierLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, X):
        fft_result = torch.fft.fft(X)
        half_length = fft_result.size(-1) // 2 + 1
        return fft_result[..., :half_length]

X = torch.rand(1, 8)  # a 1 x 8 input tensor
layer = FourierLayer()
output = layer(X)
print("Input: ", X)
print("First half of the Fourier coefficients: ", output)

Also: the "first half of the Fourier coefficients" does not mean just the real part; it refers to the part of the Fourier transform corresponding to the non-negative frequency components.

So the output is:

Input: tensor([[0.6467, 0.0874, 0.5268, 0.5647, 0.3713, 0.4820, 0.5223, 0.0772]])

First half of the Fourier coefficients: tensor([[ 3.2785+0.0000j, -0.3483-0.0702j, -0.0311+0.0725j, 0.8991-0.0612j, 0.8560+0.0000j]])
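
For reference, torch.fft.rfft returns exactly these non-negative-frequency coefficients for a real input, so the slicing above can be cross-checked against it (a small sketch of my own):

import torch

X = torch.rand(1, 8)
half = torch.fft.fft(X)[..., : X.size(-1) // 2 + 1]
print(torch.allclose(half, torch.fft.rfft(X)))  # True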

A simple implementation that reduces a 2-D matrix X_{ij} to a 1-D vector y_k:

# Design a layer that takes an input, computes a tensor reduction, and returns $y_k = \sum_{i, j} W_{ijk} x_i x_j$.
import torch
from torch import nn

class TensorDotLayer(nn.Module):
    def __init__(self, x_i, x_j, y_k):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(x_i, x_j, y_k))

    def forward(self, X):
        # reduce the 2-D matrix X_{ij} down to a y_k-dimensional vector
        return torch.einsum('ij,ijk->k', X, self.weight)

X = torch.tensor([[1, 2, 4], [4, 5, 6]], dtype=torch.float32)
net = TensorDotLayer(X.shape[0], X.shape[1], 4)
print(net.weight)
y_k = net(X)
print(y_k)