Custom Layers

Could someone check whether this answer is correct? One question about it: if the input X has shape (in_dim, narray), the output has shape (narray, narray, out_dim); are the first two dimensions necessary?

import torch
from torch import nn

class DeDim(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # X: (n, in_dim); weight transposed to (out_dim, in_dim, in_dim)
        Xw = torch.matmul(self.weight.transpose(0, -1), X.T).transpose(0, -1)  # (n, in_dim, out_dim)
        XXw = torch.matmul(X, Xw)  # (n, n, out_dim)
        return XXw


DoubleLinear = DeDim(3, 5)

X = torch.randn(2, 3)
print(DoubleLinear(X))
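
Regarding the shape question above, one way to read the exercise (a sketch of my own, not the book's solution): with a batch X of shape (n, in_dim), applying $y_k = \sum_{i, j} W_{ijk} x_i x_j$ per sample gives an output of shape (n, out_dim), without the extra leading dimension.

import torch
from torch import nn

class ReduceSketch(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_dim, in_dim, out_dim))

    def forward(self, X):
        # y[n, k] = sum_{i, j} W[i, j, k] * X[n, i] * X[n, j]
        return torch.einsum('ni,nj,ijk->nk', X, X, self.weight)

print(ReduceSketch(3, 5)(torch.randn(2, 3)).shape)  # torch.Size([2, 5])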

class Reduction(nn.Module):
    def __init__(self, k, i, j):
        super().__init__()
        self.k = k
        self.weight = nn.Parameter(torch.rand(k, i, j))

    def forward(self, X):
        y = torch.zeros(self.k)
        matrix = torch.mm(X.sum(axis=1).unsqueeze(1), X.sum(axis=0).unsqueeze(1).T)
        for i in range(self.k):
            y[i] = torch.mul(matrix, self.weight[i]).sum()
        return y
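
A minimal usage sketch for the class above (my addition), assuming X has shape (i, j) so that it matches the weight:

layer = Reduction(k=4, i=3, j=5)
X = torch.randn(3, 5)
print(layer(X))  # a length-4 vector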

The official documentation for nn.parameter.Parameter states that if you assign a Parameter() as a member attribute, it is automatically added to the module's parameter list. See https://pytorch.org/docs/stable/generated/torch.nn.parameter.Parameter.html#torch.nn.parameter.Parameter
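
A quick check of that behaviour (a small sketch of my own; WithParam is just a placeholder name):

import torch
from torch import nn

class WithParam(nn.Module):
    def __init__(self):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(3, 2))  # registered automatically
        self.not_a_param = torch.randn(3, 2)           # a plain tensor is NOT registered

print([name for name, _ in WithParam().named_parameters()])  # ['weight']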

class MyLinear(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))
    def forward(self, X):
        linear = torch.matmul(X, self.weight.data) + self.bias.data
        return F.relu(linear)

Hello, in the book's MyLinear class, why does the forward method use self.weight.data instead of self.weight? Could writing it this way prevent backward() from working?

For exercise 1, I think it can be done like this:

class mynet(nn.Module):
    def __init__(self, in_dim, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(in_dim, dim))
    def forward(self, x):
        x1 = x.unsqueeze(1)
        x2 = x.unsqueeze(1).repeat(1, x.shape[1], 1).transpose(1, 2)
        x3 = (x1 * x2).reshape(-1, x.shape[1]*x.shape[1])
        return torch.matmul(x3, self.weight)
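
A usage sketch for the class above (my addition): since x3 is flattened to x.shape[1] * x.shape[1] columns, in_dim has to be the squared feature dimension.

x = torch.randn(2, 3)
net = mynet(3 * 3, 5)   # in_dim = 3 * 3
print(net(x).shape)     # torch.Size([2, 5])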

The most concise approach I could think of for exercise 1:

class Cal_dim(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(units, in_units, in_units))

    def forward(self, X):
        # X: (1, in_units); the outer product X^T X broadcasts against each W[k]
        return torch.sum(X.transpose(0, -1) * X * self.weight, dim=(1, 2))

X = torch.randn(1, 5)
net = Cal_dim(5, 3)
net(X)

My attempt at exercise 1:

class my_net(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(dim, dim))

    def forward(self, X):
        # (X @ W * X).sum(axis=1) computes x^T W x per row, i.e. a single output per sample
        X = X @ self.weight * X
        return X.sum(axis=1).reshape(-1, 1)

torch.einsum('i, j, ijk -> k', X, X, W)
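
A quick shape check of this one-liner (my own sketch, with arbitrary sizes):

import torch

X = torch.randn(4)
W = torch.randn(4, 4, 6)
print(torch.einsum('i, j, ijk -> k', X, X, W).shape)  # torch.Size([6])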

Sharing my implementation of exercise 1, written with the Einstein summation convention (otherwise it feels quite cumbersome to write).

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum('xyk, bmyc, bnxc->bmnkc', self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer(42, 67, 6)
layer(X).shape

And here is a general version:

class TesnorReduceLayer_General(nn.Module):
    def __init__(self, out_features):
        super().__init__()
        
        # weight parameter (initialized lazily from the input shape)
        self.weight = None
        # output dimension
        self.out_features = out_features
        # einsum expression template for the extra dimensions
        self.einsum_c_str = "xyk, bmyc, bnxc->bmnkc"
        self.einsum_str_lis = "cdefghijlopqrstuvwz"  # x, y, k, m, n, b are reserved
    
    def initParam(self, X):
        # X: (b, x, y, any)
        assert 0 <= len(X.shape) - 3 <= len(self.einsum_str_lis)
        x = X.shape[1]
        y = X.shape[2]
        self.weight = nn.Parameter(
            torch.randn(x, y, self.out_features)
        )
        self.einsum_c_str = self.einsum_c_str.replace('c', self.einsum_str_lis[:len(X.shape) - 3])
    
    def forward(self, X):
        # X:  (b, x, y, any)
        # Xi: (b, x, y, any) => (b, m, y, any)
        # Xj: (b, y, x, any) => (b, n, x, any)
        # W:  (x, y, k)
        if self.weight is None:
            self.initParam(X)
        Xi = X
        Xj = X.transpose(1, 2)
        out_withdim_ij = torch.einsum(self.einsum_c_str, self.weight, Xi, Xj)
        out = out_withdim_ij.sum(axis=[1, 2])
        return out

X = torch.rand(100, 42, 67, 5)
layer = TesnorReduceLayer_General(6)
print(layer(X).shape)

Y = torch.rand(64, 21, 13, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Y).shape)

Z = torch.rand(32, 12, 11, 2, 3, 4, 5, 6)
layer = TesnorReduceLayer_General(6)
print(layer(Z).shape)

Hmm, I ran it many times and the results are not all zeros; try running it a few more times and see.


Building on your answer, I wrote a simplified version:

class TesnorReduceLayer(nn.Module):
    def __init__(self, in_features1, in_features2, out_features):
        super().__init__()
        ### parameter init
        self.weight = nn.Parameter(
            torch.randn(in_features1, in_features2, out_features)
        )
    def forward(self, x):
        out = torch.einsum('ijk, i, j -> ijk', self.weight,x,x).sum(axis=[0,1])
        return out
x = torch.rand(42)
layer = TesnorReduceLayer(1, 42, 7)
layer(x)

Question 1

I implemented two versions: one without loops (vectorized), and one with loops.

Code:

import torch
from torch import nn


class Layer_no_loop(nn.Module):
    def __init__(self, in_units, out_units) -> None:
        super().__init__()
        torch.manual_seed(42)
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))
    
    def forward(self, x : torch.Tensor):
        X = x.view(-1, 1) @ x.view(1, -1)
        return (self.weight.permute(2, 0, 1) * X).sum(dim=(1,2))

class Layer_loop(nn.Module):
    def __init__(self, in_units, out_units) -> None:
        super().__init__()
        torch.manual_seed(42)
        self.weight = nn.Parameter(torch.randn(in_units, in_units, out_units))
        
    def forward(self, x : torch.Tensor):
        y = torch.zeros(self.weight.shape[2])
        for k in range(self.weight.shape[2]):
            for i in range(self.weight.shape[0]):
                for j in range(self.weight.shape[1]):
                    y[k] += self.weight[i, j, k] * x[i] * x[j]
        return y

# test

import time

in_units, out_units = 50, 5
layer_no_loop = Layer_no_loop(in_units, out_units)
layer_loop = Layer_loop(in_units, out_units)

torch.manual_seed(42)
x = torch.randn(in_units)

t1 = time.time()
y1 = layer_no_loop(x)
t2 = time.time()
print(f'No loop version took {(t2 - t1):.5f} seconds')

t1 = time.time()
y2 = layer_loop(x)
t2 = time.time()
print(f'Loop version took {(t2 - t1):.5f} seconds')

print(x.shape)
print(y1.shape)
print(y2.shape)
print(y1)
print(y2)

Output results:

No loop version took 0.00033 seconds
Loop version took 0.18843 seconds
torch.Size([50])
torch.Size([5])
torch.Size([5])
tensor([-39.1317,  57.6817, 104.2714, -38.5032, 135.5675],
       grad_fn=<SumBackward1>)
tensor([-39.1317,  57.6818, 104.2715, -38.5032, 135.5675],
       grad_fn=<CopySlices>)
"""Q1"""
class DimReduction(nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        self.dim1 = dim1
        self.dim2 = dim2
        self.weight = nn.Parameter(torch.randn(dim2, dim1, dim1))
        
    def forward(self, X):
        # X is 2D (n, dim1)
        X = X.reshape(-1, 1, self.dim1, 1)
        X_dr = (X @ X.mT * self.weight).sum((-2, -1)).reshape(-1, self.dim2)
        return X_dr

dim1, dim2 = 8, 2

dr = DimReduction(dim1, dim2)
W = dr.weight.data
print(W.shape, W.sum((-2, -1)))

X = torch.tensor([[1] * dim1,[2] * dim1])
print(X)
Y = dr(X)
print(Y)
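
As a quick sanity check (my addition): the first row of X is all ones and the second all twos, so Y[0] should match W.sum((-2, -1)) and Y[1] should be four times that.

print(torch.allclose(Y[0], W.sum((-2, -1))))      # True
print(torch.allclose(Y[1], 4 * W.sum((-2, -1))))  # True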

$xw = \sum_{i=1}^{n} x_i w_i = n \cdot \mathrm{avg}(x_i w_i)$
$E(xw) = n \, E(\mathrm{avg}(x_i w_i)) = n \, E(x_i w_i) = n \, E(x_i) E(w_i)$
$E(w_i) = 0 \Rightarrow E(xw) = 0$
By the law of large numbers, the larger the dimension $n$, the closer $xw$ gets to $E(xw)$.
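
A small numeric illustration of this argument (my own addition): with x and w drawn independently from N(0, 1), the average of x_i * w_i shrinks toward E(x)E(w) = 0 as the dimension n grows.

import torch

torch.manual_seed(0)
for n in (10, 1_000, 100_000):
    x, w = torch.randn(n), torch.randn(n)
    print(n, (x * w).mean().item())  # the sample average of x_i * w_i approaches 0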

linear = torch.matmul(X, self.weight.data) + self.bias.data
The .data here should not be needed.
Accessing the parameters this way bypasses the computation graph, so the parameters never get updated.
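
A small check of this point (my own sketch; TinyLinear is just a placeholder name): when the forward pass goes through .data, the graph never reaches the parameter, so weight.grad stays None after backward().

import torch
from torch import nn

class TinyLinear(nn.Module):
    def __init__(self, use_data):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(3, 2))
        self.use_data = use_data

    def forward(self, X):
        w = self.weight.data if self.use_data else self.weight
        return torch.matmul(X, w)

X = torch.randn(4, 3, requires_grad=True)
for use_data in (True, False):
    net = TinyLinear(use_data)
    net(X).sum().backward()
    print(use_data, net.weight.grad)  # None with .data, a gradient tensor without it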

Exercise 1 (a possibly correct version):

class sumyk(nn.Module):
    def __init__(self, i, k):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(k, i, i))
        self.bias = nn.Parameter(torch.zeros(k,))
    def forward(self, X, k):
        Xk = torch.matmul(X.T, X)
        Xx = Xk.unsqueeze(0).repeat(k, 1, 1)
        y = torch.mul(Xx, self.weight.data)
        yk = torch.sum(y, dim=[1, 2]) + self.bias.data
        return yk
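
A usage sketch for the class above (my addition), assuming X is a batch of shape (n, i):

layer = sumyk(4, 3)
X = torch.randn(2, 4)
print(layer(X, 3))  # a length-3 vector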

A correct answer to exercise 1:

class DimReductionLayer(nn.Module):
    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(units, in_units, in_units))
    def forward(self, X):
        oriShape = X.shape[:-1]
        ret = torch.empty(*oriShape, self.weight.shape[0])  # allocate the reduced output
        for i in range(self.weight.shape[0]):  # each output element is a weighted sum over (i, j)
            ret[..., i] = (torch.matmul(X.unsqueeze(-1), X.unsqueeze(-2)) * self.weight[i]).sum(dim=(-1, -2))
        return ret
X = torch.rand(10, 6, 8, 20)
net = DimReductionLayer(20, 5)
net(X).shape
  1. Design a layer that takes an input, computes a tensor reduction, and returns $y_k = \sum_{i, j} W_{ijk} x_i x_j$.

    import torch
    from torch import nn
    import torch.nn.functional as F
    
    class QuadraticLayer(nn.Module):
        def __init__(self, input_dim, output_dim):
            super().__init__()
            # Initialize the weight parameter W with shape (input_dim, input_dim, output_dim)
            self.weight = nn.Parameter(torch.randn(input_dim, input_dim, output_dim))
            # Initialize the bias parameter
            self.bias = nn.Parameter(torch.randn(output_dim,))
        
        def forward(self, X):
            batch_size = X.shape[0]  # batch size
            # Expand X so that broadcasting can be used
            X_i = X.unsqueeze(-1)  # (batch_size, input_dim, 1)
            X_j = X.unsqueeze(-2)  # (batch_size, 1, input_dim)
            # Compute all pairwise products x_i * x_j and contract them with the weight W
            quadratic_terms = torch.einsum('bij,ijk->bk', X_i * X_j, self.weight)
            # Add the bias and return
            return quadratic_terms + self.bias
    
    # Test
    input_dim, output_dim = 4, 3
    X = torch.rand(2, input_dim)  # input dimension 4, batch size 2
    layer = QuadraticLayer(input_dim, output_dim)
    output = layer(X)
    print(output.shape)  # the output shape should be (2, 3)
    print(output)
    
  2. Design a layer that returns the first half of the Fourier coefficients of the input data.

I don't fully understand this transform; it maps a signal into magnitudes and phases, but that part isn't really important here.

In any case, just call the built-in FFT function and slice to keep the first half.

For example:

X = torch.tensor([1.0, 2.0, 3.0, 4.0])
fft_result = torch.fft.fft(X)
print(fft_result)

So the layer can be:

class FourierLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, X):
        fft_result = torch.fft.fft(X)
        half_length = fft_result.size(-1) // 2 + 1
        return fft_result[..., :half_length]

X = torch.rand(1, 8)  # a 1 x 8 input tensor
layer = FourierLayer()
output = layer(X)
print("Input: ", X)
print("First half of the Fourier coefficients: ", output)

Also: the "first half of the Fourier coefficients" does not mean just the real part; it refers to the part of the Fourier transform corresponding to the non-negative frequency components.

So the output is:

Input: tensor([[0.6467, 0.0874, 0.5268, 0.5647, 0.3713, 0.4820, 0.5223, 0.0772]])

First half of the Fourier coefficients: tensor([[ 3.2785+0.0000j, -0.3483-0.0702j, -0.0311+0.0725j, 0.8991-0.0612j, 0.8560+0.0000j]])
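
For reference, torch.fft.rfft returns exactly these non-negative-frequency coefficients for a real input, so the slicing above can be cross-checked against it (a small sketch of my own):

import torch

X = torch.rand(1, 8)
half = torch.fft.fft(X)[..., : X.size(-1) // 2 + 1]
print(torch.allclose(half, torch.fft.rfft(X)))  # True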

A simple implementation that reduces a 2-D matrix X_{ij} to a 1-D vector y_k:

# Design a layer that takes an input, computes a tensor reduction, and returns $y_k = \sum_{i, j} W_{ijk} x_i x_j$.
import torch
from torch import nn

class TensorDotLayer(nn.Module):
    def __init__(self, x_i, x_j, y_k):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(x_i, x_j, y_k))

    def forward(self, X):
        # reduce the 2-D matrix X_{ij} down to a y_k-dimensional vector
        return torch.einsum('ij,ijk->k', X, self.weight)

X = torch.tensor([[1, 2, 4], [4, 5, 6]], dtype=torch.float32)
net = TensorDotLayer(X.shape[0], X.shape[1], 4)
print(net.weight)
y_k = net(X)
print(y_k)