# 层和块

## “例如，我们上面模型中的第一个全连接的层接收任意维的输入，但是返回一个维度256的输出。”

# 这里，`block`是`Module`子类的一个实例。我们把它保存在'Module'类的成员变量
# `_children` 中。`block`的类型是OrderedDict。
self._modules[block] = block

1. 注释中_children是不是写错了，应该是_modules
2. _modules为什么定义成OrderedDict类型，一个list应该就够了吧
class MySequential(nn.Module):
    """Sequential container that keeps its blocks in a plain Python list.

    NOTE: blocks held in a plain list are not registered in ``_modules``,
    so ``state_dict()``, ``parameters()`` and ``print(net)`` do not see them.
    This variant exists to contrast with the ``_modules``-based version.
    """

    def __init__(self, *args):
        """Store the given blocks in call order.

        Args:
            *args: Callables (typically ``nn.Module`` instances) that are
                applied one after another in ``forward``.
        """
        super().__init__()
        self.sequential = list(args)

    def forward(self, X):
        """Feed ``X`` through every stored block in order and return the result."""
        for block in self.sequential:
            X = block(X)
        return X

class MySequential2(nn.Module):
    """Sequential container that registers its blocks in ``self._modules``."""

    def __init__(self, *args):
        """Register each block under its positional index.

        Args:
            *args: ``nn.Module`` instances, applied in order by ``forward``.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Keys must be strings: nn.Module builds parameter names such as
            # "0.weight" from them, so using the module object itself as the
            # key breaks state_dict() and print(net).
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through the registered blocks in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X

if __name__ == "__main__":
    net = MLP()
    X = torch.randn(2, 20)
    # print(net(X))
    # Each container gets its own freshly constructed nn.Linear layers, so the
    # two networks start from different random weights and their outputs are
    # not expected to match.
    net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
    net2 = MySequential2(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
    print(net(X))
    print(net2(X))

X = F.relu(torch.mm(X, self.rand_weight) + 1)

It’s reasonable that the results do not match.
net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net2 = MySequential2(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
As you can see, you instantiated four different nn.Linear layers, so their weights are initialized independently and are not the same. To prove that, simply print and compare the weights.

The same result can be achieved by changing the way of storing blocks in MySequential to a Python list. So what is the difference between these two methods?

The main difference is that using _modules enables other PyTorch functions/methods to find the added layers automatically. In other words, these layers are registered. For example, if you want to inspect the parameters of the network, you can simply call state_dict(). But if a plain list is used, methods like state_dict() cannot see the layers stored in it.
Code:
class MySequential(nn.Module):
    """Sequential container whose blocks are registered in ``self._modules``."""

    def __init__(self, *args):
        """Register each block under its positional index as a string key.

        Args:
            *args: ``nn.Module`` instances, applied in order by ``forward``.
        """
        super().__init__()
        for i, block in enumerate(args):
            self._modules[str(i)] = block

    def forward(self, X):
        """Feed ``X`` through the registered blocks in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X

net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
# nn.Module exposes `state_dict()` (singular); there is no `state_dicts()`.
net.state_dict()

class myMLP(nn.Module):
    """Apply every registered block to the same input and collect the outputs."""

    def __init__(self, *args):
        """Register each block under its positional index.

        Args:
            *args: ``nn.Module`` instances that all accept the same input.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # String keys are required for state_dict() / print(net) to work.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Return a list with one output per block, each computed from ``X``."""
        return [block(X) for block in self._modules.values()]
# Build a myMLP from a Linear layer and a MySequential constructed with no
# blocks, then run it on X.
net = myMLP(
    nn.Linear(20, 256),
    MySequential(),
)
net(X)

list不是按顺序存放的，可能会出现问题吧

class Bigkuai(nn.Module):
    """Two parallel Linear+ReLU branches whose outputs are concatenated and
    passed through a final Linear+ReLU head."""

    def __init__(self):
        """Create both branches and the head that consumes their concatenation."""
        super().__init__()
        self.net1 = nn.Sequential(nn.Linear(20, 66), nn.ReLU())
        self.net2 = nn.Sequential(nn.Linear(20, 33), nn.ReLU())
        # The head is built once here so that its weights are registered,
        # trainable and stable across calls. Creating it inside forward()
        # would re-randomise it on every call.
        self.head = nn.Sequential(nn.Linear(66 + 33, 30), nn.ReLU())

    def forward(self, X):
        """Run both branches on ``X``, concatenate along dim 1, apply the head.

        Args:
            X: Input whose last dimension is 20, as required by both branches.

        Returns:
            Tensor with 30 features per row.
        """
        x1 = self.net1(X)
        x2 = self.net2(X)
        x3 = torch.cat((x1, x2), 1)
        return self.head(x3)

# Smoke test: push a random (2, 20) batch through Bigkuai, print the output
# and look at its shape.
X = torch.rand(2, 20)
k = Bigkuai()
print(k(X))
k(X).shape
5.1.2代码注释是不是写错了呢？_modules的类型才是OrderedDict

class MySequential(nn.Module):
    """Container that registers the given modules in ``self._modules``."""

    def __init__(self, *args):
        """Register each module under its positional index.

        Args:
            *args: ``nn.Module`` instances to register, in order.
        """
        super().__init__()
        for idx, module in enumerate(args):
            # Here, `module` is an instance of a Module subclass. We save it
            # in the member variable `_modules` of the Module class.
            # `_modules` (not `module`) is the OrderedDict.
            self._modules[str(idx)] = module
self._modules: Dict[str, Optional['Module']] = OrderedDict()

For example, the first fully-connected layer in our model above ingests an input of dimension 20 but returns an output of dimension 256.
(https://d2l.ai/chapter_deep-learning-computation/model-construction.html)

class MyParallel(nn.Module):
    """Run every registered module on the same input and concatenate the outputs."""

    def __init__(self, *args):
        """Register each module under its positional index.

        Args:
            *args: ``nn.Module`` instances that all accept the same input.
        """
        super().__init__()
        for idx, module in enumerate(args):
            self._modules[str(idx)] = module

    def forward(self, X):
        """Apply each module to ``X`` and join the results along dim 1.

        NOTE(review): the original forward had no body. Concatenation along
        the feature dimension is inferred from the usage, which reads
        ``p(X).shape`` — confirm this is the intended combination.
        """
        outputs = [module(X) for module in self._modules.values()]
        return torch.cat(outputs, dim=1)

# Demo: wrap two Linear layers that share the same 20-feature input in
# MyParallel, then print the output shape, the output and the module itself.
X = torch.rand(2, 20)
n1 = nn.Linear(20, 256)
n2 = nn.Linear(20, 128)
p = MyParallel(n1, n2)
print(p(X).shape, p(X))
print(p)

class Factory(nn.Module):
def __init__(self, net, ins, outs, k):
super().__init__()
for idx in range(k):
self._modules[str(idx)] = net(ins, outs)

def forward(self, X):
res = list()
for idx in range(len(self._modules)):
res.append(self._modules[str(idx)](X))