I tried other sizes and they worked too; I don't know how this value is determined.
The GPU ran out of memory.
How do I find the num_workers parameter of the DataLoader in d2l? Thanks.
Reducing the batch size and killing the processes occupying the GPU solves this problem. Reducing it to 16 worked for me; you can go as low as 2, so give it a try.
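Before killing anything, you can check how much memory your process is actually holding. A minimal sketch using standard PyTorch calls; the batch size of 16 in the last line is just the value suggested above:

import torch
from d2l import torch as d2l

# Inspect current GPU memory usage from the running process
print(f"allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"reserved:  {torch.cuda.memory_reserved() / 1e9:.2f} GB")

# Reload the data with a smaller batch size, as suggested above
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=16, resize=96)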
Open the d2l package's install location in your Anaconda environment (mine is E:\anaconda\envs\d2l\Lib\site-packages\d2l), open torch.py, search for load_data_fashion_mnist, and you will find num_workers inside that function.
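If you would rather not edit the installed package, here is a minimal sketch of an alternative, assuming a d2l version where load_data_fashion_mnist reads the worker count from the module-level helper get_dataloader_workers():

from d2l import torch as d2l

# Override the helper before loading the data; 0 disables worker
# subprocesses, which also sidesteps multiprocessing issues on Windows
d2l.get_dataloader_workers = lambda: 0
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=256, resize=96)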
Look at the model diagram; you can't read that sentence in the text literally. If you did, would you also add a ReLU after the 1x1 convolution? The diagram makes it clear at a glance.
Empty                                     Traceback (most recent call last)
File ~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py:1011, in _MultiProcessingDataLoaderIter._try_get_data(self, timeout)
   1010 try:
-> 1011     data = self._data_queue.get(timeout=timeout)
   1012     return (True, data)

File ~\miniconda3\lib\multiprocessing\queues.py:114, in Queue.get(self, block, timeout)
    113 if not self._poll(timeout):
--> 114     raise Empty
    115 elif not self._poll():

Empty:

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
Input In [9], in <cell line: 3>()
      1 lr, num_epochs, batch_size = 0.05, 10, 256
      2 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
----> 3 d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

File ~\miniconda3\lib\site-packages\d2l\torch.py:512, in train_ch6(net, train_iter, test_iter, num_epochs, lr, device)
    510 metric = d2l.Accumulator(3)
    511 net.train()
--> 512 for i, (X, y) in enumerate(train_iter):
    513     timer.start()
    514     optimizer.zero_grad()

File ~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py:530, in _BaseDataLoaderIter.__next__(self)
    528 if self._sampler_iter is None:
    529     self._reset()
--> 530 data = self._next_data()
    531 self._num_yielded += 1
    532 if self._dataset_kind == _DatasetKind.Iterable and \
    533         self._IterableDataset_len_called is not None and \
    534         self._num_yielded > self._IterableDataset_len_called:

File ~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py:1207, in _MultiProcessingDataLoaderIter._next_data(self)
   1204     return self._process_data(data)
   1206 assert not self._shutdown and self._tasks_outstanding > 0
-> 1207 idx, data = self._get_data()
   1208 self._tasks_outstanding -= 1
   1209 if self._dataset_kind == _DatasetKind.Iterable:
   1210     # Check for _IterableDatasetStopIteration

File ~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py:1173, in _MultiProcessingDataLoaderIter._get_data(self)
   1169 # In this case, self._data_queue is a queue.Queue. But we don't
   1170 # need to call .task_done() because we don't use .join().
   1171 else:
   1172     while True:
-> 1173         success, data = self._try_get_data()
   1174         if success:
   1175             return data

File ~\miniconda3\lib\site-packages\torch\utils\data\dataloader.py:1024, in _MultiProcessingDataLoaderIter._try_get_data(self, timeout)
   1022 if len(failed_workers) > 0:
   1023     pids_str = ', '.join(str(w.pid) for w in failed_workers)
-> 1024     raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
   1025 if isinstance(e, queue.Empty):
   1026     return (False, None)

RuntimeError: DataLoader worker (pid(s) 4056, 13616, 11320, 9348) exited unexpectedly
Why does this happen?
Why is there only a single line of output, "training on cuda:0"?
Hello, after opening the notebook in Jupyter and clicking Run All, I get a "file failed to load" message and then nothing happens. What is going on, and how can I fix it?
ERROR: Failed building wheel for pandas
Failed to build pandas
ERROR: Could not build wheels for pandas which use PEP 517 and cannot be installed directly
Friends, how do I fix this error when installing d2l with pip?
Below is a simple implementation of ResNet-50; comments and corrections welcome.
import torch
from torch import nn
from torch.nn import functional as F

# Bottleneck: 1x1 conv -> 3x3 conv -> 1x1 conv
class Bottleneck(nn.Module):
    def __init__(self, in_channels, channels, stride=1, use_1x1conv=False):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1,
                               stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        self.conv3 = nn.Conv2d(channels, channels * 4, kernel_size=1,
                               stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels * 4)
        if use_1x1conv:
            self.conv4 = nn.Conv2d(in_channels, channels * 4,
                                   kernel_size=1, stride=stride)
        else:
            self.conv4 = None

    def forward(self, x):
        # 1x1 conv, channels: in_channels -> channels
        out = F.relu(self.bn1(self.conv1(x)))
        # 3x3 conv, channels: channels -> channels
        out = F.relu(self.bn2(self.conv2(out)))
        # 1x1 conv, channels: channels -> 4*channels
        out = self.bn3(self.conv3(out))
        # Shortcut: identity mapping or 1x1 conv
        if self.conv4 is None:
            identity = x
        else:
            identity = self.conv4(x)
        out += identity
        return F.relu(out)

def bottleneck_block(in_channels, channels, num_bottlenecks, not_FirstBlock=True):
    # The first bottleneck uses a 1x1 conv on the shortcut; the rest do not.
    # The first block keeps stride=1; later blocks downsample with stride=2
    # (not_FirstBlock + 1 evaluates to 2 when True and to 1 when False).
    blk = []
    for i in range(num_bottlenecks):
        if i == 0:
            blk.append(Bottleneck(in_channels, channels,
                                  stride=not_FirstBlock + 1, use_1x1conv=True))
        else:
            blk.append(Bottleneck(channels * 4, channels))
    return blk

b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b2 = nn.Sequential(*bottleneck_block(64, 64, 3, not_FirstBlock=False))
b3 = nn.Sequential(*bottleneck_block(64 * 4, 128, 3))
b4 = nn.Sequential(*bottleneck_block(128 * 4, 256, 3))
b5 = nn.Sequential(*bottleneck_block(256 * 4, 512, 3))

# Note: the canonical ResNet-50 uses (3, 4, 6, 3) bottlenecks across b2-b5;
# this simplified version uses 3 per stage.
resnet50 = nn.Sequential(b1, b2, b3, b4, b5,
                         nn.AdaptiveAvgPool2d((1, 1)),
                         nn.Flatten(),
                         nn.Linear(2048, 10))
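As a sanity check, here is the layer-by-layer shape printout the book uses for its other networks; with a 224x224 single-channel input, the last stage should emit 512 * 4 = 2048 channels:

X = torch.rand(size=(1, 1, 224, 224))
for layer in resnet50:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)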
An improved version of the residual unit, which moves batch normalization and ReLU in front of each convolution (the "pre-activation" ordering).
The improved implementation:
class Residual(nn.Module):
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        # bn1 normalizes the block's input, so it takes input_channels
        self.bn1 = nn.BatchNorm2d(input_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        # Pre-activation ordering: BN -> ReLU -> conv, twice
        Y = self.conv1(F.relu(self.bn1(X)))
        Y = self.conv2(F.relu(self.bn2(Y)))
        if self.conv3:
            X = self.conv3(X)
        # No final ReLU: the sum itself is the block's output
        Y += X
        return Y
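A quick shape check of the improved block (the sizes here are just example values):

blk = Residual(3, 6, use_1x1conv=True, strides=2)
X = torch.rand(4, 3, 6, 6)
print(blk(X).shape)  # torch.Size([4, 6, 3, 3])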
Actually, the use_1x1conv parameter of Residual can be dropped.
A residual block computes f(x) = g(x) + x.
The purpose of use_1x1conv is to adjust the shape of x (channel count / height / width) with a 1x1 convolution whenever g(x) changes the shape of the feature map.
So it suffices to inspect Residual's own arguments: when input_channels != num_channels or strides > 1, i.e. whenever the feature-map shape is about to change, adjust x accordingly. The use_1x1conv argument then never needs to be passed in; see the sketch below.
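A minimal sketch of that idea (the class name AutoResidual is ours, not from the book): the shortcut's 1x1 convolution is created automatically whenever the arguments imply a shape change.

import torch
from torch import nn
from torch.nn import functional as F

class AutoResidual(nn.Module):
    def __init__(self, input_channels, num_channels, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(input_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        # Create the shortcut's 1x1 conv only when the shape actually changes
        if input_channels != num_channels or strides > 1:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None

    def forward(self, X):
        Y = self.conv1(F.relu(self.bn1(X)))
        Y = self.conv2(F.relu(self.bn2(Y)))
        if self.conv3:
            X = self.conv3(X)
        return Y + X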