我自己的VGG实现在训练的时候出现很大问题:
- 无法收敛,Loss始终保持在2.3左右
- 显存占用极高,超出了RTX4070 12G显存的容量。理论上其显存占用应当不超过7G
- 速度极慢,猜测是将数据移动到CUDA导致的
请问大家能帮我看看哪里出了错吗?
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
# do not use lazy api
class VGG(nn.Module):
def __init__(self, in_channels, num_classes=10):
super(VGG, self).__init__()
self.net = nn.Sequential(
nn.Conv2d(in_channels, 64, 3, padding=1), nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(),
nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1), nn.ReLU(),
nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(),
nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Flatten(),
nn.Linear(512*7*7, 4096), nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(4096, 4096), nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(4096, num_classes)
)
def forward(self, x):
x = self.net(x)
return x
# use xavier initialization
def init_weights(m):
if type(m) == nn.Linear or type(m) == nn.Conv2d:
nn.init.xavier_uniform_(m.weight)
def train(model:nn.Module, criterion, optimizer, train_loader, epochs=1):
model.train()
for epoch in range(epochs):
for i, (X, y) in enumerate(train_loader):
optimizer.zero_grad()
if use_gpu:
X, y = X.cuda(), y.cuda()
y_hat = model(X)
loss = criterion(y_hat, y)
loss.backward()
optimizer.step()
if i % 100 == 0:
print(f"epoch {epoch}, batch {i}, loss {loss.item()}")
def test(model, test_loader):
model.eval()
correct = 0
total = 0
with torch.no_grad():
for X, y in test_loader:
if use_gpu:
X, y = X.cuda(), y.cuda()
y_hat = model(X)
_, predicted = torch.max(y_hat, 1)
total += y.size(0)
correct += (predicted == y).sum().item()
print(f"accuracy: {correct/total}")
use_gpu = True
train_data = datasets.FashionMNIST(root="Beginners/data", train=True, transform=transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor()]))
test_data = datasets.FashionMNIST(root="Beginners/data", train=False, transform=transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor()]))
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)
model = VGG(in_channels=1)
model.apply(init_weights)
if use_gpu:
model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train(model, criterion, optimizer, train_loader, epochs=10)
test(model, test_loader)