A CNN with a logistic-regression (softmax) output layer to classify the FashionMNIST data set (using the GPU)
This post also shows how to save the trained network and its parameters and how to load them back. The data set is downloaded automatically by torchvision in the code below.
Code:
import torch as t
import torchvision as tv
import numpy as np
import time
# hyperparameters
EPOCH = 5
BATCH_SIZE = 100
DOWNLOAD_MNIST = True   # set to False if the data set has already been downloaded
N_TEST_IMG = 10         # number of sample images to display (see Figure 1)

class DNN(t.nn.Module):
    def __init__(self):
        super(DNN, self).__init__()

        train_data = tv.datasets.FashionMNIST(
            root="./fashionmnist/",
            train=True,
            transform=tv.transforms.ToTensor(),
            download=DOWNLOAD_MNIST
        )
        test_data = tv.datasets.FashionMNIST(
            root="./fashionmnist/",
            train=False,
            transform=tv.transforms.ToTensor(),
            download=DOWNLOAD_MNIST
        )
        print(test_data)

        # Data loaders for easy mini-batch iteration; each training image batch has shape (BATCH_SIZE, 1, 28, 28)
        self.train_loader = t.utils.data.DataLoader(
            dataset=train_data,
            batch_size=BATCH_SIZE,
            shuffle=True)
        self.test_loader = t.utils.data.DataLoader(
            dataset=test_data,
            batch_size=1000,
            shuffle=True)
        self.cnn = t.nn.Sequential(
            t.nn.Conv2d(
                in_channels=1,    # input channels (grayscale image)
                out_channels=32,  # number of filters
                kernel_size=5,    # filter size
                stride=1,         # filter movement/step
                padding=2,        # padding = (kernel_size - 1) / 2 when stride = 1
            ),                    # output shape (32, 28, 28)
            t.nn.ELU(),           # activation
            t.nn.MaxPool2d(kernel_size=2),  # output shape (32, 14, 14)
            t.nn.Conv2d(
                in_channels=32,   # input channels
                out_channels=64,  # number of filters
                kernel_size=3,    # filter size
                stride=1,         # filter movement/step
                padding=1,        # padding = (kernel_size - 1) / 2 when stride = 1
            ),                    # output shape (64, 14, 14)
            t.nn.ELU(),           # activation
            t.nn.MaxPool2d(kernel_size=2)   # output shape (64, 7, 7)
        )
        self.dnn = t.nn.Sequential(
            t.nn.Linear(7 * 7 * 64, 256),
            t.nn.Dropout(0.5),
            t.nn.ELU(),
            t.nn.Linear(256, 10),
        )
        self.lr = 0.001
        self.loss = t.nn.CrossEntropyLoss()
        self.opt = t.optim.Adam(self.parameters(), lr=self.lr)
    def forward(self, x):
        cnn1 = self.cnn(x)
        # print(cnn1.shape)
        cnn1 = cnn1.view(-1, 7 * 7 * 64)  # flatten to (batch, 7*7*64) for the fully connected layers
        # print(cnn1.shape)
        out = self.dnn(cnn1)
        # print(out.shape)
        return out

def train():
    use_gpu = t.cuda.is_available()
    model = DNN()
    if use_gpu:
        model.cuda()
    print(model)
    loss = model.loss
    opt = model.opt
    dataloader = model.train_loader
    testloader = model.test_loader

    for e in range(EPOCH):
        step = 0
        ts = time.time()
        for (x, y) in dataloader:
            model.train()  # training mode: dropout is active
            step += 1
            b_x = x.view(-1, 1, 28, 28)  # batch x, shape (batch, 1, 28, 28)
            # print(b_x.shape)
            b_y = y
            if use_gpu:
                b_x = b_x.cuda()
                b_y = b_y.cuda()
            out = model(b_x)
            losses = loss(out, b_y)
            opt.zero_grad()
            losses.backward()
            opt.step()
            if step % 100 == 0:
                if use_gpu:
                    print(e, step, losses.data.cpu().numpy())
                else:
                    print(e, step, losses.data.numpy())
                model.eval()  # evaluation mode: dropout is disabled
                for (tx, ty) in testloader:
                    t_x = tx.view(-1, 1, 28, 28)  # batch x, shape (batch, 1, 28, 28)
                    t_y = ty
                    if use_gpu:
                        t_x = t_x.cuda()
                        t_y = t_y.cuda()
                    t_out = model(t_x)
                    if use_gpu:
                        acc = (np.argmax(t_out.data.cpu().numpy(), axis=1) == t_y.data.cpu().numpy())
                    else:
                        acc = (np.argmax(t_out.data.numpy(), axis=1) == t_y.data.numpy())
                    print(time.time() - ts, np.sum(acc) / 1000)
                    ts = time.time()
                    break  # test only the first batch of 1000 images
    t.save(model, './model.pkl')                      # save the entire network
    t.save(model.state_dict(), './model_params.pkl')  # save only the parameters (faster, smaller file)

    # How to load only the parameters:
    # net = DNN()
    # net.load_state_dict(t.load('./model_params.pkl'))
    # net.eval()

    # How to load the whole model:
    net = t.load('./model.pkl')
    net.cpu()
    net.eval()
    for (tx, ty) in testloader:
        t_x = tx.view(-1, 1, 28, 28)  # batch x, shape (batch, 1, 28, 28)
        t_y = ty
        t_out = net(t_x)
        # acc = (np.argmax(t_out.data.cpu().numpy(), axis=1) == t_y.data.cpu().numpy())
        acc = (np.argmax(t_out.data.numpy(), axis=1) == t_y.data.numpy())
        print(np.sum(acc) / 1000)


if __name__ == "__main__":
    train()
The output
DNN(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ELU(alpha=1.0)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ELU(alpha=1.0)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dnn): Sequential(
    (0): Linear(in_features=3136, out_features=256, bias=True)
    (1): Dropout(p=0.5)
    (2): ELU(alpha=1.0)
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
  (loss): CrossEntropyLoss()
)
0 100 0.5667072
2.720407485961914 0.801
0 200 0.39575616
1.7416255474090576 0.843
0 300 0.37888268
1.7285969257354736 0.862
0 400 0.40818048
1.773937702178955 0.869
0 500 0.47720864
1.7295997142791748 0.862
0 600 0.30158585
1.7265923023223877 0.867
1 100 0.27911857
1.7963228225708008 0.885
1 200 0.2902728
1.7476909160614014 0.89
1 300 0.25626943
1.8007855415344238 0.884
1 400 0.3532468
1.7679908275604248 0.871
1 500 0.27845666
1.7266316413879395 0.909
1 600 0.3446595
1.7566702365875244 0.895
...
0.884
0.89
0.885
0.892
0.899
0.895
0.892
0.869
0.898
0.871
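To use the parameter-only checkpoint later (the faster of the two save options shown above), the sketch below is one way to run inference on a single test image. It is a minimal example, assuming the DNN class from the script above and the ./model_params.pkl file written by train(); the FASHION_CLASSES list is added here only for readable output and is not part of the original code.

# Minimal inference sketch (assumes the DNN class above and ./model_params.pkl from train()).
import torch as t
import torchvision as tv

# Class names in the standard FashionMNIST label order; added for this example only.
FASHION_CLASSES = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
                   'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

net = DNN()                                        # rebuild the architecture (this also re-creates the data loaders)
net.load_state_dict(t.load('./model_params.pkl'))  # load only the learned weights
net.eval()                                         # disable dropout for inference

test_data = tv.datasets.FashionMNIST(root="./fashionmnist/", train=False,
                                     transform=tv.transforms.ToTensor())
img, label = test_data[0]                          # one (1, 28, 28) tensor and its integer label
with t.no_grad():
    logits = net(img.unsqueeze(0))                 # add a batch dimension -> (1, 1, 28, 28)
    pred = logits.argmax(dim=1).item()
print('predicted:', FASHION_CLASSES[pred], '| true:', FASHION_CLASSES[label])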
Result analysis
My laptop has an Intel Core i5-8250U CPU and an MX150 GPU with 2 GB of memory. In this test, training the CNN on the GPU is roughly 12-15 times faster than on the CPU (about 23 s vs. 1.75 s per 100 training steps). Using a GPU is recommended, as it significantly improves efficiency.
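If you want to reproduce the comparison on your own hardware, a rough timing sketch is given below. It is only an approximation, assuming the DNN class and BATCH_SIZE from the script above; it times 100 optimizer steps on a dummy batch of the FashionMNIST input shape and calls t.cuda.synchronize() so asynchronous GPU kernel launches do not distort the measurement. The helper name time_100_steps is hypothetical.

# Rough CPU vs. GPU timing sketch (assumes the DNN class and BATCH_SIZE from the script above).
import time
import torch as t

def time_100_steps(device):
    model = DNN().to(device)                               # note: DNN() also re-creates the data loaders
    opt = t.optim.Adam(model.parameters(), lr=model.lr)    # fresh optimizer for the moved parameters
    x = t.randn(BATCH_SIZE, 1, 28, 28, device=device)      # dummy batch with the FashionMNIST input shape
    y = t.randint(0, 10, (BATCH_SIZE,), device=device)     # dummy labels
    model.train()
    if device == 'cuda':
        t.cuda.synchronize()                               # make sure setup work has finished
    start = time.time()
    for _ in range(100):                                   # 100 steps, matching the logging interval above
        opt.zero_grad()
        model.loss(model(x), y).backward()
        opt.step()
    if device == 'cuda':
        t.cuda.synchronize()                               # wait for queued GPU kernels before stopping the clock
    return time.time() - start

print('CPU time for 100 steps:', time_100_steps('cpu'))
if t.cuda.is_available():
    print('GPU time for 100 steps:', time_100_steps('cuda'))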