CNN implements logistic regression to classify the FashionMNIST dataset (using GPU)

The content also covers saving and loading the network's model parameters. The dataset download address is set in the code section below.

import torch as t
import torchvision as tv
import numpy as np
import time


# Hyperparameters
EPOCH = 5
BATCH_SIZE = 100
DOWNLOAD_MNIST = True   # set to False once the data has been downloaded
N_TEST_IMG = 10         # number of test images to display



class DNN(t.nn.Module):
    def __init__(self):
        super(DNN, self).__init__()

        train_data = tv.datasets.FashionMNIST(
            root="./fashionmnist/",
            train=True,
            transform=tv.transforms.ToTensor(),
            download=DOWNLOAD_MNIST
        )

        test_data = tv.datasets.FashionMNIST(
            root="./fashionmnist/",
            train=False,
            transform=tv.transforms.ToTensor(),
            download=DOWNLOAD_MNIST
        )

        print(test_data)


        # DataLoader for easy mini-batch return in training; the image batch shape will be (100, 1, 28, 28)
        self.train_loader = t.utils.data.DataLoader(
            dataset=train_data, 
            batch_size=BATCH_SIZE,
            shuffle=True)

        self.test_loader = t.utils.data.DataLoader(
            dataset=test_data, 
            batch_size=1000,
            shuffle=True)

        self.cnn = t.nn.Sequential(
            t.nn.Conv2d(
                in_channels=1,     # input channels (grayscale image)
                out_channels=32,   # n_filters
                kernel_size=5,     # filter size
                stride=1,          # filter movement/step
                padding=2,         # padding = (kernel_size-1)/2 when stride=1
            ),                     # output shape (32, 28, 28)
            t.nn.ELU(),            # activation
            t.nn.MaxPool2d(kernel_size=2),  # output shape (32, 14, 14)

            t.nn.Conv2d(
                in_channels=32,    # input channels
                out_channels=64,   # n_filters
                kernel_size=3,     # filter size
                stride=1,          # filter movement/step
                padding=1,         # padding = (kernel_size-1)/2 when stride=1
            ),                     # output shape (64, 14, 14)
            t.nn.ELU(),            # activation
            t.nn.MaxPool2d(kernel_size=2)   # output shape (64, 7, 7)
        )

        self.dnn = t.nn.Sequential(
            t.nn.Linear(7*7*64, 256),
            t.nn.Dropout(0.5),
            t.nn.ELU(),
            t.nn.Linear(256, 10),
        )

        self.lr = 0.001
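        # CrossEntropyLoss applies log-softmax internally, so the model outputs raw logits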
        self.loss = t.nn.CrossEntropyLoss()
        self.opt = t.optim.Adam(self.parameters(), lr = self.lr)

    def forward(self, x):
        cnn1 = self.cnn(x)
        #print(cnn1.shape)
        cnn1 = cnn1.view(-1, 7*7*64)   # flatten to (batch, 3136)
        #print(cnn1.shape)
        out = self.dnn(cnn1)
        #print(out.shape)
        return out

def train():
    use_gpu = t.cuda.is_available()
    model = DNN()
    if use_gpu:
        model.cuda()
    print(model)
    loss = model.loss
    opt = model.opt
    dataloader = model.train_loader
    testloader = model.test_loader

    
    for e in range(EPOCH):
        step = 0
        ts = time.time()
        for x, y in dataloader:
            

            model.train()   # training mode: dropout is active
            step += 1
            b_x = x.view(-1, 1, 28, 28)   # batch x, shape (batch, 1, 28, 28)
            #print(b_x.shape)
            b_y = y
            if use_gpu:
                b_x = b_x.cuda()
                b_y = b_y.cuda()
            out = model(b_x)
            losses = loss(out, b_y)
            opt.zero_grad()
            losses.backward()
            opt.step()
            if step % 100 == 0:
                if use_gpu:
                    print(e, step, losses.data.cpu().numpy())
                else:
                    print(e, step, losses.data.numpy())

                model.eval()   # eval mode: dropout is disabled
                for tx, ty in testloader:
                    t_x = tx.view(-1, 1, 28, 28)   # batch x, shape (batch, 1, 28, 28)
                    t_y = ty
                    if use_gpu:
                        t_x = t_x.cuda()
                        t_y = t_y.cuda()
                    t_out = model(t_x)
                    if use_gpu:
                        acc = (np.argmax(t_out.data.cpu().numpy(), axis=1) == t_y.data.cpu().numpy())
                    else:
                        acc = (np.argmax(t_out.data.numpy(), axis=1) == t_y.data.numpy())

                    print(time.time() - ts, np.sum(acc) / 1000)
                    ts = time.time()
                    break   # test only the first batch of 1000 images
            


    t.save(model, './model.pkl')                        # save the entire network
    t.save(model.state_dict(), './model_params.pkl')    # save only the network parameters (faster, less memory)
    # How to load the parameters:
    # net = DNN()
    # net.load_state_dict(t.load('./model_params.pkl'))
    # net.eval()
    # How to load the whole model:
    net = t.load('./model.pkl')
    net.cpu()
    net.eval()
    for tx, ty in testloader:
        t_x = tx.view(-1, 1, 28, 28)   # batch x, shape (batch, 1, 28, 28)
        t_y = ty

        t_out = net(t_x)
        # GPU version: acc = (np.argmax(t_out.data.cpu().numpy(), axis=1) == t_y.data.cpu().numpy())
        acc = (np.argmax(t_out.data.numpy(), axis=1) == t_y.data.numpy())

        print(np.sum(acc)/1000)

if __name__ == "__main__":
    train()
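
As a usage note, here is a minimal sketch of the parameter-based load path referenced in the comments above. It assumes the model_params.pkl file saved by train() and reuses the test loader that DNN builds in its constructor:

import torch as t

net = DNN()                                          # rebuild the architecture first
net.load_state_dict(t.load('./model_params.pkl'))    # then restore only its weights
net.eval()                                           # disable dropout for inference

with t.no_grad():
    images, labels = next(iter(net.test_loader))
    pred = net(images.view(-1, 1, 28, 28)).argmax(dim=1)
    print((pred == labels).float().mean().item())    # accuracy on one batch of 1000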

The output

DNN(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ELU(alpha=1.0)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ELU(alpha=1.0)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dnn): Sequential(
    (0): Linear(in_features=3136, out_features=256, bias=True)
    (1): Dropout(p=0.5)
    (2): ELU(alpha=1.0)
    (3): Linear(in_features=256, out_features=10, bias=True)
  )
  (loss): CrossEntropyLoss()
)
0 100 0.5667072
2.720407485961914 0.801
0 200 0.39575616
1.7416255474090576 0.843
0 300 0.37888268
1.7285969257354736 0.862
0 400 0.40818048
1.773937702178955 0.869
0 500 0.47720864
1.7295997142791748 0.862
0 600 0.30158585
1.7265923023223877 0.867
1 100 0.27911857
1.7963228225708008 0.885
1 200 0.2902728
1.7476909160614014 0.89
1 300 0.25626943
1.8007855415344238 0.884
1 400 0.3532468
1.7679908275604248 0.871
1 500 0.27845666
1.7266316413879395 0.909
1 600 0.3446595
1.7566702365875244 0.895
0.884 0.89 0.885 0.892 0.899 0.895 0.892 0.869 0.898 0.871

Result analysis: my laptop has an Intel i5-8250U CPU and an MX150 GPU with 2 GB of memory. In my tests, the CNN runs about 12 to 15 times faster on the GPU than on the CPU (roughly 23 s vs. 1.75 s per 100 training steps, as in the log above). Using a GPU is recommended for a significant efficiency gain.
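
If you want to reproduce the comparison yourself, the sketch below is one rough way to do it. It is my own benchmark, not part of the original script: it feeds one random FashionMNIST-sized batch repeatedly and calls t.cuda.synchronize() before reading the clock, because CUDA kernels run asynchronously:

import time
import torch as t

def time_steps(model, device, n_steps=100):
    # run n_steps of forward + backward on one random batch and return elapsed seconds
    model = model.to(device)
    x = t.randn(100, 1, 28, 28, device=device)
    y = t.randint(0, 10, (100,), device=device)
    if device == 'cuda':
        t.cuda.synchronize()
    start = time.time()
    for _ in range(n_steps):
        out = model(x)
        losses = model.loss(out, y)
        model.opt.zero_grad()
        losses.backward()
        model.opt.step()
    if device == 'cuda':
        t.cuda.synchronize()
    return time.time() - start

print('CPU:', time_steps(DNN(), 'cpu'))
if t.cuda.is_available():
    print('GPU:', time_steps(DNN(), 'cuda'))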