Task Description:

This practice is a multi-class classification task: identify which gemstone is shown in each photo.

Practice platform: Baidu AI Studio, PaddlePaddle 2.0.0 in dynamic graph mode

Requirement: use a CNN to perform gemstone identification.

Convolutional neural network

A convolutional neural network is a classical architecture for extracting image features; its structure generally consists of alternating convolutional and pooling layers, followed by fully connected layers for classification.

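As a minimal sketch of this idea (illustrative only, not part of the project code below), a single convolution-plus-pooling block in PaddlePaddle 2.0 can be written as:

import paddle
import paddle.nn as nn

# One conv + pool block: the convolution extracts local features,
# the max pooling halves the spatial resolution.
block = nn.Sequential(
    nn.Conv2D(in_channels=3, out_channels=16, kernel_size=3, stride=1),
    nn.ReLU(),
    nn.MaxPool2D(kernel_size=2, stride=2),
)

x = paddle.randn([1, 3, 224, 224])  # a dummy RGB image batch
print(block(x).shape)               # [1, 16, 111, 111]

Stacking several such blocks and ending with a fully connected layer gives the classifiers defined in Section 2.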
Introduction to the dataset

  • The dataset files are named archive_train.zip and archive_test.zip.

  • The dataset contains images of 25 different categories of gemstones.

  • These categories have been divided into training and test data.

  • Image sizes vary; the format is .jpeg (a quick check of the extracted folder structure is sketched after this list).

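A minimal sanity check, assuming archive_train.zip has already been unzipped to the target path used in Section 1 below, to confirm the one-folder-per-class layout:

import os

dataset_dir = "/home/aistudio/data/dataset"   # target_path used in Section 1
classes = [d for d in os.listdir(dataset_dir) if d != '__MACOSX']
print("number of classes:", len(classes))     # expected: 25
print("images in", classes[0], ":", len(os.listdir(os.path.join(dataset_dir, classes[0]))))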
Note: any changes in this directory will be automatically reverted after the environment restarts.
# View dataset directory. This directory will be restored automatically after the environment restarts.
!ls /home/aistudio/data
data55032  dataset
Import the required packages
import os
import zipfile
import random
import json
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
from paddle.io import Dataset
import paddle.nn as nn 


1. Data preparation

Parameter Configuration
train_parameters = {
    "input_size": [3, 224, 224],                       # input image shape
    "class_dim": 25,                                   # number of classes
    "src_path": "data/data55032/archive_train.zip",    # path to the original dataset zip
    "target_path": "/home/aistudio/data/dataset",      # path to unzip to
    "train_list_path": "./train.txt",                  # train.txt path
    "eval_list_path": "./eval.txt",                    # eval.txt path
    "label_dict": {},                                   # label dictionary
    "readme_path": "/home/aistudio/data/readme.json",   # readme.json path
    "num_epochs": 10,                                   # number of training epochs
    "train_batch_size": 32,                             # batch size
    "learning_strategy": {                              # optimizer-specific configuration
        "lr": 0.0005                                     # learning rate
    }
}
def unzip_data(src_path, target_path):
    """Unzip the original dataset: extract the src_path zip package to the data/dataset directory."""
    if(not os.path.isdir(target_path)):    
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()
    else:
        print("File unzipped")
def get_data_list(target_path, train_list_path, eval_list_path):
    """Generate the train/eval data lists."""
    # Get the folder names of all categories
    data_list_path=target_path
    class_dirs = os.listdir(data_list_path) 
    if '__MACOSX' in class_dirs:
        class_dirs.remove('__MACOSX')
    # Contents to be written to eval.txt and train.txt
    trainer_list=[]
    eval_list=[]
    class_label=0
    i = 0
    
    for class_dir in class_dirs:   
        path = os.path.join(data_list_path,class_dir)
        # Get all images
        img_paths = os.listdir(path)
        for img_path in img_paths:                                        # Walk through each image in the folder
            i += 1
            name_path = os.path.join(path,img_path)                       # Path for each image
            if i % 10 == 0:                                               # every tenth image goes to the eval set
                eval_list.append(name_path + "\t%d" % class_label + "\n")
            else: 
                trainer_list.append(name_path + "\t%d" % class_label + "\n") 
        
        train_parameters['label_dict'][str(class_label)] = class_dir
        class_label += 1
            
    # Shuffle
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image) 
    # Shuffle
    random.shuffle(trainer_list) 
    with open(train_list_path, 'a') as f2:
        for train_image in trainer_list:
            f2.write(train_image) 
 
    print('Generating data list complete!')
Parameter initialization
src_path=train_parameters['src_path']
target_path=train_parameters['target_path']
train_list_path=train_parameters['train_list_path']
eval_list_path=train_parameters['eval_list_path']
batch_size=train_parameters['train_batch_size']

# Unzip the raw data to the specified path
unzip_data(src_path,target_path)

# train.txt and eval.txt should be emptied before generating the data lists
with open(train_list_path, 'w') as f: 
    f.seek(0)
    f.truncate() 
with open(eval_list_path, 'w') as f: 
    f.seek(0)
    f.truncate() 
    
# Generate the data lists
get_data_list(target_path,train_list_path,eval_list_path)
File unzipped
Generating data list complete!
class Reader(Dataset):
    def __init__(self, data_path, mode='train'):
        """:param data_path: dataset path :param mode: train or eval"""
        super().__init__()
        self.data_path = data_path
        self.img_paths = []
        self.labels = []

        if mode == 'train':
            with open(os.path.join(self.data_path, "train.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
            for img_info in self.info:
                img_path, label = img_info.strip().split('\t')
                self.img_paths.append(img_path)
                self.labels.append(int(label))

        else:
            with open(os.path.join(self.data_path, "eval.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
            for img_info in self.info:
                img_path, label = img_info.strip().split('\t')
                self.img_paths.append(img_path)
                self.labels.append(int(label))


    def __getitem__(self, index):
        """Get one sample. :param index: file index number :return: image, label"""
        # Open the image file and get the label value
        img_path = self.img_paths[index]
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = img.resize((224, 224), Image.BILINEAR)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1)) / 255
        label = self.labels[index]
        label = np.array([label], dtype="int64")
        return img, label

    def print_sample(self, index: int = 0):
        print("File name", self.img_paths[index], "\t label value", self.labels[index])

    def __len__(self):
        return len(self.img_paths)
# Train data loading
train_dataset = Reader('/home/aistudio/',mode='train')
train_loader = paddle.io.DataLoader(train_dataset, batch_size=16, shuffle=True)
# Test data loading
eval_dataset = Reader('/home/aistudio/',mode='eval')
eval_loader = paddle.io.DataLoader(eval_dataset, batch_size = 8, shuffle=False)
train_dataset.print_sample(200)
print(train_dataset.__len__())
eval_dataset.print_sample(0)
print(eval_dataset.__len__())
print(eval_dataset.__getitem__(10)[0].shape)
print(eval_dataset.__getitem__(10)[1].shape)
File name /home/aistudio/data/dataset/Fluorite/fluorite_35.jpg 	 label value 14
730
File name /home/aistudio/data/dataset/Danburite/danburite_15.jpg 	 label value 18
81
(3, 224, 224)
(1,)
Batch=0
Batchs=[]
all_train_accs=[]
def draw_train_acc(Batchs, train_accs):
    title="training accs"
    plt.title(title, fontsize=24)
    plt.xlabel("batch", fontsize=14)
    plt.ylabel("acc", fontsize=14)
    plt.plot(Batchs, train_accs, color='green', label='training accs')
    plt.legend()
    plt.grid()
    plt.show()

all_train_loss=[]
def draw_train_loss(Batchs, train_loss):
    title="training loss"
    plt.title(title, fontsize=24)
    plt.xlabel("batch", fontsize=14)
    plt.ylabel("loss", fontsize=14)
    plt.plot(Batchs, train_loss, color='red', label='training loss')
    plt.legend()
    plt.grid()
    plt.show()

2. Define the model

Define a convolutional neural network for gemstone recognition
class MyCNN(nn.Layer):
    def __init__(self):
        super(MyCNN,self).__init__()
        self.conv0=nn.Conv2D(in_channels=3, out_channels=64, kernel_size=3, stride=1)
        self.pool0=nn.MaxPool2D(kernel_size=2, stride=2)
        self.conv1=nn.Conv2D(in_channels=64, out_channels=128, kernel_size=4, stride=1)
        self.pool1=nn.MaxPool2D(kernel_size=2, stride=2)
        self.conv2=nn.Conv2D(in_channels=128, out_channels=50, kernel_size=5)
        self.pool2=nn.MaxPool2D(kernel_size=2, stride=2)
        self.conv3=nn.Conv2D(in_channels=50, out_channels=50, kernel_size=5)
        self.pool3=nn.MaxPool2D(kernel_size=2, stride=2)
        self.conv4=nn.Conv2D(in_channels=50, out_channels=50, kernel_size=5)
        self.pool4=nn.MaxPool2D(kernel_size=2, stride=2)
        self.fc1=nn.Linear(in_features=50*3*3, out_features=25)

    def forward(self, input):
        print("input.shape:", input.shape)
        x=self.conv0(input)
        print("x.shape:",x.shape)
        x=self.pool0(x)        
        print('x0.shape:',x.shape)
        x=self.conv1(x)
        print(x.shape)
        x=self.pool1(x)
        print('x1.shape:',x.shape)
        x=self.conv2(x)
        print(x.shape)
        x=self.pool2(x)
        print('x2.shape:',x.shape)
        x=self.conv3(x)
        print(x.shape)
        x=self.pool3(x)
        print('x3.shape:',x.shape)
        x=self.conv4(x)
        print(x.shape)
        x=self.pool4(x)
        print('x4.shape:',x.shape)
        x=paddle.reshape(x, shape=[-1, 50*3*3])
        print('x.shape after reshape:', x.shape)
        y=self.fc1(x)
        print('y.shape:', y.shape)
        # input.shape: [16, 3, 224, 224]
        # x.shape: [16, 64, 222, 222]
        # x0.shape: [16, 64, 111, 111]
        # [16, 128, 108, 108]
        # x1.shape: [16, 128, 54, 54]
        # [16, 50, 50, 50]
        # x2.shape: [16, 50, 25, 25]
        # [16, 50, 21, 21]
        # x3.shape: [16, 50, 10, 10]
        # [16, 50, 6, 6]
        # x4.shape: [16, 50, 3, 3]
        return y       
        
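The feature-map sizes printed in forward() can also be derived by hand. A minimal helper (not part of the original notebook; the function name is illustrative) for a valid, no-padding convolution followed by a 2x2 max pool:

def conv_pool_out(size, kernel, stride=1, pool=2):
    """Side length after a no-padding conv and a 2x2 max pool (floor mode)."""
    conv_out = (size - kernel) // stride + 1
    return conv_out // pool

s = 224
for k in (3, 4, 5, 5, 5):   # kernel sizes of conv0..conv4 in MyCNN
    s = conv_pool_out(s, k)
    print(s)                 # 111, 54, 25, 10, 3
print(50 * s * s)            # 450 -> in_features of fc1

With only the first three stages (kernels 3, 4, 5) the map is 25x25, which is where the 50*25*25 input size of fc1 in MyCNN2 below comes from.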
Define a second, shallower convolutional neural network (three conv-pool stages) for gemstone recognition
class MyCNN2(nn.Layer):
    def __init__(self):
        super(MyCNN2,self).__init__()
        self.conv0 = nn.Conv2D(in_channels= 3,out_channels=64, kernel_size=3,stride=1)
        self.pool0 = nn.MaxPool2D(kernel_size=2,stride=2)
        self.conv1 = nn.Conv2D(in_channels = 64,out_channels=128,kernel_size=4,stride = 1)
        self.pool1 = nn.MaxPool2D(kernel_size=2,stride=2)
        self.conv2 = nn.Conv2D(in_channels= 128,out_channels=50,kernel_size=5)
        self.pool2 = nn.MaxPool2D(kernel_size=2,stride=2)
        self.fc1 = nn.Linear(in_features=50*25*25,out_features=25)


    def forward(self, input):
        x = self.conv0(input)
        x = self.pool0(x)
        print("x:", x.shape)
        x = self.conv1(x)
        x = self.pool1(x)
        print("x:", x.shape)
        x = self.conv2(x)
        x = self.pool2(x)
        print("x:", x.shape)
        x = paddle.reshape(x, shape=[-1, 50*25*25])
        y = self.fc1(x)
        
        return y

3. Training Model — CNN

model=MyCNN() # model instantiation
model.train() # Training mode
cross_entropy = paddle.nn.CrossEntropyLoss()
opt=paddle.optimizer.SGD(learning_rate=train_parameters['learning_strategy']['lr'],\
                                                    parameters=model.parameters())

epochs_num=train_parameters['num_epochs'] # number of epochs
for pass_num in range(train_parameters['num_epochs']):
    for batch_id,data in enumerate(train_loader()):
        image = data[0]
        label = data[1]
        predict=model(image) # Data is passed into the model
        loss=cross_entropy(predict,label)
        acc=paddle.metric.accuracy(predict,label)# Calculation accuracy
        if batch_id != 0 and batch_id % 5 == 0:   # log every 5 batches
            Batch = Batch+5 
            Batchs.append(Batch)
            all_train_loss.append(loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0]) 
            print("epoch:{},step:{},train_loss:{},train_acc:{}".format(pass_num,batch_id,loss.numpy(),acc.numpy()))        
        loss.backward()       
        opt.step()
        opt.clear_grad()   # reset gradients
paddle.save(model.state_dict(),'MyCNN')# Save the model
draw_train_acc(Batchs,all_train_accs)
draw_train_loss(Batchs,all_train_loss)
y.shape: [16, 25]
(Output truncated: this training run was interrupted with a KeyboardInterrupt while iterating train_loader().)

4. Model Evaluation — CNN

# Model evaluation
para_state_dict = paddle.load("MyCNN") 
model = MyCNN()
model.set_state_dict(para_state_dict)  # Load model parameters
model.eval()  # Evaluation mode

accs = []

for batch_id,data in enumerate(eval_loader()):  # validation set
    image=data[0]
    label=data[1]     
    predict=model(image)       
    acc=paddle.metric.accuracy(predict,label)
    accs.append(acc.numpy()[0])
avg_acc = np.mean(accs)
print("The accuracy of the current model on the verification set is :",avg_acc)

5. Model Prediction — CNN

def unzip_infer_data(src_path, target_path):
    """Unzip the prediction (test) dataset."""
    if(not os.path.isdir(target_path)):     
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()


def load_image(img_path):
    """Preprocess an image for prediction."""
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((224, 224), Image.BILINEAR)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC to CHW
    img = img / 255                 # Normalize pixel values
    return img


infer_src_path = '/home/aistudio/data/data55032/archive_test.zip'
infer_dst_path = '/home/aistudio/data/archive_test'
unzip_infer_data(infer_src_path,infer_dst_path)

para_state_dict = paddle.load("MyCNN")
model = MyCNN()
model.set_state_dict(para_state_dict)  # Load model parameters
model.eval()  # Evaluation mode

# Show prediction pictures
infer_path='data/archive_test/alexandrite_3.jpg'
img = Image.open(infer_path)
plt.imshow(img)          # Draw the image from the array
plt.show()               # display image
# Preprocess the prediction images
infer_imgs = []
infer_imgs.append(load_image(infer_path))
infer_imgs = np.array(infer_imgs)
label_dic = train_parameters['label_dict']
for i in range(len(infer_imgs)):
    data = infer_imgs[i]
    dy_x_data = np.array(data).astype('float32')
    dy_x_data = dy_x_data[np.newaxis, :, :, :]
    img = paddle.to_tensor(dy_x_data)
    out = model(img)
    lab = np.argmax(out.numpy())  #argmax(): Returns the index of the largest number
    print("The {} sample is predicted to be {}, and the real label is {}".format(i+1,label_dic[str(lab)],infer_path.split('/')[-1].split("_") [0]))       
print("The end")
