```python
import torch
from IPython import display
from d2l import torch as d2l
```
```python
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
```
Set the minibatch size to 256 and create the data iterators for the training and test sets.
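To see what the iterators yield, here is a quick sanity check I added (not from the original post): each batch holds 256 grayscale 28×28 images plus their labels.

```python
X, y = next(iter(train_iter))
print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])
```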
```python
num_inputs = 784
num_outputs = 10
# Initialize W to a tensor with mean 0 and standard deviation 0.01
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
```
- `num_inputs`: each image in the dataset is 28×28 with a single channel, so flattening it gives an input vector of length 28 × 28 = 784.
- `num_outputs`: there are ten categories, so the output is a vector of length 10.
```python
def softmax(X):
    X_exp = torch.exp(X)
    partition = X_exp.sum(1, keepdim=True)  # sum over each row, keeping the dimensions
    return X_exp / partition  # broadcasting is used here
```
Define the softmax operation: exponentiate every element, then divide each row by its sum, so that each row becomes a probability distribution.
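As a quick check (my own addition, not from the post): applying `softmax` to a random matrix should yield non-negative rows that each sum to 1.

```python
X = torch.normal(0, 1, (2, 5))
X_prob = softmax(X)
print(X_prob)         # all entries non-negative
print(X_prob.sum(1))  # tensor([1., 1.]) up to floating-point error
```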
```python
def net(X):
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)
```
Define the model: flatten each image into a row vector of length 784, apply the linear transformation `XW + b`, then softmax.
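Reusing the batch `X` fetched in the sanity check above (again my own check), the output should have one row of ten class probabilities per example:

```python
print(net(X).shape)  # torch.Size([256, 10])
```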
```python
def cross_entropy(y_hat, y):
    return - torch.log(y_hat[range(len(y_hat)), y])
```
Define the cross-entropy loss: for each example, pick out the predicted probability of its true class by fancy indexing and take the negative log.
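A toy illustration of the indexing trick (my own example): with two examples and three classes, `y_hat[[0, 1], y]` picks out the probability each example assigns to its true label.

```python
y = torch.tensor([0, 2])
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
print(y_hat[[0, 1], y])         # tensor([0.1000, 0.5000])
print(cross_entropy(y_hat, y))  # tensor([2.3026, 0.6931])
```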
```python
def accuracy(y_hat, y):
    # y_hat has more than one dimension and more than one column
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())
```
This function computes and returns the number of correct predictions.
- `argmax()` takes the index of the largest element in each row, turning `y_hat` into a vector of predicted class indices.
- `cmp` is a bool tensor: comparing `y_hat` with `y` element by element shows which predictions are right.
- Finally, `cmp` is cast to the data type of `y`, i.e. True/False become 1/0, and the sum gives the total number of correct predictions.
- `accuracy(y_hat, y) / len(y)` divides by the total number of examples to obtain the accuracy rate.
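Reusing the toy `y_hat` and `y` from above: the first example's argmax is class 2 but its label is 0, while the second is predicted correctly, so the accuracy rate comes out to 0.5.

```python
print(accuracy(y_hat, y) / len(y))  # 0.5
```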
```python
class Accumulator:
    """Sum over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n  # initialize a list of length n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
```
The `Accumulator` class implements an accumulator: each call to `add` stacks the incoming values onto the running totals.
- `*args` is used when the number of arguments is not known in advance and the arguments do not need names. Since the length chosen at initialization varies, the number of arguments to `add` must vary too; but for `add` to work correctly, the number of values passed in must equal `n`.
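A minimal usage sketch (my own example):

```python
metric = Accumulator(2)
metric.add(3, 5)   # data is now [3.0, 5.0]
metric.add(1, 2)   # data is now [4.0, 7.0]
print(metric[0], metric[1])  # 4.0 7.0
```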
```python
def evaluate_accuracy(net, data_iter):
    if isinstance(net, torch.nn.Module):  # check the type
        net.eval()
    metric = Accumulator(2)  # two slots: number of correct predictions, total number of predictions
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
```
Computes the accuracy of the model on the given dataset.
- `net.eval()` sets the model to evaluation mode.
- `accuracy(net(X), y)` counts the correctly predicted samples in the batch.
- `y.numel()` is the total number of samples in the batch.

Each batch is evaluated and accumulated to obtain the totals over the whole dataset.
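Before training, the randomly initialized model amounts to guessing among ten classes, so its accuracy should be close to 0.1 (approximately, since the initial weights are random):

```python
print(evaluate_accuracy(net, test_iter))  # roughly 0.1
```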
```python
def updater(batch_size):
    return d2l.sgd([W, b], lr, batch_size)
```
This `sgd` is the function implemented from scratch in Section 3.2; it updates the weights and bias by minibatch stochastic gradient descent.
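For reference, the Section 3.2 implementation looks roughly like this (a sketch of the d2l version, reproduced from memory):

```python
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
```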
```python
def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train the model for one epoch (defined in Chapter 3)."""
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of correct predictions, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update the parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Using PyTorch's built-in optimizer and loss function;
            # the built-in loss returns the batch mean, so scale by len(y)
            updater.zero_grad()
            l.backward()
            updater.step()
            metric.add(float(l) * len(y), accuracy(y_hat, y),
                       y.size().numel())
        else:
            # Using the custom updater and loss function;
            # the custom loss returns one value per example, so sum it
            l.sum().backward()
            updater(X.shape[0])
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return the training loss and the training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]

# The _ch3 suffix marks this as the training function of Chapter 3
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  #@save
    """Train the model (defined in Chapter 3)."""
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
```
- `assert` evaluates an expression and raises an `AssertionError` when the condition is false; here it sanity-checks that the final loss and accuracies land in a reasonable range.
```python
lr = 0.1
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
```
The original code also included visualizations of the training process, but I deleted them here.