Numpy¶
- Fit a third-order polynomial to the sine function using numpy
- Manually implement the forward and backward passes through the network with numpy
In [79]:
import numpy as np
import math
# Create random input and output data
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)
# Randomly initialize weights
a, b, c, d = np.random.randn(), np.random.randn(), np.random.randn(), np.random.randn()
learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)
    # Backprop to compute gradients of a, b, c, d with respect to loss
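    # Since loss = sum((y_pred - y)^2) and y_pred = a + b x + c x^2 + d x^3,
    # the chain rule gives d(loss)/da = sum(2 (y_pred - y)),
    # d(loss)/db = sum(2 (y_pred - y) x), and similarly for c and d.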
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()
    # Update weights
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d
print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
99 886.6224037631865
199 629.2292060666061
299 447.34710555467234
399 318.80931904283386
499 227.9626428891946
599 163.74974434293713
699 118.35889657765176
799 86.2707214623376
899 63.58510974912219
999 47.54590296582326
1099 36.20518694900068
1199 28.18616045469913
1299 22.515614250538913
1399 18.50557342953937
1499 15.669666192577145
1599 13.664024582838099
1699 12.245517336651243
1799 11.242229350041601
1899 10.532595262323262
1999 10.030649060639465
Result: y = -0.03677535675138621 + 0.8542833949812978 x + 0.0063443609841918784 x^2 + -0.09298083844908889 x^3
PyTorch: Tensors¶
- Numpy cannot make use of GPUs to accelerate its numerical computations.
- Fit a third-order polynomial to the sine function using PyTorch tensors
- Manually implement the forward and backward passes through the network
In [80]:
import torch
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU
# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)
learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)
    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()
    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
99 677.9607543945312
199 473.0617370605469
299 331.2071533203125
399 232.90127563476562
499 164.70883178710938
599 117.36085510253906
699 84.45564270019531
799 61.56745910644531
899 45.633277893066406
999 34.53114700317383
1099 26.789535522460938
1199 21.387123107910156
1299 17.614286422729492
1399 14.977657318115234
1499 13.133779525756836
1599 11.84348201751709
1699 10.940007209777832
1799 10.307008743286133
1899 9.863264083862305
1999 9.552023887634277
Result: y = -0.027143917977809906 + 0.8481974601745605 x + 0.00468277744948864 x^2 + -0.09211516380310059 x^3
Autograd¶
- In the examples above, we had to implement the forward and backward passes of the neural network by hand
- For large, complex networks this quickly becomes intractable
- Use automatic differentiation to compute the backward pass through the network automatically
- Setting a tensor's attribute x.requires_grad=True tells autograd to compute gradients for it during backpropagation; x.grad will then store the gradient value (a minimal sketch follows this list)
- Use PyTorch Tensors and autograd to fit the sine function with the third-order polynomial example
- We no longer need to implement the backward pass through the network by hand
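A minimal sketch of this mechanism (an addition, not part of the original notebook): with requires_grad=True, calling backward() on a scalar populates the .grad attribute with the derivative.

import torch
x = torch.tensor(3.0, requires_grad=True)
y = x ** 2     # y is tracked by autograd
y.backward()   # computes dy/dx and stores it in x.grad
print(x.grad)  # tensor(6.), since dy/dx = 2x = 6.0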
In [81]:
dtype = torch.float
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.set_default_device(device)
x = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype)
y = torch.sin(x)
a = torch.randn((), dtype=dtype, requires_grad=True)
b = torch.randn((), dtype=dtype, requires_grad=True)
c = torch.randn((), dtype=dtype, requires_grad=True)
d = torch.randn((), dtype=dtype, requires_grad=True)
learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    # Compute loss; loss.item() gets the scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())
    # Use autograd to compute the backward pass. After this call, a.grad,
    # b.grad, c.grad and d.grad hold the gradient of the loss with respect
    # to a, b, c, d respectively.
    loss.backward()
    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad
        # Manually zero the gradients after updating weights
        a.grad, b.grad, c.grad, d.grad = None, None, None, None
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
99 195.7766876220703
199 136.6044464111328
299 96.26067352294922
399 68.72262573242188
499 49.90393829345703
599 37.02909851074219
699 28.210630416870117
799 22.163551330566406
899 18.012170791625977
999 15.159011840820312
1099 13.195876121520996
1199 11.843607902526855
1299 10.91112995147705
1399 10.267412185668945
1499 9.822571754455566
1599 9.514861106872559
1699 9.301766395568848
1799 9.154065132141113
1899 9.051584243774414
1999 8.980417251586914
Result: y = -0.011865215376019478 + 0.8507804870605469 x + 0.002046945970505476 x^2 + -0.09248258173465729 x^3
PyTorch: nn.Module¶
- The nn package defines a set of Modules, which are roughly equivalent to neural network layers
- A Module receives input tensors and computes output tensors
- The nn package also defines a set of useful loss functions (a quick cross-check follows this list)
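As a quick cross-check (a sketch added here, not from the original notebook), torch.nn.MSELoss with reduction='sum' computes exactly the sum-of-squared-errors loss we wrote by hand in the earlier cells:

import torch
loss_fn = torch.nn.MSELoss(reduction='sum')
y_pred = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([1.5, 2.0, 2.0])
# (1.0-1.5)^2 + (2.0-2.0)^2 + (3.0-2.0)^2 = 1.25
assert torch.isclose(loss_fn(y_pred, y), (y_pred - y).pow(2).sum())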
In [82]:
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# For this example, the output y is a linear function of (x, x^2, x^3), so
# we can consider it as a linear layer neural network. Let's prepare the
# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# In the above code, x.unsqueeze(-1) has shape (2000, 1), and p has shape
# (3,), for this case, broadcasting semantics will apply to obtain a tensor
# of shape (2000, 3)
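# Quick shape check (added as a sketch): broadcasting (2000, 1) against (3,)
# yields (2000, 3); the three columns are x, x^2, x^3.
assert xx.shape == (2000, 3)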
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-6
In [83]:
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    # Zero the gradients before running the backward pass.
    model.zero_grad()
    # Backward pass: compute the gradient of the loss with respect to
    # all learnable parameters in the model.
    loss.backward()
    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]
# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')
99 420.62548828125
199 285.8297119140625
299 195.3209686279297
399 134.4989013671875
499 93.59147644042969
599 66.0538558959961
699 47.499908447265625
799 34.98699188232422
899 26.540008544921875
999 20.83228302001953
1099 16.971538543701172
1199 14.357422828674316
1299 12.585514068603516
1399 11.383183479309082
1499 10.566455841064453
1599 10.011048316955566
1699 9.632920265197754
1799 9.375188827514648
1899 9.199324607849121
1999 9.079187393188477
Result: y = 0.012617216445505619 + 0.8460951447486877 x + -0.0021766796708106995 x^2 + -0.09181612730026245 x^3
PyTorch: optim¶
- So far we have updated the weights of our models by manually mutating the Tensors holding the learnable parameters, which is not a great approach.
- This is not a huge burden for simple optimization algorithms like stochastic gradient descent, but in practice we often train neural networks with more sophisticated optimizers such as AdaGrad, RMSProp, or Adam.
- The optim package in PyTorch provides many built-in optimizers that help us update model weights more effectively (see the sketch below).
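A minimal sketch (assuming `model` is any nn.Module, as in the cells below): switching optimizers only changes the constructor call; the zero_grad / backward / step pattern in the training loop is identical for all of them.

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
# optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-2)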
In [84]:
# -*- coding: utf-8 -*-
import torch
import math
class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
In [85]:
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = Polynomial3()
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)
    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
print(f'Result: {model.string()}')
99 2498.1796875
199 1662.6041259765625
299 1107.829345703125
399 739.3926391601562
499 494.63714599609375
599 331.995361328125
699 223.8843231201172
799 151.99658203125
899 104.17843627929688
999 72.35853576660156
1099 51.17639923095703
1199 37.06956481933594
1299 27.670612335205078
1399 21.405357360839844
1499 17.22696304321289
1599 14.438947677612305
1699 12.577527046203613
1799 11.33414077758789
1899 10.503029823303223
1999 9.947181701660156
Result: y = 0.0161358080804348 + 0.8276166915893555 x + -0.002783695003017783 x^2 + -0.08918772637844086 x^3
TensorBoard¶
To see what is happening, we print out some statistics as the model trains to get a sense for whether training is progressing.
- However, we can do much better than that: PyTorch integrates with TensorBoard, a tool designed for visualizing the results of neural network training runs.
- Set up TensorBoard.
- Write to TensorBoard.
- Inspect a model architecture using TensorBoard.
- Use TensorBoard to create interactive versions of the visualizations
- A couple of ways to inspect our training data
- How to track our model's performance as it trains
- How to assess our model's performance once it is trained.
In [86]:
# - Read in data and apply appropriate transforms (nearly identical to the prior tutorial).
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# transforms
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

# datasets
trainset = torchvision.datasets.FashionMNIST('./data',
                                             download=True,
                                             train=True,
                                             transform=transform)
testset = torchvision.datasets.FashionMNIST('./data',
                                            download=True,
                                            train=False,
                                            transform=transform)

# dataloaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)
In [87]:
# constant for classes
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')
# helper function to show an image
# (used in the `plot_classes_preds` function below)
def matplotlib_imshow(img, one_channel=False):
    if one_channel:
        img = img.mean(dim=0)
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
In [88]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()
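# Shape check (added as a sketch): FashionMNIST inputs are 1x28x28;
# conv1 (5x5) -> 24x24, pool -> 12x12, conv2 (5x5) -> 8x8, pool -> 4x4,
# which is why fc1 expects 16 * 4 * 4 input features.
assert net(torch.randn(4, 1, 28, 28)).shape == (4, 10)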
In [89]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
TensorBoard setup¶
- Now we'll set up TensorBoard, importing tensorboard from torch.utils and defining a SummaryWriter, our key object for writing information to TensorBoard.
In [90]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
from torch.utils.tensorboard import SummaryWriter
# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter('runs/fashion_mnist_experiment_1')
Writing to TensorBoard¶
- Now let's write an image to our TensorBoard - specifically, a grid - using make_grid.
In [91]:
# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)
# create grid of images
img_grid = torchvision.utils.make_grid(images)
# show images
matplotlib_imshow(img_grid, one_channel=True)
# write to tensorboard
writer.add_image('four_fashion_mnist_images', img_grid)
- Use TensorBoard's magic command %tensorboard to launch TensorBoard and view the visualizations produced during training.
- Load the tensorboard extension: %load_ext tensorboard
- Start the TensorBoard server: %tensorboard --logdir runs --port=6006
from IPython.display import IFrame
IFrame(src='http://localhost:6006/', width=1000, height=600)
Inspecting the model using TensorBoard¶
- One of TensorBoard's strengths is its ability to visualize complex model structures. Let's visualize the model we built.
In [94]:
# input_to_model runs one forward pass to generate the computation graph
writer.add_graph(model=net, input_to_model=images)
writer.close()
# After refreshing TensorBoard, you should see a "Graphs" tab like this:
IFrame(src='http://localhost:6006/', width=1000, height=600)
Adding a "Projector" to TensorBoard¶
- We can visualize a lower-dimensional representation of high-dimensional data via the add_embedding method
In [96]:
# helper function
def select_n_random(data, labels, n=100):
    '''
    Selects n random datapoints and their corresponding labels from a dataset
    '''
    assert len(data) == len(labels)
    # randperm returns a random permutation of the integers from 0 to len(data)-1
    perm = torch.randperm(len(data))
    return data[perm][:n], labels[perm][:n]
# select random images and their target indices
images, labels = select_n_random(trainset.data, trainset.targets)
# get the class labels for each image
class_labels = [classes[lab] for lab in labels]
# log embeddings
features = images.view(-1, 28 * 28)
writer.add_embedding(features,
                     metadata=class_labels,
                     label_img=images.unsqueeze(1))  # unsqueeze(1) inserts a new dimension at position 1, giving shape (n, 1, 28, 28)
writer.close()
IFrame(src='http://localhost:6006/', width=1000, height=600)
Tracking model training with TensorBoard¶
- In the previous example, we simply printed the model's running loss every 2000 iterations. Now we'll instead log the running loss to TensorBoard, along with a view into the predictions the model is making via the plot_classes_preds function.
- Let's train the model using the same training code as before, but writing results to TensorBoard every 1000 batches instead of printing to the console; this is done using the add_scalar function.
- In addition, as we train, we'll generate an image showing the model's predictions vs. the actual results on the four images included in that batch.
In [98]:
# helper functions
def images_to_probs(net, images):
    '''
    Generates predictions and corresponding probabilities from a trained
    network and a list of images
    '''
    output = net(images)
    # convert output probabilities to predicted class
    _, preds_tensor = torch.max(output, 1)
    preds = np.squeeze(preds_tensor.numpy())
    return preds, [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, output)]
In [99]:
def plot_classes_preds(net, images, labels):
    '''
    Generates matplotlib Figure using a trained network, along with images
    and labels from a batch, that shows the network's top prediction along
    with its probability, alongside the actual label, coloring this
    information based on whether the prediction was correct or not.
    Uses the "images_to_probs" function.
    '''
    preds, probs = images_to_probs(net, images)
    # plot the images in the batch, along with predicted and true labels
    fig = plt.figure(figsize=(12, 48))
    for idx in np.arange(4):
        ax = fig.add_subplot(1, 4, idx + 1, xticks=[], yticks=[])
        matplotlib_imshow(images[idx], one_channel=True)
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            classes[preds[idx]],
            probs[idx] * 100.0,
            classes[labels[idx]]),
            color=("green" if preds[idx] == labels[idx].item() else "red"))
    return fig
In [100]:
running_loss = 0.0
for epoch in range(1):  # loop over the dataset multiple times
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 1000 == 999:  # every 1000 mini-batches...
            # ...log the running loss
            writer.add_scalar('training loss',
                              running_loss / 1000,
                              epoch * len(trainloader) + i)
            # ...log a Matplotlib Figure showing the model's predictions on a
            # random mini-batch
            writer.add_figure('predictions vs. actuals',
                              plot_classes_preds(net, inputs, labels),
                              global_step=epoch * len(trainloader) + i)
            running_loss = 0.0
print('Finished Training')
Finished Training
- You can look at the Scalars tab to see the running loss plotted over the 15,000 iterations of training:
IFrame(src='http://localhost:6006/', width=1000, height=600)
- In addition, we can look at the predictions the model made on arbitrary batches throughout learning
IFrame(src='http://localhost:6006/', width=1000, height=600)
Assessing trained models with TensorBoard¶
- You'll see a "PR Curves" tab containing a precision-recall curve for each class.
In [103]:
# 1. gets the probability predictions in a test_size x num_classes Tensor
# 2. gets the preds in a test_size Tensor
# takes ~10 seconds to run
class_probs = []
class_label = []
with torch.no_grad():
    for data in testloader:
        images, labels = data
        output = net(images)
        class_probs_batch = [F.softmax(el, dim=0) for el in output]
        class_probs.append(class_probs_batch)
        class_label.append(labels)

test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
test_label = torch.cat(class_label)
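# Shape note (added for illustration): with the 10,000-image FashionMNIST
# test set, test_probs has shape (10000, 10) and test_label has shape (10000,).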
In [104]:
# helper function
def add_pr_curve_tensorboard(class_index, test_probs, test_label, global_step=0):
    '''
    Takes in a "class_index" from 0 to 9 and plots the corresponding
    precision-recall curve
    '''
    tensorboard_truth = test_label == class_index
    tensorboard_probs = test_probs[:, class_index]
    writer.add_pr_curve(classes[class_index],
                        tensorboard_truth,
                        tensorboard_probs,
                        global_step=global_step)
    writer.close()

# plot all the pr curves
for i in range(len(classes)):
    add_pr_curve_tensorboard(i, test_probs, test_label)
IFrame(src='http://localhost:6006/', width=1000, height=600)