from IPython import display
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random
%matplotlib inline
使用此方法时,绘制命令的输出将在前端显示,就像Jupyter笔记本一样,直接显示在生成命令的代码单元格的下方,生成的绘图也将存储在笔记本文档中。不过这个方法好像只适用于Jupyter notebook和Jupyter QtConsole。
1.生成数据集
# 1. Generate a synthetic dataset: y = X·w + b + noise, with ground-truth
# parameters w = [2, -3.4] and b = 4.2.
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
# 1000 samples with 2 features each, drawn from a standard normal.
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
# Exact linear relation between features and labels.
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# Add small Gaussian observation noise so the fit is not trivially exact.
labels += nd.random.normal(scale=0.01, shape=labels.shape)
# Notebook cell expression: display the first sample (output shown below).
features[0],labels[0]
(
[1.1630785 0.4838046]
<NDArray 2 @cpu(0)>,
[4.879625]
<NDArray 1 @cpu(0)>)
def use_svg_display():
    """Render matplotlib figures as SVG (vector graphics) in the notebook."""
    # Display plots as vector graphics.
    # NOTE(review): display.set_matplotlib_formats is deprecated in newer
    # IPython releases in favor of
    # matplotlib_inline.backend_inline.set_matplotlib_formats — confirm the
    # target IPython version before upgrading.
    display.set_matplotlib_formats('svg')
def set_figsize(figsize=(3.5, 2.5)):
    """Switch to SVG output and set the default matplotlib figure size.

    Parameters
    ----------
    figsize : tuple
        (width, height) of the figure in inches.
    """
    use_svg_display()
    # Set the default figure size for all subsequent plots.
    plt.rcParams['figure.figsize'] = figsize
# Plot labels against the second feature; the linear trend (slope ≈ -3.4)
# should be visible through the noise.
set_figsize()
plt.scatter(features[:, 1].asnumpy(), labels.asnumpy(),1); # trailing semicolon suppresses the repr so only the figure is shown
2.读取数据集
# This function is also saved in the d2lzh package for later use.
def data_iter(batch_size, features, labels):
    """Yield random minibatches of (features, labels).

    Shuffles the sample indices once, then walks them in chunks of
    ``batch_size``; the final chunk is smaller when the number of samples
    is not divisible by the batch size.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # samples are read in random order
    for start in range(0, total, batch_size):
        # A slice never overruns the list, so no explicit min() is needed.
        picked = nd.array(order[start:start + batch_size])
        # take() returns the elements at the given indices.
        yield features.take(picked), labels.take(picked)
batch_size = 10

# Pull a single minibatch from the iterator to sanity-check its output.
sample_X, sample_y = next(data_iter(batch_size, features, labels))
print(sample_X, sample_y)
[[ 0.8324502 0.40138802]
[-0.52868146 -0.29051137]
[ 0.42972964 -0.8840267 ]
[ 1.2927808 -0.5197495 ]
[ 1.9283639 1.104401 ]
[-0.59701055 -0.75448406]
[-0.3610099 0.18343352]
[ 0.69407785 -1.6696625 ]
[-2.030718 -0.17273481]
[-0.5400787 0.4005976 ]]
<NDArray 10x2 @cpu(0)>
[ 4.5144353 4.133816 8.061061 8.550262 4.299974 5.558471
2.8409128 11.26492 0.7255566 1.7483729]
<NDArray 10 @cpu(0)>
3.初始化模型参数
# 3. Initialize model parameters: small random weights, zero bias.
w = nd.random.normal(scale=0.01,shape=(num_inputs,1))
b = nd.zeros(shape=(1,))
# attach_grad() allocates the memory autograd needs to store the gradients.
w.attach_grad()
b.attach_grad()
4.定义模型
# This function is also saved in the d2lzh package for later use.
def linreg(X, w, b):
    """Linear regression model: return X·w + b (b is broadcast)."""
    weighted = nd.dot(X, w)
    return weighted + b
5.定义损失函数
# This function is also saved in the d2lzh package for later use.
def squared_loss(y_hat, y):
    """Halved squared error, elementwise.

    ``y`` is reshaped to match ``y_hat`` so that, e.g., a (n,) label
    vector compares correctly against (n, 1) predictions.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2
6.定义优化算法
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent, updating each param in place.

    The stored gradient is divided by ``batch_size`` to turn the summed
    minibatch gradient into an average before taking the step.
    """
    for p in params:
        # [:] writes into the existing array rather than rebinding the
        # name — presumably so the buffer attach_grad() set up stays
        # associated with the parameter.
        p[:] = p - lr * p.grad / batch_size
7.训练模型
lr = 0.03 # learning rate
num_epochs = 3 # number of passes over the training data
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    # Each epoch uses every sample in the training set once (assuming the
    # sample count is divisible by the batch size; the last batch is smaller
    # otherwise). X and y are the minibatch features and labels.
    for X,y in data_iter(batch_size,features,labels):
        with autograd.record(): # record() asks MXNet to track gradient-related computation
            l = loss(net(X,w,b),y) # l is the loss on minibatch (X, y)
        l.backward() # gradient of the minibatch loss w.r.t. the model parameters
        sgd([w,b],lr,batch_size) # update parameters by minibatch SGD
    # Full-dataset loss after the epoch, for progress reporting.
    train_l = loss(net(features,w,b),labels)
    print('epoch%d, loss%f' % (epoch+1,train_l.mean().asnumpy()))
epoch1, loss0.035120
epoch2, loss0.000121
epoch3, loss0.000049
# Compare the learned weights against the ground truth (cell output below).
true_w,w
([2, -3.4],
[[ 1.9993292]
[-3.3993492]]
<NDArray 2x1 @cpu(0)>)
# Compare the learned bias against the ground truth (cell output below).
true_b,b
(4.2,
[4.1999245]
<NDArray 1 @cpu(0)>)