from IPython import display
from matplotlib import pyplot as plt
from mxnet import autograd, nd
import random
%matplotlib inline

%matplotlib inline
With this magic command, the output of plotting commands is displayed in the frontend, directly below the code cell that produced it, just as in a Jupyter notebook, and the resulting plots are also stored in the notebook document. Note that this only works in Jupyter Notebook and Jupyter QtConsole.

1. Generate the dataset

num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape=labels.shape)
features[0], labels[0]
(
 [1.1630785 0.4838046]
 <NDArray 2 @cpu(0)>, 
 [4.879625]
 <NDArray 1 @cpu(0)>)
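In other words, each label is generated by the linear model $y = 2x_1 - 3.4x_2 + 4.2 + \epsilon$ with noise $\epsilon \sim \mathcal{N}(0, 0.01^2)$, i.e. $\mathbf{y} = \mathbf{X}\mathbf{w} + b + \boldsymbol{\epsilon}$ where $\mathbf{w} = [2, -3.4]^\top$ and $b = 4.2$. Training should recover $\mathbf{w}$ and $b$ from these noisy observations.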
def use_svg_display():
    # Display plots as vector graphics (SVG)
    display.set_matplotlib_formats('svg')
    
def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    # Set the figure size
    plt.rcParams['figure.figsize'] = figsize
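
Note: on recent IPython versions, display.set_matplotlib_formats is deprecated. If you see a deprecation warning, an equivalent call (assuming the matplotlib_inline package that ships with newer IPython) is:

from matplotlib_inline import backend_inline
backend_inline.set_matplotlib_formats('svg')  # same effect: render figures as SVG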

set_figsize()
plt.scatter(features[:, 1].asnumpy(), labels.asnumpy(), 1);  # the trailing semicolon suppresses the text output so only the plot is shown

[Figure: scatter plot of the second feature features[:, 1] against labels]

2. Read the dataset

# This function is saved in the d2lzh package for later use
def data_iter(batch_size, features, labels):
    """
    Purpose of this function:
    on each iteration, yield the features and labels of batch_size random examples.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # examples are read in random order
    for i in range(0, num_examples, batch_size):
        j = nd.array(indices[i:min(i + batch_size, num_examples)])
        yield features.take(j), labels.take(j)  # take gathers the elements at the given indices
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break
[[ 0.8324502   0.40138802]
 [-0.52868146 -0.29051137]
 [ 0.42972964 -0.8840267 ]
 [ 1.2927808  -0.5197495 ]
 [ 1.9283639   1.104401  ]
 [-0.59701055 -0.75448406]
 [-0.3610099   0.18343352]
 [ 0.69407785 -1.6696625 ]
 [-2.030718   -0.17273481]
 [-0.5400787   0.4005976 ]]
<NDArray 10x2 @cpu(0)> 
[ 4.5144353  4.133816   8.061061   8.550262   4.299974   5.558471
  2.8409128 11.26492    0.7255566  1.7483729]
<NDArray 10 @cpu(0)>
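
To make the take call in data_iter concrete, here is a small standalone sketch (not part of the original notebook): given an index NDArray, take gathers the corresponding rows along axis 0.

idx = nd.array([3, 0, 9])
print(features.take(idx))  # rows 3, 0 and 9 of features, an NDArray of shape (3, 2)

Note also that when num_examples is not divisible by batch_size, the min(...) in data_iter makes the last mini-batch smaller than batch_size.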

3. Initialize model parameters

w = nd.random.normal(scale=0.01, shape=(num_inputs, 1))
b = nd.zeros(shape=(1,))
# Call attach_grad to allocate the memory needed to store the gradients
w.attach_grad()
b.attach_grad()
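
After attach_grad, each parameter carries a .grad buffer of the same shape as the parameter itself, filled with zeros until backward is called. A quick inspection (not in the original notebook):

print(w.grad.shape, b.grad.shape)  # (2, 1) (1,)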

4. Define the model

# This function is saved in the d2lzh package for later use
def linreg(X, w, b):
    return nd.dot(X, w) + b
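
For an input X of shape (n, 2), nd.dot(X, w) has shape (n, 1), and b of shape (1,) is broadcast across all rows, so the model computes $\hat{\mathbf{y}} = \mathbf{X}\mathbf{w} + b$.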

5. Define the loss function

# This function is saved in the d2lzh package for later use
def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
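
This is the squared loss $\ell(\hat{y}^{(i)}, y^{(i)}) = \frac{1}{2}(\hat{y}^{(i)} - y^{(i)})^2$. The reshape matters: y_hat has shape (batch_size, 1) while y has shape (batch_size,), and subtracting them directly would broadcast to a (batch_size, batch_size) matrix instead of taking an elementwise difference.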

6. Define the optimization algorithm

def sgd(params, lr, batch_size):
    # Mini-batch stochastic gradient descent: update each parameter in place
    for param in params:
        param[:] = param - lr * param.grad / batch_size
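
Because the mini-batch loss l in the training loop below is a vector, l.backward() behaves like l.sum().backward(), so param.grad holds the gradient of the summed loss; dividing by batch_size turns it into the gradient of the average loss. The update is therefore $\theta \leftarrow \theta - \frac{\eta}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} \nabla_\theta \ell^{(i)}(\theta)$. The in-place assignment param[:] = ... is deliberate: it reuses the memory that attach_grad registered instead of rebinding param to a new NDArray.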

7. Train the model

lr = 0.03  # learning rate
num_epochs = 3  # number of epochs
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    # In each epoch, every example in the training set is used once
    # (assuming the number of examples is divisible by the batch size).
    # X and y are the features and labels of a mini-batch
    for X, y in data_iter(batch_size, features, labels):
        with autograd.record():  # record asks MXNet to record the computations needed for gradients
            l = loss(net(X, w, b), y)  # l is the loss on the mini-batch X and y
        l.backward()  # compute the gradient of the mini-batch loss w.r.t. the model parameters
        sgd([w, b], lr, batch_size)  # update the model parameters with mini-batch SGD
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().asnumpy()))


epoch 1, loss 0.035120
epoch 2, loss 0.000121
epoch 3, loss 0.000049
true_w, w
([2, -3.4], 
 [[ 1.9993292]
  [-3.3993492]]
 <NDArray 2x1 @cpu(0)>)
true_b, b
(4.2, 
 [4.1999245]
 <NDArray 1 @cpu(0)>)