All of these utilities live in keras.preprocessing, which has three submodules: text, sequence, and image.
# Text preprocessing
txt = "My name is maoli.maoli don't like coding."
Text preprocessing typically involves four steps: splitting the text into tokens, building a word index, padding the sequences to a common length, and labeling.
from keras.preprocessing.text import text_to_word_sequence # split text into a list of words
out = text_to_word_sequence(txt) # lower=True by default
print(out) # fills the same role as jieba does for Chinese
['my', 'name', 'is', 'maoli', 'maoli', "don't", 'like', 'coding']
out1 = text_to_word_sequence(txt,filters='maoli') # strips every character in 'maoli'
print(out1)
['y', 'n', 'e', 's', '.', 'd', "n't", 'ke', 'c', 'd', 'ng.']
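filters is just a string of characters to strip before splitting, so passing 'maoli' removes those five letters individually; it also replaces the default punctuation filter, which is why the periods survive above. A quick sketch of the other default, lower (expected output shown below the call):
out_cased = text_to_word_sequence(txt, lower=False) # keep the original casing
print(out_cased)
['My', 'name', 'is', 'maoli', 'maoli', "don't", 'like', 'coding']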
# Splitting Chinese text
chn = '我的名字叫毛利。我不喜欢写码'
out2 = text_to_word_sequence(chn)
out3 = text_to_word_sequence(chn,filters='。')
print(out2)
print(out3)
['我的名字叫毛利。我不喜欢写码']
['我的名字叫毛利', '我不喜欢写码']
# this does not really work for Chinese; use jieba instead
import jieba
# cut returns a generator while lcut returns a list; cut_all=True switches to full mode, which emits every possible segmentation
out4 = jieba.lcut(chn,cut_all=False)
out5 = jieba.lcut(chn,cut_all=True)
print(out4)
print(out5)
['我', '的', '名字', '叫', '毛利', '。', '我', '不', '喜欢', '写码']
['我', '的', '名字', '叫', '毛利', '', '', '我', '不', '喜欢', '写', '码']
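If you still want to run jieba's output through the Keras text utilities, one common trick is to re-join the tokens with spaces first. A minimal sketch:
chn_joined = ' '.join(jieba.lcut(chn)) # '我 的 名字 叫 毛利 。 我 不 喜欢 写码'
print(text_to_word_sequence(chn_joined, filters='。'))
['我', '的', '名字', '叫', '毛利', '我', '不', '喜欢', '写码']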
print(out)
['my', 'name', 'is', 'maoli', 'maoli', "don't", 'like', 'coding']
# sort in reverse alphabetical order
out.sort(reverse=True)
print(out)
['name', 'my', 'maoli', 'maoli', 'like', 'is', "don't", 'coding']
import numpy as np
# build a word index
word_index = dict(list(zip(out,np.arange(len(out)))))
word_index
{'name': 0, 'my': 1, 'maoli': 3, 'like': 4, 'is': 5, "don't": 6, 'coding': 7}
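Note that because 'maoli' occurs twice in out, the later (word, index) pair from zip overwrites the earlier one, which is why index 2 is missing above. To give each distinct word a gap-free id, deduplicate first; a minimal sketch:
word_index_unique = dict(zip(sorted(set(out), reverse=True), np.arange(len(set(out)))))
word_index_unique
{'name': 0, 'my': 1, 'maoli': 2, 'like': 3, 'is': 4, "don't": 5, 'coding': 6}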
# pad_sequences pads sequences to a common length
from keras.preprocessing.sequence import pad_sequences
x = [[1,2,3],[4,5],[6,7,8,9]]
y0 = pad_sequences(x)
y1 = pad_sequences(x,maxlen=5)
print(y0)
print('-------------'*10)
print(y1)
[[0 1 2 3]
[0 0 4 5]
[6 7 8 9]]
----------------------------------------------------------------------------------------------------------------------------------
[[0 0 1 2 3]
[0 0 0 4 5]
[0 6 7 8 9]]
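Both the padding and the truncation happen at the front by default; padding='post' and truncating='post' move them to the end. A minimal sketch:
y2 = pad_sequences(x, maxlen=3, padding='post', truncating='post')
print(y2)
[[1 2 3]
 [4 5 0]
 [6 7 8]]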
# word-level tokenization with the Tokenizer class
from keras.preprocessing.text import Tokenizer
somestr = ['i am dalao,my name is maoli','maoli is very cool']
tok = Tokenizer()
tok.fit_on_texts(somestr)
tok.word_index
{'is': 1,
'maoli': 2,
'i': 3,
'am': 4,
'dalao': 5,
'my': 6,
'name': 7,
'very': 8,
'cool': 9}
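Once fitted, the Tokenizer can turn whole texts into integer sequences (indices start at 1, since 0 is reserved for padding), which chains naturally into pad_sequences. A minimal sketch:
seqs = tok.texts_to_sequences(somestr)
print(seqs)
[[3, 4, 5, 6, 7, 1, 2], [2, 1, 8, 9]]
print(pad_sequences(seqs))
[[3 4 5 6 7 1 2]
 [0 0 0 2 1 8 9]]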
# Image preprocessing
# the ImageDataGenerator class
from keras.preprocessing.image import ImageDataGenerator
# the CIFAR-10 example from the official docs
from keras.datasets import cifar10
from keras.utils import np_utils
num_classes = 10  # CIFAR-10 has ten classes
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
# compute quantities needed for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(x_train)
# fit the model on batches with real-time data augmentation:
model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
                    steps_per_epoch=len(x_train) / 32, epochs=epochs)
# here is a more "manual" example
for e in range(epochs):
    print('Epoch', e)
    batches = 0
    for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32):
        model.fit(x_batch, y_batch)
        batches += 1
        if batches >= len(x_train) / 32:
            # we have to break the loop by hand because
            # the generator loops indefinitely
            break
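When the images sit on disk instead of in memory, the same generator can stream them with flow_from_directory, which expects one subfolder per class. A minimal sketch, where 'data/train' is a hypothetical directory layout; note also that in recent Keras versions model.fit accepts generators directly and fit_generator is deprecated:
train_gen = datagen.flow_from_directory(
    'data/train',          # hypothetical layout: data/train/<class_name>/*.png
    target_size=(32, 32),  # resize every image on the fly
    batch_size=32,
    class_mode='categorical')
model.fit_generator(train_gen, steps_per_epoch=train_gen.samples // 32, epochs=epochs)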
Keras models
There are two main types of models in Keras: the Sequential model and the Model class used with the functional API.
# here we focus on the functional API
from keras.models import Model
from keras.layers import Input, Dense
a = Input(shape=(32,))
b = Dense(32)(a)
model = Model(inputs=a, outputs=b)
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) (None, 32) 0
_________________________________________________________________
dense_2 (Dense) (None, 32) 1056
=================================================================
Total params: 1,056
Trainable params: 1,056
Non-trainable params: 0
_________________________________________________________________
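All 1,056 parameters belong to the Dense layer: 32 inputs × 32 units = 1,024 weights, plus 32 biases, for 1,056 in total; the Input layer holds no weights.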
Model visualization
from keras.utils import plot_model
plot_model(model, to_file='model.png')
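(plot_model needs the pydot package and a system installation of Graphviz; without them it raises an import error.)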
# Training visualization
import matplotlib.pyplot as plt
history = model.fit(x, y, validation_split=0.25, epochs=50, batch_size=16, verbose=1)
# plot training & validation accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
# plot training & validation loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()
Hands-on: MNIST handwritten digits
import keras
from keras import layers
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from keras.datasets import mnist
(train_image,train_label),(test_image,test_label) = mnist.load_data()
train_image= np.expand_dims(train_image,axis=-1)
test_image= np.expand_dims(test_image,axis=-1)
model = keras.Sequential()
model.add(layers.Conv2D(64,(3,3),activation='relu',input_shape=(28,28,1)))
model.add(layers.Conv2D(64,(3,3),activation='relu'))
model.add(layers.MaxPool2D())
model.add(layers.Flatten())
model.add(layers.Dense(256,activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10,activation = 'softmax'))
model.compile(optimizer='adam',loss ='sparse_categorical_crossentropy',metrics=['acc'])
model.fit(train_image,train_label,epochs=5,batch_size=512)
Epoch 1/5
60000/60000 [==============================] - 237s 4ms/step - loss: 1.6534 - acc: 0.8529
Epoch 2/5
60000/60000 [==============================] - 237s 4ms/step - loss: 0.0784 - acc: 0.9762
Epoch 3/5
60000/60000 [==============================] - 241s 4ms/step - loss: 0.0521 - acc: 0.9840
Epoch 4/5
60000/60000 [==============================] - 240s 4ms/step - loss: 0.0399 - acc: 0.9879
Epoch 5/5
60000/60000 [==============================] - 236s 4ms/step - loss: 0.0314 - acc: 0.9896
<keras.callbacks.History at 0x165e8383438>
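To check generalization, evaluate on the held-out test set. A minimal sketch; note the images above went in as raw uint8 values, and scaling them to [0, 1] (dividing by 255.0) before training usually speeds up convergence:
test_loss, test_acc = model.evaluate(test_image, test_label)
print(test_acc)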
Finally, I hope you learned something here and keep growing.