(一)选题背景
(I)选题背景
蔬菜在我国人均消费量中占比最高。目前蔬菜大多依靠人工识别,但蔬菜种类繁多,人工识别效率较低、成本较高。为了提高效率、降低人工成本,本设计利用机器学习中的卷积神经网络对蔬菜进行识别,并希望通过这次课程设计对机器学习有更好的了解。
蔬菜在我国人均消费中占比最高。目前,蔬菜的识别多为人工识别,但蔬菜种类繁多。目前人工识别效率较低,人工成本较高。为了提高效率,降低人工成本,采用机器学习卷积神经网络对蔬菜进行识别。希望通过本课程设计能对机器学习有更好的了解。
(二)机器学习案例设计方案
(II)机器学习案例设计方案
下载数据集,整理和处理好数据集,利用keras建立训练模型,对图片进行识别。
下载数据集,对数据集进行整理和处理,利用keras建立训练模型,对图像进行识别。
参考来源:爱数科:http://idatascience.cn/,kaggle网站,猫狗大战,森林火灾图片识别。
资料来源:爱数科(http://idatascience.cn/)、Kaggle 网站。
数据集来源:kaggle,网址:https://www.kaggle.com/
数据集来源:Kaggle,网址:https://www.kaggle.com/
(三)机器学习的实现步骤
(三)机器学习的实施步骤
1.从kaggle网站中下载数据集
1.从kaggle网站下载数据集
2.导入需要的包
2.导入所需的包
#导入包
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras import layers
from PIL import Image
import random
from tensorflow.keras.preprocessing import image
import shutil
import glob
# Configure matplotlib fonts (the plots below use Chinese axis labels).
plt.rcParams['font.sans-serif']=['SimHei']  # use the SimHei font for sans-serif text
plt.rcParams['axes.unicode_minus']=False  # render the minus sign as an ASCII hyphen rather than the Unicode minus
3.获取路径设置图片规格
3.获取路径并设置图像规格
# Project root directory. NOTE: this is hard-coded for the author's machine,
# not detected at runtime; edit it to match your own setup.
currentPath = 'C://Users//23874//Desktop//pyhon机器学习'
# Root folder of the image dataset.
allPicPath = os.path.join(currentPath, 'Vegetable Images')

# Image height and width (pixels) used when loading images.
IMG_HEIGHT = IMG_WIDTH = 128
SIZE_TUPLE = (IMG_HEIGHT, IMG_WIDTH)
# Number of images per batch.
BATCH_SIZE = 128
4.加载数据集
4.加载数据集
def _load_split(split_name):
    """Return ``(directory, dataset)`` for one sub-folder of the dataset root.

    The dataset is built with ``image_dataset_from_directory`` using the
    module-level ``SIZE_TUPLE`` image size and ``BATCH_SIZE`` batch size.
    """
    split_dir = os.path.join(currentPath, 'Vegetable Images', split_name)
    dataset = tf.keras.preprocessing.image_dataset_from_directory(
        split_dir,
        image_size=SIZE_TUPLE,
        batch_size=BATCH_SIZE,
    )
    return split_dir, dataset


# Load the training, test and validation sets from their folders, in that order.
trainPath, trainDataSet = _load_split('train')
testPath, testDataSet = _load_split('test')
valPath, valDataSet = _load_split('validation')
获取到数据集数量
采集的数据集数量
5.查看图像及对应的标签
5.查看图像和相应标签
# Preview the first image of six training batches in a 3x3 grid.
# image_dataset_from_directory shuffles by default, so the previews already
# differ between runs; a random batch count is unnecessary and could yield
# fewer than six pictures.
PREVIEW_COUNT = 6
plt.figure(figsize=(10, 8), dpi=100)
for i, (img, label) in enumerate(trainDataSet.take(PREVIEW_COUNT)):
    plt.subplot(3, 3, i + 1)
    # Convert the tensor to a uint8 numpy array so imshow draws it as an RGB image.
    plt.imshow(img[0].numpy().astype("uint8"))
6.获取分类名称,查看数据类别
6.获取分类名称并查看数据类别
classNames = trainDataSet.class_names  # class names exposed by the training dataset
print(classNames)  # print the class names
7.模型参数设置
7.模型参数设置
设置训练50次
设置训练50轮(epochs = 50)
# Hyperparameters used by the model definition, compile and fit calls.
outPutNum = len(classNames)  # number of output units = number of classes
padding = 'same'  # padding mode passed to every Conv2D layer
activation = 'relu'  # activation passed to the hidden layers
kernel_initializer = "he_normal"  # weight initializer passed to the Conv2D and most Dense layers
epochs = 50  # number of training epochs
batch_size = BATCH_SIZE  # batch size forwarded to model.fit
loss="sparse_categorical_crossentropy"  # loss passed to model.compile
optimizer = "Adam"  # optimizer passed to model.compile
metrics="accuracy"  # metric passed to model.compile
#======================================================================
def plotHistory(history):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: The object returned by ``model.fit``. Its ``history`` dict
            must contain the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
    """
    records = history.history
    # Derive the x-axis from the number of recorded epochs instead of the
    # global ``epochs`` setting, so the axis always matches the data length.
    epoch_range = range(1, len(records['loss']) + 1)

    fig = plt.figure(figsize=(10, 5))
    # (subplot position, title / y-label, training key, validation key)
    panels = (
        (121, 'loss', 'loss', 'val_loss'),
        (122, 'accuracy', 'accuracy', 'val_accuracy'),
    )
    for position, title, train_key, val_key in panels:
        ax = fig.add_subplot(position)
        ax.plot(epoch_range, records[train_key], label=train_key)
        ax.plot(epoch_range, records[val_key], label=val_key)
        ax.set_ylabel(title)
        ax.set_xlabel('epochs')
        ax.set_title(title)
        ax.legend()
    plt.show()
8.对模型进行训练
添加卷积层
8.训练模型
# Build the CNN: rescaling -> 5 x (Conv2D + MaxPooling2D) -> dense classifier head.
model = keras.models.Sequential()

# Input layer: multiply pixel values by 1/255.
model.add(keras.layers.experimental.preprocessing.Rescaling(
    1/255,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))

# Each entry: (number of filters, dropout rate applied after pooling, or None).
conv_plan = [(2, None), (4, None), (8, None), (16, 0.20), (32, 0.25)]
for n_filters, drop_rate in conv_plan:
    # Convolution layer followed by a pooling layer.
    model.add(Conv2D(filters=n_filters,
                     kernel_size=(2, 2),
                     padding=padding,
                     activation=activation,
                     kernel_initializer=kernel_initializer))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if drop_rate is not None:
        # Randomly drop units to reduce overfitting.
        model.add(Dropout(drop_rate))

# Flatten the feature maps for the fully connected layers.
model.add(Flatten())

# Fully connected layers, two of each width.
for n_units in (1024, 1024, 512, 512, 256, 256, 128, 128):
    model.add(Dense(n_units,
                    activation=activation,
                    kernel_initializer=kernel_initializer))

# Last hidden layer; no kernel_initializer is passed here.
model.add(Dense(64, activation))
# Output layer: one softmax unit per class.
model.add(Dense(outPutNum, "softmax"))

# Compile the model.
model.compile(loss=loss,
              optimizer=optimizer,
              metrics=[metrics])
查看特征图的变化
model.summary()  # print the layer-by-layer model summary (original note: see how the feature maps change)
根据训练的结果,绘制训练和验证准确性图,训练和验证损失图。
# Start training. The datasets are already batched with BATCH_SIZE, so
# batch_size is not passed to fit(): Keras documents that it must not be
# specified when the input is a dataset.
history = model.fit(trainDataSet, validation_data=valDataSet, epochs=epochs, verbose=1)
# Save the trained model; the prediction helpers reload it from this file.
model.save(os.path.join(currentPath,'蔬菜识别.h5'))
# Plot the loss and accuracy curves.
plotHistory(history)
从测试集读取样本
# Read one sample image from disk.
img_path = r"C:\Users\23874\Desktop\pyhon机器学习\test\15.jpg"
# Resize with SIZE_TUPLE (instead of a second hard-coded 128x128) so the sample
# always matches the size the datasets and the model input are built with.
# `image`, `np` and `plt` are already imported at the top of the script.
img = image.load_img(img_path, target_size=SIZE_TUPLE)
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)  # add a leading batch axis
img_tensor /= 255.  # scale to 0-1 so imshow can display the float image
print(img_tensor.shape)
# Show the sample.
plt.imshow(img_tensor[0])
plt.show()
显示第一层激活输出的第一个滤波器的特征图
# Build a model that maps the input image to the outputs of the first 8 layers.
layer_outputs = [layer.output for layer in model.layers[:8]]
activation_model = keras.models.Model(inputs=model.input, outputs=layer_outputs)
# Get the feature maps for the sample. img_tensor was divided by 255 for
# display, and the model's first layer rescales by 1/255 again, so restore the
# 0-255 range here; otherwise the image would be normalised twice.
activations = activation_model.predict(img_tensor * 255.)
# model.layers[0] is the Rescaling layer, so the first convolution layer's
# output is activations[1], not activations[0].
first_layer_activation = activations[1]
# Show the feature map of that layer's first filter (channel index 0).
plt.matshow(first_layer_activation[0, :, :, 0], cmap="viridis")
9.训练结束进行测试
我们可以对结果进行测试:可以随机测试几张蔬菜图片,也可以指定需要测试的蔬菜图片。
下面是随机测试4张蔬菜的图片
def sample(num):
    """Predict and display the first image of up to ``num`` test batches.

    Reloads the saved model from disk. For each of the first ``num`` batches
    of ``testDataSet``, shows the batch's first image with its true class name
    and the predicted class name as the x-axis label.

    Args:
        num: Maximum number of images to show. Values <= 0 show nothing.
    """
    model = keras.models.load_model(os.path.join(currentPath,'蔬菜识别.h5'))
    # take() treats a negative count as "everything", so clamp to zero to keep
    # the "show nothing" behaviour for non-positive values.
    for img, label in testDataSet.take(max(num, 0)):
        # The model expects a batch axis, so wrap the single image in a batch of one.
        probabilities = model.predict(tf.expand_dims(img[0], 0))[0]
        site = int(np.argmax(probabilities))  # index of the most probable class
        plt.figure(figsize=(3,3),dpi=150)
        plt.imshow(img[0].numpy().astype("uint8"))
        plt.xlabel('这个蔬菜为:{},蔬菜识别结果为:{}'.format(classNames[label[0].numpy()],classNames[site]),fontsize=14)
        plt.show()


sample(4)
测试的结果为
可以看出识别的结果比较准确
最后是指定图片测试,从百度上选几张蔬菜图片进行测试
最后是指定图片测试,从百度上随便下载两张蔬菜图片进行测试
def valThePic(filePath):
    """Predict the class of one image file and display it with the result.

    The image is copied into a scratch folder (``singleTest/test`` under
    ``currentPath``) and loaded with ``image_dataset_from_directory``, the
    same loader used for the training data. An existing ``singleTest`` folder
    is deleted first.

    Args:
        filePath: Path of the image file to classify.
    """
    import ntpath  # local import: splits on both '\\' and '/' on every platform

    model = keras.models.load_model(os.path.join(currentPath,'蔬菜识别.h5'))
    testSave = os.path.join(currentPath,'singleTest')
    testSaveAdd = os.path.join(testSave,'test')

    # Start from an empty scratch folder so only the requested image is loaded.
    if os.path.exists(testSave):
        shutil.rmtree(testSave)
    os.makedirs(testSaveAdd)

    # Take the file name with basename instead of splitting on '\\' only, so
    # paths written with forward slashes work as well.
    shutil.copy(filePath, os.path.join(testSaveAdd, ntpath.basename(filePath)))

    testImg = tf.keras.preprocessing.image_dataset_from_directory(
        testSave,
        image_size=(IMG_HEIGHT, IMG_WIDTH),
    )
    for img, label in testImg:
        probabilities = model.predict(tf.expand_dims(img[0], 0))[0]
        site = int(np.argmax(probabilities))  # index of the most probable class
        plt.figure(figsize=(3,3),dpi=150)
        plt.imshow(img[0].numpy().astype("uint8"))
        plt.xlabel('蔬菜识别结果为:{}'.format(classNames[site]),fontsize=14)
        plt.show()


thePath = r"C:\Users\23874\Desktop\pyhon机器学习\test\15.jpg"  # image to classify
valThePic(thePath)
蔬菜识别正确
(四)总结
本次课程设计的内容是蔬菜识别。在本实验中,通过多次训练,验证了准确度会逐渐提高,同时也发现模型训练超过70次之后准确度反而会降低。从最后指定蔬菜图片的识别结果来看,准确度比较高,但仍有部分蔬菜识别不准确,还需要继续改进。在这次课程设计的实现过程中,我发现了自己的很多不足;在实际操作中也遇到了许多问题,通过不断改正错误积累了经验,同时也明白了实践的重要性。这次课程设计让我对python有了更深入的了解,但掌握得还不够好,还需要继续努力学习。
(五)全部代码
# Complete script: train a CNN on the vegetable image dataset, plot the
# training curves, visualise a feature map, then run predictions.

# Import packages
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
from tensorflow import keras
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras import layers
from PIL import Image
import random
from tensorflow.keras.preprocessing import image
import shutil
import glob
import ntpath

# Font settings (the plots use Chinese axis labels).
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False

# Project root directory. NOTE: hard-coded for the author's machine.
currentPath = 'C://Users//23874//Desktop//pyhon机器学习'
# Root folder of the image dataset.
allPicPath = os.path.join(currentPath,'Vegetable Images')

IMG_HEIGHT = 128  # image height
IMG_WIDTH = 128  # image width
BATCH_SIZE = 128  # number of images per batch
SIZE_TUPLE = (IMG_HEIGHT, IMG_WIDTH)
trainPath = os.path.join(currentPath,'Vegetable Images','train')
testPath = os.path.join(currentPath,'Vegetable Images','test')
valPath = os.path.join(currentPath,'Vegetable Images','validation')

# Load the training set from its folder.
trainDataSet = tf.keras.preprocessing.image_dataset_from_directory(
    trainPath,
    image_size=SIZE_TUPLE,
    batch_size=BATCH_SIZE
)

# Load the test set from its folder.
testDataSet = tf.keras.preprocessing.image_dataset_from_directory(
    testPath,
    image_size=SIZE_TUPLE,
    batch_size=BATCH_SIZE
)

# Load the validation set from its folder.
valDataSet = tf.keras.preprocessing.image_dataset_from_directory(
    valPath,
    image_size=SIZE_TUPLE,
    batch_size=BATCH_SIZE
)

# Preview the first image of six training batches in a 3x3 grid. The dataset
# is shuffled by default, so no extra random batch count is needed (the
# original random count could also yield fewer than six pictures).
PREVIEW_COUNT = 6
plt.figure(figsize=(10,8),dpi=100)
for i, (img, label) in enumerate(trainDataSet.take(PREVIEW_COUNT)):
    plt.subplot(3,3,i+1)
    # Convert the tensor to a uint8 numpy array so imshow draws it as an RGB image.
    plt.imshow(img[0].numpy().astype("uint8"))

classNames = trainDataSet.class_names  # class names
print(classNames)  # print the class names

# Hyperparameters
outPutNum = len(classNames)
padding = 'same'
activation = 'relu'
kernel_initializer = "he_normal"
epochs = 50
batch_size = BATCH_SIZE
loss="sparse_categorical_crossentropy"
optimizer = "Adam"
metrics="accuracy"


# ======================================================================
def plotHistory(history):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: The object returned by ``model.fit``. Its ``history`` dict
            must contain the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``.
    """
    records = history.history
    # Derive the x-axis from the number of recorded epochs instead of the
    # global ``epochs`` setting, so the axis always matches the data length.
    epoch_range = range(1, len(records['loss']) + 1)

    fig = plt.figure(figsize=(10, 5))
    # (subplot position, title / y-label, training key, validation key)
    panels = (
        (121, 'loss', 'loss', 'val_loss'),
        (122, 'accuracy', 'accuracy', 'val_accuracy'),
    )
    for position, title, train_key, val_key in panels:
        ax = fig.add_subplot(position)
        ax.plot(epoch_range, records[train_key], label=train_key)
        ax.plot(epoch_range, records[val_key], label=val_key)
        ax.set_ylabel(title)
        ax.set_xlabel('epochs')
        ax.set_title(title)
        ax.legend()
    plt.show()


# =============================================================================
model = keras.models.Sequential()  # model container

# Input layer: normalise pixel values by multiplying with 1/255.
model.add(keras.layers.experimental.preprocessing.Rescaling(
    1/255,
    input_shape=(IMG_HEIGHT,IMG_WIDTH,3)))

# Each entry: (number of filters, dropout rate applied after pooling, or None).
for n_filters, drop_rate in [(2, None), (4, None), (8, None), (16, 0.20), (32, 0.25)]:
    # Convolution layer followed by a pooling layer.
    model.add(Conv2D(filters=n_filters,
                     kernel_size=(2,2),
                     padding=padding,
                     activation=activation,
                     kernel_initializer=kernel_initializer))
    model.add(MaxPooling2D(pool_size=(2,2)))
    if drop_rate is not None:
        # Randomly drop units to reduce overfitting.
        model.add(Dropout(drop_rate))

model.add(Flatten())  # flatten the feature maps

# Fully connected layers, two of each width.
for n_units in (1024, 1024, 512, 512, 256, 256, 128, 128):
    model.add(Dense(n_units,
                    activation=activation,
                    kernel_initializer=kernel_initializer))

model.add(Dense(64,activation))  # last hidden layer; no kernel_initializer is passed
model.add(Dense(outPutNum,"softmax"))  # output layer

# Compile the model.
model.compile(loss=loss,
              optimizer = optimizer,
              metrics=[metrics])

# Start training. The datasets are already batched, so batch_size is not
# passed to fit(): Keras documents that it must not be specified for datasets.
history = model.fit(trainDataSet,validation_data=valDataSet,epochs=epochs, verbose=1)

model.save(os.path.join(currentPath,'蔬菜识别.h5'))
plotHistory(history)
# NOTE: to skip retraining, the saved model can be reloaded instead with
# keras.models.load_model(os.path.join(currentPath,'蔬菜识别.h5')).
model.summary()  # print the layer-by-layer model summary

# Read one sample image from disk.
img_path = r"C:\Users\23874\Desktop\pyhon机器学习\test\15.jpg"

# Resize with SIZE_TUPLE so the sample always matches the model input size.
img = image.load_img(img_path, target_size=SIZE_TUPLE)
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0)  # add a leading batch axis
img_tensor /= 255.  # scale to 0-1 so imshow can display the float image
print(img_tensor.shape)
# Show the sample.
plt.imshow(img_tensor[0])
plt.show()

# Build a model whose input is the original image and whose outputs are the
# activations of the first 8 layers of the trained model.
layer_outputs = [layer.output for layer in model.layers[:8]]
activation_model = keras.models.Model(inputs=model.input, outputs=layer_outputs)
# Get the feature maps for the sample. img_tensor was divided by 255 for
# display and the model's first layer rescales by 1/255 again, so restore the
# 0-255 range here; otherwise the image would be normalised twice.
activations = activation_model.predict(img_tensor * 255.)
# model.layers[0] is the Rescaling layer, so the first convolution layer's
# output is activations[1]; show the feature map of its first filter.
first_layer_activation = activations[1]
plt.matshow(first_layer_activation[0,:,:,0], cmap="viridis")


def sample(num):
    """Predict and display the first image of up to ``num`` test batches.

    Reloads the saved model from disk. For each of the first ``num`` batches
    of ``testDataSet``, shows the batch's first image with its true class name
    and the predicted class name as the x-axis label.

    Args:
        num: Maximum number of images to show. Values <= 0 show nothing.
    """
    model = keras.models.load_model(os.path.join(currentPath,'蔬菜识别.h5'))
    # take() treats a negative count as "everything", so clamp to zero.
    for img, label in testDataSet.take(max(num, 0)):
        # The model expects a batch axis, so wrap the single image in a batch of one.
        probabilities = model.predict(tf.expand_dims(img[0], 0))[0]
        site = int(np.argmax(probabilities))  # index of the most probable class
        plt.figure(figsize=(3,3),dpi=150)
        plt.imshow(img[0].numpy().astype("uint8"))
        plt.xlabel('这个蔬菜为:{},蔬菜识别结果为:{}'.format(classNames[label[0].numpy()],classNames[site]),fontsize=14)
        plt.show()


sample(5)


# Predict a single image given its path.
def valThePic(filePath):
    """Predict the class of one image file and display it with the result.

    The image is copied into a scratch folder (``singleTest/test`` under
    ``currentPath``) and loaded with ``image_dataset_from_directory``, the
    same loader used for the training data. An existing ``singleTest`` folder
    is deleted first.

    Args:
        filePath: Path of the image file to classify.
    """
    model = keras.models.load_model(os.path.join(currentPath,'蔬菜识别.h5'))
    testSave = os.path.join(currentPath,'singleTest')
    testSaveAdd = os.path.join(testSave,'test')

    # Start from an empty scratch folder so only the requested image is loaded.
    if os.path.exists(testSave):
        shutil.rmtree(testSave)
    os.makedirs(testSaveAdd)

    # Take the file name with basename instead of splitting on '\\' only, so
    # paths written with forward slashes work as well.
    shutil.copy(filePath, os.path.join(testSaveAdd, ntpath.basename(filePath)))

    testImg = tf.keras.preprocessing.image_dataset_from_directory(
        testSave,
        image_size=(IMG_HEIGHT, IMG_WIDTH),
    )
    for img, label in testImg:
        probabilities = model.predict(tf.expand_dims(img[0], 0))[0]
        site = int(np.argmax(probabilities))  # index of the most probable class
        plt.figure(figsize=(3,3),dpi=150)
        plt.imshow(img[0].numpy().astype("uint8"))
        plt.xlabel('蔬菜识别结果为:{}'.format(classNames[site]),fontsize=14)
        plt.show()


thePath = r"C:\Users\23874\Desktop\pyhon机器学习\test\9.png"  # image to classify

valThePic(thePath)