# 人工智能机器学习鸢尾花卉分类
#样本数据集预处理
def make_data_set(file_name):
    """Load iris samples from a comma-separated text file.

    Every non-blank line is split on commas into four attribute values and
    a species label, for example ``5.1,3.5,1.4,0.2,Iris-setosa``.

    Args:
        file_name: Path of the data file, as a string.

    Returns:
        A list of ``(a1, a2, a3, a4, iris_type)`` tuples, one per non-blank
        line in file order; the four attributes are floats and
        ``iris_type`` is the label string.

    Raises:
        ValueError: If a non-blank line does not split into exactly five
            fields, or one of the first four fields is not a number.
    """
    input_set_list = []
    # The context manager closes the file even when a line fails to parse.
    with open(file_name) as input_file:
        for line_str in input_file:
            line_str = line_str.strip()  # drop the trailing newline
            if not line_str:
                # Blank lines carry no sample; unpacking them would fail.
                continue
            a1, a2, a3, a4, iris_type = line_str.split(',')
            input_set_list.append(
                (float(a1), float(a2), float(a3), float(a4), iris_type)
            )
    return input_set_list
# print('读取训练数据……')
# trainin_set_list =make_data_set('iris_train_data.txt')
# print(trainin_set_list)
# #
#定义求和函数
def sum_lists(list1, list2):
    """Return the element-wise sum of two numeric sequences.

    The sequences are paired position by position, so any common length is
    supported rather than only four elements. When the lengths differ, the
    result stops at the end of the shorter sequence.

    Args:
        list1: First sequence of numbers (list or tuple).
        list2: Second sequence of numbers (list or tuple).

    Returns:
        A new list whose i-th element is ``list1[i] + list2[i]``.
    """
    return [left + right for left, right in zip(list1, list2)]
# l1=[1,2,3]
# l2=[4,5,6]
# s=sum_lists(l1,l2)
# print(s)
#定义求平均函数
def make_averages(sums_list, total_int):
    """Turn per-attribute sums into per-attribute averages.

    Args:
        sums_list: Sequence holding the summed value of each attribute.
        total_int: Number of samples that were summed.

    Returns:
        A new list with every entry of ``sums_list`` divided by
        ``total_int``.

    Raises:
        ZeroDivisionError: If ``total_int`` is 0 and ``sums_list`` is not
            empty.
    """
    return [attribute_sum / total_int for attribute_sum in sums_list]
# a2 = (4,5,6,7)
# av = make_averages(a2,68)
# print(av)
#
# 利用训练样本数据构建分类器
def train_classifier(training_set_list):
    """Compute the mean attribute vector of each iris species.

    Samples labelled 'Iris-virginica' and 'Iris-versicolor' are accumulated
    under their own species; a sample carrying any other label is counted
    as setosa.

    Args:
        training_set_list: Iterable of ``(a1, a2, a3, a4, iris_type)``
            tuples.

    Returns:
        ``[virginica_averages, versicolor_averages, setosa_averages]``,
        each a list of four attribute means.

    Raises:
        ZeroDivisionError: If one of the three species has no sample.
    """
    virginica = 'Iris-virginica'
    versicolor = 'Iris-versicolor'
    setosa = 'Iris-setosa'  # internal bucket key for every remaining label
    species_order = (virginica, versicolor, setosa)

    totals = {species: [0] * 4 for species in species_order}
    counts = {species: 0 for species in species_order}

    for sample in training_set_list:
        label = sample[4]
        bucket = label if label in (virginica, versicolor) else setosa
        # Add the four attribute values to the running sums of the bucket.
        totals[bucket] = sum_lists(totals[bucket], sample[:4])
        counts[bucket] += 1

    # Averages are produced in the fixed order the classifier consumers use.
    return [
        make_averages(totals[species], counts[species])
        for species in species_order
    ]
#定义测试分类函数
def classify_test_set(test_set_list, classifier_list):
    """Measure how far each test sample lies from every species centroid.

    Args:
        test_set_list: Iterable of ``(a1, a2, a3, a4, iris_type)`` tuples.
        classifier_list: Sequence whose first three entries are the
            virginica, versicolor and setosa centroids, each holding four
            attribute means.

    Returns:
        A list with one ``(distance_virginica, distance_versicolor,
        distance_setosa, iris_type)`` tuple per sample, in input order.
        Each distance is the Euclidean distance between the sample's four
        attributes and the corresponding centroid.
    """
    result_list = []
    for iris_tuple in test_set_list:
        attributes = iris_tuple[:4]
        distances = []
        for centroid in (classifier_list[0], classifier_list[1], classifier_list[2]):
            # Pair attribute i with centroid value i; the earlier code
            # compared attribute 1 against centroid value 2 by mistake.
            squared_sum = sum(
                (value - mean) ** 2 for value, mean in zip(attributes, centroid)
            )
            distances.append(squared_sum ** 0.5)
        iris_type = iris_tuple[4]
        result_list.append((distances[0], distances[1], distances[2], iris_type))
    return result_list
#定义测试结果报告函数
def report_results(result_list):
    """Print the number of misclassified samples and the accuracy rate.

    A sample is assigned to the species whose centroid is nearest (on a
    tie, the species listed first wins). It counts as inaccurate when that
    species differs from its label. Labels other than 'Iris-virginica' and
    'Iris-versicolor' are treated as setosa.

    Args:
        result_list: Iterable of ``(distance_virginica,
            distance_versicolor, distance_setosa, iris_type)`` tuples.

    Returns:
        None. The report is written to standard output; when there are no
        results, the accuracy line is replaced by a short notice.
    """
    total_count = 0
    inaccurate_count = 0
    for result_tuple in result_list:
        distance_virginica, distance_versicolor, distance_setose, iris_type = result_tuple
        total_count += 1
        # All three distances take part in the decision, so confusions
        # involving setosa are detected as well.
        distances = (distance_virginica, distance_versicolor, distance_setose)
        predicted_index = distances.index(min(distances))
        if iris_type == 'Iris-virginica':
            expected_index = 0
        elif iris_type == 'Iris-versicolor':
            expected_index = 1
        else:
            expected_index = 2
        if predicted_index != expected_index:
            inaccurate_count += 1  # classification error
    print("在 ",total_count," 组鸢尾测试数据中, 共有 ",inaccurate_count," 组数据分类不准确.")
    if total_count == 0:
        # No samples: the accuracy ratio would divide by zero.
        print("没有测试数据, 无法计算准确率.")
        return
    print("数据分类准确率为:", round((total_count-inaccurate_count)*100/total_count,2),'%')
# Run the whole iris classification pipeline with the functions defined above:
# load the training data, train the classifier, load the test data, classify
# it and print the report.
# NOTE(review): training_file and test_file are both "iris.txt", so the
# classifier is evaluated on the same samples it was trained on -- confirm
# that this is intended.
print("读取训练数据...")
training_file = "iris.txt"
training_set_list = make_data_set(training_file) # parse the training samples from the file
print("读取训练数据结束.\n")
print("训练分类器...")
classifier_list = train_classifier(training_set_list) # build the classifier from the training samples
print("训练分类器结束.\n")
print("读取测试数据...")
test_file = "iris.txt"
test_set_list = make_data_set(test_file) # parse the test samples from the file
print("读取测试数据结束.\n")
print("测试数据分类...")
result_list = classify_test_set(test_set_list, classifier_list) # run the test samples through the classifier
print("分类结束.\n")
report_results(result_list)
print("程序运行完成.")
# input()
# # time.sleep(0.5)
# Files used
# Machine-learning data set:
# Link: https://pan.baidu.com/s/1z5TEtIh_9QTg7CK9MEOmGg
# Extraction code: 041d