一、选题的背景
选择此选题是因为飞机在社会、经济和技术领域具有重要地位。通过使用Python机器学习判断飞机类型,能满足对航空知识的需求,提高航空安全性,为航空产业决策提供数据支持,并促进机器学习在航空领域的创新应用。随着航空技术的发展,人们对飞机识别和分类的需求日益增加。准确判断飞机类型对航空公司、航空制造商和航空维修公司等具有重要经济价值,帮助他们做出更明智的决策和规划。此外,飞机类型判断是一个复杂的问题,利用机器学习可以从大量数据中学习模式和规律,具有技术挑战性和创新性。
二、机器学习案例设计方案
1.本选题采用的机器学习案例(训练集与测试集)的来源描述
数据集来源:Kaggle 平台上的 flying-planes 数据集,包含无人机、战斗机、直升机、导弹、客机、火箭六类飞行器的图片。
2.采用的机器学习框架描述
从网站下载数据集并进行整理,使用 Jupyter Notebook 编写代码,将数据集划分为训练集、验证集和测试集,再利用 Keras 构建卷积神经网络并训练模型。
三、机器学习的实现步骤
(1)下载数据集

(2)导入所需要的库
# Import the required libraries.
# (The original cell was collapsed onto a single "#" line, so none of these
# statements actually ran.)
from pathlib import Path

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
from matplotlib import font_manager

# Set the global font used for the Chinese plot labels.
# A raw string keeps the Windows backslashes literal without relying on
# invalid escape sequences such as "\W" or "\F".
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STSONG.TTF")
plt.rcParams['font.family'] = 'STSong'  # replace with the chosen Chinese font
(3)对图像进行读取
# Read the image paths and labels of the six aircraft categories.
categories = ['drone', 'fighterjet', 'helicopter', 'missile', 'passengerplane', 'rocket']


def collect_image_paths(root, categories, pattern='**/*.JPG'):
    """Collect image paths and their category labels.

    Each category is expected to be a sub-directory of ``root``; every file
    below it that matches ``pattern`` is labelled with the category name.
    A category whose directory is missing simply contributes no files.

    Args:
        root: Directory that contains one sub-directory per category.
        categories: Iterable of category (sub-directory) names.
        pattern: Glob pattern, relative to each category directory.

    Returns:
        A ``(filepaths, labels)`` tuple of two equally long lists, where
        ``labels[k]`` is the category of ``filepaths[k]``.
    """
    found_paths = []
    found_labels = []
    root = Path(root)
    for category in categories:
        matches = list((root / category).glob(pattern))
        found_paths.extend(matches)
        found_labels.extend([category] * len(matches))
    return found_paths, found_labels


filepaths, labels = collect_image_paths('E:/python/data/planes/', categories)
(4)图像数据处理
# Image data processing: gather every image path with its label and put
# them into a DataFrame. The directory scan is repeated here so that this
# cell can be run on its own.
categories = ['drone', 'fighterjet', 'helicopter', 'missile', 'passengerplane', 'rocket']
filepaths = []
labels = []
for category in categories:
    dir_path = Path('E:/python/data/planes/' + category)
    filepaths_category = list(dir_path.glob('**/*.JPG'))
    filepaths.extend(filepaths_category)
    labels.extend([category] * len(filepaths_category))

# One row per image: its path and the category it belongs to.
data = pd.DataFrame({'FilePaths': filepaths, 'labels': labels})
print(len(data['labels']))
print(len(data))
print(data.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would add a stray "None" line).
data.info()
(5)划分数据集
from sklearn.model_selection import train_test_split

# flow_from_dataframe needs plain string paths, not Path objects.
data['FilePaths'] = data['FilePaths'].astype(str)

# Split 9:1 into (train + validation) and test, stratified by label so that
# every category keeps its share. A fixed random_state makes the split
# reproducible between runs; the value matches the generators' seed of 30.
X_train_test, X_test = train_test_split(
    data, test_size=0.1, stratify=data['labels'], random_state=30)
print('测试集形状', X_test.shape)

# Split the remainder 4:1 into training and validation sets.
X_train, X_val = train_test_split(
    X_train_test, test_size=0.2, stratify=X_train_test['labels'], random_state=30)
print('训练集形状', X_train.shape)
print('验证集形状', X_val.shape)

# Show how many images each category has in the training set.
print(X_train['labels'].value_counts())
print(X_train['FilePaths'].shape)
(6)图像预处理
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Only rescale the pixel values by 1/255; no augmentation at this stage.
img_preprocessing = ImageDataGenerator(rescale=1./255)

# Options shared by the training, test and validation iterators.
data_gen_params = dict(
    target_size=(112, 112),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=30,
)

# Build the three iterators in the same order as before: train, test, validation.
x_train, x_test, x_val = [
    img_preprocessing.flow_from_dataframe(
        dataframe=frame, x_col='FilePaths', y_col='labels', **data_gen_params)
    for frame in (X_train, X_test, X_val)
]
(7)构建神经网络
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras import optimizers

# Build the neural network model.
model = Sequential()

# Six identical convolution stages that differ only in the number of filters:
# 3x3 convolution ('same' padding) -> ReLU -> 2x2 max pooling with stride 2.
for n_filters in (32, 64, 128, 256, 512, 1024):
    model.add(Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding='valid'))

# Flatten the feature maps into a vector for the dense layers.
model.add(Flatten())

# Fully connected hidden layer.
model.add(Dense(2048))
model.add(Activation('relu'))

# Dropout to reduce overfitting.
model.add(Dropout(0.5))

# Output layer: one unit per category (6), turned into probabilities by softmax.
model.add(Dense(6))
model.add(Activation('softmax'))

# Compile the model.
rmsprop = optimizers.RMSprop(learning_rate=1e-4)
model.compile(optimizer=rmsprop, loss="categorical_crossentropy", metrics=["accuracy"])
(8)第一次训练模型
# First training run: fit on the non-augmented training iterator and
# validate on the validation iterator after every epoch.
print('————————开始训练————————')
print('模型训练中——————————————')
model1 = model.fit(x_train, validation_data=x_val, epochs=500)
model.summary()

# Final evaluation on the validation set.
print('\n验证中-----')
loss, accuracy = model.evaluate(x_val)
print('\n验证损失:', loss)
print('\n验证准确率:', accuracy)

# Save the model. The raw string keeps the Windows backslashes literal;
# "\p", "\d" and "\m" are invalid escape sequences in a normal string.
model.save(r'E:\python\data\model1')


(9)绘制损失和准确率曲线
import matplotlib.pyplot as plt


def _draw_curves(train_values, val_values, train_label, val_label, title, ylabel, out_file):
    """Plot one training/validation curve pair, save it to out_file, then show it."""
    plt.plot(train_values, "bo-", label=train_label)
    plt.plot(val_values, "ro--", label=val_label)
    plt.title(title)
    plt.xlabel("训练迭代次数")
    plt.ylabel(ylabel)
    plt.legend()
    plt.savefig(out_file)
    plt.show()


# Loss curves of the first training run.
loss = model1.history["loss"]
val_loss = model1.history["val_loss"]
_draw_curves(loss, val_loss, "训练损失", "验证损失",
             "训练和验证损失", "损失", 'model1模型损失变化曲线图.jpg')

# Accuracy curves of the first training run.
acc = model1.history["accuracy"]
val_acc = model1.history["val_accuracy"]
_draw_curves(acc, val_acc, "训练准确率", "验证准确率",
             "训练和验证准确率", "准确率", 'model1模型准确率变化曲线图.jpg')


(10)数据增强
from keras.preprocessing.image import ImageDataGenerator

# Data augmentation: random transformations applied to the training images.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_data_gen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation images are only rescaled, never augmented.
val_data_gen = ImageDataGenerator(rescale=1./255)

# Options shared by both iterators.
flow_options = dict(
    x_col='FilePaths',
    y_col='labels',
    target_size=(112, 112),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=30,
)
train_generator = train_data_gen.flow_from_dataframe(dataframe=X_train, **flow_options)
val_generator = val_data_gen.flow_from_dataframe(dataframe=X_val, **flow_options)
(11)第二次训练模型
# Second training run, this time on the augmented data.
# The original call passed x_train / x_val again, so the augmentation
# generators created for this step were never used.
# NOTE(review): `model` is the same object that was already fitted in the
# first run, so this continues from those weights. Rebuild and recompile the
# model first if the two runs are meant to be compared from scratch.
print('——————开始训练——————')
print('模型训练中————————')
model2 = model.fit(
    train_generator, validation_data=val_generator, epochs=300)
model.summary()

# Final evaluation on the (non-augmented) validation generator.
print('\n验证中-----')
loss, accuracy = model.evaluate(val_generator)
print('\n验证损失:', loss)
print('\n验证准确率:', accuracy)

# Save the model. The raw string keeps the Windows backslashes literal
# without relying on invalid escape sequences.
model.save(r'E:\python\data\model2')



(12)绘制损失和准确率曲线
# 绘制model2模型损失变化曲线
loss = model2.history["loss"]
val_loss = model2.history["val_loss"]
plt.plot(loss, "bo-", label="训练损失")
plt.plot(val_loss, "ro--", label="验证损失")
plt.title("训练和验证损失")
plt.xlabel("训练迭代次数")
plt.ylabel("损失")
plt.legend()
plt.savefig('model2模型损失变化曲线图.jpg')
plt.show()
# 绘制model2模型准确率变化曲线
acc = model2.history["accuracy"]
val_acc = model2.history["val_accuracy"]
plt.plot(acc, "bo-", label="训练准确率")
plt.plot(val_acc, "ro--", label="验证准确率")
plt.title("训练和验证准确率")
plt.xlabel("训练迭代次数")
plt.ylabel("准确率")
plt.legend()
plt.savefig('model2模型准确率变化曲线图.jpg')
plt.show()


(13)模型预测
from keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt


def _show_prediction(model, model_name, image, one_hot_label, class_names):
    """Show one test image and the class probabilities a model predicts for it.

    Args:
        model: Loaded Keras model used for the prediction.
        model_name: Name printed in front of the predicted class.
        image: One image from a test batch (already rescaled by the generator).
        one_hot_label: One-hot encoded true label belonging to ``image``.
        class_names: Class names ordered by their label index.

    Returns:
        The array returned by ``model.predict`` for this single image.
    """
    true_name = class_names[np.argmax(one_hot_label)]
    plt.figure(figsize=(12, 7))
    plt.subplot(2, 1, 1)
    # The generator rescaled pixels by 1/255, so scale back for display.
    plt.imshow((image * 255).astype('uint8'))
    plt.title('实际类型为:' + true_name)
    # Add the batch axis that predict() expects.
    probabilities = model.predict(image[np.newaxis, ...].astype("float32"))
    plt.subplot(2, 1, 2)
    plt.title("预测结果的概率饼图")
    plt.pie(probabilities.reshape(len(class_names)), labels=class_names, autopct='%1.1f%%')
    plt.axis('equal')
    plt.show()
    print('实际类型:' + true_name)
    print(model_name + '模型预测的结果是:' + class_names[np.argmax(probabilities)])
    return probabilities


# Take the class order from the generator itself, so the names always match
# the one-hot encoding of the labels.
l1 = sorted(x_test.class_indices, key=x_test.class_indices.get)
l2 = l1

# Raw strings keep the Windows backslashes literal without invalid escapes.
model1 = load_model(r'E:\python\data\model1')
model2 = load_model(r'E:\python\data\model2')

# Fetch one test batch. next() works on the iterator directly; the
# .next() method is not available in newer Keras versions.
img1, label1 = next(x_test)
# Guard against a final batch that holds fewer than 11 images.
i = min(10, len(img1) - 1)
# Both models are shown the same image so their predictions can be compared.
img2, label2 = img1, label1

pr1 = _show_prediction(model1, 'model1', img1[i], label1[i], l1)
pr2 = _show_prediction(model2, 'model2', img2[i], label2[i], l2)




由于验证集准确率只有50%左右,模型的预测结果也会出现错误的情况。



四、总结
通过编写这次程序,我对机器学习有了初步的认识。这次的结果不算太好,验证集准确率只在50%附近。发现验证集准确率较低后,我仔细查看了数据集,发现里面的图片存在混乱的情况;准确率较低的原因可能是数据集不完整、数据量不够多。下一次编写机器学习项目时,要挑选较为准确、重复性低的数据集,否则会给后续的编写工作带来麻烦。部分代码由于自身缺乏相关知识,参考了网上的一些代码,但我也理解了这些代码的作用。
五、完整代码
# Complete script. The original listing was collapsed onto a few "#" comment
# lines next to a column of line numbers, so it could not be run as pasted.

# Import the required libraries.
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
from pathlib import Path
from matplotlib import font_manager

# Set the global font used for the Chinese plot labels. Raw strings keep the
# Windows backslashes literal without relying on invalid escape sequences.
my_font = font_manager.FontProperties(fname=r"C:\Windows\Fonts\STSONG.TTF")
plt.rcParams['font.family'] = 'STSong'  # replace with the chosen Chinese font

# Image data loading: collect the paths and labels of the six aircraft types.
# (The original listing ran this identical scan twice; once is enough.)
categories = ['drone', 'fighterjet', 'helicopter', 'missile', 'passengerplane', 'rocket']
filepaths = []
labels = []
for category in categories:
    dir_path = Path('E:/python/data/planes/' + category)
    filepaths_category = list(dir_path.glob('**/*.JPG'))
    filepaths.extend(filepaths_category)
    labels.extend([category] * len(filepaths_category))

# Image data processing: one DataFrame row per image.
data = pd.DataFrame({'FilePaths': filepaths, 'labels': labels})
print(len(data['labels']))
print(len(data))
print(data.head())
# info() prints its report itself and returns None, so it is not wrapped in print().
data.info()

# Look at a few images through the paths stored in the DataFrame.
# NOTE(review): the indices assume the dataset has more than 5000 rows.
import matplotlib.pyplot as plt
pic = plt.figure(figsize=(12, 7))
indices = [1, 100, 500, 800, 1000, 1500, 2000, 3000, 5000]
for i, index in enumerate(indices, 1):
    ax = pic.add_subplot(3, 3, i)
    plt.imshow(plt.imread(data['FilePaths'][index]))
    plt.title(data['labels'][index])
plt.savefig('图像.jpg')
plt.show()

from sklearn.model_selection import train_test_split
# flow_from_dataframe needs plain string paths, not Path objects.
data['FilePaths'] = data['FilePaths'].astype(str)

# Split 85:15 into (train + validation) and test, stratified by label.
# A fixed random_state makes the split reproducible between runs.
X_train_test, X_test = train_test_split(
    data, test_size=0.15, stratify=data['labels'], random_state=30)
print('测试集形状', X_test.shape)

# Split the remainder 4:1 into training and validation sets.
X_train, X_val = train_test_split(
    X_train_test, test_size=0.2, stratify=X_train_test['labels'], random_state=30)
print('训练集形状', X_train.shape)
print('验证集形状', X_val.shape)

# Show how many images each category has in the training set.
print(X_train['labels'].value_counts())
print(X_train['FilePaths'].shape)

from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Only rescale the pixel values by 1/255; no augmentation at this stage.
img_preprocessing = ImageDataGenerator(rescale=1./255)
data_gen_params = {
    'target_size': (112, 112),
    'color_mode': 'rgb',
    'class_mode': 'categorical',
    'batch_size': 32,
    'seed': 30,
}
x_train = img_preprocessing.flow_from_dataframe(dataframe=X_train, x_col='FilePaths', y_col='labels', **data_gen_params)
x_test = img_preprocessing.flow_from_dataframe(dataframe=X_test, x_col='FilePaths', y_col='labels', **data_gen_params)
x_val = img_preprocessing.flow_from_dataframe(dataframe=X_val, x_col='FilePaths', y_col='labels', **data_gen_params)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras import optimizers

# Build the neural network model.
model = Sequential()
# Conv2D layer, 32 filters
model.add(Conv2D(filters=32, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding='valid'))
# Conv2D layer, 64 filters
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding='valid'))
# Conv2D layer, 128 filters
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding='valid'))
# Conv2D layer, 256 filters
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding='valid'))
# Conv2D layer, 512 filters
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPool2D(pool_size=(2, 2), strides=2, padding='valid'))
# Flatten the feature maps into a vector.
model.add(Flatten())
# Fully connected layer
model.add(Dense(1024))
model.add(Activation('relu'))
# Dropout to reduce overfitting
model.add(Dropout(0.5))
# Output layer: 6 categories, classified with softmax
model.add(Dense(6))
model.add(Activation('softmax'))
# Compile the model.
model.compile(optimizer=optimizers.RMSprop(learning_rate=1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# First training run on the non-augmented data.
print('————————开始训练————————')
print('模型训练中——————————————')
model1 = model.fit(x_train, validation_data=x_val, epochs=100)
model.summary()
print('\n验证中-----')
loss, accuracy = model.evaluate(x_val)
print('\n验证损失:', loss)
print('\n验证准确率:', accuracy)
# Save the model.
model.save(r'E:\python\data\model1')

import matplotlib.pyplot as plt
# Loss curves of model1.
loss = model1.history["loss"]
val_loss = model1.history["val_loss"]
plt.plot(loss, "bo-", label="训练损失")
plt.plot(val_loss, "ro--", label="验证损失")
plt.title("训练和验证损失")
plt.xlabel("训练迭代次数")
plt.ylabel("损失")
plt.legend()
plt.savefig('model1模型损失变化曲线图.jpg')
plt.show()
# Accuracy curves of model1.
acc = model1.history["accuracy"]
val_acc = model1.history["val_accuracy"]
plt.plot(acc, "bo-", label="训练准确率")
plt.plot(val_acc, "ro--", label="验证准确率")
plt.title("训练和验证准确率")
plt.xlabel("训练迭代次数")
plt.ylabel("准确率")
plt.legend()
plt.savefig('model1模型准确率变化曲线图.jpg')
plt.show()

from keras.preprocessing.image import ImageDataGenerator
# Data augmentation for the training images; validation images are only rescaled.
train_data_gen = ImageDataGenerator(rescale=1./255,
                                    rotation_range=40,
                                    width_shift_range=0.2,
                                    height_shift_range=0.2,
                                    shear_range=0.2,
                                    zoom_range=0.2,
                                    horizontal_flip=True,
                                    fill_mode='nearest')
val_data_gen = ImageDataGenerator(rescale=1./255)
train_generator = train_data_gen.flow_from_dataframe(dataframe=X_train,
                                                     x_col='FilePaths',
                                                     y_col='labels',
                                                     target_size=(112, 112),
                                                     color_mode='rgb',
                                                     class_mode='categorical',
                                                     batch_size=32,
                                                     seed=30)
val_generator = val_data_gen.flow_from_dataframe(dataframe=X_val,
                                                 x_col='FilePaths',
                                                 y_col='labels',
                                                 target_size=(112, 112),
                                                 color_mode='rgb',
                                                 class_mode='categorical',
                                                 batch_size=32,
                                                 seed=30)

# Second training run on the augmented data. The original passed
# x_train / x_val again, so the generators above were never used.
# NOTE(review): `model` was already fitted in the first run, so this run
# continues from those weights rather than starting from scratch.
print('——————开始训练——————')
print('模型训练中————————')
model2 = model.fit(train_generator, validation_data=val_generator, epochs=100)
model.summary()
print('\n验证中-----')
loss, accuracy = model.evaluate(val_generator)
print('\n验证损失:', loss)
print('\n验证准确率:', accuracy)
# Save the model.
model.save(r'E:\python\data\model2')

# Set the global font.
plt.rcParams['font.family'] = 'STSong'  # replace with the chosen Chinese font
# Loss curves of model2.
loss = model2.history["loss"]
val_loss = model2.history["val_loss"]
plt.plot(loss, "bo-", label="训练损失")
plt.plot(val_loss, "ro--", label="验证损失")
plt.title("训练和验证损失")
plt.xlabel("训练迭代次数")
plt.ylabel("损失")
plt.legend()
plt.savefig('model2模型损失变化曲线图.jpg')
plt.show()
# Accuracy curves of model2.
acc = model2.history["accuracy"]
val_acc = model2.history["val_accuracy"]
plt.plot(acc, "bo-", label="训练准确率")
plt.plot(val_acc, "ro--", label="验证准确率")
plt.title("训练和验证准确率")
plt.xlabel("训练迭代次数")
plt.ylabel("准确率")
plt.legend()
plt.savefig('model2模型准确率变化曲线图.jpg')
plt.show()

from keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
# Load model1.
model1 = load_model(r'E:\python\data\model1')
# Take the class order from the generator so it matches the one-hot labels.
l1 = sorted(x_test.class_indices, key=x_test.class_indices.get)
# Predict with model1. next() works on the iterator directly; the .next()
# method is not available in newer Keras versions.
img1, label1 = next(x_test)
# Guard against a final batch that holds fewer than 11 images.
i = min(10, len(img1) - 1)
plt.figure(figsize=(12, 7))
plt.subplot(2, 1, 1)
# The generator rescaled pixels by 1/255, so scale back for display.
plt.imshow((img1[i] * 255).astype('uint8'))
plt.title('实际类型为:' + l1[np.argmax(label1[i])])
x_test_img1 = img1[i].reshape(1, 112, 112, 3).astype("float32")
pr1 = model1.predict(x_test_img1)
plt.subplot(2, 1, 2)
plt.title("预测结果的概率柱状图")
plt.bar(np.arange(6), pr1.reshape(6), align="center")
plt.xticks(np.arange(6), l1)
plt.show()
print('实际类型:' + l1[np.argmax(label1[i])])
print('model1模型预测的结果是:' + l1[np.argmax(pr1)])

# Load model2.
model2 = load_model(r'E:\python\data\model2')
# Predict with model2 on the same image, so both models can be compared.
img2, label2 = img1, label1
l2 = l1
plt.figure(figsize=(12, 7))
plt.subplot(2, 1, 1)
plt.imshow((img2[i] * 255).astype('uint8'))
plt.title('实际类型为:' + l2[np.argmax(label2[i])])
x_test_img2 = img2[i].reshape(1, 112, 112, 3).astype("float32")
pr2 = model2.predict(x_test_img2)
plt.subplot(2, 1, 2)
plt.title("预测结果的概率柱状图")
plt.bar(np.arange(6), pr2.reshape(6), align="center")
plt.xticks(np.arange(6), l2)
plt.show()
print('实际类型:' + l2[np.argmax(label2[i])])
print('model2模型预测的结果是:' + l2[np.argmax(pr2)])
