Keras-based Handwritten Digit Recognition
一、The MNIST Dataset
1- Files in the dataset:
- Training set images: train-images-idx3-ubyte.gz (9.9 MB, 47 MB uncompressed, 60,000 samples)
- Training set labels: train-labels-idx1-ubyte.gz (29 KB, 60 KB uncompressed, 60,000 labels)
- Test set images: t10k-images-idx3-ubyte.gz (1.6 MB, 7.8 MB uncompressed, 10,000 samples)
- Test set labels: t10k-labels-idx1-ubyte.gz (5 KB, 10 KB uncompressed, 10,000 labels)
2- Image format:
(1) Each image is a 28*28 single-channel picture of a handwritten digit;
(2) There are 10 classes, the digits 0 through 9;
(3) There are 60,000 training images and 10,000 test images in total;
3- Download:
The MNIST database of handwritten digits is provided by Yann LeCun; the official download page is http://yann.lecun.com/exdb/mnist/. A minimal sketch for reading the raw IDX files directly is given below.
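If you want to read the downloaded files without any framework, the IDX format is straightforward: a big-endian magic number and the dimension sizes, followed by raw bytes. A minimal sketch (the file paths are placeholders for wherever you saved the archives):
import gzip
import numpy as np

def load_idx_images(path):
    # idx3-ubyte layout: magic, image count, rows, cols (big-endian int32), then pixel bytes
    with gzip.open(path, 'rb') as f:
        _magic, num, rows, cols = np.frombuffer(f.read(16), dtype='>i4')
        pixels = np.frombuffer(f.read(), dtype=np.uint8)
    return pixels.reshape(num, rows, cols)

def load_idx_labels(path):
    # idx1-ubyte layout: magic, label count (big-endian int32), then one byte per label
    with gzip.open(path, 'rb') as f:
        _magic, num = np.frombuffer(f.read(8), dtype='>i4')
        return np.frombuffer(f.read(), dtype=np.uint8)

# e.g. load_idx_images('MNIST_data/train-images-idx3-ubyte.gz') -> array of shape (60000, 28, 28)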
二、Key Points
1- Dataset directory setup: place the downloaded MNIST files under the directory that path points to at the top of the code, and the Kaggle train.csv / test.csv under ./path/to/data/;
2- Notes:
(1) The MNIST data is read in with the input_data function, with one-hot encoding enabled; normalization is applied by default, so the images can be used directly afterwards. Note that the function automatically splits the 60,000 training images into 55,000 training images and 5,000 validation images. I recommend downloading the dataset locally first, otherwise letting the API fetch it can be painfully slow over a bad connection. The read call (and a tf.keras alternative) is shown after these notes;
(2) The model built in the code is LeNet-5; see the original paper for details. The model was, in large part, proposed for exactly this handwritten-digit task, and it works well: my own run reached roughly 99% accuracy. Feel free to tweak the hyperparameters. A layer-by-layer shape breakdown follows this list;
(3) This post also takes on the Kaggle Digit Recognizer competition; combined with the MNIST data, the final submitted score on Kaggle was 0.9999, essentially a perfect score. Feel free to give it a try yourself;
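For reference, the output shape after each layer of the LeNet-5 variant defined in built_model() below, for a 28*28*1 input (model.summary() in the code prints the full version):
- Input: 28 x 28 x 1
- Conv2D, 6 filters of 5x5, ReLU: 24 x 24 x 6
- AveragePooling2D 2x2, stride 2: 12 x 12 x 6
- Conv2D, 16 filters of 5x5, ReLU: 8 x 8 x 16
- AveragePooling2D 2x2, stride 2: 4 x 4 x 16
- Flatten: 256
- Dense 120 -> Dense 84 -> Dense 10 (softmax)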
If anything in the code is unclear, feel free to leave a comment and I will reply when I have time. Please do not repost without permission. Thank you.
The read call from note (1):
mnist = input_data.read_data_sets(path, one_hot=True)
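Note that tensorflow.examples.tutorials.mnist only ships with TensorFlow 1.x and was removed in TensorFlow 2. If you are on a newer stack, a rough equivalent of the line above (a sketch, not what this post uses) is to load MNIST through tf.keras and do the scaling, one-hot encoding and validation split yourself:
from tensorflow import keras

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = (x_train / 255.0).astype('float32').reshape(-1, 28, 28, 1)
x_test = (x_test / 255.0).astype('float32').reshape(-1, 28, 28, 1)
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# hold out 5,000 training images for validation, similar to read_data_sets' default split
x_val, y_val = x_train[:5000], y_train[:5000]
x_train, y_train = x_train[5000:], y_train[5000:]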
三、Full Code
#######################################################################
# author: Jack
# date: 2019/05/26
# title: Digit Recognizer
# data set download link:
# mnist: http://yann.lecun.com/exdb/mnist/
# kaggle: https://www.kaggle.com/c/digit-recognizer/data
########################################################################
from __future__ import print_function
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.examples.tutorials.mnist import input_data
import keras
from keras.models import load_model
from keras.preprocessing import image
from keras.models import Sequential
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPool2D, Flatten, AveragePooling2D
from keras.layers import Dense, BatchNormalization, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
path = r'.\path\to\MNIST_data' # Put the file in your current directory before using it
num_class = 10 # results = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
batch_size = 32
learning_rate = 0.0001
data_augmentation = False
epochs = 25
model_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'Keras_HDR_LeNet5_Model.h5'
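# read_data_sets one-hot encodes the labels, scales pixels to [0, 1] and splits the
# 60,000 training images into 55,000 training / 5,000 validation images by default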
mnist = input_data.read_data_sets(path, one_hot=True)
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=0.5, min_lr=0.00001)
def visualize(train_data):
"""Visualize data"""
img_id = np.random.choice(train_data.shape[0], 4)
plt.subplot(221)
plt.imshow(train_data[img_id[0]].reshape((28, 28)))
plt.subplot(222)
plt.imshow(train_data[img_id[1]].reshape((28, 28)))
plt.subplot(223)
plt.imshow(train_data[img_id[2]].reshape((28, 28)))
plt.subplot(224)
plt.imshow(train_data[img_id[3]].reshape((28, 28)))
plt.show()
def get_data():
# Get image
train_data = mnist.train.images
val_data = mnist.validation.images
test_data = mnist.test.images
print("shape of each image: ", train_data[0].shape)
visualize(train_data)
# Get label
train_label = mnist.train.labels
val_label = mnist.validation.labels
test_label = mnist.test.labels
print("shape of each label: ", train_label[0].shape)
assert (train_data.shape[0] == train_label.shape[0])
    assert (val_data.shape[0] == val_label.shape[0])
assert (test_data.shape[0] == test_label.shape[0])
train_data = train_data.reshape((train_data.shape[0], 28, 28, 1))
val_data = val_data.reshape((val_data.shape[0], 28, 28, 1))
test_data = test_data.reshape((test_data.shape[0], 28, 28, 1))
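    # Fold the Kaggle Digit Recognizer train.csv digits (and, below, the MNIST test set)
    # into the training pool; since Kaggle's digits come from MNIST, this extra data is
    # what pushes the submitted score close to 1.0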
df = pd.read_csv(r"./path/to/data/train.csv")
    data = df.values  # as_matrix() was removed in newer pandas; .values is equivalent
np.random.shuffle(data)
x_train = data[:, 1:]
x_train = x_train.reshape(data.shape[0], 28, 28, 1).astype('float32')
x_train = x_train / 255
y_train = np_utils.to_categorical(data[:, 0], 10).astype('float32')
train_data = np.vstack((train_data, x_train, test_data))
train_label = np.vstack((train_label, y_train, test_label))
print("train data shape = ", train_data.shape)
print("train label shape = ", train_label.shape)
print("val data shape = ", val_data.shape)
print("val data shape = ", val_label.shape)
    test_df = pd.read_csv(r"./path/to/data/test.csv")  # Kaggle test images (pixels only, no labels)
    test_data = test_df.values.astype('float32')
test_data = test_data.reshape(test_data.shape[0], 28, 28, 1)
test_data = test_data/255
return train_data, train_label, val_data, val_label, test_data
def built_model():
models = Sequential()
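    # LeNet-5 style stack: conv(6@5x5) -> avg pool -> conv(16@5x5) -> avg pool -> FC 120 -> FC 84 -> softmax(10)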
models.add(Conv2D(6, (5, 5), activation='relu', input_shape=(28, 28, 1)))
models.add(AveragePooling2D(pool_size=(2, 2), strides=2))
models.add(Conv2D(16, (5, 5), activation='relu'))
models.add(AveragePooling2D(pool_size=(2, 2), strides=2))
models.add(Flatten())
models.add(Dense(120, activation='relu'))
models.add(Dense(84, activation='relu'))
models.add(Dense(num_class, activation='softmax'))
print("Create Model!")
return models
def compile_model(model):
# opt = keras.optimizers.rmsprop(lr=learning_rate, decay=1e-6)
# opt = keras.optimizers.sgd(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
    opt = keras.optimizers.Adam()  # Adam with default settings; learning_rate above is only used by the commented-out optimizers
model.compile(loss='categorical_crossentropy',
optimizer=opt,
metrics=['accuracy'])
model.summary()
print("model compile successful!")
def train_model(model, train_x, train_y, val_x, val_y, h):
if data_augmentation:
print("Using real-time data augmentation") # 使用实时的数据增加
data_generate = ImageDataGenerator(
featurewise_center=False, # 将输入数据的均值设置为0
samplewise_center=False, # 将每个样本的均值设置为0
featurewise_std_normalization=False, # 将输入除以数据标准差,逐特征进行
samplewise_std_normalization=False, # 将每个输出除以其标准差
zca_epsilon=1e-6, # ZCA白化的epsilon值,默认为1e-6
zca_whitening=False, # 是否应用ZCA白化
rotation_range=0, # 随机旋转的度数范围,输入为整数
width_shift_range=0.1, # 左右平移,输入为浮点数,大于1时输出为像素值
height_shift_range=0.1, # 上下平移,输入为浮点数,大于1时输出为像素值
shear_range=0., # 剪切强度,输入为浮点数
zoom_range=0., # 随机缩放,输入为浮点数
channel_shift_range=0., # 随机通道转换范围,输入为浮点数
fill_mode='nearest', # 输入边界以外点的填充方式,还有constant,reflect,wrap三种填充方式
cval=0., # 用于填充的值,当fill_mode='constant'时生效
horizontal_flip=True, # 随机水平翻转
vertical_flip=False, # 随机垂直翻转
rescale=None, # 重随放因子,为None或0时不进行缩放
preprocessing_function=None, # 应用于每个输入的函数
data_format=None, # 图像数据格式,默认为channels_last
validation_split=0.0
)
data_generate.fit(train_x)
# 使用实时数据增强的batch对模型进行拟合:
model.fit_generator(data_generate.flow(train_x, train_y, batch_size),
steps_per_epoch=len(train_x)//batch_size + len(val_x)//batch_size, epochs=epochs,
validation_data=(val_x, val_y), workers=4,
callbacks=[learning_rate_reduction, h])
else:
print("Not using data augementation")
model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs,
validation_data=(val_x, val_y), shuffle=True,
callbacks=[learning_rate_reduction, h])
def save_model(model):
if not os.path.isdir(model_dir):
os.makedirs(model_dir)
model_path = os.path.join(model_dir, model_name)
model.save(model_path)
print("Saved trained model at: " + str(model_path))
def predict_model(model, test_x, test_y, h):
scores = model.evaluate(test_x, test_y, verbose=1)
print("Test loss is: " + str(scores[0]))
print("Test accuracy is: " + str(scores[1]))
h.loss_plot('epoch')
class LossHistory(keras.callbacks.Callback):
def on_train_begin(self, logs={}):
self.losses = {'batch': [], 'epoch': []}
self.accuracy = {'batch': [], 'epoch': []}
self.val_loss = {'batch': [], 'epoch': []}
self.val_acc = {'batch': [], 'epoch': []}
def on_batch_end(self, batch, logs={}):
self.losses['batch'].append(logs.get('loss'))
self.accuracy['batch'].append(logs.get('acc'))
self.val_loss['batch'].append(logs.get('val_loss'))
self.val_acc['batch'].append(logs.get('val_acc'))
def on_epoch_end(self, batch, logs={}):
self.losses['epoch'].append(logs.get('loss'))
self.accuracy['epoch'].append(logs.get('acc'))
self.val_loss['epoch'].append(logs.get('val_loss'))
self.val_acc['epoch'].append(logs.get('val_acc'))
def loss_plot(self, loss_type):
iters = range(len(self.losses[loss_type]))
plt.figure()
plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc')
plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
if loss_type == 'epoch':
plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc')
plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
plt.grid(True)
plt.xlabel(loss_type)
plt.ylabel('acc-loss')
plt.legend(loc="upper right")
plt.show()
def main():
train_data, train_label, val_data, val_label, test_data = get_data()
model = built_model()
history = LossHistory()
compile_model(model)
train_model(model, train_data, train_label, val_data, val_label, history)
save_model(model)
y_predict = model.predict_classes(test_data, batch_size=32, verbose=1)
np.savetxt('Digit_Recognizer_V1.csv', np.c_[range(1, len(y_predict)+1), y_predict],
delimiter=',', header='ImageId,Label', comments='', fmt='%d')
if __name__ == "__main__":
main()
Keras-based CIFAR-10 classification: https://blog.****.net/weixin_43509263/article/details/88647889