TensorFlow Learning 10-2: Captcha Recognition - Training and Testing

Introduction

As the title says, this post covers captcha recognition with TensorFlow. In the previous post we generated the dataset and converted it into a tfrecord file; now we use that file for training and recognition.

One more note: there are two ways to approach captcha recognition. The first is to turn the label into a single vector of length 40. For example, the captcha 0782 becomes the 40-dimensional vector 1000000000 0000000100 0000000010 0010000000, and training then proceeds much like handwritten digit recognition. The second is multi-task learning, which splits the captcha into 4 separate labels.
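To make the two encodings concrete, here is a minimal sketch (the helper names text_to_vec and text_to_multi_labels are illustrative, not taken from the project code):

import numpy as np

def text_to_vec(text, char_set_len=10):
    """Method one: a 4-digit captcha becomes a single 40-dim one-hot vector."""
    vec = np.zeros(4 * char_set_len, dtype=np.float32)
    for i, ch in enumerate(text):
        vec[i * char_set_len + int(ch)] = 1.0
    return vec

def text_to_multi_labels(text):
    """Method two: the captcha is split into 4 separate integer labels (one per task)."""
    return [int(ch) for ch in text]

print(text_to_vec("0782"))           # 40 values with a single 1 in each group of 10
print(text_to_multi_labels("0782"))  # [0, 7, 8, 2]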

1 Multi-Task Learning

We use multi-task learning.

Take captcha recognition as an example:

Multi-task learning is a form of joint learning: several tasks are learned in parallel and their results influence one another. In other words, multi-task learning solves several problems at the same time. Personalization is a typical multi-task learning problem, since it learns the interest preferences of many users simultaneously.

Multi-task learning can use either alternating training or joint training. Since our four tasks share the same dataset, we use joint training.
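To make the difference concrete, here is an illustrative sketch only (the stand-in losses and op names are placeholders, not the actual training script):

import tensorflow as tf

# Stand-in losses for the four tasks; in the real script these are the four
# per-character cross-entropy losses defined further below.
task_losses = [tf.reduce_mean(tf.square(tf.Variable(tf.random_normal([10]))))
               for _ in range(4)]

# Joint training: one optimizer step minimizes the averaged loss of all tasks,
# so the shared layers receive gradients from every task at once.
joint_train_op = tf.train.AdamOptimizer(1e-3).minimize(tf.add_n(task_losses) / 4.0)

# Alternating training: one op per task, run in turn,
# e.g. sess.run(alternating_ops[step % 4], feed_dict=...).
alternating_ops = [tf.train.AdamOptimizer(1e-3).minimize(loss) for loss in task_losses]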

1) Preparation

Back to the main topic: let's implement this multi-task learning in code. The tfrecord file was generated in the previous post, and we use the alexnet_v2 model for the recognition. Note that the AlexNet code, located in the slim/nets folder, needs to be modified:

Copy nets into the current project directory, rename it to nets_multi, and modify alexnet.py so that the final layer is split into 4 outputs (one per learning task). The complete modified code is as follows:


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def alexnet_v2_arg_scope(weight_decay=0.0005):
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      biases_initializer=tf.constant_initializer(0.1),
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope([slim.conv2d], padding='SAME'):
      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2',
               global_pool=False):
  """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
    To use in classification mode, resize input to 224x224 or set
    global_pool=True. To use in fully convolutional mode, set
    spatial_squeeze to false.
    The LRN layers have been removed and change the initializers from
    random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: the number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer are returned instead.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      logits. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    global_pool: Optional boolean flag. If True, the input to the classification
      layer is avgpooled to size 1x1, for any input size. (This is not part
      of the original AlexNet.)

  Returns:
    net: the output of the logits layer (if num_classes is a non-zero integer),
      or the non-dropped-out input to the logits layer (if num_classes is 0
      or None).
    end_points: a dict of tensors with intermediate activations.
  """
  with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
      net = slim.conv2d(net, 192, [5, 5], scope='conv2')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], scope='conv3')
      net = slim.conv2d(net, 384, [3, 3], scope='conv4')
      net = slim.conv2d(net, 256, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')

      # Use conv2d instead of fully_connected layers.
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                          scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        # Split the logits into 4 outputs, one per captcha character.
        net0 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_0')
        net1 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_1')
        net2 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_2')
        net3 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_3')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      # The original single-output fc8 layer and the global_pool branch are
      # disabled in this multi-task version:
      # if global_pool:
      #   net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
      #   end_points['global_pool'] = net
      # if num_classes:
      #   net = slim.conv2d(net, num_classes, [1, 1],
      #                     activation_fn=None,
      #                     normalizer_fn=None,
      #                     biases_initializer=tf.zeros_initializer(),
      #                     scope='fc8')
      if spatial_squeeze:
        net0 = tf.squeeze(net0, [1, 2], name='fc8_0/squeezed')
        end_points[sc.name + '/fc8_0'] = net0
        net1 = tf.squeeze(net1, [1, 2], name='fc8_1/squeezed')
        end_points[sc.name + '/fc8_1'] = net1
        net2 = tf.squeeze(net2, [1, 2], name='fc8_2/squeezed')
        end_points[sc.name + '/fc8_2'] = net2
        net3 = tf.squeeze(net3, [1, 2], name='fc8_3/squeezed')
        end_points[sc.name + '/fc8_3'] = net3
      return net0, net1, net2, net3, end_points


alexnet_v2.default_image_size = 224

Also modify the nets_factory.py file by adding one line of code:

from nets import alexnet_multi
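Adding the import alone is probably not sufficient: get_network_fn resolves the model name through the networks_map and arg_scopes_map dictionaries in nets_factory.py, so the 'alexnet_v2' entries presumably also need to point at the modified, four-output module. A hedged sketch (the module name alexnet_multi follows the import above; only the relevant entries are shown):

from nets import alexnet_multi

networks_map = {
    'alexnet_v2': alexnet_multi.alexnet_v2,              # the four-output version
    # ... the remaining entries stay unchanged ...
}

arg_scopes_map = {
    'alexnet_v2': alexnet_multi.alexnet_v2_arg_scope,
    # ... the remaining entries stay unchanged ...
}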

2) Training

The training code is as follows:

import os
import tensorflow as tf
import numpy as np
from PIL import Image
from nets import nets_factory

# number of distinct characters
CHAR_SET_LEN = 10
# image height
IMAGE_HEIGHT = 60
# image width
IMAGE_WIDTH = 160
# batch size
BATCH_SIZE = 25

MOD_DIR = "D:/Tensorflow/captcha/model/"
# path of the tfrecord file
TFRECORD_FILE = "D:/Tensorflow/captcha/train.tfrecord"
# placeholders
x = tf.placeholder(tf.float32, [None, 224, 224])
y0 = tf.placeholder(tf.float32, [None])
y1 = tf.placeholder(tf.float32, [None])
y2 = tf.placeholder(tf.float32, [None])
y3 = tf.placeholder(tf.float32, [None])

# learning rate
lr = tf.Variable(0.003, dtype=tf.float32)

# read the tfrecord file
def read_and_decode(filename):
    # build a filename queue from the file name
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    # read one serialized example; returns the file name and the record
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           "image": tf.FixedLenFeature([], tf.string),
                                           "label0": tf.FixedLenFeature([], tf.int64),
                                           "label1": tf.FixedLenFeature([], tf.int64),
                                           "label2": tf.FixedLenFeature([], tf.int64),
                                           "label3": tf.FixedLenFeature([], tf.int64),
                                       })
    # decode the image data
    image = tf.decode_raw(features["image"], tf.uint8)
    # tf.train.shuffle_batch requires a fully defined shape
    image = tf.reshape(image, [224, 224])
    # image preprocessing: scale to [-1, 1]
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    # get the labels
    label0 = tf.cast(features["label0"], tf.int32)
    label1 = tf.cast(features["label1"], tf.int32)
    label2 = tf.cast(features["label2"], tf.int32)
    label3 = tf.cast(features["label3"], tf.int32)

    return image, label0, label1, label2, label3
# get the image data and labels
image, label0, label1, label2, label3 = read_and_decode(TFRECORD_FILE)

# tf.train.shuffle_batch shuffles the examples while keeping each image paired
# with its labels: it builds a RandomShuffleQueue, keeps enqueueing single
# [image, label] examples, and returns the result of dequeue_many(), so every
# batch contains images matched with their own labels.
image_batch, label_batch0, label_batch1, label_batch2, label_batch3 = tf.train.shuffle_batch(
    [image, label0, label1, label2, label3], batch_size=BATCH_SIZE,
    capacity=5000, min_after_dequeue=1000, num_threads=1)

# define the network structure
train_network_fn = nets_factory.get_network_fn(
    "alexnet_v2",
    num_classes=CHAR_SET_LEN,
    weight_decay=0.0005,
    is_training=True)

with tf.Session() as sess:
    # inputs: a tensor of size [batch_size, height, width, channels]
    X = tf.reshape(x, [BATCH_SIZE, 224, 224, 1])
    # feed the data through the network to get the four outputs
    logits0, logits1, logits2, logits3, end_points = train_network_fn(X)

    # convert the labels to one-hot form
    one_hot_labels0 = tf.one_hot(indices=tf.cast(y0, tf.int32), depth=CHAR_SET_LEN)
    one_hot_labels1 = tf.one_hot(indices=tf.cast(y1, tf.int32), depth=CHAR_SET_LEN)
    one_hot_labels2 = tf.one_hot(indices=tf.cast(y2, tf.int32), depth=CHAR_SET_LEN)
    one_hot_labels3 = tf.one_hot(indices=tf.cast(y3, tf.int32), depth=CHAR_SET_LEN)

    # compute the loss of each task
    loss0 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits0, labels=one_hot_labels0))
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=one_hot_labels1))
    loss2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=one_hot_labels2))
    loss3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits3, labels=one_hot_labels3))
    # compute the total loss
    total_loss = (loss0 + loss1 + loss2 + loss3) / 4.0
    # optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(total_loss)

    # compute the accuracy of each task
    correct_prediction0 = tf.equal(tf.argmax(one_hot_labels0, 1), tf.argmax(logits0, 1))
    accuracy0 = tf.reduce_mean(tf.cast(correct_prediction0, tf.float32))

    correct_prediction1 = tf.equal(tf.argmax(one_hot_labels1, 1), tf.argmax(logits1, 1))
    accuracy1 = tf.reduce_mean(tf.cast(correct_prediction1, tf.float32))

    correct_prediction2 = tf.equal(tf.argmax(one_hot_labels2, 1), tf.argmax(logits2, 1))
    accuracy2 = tf.reduce_mean(tf.cast(correct_prediction2, tf.float32))

    correct_prediction3 = tf.equal(tf.argmax(one_hot_labels3, 1), tf.argmax(logits3, 1))
    accuracy3 = tf.reduce_mean(tf.cast(correct_prediction3, tf.float32))

    # saver used to save the model
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())

    # create a coordinator to manage the threads
    coord = tf.train.Coordinator()
    # start the queue runners; the filename queue is now populated
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    for i in range(3001):
        # fetch one batch of data and labels
        b_image, b_label0, b_label1, b_label2, b_label3 = sess.run(
            [image_batch, label_batch0, label_batch1, label_batch2, label_batch3])

        # optimize the model
        sess.run(optimizer, feed_dict={x: b_image, y0: b_label0, y1: b_label1, y2: b_label2, y3: b_label3})

        # report loss and accuracy every 20 iterations
        if i % 20 == 0:
            # lower the learning rate every 1000 iterations
            if i % 1000 == 0:
                sess.run(tf.assign(lr, lr / 3))

            # print("y0:", b_label0, "y1:", b_label1, "y2:", b_label2, "y3:", b_label3)
            # _logits0, _logits1, _logits2, _logits3 = sess.run([logits0, logits1, logits2, logits3], feed_dict={x: b_image})
            # print("logits0:", _logits0, "logits1:", _logits1, "logits2:", _logits2, "logits3:", _logits3)

            acc0, acc1, acc2, acc3, loss_ = sess.run([accuracy0, accuracy1, accuracy2, accuracy3, total_loss], feed_dict={
                x: b_image, y0: b_label0, y1: b_label1, y2: b_label2, y3: b_label3
            })
            learning_rate = sess.run(lr)
            print("Iter:%d Loss:%.3f Accuracy:%.2f,%.2f,%.2f,%.2f Learning Rate:%.4f" % (i, loss_, acc0, acc1, acc2, acc3, learning_rate))

            # once the stop condition is met, save the model and stop training
            if i == 3000:
                saver.save(sess, MOD_DIR + "captcha.model", global_step=i)  # global_step appends the suffix 3000
                break

    # ask the other threads to stop
    coord.request_stop()
    # this call returns only after all other threads have stopped
    coord.join(threads)

WARNING:tensorflow:From :34: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Iter:0 Loss:1566.136 Accuracy:0.32,0.12,0.16,0.20 Learning Rate:0.0010
Iter:20 Loss:2.314 Accuracy:0.12,0.08,0.12,0.08 Learning Rate:0.0010
Iter:40 Loss:2.297 Accuracy:0.24,0.04,0.12,0.04 Learning Rate:0.0010
Iter:60 Loss:2.296 Accuracy:0.20,0.04,0.04,0.08 Learning Rate:0.0010
Iter:80 Loss:2.310 Accuracy:0.04,0.04,0.04,0.20 Learning Rate:0.0010
Iter:100 Loss:2.322 Accuracy:0.00,0.04,0.08,0.04 Learning Rate:0.0010
Iter:120 Loss:2.277 Accuracy:0.08,0.32,0.08,0.16 Learning Rate:0.0010
Iter:140 Loss:2.328 Accuracy:0.12,0.08,0.04,0.08 Learning Rate:0.0010
Iter:160 Loss:2.294 Accuracy:0.08,0.16,0.12,0.08 Learning Rate:0.0010
Iter:180 Loss:2.295 Accuracy:0.04,0.08,0.24,0.20 Learning Rate:0.0010
Iter:200 Loss:2.314 Accuracy:0.16,0.04,0.04,0.04 Learning Rate:0.0010
Iter:220 Loss:2.299 Accuracy:0.08,0.16,0.12,0.04 Learning Rate:0.0010
Iter:240 Loss:2.310 Accuracy:0.04,0.00,0.12,0.12 Learning Rate:0.0010
Iter:260 Loss:2.315 Accuracy:0.00,0.16,0.12,0.16 Learning Rate:0.0010
Iter:280 Loss:2.305 Accuracy:0.12,0.28,0.08,0.04 Learning Rate:0.0010
Iter:300 Loss:2.299 Accuracy:0.04,0.08,0.08,0.16 Learning Rate:0.0010
Iter:320 Loss:2.293 Accuracy:0.12,0.08,0.16,0.20 Learning Rate:0.0010
Iter:340 Loss:2.265 Accuracy:0.12,0.28,0.12,0.24 Learning Rate:0.0010
Iter:360 Loss:2.307 Accuracy:0.16,0.16,0.08,0.12 Learning Rate:0.0010
Iter:380 Loss:2.305 Accuracy:0.16,0.12,0.04,0.08 Learning Rate:0.0010
Iter:400 Loss:2.312 Accuracy:0.16,0.20,0.00,0.04 Learning Rate:0.0010
Iter:420 Loss:2.302 Accuracy:0.16,0.00,0.12,0.08 Learning Rate:0.0010
Iter:440 Loss:2.278 Accuracy:0.08,0.24,0.36,0.08 Learning Rate:0.0010
Iter:460 Loss:2.290 Accuracy:0.04,0.12,0.08,0.12 Learning Rate:0.0010
Iter:480 Loss:2.294 Accuracy:0.20,0.16,0.08,0.12 Learning Rate:0.0010
Iter:500 Loss:2.319 Accuracy:0.08,0.12,0.00,0.12 Learning Rate:0.0010
Iter:520 Loss:2.294 Accuracy:0.12,0.04,0.20,0.20 Learning Rate:0.0010
Iter:540 Loss:2.297 Accuracy:0.16,0.12,0.08,0.00 Learning Rate:0.0010
Iter:560 Loss:2.309 Accuracy:0.08,0.08,0.04,0.04 Learning Rate:0.0010
Iter:580 Loss:2.294 Accuracy:0.12,0.08,0.24,0.08 Learning Rate:0.0010
Iter:600 Loss:2.284 Accuracy:0.16,0.08,0.08,0.24 Learning Rate:0.0010
Iter:620 Loss:2.281 Accuracy:0.08,0.00,0.28,0.32 Learning Rate:0.0010
Iter:640 Loss:2.318 Accuracy:0.04,0.16,0.08,0.00 Learning Rate:0.0010
Iter:660 Loss:2.291 Accuracy:0.08,0.20,0.20,0.12 Learning Rate:0.0010
Iter:680 Loss:2.311 Accuracy:0.04,0.04,0.00,0.12 Learning Rate:0.0010
Iter:700 Loss:2.220 Accuracy:0.20,0.04,0.08,0.20 Learning Rate:0.0010
Iter:720 Loss:2.196 Accuracy:0.48,0.16,0.00,0.24 Learning Rate:0.0010
Iter:740 Loss:2.215 Accuracy:0.20,0.08,0.04,0.20 Learning Rate:0.0010
Iter:760 Loss:2.084 Accuracy:0.36,0.12,0.08,0.12 Learning Rate:0.0010
Iter:780 Loss:2.087 Accuracy:0.36,0.12,0.00,0.04 Learning Rate:0.0010
Iter:800 Loss:2.121 Accuracy:0.36,0.08,0.08,0.08 Learning Rate:0.0010
Iter:820 Loss:1.991 Accuracy:0.48,0.20,0.16,0.12 Learning Rate:0.0010
Iter:840 Loss:1.926 Accuracy:0.60,0.12,0.32,0.16 Learning Rate:0.0010
Iter:860 Loss:1.868 Accuracy:0.52,0.24,0.12,0.24 Learning Rate:0.0010
Iter:880 Loss:1.876 Accuracy:0.48,0.12,0.16,0.20 Learning Rate:0.0010
Iter:900 Loss:1.693 Accuracy:0.64,0.24,0.28,0.40 Learning Rate:0.0010
Iter:920 Loss:1.768 Accuracy:0.72,0.28,0.20,0.24 Learning Rate:0.0010
Iter:940 Loss:1.582 Accuracy:0.64,0.32,0.36,0.48 Learning Rate:0.0010
Iter:960 Loss:1.673 Accuracy:0.60,0.24,0.24,0.32 Learning Rate:0.0010
Iter:980 Loss:1.530 Accuracy:0.84,0.28,0.28,0.36 Learning Rate:0.0010
Iter:1000 Loss:1.550 Accuracy:0.68,0.28,0.40,0.40 Learning Rate:0.0003
Iter:1020 Loss:1.446 Accuracy:0.56,0.20,0.48,0.36 Learning Rate:0.0003
Iter:1040 Loss:1.445 Accuracy:0.68,0.44,0.20,0.52 Learning Rate:0.0003
Iter:1060 Loss:1.425 Accuracy:0.80,0.48,0.24,0.60 Learning Rate:0.0003
Iter:1080 Loss:1.273 Accuracy:0.80,0.56,0.40,0.56 Learning Rate:0.0003
Iter:1100 Loss:1.171 Accuracy:0.76,0.44,0.36,0.68 Learning Rate:0.0003
Iter:1120 Loss:1.080 Accuracy:0.84,0.44,0.52,0.56 Learning Rate:0.0003
Iter:1140 Loss:1.242 Accuracy:0.88,0.40,0.56,0.32 Learning Rate:0.0003
Iter:1160 Loss:1.071 Accuracy:0.88,0.60,0.52,0.52 Learning Rate:0.0003
Iter:1180 Loss:1.176 Accuracy:0.80,0.44,0.56,0.48 Learning Rate:0.0003
Iter:1200 Loss:1.131 Accuracy:0.84,0.48,0.52,0.44 Learning Rate:0.0003
Iter:1220 Loss:1.138 Accuracy:0.76,0.52,0.64,0.56 Learning Rate:0.0003
Iter:1240 Loss:1.035 Accuracy:0.84,0.56,0.56,0.52 Learning Rate:0.0003
Iter:1260 Loss:0.820 Accuracy:0.92,0.68,0.64,0.68 Learning Rate:0.0003
Iter:1280 Loss:1.083 Accuracy:0.92,0.36,0.52,0.64 Learning Rate:0.0003
Iter:1300 Loss:0.966 Accuracy:1.00,0.52,0.44,0.60 Learning Rate:0.0003
Iter:1320 Loss:0.804 Accuracy:0.84,0.68,0.60,0.64 Learning Rate:0.0003
Iter:1340 Loss:0.845 Accuracy:0.92,0.72,0.48,0.56 Learning Rate:0.0003
Iter:1360 Loss:0.923 Accuracy:0.80,0.48,0.64,0.56 Learning Rate:0.0003
Iter:1380 Loss:0.664 Accuracy:0.96,0.60,0.60,0.88 Learning Rate:0.0003
Iter:1400 Loss:0.915 Accuracy:0.88,0.72,0.40,0.72 Learning Rate:0.0003
Iter:1420 Loss:0.724 Accuracy:0.92,0.72,0.64,0.72 Learning Rate:0.0003
Iter:1440 Loss:0.574 Accuracy:0.96,0.76,0.76,0.76 Learning Rate:0.0003
Iter:1460 Loss:0.550 Accuracy:0.88,0.80,0.72,0.88 Learning Rate:0.0003
Iter:1480 Loss:0.588 Accuracy:0.88,0.72,0.84,0.84 Learning Rate:0.0003
Iter:1500 Loss:0.611 Accuracy:0.80,0.76,0.68,0.84 Learning Rate:0.0003
Iter:1520 Loss:0.487 Accuracy:0.88,0.84,0.80,0.96 Learning Rate:0.0003
Iter:1540 Loss:0.648 Accuracy:0.88,0.68,0.72,0.80 Learning Rate:0.0003
Iter:1560 Loss:0.600 Accuracy:0.84,0.76,0.68,0.84 Learning Rate:0.0003
Iter:1580 Loss:0.714 Accuracy:0.88,0.68,0.68,0.76 Learning Rate:0.0003
Iter:1600 Loss:0.497 Accuracy:0.96,0.72,0.76,0.84 Learning Rate:0.0003
Iter:1620 Loss:0.519 Accuracy:0.88,0.80,0.72,0.84 Learning Rate:0.0003
Iter:1640 Loss:0.551 Accuracy:0.92,0.72,0.68,0.92 Learning Rate:0.0003
Iter:1660 Loss:0.539 Accuracy:0.92,0.80,0.64,0.88 Learning Rate:0.0003
Iter:1680 Loss:0.484 Accuracy:0.92,0.80,0.80,0.76 Learning Rate:0.0003
Iter:1700 Loss:0.428 Accuracy:0.96,0.80,0.84,0.88 Learning Rate:0.0003
Iter:1720 Loss:0.510 Accuracy:0.92,0.68,0.84,0.80 Learning Rate:0.0003
Iter:1740 Loss:0.548 Accuracy:0.88,0.80,0.72,0.80 Learning Rate:0.0003
Iter:1760 Loss:0.358 Accuracy:0.92,0.80,0.84,1.00 Learning Rate:0.0003
Iter:1780 Loss:0.374 Accuracy:0.92,0.76,0.92,0.84 Learning Rate:0.0003
Iter:1800 Loss:0.442 Accuracy:0.88,0.80,0.68,0.88 Learning Rate:0.0003
Iter:1820 Loss:0.432 Accuracy:0.96,0.80,0.72,0.88 Learning Rate:0.0003
Iter:1840 Loss:0.399 Accuracy:1.00,0.84,0.80,0.76 Learning Rate:0.0003
Iter:1860 Loss:0.541 Accuracy:1.00,0.68,0.64,0.88 Learning Rate:0.0003
Iter:1880 Loss:0.495 Accuracy:0.92,0.64,0.76,0.80 Learning Rate:0.0003
Iter:1900 Loss:0.275 Accuracy:0.88,0.88,0.88,0.88 Learning Rate:0.0003
Iter:1920 Loss:0.319 Accuracy:0.96,0.92,0.88,0.80 Learning Rate:0.0003
Iter:1940 Loss:0.259 Accuracy:1.00,0.96,0.84,0.92 Learning Rate:0.0003
Iter:1960 Loss:0.379 Accuracy:0.96,0.76,0.76,0.84 Learning Rate:0.0003
Iter:1980 Loss:0.388 Accuracy:0.92,0.92,0.80,0.84 Learning Rate:0.0003
Iter:2000 Loss:0.350 Accuracy:0.96,0.88,0.72,0.96 Learning Rate:0.0001
Iter:2020 Loss:0.448 Accuracy:0.96,0.72,0.92,0.84 Learning Rate:0.0001
Iter:2040 Loss:0.232 Accuracy:0.96,0.84,0.92,0.92 Learning Rate:0.0001
Iter:2060 Loss:0.196 Accuracy:0.92,0.92,0.92,0.84 Learning Rate:0.0001
Iter:2080 Loss:0.346 Accuracy:0.96,0.92,0.72,0.84 Learning Rate:0.0001
Iter:2100 Loss:0.181 Accuracy:0.96,0.92,0.96,0.96 Learning Rate:0.0001
Iter:2120 Loss:0.231 Accuracy:0.96,0.80,0.88,1.00 Learning Rate:0.0001
Iter:2140 Loss:0.201 Accuracy:1.00,1.00,0.76,0.92 Learning Rate:0.0001
Iter:2160 Loss:0.271 Accuracy:0.96,0.92,0.88,0.92 Learning Rate:0.0001
Iter:2180 Loss:0.214 Accuracy:0.96,0.92,0.96,0.92 Learning Rate:0.0001
Iter:2200 Loss:0.241 Accuracy:0.96,0.92,1.00,0.88 Learning Rate:0.0001
Iter:2220 Loss:0.268 Accuracy:0.92,0.92,0.88,0.92 Learning Rate:0.0001
Iter:2240 Loss:0.249 Accuracy:0.92,0.92,0.84,0.96 Learning Rate:0.0001
Iter:2260 Loss:0.188 Accuracy:0.96,0.92,0.92,0.92 Learning Rate:0.0001
Iter:2280 Loss:0.196 Accuracy:0.96,0.88,0.92,1.00 Learning Rate:0.0001
Iter:2300 Loss:0.186 Accuracy:1.00,0.80,0.92,1.00 Learning Rate:0.0001
Iter:2320 Loss:0.167 Accuracy:1.00,0.88,0.88,0.96 Learning Rate:0.0001
Iter:2340 Loss:0.282 Accuracy:0.96,0.84,0.92,0.92 Learning Rate:0.0001
Iter:2360 Loss:0.224 Accuracy:1.00,0.88,0.88,0.96 Learning Rate:0.0001
Iter:2380 Loss:0.209 Accuracy:0.92,0.84,0.96,1.00 Learning Rate:0.0001
Iter:2400 Loss:0.100 Accuracy:1.00,1.00,0.96,1.00 Learning Rate:0.0001
Iter:2420 Loss:0.227 Accuracy:0.96,0.96,0.88,0.84 Learning Rate:0.0001
Iter:2440 Loss:0.228 Accuracy:0.96,0.96,0.92,0.88 Learning Rate:0.0001
Iter:2460 Loss:0.169 Accuracy:1.00,0.92,0.84,0.96 Learning Rate:0.0001
Iter:2480 Loss:0.162 Accuracy:0.96,0.84,1.00,0.96 Learning Rate:0.0001
Iter:2500 Loss:0.149 Accuracy:0.96,0.92,0.96,0.88 Learning Rate:0.0001
Iter:2520 Loss:0.198 Accuracy:0.96,0.96,0.88,0.92 Learning Rate:0.0001
Iter:2540 Loss:0.134 Accuracy:0.96,1.00,0.92,0.96 Learning Rate:0.0001
Iter:2560 Loss:0.181 Accuracy:0.96,0.96,0.92,0.92 Learning Rate:0.0001
Iter:2580 Loss:0.230 Accuracy:0.96,0.92,0.84,0.88 Learning Rate:0.0001
Iter:2600 Loss:0.137 Accuracy:1.00,1.00,0.92,0.92 Learning Rate:0.0001
Iter:2620 Loss:0.111 Accuracy:1.00,0.96,1.00,1.00 Learning Rate:0.0001
Iter:2640 Loss:0.142 Accuracy:1.00,0.92,0.96,0.92 Learning Rate:0.0001
Iter:2660 Loss:0.158 Accuracy:0.96,0.96,0.84,0.96 Learning Rate:0.0001
Iter:2680 Loss:0.070 Accuracy:0.96,0.96,0.96,1.00 Learning Rate:0.0001
Iter:2700 Loss:0.119 Accuracy:1.00,1.00,0.92,0.96 Learning Rate:0.0001
Iter:2720 Loss:0.074 Accuracy:0.96,0.96,0.96,1.00 Learning Rate:0.0001
Iter:2740 Loss:0.125 Accuracy:1.00,1.00,1.00,0.92 Learning Rate:0.0001
Iter:2760 Loss:0.072 Accuracy:1.00,1.00,0.96,1.00 Learning Rate:0.0001
Iter:2780 Loss:0.109 Accuracy:0.96,0.88,0.92,1.00 Learning Rate:0.0001
Iter:2800 Loss:0.181 Accuracy:1.00,0.96,0.96,0.92 Learning Rate:0.0001
Iter:2820 Loss:0.121 Accuracy:1.00,0.88,1.00,1.00 Learning Rate:0.0001
Iter:2840 Loss:0.102 Accuracy:1.00,0.96,0.96,0.96 Learning Rate:0.0001
Iter:2860 Loss:0.241 Accuracy:0.92,0.88,0.92,0.84 Learning Rate:0.0001
Iter:2880 Loss:0.129 Accuracy:1.00,0.92,0.96,0.92 Learning Rate:0.0001
Iter:2900 Loss:0.214 Accuracy:0.96,0.88,0.92,0.88 Learning Rate:0.0001
Iter:2920 Loss:0.138 Accuracy:1.00,0.96,0.92,0.96 Learning Rate:0.0001
Iter:2940 Loss:0.110 Accuracy:0.92,0.88,1.00,1.00 Learning Rate:0.0001
Iter:2960 Loss:0.103 Accuracy:0.96,0.96,0.96,1.00 Learning Rate:0.0001
Iter:2980 Loss:0.083 Accuracy:0.96,1.00,0.96,0.96 Learning Rate:0.0001
Iter:3000 Loss:0.111 Accuracy:0.96,0.92,0.96,0.92 Learning Rate:0.0000

After about 3,000 iterations the accuracy reaches the required level.

3) Testing

The test code is as follows:

# Captcha test
import os
import tensorflow as tf
from PIL import Image
from nets import nets_factory
import numpy as np
import matplotlib.pyplot as plt

# number of distinct characters
CHAR_SET_LEN = 10
# image height
IMAGE_HEIGHT = 60
# image width
IMAGE_WIDTH = 160
# batch size
BATCH_SIZE = 1

MOD_DIR = "D:/Tensorflow/captcha/model/"
# path of the tfrecord file
TFRECORD_FILE = "D:/Tensorflow/captcha/validation.tfrecord"


# placeholder
x = tf.placeholder(tf.float32, [None, 224, 224])

# read data from the tfrecord file
def read_and_decode(filename):
    # build a filename queue from the file name
    filename_queue = tf.train.string_input_producer([filename])
    # create a reader from the file queue
    reader = tf.TFRecordReader()
    # the reader reads one serialized example from the file queue
    _, serialized_example = reader.read(filename_queue)
    # parse the serialized example
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'label0': tf.FixedLenFeature([], tf.int64),
            'label1': tf.FixedLenFeature([], tf.int64),
            'label2': tf.FixedLenFeature([], tf.int64),
            'label3': tf.FixedLenFeature([], tf.int64),
        })
    # decode the image data
    image = tf.decode_raw(features["image"], tf.uint8)
    # grayscale image without preprocessing, kept for display
    image_raw = tf.reshape(image, [224, 224])
    # tf.train.shuffle_batch requires a fully defined shape
    image = tf.reshape(image, [224, 224])
    # image preprocessing: scale to [-1, 1]
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    # get the labels
    label0 = tf.cast(features['label0'], tf.int32)
    label1 = tf.cast(features['label1'], tf.int32)
    label2 = tf.cast(features['label2'], tf.int32)
    label3 = tf.cast(features['label3'], tf.int32)
    return image, image_raw, label0, label1, label2, label3


# get the image data and labels
image, image_raw, label0, label1, label2, label3 = read_and_decode(TFRECORD_FILE)
# shuffle_batch shuffles the input while keeping each image paired with its labels
image_batch, image_raw_batch, label_batch0, label_batch1, label_batch2, label_batch3 = tf.train.shuffle_batch(
    [image, image_raw, label0, label1, label2, label3],
    batch_size=BATCH_SIZE, capacity=5000,
    min_after_dequeue=1000, num_threads=1)
# define the network structure
train_network_fn = nets_factory.get_network_fn(
    'alexnet_v2',
    num_classes=CHAR_SET_LEN,
    weight_decay=0.0005,
    is_training=False
)

with tf.Session() as sess:
    X = tf.reshape(x, [BATCH_SIZE, 224, 224, 1])
    # feed the data through the network to get the four outputs
    logits0, logits1, logits2, logits3, end_points = train_network_fn(X)

    # predictions
    predict0 = tf.reshape(logits0, [-1, CHAR_SET_LEN])
    predict0 = tf.argmax(predict0, 1)

    predict1 = tf.reshape(logits1, [-1, CHAR_SET_LEN])
    predict1 = tf.argmax(predict1, 1)

    predict2 = tf.reshape(logits2, [-1, CHAR_SET_LEN])
    predict2 = tf.argmax(predict2, 1)

    predict3 = tf.reshape(logits3, [-1, CHAR_SET_LEN])
    predict3 = tf.argmax(predict3, 1)


    # initialize the variables
    sess.run(tf.global_variables_initializer())
    # load the trained model
    saver = tf.train.Saver()
    saver.restore(sess, MOD_DIR + "captcha.model-3000")

    # create a coordinator to manage the threads
    coord = tf.train.Coordinator()
    # start the queue runners
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(5):
        # fetch one batch of data and labels
        b_image, b_image_raw, b_label0, b_label1, b_label2, b_label3 = sess.run(
            [image_batch, image_raw_batch,
             label_batch0, label_batch1, label_batch2, label_batch3])
        # show the image
        img = Image.fromarray(b_image_raw[0], "L")
        plt.imshow(np.array(img))
        plt.axis('off')
        plt.show()
        # print the labels
        print('label:', b_label0, b_label1, b_label2, b_label3)
        # predict
        label0, label1, label2, label3 = sess.run([predict0, predict1, predict2, predict3],
                                                  feed_dict={x: b_image})
        # print the predictions
        print('predict:', label0, label1, label2, label3)

    # ask the other threads to stop
    coord.request_stop()
    # this call returns only after all other threads have stopped
    coord.join(threads)

INFO:tensorflow:Restoring parameters from D:/Tensorflow/captcha/model/captcha.model-3000

[captcha image]

label: [3] [1] [8] [3]
predict: [3] [1] [8] [3]

[captcha image]

label: [8] [1] [1] [7]
predict: [8] [1] [1] [7]

[captcha image]

label: [4] [7] [3] [4]
predict: [4] [7] [3] [4]

[captcha image]

label: [5] [2] [9] [4]
predict: [5] [6] [9] [8]

[captcha image]

label: [1] [6] [6] [8]
predict: [1] [4] [8] [8]


Summary:
How do we train captchas that also contain letters? It is actually straightforward. There are 26 letters A-Z, and we can map them to the 26 numbers 10-35 (A:10, B:11, ..., Z:35). The digit-plus-letter set then contains 10 + 26 = 36 characters. Using one-hot encoding in the same way, each label becomes a 36-dimensional vector with a single 1 and the rest 0; for example, A is 000000000010000.....000.
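A minimal sketch of that digit-plus-letter mapping (the helper names are illustrative; on the training side this mainly amounts to setting CHAR_SET_LEN to 36):

import string

# '0'-'9' followed by 'A'-'Z': 36 characters in total.
CHARS = string.digits + string.ascii_uppercase
CHAR_TO_INDEX = {c: i for i, c in enumerate(CHARS)}   # '0'->0 ... '9'->9, 'A'->10 ... 'Z'->35

def char_to_one_hot(c, char_set_len=36):
    """One-hot encode a single character over the 36-character set."""
    vec = [0] * char_set_len
    vec[CHAR_TO_INDEX[c]] = 1
    return vec

print(CHAR_TO_INDEX['A'])       # 10
print(char_to_one_hot('A'))     # a single 1 at position 10, the rest 0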

2 Plain Single-Task Mode

1) Preparation

Copy the nets folder into the current directory; no changes are needed.

2) Training

3) Testing

꧁༺The༒End༻꧂