TensorFlow Learning Notes (Python)
These notes record my study of TensorFlow. The examples use the TensorFlow 1.x API (tf.placeholder, tf.Session, and the tf.train optimizers).
0. Simple TensorFlow operations
Here we define two variables, w1 and w2 (the layer weights).
w1 is a 2x3 matrix and w2 is a 3x1 matrix.
x is a placeholder that stands in for the input data: an unknown number of rows, each with 2 columns.
The data are fed into the network through the feed_dict mechanism.
#coding:utf-8
import tensorflow as tf
x = tf.placeholder(tf.float32, shape=(None, 2))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1))
# Define the forward-propagation process
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
# Run the computation in a session
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print('y:', sess.run(y, feed_dict={x: [[0.7, 0.5],
                                           [0.2, 0.3], [0.3, 0.4], [0.4, 0.5]]}))
Output: a matrix with one row per input sample and one column, here 4x1 since four samples were fed; the exact values depend on the random initialization of w1 and w2.
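To see why the shape works out this way, here is a quick plain-NumPy shape check (illustrative only, not part of the original notes):
import numpy as np
x = np.zeros((4, 2))    # 4 samples, 2 features
w1 = np.zeros((2, 3))   # 2x3 hidden-layer weights
w2 = np.zeros((3, 1))   # 3x1 output-layer weights
print((x @ w1 @ w2).shape)  # (4, 1): one row per sample, one output column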
1. Generating some simple data
import tensorflow as tf
import numpy as np
BATCH_SIZE = 8
# Random number generator
rng = np.random.RandomState()
# 32 samples, each with 2 features drawn from [0, 1)
X = rng.rand(32, 2)
# Label each sample 1 if x0 + x1 < 1, otherwise 0
Y = [[int(x0 + x1 < 1)] for (x0, x1) in X]
print('X:', X)
print('Y:', Y)
2. Defining the network's inputs, parameters and outputs, and the forward pass
# Define the network's inputs, parameters and outputs, and the forward-propagation process
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
3. Defining the loss function and the back-propagation method (three optimizers, plus a custom loss)
# Define the loss function and the back-propagation method; three optimizers are shown
loss = tf.reduce_mean(tf.square(y - y_))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# train_step = tf.train.MomentumOptimizer(0.01, 0.9).minimize(loss)
# train_step = tf.train.AdamOptimizer(0.01).minimize(loss)
# Custom loss function: COST is the user-defined cost of over-predicting, PROFIT the user-defined
# profit lost by under-predicting; y is the prediction, y_ the ground truth
#loss_custom = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))
#train_step = tf.train.GradientDescentOptimizer(0.005).minimize(loss_custom)
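For intuition, here is the same rule worked through in plain NumPy with hypothetical values COST = 1 and PROFIT = 9 (these numbers are not from the original notes): over-predicting is cheap and under-predicting is expensive, so the optimizer is pushed to predict on the high side.
import numpy as np
COST, PROFIT = 1, 9            # hypothetical asymmetric penalties
y = np.array([1.2, 0.8])       # predictions
y_ = np.array([1.0, 1.0])      # ground truth
loss = np.sum(np.where(y > y_, (y - y_) * COST, (y_ - y) * PROFIT))
print(loss)  # about 2.0: 0.2 * COST + 0.2 * PROFIT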
4. Creating a session and training
# Create the session and start training
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(3000):
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
        if i % 500 == 0:
            total_loss = sess.run(loss, feed_dict={x: X, y_: Y})
            print('i =', i, 'total_loss =', total_loss)
    print()
    print('w1:', sess.run(w1))
    print('w2:', sess.run(w2))
Output: the total loss printed every 500 steps, followed by the trained w1 and w2.
5. Learning rate
The learning rate (learning_rate) controls how large each parameter update is.
If the learning rate is too large, the parameters oscillate around the minimum and fail to converge;
if it is too small, the parameters converge very slowly.
During training, each parameter is updated in the direction of gradient descent on the loss: w_new = w - learning_rate * d(loss)/dw.
import tensorflow as tf
LEARNING_RATE_BASE = 0.1    # initial learning rate
LEARNING_RATE_DECAY = 0.99  # decay rate of the learning rate
LEARNING_RATE_STEP = 1      # how many batches are fed before the learning rate is updated once; usually total_samples / BATCH_SIZE
# Counter for how many batches have been run; starts at 0 and is not trainable
global_step = tf.Variable(0, trainable=False)
# Define the exponentially decaying learning rate
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP, LEARNING_RATE_DECAY, staircase=True)
# Define the parameter to be optimized, initialized to 5
w = tf.Variable(tf.constant(5, dtype=tf.float32))
# Define the loss function
loss = tf.square(w + 1)
# Define the back-propagation (training) step
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
# Create a session and train
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40):
        sess.run(train_step)
        learning_rate_val = sess.run(learning_rate)
        global_step_val = sess.run(global_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print('After %d steps: global_step is %d, w is %f, learning_rate is %f, loss is %f'
              % (i, global_step_val, w_val, learning_rate_val, loss_val))
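For reference, with staircase=True, tf.train.exponential_decay computes learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // LEARNING_RATE_STEP). Below is a minimal plain-Python sketch of that formula, reusing the constants above (the helper name decayed_lr is just for illustration):
# Sketch of the staircase exponential-decay schedule used above.
def decayed_lr(step, base=0.1, decay=0.99, decay_steps=1):
    return base * decay ** (step // decay_steps)

for step in range(1, 5):
    print(step, decayed_lr(step))  # 0.099, 0.09801, 0.0970299, ...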
6. Moving average
The code is as follows:
import tensorflow as tf
# Define a variable and the moving-average class
w1 = tf.Variable(0, dtype=tf.float32)
# num_updates (the number of training iterations); starts at 0 and is not trainable
global_step = tf.Variable(0, trainable=False)
# Instantiate the moving-average class with decay rate 0.99 and the current step global_step
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# tf.trainable_variables() collects all trainable parameters into a list
ema_op = ema.apply(tf.trainable_variables())
# Inspect how the values change across iterations
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # ema.average(w1) returns the moving average of w1
    print(sess.run([w1, ema.average(w1)]))
    # Assign 1 to w1
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    # Update the step and w1: simulate that after 100 iterations the parameter w1 has become 10
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    # Run the EMA update 10 more times, printing the updated moving average of w1 each time
    for i in range(10):
        sess.run(ema_op)
        print(sess.run([w1, ema.average(w1)]))
Output:
As the output shows, every run moves the moving average of w1 closer to w1 itself: the moving average tracks the parameter as it changes.
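For reference, ExponentialMovingAverage keeps a shadow value updated as shadow = decay_used * shadow + (1 - decay_used) * variable, where decay_used = min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates)). Below is a minimal plain-Python sketch of the first two updates above (the helper name ema_update is illustrative, not TensorFlow API):
# Sketch of the update rule ExponentialMovingAverage applies.
MOVING_AVERAGE_DECAY = 0.99

def ema_update(shadow, value, num_updates):
    decay_used = min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates))
    return decay_used * shadow + (1 - decay_used) * value

shadow = 0.0
shadow = ema_update(shadow, 1, 0)     # global_step = 0, w1 = 1  -> 0.9
print(shadow)
shadow = ema_update(shadow, 10, 100)  # global_step = 100, w1 = 10 -> about 1.64
print(shadow)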
7. Regularization
Regularization adds a penalty on the weights to the base loss, e.g. loss = cem + REGULARIZER * loss(w), where cem is the cross-entropy; in the code below the base loss is the mean squared error instead.
We randomly generate 300 points X = (x0, x1) from a normal distribution as the data set, compute a label Y_ for each point, and plot the points in red and blue.
Rule: if x0 * x0 + x1 * x1 < 2 then y_ = 1 and the point is drawn in red; otherwise y_ = 0 and it is drawn in blue.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
BATCH_SIZE = 30
# Random number generator
rdm = np.random.RandomState()
# 300 rows x 2 columns: 300 coordinate points (x0, x1) used as the input data set
X = rdm.randn(300, 2)
# For each row of X, label it 1 if the sum of squares of the two coordinates is less than 2, otherwise 0
Y_ = [int(x0 * x0 + x1 * x1 < 2) for (x0, x1) in X]
# Map each label: 1 becomes 'red', 0 becomes 'blue'
Y_c = [['red' if y else 'blue'] for y in Y_]
# Reshape the data set X and the labels Y_: the first dimension (-1) is inferred, the second gives the number of columns
# X becomes n rows x 2 columns, Y_ becomes n rows x 1 column
X = np.vstack(X).reshape(-1, 2)
Y_ = np.vstack(Y_).reshape(-1, 1)
print('X:', X)
print('Y_:', Y_)
print('Y_c:', Y_c)
# Use plt.scatter to plot column 0 against column 1 of X, i.e. each row's (x0, x1),
# coloring each point with the corresponding entry of Y_c
plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.show()
# Define the network's inputs, parameters and outputs, and the forward-propagation process
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # Add the L2 penalty on w to the 'losses' collection
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = get_weight([2, 11], 0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
w2 = get_weight([11, 1], 0.01)
b2 = get_bias([1])
y = tf.matmul(y1, w2) + b2  # no activation on the output layer
# Define the loss functions
loss_mse = tf.reduce_mean(tf.square(y - y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Back-propagation without regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40000):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_mse_v = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
            print('After %d steps, loss is: %f' % (i, loss_mse_v))
    # xx and yy both range from -3 to 3 with step 0.01, forming a 2-D grid of coordinates
    xx, yy = np.mgrid[-3:3:0.01, -3:3:0.01]
    # Flatten xx and yy and stack them into a 2-column matrix: the set of grid points
    grid = np.c_[xx.ravel(), yy.ravel()]
    # Feed the grid points into the network; probs is the output
    probs = sess.run(y, feed_dict={x: grid})
    # Reshape probs to the shape of xx
    probs = probs.reshape(xx.shape)
    print('w1:', sess.run(w1))
    print('b1:', sess.run(b1))
    print('w2:', sess.run(w2))
    print('b2:', sess.run(b2))

plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()
# Back-propagation with regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40000):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
            print('After %d steps, loss is %f' % (i, loss_val))
    xx, yy = np.mgrid[-3:3:0.01, -3:3:0.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = sess.run(y, feed_dict={x: grid})
    probs = probs.reshape(xx.shape)
    print('w1:', sess.run(w1))
    print('b1:', sess.run(b1))
    print('w2:', sess.run(w2))
    print('b2:', sess.run(b2))

plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()
Figure 1: the generated random points
Figure 2: the decision boundary without regularization
Figure 3: the decision boundary after training with regularization
Comparing the results with and without regularization shows that regularization makes the fitted boundary smoother and gives the model better generalization.
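For reference, tf.contrib.layers.l2_regularizer(scale)(w) returns scale * tf.nn.l2_loss(w), i.e. scale * sum(w ** 2) / 2, so the 'losses' collection built by get_weight is equivalent to the manual form sketched below (manual_penalty and loss_total_manual are illustrative names, not from the original code):
# Sketch of what the 'losses' collection holds for the two weight matrices above.
manual_penalty = tf.add_n([0.01 * tf.nn.l2_loss(w) for w in (w1, w2)])
loss_total_manual = loss_mse + manual_penalty  # equivalent to loss_total above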
8. A modular implementation
# Boilerplate for building the neural network in a modular way
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
BATCH_SIZE = 30
LEARNING_RATE_BASE = 0.001
LEARNING_RATE_DECAY = 0.999
REGULARIZER = 0.01
# Function that generates the data set
def generateds():
    rdm = np.random.RandomState()
    X = rdm.randn(300, 2)
    Y_ = [int(x0*x0 + x1*x1 < 2) for (x0, x1) in X]
    Y_c = [['red' if y else 'blue'] for y in Y_]
    X = np.vstack(X).reshape(-1, 2)
    Y_ = np.vstack(Y_).reshape(-1, 1)
    return X, Y_, Y_c
# Define the network's inputs, parameters and outputs, and the forward-propagation process
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b

def forward(x, regularizer):
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2  # no activation on the output layer
    return y
# Back-propagation (training) module
def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))
    X, Y_, Y_c = generateds()  # get the data set
    y = forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        300 / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    # Define the loss functions
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
    # Back-propagation with regularization; pass global_step so the decayed learning rate advances
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total, global_step=global_step)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(40000):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print('After %d steps, loss is %f' % (i, loss_val))
        xx, yy = np.mgrid[-3:3:0.01, -3:3:0.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[0.5])
    plt.show()

if __name__ == '__main__':
    backward()
Output: the loss printed every 2,000 steps and the final decision-boundary plot.