TensorFlow Learning Notes (Python)
These notes record my study of TensorFlow. The examples use the TensorFlow 1.x API (tf.placeholder, tf.Session, and the tf.train optimizers).
0. Simple TensorFlow operations
Here we define two variables, w1 and w2 (the layer weights).
w1 is a 2x3 matrix and w2 is a 3x1 matrix.
x is a placeholder that stands in for the input data: an unknown number of rows, each with 2 columns.
The data are fed into the network through the feed_dict mechanism.
#coding:utf-8
import tensorflow as tf
x = tf.placeholder(tf.float32, shape=(None, 2))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1))
# Define the forward-propagation process
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
# Run the computation in a session
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    print('y:', sess.run(y, feed_dict={x: [[0.7, 0.5],
                                           [0.2, 0.3], [0.3, 0.4], [0.4, 0.5]]}))
Output: a matrix with one row per input sample and one column, here 4x1 since four samples were fed; the exact values depend on the random initialization of w1 and w2.
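To see why the shape works out this way, here is a quick plain-NumPy shape check (illustrative only, not part of the original notes):
import numpy as np
x = np.zeros((4, 2))    # 4 samples, 2 features
w1 = np.zeros((2, 3))   # 2x3 hidden-layer weights
w2 = np.zeros((3, 1))   # 3x1 output-layer weights
print((x @ w1 @ w2).shape)  # (4, 1): one row per sample, one output column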
1. Generating some simple data
import tensorflow as tf
import numpy as np
BATCH_SIZE = 8
# Random number generator
rng = np.random.RandomState()
# 32 samples, each with 2 features drawn from [0, 1)
X = rng.rand(32, 2)
# Label each sample 1 if x0 + x1 < 1, otherwise 0
Y = [[int(x0 + x1 < 1)] for (x0, x1) in X]
print('X:', X)
print('Y:', Y)
2. Defining the network's inputs, parameters and outputs, and the forward pass
# Define the network's inputs, parameters and outputs, and the forward-propagation process
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))
a = tf.matmul(x, w1)
y = tf.matmul(a, w2)
3. Defining the loss function and the back-propagation method (three optimizers, plus a custom loss)
# Define the loss function and the back-propagation method; three optimizers are shown
loss = tf.reduce_mean(tf.square(y - y_))
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# train_step = tf.train.MomentumOptimizer(0.01, 0.9).minimize(loss)
# train_step = tf.train.AdamOptimizer(0.01).minimize(loss)
# Custom loss function: COST is the user-defined cost of over-predicting, PROFIT the user-defined
# profit lost by under-predicting; y is the prediction, y_ the ground truth
#loss_custom = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))
#train_step = tf.train.GradientDescentOptimizer(0.005).minimize(loss_custom)
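For intuition, here is the same rule worked through in plain NumPy with hypothetical values COST = 1 and PROFIT = 9 (these numbers are not from the original notes): over-predicting is cheap and under-predicting is expensive, so the optimizer is pushed to predict on the high side.
import numpy as np
COST, PROFIT = 1, 9            # hypothetical asymmetric penalties
y = np.array([1.2, 0.8])       # predictions
y_ = np.array([1.0, 1.0])      # ground truth
loss = np.sum(np.where(y > y_, (y - y_) * COST, (y_ - y) * PROFIT))
print(loss)  # about 2.0: 0.2 * COST + 0.2 * PROFIT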
4. Creating a session and training
# Create the session and start training
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(3000):
        start = (i * BATCH_SIZE) % 32
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
        if i % 500 == 0:
            total_loss = sess.run(loss, feed_dict={x: X, y_: Y})
            print('i =', i, 'total_loss =', total_loss)
    print()
    print('w1:', sess.run(w1))
    print('w2:', sess.run(w2))
Output: the total loss printed every 500 steps, followed by the trained w1 and w2.
5. Learning rate
The learning rate (learning_rate) controls how large each parameter update is.
If the learning rate is too large, the parameters oscillate around the minimum and fail to converge;
if it is too small, the parameters converge very slowly.
During training, each parameter is updated in the direction of gradient descent on the loss: w_new = w - learning_rate * d(loss)/dw.
import tensorflow as tf
LEARNING_RATE_BASE = 0.1    # initial learning rate
LEARNING_RATE_DECAY = 0.99  # decay rate of the learning rate
LEARNING_RATE_STEP = 1      # how many batches are fed before the learning rate is updated once; usually total_samples / BATCH_SIZE
# Counter for how many batches have been run; starts at 0 and is not trainable
global_step = tf.Variable(0, trainable=False)
# Define the exponentially decaying learning rate
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP, LEARNING_RATE_DECAY, staircase=True)
# Define the parameter to be optimized, initialized to 5
w = tf.Variable(tf.constant(5, dtype=tf.float32))
# Define the loss function
loss = tf.square(w + 1)
# Define the back-propagation (training) step
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
# Create a session and train
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40):
        sess.run(train_step)
        learning_rate_val = sess.run(learning_rate)
        global_step_val = sess.run(global_step)
        w_val = sess.run(w)
        loss_val = sess.run(loss)
        print('After %d steps: global_step is %d, w is %f, learning_rate is %f, loss is %f'
              % (i, global_step_val, w_val, learning_rate_val, loss_val))
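For reference, with staircase=True, tf.train.exponential_decay computes learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // LEARNING_RATE_STEP). Below is a minimal plain-Python sketch of that formula, reusing the constants above (the helper name decayed_lr is just for illustration):
# Sketch of the staircase exponential-decay schedule used above.
def decayed_lr(step, base=0.1, decay=0.99, decay_steps=1):
    return base * decay ** (step // decay_steps)

for step in range(1, 5):
    print(step, decayed_lr(step))  # 0.099, 0.09801, 0.0970299, ...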
6. Moving average
The code is as follows:
import tensorflow as tf
# Define a variable and the moving-average class
w1 = tf.Variable(0, dtype=tf.float32)
# num_updates (the number of training iterations); starts at 0 and is not trainable
global_step = tf.Variable(0, trainable=False)
# Instantiate the moving-average class with decay rate 0.99 and the current step global_step
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# tf.trainable_variables() collects all trainable parameters into a list
ema_op = ema.apply(tf.trainable_variables())
# Inspect how the values change across iterations
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    # ema.average(w1) returns the moving average of w1
    print(sess.run([w1, ema.average(w1)]))
    # Assign 1 to w1
    sess.run(tf.assign(w1, 1))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    # Update the step and w1: simulate that after 100 iterations the parameter w1 has become 10
    sess.run(tf.assign(global_step, 100))
    sess.run(tf.assign(w1, 10))
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
    # Run the EMA update 10 more times, printing the updated moving average of w1 each time
    for i in range(10):
        sess.run(ema_op)
        print(sess.run([w1, ema.average(w1)]))
Output:
As the output shows, every run moves the moving average of w1 closer to w1 itself: the moving average tracks the parameter as it changes.
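For reference, ExponentialMovingAverage keeps a shadow value updated as shadow = decay_used * shadow + (1 - decay_used) * variable, where decay_used = min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates)). Below is a minimal plain-Python sketch of the first two updates above (the helper name ema_update is illustrative, not TensorFlow API):
# Sketch of the update rule ExponentialMovingAverage applies.
MOVING_AVERAGE_DECAY = 0.99

def ema_update(shadow, value, num_updates):
    decay_used = min(MOVING_AVERAGE_DECAY, (1 + num_updates) / (10 + num_updates))
    return decay_used * shadow + (1 - decay_used) * value

shadow = 0.0
shadow = ema_update(shadow, 1, 0)     # global_step = 0, w1 = 1  -> 0.9
print(shadow)
shadow = ema_update(shadow, 10, 100)  # global_step = 100, w1 = 10 -> about 1.64
print(shadow)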
7. Regularization
Regularization adds a penalty on the weights to the base loss, e.g. loss = cem + REGULARIZER * loss(w), where cem is the cross-entropy; in the code below the base loss is the mean squared error instead.
We randomly generate 300 points X = (x0, x1) from a normal distribution as the data set, compute a label Y_ for each point, and plot the points in red and blue.
Rule: if x0 * x0 + x1 * x1 < 2 then y_ = 1 and the point is drawn in red; otherwise y_ = 0 and it is drawn in blue.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
BATCH_SIZE = 30
# Random number generator
rdm = np.random.RandomState()
# 300 rows x 2 columns: 300 coordinate points (x0, x1) used as the input data set
X = rdm.randn(300, 2)
# For each row of X, label it 1 if the sum of squares of the two coordinates is less than 2, otherwise 0
Y_ = [int(x0 * x0 + x1 * x1 < 2) for (x0, x1) in X]
# Map each label: 1 becomes 'red', 0 becomes 'blue'
Y_c = [['red' if y else 'blue'] for y in Y_]
# Reshape the data set X and the labels Y_: the first dimension (-1) is inferred, the second gives the number of columns
# X becomes n rows x 2 columns, Y_ becomes n rows x 1 column
X = np.vstack(X).reshape(-1, 2)
Y_ = np.vstack(Y_).reshape(-1, 1)
print('X:', X)
print('Y_:', Y_)
print('Y_c:', Y_c)
# Use plt.scatter to plot column 0 against column 1 of X, i.e. each row's (x0, x1),
# coloring each point with the corresponding entry of Y_c
plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.show()
# Define the network's inputs, parameters and outputs, and the forward-propagation process
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # Add the L2 penalty on w to the 'losses' collection
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1 = get_weight([2, 11], 0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
w2 = get_weight([11, 1], 0.01)
b2 = get_bias([1])
y = tf.matmul(y1, w2) + b2  # no activation on the output layer
# Define the loss functions
loss_mse = tf.reduce_mean(tf.square(y - y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Back-propagation without regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40000):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_mse_v = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
            print('After %d steps, loss is: %f' % (i, loss_mse_v))
    # xx and yy both range from -3 to 3 with step 0.01, forming a 2-D grid of coordinates
    xx, yy = np.mgrid[-3:3:0.01, -3:3:0.01]
    # Flatten xx and yy and stack them into a 2-column matrix: the set of grid points
    grid = np.c_[xx.ravel(), yy.ravel()]
    # Feed the grid points into the network; probs is the output
    probs = sess.run(y, feed_dict={x: grid})
    # Reshape probs to the shape of xx
    probs = probs.reshape(xx.shape)
    print('w1:', sess.run(w1))
    print('b1:', sess.run(b1))
    print('w2:', sess.run(w2))
    print('b2:', sess.run(b2))

plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()
# Back-propagation with regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    for i in range(40000):
        start = (i * BATCH_SIZE) % 300
        end = start + BATCH_SIZE
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
        if i % 2000 == 0:
            loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
            print('After %d steps, loss is %f' % (i, loss_val))
    xx, yy = np.mgrid[-3:3:0.01, -3:3:0.01]
    grid = np.c_[xx.ravel(), yy.ravel()]
    probs = sess.run(y, feed_dict={x: grid})
    probs = probs.reshape(xx.shape)
    print('w1:', sess.run(w1))
    print('b1:', sess.run(b1))
    print('w2:', sess.run(w2))
    print('b2:', sess.run(b2))

plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[0.5])
plt.show()
Figure 1: the generated random points
Figure 2: the decision boundary without regularization
Figure 3: the decision boundary after training with regularization
Comparing the results with and without regularization shows that regularization makes the fitted boundary smoother and gives the model better generalization.
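For reference, tf.contrib.layers.l2_regularizer(scale)(w) returns scale * tf.nn.l2_loss(w), i.e. scale * sum(w ** 2) / 2, so the 'losses' collection built by get_weight is equivalent to the manual form sketched below (manual_penalty and loss_total_manual are illustrative names, not from the original code):
# Sketch of what the 'losses' collection holds for the two weight matrices above.
manual_penalty = tf.add_n([0.01 * tf.nn.l2_loss(w) for w in (w1, w2)])
loss_total_manual = loss_mse + manual_penalty  # equivalent to loss_total above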
8. A modular implementation
# Boilerplate for building the neural network in a modular way
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
BATCH_SIZE = 30
LEARNING_RATE_BASE = 0.001
LEARNING_RATE_DECAY = 0.999
REGULARIZER = 0.01
# Function that generates the data set
def generateds():
    rdm = np.random.RandomState()
    X = rdm.randn(300, 2)
    Y_ = [int(x0*x0 + x1*x1 < 2) for (x0, x1) in X]
    Y_c = [['red' if y else 'blue'] for y in Y_]
    X = np.vstack(X).reshape(-1, 2)
    Y_ = np.vstack(Y_).reshape(-1, 1)
    return X, Y_, Y_c
# Define the network's inputs, parameters and outputs, and the forward-propagation process
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b

def forward(x, regularizer):
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2  # no activation on the output layer
    return y
# Back-propagation (training) module
def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))
    X, Y_, Y_c = generateds()  # get the data set
    y = forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        300 / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)
    # Define the loss functions
    loss_mse = tf.reduce_mean(tf.square(y - y_))
    loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
    # Back-propagation with regularization; pass global_step so the decayed learning rate advances
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_total, global_step=global_step)
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(40000):
            start = (i * BATCH_SIZE) % 300
            end = start + BATCH_SIZE
            sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
            if i % 2000 == 0:
                loss_val = sess.run(loss_total, feed_dict={x: X, y_: Y_})
                print('After %d steps, loss is %f' % (i, loss_val))
        xx, yy = np.mgrid[-3:3:0.01, -3:3:0.01]
        grid = np.c_[xx.ravel(), yy.ravel()]
        probs = sess.run(y, feed_dict={x: grid})
        probs = probs.reshape(xx.shape)
    plt.scatter(X[:, 0], X[:, 1], c=np.squeeze(Y_c))
    plt.contour(xx, yy, probs, levels=[0.5])
    plt.show()

if __name__ == '__main__':
    backward()
Output: the loss printed every 2,000 steps and the final decision-boundary plot.