Python: Hand-Writing a Least-Squares Objective Function and Optimizing It with scipy.optimize
This post implements the least-squares objective in two ways, a matrix form and an element-wise for-loop form, and learns the parameters with the L-BFGS-B algorithm in both cases.
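For reference, write the features as a matrix X of shape (n, d), the targets as Y of shape (n, k), and the weights as W of shape (d, k). The objective minimized below is L(W) = ||X·W - Y||^2 / 2 (summed over all entries), and its gradient is dL/dW = X^T·(X·W - Y); these two expressions are exactly what the obj_fun and prime functions compute.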
(1) Matrix implementation; code:
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

points = []  # records the loss at every objective-function evaluation
np.random.seed(1)

# Objective function: squared-error loss L(W) = sum((X·W - Y)^2) / 2
def obj_fun(theta, x, y_):
    theta = theta.reshape(3, 3)  # fmin_l_bfgs_b passes theta as a flat vector
    pre_dis = np.dot(x, theta)
    loss = np.sum((pre_dis - y_) ** 2) / 2
    points.append(loss)
    return loss

# Gradient of the objective: dL/dW = X^T · (X·W - Y)
def prime(theta, x, y_):
    theta = theta.reshape(3, 3)
    pre_dis = np.dot(x, theta)
    gradient = np.dot(np.transpose(x), (pre_dis - y_))
    return np.ravel(gradient)  # flatten the 2-D gradient matrix into a 1-D vector

if __name__ == "__main__":
    feature = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9]])
    dis_ = np.array([[0.1, 0.3, 0.6], [0.2, 0.4, 0.4], [0.3, 0.2, 0.5], [0.1, 0.3, 0.6]])
    # init_theta = np.ones([3, 3])
    init_theta = np.random.normal(size=(3, 3))
    print(prime(init_theta, x=feature, y_=dis_))
    result = opt.fmin_l_bfgs_b(obj_fun, init_theta, prime, args=(feature, dis_))
    # Note: with fmin_l_bfgs_b the optimized parameters are in result[0];
    # many other scipy optimizers return the parameters directly.
    m_theta = np.array(result[0]).reshape(3, 3)
    print(np.dot(feature, m_theta))
    print(obj_fun(m_theta, feature, dis_))
    plt.plot(np.arange(len(points)), points)
    plt.show()
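As a quick sanity check (my own addition, not from the original post), the optimizer's answer can be compared with the closed-form least-squares solution from np.linalg.lstsq; the names below reuse the variables from the script above. Note that when feature is not of full column rank the weight matrix itself is not unique, so the fitted values X·W are the safer thing to compare:

# Hypothetical check: the fitted values from the L-BFGS-B solution and the
# closed-form least-squares solution should agree up to optimizer tolerance.
lstsq_theta, _, _, _ = np.linalg.lstsq(feature, dis_, rcond=None)
print(np.allclose(np.dot(feature, m_theta), np.dot(feature, lstsq_theta), atol=1e-4))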
(2) For-loop implementation; code:
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plt

points = []  # records the loss at every objective-function evaluation
np.random.seed(1)

def obj_fun(theta, x, y_, theta_shape):
    (cols, rows) = theta_shape
    theta = theta.reshape([cols, rows])  # fmin_l_bfgs_b passes theta as a flat vector
    loss = 0
    for i in range(rows):
        loss += per_obj_func(theta[:, i], x, y_[:, i])
    points.append(loss)
    return loss

# Loss contributed by a single column of W (a one-dimensional least-squares problem)
def per_obj_func(theta, x, y_):
    loss = 0
    for i in range(len(x)):
        y = 0
        for j in range(len(theta)):
            y += theta[j] * x[i][j]
        loss += (y - y_[i]) ** 2 / 2
    return loss

def prime(theta, x, y_, theta_shape):
    lst = []
    (cols, rows) = theta_shape
    theta = theta.reshape([cols, rows])
    for i in range(rows):
        lst.append(per_prime(theta[:, i], x, y_[:, i]))
    np_lst = np.transpose(np.array(lst))  # restore the (cols, rows) layout of W
    return np.ravel(np_lst)

# Partial derivatives of the loss with respect to a single column of W
def per_prime(theta, x, y_):
    lst = []
    for j in range(len(theta)):
        pre_dis = 0
        for i in range(len(x)):
            y = 0
            for k in range(len(x[0])):
                y += x[i][k] * theta[k]
            pre_dis += (y - y_[i]) * x[i][j]
        lst.append(pre_dis)
    return lst

if __name__ == "__main__":
    feature = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9]])
    dis_ = np.array([[0.1, 0.3, 0.6], [0.2, 0.4, 0.4], [0.3, 0.2, 0.5], [0.1, 0.3, 0.6]])
    # init_theta = np.ones([3, 3])
    init_theta = np.random.normal(size=(3, 3))
    shape = init_theta.shape
    result = opt.fmin_l_bfgs_b(obj_fun, init_theta, prime, args=(feature, dis_, shape))
    # Note: with fmin_l_bfgs_b the optimized parameters are in result[0];
    # many other scipy optimizers return the parameters directly.
    m_theta = np.array(result[0]).reshape(3, 3)
    print(np.dot(feature, m_theta))
    print(obj_fun(m_theta, feature, dis_, shape))
    plt.plot(np.arange(len(points)), points)
    plt.show()
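To confirm that the hand-written loop gradient really matches the objective, one option (again my own addition) is scipy.optimize.check_grad, which compares a gradient function against a finite-difference approximation of the objective. It expects a flat parameter vector, which is exactly the format prime already handles; note that each call also appends a few extra losses to points, so run it before plotting:

# Hypothetical gradient check: a return value close to 0 means the analytic
# gradient in prime agrees with a finite-difference estimate of obj_fun.
err = opt.check_grad(obj_fun, prime, np.ravel(init_theta), feature, dis_, shape)
print(err)  # should be small relative to the gradient's magnitude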
Program output: running both scripts plots the recorded losses (figures not reproduced here), and the two loss-decrease curves are identical, so the implementation is correct.
Notes:
1. The shape is passed into the functions because opt.fmin_l_bfgs_b flattens init_theta from (3, 3) into a 1-D vector of length 9, so W has to be reassembled into its (3, 3) form inside the objective and gradient functions (see the sketch after these notes).
2. The args tuple of opt.fmin_l_bfgs_b is passed to both the func and fprime callbacks, so the two functions must share a single, consistent parameter signature; their argument lists cannot differ.
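The same flatten-and-reshape contract applies to the newer scipy.optimize.minimize interface. A minimal sketch (my own illustration, reusing obj_fun, prime, and shape from the loop version) would be:

# Hypothetical equivalent using minimize() with method="L-BFGS-B";
# minimize() likewise works on a flat 1-D vector for fun and jac.
res = opt.minimize(obj_fun, np.ravel(init_theta), jac=prime,
                   args=(feature, dis_, shape), method="L-BFGS-B")
m_theta = res.x.reshape(shape)  # the optimized parameters live in res.x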
Since I am still fairly new to this, in the loop-based version I split the W matrix into several 3 × 1 single-column matrices and computed each one separately, which lowers the chance of making a mistake. (This works because the squared-error loss decouples across the columns of W: each column of W only affects the corresponding column of X·W.)