Python Machine Learning: Solving Logistic Regression with Gradient Descent

Logistic Regression

The data

We will build a logistic regression model to predict whether a student gets admitted to a university. Suppose you are the administrator of a university department and you want to determine each applicant's chance of admission based on their results on two exams. You have historical data from previous applicants, which you can use as a training set for logistic regression. For each training example you have the applicant's scores on the two exams and the admission decision. To do this, we will build a classification model that estimates the probability of admission from the exam scores.

# Import the three essentials
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
path = 'data' + os.sep + 'LogiReg_data.txt'
pdData = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])

# Plot the two exam-score features for admitted vs. rejected applicants
positive = pdData[pdData['Admitted'] == 1]
negative = pdData[pdData['Admitted'] == 0]

fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(positive['Exam 1'], positive['Exam 2'], s=30, c='b', marker='o', label='Admitted')
ax.scatter(negative['Exam 1'], negative['Exam 2'], s=30, c='r', marker='x', label='Not Admitted')
ax.legend()
ax.set_xlabel('Exam 1 Score')
ax.set_ylabel('Exam 2 Score')

The logistic regression

Goal: build a classifier, i.e. solve for the three parameters θ0, θ1, θ2, then set a threshold on the predicted probability and use it to decide the admission result.

Modules to implement (they combine via the gradient descent update shown after this list):

  • sigmoid: maps scores to probabilities
  • model: returns the predicted value
  • cost: computes the loss for the given parameters
  • gradient: computes the gradient for each parameter
  • descent: performs the parameter updates
  • accuracy: computes the classification accuracy
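
For orientation, every variant of descent below repeats the same textbook gradient descent update, with α the learning rate:

θ := θ - α · ∇J(θ)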

The sigmoid function

 

g(z) = 1 / (1 + e^(-z)), mapping any real z into (0, 1)

def sigmoid(z):
    # Map any real-valued score to a probability in (0, 1)
    return 1 / (1 + np.exp(-z))
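
As a quick sanity check (a minimal sketch, not part of the original write-up): g(0) is exactly 0.5 and the curve saturates at 0 and 1, which is what lets us read the output as a probability.

nums = np.arange(-10, 10, step=0.5)
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(nums, sigmoid(nums), 'r')   # smooth S-curve passing through (0, 0.5)
assert sigmoid(0) == 0.5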


h_θ(x) = g(θᵀx) = 1 / (1 + e^(-θᵀx))

def model(X, theta):
    # Prediction function: theta is the (1, 3) row vector of coefficients,
    # X holds the samples (constant column plus the two exam scores)
    return sigmoid(np.dot(X, theta.T))


pdData.insert(0, 'Ones', 1)
# Insert a column of ones at index 0 so the intercept gets its own coefficient

# Set X (training data) and y (target variable)
orig_data = pdData.values  # convert the pandas frame to a NumPy array (as_matrix() was removed in pandas 1.0)
cols = orig_data.shape[1]
X = orig_data[:, 0:cols-1]
y = orig_data[:, cols-1:cols]

theta = np.zeros([1, 3])
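
Before computing anything, it is worth confirming the shapes line up (a small check of my own, not in the original):

X.shape, y.shape, theta.shape   # ((100, 3), (100, 1), (1, 3)) for this 100-sample dataset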

J(θ) = -(1/m) Σᵢ [ yᵢ · log(h_θ(xᵢ)) + (1 - yᵢ) · log(1 - h_θ(xᵢ)) ]

def cost(X, y, theta):
    left = np.multiply(-y, np.log(model(X, theta)))
    right = np.multiply(1 - y, np.log(1 - model(X, theta)))
    return np.sum(left - right) / len(X)

cost(X, y, theta)
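
With theta initialized to all zeros, the model outputs 0.5 for every sample, so this first call should return ln 2 ≈ 0.6931 regardless of the data.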

∂J(θ)/∂θⱼ = (1/m) Σᵢ (h_θ(xᵢ) - yᵢ) · xᵢⱼ

def gradient(X, y, theta):
    grad = np.zeros(theta.shape)
    error = (model(X, theta) - y).ravel()  # np.ravel flattens the array to 1-D
    for j in range(len(theta.ravel())):    # one partial derivative per parameter
        term = np.multiply(error, X[:, j])
        grad[0, j] = np.sum(term) / len(X)
    return grad
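
The explicit loop mirrors the formula, but the same gradient can be computed in one vectorized line. This equivalent sketch (my addition, using a hypothetical name gradient_vec) is what you would normally write:

def gradient_vec(X, y, theta):
    # (h - y)^T X / m, already shaped like theta (1, 3)
    return (model(X, theta) - y).T @ X / len(X)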

 

Gradient descent

Compare three different gradient descent methods

STOP_ITER = 0
STOP_COST = 1
STOP_GRAD = 2

def stopCriterion(type, value, threshold):
    # Three different stopping strategies
    if type == STOP_ITER:
        return value > threshold                        # fixed number of iterations
    elif type == STOP_COST:
        return abs(value[-1] - value[-2]) < threshold   # change between the last two costs
    elif type == STOP_GRAD:
        return np.linalg.norm(value) < threshold        # norm of the gradient
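
A quick illustration of two of the criteria (values made up for the example):

stopCriterion(STOP_COST, [0.693, 0.692], 0.01)   # True: the last two costs differ by less than 0.01
stopCriterion(STOP_ITER, 4999, 5000)             # False: not enough iterations yet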

import numpy.random

def shuffleData(data):
    # Shuffle the rows, then split back into X and y
    np.random.shuffle(data)
    cols = data.shape[1]
    X = data[:, 0:cols-1]
    y = data[:, cols-1:]
    return X, y

 

import time

def descent(data, theta, batchSize, stopType, thresh, alpha):
    # Gradient descent solver. batchSize: samples used per update; stopType: stopping
    # strategy; thresh: threshold for that strategy; alpha: learning rate.
    init_time = time.time()
    i = 0  # iteration counter
    k = 0  # position of the current batch within the data
    X, y = shuffleData(data)
    grad = np.zeros(theta.shape)  # gradient placeholder
    costs = [cost(X, y, theta)]   # initial loss value

    while True:
        grad = gradient(X[k:k+batchSize], y[k:k+batchSize], theta)
        k += batchSize  # move on to the next batch
        if k >= n:      # n is the total number of samples (set globally below)
            k = 0
            X, y = shuffleData(data)  # reshuffle after each full pass
        theta = theta - alpha * grad      # parameter update
        costs.append(cost(X, y, theta))   # record the new loss
        i += 1

        if stopType == STOP_ITER:
            value = i      # stop by iteration count
        elif stopType == STOP_COST:
            value = costs  # stop when the last two losses barely differ
        elif stopType == STOP_GRAD:
            value = grad   # stop when the gradient norm falls below the threshold
        if stopCriterion(stopType, value, thresh):
            break
    return theta, i-1, costs, grad, time.time() - init_time

 

def runExpe(data, theta, batchSize, stopType, thresh, alpha):
    #import pdb; pdb.set_trace();
    theta, iter, costs, grad, dur = descent(data, theta, batchSize, stopType, thresh, alpha)
    name = "Original" if (data[:,1]>2).sum() > 1 else "Scaled"
    name += " data - learning rate: {} - ".format(alpha)
    if batchSize==n: strDescType = "Gradient"
    elif batchSize==1:  strDescType = "Stochastic"
    else: strDescType = "Mini-batch ({})".format(batchSize)
    name += strDescType + " descent - Stop: "
    if stopType == STOP_ITER: strStop = "{} iterations".format(thresh)
    elif stopType == STOP_COST: strStop = "costs change < {}".format(thresh)
    else: strStop = "gradient norm < {}".format(thresh)
    name += strStop
    print ("***{}\nTheta: {} - Iter: {} - Last cost: {:03.2f} - Duration: {:03.2f}s".format(
        name, theta, iter, costs[-1], dur))
    fig, ax = plt.subplots(figsize=(12,4))
    ax.plot(np.arange(len(costs)), costs, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title(name.upper() + ' - Error vs. Iteration')
    return theta

 

Different stopping strategies

Stopping after a fixed number of iterations

n = 100  # number of samples, so batchSize == n means full-batch gradient descent
runExpe(orig_data, theta, n, STOP_ITER, thresh = 5000, alpha = 0.000001)


Stopping by change in cost

With the threshold set to 1e-6, this needs roughly 110,000 iterations.

runExpe(orig_data, theta, n, STOP_COST, thresh = 0.000001, alpha = 0.001)


Stopping by gradient norm

With the threshold set to 0.05, this needs roughly 40,000 iterations.

runExpe(orig_data, theta, n, STOP_GRAD, thresh = 0.05, alpha = 0.001)


Comparing the different gradient descent methods

Stochastic descent

runExpe(orig_data, theta, 1, STOP_ITER, thresh = 5000, alpha = 0.001)


The cost rather explodes... very unstable, since each update is driven by a single noisy sample. Let's try again with a much smaller learning rate.

runExpe(orig_data, theta, 1, STOP_ITER, thresh = 15000, alpha = 0.000002)


Mini-batch descent

runExpe(orig_data, theta, 16, STOP_ITER, thresh = 15000, alpha = 0.001)


from sklearn import preprocessing as pp

# Standardize the two exam-score columns: features on a common scale
# make gradient descent converge far faster
scaled_data = orig_data.copy()
scaled_data[:,1:3] = pp.scale(orig_data[:, 1:3])
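
pp.scale standardizes each column to zero mean and unit variance; a minimal NumPy equivalent (for illustration only):

exam_cols = orig_data[:, 1:3]
manual = (exam_cols - exam_cols.mean(axis=0)) / exam_cols.std(axis=0)
np.allclose(manual, pp.scale(exam_cols))   # True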

 

runExpe(scaled_data, theta, n, STOP_ITER, thresh = 5000, alpha = 0.001)


runExpe(scaled_data, theta, n, STOP_GRAD, thresh = 0.02, alpha = 0.001)


theta = runExpe(scaled_data, theta, 1, STOP_GRAD, thresh=0.002/5, alpha=0.001)


 

runExpe(scaled_data, theta, 16, STOP_GRAD, thresh=0.002*2, alpha=0.001)


# Set the decision threshold: predict admission when the probability is at least 0.5
def predict(X, theta):
    return [1 if x >= 0.5 else 0 for x in model(X, theta)]

scaled_X = scaled_data[:, :3]
y = scaled_data[:, 3]
predictions = predict(scaled_X, theta)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y)]
accuracy = sum(correct) * 100 // len(correct)  # percentage; the original's % operator computed a remainder, not a ratio
print('accuracy = {0}%'.format(accuracy))
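
As a final sanity check (my addition; default hyperparameters are an assumption), scikit-learn's own LogisticRegression should land in the same accuracy neighborhood on the scaled features:

from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()
clf.fit(scaled_data[:, 1:3], scaled_data[:, 3])   # sklearn fits its own intercept, so skip the 'Ones' column
print('sklearn accuracy = {:.0f}%'.format(100 * clf.score(scaled_data[:, 1:3], scaled_data[:, 3])))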
