西瓜书 课后习题3.3

import csv
import numpy as np
import matplotlib.pyplot as plt


def newton(X, y):
    """
    Input:
        X: np.array with shape [N, 3]. Input.
        y: np.array with shape [N, 1]. Label.
    Return:
        beta: np.array with shape [1, 3]. Optimal params with newton method
    """
    N = X.shape[0]
    # initialization
    beta = np.ones((1, 3))
    # shape [N, 1]
    z = X.dot(beta.T)  # 定义一个中间量z
    # log-likehood
    old_l = 0
    new_l = np.sum(-y * z + np.log(1 + np.exp(z)))  # 目标函数 
    iters = 0
    while (np.abs(old_l - new_l) > 1e-5):
        # shape [N, 1]
        p1 = np.exp(z) / (1 + np.exp(z))
        # shape [N, N]
        p = np.diag((p1 * (1 - p1)).reshape(N))
        # shape [1, 3]
        first_order = -np.sum(X * (y - p1), 0, keepdims=True)   # 一阶求导
        # shape [3, 3]
        second_order = X.T.dot(p).dot(X)        # 二阶求导

        # update
        beta -= first_order.dot(np.linalg.inv(second_order))
        z = X.dot(beta.T)
        old_l = new_l
        new_l = np.sum(-y * z + np.log(1 + np.exp(z)))

        iters += 1
    print("iters: ", iters)
    print(new_l)
    print(beta)
    return beta


def gradDescent(X, y):
    """
    Input:
        X: np.array with shape [N, 3]. Input.
        y: np.array with shape [N, 1]. Label.
    Return:
        beta: np.array with shape [1, 3]. Optimal params with gradient descent method
    """

    N = X.shape[0]
    lr = 0.1   # 学习率,对结果影响很大
    # initialization
    beta = np.ones((1, 3))
    # shape [N, 1]
    z = X.dot(beta.T)

    for i in range(200):    # 迭代次数
        # shape [N, 1]
        p1 = np.exp(z) / (1 + np.exp(z))
        # shape [1, 3]
        first_order = -np.sum(X * (y - p1), 0, keepdims=True)

        # update
        beta -= first_order * lr
        z = X.dot(beta.T)

    l = np.sum(-y * z + np.log(1 + np.exp(z)))    # 目标函数
    print(l)
    print(beta)
    return beta


if __name__ == "__main__":
    # read data from csv file
    filename = "C:\\Users\\14399\\Desktop\\西瓜3.0.csv"
    with open(filename) as f:
        reader = csv.reader(f)
        header_row = next(reader)
        datax = []
        datay = []
        density = []
        sugar = []
        for row in reader:
            datax.append([float(row[7]), float(row[8]), float(1.0)])
            datay.append([float(row[10])])
            density.append(float(row[7]))
            sugar.append(float(row[8]))

    # 训练数据正反例 可视化
    plt.plot(density[:8], sugar[:8], 'bo')
    plt.plot(density[8:], sugar[8:], 'r+')

    # 分别用newton法和梯度递降法求最优参数beta_1,beta_2
    X = np.array(datax)
    y = np.array(datay)
    beta_1 = newton(X, y)
    beta_2 = gradDescent(X, y)

    # 画出对率曲线   0.1和0.9分别代表曲线两端点x轴的坐标
    newton_left = -(beta_1[0, 0] * 0.1 + beta_1[0, 2]) / beta_1[0, 1]
    newton_right = -(beta_1[0, 0] * 0.9 + beta_1[0, 2]) / beta_1[0, 1]
    plt.plot([0.1, 0.9], [newton_left, newton_right], 'g-')

    grad_descent_left = -(beta_2[0, 0] * 0.1 + beta_2[0, 2]) / beta_2[0, 1]
    grad_descent_right = -(beta_2[0, 0] * 0.9 + beta_2[0, 2]) / beta_2[0, 1]
    plt.plot([0.1, 0.9], [grad_descent_left, grad_descent_right], 'y-')

    plt.xlabel('density')
    plt.ylabel('sugar rate')
    plt.title("LR")

    plt.show()

西瓜书 课后习题3.3

 

西瓜3.0数据集:链接:https://pan.baidu.com/s/1RXTUG9gP1Jn9HKFCiEzOlA         密码:3h6n

参考资料:https://blog.****.net/victoriaw/article/details/77989610    

https://blog.****.net/chinwuforwork/article/details/51786967     # plt.plot参数设置