机器学习之多项式回归
In [16]:
# Import the numerical, plotting, and data-frame libraries used throughout.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Use the interactive notebook backend so figures can be resized/zoomed.
%matplotlib notebook
# Select a font that can render the Chinese titles and axis labels used below.
plt.rc('font', family='SimHei', size=8)
In [10]:
# Read the position/salary table from the CSV next to the notebook.
dataset = pd.read_csv('./Position_Salaries.csv')

# Target: column 2 (salary), taken as a 1-D array.
y = dataset.iloc[:, 2].values

# Feature: column 1 (position level). The 1:2 slice keeps the result 2-D
# (one column), which is the layout the estimators below are fitted on.
X = dataset.iloc[:, 1:2].values

# Show the loaded table as the cell output.
dataset
Out[10]:
In [11]:
# Display the feature matrix (position level) to check its contents and shape.
X
Out[11]:
In [12]:
# Display the target vector (salary) to check its contents.
y
Out[12]:
In [13]:
from sklearn.linear_model import LinearRegression

# Baseline model: a straight line fitted on the raw position level.
# fit() returns the estimator itself, so construction and training are chained.
lin_reg = LinearRegression().fit(X, y)

# Echo the fitted estimator as the cell output.
lin_reg
Out[13]:
In [29]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the single position-level column into polynomial terms up to degree 4.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

# Polynomial regression = ordinary linear regression on the expanded features.
lin_reg_2 = LinearRegression().fit(X_poly, y)

# Show the expanded feature matrix as the cell output.
X_poly
Out[29]:
In [24]:
# Plot the observations (red) and the straight-line fit (blue).
# A fresh figure is created explicitly: with the interactive
# `%matplotlib notebook` backend, bare pyplot calls draw into whichever figure
# is still active, so this plot could otherwise be stacked onto an earlier one
# (or onto itself when the cell is re-run).
fig, ax = plt.subplots()
ax.scatter(X, y, color="red")
ax.plot(X, lin_reg.predict(X), color="blue")
ax.set_title(u"真话还是假话(线性模型)")
ax.set_xlabel(u"职位水平")
ax.set_ylabel(u"薪水")
plt.show()

# Coefficient of determination of the linear model on the training data.
print('r-squared', lin_reg.score(X, y))
In [28]:
# Build a dense grid (step 0.1) over the observed level range so the fitted
# polynomial is drawn as a smooth curve.
# - X.min()/X.max() give plain scalars; the builtin min()/max() on a 2-D array
#   return 1-element arrays, which NumPy no longer wants used as scalars.
# - linspace includes the right endpoint, so the curve reaches the last
#   observation instead of stopping one step short as arange would.
grid_step = 0.1
n_points = int(round((X.max() - X.min()) / grid_step)) + 1
X_grid = np.linspace(X.min(), X.max(), n_points).reshape(-1, 1)

# New figure so this plot is not drawn on top of a previous interactive figure.
fig, ax = plt.subplots()
ax.scatter(X, y, color="red")
# Use transform(), not fit_transform(): the feature expansion was already
# fitted on the training data and must not be re-fitted on prediction inputs.
ax.plot(X_grid, lin_reg_2.predict(poly_reg.transform(X_grid)), color="blue")
ax.set_title(u"真话还是假话(多项式模型)")
ax.set_xlabel(u"职位水平")
ax.set_ylabel(u"薪水")
plt.show()

# Coefficient of determination of the polynomial model on the training data.
print('r-squared', lin_reg_2.score(X_poly, y))
In [19]:
# Predict the salary for position level 6.5 with the linear model.
# scikit-learn expects a 2-D input of shape (n_samples, n_features), so the
# single query value is wrapped as one sample with one feature.
lin_reg.predict([[6.5]])
Out[19]:
In [20]:
# Predict the salary for position level 6.5 with the polynomial model.
# The query is passed as a 2-D (1 sample, 1 feature) array and expanded with
# the already-fitted transformer (transform, not fit_transform).
lin_reg_2.predict(poly_reg.transform([[6.5]]))
Out[20]: