sklearn的metrics

回归模型的评判标准

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
# Load the Boston housing data and keep only its first feature column; the
# ":1" slice (rather than a bare index) keeps X two-dimensional.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on older releases -- confirm the pinned version.
boston = datasets.load_boston()
X, y = boston.data[:, :1], boston.target

# No random_state is passed, so the split -- and every score computed from
# it below -- differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit a linear regression on the training part and predict the held-out part.
line_clf = LinearRegression().fit(X_train, y_train)
y_predict = line_clf.predict(X_test)

MSE

sklearn的metrics

from sklearn.metrics import mean_squared_error

# Mean squared error between the held-out targets and the predictions.
mean_squared_error(y_true=y_test, y_pred=y_predict)
# Output recorded with this cell (presumably from an earlier run):
# 71.18241302503122

RMSE

sklearn的metrics

from sklearn.metrics import mean_squared_error

# RMSE is the square root of the MSE, which brings the error back to the
# same units as the target.
squared_error = mean_squared_error(y_true=y_test, y_pred=y_predict)
np.sqrt(squared_error)
# Output recorded with this cell (presumably from an earlier run):
# 8.436967051318337

MAE

from sklearn.metrics import mean_absolute_error

# Mean absolute error between the held-out targets and the predictions.
mean_absolute_error(y_true=y_test, y_pred=y_predict)
# Output recorded with this cell (presumably from an earlier run):
# 5.966994236883726

R²(R方,决定系数)

我们一般用 R²(决定系数)来衡量回归模型的拟合优度,它在回归问题中的作用类似于分类问题中的准确率;模型自带的 score 方法返回的也是 R²(见下方两个结果相同)。

from sklearn.metrics import r2_score

# R^2 computed explicitly from the predictions ...
r2_score(y_true=y_test, y_pred=y_predict)
# Output recorded with this cell (presumably from an earlier run):
# 0.1035542273566239

# ... and through the fitted estimator's own score method; the recorded
# values of the two calls are identical.
line_clf.score(X_test, y_test)
# 0.1035542273566239

分类模型的评判标准

import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures

# Breast-cancer data set, split into train and test parts. No random_state
# is passed, so the split differs from run to run.
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Degree-2 polynomial expansion: fitted on the training part only, then
# applied unchanged to the test part.
# NOTE(review): the features are not scaled before the expansion; if the
# solver reports convergence warnings, consider adding a scaler -- TODO confirm.
poly = PolynomialFeatures(degree=2)
log_reg = LogisticRegression(C=3)
X_train = poly.fit_transform(X_train)
X_test = poly.transform(X_test)

# Train the classifier and predict labels for the test part.
log_reg.fit(X_train, y_train)
y_predict = log_reg.predict(X_test)

精准率

from sklearn.metrics import precision_score

# Precision of the predicted labels against the true test labels.
precision_score(y_true=y_test, y_pred=y_predict)
# Output recorded with this cell (presumably from an earlier run):
# 0.9787234042553191

召回率

from sklearn.metrics import recall_score

# Recall of the predicted labels against the true test labels.
recall_score(y_true=y_test, y_pred=y_predict)
# Output recorded with this cell (presumably from an earlier run):
# 0.9787234042553191

F1(精准率与召回率的调和平均数)

from sklearn.metrics import f1_score

# F1 score of the predicted labels against the true test labels.
f1_score(y_true=y_test, y_pred=y_predict)
# Output recorded with this cell (presumably from an earlier run):
# 0.9787234042553191

ROC曲线

from sklearn.metrics import roc_curve

# Use the classifier's continuous decision scores rather than its hard
# labels, so the curve can be traced over many thresholds.
decision_scores = log_reg.decision_function(X_test)
fprs, tprs, thresholds = roc_curve(y_true=y_test, y_score=decision_scores)

# Plot the first returned array on the x axis against the second on the y axis.
plt.plot(fprs, tprs)
plt.show()

sklearn的metrics

from sklearn.metrics import roc_auc_score

# Measures how much area is enclosed under the ROC curve.
roc_auc_score(y_true=y_test, y_score=decision_scores)
# Output recorded with this cell (presumably from an earlier run):
# 0.9963091619626574