# Evaluating machine learning algorithms with sklearn

# Evaluation of machine learning classification algorithms:
# the binary classification case
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets

# Load the digits data set and reduce it to a binary problem:
# digit 9 becomes the positive class (1), every other digit the negative class (0).
d = datasets.load_digits()
x = d.data
y = d.target.copy()  # copy so the original targets are not modified
print(len(y))
y[d.target == 9] = 1
y[d.target != 9] = 0
print(y)
# Number of occurrences of each label (the top-level pd.value_counts is deprecated).
print(pd.Series(y).value_counts())

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=666)

from sklearn.linear_model import LogisticRegression

# Classify the data with logistic regression.
log_reg = LogisticRegression(solver="newton-cg")
log_reg.fit(x_train, y_train)
print(log_reg.score(x_test, y_test))
y_pre = log_reg.predict(x_test)
def TN(y_true, y_pre):
    """Count the true negatives: samples labelled 0 that were predicted 0.

    Args:
        y_true: Array-like of ground-truth binary labels (0 or 1).
        y_pre: Array-like of predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where both ``y_true`` and ``y_pre`` equal 0.
    """
    # Coerce to arrays so plain lists are compared element-wise
    # instead of as a single object.
    y_true = np.asarray(y_true)
    y_pre = np.asarray(y_pre)
    return np.sum((y_true == 0) & (y_pre == 0))
def FP(y_true, y_pre):
    """Count the false positives: samples labelled 0 that were predicted 1.

    Args:
        y_true: Array-like of ground-truth binary labels (0 or 1).
        y_pre: Array-like of predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where ``y_true`` is 0 and ``y_pre`` is 1.
    """
    # Coerce to arrays so plain lists are compared element-wise
    # instead of as a single object.
    y_true = np.asarray(y_true)
    y_pre = np.asarray(y_pre)
    return np.sum((y_true == 0) & (y_pre == 1))
def FN(y_true, y_pre):
    """Count the false negatives: samples labelled 1 that were predicted 0.

    Args:
        y_true: Array-like of ground-truth binary labels (0 or 1).
        y_pre: Array-like of predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where ``y_true`` is 1 and ``y_pre`` is 0.
    """
    # Coerce to arrays so plain lists are compared element-wise
    # instead of as a single object.
    y_true = np.asarray(y_true)
    y_pre = np.asarray(y_pre)
    return np.sum((y_true == 1) & (y_pre == 0))
def TP(y_true, y_pre):
    """Count the true positives: samples labelled 1 that were predicted 1.

    Args:
        y_true: Array-like of ground-truth binary labels (0 or 1).
        y_pre: Array-like of predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where both ``y_true`` and ``y_pre`` equal 1.
    """
    # Coerce to arrays so plain lists are compared element-wise
    # instead of as a single object.
    y_true = np.asarray(y_true)
    y_pre = np.asarray(y_pre)
    return np.sum((y_true == 1) & (y_pre == 1))
# Print the four confusion-matrix cell counts for the test split,
# one per line, in the order TN, FP, FN, TP.
for _cell_counter in (TN, FP, FN, TP):
    print(_cell_counter(y_test, y_pre))
# Definition of the confusion matrix
def confusion_matrix(y_true, y_pre):
    """Build the 2x2 confusion matrix of a binary classification result.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pre: Predicted binary labels, same length as ``y_true``.

    Returns:
        A NumPy array laid out as ``[[TN, FP], [FN, TP]]``: rows are the
        true class (0, then 1), columns the predicted class (0, then 1).
    """
    return np.array([
        [TN(y_true, y_pre), FP(y_true, y_pre)],
        [FN(y_true, y_pre), TP(y_true, y_pre)],
    ])
print (confusion_matrix (y_test, y_pre))
# Precision
def precision(y_true, y_pre):
    """Return the precision TP / (FP + TP) of a binary prediction.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pre: Predicted binary labels, same length as ``y_true``.

    Returns:
        The fraction of predicted positives that are truly positive, or
        ``0.0`` when nothing was predicted positive.
    """
    true_positive = TP(y_true, y_pre)
    predicted_positive = FP(y_true, y_pre) + true_positive
    # The counts are NumPy integers: dividing 0 by 0 yields nan with a warning
    # instead of raising, so the empty case must be checked explicitly.
    if predicted_positive == 0:
        return 0.0
    return true_positive / predicted_positive
# Recall
def recall(y_true, y_pre):
    """Return the recall TP / (FN + TP) of a binary prediction.

    Args:
        y_true: Ground-truth binary labels (0 or 1).
        y_pre: Predicted binary labels, same length as ``y_true``.

    Returns:
        The fraction of truly positive samples that were predicted positive,
        or ``0.0`` when there are no positive samples.
    """
    true_positive = TP(y_true, y_pre)
    actual_positive = FN(y_true, y_pre) + true_positive
    # The counts are NumPy integers: dividing 0 by 0 yields nan with a warning
    # instead of raising, so the empty case must be checked explicitly.
    if actual_positive == 0:
        return 0.0
    return true_positive / actual_positive
print(precision(y_test, y_pre))
print(recall(y_test, y_pre))

# Use the confusion matrix, precision and recall provided by sklearn directly.
# NOTE: from here on the name `confusion_matrix` refers to sklearn's function,
# not the hand-written definition earlier in this file.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

print(confusion_matrix(y_test, y_pre))
print(precision_score(y_test, y_pre))
print(recall_score(y_test, y_pre))
print(log_reg.score(x_test, y_test))
# F1 score: the harmonic mean of precision and recall. When the two values
# are far apart, the harmonic mean is pulled toward the smaller of the two.
def F1(pre, REC):
    """Return the harmonic mean of a precision and a recall value.

    Args:
        pre: Precision value.
        REC: Recall value.

    Returns:
        ``2 * pre * REC / (pre + REC)``, or ``0.0`` when ``pre + REC`` is 0.
    """
    denominator = pre + REC
    # Check explicitly instead of catching an exception: NumPy scalars
    # return nan on 0/0 rather than raising ZeroDivisionError.
    if denominator == 0:
        return 0.0
    return (2 * pre * REC) / denominator
print(F1(precision(y_test, y_pre), recall(y_test, y_pre)))
print(F1(0.1, 0.9))
print(F1(0, 1))

# sklearn provides the same metric directly as f1_score
from sklearn.metrics import f1_score

print(f1_score(y_test, y_pre))
# decision_function returns the raw logistic-regression score (theta * x) of
# each sample; the default prediction compares that score with 0.
print(log_reg.decision_function(x_test))

# Moving the decision threshold away from 0 changes the precision and recall
# of the classifier.
decision_scores = log_reg.decision_function(x_test)

y_pre2 = np.array(decision_scores >= 5, dtype="int")
print(precision(y_test, y_pre2))  # higher (the threshold was raised)
print(recall(y_test, y_pre2))  # lower
print(confusion_matrix(y_test, y_pre2))

y_pre3 = np.array(decision_scores >= -5, dtype="int")
print(precision(y_test, y_pre3))  # lower (the threshold was lowered)
print(recall(y_test, y_pre3))  # higher
print(confusion_matrix(y_test, y_pre3))
print(y_pre3)
# Plot precision and recall as functions of the decision threshold.
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

thresholds = np.arange(np.min(decision_scores), np.max(decision_scores), 0.1)
pre = []
rec = []
for threshold in thresholds:
    y_pre11 = np.array(decision_scores > threshold, dtype="int")
    pre.append(precision_score(y_test, y_pre11))
    rec.append(recall_score(y_test, y_pre11))
plt.figure()
plt.plot(thresholds, pre, "r", thresholds, rec, "g")
plt.show()

# Plot precision against recall (the other view of the same trade-off).
plt.plot(pre, rec, "g", linewidth=1)
plt.show()
# sklearn's precision_recall_curve returns the precision and recall values
# together with the decision thresholds they correspond to.
from sklearn.metrics import precision_recall_curve

decision_scores = log_reg.decision_function(x_test)
pre1, rec1, thre1 = precision_recall_curve(y_test, decision_scores)
print(rec1.shape)
print(pre1.shape)
# The threshold array has one element fewer: the final point
# (precision 1, recall 0) has no corresponding threshold.
print(thre1.shape)
plt.figure()
plt.plot(thre1, pre1[:-1], "r")  # drop the last point so the lengths match
plt.plot(thre1, rec1[:-1], "g")
plt.show()
plt.plot(pre1, rec1)
plt.show()
# Use sklearn's ROC curve (TPR plotted against FPR).
from sklearn.metrics import roc_curve

# Score vector computed from the test data set.
decision_scores = log_reg.decision_function(x_test)
fpr, tpr, thre2 = roc_curve(y_test, decision_scores)
plt.plot(fpr, tpr, "r")
# The larger the area enclosed by the curve and the x-axis, the better the model.
plt.show()

# roc_auc_score outputs the size of the area enclosed by the ROC curve and the x-axis.
from sklearn.metrics import roc_auc_score

print(roc_auc_score(y_test, decision_scores))
# Applying the evaluation metrics to a multi-class classification problem.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets

d = datasets.load_digits()
x = d.data
y = d.target  # keep all ten digit classes this time

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=666)

from sklearn.linear_model import LogisticRegression

log1 = LogisticRegression()
log1.fit(x_train, y_train)
print(log1.score(x_test, y_test))
y_p = log1.predict(x_test)
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Multi-class precision and recall require the `average` parameter to be set.
print(precision_score(y_test, y_p, average="micro"))
print(recall_score(y_test, y_p, average="micro"))

from sklearn.metrics import confusion_matrix

print(confusion_matrix(y_test, y_p))  # multi-class confusion matrix

# Draw the confusion matrix as a grayscale image: the brightness of each cell
# shows the relative size of that element.
C = confusion_matrix(y_test, y_p)
plt.matshow(C, cmap=plt.cm.gray)  # the brighter the cell, the larger the count
plt.show()

# Divide each row by its own sum. keepdims=True keeps the sums as a column
# vector so the division broadcasts per row rather than per column.
row_sum = np.sum(C, axis=1, keepdims=True)
erro_matrix = C / row_sum
np.fill_diagonal(erro_matrix, 0)  # zero the diagonal so only the errors remain
print(erro_matrix)

# Show where the errors are: the brighter the cell, the more errors.
plt.matshow(erro_matrix, cmap=plt.cm.gray)
plt.show()

Guess you like

Origin www.cnblogs.com/Yanjy-OnlyOne/p/12526433.html