1. Naive Bayes: implementation from scratch
import numpy as np
import pandas as pd

# IPython-only setting: echo the result of every expression in a notebook
# cell, not just the last one.  Guarded so this file also runs as a plain
# Python script, where `get_ipython` does not exist.
try:
    get_ipython().run_line_magic(
        "config", "ZMQInteractiveShell.ast_node_interactivity='all'"
    )
except NameError:
    pass
class NaiveBayes():
    """Naive Bayes classifier for categorical features with add-lambda smoothing.

    Follows "Statistical Learning Methods" (Li Hang), chapter 4:
    the prior P(Y=ck) and the conditionals P(X_j=a | Y=ck) are estimated
    from frequency counts.  ``lambda_ = 0`` gives the plain maximum-likelihood
    estimate; ``lambda_ = 1`` gives Laplace smoothing.
    """

    def __init__(self, lambda_):
        # Smoothing constant; 0 means maximum-likelihood estimation.
        self.lambda_ = lambda_
        # Series mapping class label -> sample count, filled by fit().
        self.y_types_count = None
        # Series mapping class label -> prior probability P(Y=ck).
        self.y_types_proba = None
        # Dict mapping (feature index, feature value, class label)
        # -> conditional probability P(X_j = a | Y = ck).
        self.x_types_proba = dict()

    def fit(self, X_train, y_train):
        """Estimate priors and conditional probabilities from the training set.

        X_train: array-like of shape (n_samples, n_features); values are
            treated as categories (NumPy coerces mixed columns to strings).
        y_train: array-like of shape (n_samples,) with the class labels.
        """
        # Distinct class labels, deduplicated and sorted.
        self.y_types = np.unique(y_train)
        # pandas makes the grouped value counting below concise.
        X = pd.DataFrame(X_train)
        y = pd.DataFrame(y_train)
        # Class counts, e.g. {1: 9, -1: 6}.
        self.y_types_count = y[0].value_counts()
        # Smoothed prior P(Y=ck).
        self.y_types_proba = (self.y_types_count + self.lambda_) / (
            y.shape[0] + len(self.y_types) * self.lambda_
        )
        # Conditional probability for every observed
        # (feature index, feature value, class label) triple.
        for idx in X.columns:
            for j in self.y_types:
                # Rows belonging to class j; count each value of feature idx.
                mask = (y[0] == j).values
                p_x_y = X[mask][idx].value_counts()
                # NOTE(review): the smoothing denominator uses the number of
                # values observed for class j (p_x_y.shape[0]); the book uses
                # the number of possible values Sj of the feature — confirm
                # which is intended.
                for i in p_x_y.index:
                    self.x_types_proba[(idx, i, j)] = (
                        p_x_y[i] + self.lambda_
                    ) / (self.y_types_count[j] + p_x_y.shape[0] * self.lambda_)

    def predict(self, X_new):
        """Return the class maximizing P(Y=ck) * prod_j P(x_j | Y=ck).

        X_new: 1-D array-like with one value per feature.  Raises KeyError
        for a (feature, value, class) combination never seen during fit().
        """
        res = []
        for y in self.y_types:
            # Prior P(Y=ck).
            p_y = self.y_types_proba[y]
            # Product of conditionals P(x1,...,xd | Y=ck) under the
            # conditional-independence assumption.
            p_xy = 1
            for idx, x in enumerate(X_new):
                p_xy *= self.x_types_proba[(idx, x, y)]
            res.append(p_y * p_xy)
        for i in range(len(self.y_types)):
            print("[{}] corresponding probability: {:.2%}".format(
                self.y_types[i], res[i]))
        # Maximum a posteriori class label.
        return self.y_types[np.argmax(res)]
def main():
    """Run the hand-written NaiveBayes on example 4.1 of the book."""
    # NOTE(review): the book's example 4.1 has [3, "L"] for the last sample;
    # "The" looks like a translation artifact — confirm against the source.
    X_train = np.array([
        [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
        [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
        [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "The"],
    ])
    y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    clf = NaiveBayes(lambda_=0.2)
    clf.fit(X_train, y_train)
    X_new = np.array([2, 'S'])
    y_predict = clf.predict(X_new)
    print("{}被分类为:{}".format(X_new, y_predict))


if __name__ == '__main__':
    main()
2. Naive Bayes: scikit-learn implementation
import numpy as np

from sklearn import preprocessing  # preprocessing utilities (one-hot encoding)
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
def main():
    """Same example via scikit-learn: one-hot encode, then MultinomialNB."""
    # NOTE(review): the book's example 4.1 has [3, "L"] for the last sample;
    # "The" looks like a translation artifact — confirm against the source.
    X_train = np.array([
        [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
        [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
        [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "The"],
    ])
    y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    # One-hot encode the categorical features so MultinomialNB can count them.
    enc = preprocessing.OneHotEncoder(categories='auto')
    enc.fit(X_train)
    X_train = enc.transform(X_train).toarray()
    print(X_train)
    # A tiny alpha approximates the unsmoothed maximum-likelihood estimate.
    clf = MultinomialNB(alpha=0.0000001)
    clf.fit(X_train, y_train)
    # The new point must go through the same encoder as the training data.
    X_new = np.array([[2, 'S']])
    X_new = enc.transform(X_new).toarray()
    y_predict = clf.predict(X_new)
    print("------------------------------------")
    print("{}被分类为:{}".format(X_new, y_predict))
    print(clf.predict_proba(X_new))


if __name__ == '__main__':
    main()
References:
[1] After-class exercises from the "Statistical Learning Methods" training camp (Shendu Zhiyan)
[2] *Statistical Learning Methods*, Li Hang