Naive Bayes: Implementation

1. Naive Bayes implemented from scratch

import numpy as np
import pandas as pd

%config ZMQInteractiveShell.ast_node_interactivity='all'
class NaiveBayes:
    """Naive Bayes classifier for discrete features with Lidstone smoothing.

    lambda_ = 0 gives the maximum-likelihood estimate; lambda_ = 1 gives
    Laplace smoothing (Bayesian estimation, "Statistical Learning Methods"
    section 4.2).
    """

    def __init__(self, lambda_):
        self.lambda_ = lambda_        # smoothing parameter (0 => pure MLE)
        self.y_types_count = None     # class label -> sample count
        self.y_types_proba = None     # class label -> smoothed prior probability
        self.x_types_proba = dict()   # (feature index, feature value, class) -> P(X_i=value | Y=class)

    def fit(self, X_train, y_train):
        """Estimate the prior P(Y) and conditionals P(X_i | Y) from data.

        X_train: 2-D array-like of discrete feature values.
        y_train: 1-D array-like of class labels.
        """
        self.y_types = np.unique(y_train)   # np.unique sorts and de-duplicates the labels
        X = pd.DataFrame(X_train)           # pandas makes per-column value counting easy
        y = pd.DataFrame(y_train)

        # Class counts, e.g. {1: 9, -1: 6}.
        self.y_types_count = y[0].value_counts()
        # Smoothed prior: (N_c + lambda) / (N + K * lambda), K = number of classes.
        self.y_types_proba = (self.y_types_count + self.lambda_) / (
            y.shape[0] + len(self.y_types) * self.lambda_)

        # Conditional probabilities P(X_i = value | Y = c).
        for idx in X.columns:                # each feature column
            # S_j: number of distinct values this feature takes over the whole
            # training set (the original used the per-class count, which makes
            # the smoothing denominator wrong whenever a class misses a value).
            n_values = X[idx].nunique()
            for j in self.y_types:           # each class label
                mask = (y[0] == j).values    # 1-D boolean mask over samples of class j
                counts = X[mask][idx].value_counts()
                # Iterate over ALL observed values of the feature, not only the
                # ones seen within class j, so zero-count pairs still receive
                # their lambda / (N_c + S_j * lambda) smoothed probability.
                for value in X[idx].unique():
                    count = counts.get(value, 0)
                    self.x_types_proba[(idx, value, j)] = (count + self.lambda_) / (
                        self.y_types_count[j] + n_values * self.lambda_)

    def predict(self, X_new):
        """Return the class with the maximum posterior probability for one sample.

        Raises KeyError for feature values never seen during fit.
        """
        res = []
        for y in self.y_types:               # score every candidate class
            p_y = self.y_types_proba[y]      # prior P(Y = c_k)
            p_xy = 1.0
            for idx, x in enumerate(X_new):  # product of P(X_i = x_i | Y = c_k)
                p_xy *= self.x_types_proba[(idx, x, y)]
            res.append(p_y * p_xy)
        for i in range(len(self.y_types)):
            print("[{}] corresponding probability: {:.2%}".format(self.y_types[i], res[i]))
        # Return the label with the maximum (unnormalized) posterior.
        return self.y_types[np.argmax(res)]
def main():
    """Train the hand-written NaiveBayes on the textbook data set
    (Li Hang, "Statistical Learning Methods", example 4.1) and classify
    one new sample."""
    X_train = np.array([
                      [1, "S"],
                      [1, "M"],
                      [1, "M"],
                      [1, "S"],
                      [1, "S"],
                      [2, "S"],
                      [2, "M"],
                      [2, "M"],
                      [2, "L"],
                      [2, "L"],
                      [3, "L"],
                      [3, "M"],
                      [3, "M"],
                      [3, "L"],
                      [3, "L"],  # fixed: was garbled to [3, "The"]; the feature only takes S/M/L
                      ])
    y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    clf = NaiveBayes(lambda_=0.2)  # small Lidstone smoothing
    clf.fit(X_train, y_train)
    X_new = np.array([2, 'S'])
    y_predict = clf.predict(X_new)
    print("{}被分类为:{}".format(X_new, y_predict))


if __name__ == '__main__':
    main()

 

2. Naive Bayes implemented with sklearn

import numpy as np
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn import preprocessing  # 预处理
def main():
    """One-hot encode the textbook data set and classify one new sample
    with sklearn's MultinomialNB."""
    X_train = np.array([
                      [1, "S"],
                      [1, "M"],
                      [1, "M"],
                      [1, "S"],
                      [1, "S"],
                      [2, "S"],
                      [2, "M"],
                      [2, "M"],
                      [2, "L"],
                      [2, "L"],
                      [3, "L"],
                      [3, "M"],
                      [3, "M"],
                      [3, "L"],
                      [3, "L"],  # fixed: was garbled to [3, "The"]; the feature only takes S/M/L
                      ])
    y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    # Discrete features must be one-hot encoded before MultinomialNB.
    enc = preprocessing.OneHotEncoder(categories='auto')
    enc.fit(X_train)
    X_train = enc.transform(X_train).toarray()
    print(X_train)
    # Near-zero alpha makes the estimate effectively maximum-likelihood.
    clf = MultinomialNB(alpha=0.0000001)
    clf.fit(X_train, y_train)
    X_new = np.array([[2, 'S']])
    X_new = enc.transform(X_new).toarray()
    y_predict = clf.predict(X_new)
    print("------------------------------------")
    print("{}被分类为:{}".format(X_new, y_predict))
    print(clf.predict_proba(X_new))


if __name__ == '__main__':
    main()

reference:

[1] "Statistical Learning Methods" training-camp after-class exercises (Deep Eye)

[2] "Statistical Learning Methods", Li Hang

 

Guess you like

Origin www.cnblogs.com/WJZheng/p/11275360.html