4. Naive Bayes

One, The basic method of Naive Bayes

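In brief, naive Bayes is a generative method: it models the joint distribution P(X, Y) through the class prior P(Y = c_k) and the class-conditional distributions, under the "naive" assumption that the features are conditionally independent given the class. Writing x^{(j)} for the j-th of n features and c_k for the k-th class, the assumption reads

P(X = x \mid Y = c_k) = \prod_{j=1}^{n} P(X^{(j)} = x^{(j)} \mid Y = c_k)

and, since the evidence P(X = x) is the same for every class, Bayes' theorem gives the classifier

y = \arg\max_{c_k} P(Y = c_k) \prod_{j=1}^{n} P(X^{(j)} = x^{(j)} \mid Y = c_k)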

Two, Naive Bayes parameter estimation

1. Maximum likelihood estimation

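Under maximum likelihood estimation both kinds of probabilities are plain empirical frequencies over the m training samples; with I(\cdot) the indicator function and a_{jl} the l-th possible value of feature j,

P(Y = c_k) = \frac{\sum_{i=1}^{m} I(y_i = c_k)}{m}

P(X^{(j)} = a_{jl} \mid Y = c_k) = \frac{\sum_{i=1}^{m} I(x_i^{(j)} = a_{jl},\, y_i = c_k)}{\sum_{i=1}^{m} I(y_i = c_k)}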

2. Bayesian estimation

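Maximum likelihood can assign probability zero to a feature value that never occurs together with some class, which wipes out the whole product. Bayesian estimation avoids this by adding a constant \lambda > 0 to every count (\lambda = 1 is Laplace smoothing); with S_j the number of possible values of feature j and K the number of classes,

P_\lambda(X^{(j)} = a_{jl} \mid Y = c_k) = \frac{\sum_{i=1}^{m} I(x_i^{(j)} = a_{jl},\, y_i = c_k) + \lambda}{\sum_{i=1}^{m} I(y_i = c_k) + S_j \lambda}

P_\lambda(Y = c_k) = \frac{\sum_{i=1}^{m} I(y_i = c_k) + \lambda}{m + K \lambda}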

Three, The Naive Bayes algorithm

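Putting the pieces together, the algorithm has three steps: (1) estimate the prior P(Y = c_k) and the conditional probabilities P(X^{(j)} = a_{jl} \mid Y = c_k) from the training data, by maximum likelihood or by Bayesian estimation; (2) for a query point x = (x^{(1)}, ..., x^{(n)}), compute P(Y = c_k) \prod_j P(X^{(j)} = x^{(j)} \mid Y = c_k) for every class; (3) output the class with the largest value. The code below implements exactly these steps on a small data set with two categorical features.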

Four, Naive Bayes code implementation

import numpy as np


# Load the toy data set: two categorical features (x1 in {1,2,3}, x2 in {S,M,L}) and labels in {-1, 1}
def loaddata():
    X = np.array([[1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'],
                  [1, 'S'], [2, 'S'], [2, 'M'], [2, 'M'],
                  [2, 'L'], [2, 'L'], [3, 'L'], [3, 'M'],
                  [3, 'M'], [3, 'L'], [3, 'L']])
    y = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    return X, y


# Train: estimate the prior and conditional probabilities by maximum likelihood
def Train(trainset, train_labels):
    # number of samples
    m = trainset.shape[0]
    # number of features
    n = trainset.shape[1]
    # prior probabilities; key: class label, value: P(Y = label)
    prior_probability = {}
    # conditional probabilities; key: "label,feature_index,feature_value",
    # value: count (converted to a probability below)
    conditional_probability = {}

    # possible class labels
    labels = set(train_labels)
    # count the samples of each class (divided by the total m later)
    for label in labels:
        prior_probability[label] = len(train_labels[train_labels == label])
    print('prior_probability =', prior_probability)

    # count occurrences of each (label, feature, feature value) combination
    for i in range(m):
        for j in range(n):
            # key layout: label,feature_index,feature_value
            key = str(train_labels[i]) + ',' + str(j) + ',' + str(trainset[i][j])
            if key in conditional_probability:
                conditional_probability[key] += 1
            else:
                conditional_probability[key] = 1
    print('conditional_probability = ', conditional_probability)

    # store the final probabilities in a new dict, since a dict should not be modified while it is iterated over
    conditional_probability_final = {}
    for key in conditional_probability:
        # extract the class label from the key
        label = key.split(',')[0]
        conditional_probability_final[key] = conditional_probability[key] / prior_probability[int(label)]

    # final prior probabilities: divide each class count by the total number of samples m
    for label in labels:
        prior_probability[label] = prior_probability[label] / m

    return prior_probability, conditional_probability_final, labels


# Predict the label of one sample (uses the probabilities computed by Train, available as globals in __main__)
def predict(data):
    result = {}
    # loop over the possible class labels
    for label in train_labels_set:
        temp = 1.0
        for j in range(len(data)):
            key = str(label) + ',' + str(j) + ',' + str(data[j])
            # multiply the conditional probabilities together
            temp = temp * conditional_probability[key]
        # then multiply by the prior probability
        result[label] = temp * prior_probability[label]
    print('result =', result)
    # sort by posterior score and return the label with the largest value
    return sorted(result.items(), key=lambda x: x[1], reverse=True)[0][0]


if __name__ == '__main__':
    X, y = loaddata()
    prior_probability, conditional_probability, train_labels_set = Train(X, y)
    print('conditional_probability = ', conditional_probability)

    # predict the label of a new point (2, S)
    y_hat = predict([2, 'S'])
    print('y_hat =', y_hat)

prior_probability = {1: 9, -1: 6}
conditional_probability =  {'-1,0,1': 3, '-1,1,S': 3, '-1,1,M': 2, '1,0,1': 2, '1,1,M': 4, '1,1,S': 1, '-1,0,2': 2, '1,0,2': 3, '1,1,L': 4, '1,0,3': 4, '-1,0,3': 1, '-1,1,L': 1}
conditional_probability =  {'-1,0,1': 0.5, '-1,1,S': 0.5, '-1,1,M': 0.3333333333333333, '1,0,1': 0.2222222222222222, '1,1,M': 0.4444444444444444, '1,1,S': 0.1111111111111111, '-1,0,2': 0.3333333333333333, '1,0,2': 0.3333333333333333, '1,1,L': 0.4444444444444444, '1,0,3': 0.4444444444444444, '-1,0,3': 0.16666666666666666, '-1,1,L': 0.16666666666666666}
result = {1: 0.02222222222222222, -1: 0.06666666666666667}
y_hat = -1
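The printed numbers can be checked by hand. For the query point (2, S):
P(Y=-1) · P(X^{(1)}=2 | Y=-1) · P(X^{(2)}=S | Y=-1) = 6/15 · 2/6 · 3/6 = 1/15 ≈ 0.0667,
P(Y=1) · P(X^{(1)}=2 | Y=1) · P(X^{(2)}=S | Y=1) = 9/15 · 3/9 · 1/9 = 1/45 ≈ 0.0222,
so the classifier outputs y_hat = -1.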

Five, Laplace smoothing code implementation

import numpy as np


# Load the toy data set: two categorical features (x1 in {1,2,3}, x2 in {S,M,L}) and labels in {-1, 1}
def loaddata():
    X = np.array([[1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'],
                  [1, 'S'], [2, 'S'], [2, 'M'], [2, 'M'],
                  [2, 'L'], [2, 'L'], [3, 'L'], [3, 'M'],
                  [3, 'M'], [3, 'L'], [3, 'L']])
    y = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    return X, y


# Train: estimate the prior and conditional probabilities with Laplace smoothing
def Train(trainset, train_labels):
    # number of samples
    m = trainset.shape[0]
    # number of features
    n = trainset.shape[1]
    # prior probabilities; key: class label, value: P(Y = label)
    prior_probability = {}
    # conditional probabilities; key: "label,feature_index,feature_value",
    # value: count (converted to a smoothed probability below)
    conditional_probability = {}

    # possible class labels
    labels = set(train_labels)
    # count the samples of each class (smoothed and normalized at the end of the function)
    for label in labels:
        prior_probability[label] = len(train_labels[train_labels == label])
    print('prior_probability =', prior_probability)

    # count occurrences of each (label, feature, feature value) combination
    for i in range(m):
        for j in range(n):
            # key layout: label,feature_index,feature_value
            key = str(train_labels[i]) + ',' + str(j) + ',' + str(trainset[i][j])
            if key in conditional_probability:
                conditional_probability[key] += 1
            else:
                conditional_probability[key] = 1
    print('conditional_probability = ', conditional_probability)

    # store the final probabilities in a new dict, since a dict should not be modified while it is iterated over
    conditional_probability_final = {}
    for key in conditional_probability:
        # extract the class label and the feature index from the key
        label = key.split(',')[0]
        key1 = key.split(',')[1]
        # Ni: number of distinct values this feature can take (S_j in the formula)
        Ni = len(set(trainset[:, int(key1)]))
        # Laplace-smoothed conditional probability: (count + 1) / (class count + Ni)
        conditional_probability_final[key] = (conditional_probability[key] + 1) / (prior_probability[int(label)] + Ni)

    # final prior probabilities with Laplace smoothing: (class count + 1) / (m + number of classes)
    for label in labels:
        prior_probability[label] = (prior_probability[label] + 1) / (m + len(labels))

    return prior_probability, conditional_probability_final, labels


# Predict the label of one sample (uses the probabilities computed by Train, available as globals in __main__)
def predict(data):
    result = {}
    # loop over the possible class labels
    for label in train_labels_set:
        temp = 1.0
        for j in range(len(data)):
            key = str(label) + ',' + str(j) + ',' + str(data[j])
            # multiply the conditional probabilities together
            temp = temp * conditional_probability[key]
        # then multiply by the prior probability
        result[label] = temp * prior_probability[label]
    print('result =', result)
    # sort by posterior score and return the label with the largest value
    return sorted(result.items(), key=lambda x: x[1], reverse=True)[0][0]


if __name__ == '__main__':
    X, y = loaddata()
    prior_probability, conditional_probability, train_labels_set = Train(X, y)
    print('conditional_probability = ', conditional_probability)

    # predict the label of a new point (2, S)
    y_hat = predict([2, 'S'])
    print('y_hat =', y_hat)

prior_probability = {1: 9, -1: 6}
conditional_probability =  {'-1,0,1': 3, '-1,1,S': 3, '-1,1,M': 2, '1,0,1': 2, '1,1,M': 4, '1,1,S': 1, '-1,0,2': 2, '1,0,2': 3, '1,1,L': 4, '1,0,3': 4, '-1,0,3': 1, '-1,1,L': 1}
conditional_probability =  {'-1,0,1': 0.4444444444444444, '-1,1,S': 0.4444444444444444, '-1,1,M': 0.3333333333333333, '1,0,1': 0.25, '1,1,M': 0.4166666666666667, '1,1,S': 0.16666666666666666, '-1,0,2': 0.3333333333333333, '1,0,2': 0.3333333333333333, '1,1,L': 0.4166666666666667, '1,0,3': 0.4166666666666667, '-1,0,3': 0.2222222222222222, '-1,1,L': 0.2222222222222222}
result = {1: 0.032679738562091505, -1: 0.061002178649237467}
y_hat = -1
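Again the result can be verified by hand. With λ = 1 the smoothed estimates give
P(Y=-1) · P(X^{(1)}=2 | Y=-1) · P(X^{(2)}=S | Y=-1) = 7/17 · 3/9 · 4/9 = 28/459 ≈ 0.0610,
P(Y=1) · P(X^{(1)}=2 | Y=1) · P(X^{(2)}=S | Y=1) = 10/17 · 4/12 · 2/12 = 5/153 ≈ 0.0327,
so the prediction is again -1.

As a cross-check, the same example can be run through scikit-learn's CategoricalNB; the sketch below assumes scikit-learn is installed and reuses loaddata() from above (the names encoder, clf and x_query are only illustrative). Its default alpha=1.0 applies the same Laplace smoothing to the conditional probabilities, while its class prior comes from the raw class frequencies (no +1), so the intermediate probabilities differ slightly; the predicted label should still be -1.

from sklearn.preprocessing import OrdinalEncoder
from sklearn.naive_bayes import CategoricalNB

X, y = loaddata()
# encode the categorical features ('1'/'2'/'3' and 'S'/'M'/'L') as integer codes
encoder = OrdinalEncoder()
X_encoded = encoder.fit_transform(X)
# alpha=1.0 (the default) is Laplace smoothing of the conditional probabilities
clf = CategoricalNB(alpha=1.0)
clf.fit(X_encoded, y)
# X was loaded as strings, so the query values are passed as strings too
x_query = encoder.transform([['2', 'S']])
print(clf.predict(x_query))  # expected: [-1]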

Six, Naive Bayes with continuous data

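For a continuous feature the conditional distribution can no longer be estimated by counting. The usual approach (Gaussian naive Bayes) is to assume that, within each class, the feature is normally distributed, and to estimate a mean and variance per class and per feature:

P(x^{(j)} \mid Y = c_k) = \frac{1}{\sqrt{2\pi\sigma_{jk}^2}} \exp\left(-\frac{(x^{(j)} - \mu_{jk})^2}{2\sigma_{jk}^2}\right)

where \mu_{jk} and \sigma_{jk}^2 are the sample mean and variance of feature j over the training points of class c_k; this density value then takes the place of the conditional probability in the product.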

Seven, Summary

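In brief: naive Bayes learns P(Y) and P(X | Y) from the training data, uses the conditional independence assumption to factor P(X | Y) over the individual features, and predicts the class with the largest posterior. Maximum likelihood estimates the probabilities with raw frequencies, while Bayesian (Laplace-smoothed) estimation adds \lambda to every count so that unseen feature values do not zero out the product; continuous features can be handled by fitting a per-class Gaussian to each feature.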

Origin blog.csdn.net/weixin_46649052/article/details/112491015