朴素贝叶斯方法的学习与分类

#!/usr/bin/python
#-*-coding:utf-8 -*-
#贝叶斯实现

def createDataSET():
    """
    Build the toy training set used by the naive Bayes example.

    Every sample has the form ``[feature_1, feature_2, class_label]``:
    the first feature is 1, 2 or 3, the second is "S", "M" or "L", and
    the class label is 1 or -1.

    :return: ``(dataSet, labels)`` -- the list of 15 samples and a list of
        two name strings (not read by the other functions in this file)
    """
    # The table is written column by column; sample k is the k-th entry
    # of each column.
    first_feature = [1] * 5 + [2] * 5 + [3] * 5
    second_feature = list("SMMSS" + "SMMLL" + "LMMLL")
    class_column = [-1, -1, 1, 1, -1,
                    -1, -1, 1, 1, 1,
                    1, 1, 1, 1, -1]
    dataSet = [
        list(sample)
        for sample in zip(first_feature, second_feature, class_column)
    ]
    labels = ["no surfacing", "flippers"]
    return dataSet, labels
# Select the samples whose i-th column equals a given value and drop that column.
def splitDataSet(dataSet,i,values):
    '''
    :param dataSet: list of samples; each sample must support indexing and slicing
    :param i: index of the column to split on
    :param values: value that column ``i`` must equal for a sample to be kept
    :return: new list holding, for every matching sample, a new list of its
             elements with column ``i`` removed; the input is not modified
    '''
    return [
        list(row[:i]) + list(row[i + 1:])
        for row in dataSet
        if row[i] == values
    ]

# Maximum-likelihood estimate of the conditional probability P(X_j = feaValu | Y = y).
def getConPro(j,feaValu,y,dataSet):
    """
    Estimate P(X_j = feaValu | Y = y) by counting samples.

    :param j: zero-based index of the feature column
    :param feaValu: value of feature ``j`` to condition on
    :param y: class label Ck; compared with the last element of each sample
    :param dataSet: list of samples of the form ``[feature..., class_label]``
    :return: (number of samples with feature ``j`` == ``feaValu`` and class ``y``)
             divided by (number of samples with class ``y``), as a float;
             0.0 when no sample has class ``y``
    """
    # Samples whose j-th feature equals feaValu, with that column removed.
    # The class label is still the last element as long as j is a feature
    # column and not the label column itself.
    MLE_data=splitDataSet(dataSet,j,feaValu)
    print("MLE_data---"+str(MLE_data))
    joint_count = sum(1 for data in MLE_data if data[-1] == y)
    class_count = sum(1 for data in dataSet if data[-1] == y)
    if class_count == 0:
        # The class never occurs, so the ratio is undefined; report 0.0
        # instead of raising ZeroDivisionError.
        return 0.0
    return float(joint_count) / class_count
# Prior probability P(Y = y), estimated from the class frequencies.
def getPrior(dataSet,y):
    '''
    :param dataSet: non-empty list of samples; the class label is taken from
                    the last column of the first sample
    :param y: class label Ck
    :return: number of samples whose label equals ``y`` divided by the total
             number of samples, as a float
    '''
    sample_total = len(dataSet)
    label_column = len(dataSet[0]) - 1
    # Splitting on the label column keeps exactly the samples of class y.
    class_rows = splitDataSet(dataSet, label_column, y)
    return float(len(class_rows)) / sample_total

def getBaseClass(fenValu):
    """
    Classify one sample with naive Bayes on the built-in training set.

    For every class label found in the data returned by ``createDataSET``,
    the score ``getPrior(class) * product of getConPro(i, fenValu[i], class)``
    over all feature columns ``i`` is computed. The class with the strictly
    largest score is printed and returned.

    :param fenValu: feature values of the sample to classify, one per feature
                    column, in column order
    :return: the class label with the highest score; 0 if no class obtains a
             score greater than zero
    """
    dataSet, _ = createDataSET()
    feature_count = len(dataSet[0]) - 1  # the last column is the class label
    best_score = 0
    besty = 0
    for cla in set(data[-1] for data in dataSet):
        # Product of the per-feature conditional probabilities P(X_i | Y = cla).
        proportion = 1
        for i in range(feature_count):
            proportion *= getConPro(i, fenValu[i], cla, dataSet)
        prior = getPrior(dataSet, cla)
        print(str(prior)+"prior....."+str(proportion))
        score = prior * proportion
        # Strict comparison: on a tie the class seen first is kept.
        if best_score < score:
            best_score = score
            besty = cla
    print(besty)
    return besty

# Demo: classify the sample with first feature 2 and second feature "S".
# This is a top-level call, so it runs whenever the file is executed or imported.
getBaseClass([2,"S"])

猜你喜欢

转载自blog.csdn.net/qq_18617299/article/details/78857116