"""Categorical naive Bayes classifier trained from a whitespace-separated file.

Each data row lists attribute values followed by the class label in the last
column.  `train` estimates the class priors and the per-class conditional
probability of every attribute value; `test` scores a sample against every
class in the trained model and returns the most probable label.
(Method follows p.151 of "Machine Learning" by Zhou Zhihua.)
"""
from collections import Counter


def loadData(filename):
    """Read the data file and return ``(rows, labels)``.

    Args:
        filename: Path of a text file with one sample per line, fields
            separated by whitespace, class label in the last field.

    Returns:
        A tuple ``(dataSetMat, labels)``: ``dataSetMat`` is a list of rows,
        each row the list of string fields of one line (label included);
        ``labels`` is the list of last fields, in file order.
    """
    dataSetMat = []
    labels = []
    with open(filename) as fr:
        for line in fr:
            # split() with no argument tolerates repeated separators and
            # surrounding whitespace, so no empty fields are produced.
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line would otherwise become a
                # bogus one-field sample with an empty class label.
                continue
            dataSetMat.append(fields)
            labels.append(fields[-1])
    return dataSetMat, labels


def train(dataSet, labels):
    """Estimate class priors and per-attribute conditional probabilities.

    Args:
        dataSet: List of rows; every row holds the attribute values followed
            by the class label as its last element.  The number of
            attributes is taken from the first row.
        labels: Class label of each row (the last column of ``dataSet``).

    Returns:
        A dict mapping each class label to a list whose first element is the
        class prior and whose following elements are, one per attribute, a
        dict ``{value: P(value | class)}``.  For example::

            {'soft': [0.333, {'3': 0.375, '1': 0.25, '2': 0.375}, ...],
             'no':   [0.458, {...}, ...],
             'hard': [0.208, {...}, ...]}

        An empty dict is returned when ``labels`` is empty.
    """
    if not labels:
        return {}

    class_counts = Counter(labels)  # keeps first-seen label order
    total = float(len(labels))
    n_attrs = len(dataSet[0]) - 1

    # One value counter per (class, attribute), filled in a single pass over
    # the data instead of re-scanning the rows for every class and value.
    value_counts = {
        label: [Counter() for _ in range(n_attrs)] for label in class_counts
    }
    for row in dataSet:
        per_attr = value_counts.get(row[-1])
        if per_attr is None:
            # Row whose label does not occur in `labels`: not counted.
            continue
        for i in range(n_attrs):
            per_attr[i][row[i]] += 1

    res = {}
    for label, class_count in class_counts.items():
        entry = [class_count / total]  # prior probability of the class
        for counter in value_counts[label]:
            # Probability of each attribute value within this class.
            entry.append(
                {value: count / float(class_count)
                 for value, count in counter.items()}
            )
        res[label] = entry
    return res


def test(testVect, probMat):
    """Classify one sample with the model produced by `train`.

    Args:
        testVect: Attribute values of the sample, in column order.
        probMat: Model dict as returned by `train`.

    Returns:
        The label with the highest score (prior times the conditional
        probability of every attribute value).  Ties go to the label that
        comes first in ``probMat``.

    Raises:
        ValueError: If ``probMat`` contains no classes.
        IndexError: If ``testVect`` has more values than the model has
            attributes.

    The per-class scores are printed, in ``probMat`` order, before returning.
    A value never seen together with a class sets that class's score to
    zero (no smoothing is applied).
    """
    if not probMat:
        raise ValueError("probMat contains no classes")

    scores = {}
    for label, entry in probMat.items():
        score = entry[0]  # start from the class prior
        for i, value in enumerate(testVect):
            table = entry[i + 1]  # entry[0] is the prior, attributes follow
            if value in table:
                score *= table[value]
            else:
                score = 0
        scores[label] = score

    print(*scores.values())
    return max(scores, key=scores.get)


# The name matches pytest's default "test*" pattern; this keeps pytest from
# collecting the classifier as a test case when it is imported into a test
# module.
test.__test__ = False


def main():
    """Run the original demo: train on data.txt and classify one sample."""
    # max() over a dict with key=dict.get returns the key of the largest value.
    data = {'s': 4, 'f': 7, 'e': 8}
    print(max(data, key=data.get))  # prints: e

    dataSet, labels = loadData('data.txt')
    probMat = train(dataSet, labels)
    res = test(['3', '1', '2', '2', '1'], probMat)
    print(res)


if __name__ == '__main__':
    main()
# Naive Bayes implemented in Python
# You may also like
# Reposted from blog.csdn.net/qq_27015119/article/details/80793578
# Today's recommendations
# Weekly ranking