Apriori算法Python版本

import numpy as np
from collections import defaultdict

# Sample transactions: one string per basket, items separated by commas.
d = np.array(
    [
        "milk,water,juice",
        "milk,water",
        "milk,rice,water",
        "water",
        "tomato,juice",
        "juice,cucumber",
    ]
)

def Apriori(data,min_spport=2):
    """Mine every frequent itemset from comma-separated transactions.

    Implements the level-wise Apriori search: count the support of the
    current candidates, keep the frequent ones, then join frequent
    k-itemsets that share their first k-1 items into (k+1)-item candidates,
    discarding any candidate that has an infrequent k-item subset.

    Args:
        data: Iterable of strings, one per transaction, with item names
            separated by ``","`` (e.g. ``"milk,water"``). Repeated items
            inside one transaction count once. Tokens are used exactly as
            ``str.split(",")`` yields them (no whitespace stripping).
        min_spport: Minimum number of transactions an itemset must occur in
            to be reported. The misspelled name is kept so existing keyword
            callers continue to work.

    Returns:
        A list with one entry per itemset size (1-itemsets first). Each
        entry is a list of ``[itemset, support]`` pairs, where ``itemset``
        is the item names sorted and joined with ``","``; pairs are ordered
        by their sorted item names so the output is reproducible. Returns
        ``[]`` when nothing reaches the threshold.
    """
    # One frozenset of item names per transaction; duplicates collapse.
    transactions = [frozenset(row.split(",")) for row in data]

    def count_frequent(candidates):
        """Map each candidate tuple to its support, keeping frequent ones."""
        support = defaultdict(int)
        for candidate in candidates:
            wanted = frozenset(candidate)
            for basket in transactions:
                if wanted <= basket:
                    support[candidate] += 1
        return {c: n for c, n in support.items() if n >= min_spport}

    def join_and_prune(frequent):
        """Build (k+1)-item candidates from the frequent k-item tuples."""
        ordered = sorted(frequent)
        known = set(ordered)
        candidates = []
        for pos, left in enumerate(ordered):
            for right in ordered[pos + 1:]:
                # Tuples sharing a (k-1)-prefix are contiguous in sorted
                # order, so the first mismatch ends the run for `left`.
                if left[:-1] != right[:-1]:
                    break
                candidate = left + (right[-1],)
                # Prune: every k-item subset must itself be frequent.
                if all(candidate[:k] + candidate[k + 1:] in known
                       for k in range(len(candidate))):
                    candidates.append(candidate)
        return candidates

    levels = []
    # Itemsets are sorted tuples of item names throughout, so an itemset has
    # exactly one representation and can be used as a dict key.
    candidates = sorted({(item,) for basket in transactions for item in basket})
    while candidates:
        frequent = count_frequent(candidates)
        if not frequent:
            break
        levels.append([[",".join(itemset), frequent[itemset]]
                       for itemset in sorted(frequent)])
        candidates = join_and_prune(frequent)
    return levels



# Run the miner on the sample transactions above and print the result.
print(Apriori(d))

输出结果为

[[['juice', 3], ['milk', 3], ['water', 4]], [['water,milk', 3]]]

猜你喜欢

转载自blog.csdn.net/qq_45957458/article/details/127957077