def createTree(dataSet, minSup=1):
    """Build an FP-tree from a transaction table without mining it.

    Args:
        dataSet: mapping whose keys are transactions (iterables of hashable
            items, e.g. frozensets) and whose values are the number of times
            each transaction occurs.
        minSup: minimum total count an item needs in order to be kept.

    Returns:
        ``(retTree, headerTable)`` where ``retTree`` is the root node and
        ``headerTable`` maps every frequent item to ``[count, firstNode]``.
        ``(None, None)`` when no item reaches ``minSup``.
    """
    # First pass: total occurrence count of every item.
    headerTable = {}
    for trans, count in dataSet.items():
        for item in trans:
            headerTable[item] = headerTable.get(item, 0) + count

    # Drop infrequent items.  Iterate over a snapshot of the keys: deleting
    # from a dict while iterating its live key view raises RuntimeError on
    # Python 3.
    for k in list(headerTable):
        if headerTable[k] < minSup:
            del headerTable[k]

    if not headerTable:
        return None, None  # no item meets the minimum support

    # Fix ONE global item order (descending count).  Sorting every
    # transaction independently leaves equal-count items in whatever order
    # that transaction happens to iterate, so two transactions could disagree
    # on which of two tied items comes first; the resulting paths would not
    # share prefixes and the prefix-path counts used for mining would be split.
    byCount = sorted(headerTable, key=headerTable.get, reverse=True)
    rank = {item: pos for pos, item in enumerate(byCount)}

    # Reformat header entries to [count, head of the node-link chain].
    for k in headerTable:
        headerTable[k] = [headerTable[k], None]

    retTree = treeNode('Null Set', 1, None)

    # Second pass: insert each transaction's frequent items in global order.
    for tranSet, count in dataSet.items():
        orderedItems = sorted(
            (item for item in tranSet if item in rank),
            key=rank.__getitem__,
        )
        if orderedItems:
            updateTree(orderedItems, retTree, headerTable, count)
    return retTree, headerTable


def updateTree(items, inTree, headerTable, count):
    """Insert an ordered item sequence below ``inTree``, adding ``count``.

    Walks down from ``inTree`` one item at a time.  An existing child has its
    count increased through ``inc(count)``; a missing child is created and
    registered in ``headerTable``.  An empty ``items`` is a no-op.

    Args:
        items: frequent items of one transaction, already ordered.
        inTree: node to start from; its ``children`` is indexed by item.
        headerTable: mapping item -> ``[count, firstNode]``; slot 1 is filled
            in when the first node for an item is created.
        count: number of occurrences of this transaction.
    """
    node = inTree
    for item in items:
        if item in node.children:
            node.children[item].inc(count)
        else:
            node.children[item] = treeNode(item, count, node)
            if headerTable[item][1] is None:
                # First node for this item: it becomes the chain head.
                headerTable[item][1] = node.children[item]
            else:
                # updateHeader is defined elsewhere; presumably it appends the
                # new node to the end of the item's node-link chain.
                updateHeader(headerTable[item][1], node.children[item])
        node = node.children[item]
简单数据集及数据包装器
def loadSimpDat():
    """Return a small hard-coded list of transactions for experiments.

    Each transaction is a list of single-character item names.  A fresh list
    of fresh lists is built on every call.
    """
    rows = (
        'rzhjp',
        'zyxwvuts',
        'z',
        'rxnos',
        'yrxzqtp',
        'yzxeqstm',
    )
    # Every character of a row is one item.
    return [list(row) for row in rows]
def createInitSet(dataSet):
    """Convert a list of transactions into a ``{frozenset: count}`` table.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items.

    Returns:
        dict mapping ``frozenset(transaction)`` to the number of transactions
        in ``dataSet`` that contain exactly that set of items.
    """
    retDict = {}
    for trans in dataSet:
        key = frozenset(trans)
        # Accumulate rather than assign 1: repeated transactions must each
        # contribute to the count, otherwise their support is lost.
        retDict[key] = retDict.get(key, 0) + 1
    return retDict
从一棵FP树中挖掘频繁项集
发现以给定元素项结尾的所有路径的函数
def ascendTree(leafNode, prefixPath):
    """Append the names on the path from ``leafNode`` up to the root.

    Starting at ``leafNode`` and following ``parent`` links, every node that
    has a parent has its ``name`` appended to ``prefixPath`` (so the node
    whose ``parent`` is None is not included).  ``prefixPath`` is modified in
    place; nothing is returned.
    """
    node = leafNode
    # Iterative walk: same result as recursing on the parent, without using
    # one stack frame per tree level.
    while node.parent is not None:
        prefixPath.append(node.name)
        node = node.parent


def findPrefixPath(basePat, treeNode):
    """Collect the prefix paths of every node on a node-link chain.

    Args:
        basePat: the item being processed; not used by the body.
        treeNode: first node of the chain (as stored in a header table);
            following ``nodeLink`` until None visits the rest.

    Returns:
        dict mapping ``frozenset`` of the names above each visited node
        (the node's own name excluded) to the summed ``count`` of the nodes
        having that set of ancestors.  Nodes directly below the root
        contribute nothing.
    """
    condPats = {}
    node = treeNode
    while node is not None:
        prefixPath = []
        ascendTree(node, prefixPath)
        if len(prefixPath) > 1:
            key = frozenset(prefixPath[1:])  # drop the node's own name
            # Accumulate instead of overwrite: two nodes whose ancestors form
            # the same set must both contribute their counts.
            condPats[key] = condPats.get(key, 0) + node.count
        node = node.nodeLink
    return condPats
递归查找频繁项集的mineTree函数
def mineTree(inTree, headerTable, minSup, preFix, freqItemList):
    """Recursively collect frequent itemsets from a header table.

    For every item in ``headerTable`` (lowest count first) the itemset
    ``preFix | {item}`` is appended to ``freqItemList``; then a conditional
    tree is built from the item's prefix paths and mined recursively with the
    extended itemset as the new prefix.

    Args:
        inTree: tree the header table belongs to; not read by the body.
        headerTable: mapping item -> ``[count, firstNode]``.
        minSup: minimum support passed on to ``createTree``.
        preFix: set of items already fixed on this recursion path; it is
            copied, never modified.
        freqItemList: output list; found itemsets are appended in place.
    """
    # Sort on the count alone.  Sorting on the whole [count, node] entry makes
    # equal counts fall through to comparing the node objects, which only
    # works if the node type defines an ordering (not visible from here).
    bigL = sorted(headerTable, key=lambda item: headerTable[item][0])
    for basePat in bigL:  # start from the least frequent item
        newFreqSet = preFix.copy()
        newFreqSet.add(basePat)
        freqItemList.append(newFreqSet)
        # 1. conditional pattern base: prefix paths of every basePat node
        condPattBases = findPrefixPath(basePat, headerTable[basePat][1])
        # 2. conditional FP-tree built from that pattern base
        myCondTree, myHead = createTree(condPattBases, minSup)
        # 3. mine the conditional tree when it kept at least one item
        if myHead is not None:
            mineTree(myCondTree, myHead, minSup, newFreqSet, freqItemList)