Weka-3.6.10的C4.5实现与Quinlan教授的C4.5算法的区别

使用数据集:
http://archive.ics.uci.edu/ml/machine-learning-databases/car/
weka-3.6.10的结果是:

safety = low: unacc (576.0)
safety = med
|   persons = 2.0: unacc (192.0)
|   persons = 4.0
|   |   buying = vhigh
|   |   |   maint = vhigh: unacc (12.0)
|   |   |   maint = high: unacc (12.0)
|   |   |   maint = med
|   |   |   |   lug_boot = small: unacc (4.0)
|   |   |   |   lug_boot = med: unacc (4.0/2.0)
|   |   |   |   lug_boot = big: acc (4.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: unacc (4.0)
|   |   |   |   lug_boot = med: unacc (4.0/2.0)
|   |   |   |   lug_boot = big: acc (4.0)
|   |   buying = high
|   |   |   lug_boot = small: unacc (16.0)
|   |   |   lug_boot = med
|   |   |   |   doors = 2.0: unacc (4.0)
|   |   |   |   doors = 3.0: unacc (4.0)
|   |   |   |   doors = 4.0: acc (4.0/1.0)
|   |   |   |   doors = 5more: acc (4.0/1.0)
|   |   |   lug_boot = big
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: acc (4.0)
|   |   |   |   maint = med: acc (4.0)
|   |   |   |   maint = low: acc (4.0)
|   |   buying = med
|   |   |   maint = vhigh
|   |   |   |   lug_boot = small: unacc (4.0)
|   |   |   |   lug_boot = med: unacc (4.0/2.0)
|   |   |   |   lug_boot = big: acc (4.0)
|   |   |   maint = high
|   |   |   |   lug_boot = small: unacc (4.0)
|   |   |   |   lug_boot = med: unacc (4.0/2.0)
|   |   |   |   lug_boot = big: acc (4.0)
|   |   |   maint = med: acc (12.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: acc (4.0)
|   |   |   |   lug_boot = med: acc (4.0/2.0)
|   |   |   |   lug_boot = big: good (4.0)
|   |   buying = low
|   |   |   maint = vhigh
|   |   |   |   lug_boot = small: unacc (4.0)
|   |   |   |   lug_boot = med: unacc (4.0/2.0)
|   |   |   |   lug_boot = big: acc (4.0)
|   |   |   maint = high: acc (12.0)
|   |   |   maint = med
|   |   |   |   lug_boot = small: acc (4.0)
|   |   |   |   lug_boot = med: acc (4.0/2.0)
|   |   |   |   lug_boot = big: good (4.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: acc (4.0)
|   |   |   |   lug_boot = med: acc (4.0/2.0)
|   |   |   |   lug_boot = big: good (4.0)
|   persons = more
|   |   lug_boot = small
|   |   |   buying = vhigh: unacc (16.0)
|   |   |   buying = high: unacc (16.0)
|   |   |   buying = med
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: unacc (4.0)
|   |   |   |   maint = med: acc (4.0/1.0)
|   |   |   |   maint = low: acc (4.0/1.0)
|   |   |   buying = low
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: acc (4.0/1.0)
|   |   |   |   maint = med: acc (4.0/1.0)
|   |   |   |   maint = low: acc (4.0/1.0)
|   |   lug_boot = med
|   |   |   buying = vhigh
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: unacc (4.0)
|   |   |   |   maint = med: acc (4.0/1.0)
|   |   |   |   maint = low: acc (4.0/1.0)
|   |   |   buying = high
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: acc (4.0/1.0)
|   |   |   |   maint = med: acc (4.0/1.0)
|   |   |   |   maint = low: acc (4.0/1.0)
|   |   |   buying = med: acc (16.0/5.0)
|   |   |   buying = low
|   |   |   |   maint = vhigh: acc (4.0/1.0)
|   |   |   |   maint = high: acc (4.0)
|   |   |   |   maint = med: good (4.0/1.0)
|   |   |   |   maint = low: good (4.0/1.0)
|   |   lug_boot = big
|   |   |   buying = vhigh
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: unacc (4.0)
|   |   |   |   maint = med: acc (4.0)
|   |   |   |   maint = low: acc (4.0)
|   |   |   buying = high
|   |   |   |   maint = vhigh: unacc (4.0)
|   |   |   |   maint = high: acc (4.0)
|   |   |   |   maint = med: acc (4.0)
|   |   |   |   maint = low: acc (4.0)
|   |   |   buying = med
|   |   |   |   maint = vhigh: acc (4.0)
|   |   |   |   maint = high: acc (4.0)
|   |   |   |   maint = med: acc (4.0)
|   |   |   |   maint = low: good (4.0)
|   |   |   buying = low
|   |   |   |   maint = vhigh: acc (4.0)
|   |   |   |   maint = high: acc (4.0)
|   |   |   |   maint = med: good (4.0)
|   |   |   |   maint = low: good (4.0)
safety = high
|   persons = 2.0: unacc (192.0)
|   persons = 4.0
|   |   buying = vhigh
|   |   |   maint = vhigh: unacc (12.0)
|   |   |   maint = high: unacc (12.0)
|   |   |   maint = med: acc (12.0)
|   |   |   maint = low: acc (12.0)
|   |   buying = high
|   |   |   maint = vhigh: unacc (12.0)
|   |   |   maint = high: acc (12.0)
|   |   |   maint = med: acc (12.0)
|   |   |   maint = low: acc (12.0)
|   |   buying = med
|   |   |   maint = vhigh: acc (12.0)
|   |   |   maint = high: acc (12.0)
|   |   |   maint = med
|   |   |   |   lug_boot = small: acc (4.0)
|   |   |   |   lug_boot = med: acc (4.0/2.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: good (4.0)
|   |   |   |   lug_boot = med: vgood (4.0/2.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   buying = low
|   |   |   maint = vhigh: acc (12.0)
|   |   |   maint = high
|   |   |   |   lug_boot = small: acc (4.0)
|   |   |   |   lug_boot = med: acc (4.0/2.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   |   maint = med
|   |   |   |   lug_boot = small: good (4.0)
|   |   |   |   lug_boot = med: vgood (4.0/2.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: good (4.0)
|   |   |   |   lug_boot = med: vgood (4.0/2.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   persons = more
|   |   buying = vhigh
|   |   |   maint = vhigh: unacc (12.0)
|   |   |   maint = high: unacc (12.0)
|   |   |   maint = med: acc (12.0/1.0)
|   |   |   maint = low: acc (12.0/1.0)
|   |   buying = high
|   |   |   maint = vhigh: unacc (12.0)
|   |   |   maint = high: acc (12.0/1.0)
|   |   |   maint = med: acc (12.0/1.0)
|   |   |   maint = low: acc (12.0/1.0)
|   |   buying = med
|   |   |   maint = vhigh: acc (12.0/1.0)
|   |   |   maint = high: acc (12.0/1.0)
|   |   |   maint = med
|   |   |   |   lug_boot = small: acc (4.0/1.0)
|   |   |   |   lug_boot = med: vgood (4.0/1.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: good (4.0/1.0)
|   |   |   |   lug_boot = med: vgood (4.0/1.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   buying = low
|   |   |   maint = vhigh: acc (12.0/1.0)
|   |   |   maint = high
|   |   |   |   lug_boot = small: acc (4.0/1.0)
|   |   |   |   lug_boot = med: vgood (4.0/1.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   |   maint = med
|   |   |   |   lug_boot = small: good (4.0/1.0)
|   |   |   |   lug_boot = med: vgood (4.0/1.0)
|   |   |   |   lug_boot = big: vgood (4.0)
|   |   |   maint = low
|   |   |   |   lug_boot = small: good (4.0/1.0)
|   |   |   |   lug_boot = med: vgood (4.0/1.0)
|   |   |   |   lug_boot = big: vgood (4.0)

Ross Quinlan的C4.5-Release8的代码是:
http://www.rulequest.com/Personal/c4.5r8.tar.gz

运行结果是:

safety = low: unacc (576.0/1.4)
safety = med:
|   persons = 2: unacc (192.0/1.4)
|   persons = 4:
|   |   buying = vhigh:
|   |   |   maint = vhigh: unacc (12.0/1.3)
|   |   |   maint = high: unacc (12.0/1.3)
|   |   |   maint = med:
|   |   |   |   lug_boot = small: unacc (4.0/1.2)
|   |   |   |   lug_boot = med: unacc (4.0/3.1)
|   |   |   |   lug_boot = big: acc (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: unacc (4.0/1.2)
|   |   |   |   lug_boot = med: unacc (4.0/3.1)
|   |   |   |   lug_boot = big: acc (4.0/1.2)
|   |   buying = high:
|   |   |   lug_boot = small: unacc (16.0/1.3)
|   |   |   lug_boot = med:
|   |   |   |   doors = 2: unacc (4.0/1.2)
|   |   |   |   doors = 3: unacc (4.0/1.2)
|   |   |   |   doors = 4: acc (4.0/2.2)
|   |   |   |   doors = 5more: acc (4.0/2.2)
|   |   |   lug_boot = big:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: acc (4.0/1.2)
|   |   |   |   maint = med: acc (4.0/1.2)
|   |   |   |   maint = low: acc (4.0/1.2)
|   |   buying = med:
|   |   |   maint = med: acc (12.0/1.3)
|   |   |   maint = vhigh:
|   |   |   |   lug_boot = small: unacc (4.0/1.2)
|   |   |   |   lug_boot = med: unacc (4.0/3.1)
|   |   |   |   lug_boot = big: acc (4.0/1.2)
|   |   |   maint = high:
|   |   |   |   lug_boot = small: unacc (4.0/1.2)
|   |   |   |   lug_boot = med: unacc (4.0/3.1)
|   |   |   |   lug_boot = big: acc (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: acc (4.0/1.2)
|   |   |   |   lug_boot = med: acc (4.0/3.1)
|   |   |   |   lug_boot = big: good (4.0/1.2)
|   |   buying = low:
|   |   |   maint = high: acc (12.0/1.3)
|   |   |   maint = vhigh:
|   |   |   |   lug_boot = small: unacc (4.0/1.2)
|   |   |   |   lug_boot = med: unacc (4.0/3.1)
|   |   |   |   lug_boot = big: acc (4.0/1.2)
|   |   |   maint = med:
|   |   |   |   lug_boot = small: acc (4.0/1.2)
|   |   |   |   lug_boot = med: acc (4.0/3.1)
|   |   |   |   lug_boot = big: good (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: acc (4.0/1.2)
|   |   |   |   lug_boot = med: acc (4.0/3.1)
|   |   |   |   lug_boot = big: good (4.0/1.2)
|   persons = more:
|   |   lug_boot = small:
|   |   |   buying = vhigh: unacc (16.0/1.3)
|   |   |   buying = high: unacc (16.0/1.3)
|   |   |   buying = med:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: unacc (4.0/1.2)
|   |   |   |   maint = med: acc (4.0/2.2)
|   |   |   |   maint = low: acc (4.0/2.2)
|   |   |   buying = low:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: acc (4.0/2.2)
|   |   |   |   maint = med: acc (4.0/2.2)
|   |   |   |   maint = low: acc (4.0/2.2)
|   |   lug_boot = med:
|   |   |   buying = med: acc (16.0/6.9)
|   |   |   buying = vhigh:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: unacc (4.0/1.2)
|   |   |   |   maint = med: acc (4.0/2.2)
|   |   |   |   maint = low: acc (4.0/2.2)
|   |   |   buying = high:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: acc (4.0/2.2)
|   |   |   |   maint = med: acc (4.0/2.2)
|   |   |   |   maint = low: acc (4.0/2.2)
|   |   |   buying = low:
|   |   |   |   maint = vhigh: acc (4.0/2.2)
|   |   |   |   maint = high: acc (4.0/1.2)
|   |   |   |   maint = med: good (4.0/2.2)
|   |   |   |   maint = low: good (4.0/2.2)
|   |   lug_boot = big:
|   |   |   buying = vhigh:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: unacc (4.0/1.2)
|   |   |   |   maint = med: acc (4.0/1.2)
|   |   |   |   maint = low: acc (4.0/1.2)
|   |   |   buying = high:
|   |   |   |   maint = vhigh: unacc (4.0/1.2)
|   |   |   |   maint = high: acc (4.0/1.2)
|   |   |   |   maint = med: acc (4.0/1.2)
|   |   |   |   maint = low: acc (4.0/1.2)
|   |   |   buying = med:
|   |   |   |   maint = vhigh: acc (4.0/1.2)
|   |   |   |   maint = high: acc (4.0/1.2)
|   |   |   |   maint = med: acc (4.0/1.2)
|   |   |   |   maint = low: good (4.0/1.2)
|   |   |   buying = low:
|   |   |   |   maint = vhigh: acc (4.0/1.2)
|   |   |   |   maint = high: acc (4.0/1.2)
|   |   |   |   maint = med: good (4.0/1.2)
|   |   |   |   maint = low: good (4.0/1.2)
safety = high:
|   persons = 2: unacc (192.0/1.4)
|   persons = 4:
|   |   buying = vhigh:
|   |   |   maint = vhigh: unacc (12.0/1.3)
|   |   |   maint = high: unacc (12.0/1.3)
|   |   |   maint = med: acc (12.0/1.3)
|   |   |   maint = low: acc (12.0/1.3)
|   |   buying = high:
|   |   |   maint = vhigh: unacc (12.0/1.3)
|   |   |   maint = high: acc (12.0/1.3)
|   |   |   maint = med: acc (12.0/1.3)
|   |   |   maint = low: acc (12.0/1.3)
|   |   buying = med:
|   |   |   maint = vhigh: acc (12.0/1.3)
|   |   |   maint = high: acc (12.0/1.3)
|   |   |   maint = med:
|   |   |   |   lug_boot = small: acc (4.0/1.2)
|   |   |   |   lug_boot = med: acc (4.0/3.1)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: good (4.0/1.2)
|   |   |   |   lug_boot = med: good (4.0/3.1)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   buying = low:
|   |   |   maint = vhigh: acc (12.0/1.3)
|   |   |   maint = high:
|   |   |   |   lug_boot = small: acc (4.0/1.2)
|   |   |   |   lug_boot = med: acc (4.0/3.1)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   |   maint = med:
|   |   |   |   lug_boot = small: good (4.0/1.2)
|   |   |   |   lug_boot = med: good (4.0/3.1)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: good (4.0/1.2)
|   |   |   |   lug_boot = med: good (4.0/3.1)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   persons = more:
|   |   buying = vhigh:
|   |   |   maint = vhigh: unacc (12.0/1.3)
|   |   |   maint = high: unacc (12.0/1.3)
|   |   |   maint = med: acc (12.0/2.5)
|   |   |   maint = low: acc (12.0/2.5)
|   |   buying = high:
|   |   |   maint = vhigh: unacc (12.0/1.3)
|   |   |   maint = high: acc (12.0/2.5)
|   |   |   maint = med: acc (12.0/2.5)
|   |   |   maint = low: acc (12.0/2.5)
|   |   buying = med:
|   |   |   maint = vhigh: acc (12.0/2.5)
|   |   |   maint = high: acc (12.0/2.5)
|   |   |   maint = med:
|   |   |   |   lug_boot = small: acc (4.0/2.2)
|   |   |   |   lug_boot = med: vgood (4.0/2.2)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: good (4.0/2.2)
|   |   |   |   lug_boot = med: vgood (4.0/2.2)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   buying = low:
|   |   |   maint = vhigh: acc (12.0/2.5)
|   |   |   maint = high:
|   |   |   |   lug_boot = small: acc (4.0/2.2)
|   |   |   |   lug_boot = med: vgood (4.0/2.2)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   |   maint = med:
|   |   |   |   lug_boot = small: good (4.0/2.2)
|   |   |   |   lug_boot = med: vgood (4.0/2.2)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)
|   |   |   maint = low:
|   |   |   |   lug_boot = small: good (4.0/2.2)
|   |   |   |   lug_boot = med: vgood (4.0/2.2)
|   |   |   |   lug_boot = big: vgood (4.0/1.2)

比较两者的不同,可以看到weka没有采用Quinlan的书《C4.5: Programs for Machine Learning》中对未知(unknown)数据的处理方式。
另外我们也可以得知,虽然《C4.5: Programs for Machine Learning》提到的剪枝方式是EBP(Error-Based Pruning),但是代码中其实使用的是PERP(Pessimistic Error Pruning),weka使用的也是PERP。

注意,如果使用http://www.rulequest.com/Personal/c4.5r8.tar.gz中自带的hypo数据集,那么就能看出weka的结果比Quinlan的实现差一些。
因为Quinlan实现的版本生成的决策树更加简化,更加适合提取知识。

猜你喜欢

转载自blog.csdn.net/appleyuchi/article/details/83791046