基数预估算法 错误率验证

package hllctest

import java.util

import org.scalatest.{FlatSpec}
import org.spark.sqludf.HLLCounter

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.Random

class HllcCrossSetTest extends FlatSpec {

  val ramdom = new Random()
  val m = 18

  // 用于验证hllc 的错误率

  def errorRateCal(sampleCount: Int) = {

    errorRate(0.01, sampleCount)
    errorRate(0.05, sampleCount)
    errorRate(0.1, sampleCount)
    errorRate(0.2, sampleCount)
    errorRate(0.5, sampleCount)

  }


  "hllc test" should "hllc merge,mix error rate" in {
    errorRateCal(1000)
    errorRateCal(2000)
    errorRateCal(5000)
    errorRateCal(10000)
    errorRateCal(20000)
    errorRateCal(50000)
    errorRateCal(100000)
    errorRateCal(200000)
    errorRateCal(500000)
    errorRateCal(100000)
    errorRateCal(200000)
    errorRateCal(500000)
  }


  // 不放回抽样  测试集合生成
  def getRandomStr(setCollection: mutable.HashSet[String], totalIntArray: Array[Int]): Unit = {
    val str = getTestString(totalIntArray)
    if (!setCollection.contains(str)) setCollection.add(str)
    else
      getRandomStr(setCollection, totalIntArray)
  }


  //5%    100w 级别集合  计算总量
  def errorRate(r: Double, testSetLength: Int) = {
    println(s" ********************begin r :{$r},testSetLength:{$testSetLength} ,m:{$m} ***********************")
    var setA = new mutable.HashSet[String]()
    var setB = new mutable.HashSet[String]()
    val tatolCount = (testSetLength / r).toInt
    var b = System.currentTimeMillis()

    val totalIntArray = (tatolCount + "").toCharArray.map(x => x.toString.toInt)

    var timeRecord = System.currentTimeMillis()
    for (i <- 0 until testSetLength) {
      getRandomStr(setA, totalIntArray)
      getRandomStr(setB, totalIntArray)
      //      if (i%5000 == 0) {
      //        println(s" generate data ${i} ,cost: ${System.currentTimeMillis()-timeRecord}) ")
      //        timeRecord = System.currentTimeMillis()
      //      }
    }


    println(s"tatolCount: ${tatolCount}  ,r : ${r}  setA size: ${setA.size} , setB size: ${setB.size} ")

    var e = System.currentTimeMillis()
    println(s" generate data cost time: ${e - b}  ")

    /*
    * realMix  交集
    * hllcMix
    * realMerge
    * hllcMerge
    * */

    b = System.currentTimeMillis()
    val realMixCnt = realMix(setA, setB)
    val mixRate = realMixRate(realMixCnt, setA)
    println(s" realMixCnt: ${realMixCnt} , mixRate:${mixRate}")
    e = System.currentTimeMillis()
    println(s" Map collection cost time: ${e - b}  ")

    b = System.currentTimeMillis()
    val hllcMixCnt = hllcMix(setA, setB)
    val mixRatehllc = realMixRate(hllcMixCnt, setA)
    println(s" hllcMixCnt: ${hllcMixCnt} , mixRatehllc:${mixRatehllc}")
    val distinct = mixRatehllc - mixRate
    println(f" mixRatehllc - mixRate:  $distinct%1.6f ")
    println(f" hllcMixCnt - realMixCnt:  ${hllcMixCnt - realMixCnt} ")
    e = System.currentTimeMillis()
    println(s" hllc cost time: ${e - b}  ")


  }


  def realMix(setA: mutable.HashSet[String], setB: mutable.HashSet[String]) = {
    val hashSet = new util.HashSet[String]
    setA.foreach(str => if (!hashSet.contains(str)) hashSet.add(str))
    var mixCount = 0
    setB.foreach(str => if (hashSet.contains(str)) mixCount += 1)
    mixCount
  }


  def realMixRate(mixCount: Int, set: mutable.HashSet[String]) = {
    mixCount * 1.0 / set.size
  }


  def realMixRate(mixCount: Long, set: mutable.HashSet[String]) = {
    mixCount * 1.0 / set.size
  }


  def hllcMix(setA: mutable.HashSet[String], setB: mutable.HashSet[String]): Long = {

    val hllc16A = new HLLCounter(m)
    setA.foreach(item => hllc16A.add(item))

    val hllc16B = new HLLCounter(m)
    setA.foreach(item => hllc16B.add(item))
    hllc16A.getCountEstimate + hllc16B.getCountEstimate - hllcMerge(setA, setB)
  }


  def hllcMerge(setA: mutable.HashSet[String], setB: mutable.HashSet[String]) = {
    val hllc16 = new HLLCounter(m)
    setA.foreach(item => hllc16.add(item))
    setB.foreach(item => hllc16.add(item))
    hllc16.getCountEstimate
  }

  def realMerge(setA: ArrayBuffer[String], setB: ArrayBuffer[String]) = {
    val hashSet = new util.HashSet[String]
    setA.foreach(str => if (!hashSet.contains(str)) hashSet.add(str))
    setB.foreach(str => if (!hashSet.contains(str)) hashSet.add(str))
    hashSet.size()
  }


  def getTestString(totalCountArray: Array[Int]) = {
    val sbf = new StringBuffer()
    //没一位的数字是几, 然后根据这个来生成随机数
    totalCountArray.foreach(s => {
      if (!0.equals(s))
        sbf.append(getRamdomStringS(s))
      else sbf.append(getRamdomStringS(10))
    })
    sbf.toString
  }


  //  n -> 10 ^^n
  def getRamdomString(length: Int): String = {
    val sbf = new StringBuffer()
    for (i <- 0 until length) sbf.append((ramdom.nextInt(10) + 97).toChar)
    sbf.toString
  }


  //  n -> 10 ^^n
  def getRamdomStringS(l: Int): String = {
    (ramdom.nextInt(l) + 97).toChar.toString
  }


}


********************begin r :{0.01},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.01  setA size: 1000 , setB size: 1000
generate data cost time: 48 
realMixCnt: 12 , mixRate:0.012
Map collection cost time: 6 
hllcMixCnt: 8 , mixRatehllc:0.008
mixRatehllc - mixRate:  -0.004000
hllcMixCnt - realMixCnt:  -4
hllc cost time: 131 
********************begin r :{0.05},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 20000  ,r : 0.05  setA size: 1000 , setB size: 1000
generate data cost time: 4 
realMixCnt: 49 , mixRate:0.049
Map collection cost time: 3 
hllcMixCnt: 48 , mixRatehllc:0.048
mixRatehllc - mixRate:  -0.001000
hllcMixCnt - realMixCnt:  -1
hllc cost time: 19 
********************begin r :{0.1},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 10000  ,r : 0.1  setA size: 1000 , setB size: 1000
generate data cost time: 3 
realMixCnt: 108 , mixRate:0.108
Map collection cost time: 5 
hllcMixCnt: 107 , mixRatehllc:0.107
mixRatehllc - mixRate:  -0.001000
hllcMixCnt - realMixCnt:  -1
hllc cost time: 15 
********************begin r :{0.2},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 5000  ,r : 0.2  setA size: 1000 , setB size: 1000
generate data cost time: 3 
realMixCnt: 196 , mixRate:0.196
Map collection cost time: 1 
hllcMixCnt: 195 , mixRatehllc:0.195
mixRatehllc - mixRate:  -0.001000
hllcMixCnt - realMixCnt:  -1
hllc cost time: 16 
********************begin r :{0.5},testSetLength:{1000} ,m:{18} ***********************
tatolCount: 2000  ,r : 0.5  setA size: 1000 , setB size: 1000
generate data cost time: 7 
realMixCnt: 489 , mixRate:0.489
Map collection cost time: 1 
hllcMixCnt: 490 , mixRatehllc:0.49
mixRatehllc - mixRate:  0.001000
hllcMixCnt - realMixCnt:  1
hllc cost time: 8 
********************begin r :{0.01},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.01  setA size: 2000 , setB size: 2000
generate data cost time: 6 
realMixCnt: 11 , mixRate:0.0055
Map collection cost time: 0 
hllcMixCnt: 19 , mixRatehllc:0.0095
mixRatehllc - mixRate:  0.004000
hllcMixCnt - realMixCnt:  8
hllc cost time: 24 
********************begin r :{0.05},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 40000  ,r : 0.05  setA size: 2000 , setB size: 2000
generate data cost time: 5 
realMixCnt: 102 , mixRate:0.051
Map collection cost time: 1 
hllcMixCnt: 110 , mixRatehllc:0.055
mixRatehllc - mixRate:  0.004000
hllcMixCnt - realMixCnt:  8
hllc cost time: 11 
********************begin r :{0.1},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 20000  ,r : 0.1  setA size: 2000 , setB size: 2000
generate data cost time: 4 
realMixCnt: 192 , mixRate:0.096
Map collection cost time: 0 
hllcMixCnt: 192 , mixRatehllc:0.096
mixRatehllc - mixRate:  0.000000
hllcMixCnt - realMixCnt:  0
hllc cost time: 11 
********************begin r :{0.2},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 10000  ,r : 0.2  setA size: 2000 , setB size: 2000
generate data cost time: 3 
realMixCnt: 395 , mixRate:0.1975
Map collection cost time: 1 
hllcMixCnt: 387 , mixRatehllc:0.1935
mixRatehllc - mixRate:  -0.004000
hllcMixCnt - realMixCnt:  -8
hllc cost time: 12 
********************begin r :{0.5},testSetLength:{2000} ,m:{18} ***********************
tatolCount: 4000  ,r : 0.5  setA size: 2000 , setB size: 2000
generate data cost time: 6 
realMixCnt: 986 , mixRate:0.493
Map collection cost time: 1 
hllcMixCnt: 981 , mixRatehllc:0.4905
mixRatehllc - mixRate:  -0.002500
hllcMixCnt - realMixCnt:  -5
hllc cost time: 16 
********************begin r :{0.01},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.01  setA size: 5000 , setB size: 5000
generate data cost time: 17 
realMixCnt: 48 , mixRate:0.0096
Map collection cost time: 2 
hllcMixCnt: 41 , mixRatehllc:0.0082
mixRatehllc - mixRate:  -0.001400
hllcMixCnt - realMixCnt:  -7
hllc cost time: 13 
********************begin r :{0.05},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.05  setA size: 5000 , setB size: 5000
generate data cost time: 7 
realMixCnt: 263 , mixRate:0.0526
Map collection cost time: 1 
hllcMixCnt: 271 , mixRatehllc:0.0542
mixRatehllc - mixRate:  0.001600
hllcMixCnt - realMixCnt:  8
hllc cost time: 13 
********************begin r :{0.1},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 50000  ,r : 0.1  setA size: 5000 , setB size: 5000
generate data cost time: 6 
realMixCnt: 527 , mixRate:0.1054
Map collection cost time: 0 
hllcMixCnt: 526 , mixRatehllc:0.1052
mixRatehllc - mixRate:  -0.000200
hllcMixCnt - realMixCnt:  -1
hllc cost time: 26 
********************begin r :{0.2},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 25000  ,r : 0.2  setA size: 5000 , setB size: 5000
generate data cost time: 16 
realMixCnt: 2505 , mixRate:0.501
Map collection cost time: 4 
hllcMixCnt: 2496 , mixRatehllc:0.4992
mixRatehllc - mixRate:  -0.001800
hllcMixCnt - realMixCnt:  -9
hllc cost time: 16 
********************begin r :{0.5},testSetLength:{5000} ,m:{18} ***********************
tatolCount: 10000  ,r : 0.5  setA size: 5000 , setB size: 5000
generate data cost time: 8 
realMixCnt: 2499 , mixRate:0.4998
Map collection cost time: 1 
hllcMixCnt: 2505 , mixRatehllc:0.501
mixRatehllc - mixRate:  0.001200
hllcMixCnt - realMixCnt:  6
hllc cost time: 14 
********************begin r :{0.01},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.01  setA size: 10000 , setB size: 10000
generate data cost time: 15 
realMixCnt: 103 , mixRate:0.0103
Map collection cost time: 2 
hllcMixCnt: 62 , mixRatehllc:0.0062
mixRatehllc - mixRate:  -0.004100
hllcMixCnt - realMixCnt:  -41
hllc cost time: 24 
********************begin r :{0.05},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.05  setA size: 10000 , setB size: 10000
generate data cost time: 18 
realMixCnt: 484 , mixRate:0.0484
Map collection cost time: 2 
hllcMixCnt: 467 , mixRatehllc:0.0467
mixRatehllc - mixRate:  -0.001700
hllcMixCnt - realMixCnt:  -17
hllc cost time: 18 
********************begin r :{0.1},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.1  setA size: 10000 , setB size: 10000
generate data cost time: 11 
realMixCnt: 938 , mixRate:0.0938
Map collection cost time: 1 
hllcMixCnt: 967 , mixRatehllc:0.0967
mixRatehllc - mixRate:  0.002900
hllcMixCnt - realMixCnt:  29
hllc cost time: 12 
********************begin r :{0.2},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 50000  ,r : 0.2  setA size: 10000 , setB size: 10000
generate data cost time: 10 
realMixCnt: 1997 , mixRate:0.1997
Map collection cost time: 5 
hllcMixCnt: 1999 , mixRatehllc:0.1999
mixRatehllc - mixRate:  0.000200
hllcMixCnt - realMixCnt:  2
hllc cost time: 15 
********************begin r :{0.5},testSetLength:{10000} ,m:{18} ***********************
tatolCount: 20000  ,r : 0.5  setA size: 10000 , setB size: 10000
generate data cost time: 18 
realMixCnt: 5010 , mixRate:0.501
Map collection cost time: 4 
hllcMixCnt: 4990 , mixRatehllc:0.499
mixRatehllc - mixRate:  -0.002000
hllcMixCnt - realMixCnt:  -20
hllc cost time: 23 
********************begin r :{0.01},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.01  setA size: 20000 , setB size: 20000
generate data cost time: 41 
realMixCnt: 218 , mixRate:0.0109
Map collection cost time: 4 
hllcMixCnt: 134 , mixRatehllc:0.0067
mixRatehllc - mixRate:  -0.004200
hllcMixCnt - realMixCnt:  -84
hllc cost time: 36 
********************begin r :{0.05},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 400000  ,r : 0.05  setA size: 20000 , setB size: 20000
generate data cost time: 19 
realMixCnt: 946 , mixRate:0.0473
Map collection cost time: 3 
hllcMixCnt: 949 , mixRatehllc:0.04745
mixRatehllc - mixRate:  0.000150
hllcMixCnt - realMixCnt:  3
hllc cost time: 23 
********************begin r :{0.1},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.1  setA size: 20000 , setB size: 20000
generate data cost time: 26 
realMixCnt: 2001 , mixRate:0.10005
Map collection cost time: 10 
hllcMixCnt: 2080 , mixRatehllc:0.104
mixRatehllc - mixRate:  0.003950
hllcMixCnt - realMixCnt:  79
hllc cost time: 56 
********************begin r :{0.2},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.2  setA size: 20000 , setB size: 20000
generate data cost time: 28 
realMixCnt: 4034 , mixRate:0.2017
Map collection cost time: 7 
hllcMixCnt: 4113 , mixRatehllc:0.20565
mixRatehllc - mixRate:  0.003950
hllcMixCnt - realMixCnt:  79
hllc cost time: 25 
********************begin r :{0.5},testSetLength:{20000} ,m:{18} ***********************
tatolCount: 40000  ,r : 0.5  setA size: 20000 , setB size: 20000
generate data cost time: 24 
realMixCnt: 9975 , mixRate:0.49875
Map collection cost time: 8 
hllcMixCnt: 9994 , mixRatehllc:0.4997
mixRatehllc - mixRate:  0.000950
hllcMixCnt - realMixCnt:  19
hllc cost time: 23 
********************begin r :{0.01},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 5000000  ,r : 0.01  setA size: 50000 , setB size: 50000
generate data cost time: 88 
realMixCnt: 468 , mixRate:0.00936
Map collection cost time: 48 
hllcMixCnt: 603 , mixRatehllc:0.01206
mixRatehllc - mixRate:  0.002700
hllcMixCnt - realMixCnt:  135
hllc cost time: 163 
********************begin r :{0.05},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.05  setA size: 50000 , setB size: 50000
generate data cost time: 99 
realMixCnt: 2381 , mixRate:0.04762
Map collection cost time: 17 
hllcMixCnt: 2335 , mixRatehllc:0.0467
mixRatehllc - mixRate:  -0.000920
hllcMixCnt - realMixCnt:  -46
hllc cost time: 59 
********************begin r :{0.1},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.1  setA size: 50000 , setB size: 50000
generate data cost time: 52 
realMixCnt: 5091 , mixRate:0.10182
Map collection cost time: 19 
hllcMixCnt: 5116 , mixRatehllc:0.10232
mixRatehllc - mixRate:  0.000500
hllcMixCnt - realMixCnt:  25
hllc cost time: 88 
********************begin r :{0.2},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 250000  ,r : 0.2  setA size: 50000 , setB size: 50000
generate data cost time: 72 
realMixCnt: 24889 , mixRate:0.49778
Map collection cost time: 18 
hllcMixCnt: 25002 , mixRatehllc:0.50004
mixRatehllc - mixRate:  0.002260
hllcMixCnt - realMixCnt:  113
hllc cost time: 61 
********************begin r :{0.5},testSetLength:{50000} ,m:{18} ***********************
tatolCount: 100000  ,r : 0.5  setA size: 50000 , setB size: 50000
generate data cost time: 86 
realMixCnt: 25140 , mixRate:0.5028
Map collection cost time: 15 
hllcMixCnt: 25190 , mixRatehllc:0.5038
mixRatehllc - mixRate:  0.001000
hllcMixCnt - realMixCnt:  50
hllc cost time: 71 
********************begin r :{0.01},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.01  setA size: 100000 , setB size: 100000
generate data cost time: 154 
realMixCnt: 1051 , mixRate:0.01051
Map collection cost time: 29 
hllcMixCnt: 811 , mixRatehllc:0.00811
mixRatehllc - mixRate:  -0.002400
hllcMixCnt - realMixCnt:  -240
hllc cost time: 232 
********************begin r :{0.05},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.05  setA size: 100000 , setB size: 100000
generate data cost time: 171 
realMixCnt: 4903 , mixRate:0.04903
Map collection cost time: 19 
hllcMixCnt: 5095 , mixRatehllc:0.05095
mixRatehllc - mixRate:  0.001920
hllcMixCnt - realMixCnt:  192
hllc cost time: 122 
********************begin r :{0.1},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.1  setA size: 100000 , setB size: 100000
generate data cost time: 131 
realMixCnt: 9931 , mixRate:0.09931
Map collection cost time: 42 
hllcMixCnt: 10136 , mixRatehllc:0.10136
mixRatehllc - mixRate:  0.002050
hllcMixCnt - realMixCnt:  205
hllc cost time: 155 
********************begin r :{0.2},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.2  setA size: 100000 , setB size: 100000
generate data cost time: 117 
realMixCnt: 20148 , mixRate:0.20148
Map collection cost time: 35 
hllcMixCnt: 20414 , mixRatehllc:0.20414
mixRatehllc - mixRate:  0.002660
hllcMixCnt - realMixCnt:  266
hllc cost time: 111 
********************begin r :{0.5},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.5  setA size: 100000 , setB size: 100000
generate data cost time: 130 
realMixCnt: 49964 , mixRate:0.49964
Map collection cost time: 35 
hllcMixCnt: 50268 , mixRatehllc:0.50268
mixRatehllc - mixRate:  0.003040
hllcMixCnt - realMixCnt:  304
hllc cost time: 133 
********************begin r :{0.01},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 20000000  ,r : 0.01  setA size: 200000 , setB size: 200000
generate data cost time: 260 
realMixCnt: 2035 , mixRate:0.010175
Map collection cost time: 83 
hllcMixCnt: 1247 , mixRatehllc:0.006235
mixRatehllc - mixRate:  -0.003940
hllcMixCnt - realMixCnt:  -788
hllc cost time: 389 
********************begin r :{0.05},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 4000000  ,r : 0.05  setA size: 200000 , setB size: 200000
generate data cost time: 311 
realMixCnt: 10159 , mixRate:0.050795
Map collection cost time: 94 
hllcMixCnt: 10030 , mixRatehllc:0.05015
mixRatehllc - mixRate:  -0.000645
hllcMixCnt - realMixCnt:  -129
hllc cost time: 308 
********************begin r :{0.1},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.1  setA size: 200000 , setB size: 200000
generate data cost time: 255 
realMixCnt: 20009 , mixRate:0.100045
Map collection cost time: 133 
hllcMixCnt: 19539 , mixRatehllc:0.097695
mixRatehllc - mixRate:  -0.002350
hllcMixCnt - realMixCnt:  -470
hllc cost time: 235 
********************begin r :{0.2},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.2  setA size: 200000 , setB size: 200000
generate data cost time: 229 
realMixCnt: 39946 , mixRate:0.19973
Map collection cost time: 92 
hllcMixCnt: 41310 , mixRatehllc:0.20655
mixRatehllc - mixRate:  0.006820
hllcMixCnt - realMixCnt:  1364
hllc cost time: 271 
********************begin r :{0.5},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 400000  ,r : 0.5  setA size: 200000 , setB size: 200000
generate data cost time: 357 
realMixCnt: 100095 , mixRate:0.500475
Map collection cost time: 93 
hllcMixCnt: 100242 , mixRatehllc:0.50121
mixRatehllc - mixRate:  0.000735
hllcMixCnt - realMixCnt:  147
hllc cost time: 422 
********************begin r :{0.01},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 50000000  ,r : 0.01  setA size: 500000 , setB size: 500000
generate data cost time: 758 
realMixCnt: 5084 , mixRate:0.010168
Map collection cost time: 211 
hllcMixCnt: 2978 , mixRatehllc:0.005956
mixRatehllc - mixRate:  -0.004212
hllcMixCnt - realMixCnt:  -2106
hllc cost time: 844 
********************begin r :{0.05},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.05  setA size: 500000 , setB size: 500000
generate data cost time: 721 
realMixCnt: 25296 , mixRate:0.050592
Map collection cost time: 222 
hllcMixCnt: 23440 , mixRatehllc:0.04688
mixRatehllc - mixRate:  -0.003712
hllcMixCnt - realMixCnt:  -1856
hllc cost time: 699 
********************begin r :{0.1},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 5000000  ,r : 0.1  setA size: 500000 , setB size: 500000
generate data cost time: 688 
realMixCnt: 50178 , mixRate:0.100356
Map collection cost time: 200 
hllcMixCnt: 45070 , mixRatehllc:0.09014
mixRatehllc - mixRate:  -0.010216
hllcMixCnt - realMixCnt:  -5108
hllc cost time: 701 
********************begin r :{0.2},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 2500000  ,r : 0.2  setA size: 500000 , setB size: 500000
generate data cost time: 897 
realMixCnt: 249899 , mixRate:0.499798
Map collection cost time: 223 
hllcMixCnt: 250263 , mixRatehllc:0.500526
mixRatehllc - mixRate:  0.000728
hllcMixCnt - realMixCnt:  364
hllc cost time: 658 
********************begin r :{0.5},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.5  setA size: 500000 , setB size: 500000
generate data cost time: 868 
realMixCnt: 249895 , mixRate:0.49979
Map collection cost time: 245 
hllcMixCnt: 249916 , mixRatehllc:0.499832
mixRatehllc - mixRate:  0.000042
hllcMixCnt - realMixCnt:  21
hllc cost time: 724 
********************begin r :{0.01},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.01  setA size: 100000 , setB size: 100000
generate data cost time: 110 
realMixCnt: 1026 , mixRate:0.01026
Map collection cost time: 28 
hllcMixCnt: 569 , mixRatehllc:0.00569
mixRatehllc - mixRate:  -0.004570
hllcMixCnt - realMixCnt:  -457
hllc cost time: 95 
********************begin r :{0.05},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.05  setA size: 100000 , setB size: 100000
generate data cost time: 91 
realMixCnt: 5024 , mixRate:0.05024
Map collection cost time: 26 
hllcMixCnt: 5439 , mixRatehllc:0.05439
mixRatehllc - mixRate:  0.004150
hllcMixCnt - realMixCnt:  415
hllc cost time: 131 
********************begin r :{0.1},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.1  setA size: 100000 , setB size: 100000
generate data cost time: 93 
realMixCnt: 9925 , mixRate:0.09925
Map collection cost time: 28 
hllcMixCnt: 10201 , mixRatehllc:0.10201
mixRatehllc - mixRate:  0.002760
hllcMixCnt - realMixCnt:  276
hllc cost time: 141 
********************begin r :{0.2},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 500000  ,r : 0.2  setA size: 100000 , setB size: 100000
generate data cost time: 90 
realMixCnt: 19983 , mixRate:0.19983
Map collection cost time: 32 
hllcMixCnt: 19936 , mixRatehllc:0.19936
mixRatehllc - mixRate:  -0.000470
hllcMixCnt - realMixCnt:  -47
hllc cost time: 128 
********************begin r :{0.5},testSetLength:{100000} ,m:{18} ***********************
tatolCount: 200000  ,r : 0.5  setA size: 100000 , setB size: 100000
generate data cost time: 121 
realMixCnt: 50027 , mixRate:0.50027
Map collection cost time: 35 
hllcMixCnt: 49726 , mixRatehllc:0.49726
mixRatehllc - mixRate:  -0.003010
hllcMixCnt - realMixCnt:  -301
hllc cost time: 137 
********************begin r :{0.01},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 20000000  ,r : 0.01  setA size: 200000 , setB size: 200000
generate data cost time: 247 
realMixCnt: 1991 , mixRate:0.009955
Map collection cost time: 38 
hllcMixCnt: 2118 , mixRatehllc:0.01059
mixRatehllc - mixRate:  0.000635
hllcMixCnt - realMixCnt:  127
hllc cost time: 200 
********************begin r :{0.05},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 4000000  ,r : 0.05  setA size: 200000 , setB size: 200000
generate data cost time: 225 
realMixCnt: 10000 , mixRate:0.05
Map collection cost time: 71 
hllcMixCnt: 9751 , mixRatehllc:0.048755
mixRatehllc - mixRate:  -0.001245
hllcMixCnt - realMixCnt:  -249
hllc cost time: 273 
********************begin r :{0.1},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 2000000  ,r : 0.1  setA size: 200000 , setB size: 200000
generate data cost time: 224 
realMixCnt: 19974 , mixRate:0.09987
Map collection cost time: 71 
hllcMixCnt: 19810 , mixRatehllc:0.09905
mixRatehllc - mixRate:  -0.000820
hllcMixCnt - realMixCnt:  -164
hllc cost time: 300 
********************begin r :{0.2},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.2  setA size: 200000 , setB size: 200000
generate data cost time: 243 
realMixCnt: 40093 , mixRate:0.200465
Map collection cost time: 82 
hllcMixCnt: 40549 , mixRatehllc:0.202745
mixRatehllc - mixRate:  0.002280
hllcMixCnt - realMixCnt:  456
hllc cost time: 297 
********************begin r :{0.5},testSetLength:{200000} ,m:{18} ***********************
tatolCount: 400000  ,r : 0.5  setA size: 200000 , setB size: 200000
generate data cost time: 283 
realMixCnt: 99874 , mixRate:0.49937
Map collection cost time: 88 
hllcMixCnt: 99730 , mixRatehllc:0.49865
mixRatehllc - mixRate:  -0.000720
hllcMixCnt - realMixCnt:  -144
hllc cost time: 306 
********************begin r :{0.01},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 50000000  ,r : 0.01  setA size: 500000 , setB size: 500000
generate data cost time: 678 
realMixCnt: 5148 , mixRate:0.010296
Map collection cost time: 181 
hllcMixCnt: 3895 , mixRatehllc:0.00779
mixRatehllc - mixRate:  -0.002506
hllcMixCnt - realMixCnt:  -1253
hllc cost time: 673 
********************begin r :{0.05},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 10000000  ,r : 0.05  setA size: 500000 , setB size: 500000
generate data cost time: 820 
realMixCnt: 25131 , mixRate:0.050262
Map collection cost time: 185 
hllcMixCnt: 24850 , mixRatehllc:0.0497
mixRatehllc - mixRate:  -0.000562
hllcMixCnt - realMixCnt:  -281
hllc cost time: 647 
********************begin r :{0.1},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 5000000  ,r : 0.1  setA size: 500000 , setB size: 500000
generate data cost time: 691 
realMixCnt: 49911 , mixRate:0.099822
Map collection cost time: 187 
hllcMixCnt: 50951 , mixRatehllc:0.101902
mixRatehllc - mixRate:  0.002080
hllcMixCnt - realMixCnt:  1040
hllc cost time: 690 
********************begin r :{0.2},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 2500000  ,r : 0.2  setA size: 500000 , setB size: 500000
generate data cost time: 888 
realMixCnt: 250250 , mixRate:0.5005
Map collection cost time: 212 
hllcMixCnt: 249358 , mixRatehllc:0.498716
mixRatehllc - mixRate:  -0.001784
hllcMixCnt - realMixCnt:  -892
hllc cost time: 608 
********************begin r :{0.5},testSetLength:{500000} ,m:{18} ***********************
tatolCount: 1000000  ,r : 0.5  setA size: 500000 , setB size: 500000
generate data cost time: 840 
realMixCnt: 249691 , mixRate:0.499382
Map collection cost time: 230 
hllcMixCnt: 249833 , mixRatehllc:0.499666
mixRatehllc - mixRate:  0.000284
hllcMixCnt - realMixCnt:  142
hllc cost time: 714 

Process finished with exit code 0



猜你喜欢

转载自lingzhi007.iteye.com/blog/2400879
今日推荐