Spark UDAF with a user-defined (custom) buffer type


  class HllcdistinctByte extends Aggregator[Row, HLLCounter, Array[Byte]] {
    // A zero value for this aggregation. Should satisfy the property that any b + zero = b
    def zero: HLLCounter = new HLLCounter(14)
    // Combine two values to produce a new value. For performance, the function may modify `buffer`
    // and return it instead of constructing a new object
    def reduce(buffer: HLLCounter, employee: Row): HLLCounter = {
      buffer.add(employee.getString(2))
      buffer
    }
    // Merge two intermediate values
    def merge(b1: HLLCounter, b2: HLLCounter): HLLCounter = {
      b1.merge(b2)
      b1
    }
    // Transform the output of the reduction
    def finish(reduction: HLLCounter): Array[Byte] =  {
      val out1 = ByteBuffer.allocate(reduction.maxLength())
      reduction.writeRegisters(out1)
      out1.array()
    }
    // Specifies the Encoder for the intermediate value type
    def bufferEncoder: Encoder[HLLCounter] = Encoders.javaSerialization
    // Specifies the Encoder for the final output value type
    def outputEncoder: Encoder[Array[Byte]] = Encoders.BINARY
  }


      val uvbytes =  new  HllcdistinctByte().toColumn
      val uvb =  wordsDataFrame.where("event_id = '2001'").groupByKey(_.getString(0)).agg(uvbytes)
      uvb.show(5)

猜你喜欢

转载自lingzhi007.iteye.com/blog/2386783
今日推荐