Implementing a custom Spark partitioner

Spark provides two partitioners by default, HashPartitioner and RangePartitioner, but sometimes they do not meet our actual needs. In that case we can define a custom partitioner, which is very simple. In the following demo, records with an even key are placed in one partition and records with an odd key are placed in another.

package spark

import org.apache.spark.{Partitioner, SparkConf, SparkContext, TaskContext}

/**
  * Demo of a custom Spark partitioner.
  *
  * Parallelizes the numbers 1..10, pairs each with the value 1, and
  * re-partitions the pair RDD with `MyPartitioner(2)` so that keys are
  * routed by the custom `getPartition` logic. Each partition then prints
  * its TaskContext partition id followed by its elements.
  */
object CoustomPartitioner {
  def main(args: Array[String]): Unit = {
    // Local single-threaded master — this is a self-contained demo.
    val conf = new SparkConf().setAppName("Spark Streaming Jason").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // 10 elements over 5 initial partitions, then re-partitioned
      // into 2 partitions by the custom partitioner.
      val rdd = sc.parallelize(1 to 10, 5)
      rdd.map((_, 1)).partitionBy(new MyPartitioner(2)).foreachPartition { fp =>
        // Runs on the executor: report which partition this iterator belongs to.
        println("分区ID:" + TaskContext.get.partitionId)
        fp.foreach(println)
      }
    } finally {
      // FIX: the original never stopped the SparkContext; stop it so the
      // local cluster resources are released even if the job throws.
      sc.stop()
    }
  }
}

class MyPartitioner(num:Int) extends Partitioner {
  override def numPartitions: Int = num

  override def getPartition(key: Any): Int = {
    if(key.toString.toInt % 2 == 0){

Recommended reading

Origin blog.csdn.net/xianpanjia4616/article/details/88738091