Spark mapPartitions和map

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u012848709/article/details/85463752

楔子

Spark 两个API

map

/**
 * Demonstrates {@code map}: the supplied function is called once for every element,
 * so the simulated "open connection / insert / close connection" sequence is printed
 * for each element of the RDD.
 */
public static void mapMethod() {
	// Per-element work: each call simulates opening, using and closing a connection.
	Function<String, String> insertPerElement = new Function<String, String>() {

		private static final long serialVersionUID = 1L;

		@Override
		public String call(String element) throws Exception {
			System.out.println("创建数据库连接……");
			System.out.println("insert数据……         " + element);
			System.out.println("close数据库连接……");
			return element;
		}
	};

	JavaRDD<String> source = sc.parallelize(
			Arrays.asList("hello", "spark", "hadoop", "李丰", "帅", "弓", "瑞", "杰", "青"), 3);

	// map() is lazy; collect() is the action that actually runs it.
	source.map(insertPerElement).collect();
}

在这里插入图片描述

mapPartitions

/**
 * Demonstrates {@code mapPartitions}: the function receives an iterator over a whole
 * partition, so the simulated connection is created once per partition instead of once
 * per element, which performs somewhat better than {@code map}.
 */
public static void mapPartitionsMethod() {
	// Per-partition work: open once, write every element of the partition, close once.
	FlatMapFunction<Iterator<String>, String> insertPerPartition = new FlatMapFunction<Iterator<String>, String>() {
		private static final long serialVersionUID = 1L;

		@Override
		public Iterator<String> call(Iterator<String> partition) throws Exception {
			System.out.println("创建数据库连接……");
			// The elements are buffered so they can be handed back as the partition's output.
			ArrayList<String> written = new ArrayList<String>();
			while (partition.hasNext()) {
				String element = partition.next();
				System.out.println("insert数据……         " + element);
				written.add(element);
			}
			System.out.println("close数据库连接……");
			return written.iterator();
		}
	};

	JavaRDD<String> source = sc.parallelize(
			Arrays.asList("hello", "spark", "hadoop", "李丰", "帅", "弓", "瑞", "杰", "青"), 3);

	// mapPartitions() is lazy; collect() is the action that actually runs it.
	source.mapPartitions(insertPerPartition).collect();

}

在这里插入图片描述

重新分区

repartition(numPartitions) = coalesce(numPartitions, true)

/**
 * Demonstrates the two ways of changing the number of partitions of {@code rdd}.
 *
 * <p>{@code repartition(n)} is equivalent to {@code coalesce(n, true)}: it always shuffles
 * and can either increase or decrease the partition count. {@code coalesce(n, false)}
 * avoids the shuffle and is therefore only useful for reducing the partition count.
 *
 * <p>Prints the original partition count, the partition count after {@code repartition},
 * and the collected contents of the coalesced RDD.
 */
public static void repatition() {
	int size = rdd.partitions().size();
	System.out.println(size);

	// shuffle = false: partitions are merged without a shuffle.
	JavaRDD<String> coalesce = rdd.coalesce(2, false);

	// repartition is a shuffling operator; it can be used to re-partition an RDD and may
	// increase or decrease the number of partitions. Transformations return a new RDD
	// and leave the receiver untouched, so the result has to be kept to have any effect.
	JavaRDD<String> repartitioned = rdd.repartition(2);
	System.out.println(repartitioned.partitions().size());

	System.out.println(coalesce.collect());
}

累加器(Accumulator)

/**
 * Accumulator demo (not a broadcast variable): adds 1 to an integer accumulator for
 * every element of {@code rdd} and prints the resulting total.
 *
 * <p>NOTE: Spark guarantees exactly-once accumulator updates only for updates made inside
 * actions. Updates made inside a transformation such as {@code map} may be applied more
 * than once if a task or stage is re-executed, so treat the printed total as a demo value.
 */
private static void accumulator() {
	// final so that the anonymous class below may capture it on pre-Java-8 compilers too.
	final Accumulator<Integer> intAccumulator = sc.intAccumulator(0);
	JavaRDD<String> map = rdd.map(new Function<String, String>() {
		private static final long serialVersionUID = 1L;

		@Override
		public String call(String v1) throws Exception {
			intAccumulator.add(1);
			return v1;
		}
	});
	// map() is lazy: the accumulator is only updated once an action runs the job.
	map.collect();
	// Read the accumulated value explicitly rather than relying on toString().
	System.out.println("单词个数是:" + intAccumulator.value());
}

猜你喜欢

转载自blog.csdn.net/u012848709/article/details/85463752
今日推荐