Spark学习记录(八、广播变量和累加器)

一、广播变量

public class JavaExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local").setAppName("JavaExample");
        JavaSparkContext sc = new JavaSparkContext(conf);

        final List<String> list = Arrays.asList("hello world", "hello spark");
        //广播变量
        final Broadcast<List<String>> broadcast = sc.broadcast(list);
        JavaRDD<String> rdd1 = sc.parallelize(Arrays.asList("hello world", "hello spark", "hello java"));
        JavaRDD<String> rdd2 = rdd1.filter(new Function<String, Boolean>() {
            public Boolean call(String line) throws Exception {
                return !broadcast.value().contains(line);
            }
        });
        rdd2.foreach(new VoidFunction<String>() {
            public void call(String s) throws Exception {
                System.out.println(s);
            }
        });
    }
}

二、累加器

/** Accumulator example: counts the lines of a text file with a Spark accumulator. */
object ScalaExample {
  /**
   * Reads `C://words.txt`, adds 1 to an accumulator for every line, prints the
   * accumulator after each update and finally prints the total on the driver.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ScalaExample")
    val sc = new SparkContext(conf)
    try {
      val rdd1 = sc.textFile("C://words.txt")
      // Accumulator
      // NOTE(review): `sc.accumulator` is the legacy accumulator API; if the project
      // is on Spark 2.0+ consider `sc.longAccumulator` — confirm the Spark version first.
      val accumulator = sc.accumulator(0)
      // Update the accumulator inside an action (`foreach`) rather than a
      // transformation (`map`): updates made in transformations may be applied
      // more than once when a task or stage is re-executed. This also avoids
      // collecting every line back to the driver just to throw it away.
      rdd1.foreach { _ =>
        accumulator.add(1)
        println(accumulator)
      }
      println(s" i = ${accumulator.value}")
    } finally {
      // Release the context even when the job throws; the original never stopped it.
      sc.stop()
    }
  }
}

猜你喜欢

转载自blog.csdn.net/qq_33283652/article/details/85772540