Scala WordCount / Java WordCount

Java WordCount

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.*;
import scala.Tuple2;

import java.util.Arrays;
import java.util.Iterator;

/**
 * Counts how often each comma-separated token occurs in the text file
 * {@code ./score} and prints one line per distinct token, in ascending
 * key order.
 *
 * <p>Runs on a local Spark master with four worker threads.
 */
public class JavaWordCount {
    /**
     * Program entry point.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local[4]").setAppName("test");

        // JavaSparkContext is Closeable: try-with-resources shuts the context
        // down even when one of the Spark actions below throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> text = sc.textFile("./score");

            // Break every input line into its comma-separated tokens.
            JavaRDD<String> words = text.flatMap(new FlatMapFunction<String, String>() {
                @Override
                public Iterator<String> call(String line) throws Exception {
                    return Arrays.asList(line.split(",")).iterator();
                }
            });

            // Pair each token with an initial count of 1.
            JavaPairRDD<String, Integer> pairRDD = words.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) throws Exception {
                    return new Tuple2<>(word, 1);
                }
            });

            // Add up the counts of identical tokens.
            JavaPairRDD<String, Integer> reduceRDD = pairRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer v1, Integer v2) throws Exception {
                    return v1 + v2;
                }
            });

            // RDD.foreach runs inside the partition tasks, which execute in
            // parallel, so printing from it does not follow the sorted order.
            // Collecting the sorted result to the driver keeps the key order.
            // NOTE(review): collect() loads every distinct token into driver
            // memory; fine for a small input, revisit for large data sets.
            for (Tuple2<String, Integer> wordCount : reduceRDD.sortByKey().collect()) {
                System.out.println(wordCount._1 + " appears " + wordCount._2 + " times.");
            }
        }
    }
}


Scala WordCount


import org.apache.spark.{SparkConf, SparkContext}

/** Counts how often each comma-separated token occurs in the text file
  * `./score` and prints one line per distinct token, in ascending key order.
  *
  * Runs on a local Spark master with four worker threads.
  */
object ScalaWC {

  /** Program entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("wc")
    val sc = new SparkContext(conf)

    // Stop the context even when one of the Spark actions below throws.
    try {
      sc.textFile("./score")
        .flatMap(_.split(","))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .sortByKey()
        // RDD.foreach runs inside the partition tasks, which execute in
        // parallel, so printing from it does not follow the sorted order.
        // Collecting to the driver first keeps the key order.
        // NOTE(review): collect() loads every distinct token into driver
        // memory; fine for a small input, revisit for large data sets.
        .collect()
        .foreach { case (word, count) =>
          println(s"$word appears $count times")
        }
    } finally {
      sc.stop()
    }
  }
}

运行结果:
300 appears 2 times
1 appears 2 times
100 appears 2 times
liming appears 2 times
2 appears 2 times
lisi appears 2 times
200 appears 2 times
zhangsan appears 2 times
3 appears 2 times

猜你喜欢

转载自blog.csdn.net/yswhfd/article/details/85088496
今日推荐