Spark Operator Examples in Java (take, countByKey, saveAsTextFile)

package day06;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import scala.Tuple2;
import java.util.*;
//take: return the first n elements of the RDD to the driver
public class myTake {
    public static void myTake(JavaSparkContext jsc){
        JavaRDD<String> RDD1 = jsc.parallelize(Arrays.asList("aa", "aa", "bb", "cc", "dd"));
        List<String> take = RDD1.take(3);
        System.out.println(take);
    }
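    // Related actions, for comparison: first() is equivalent to take(1).get(0),
    // and takeOrdered(n) returns the n smallest elements in sorted order.
    // A minimal sketch; the method name myTakeOrdered is our own addition,
    // not part of the original post.
    public static void myTakeOrdered(JavaSparkContext jsc){
        JavaRDD<String> RDD1 = jsc.parallelize(Arrays.asList("dd", "cc", "aa", "bb"));
        // natural String ordering, so this prints [aa, bb]
        System.out.println(RDD1.takeOrdered(2));
    }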
//countByKey: group identical keys and count how many elements each key has
    public static void myCountByKey(JavaSparkContext jsc){
        List<Tuple2<String,String>> tuple2s = Arrays.asList(new Tuple2<String,String>("class2", "liao"),
                new Tuple2<String,String>("class2", "ao"), new Tuple2<String,String>("class2", "li"),
                new Tuple2<String,String>("class1", "lao"));
        JavaPairRDD<String,String> javaPairRDD = jsc.parallelizePairs(tuple2s);
        Map<String,Long> map = javaPairRDD.countByKey();

        for (Map.Entry<String, Long> entry : map.entrySet()){
            System.out.println("k=" + entry.getKey() + ", v=" + entry.getValue());
        }
    }
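    // countByKey is an action: it ships all per-key counts back to the driver,
    // so it only suits small key sets. To keep the counting distributed, the
    // reduceByKey transformation can be used instead. A minimal sketch; this
    // helper is our own addition and assumes Java 8 lambda support:
    public static void myReduceByKeyCount(JavaSparkContext jsc){
        List<Tuple2<String,Integer>> ones = Arrays.asList(
                new Tuple2<String,Integer>("class2", 1),
                new Tuple2<String,Integer>("class2", 1),
                new Tuple2<String,Integer>("class1", 1));
        JavaPairRDD<String,Integer> pairRDD = jsc.parallelizePairs(ones);
        // sum the 1s per key on the executors; collect() fetches only the result
        System.out.println(pairRDD.reduceByKey((a, b) -> a + b).collect());
    }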
//save the flat-mapped words as a text file on HDFS
    public static void mySaveAsTextFile(JavaSparkContext jsc){
        List<String> list =
                Arrays.asList("Hello World", "Hello scala", "Hello Java");
        JavaRDD<String> parallelizeRDD = jsc.parallelize(list);
        JavaRDD<String> flatMapRDD = parallelizeRDD.flatMap(
                new FlatMapFunction<String, String>() {
                    public Iterator<String> call(String v1) throws Exception {
                        return Arrays.asList(v1.split(" ")).iterator();
                    }
                });
        // note: saveAsTextFile fails if the target directory already exists
        flatMapRDD.saveAsTextFile("hdfs://hadoop-1707-001:9000/save/test001");
        System.out.println("save finished");
    }
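    // To verify the save, the part files can be read back with textFile, which
    // accepts the output directory path. A minimal sketch; it assumes the HDFS
    // path written above actually exists and is reachable:
    public static void myReadBack(JavaSparkContext jsc){
        JavaRDD<String> lines = jsc.textFile("hdfs://hadoop-1707-001:9000/save/test001");
        // each element is one line of the saved files, i.e. one word after flatMap
        System.out.println(lines.collect());
    }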
    public static void main(String[] args){
        SparkConf conf = new SparkConf().
                setMaster("local").setAppName("MyAction_Java ");
        JavaSparkContext jsc = new JavaSparkContext(conf);
      //  mySaveAsTextFile(jsc);
        myCountByKey(jsc);
        jsc.stop();
    }
}
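Running the program as written invokes only myCountByKey, so it prints the two key counts (class2 appears three times, class1 once), in whatever order the result map happens to iterate:

k=class2, v=3
k=class1, v=1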

Reposted from blog.csdn.net/wjn19921104/article/details/80230250