package com.bjsxt.spark;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
/**
 * Small Spark demo that prints the distinct elements of an in-memory list of
 * strings in two different ways:
 * <ol>
 *   <li>a word-count style pipeline ({@code mapToPair} followed by
 *       {@code reduceByKey}) from which only the keys are kept, and</li>
 *   <li>a single {@code distinct()} call.</li>
 * </ol>
 * Both pipelines print each resulting element with {@code System.out.println}.
 */
public class SparkDay02 {

    /**
     * Builds a {@link JavaSparkContext} with master {@code "local"}, runs both
     * de-duplication pipelines over the same RDD, and stops the context afterwards.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("test").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Arrays.asList(...) is the usual way to build the sample data; the
            // variable is declared as List<String> so parallelize() is type-checked
            // instead of relying on a raw-type unchecked conversion.
            List<String> list = Arrays.asList("a", "a", "b", "b", "c", "d", "d", "e");
            JavaRDD<String> rdd1 = sc.parallelize(list);

            /*
             * Approach 2: word-count style.
             *
             * 1. mapToPair   - turn every element into an (element, 1) pair
             * 2. reduceByKey - add up the counts of identical keys
             * 3. map         - keep only the key of each pair
             * 4. foreach     - print every key
             */
            rdd1.mapToPair(new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String word) throws Exception {
                    return new Tuple2<String, Integer>(word, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer left, Integer right) throws Exception {
                    return left + right;
                }
            }).map(new Function<Tuple2<String, Integer>, String>() {
                // map takes one tuple in and gives one String out: only the key is kept.
                @Override
                public String call(Tuple2<String, Integer> pair) throws Exception {
                    return pair._1;
                }
            }).foreach(new VoidFunction<String>() {
                @Override
                public void call(String key) throws Exception {
                    System.out.println(key);
                }
            });

            /*
             * Approach 1: de-duplicate with distinct().
             *
             * distinct() is the only transformation; the result is printed
             * directly with foreach(new VoidFunction<...>).
             */
            JavaRDD<String> rd = rdd1.distinct();
            rd.foreach(new VoidFunction<String>() {
                @Override
                public void call(String element) throws Exception {
                    System.out.println(element);
                }
            });
        } finally {
            // Always release the context, even when one of the jobs above throws.
            sc.stop();
        }
    }
}