// 면책 조항: 이 글은 블로거의 원본 글이며 CC 4.0 BY-SA 저작권 라이선스를 따릅니다. 전재할 때는 원문 출처 링크와 이 고지문을 함께 첨부해 주세요.
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
/**
 * Minimal Spark job that sums the lengths of all lines in a text file and
 * prints the total to standard output.
 *
 * <p>The job is configured with the {@code local} master and reads a
 * hard-coded file path; both must be adjusted to run against HDFS.
 */
public class localFiles {
    /**
     * Builds the Spark context, computes the summed line length of the input
     * file, prints it, and closes the context.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Build the Spark configuration.
        SparkConf conf = new SparkConf()
                .setAppName("localFiles")
                // Remove this setMaster call when running against HDFS.
                .setMaster("local");

        // try-with-resources closes the context even when the job throws,
        // instead of only on the success path.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Create an RDD of lines from a local file via textFile().
            // When running against HDFS, change the path
            // (for example "hdfs://spark:9000/spark.txt").
            JavaRDD<String> lines = sc.textFile("E://hadoop//maven.txt");

            // Map every line to its String.length().
            JavaRDD<Integer> lineLengths = lines.map(new Function<String, Integer>() {
                @Override
                public Integer call(String line) throws Exception {
                    return line.length();
                }
            });

            // Sum the lengths. fold with a zero value of 0 yields 0 for an
            // input with no lines, where reduce would fail on the empty RDD.
            // NOTE(review): the sum is an int and can overflow on very large inputs.
            Integer count = lineLengths.fold(0, new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer left, Integer right) throws Exception {
                    return left + right;
                }
            });

            // NOTE(review): the label reads "total number of words", but the value
            // is the summed line length; the string is left unchanged on purpose.
            System.out.println("单词的总个数是:" + count);
        }
    }
}