Error when reading a local file ("file not found"): the executor may not be running on the current node, and the other nodes do not have a copy of this file.
scala> sc.textFile("hdfs://node01:8020/sparkwordcount/wordcount.txt").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)collect
warning: there was one feature warning; re-run with -feature for details
res4: Array[(String, Int)] = Array((hello,3), (me,1), (you,1), (her,1))
3. Maven project operation: note that there is a difference between cluster mode and local mode.
Scala project example:
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
object scala_spark {
  /** Spark word-count driver.
    *
    * Reads a text file, splits it into words, counts occurrences of each
    * word, sorts the counts in descending order, prints the result on the
    * driver, and saves the sorted counts as a text file.
    *
    * @param args args(0) = input file path, args(1) = output directory path
    *             (both typically HDFS paths when submitted to a cluster)
    */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an opaque
    // ArrayIndexOutOfBoundsException when paths are missing.
    if (args.length < 2) {
      System.err.println("Usage: scala_spark <inputPath> <outputPath>")
      sys.exit(1)
    }
    // Local mode (uncomment for local runs).
    // NOTE: the original commented line had a misplaced parenthesis
    // (.setMaster was applied to the String literal); fixed here.
    //val sparkConf = new SparkConf().setAppName("scalaCount").setMaster("local[2]")
    // Cluster mode: the master URL is supplied by spark-submit.
    val sparkConf = new SparkConf().setAppName("scalaCount")
    // Build the SparkContext from the configuration.
    val context = new SparkContext(sparkConf)
    // Reduce log noise: only WARN and above.
    context.setLogLevel("WARN")
    // Local file input (uncomment for local runs):
    //val file: RDD[String] = context.textFile("file:///C:\\Users\\Administrator\\Documents\\tt\\wordcount.txt")
    // Cluster mode: input path comes from the first command-line argument.
    val file: RDD[String] = context.textFile(args(0))
    // Split each line on spaces, pair every word with 1, and sum the pairs
    // per word to get (word, count).
    val fileWord = file.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    // Sort by count, descending (second tuple element, ascending = false).
    val sortBy: RDD[(String, Int)] = fileWord.sortBy(_._2, false)
    // Collect the sorted RDD to the driver for printing.
    val array: Array[(String, Int)] = sortBy.collect()
    val result: mutable.Buffer[(String, Int)] = array.toBuffer
    println(result)
    // Persist the sorted counts to the output path (second argument).
    sortBy.saveAsTextFile(args(1))
    // Release cluster resources.
    context.stop()
  }
}