# findspark locates a local Spark installation and adds pyspark to sys.path
# (needed when pyspark was not pip-installed into this interpreter).
import findspark
findspark.init()
from pyspark.sql import SparkSession # SparkConf, SparkContext and SQLContext are all wrapped inside SparkSession
# Working configuration: a local SparkSession wired to a remote Hive
# metastore (thrift) and HDFS warehouse directory.
# Fix: the shuffle-partitions key was misspelled ("paratitions");
# Spark silently ignores unknown config keys, so the intended value of 4
# never took effect and the default (200) was used instead.
spark = SparkSession \
    .builder \
    .master('local[*]') \
    .appName('task') \
    .config("spark.sql.shuffle.partitions", "4") \
    .config("spark.sql.warehouse.dir", "hdfs://ip:8020/user/hive/warehouse") \
    .config("hive.metastore.uris", "thrift://ip:9083") \
    .enableHiveSupport() \
    .getOrCreate()
# Originally the "not working" (不可用) variant. Why it failed:
#  - `fs.defaultFS` is a Hadoop setting; Spark only forwards configs to the
#    Hadoop Configuration when they are prefixed with `spark.hadoop.`, so
#    the key was silently ignored.
#  - 50070 is the Hadoop 2.x NameNode *web UI* port; the HDFS RPC port is
#    8020 (matching the warehouse URI used above).
#  - `hive.metastore.uris` was omitted, so enableHiveSupport() had no
#    remote metastore to reach.
# NOTE(review): if the session above was already created in this process,
# getOrCreate() returns that existing session and these builder configs
# are ignored — this is then just a second reference to the same session.
spark = SparkSession.builder \
    .master("local[*]") \
    .appName("app") \
    .config("spark.hadoop.fs.defaultFS", "hdfs://ip:8020") \
    .config("hive.metastore.uris", "thrift://ip:9083") \
    .enableHiveSupport() \
    .getOrCreate()
# Switch to the default Hive database for subsequent queries.
spark.sql("use default")
# Problem addressed: connecting a local PySpark session to a remote Hive metastore.
# Origin: blog.csdn.net/qq_44881930/article/details/130013843