Output operations: saving results locally and writing them to MySQL

To save results to the local filesystem, call saveAsTextFiles(prefix) on the DStream; each batch interval is written out as a directory of text files whose names start with the given prefix.
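A minimal sketch, assuming the running_counts DStream defined in the script further below and a hypothetical output path:

# Each batch produces a directory named <prefix>-<batch timestamp>.
running_counts.saveAsTextFiles("file:///usr/local/spark/mycode/streaming/output")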
Writing the results to MySQL
service mysql start
mysql -u hadoop -p
use spark
create table wordcount(word char(20),count int(4));

# Install PyMySQL, the Python module for connecting to MySQL
sudo apt-get update
sudo apt-get install python3-pip
pip3 -V
sudo pip3 install PyMySQL
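A quick sanity check that PyMySQL is installed and can reach the database (a sketch, assuming the hadoop/hadoop account and the spark database created above):

#!/usr/bin/env python3
import pymysql
# Connect with the same credentials used in the streaming script below.
db = pymysql.connect(host="localhost", user="hadoop", password="hadoop", database="spark")
cursor = db.cursor()
cursor.execute("select version()")
print("MySQL version:", cursor.fetchone()[0])
db.close()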

The code is the same as in the previous post; the only addition is the part that saves the results to MySQL.
#!/usr/bin/env python3
from __future__ import print_function
import sys
import pymysql
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: NetworkWordCountStateful.py <hostname> <port>", file=sys.stderr)
        exit(-1)
    sc = SparkContext(appName="PythonStreamingStatefulNetworkWordCount")
    ssc = StreamingContext(sc, 1)
    ssc.checkpoint("file:///usr/local/spark/mycode/streaming/stateful/")
    initialRDD = sc.parallelize([(u'hello', 1), (u'world', 1)])  # initial state RDD; the u prefix marks Unicode strings

    # When the nc side sends (hadoop,1)(hadoop,1)(spark,1)(spark,1), updateStateByKey first
    # groups the new values per key into (hadoop,[1,1]) and (spark,[1,1]); for each key it then
    # calls updateFunc with that list of new values and the previous running total (last_sum).
    def updateFunc(new_values, last_sum):
        return sum(new_values) + (last_sum or 0)

    def func(rdd):
        repartitionedRDD = rdd.repartition(3)  # limit to 3 partitions so we don't open too many database connections
        repartitionedRDD.foreachPartition(dbfunc)

    def dbfunc(records):
        db = pymysql.connect(host="localhost", user="hadoop", password="hadoop", database="spark")
        cursor = db.cursor()  # cursor for executing SQL statements

        def doinsert(p):
            sql = "insert into wordcount(word,count) values('%s','%s')" % (str(p[0]), str(p[1]))
            try:
                cursor.execute(sql)
                db.commit()
            except:
                db.rollback()

        for item in records:
            doinsert(item)
        db.close()  # release the connection once the partition is written

    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))  # e.g. socketTextStream("localhost", 9999)
    running_counts = lines.flatMap(lambda line: line.split(" ")) \
                          .map(lambda x: (x, 1)) \
                          .updateStateByKey(updateFunc, initialRDD=initialRDD)
    running_counts.pprint()
    running_counts.foreachRDD(func)

    ssc.start()
    ssc.awaitTermination()
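To try it out (a sketch; localhost, port 9999, and the spark-submit path are assumptions matching the comments above): start netcat in one terminal as the data source, submit the script in another, type some words into the nc terminal, then query the table.

nc -lk 9999
/usr/local/spark/bin/spark-submit NetworkWordCountStateful.py localhost 9999
mysql> select * from wordcount;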
