pyspark sql mysql连表查询

1.安装mysql驱动

https://dev.mysql.com/downloads/connector/j/
在这里插入图片描述

cp mysql-connector-java-8.0.27.jar $SPARK_HOME/jars

2.pyspark读取mysql

# -*- coding: utf-8 -*-
# @Time    : 2021/10/29 20:23
# @Author  :

from pyspark import SparkConf
from pyspark.sql import SparkSession


MASTER = "spark://127.0.0.1:17077"
APPName = "spark_db"
sc_conf = SparkConf()

# 设置spark对应加载的jar包
sc_conf.set("spark.jars", "/opt/spark/jars/mysql-connector-java-8.0.27.jar")
spark = SparkSession.builder.config(conf=sc_conf).appName(APPName).master(MASTER).getOrCreate()

# spark连接mysql配置
spark_mysql_driver = spark.read.format("jdbc")\
    .option("url", "jdbc:mysql://10.8.112.28:3306/campus")\
    .option("driver","com.mysql.jdbc.Driver")\
    .option("user", "user")\
    .option("password", "password")


def create_tempView(driver, table_name):
    """
    创建spark sql临时表
    :param driver:
    :param table_name:
    :return:
    """
    return driver.option("dbtable", table_name).load().createOrReplaceTempView(table_name)


# 创建临时表
Orders = create_tempView(spark_mysql_driver, "Orders")
Persons = create_tempView(spark_mysql_driver, "Persons")
mysql_sql = "SELECT p.Id_P,p.LastName,p.FirstName,p.Address,p.City,o.Id_O,o.OrderNo FROM `Persons` as p, `Orders` as o where p.Id_P=o.Id_P;"
mysqlDF = spark.sql(mysql_sql)
# mysqlDF.show()

在这里插入图片描述

3.pyspark写入mysql

"""
写入模式(mode)从源码看有四种模式:
``overwrite``是覆盖;
``append``是增加;
``ignore``是如果数据已存在,则静默忽略此操作;
"""
# 数据写入mysql另外一张表
new_table_name = "Persons_Orders"
mysqlDF.write.mode('overwrite').format("jdbc").option("url", "jdbc:mysql://10.8.112.28:3306/campus").\
    option("driver","com.mysql.jdbc.Driver").\
    option("user", "user").\
    option("password", "password").\
    option("dbtable",new_table_name).option("batchsize", 10000).save()

参考:https://blog.csdn.net/pycrossover/article/details/102792855
参考:https://towardsdatascience.com/pyspark-mysql-tutorial-fa3f7c26dc7

参考:https://spark.apache.org/docs/latest/monitoring.html
参考:https://cloud.tencent.com/developer/article/1768501

猜你喜欢

转载自blog.csdn.net/Cocktail_py/article/details/121093790