import java.util.Properties

import org.apache.spark.sql.{SaveMode, SparkSession}

object JdbcDatasourceTest {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("JdbcDatasourceTest")
      .master("local")
      .getOrCreate()

    //url:
    // jdbc:mysql://master:3306/test
    // jdbc:oracle://master:3306/test
    // jdbc:db2://master:3306/test
    // jdbc:derby://master:3306/test
    // jdbc:sqlserver://master:3306/test
    // jdbc:postgresql://master:3306/test
    val mysqlUrl = "jdbc:mysql://master:3306/test"

    //1: read the csv file data (BASE_PATH is assumed to be defined elsewhere)
    val optsMap = Map("header" -> "true", "inferSchema" -> "true")
    val df = spark.read.options(optsMap).csv(s"${BASE_PATH}/jdbc_demo_data.csv")
    df.show()

    val properties = new Properties()
    properties.put("user", "root")
    properties.put("password", "root")

    // write data to the MySQL database
    df.write.mode(SaveMode.Overwrite).jdbc(mysqlUrl, "person", properties)
    // read data back from the MySQL database
    val jdbcDFWithNoneOption = spark.read.jdbc(mysqlUrl, "person", properties)
    jdbcDFWithNoneOption.show()

    // The write process:
    // 1: create the table
    // The first time we write, the table has to be created with a statement like:
    //   CREATE TABLE t (name string) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1
    // InnoDB is one of MySQL's storage engines, one of the binary standards released by MySQL AB.
    // ENGINE=InnoDB uses the InnoDB engine, DEFAULT CHARSET=utf8 sets the default encoding to utf8,
    // and AUTO_INCREMENT=1 starts the auto-increment key at 1.
    // These table options can be passed to Spark through the createTableOptions parameter.
    var writeOpts =
      Map[String, String]("createTableOptions" -> "ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1")
    df.write.mode(SaveMode.Overwrite).options(writeOpts).jdbc(mysqlUrl, "person", properties)

    // 2: set the table schema
    // The table schema generally matches the DataFrame schema: each Spark DataType is translated
    // to the corresponding SQL data type of the target database.
    // If a column type in the database is not what you want,
    // you can override it with the createTableColumnTypes parameter, e.g. "age long, name string".
    writeOpts = Map[String, String]("createTableColumnTypes" -> "id Long, age Long")
    df.write.mode(SaveMode.Overwrite).options(writeOpts).jdbc(mysqlUrl, "person", properties)

    // 3: the transaction isolation level is set through the isolationLevel parameter
    // NONE: transactions are not supported
    // READ_COMMITTED: no dirty reads, but non-repeatable reads and phantom reads can still occur
    writeOpts = Map[String, String]("isolationLevel" -> "READ_COMMITTED")
    df.write.mode(SaveMode.Overwrite).options(writeOpts).jdbc(mysqlUrl, "person", properties)

    // 5: the second time we write, the table already exists, so SaveMode matters.
    // With SaveMode.Overwrite the table has to be cleaned up before writing. There are two ways:
    //   The first is to truncate the table: empty it first, then write the data.
    //   The second is to drop the table: drop it, recreate it, then write the data.
    // The choice between the two is controlled by the truncate parameter (default: false).
    // Because truncating may fail to clear the data, dropping the table can be used instead;
    // also, not every database supports TRUNCATE TABLE (PostgresDialect, for example, does not).
    // With SaveMode.Append the data is simply appended.
    // With SaveMode.ErrorIfExists an exception is thrown.
    // With SaveMode.Ignore nothing is done at all.
    writeOpts = Map[String, String]("truncate" -> "false")
    df.write.mode(SaveMode.Overwrite).options(writeOpts).jdbc(mysqlUrl, "person", properties)

    // Partitioned read:
    // If upperBound - lowerBound >= numPartitions we get numPartitions partitions,
    // otherwise we get upperBound - lowerBound partitions.
    // Here 8 - 3 = 5 > 3, so we get 3 partitions:
    //   WHERE id < 3 + 1           (the stride 1 comes from 8/3 - 3/3 = 1)
    //   WHERE id >= 3 + 1 AND id < 3 + 1 + 1
    //   WHERE id >= 3 + 1 + 1
    // Configured through options:
    val readOpts = Map[String, String](
      "numPartitions" -> "3",
      "partitionColumn" -> "id",
      "lowerBound" -> "3",
      "upperBound" -> "8",
      "fetchsize" -> "100")
    val jdbcDF = spark.read.options(readOpts).jdbc(mysqlUrl, "person", properties)
    jdbcDF.rdd.partitions.size
    jdbcDF.rdd.glom().collect()
    jdbcDF.show()
    // The same read can also be expressed through the option-based API (see the sketch after this listing).

    // Reads can fetch data in batches; the batch size is set with the fetchsize parameter.
    // The default is 0, which lets the JDBC driver estimate the batch size.
    // Both reads and writes have a notion of partition count:
    //   for reads it is set by the user through the numPartitions parameter,
    //   for writes it is the number of partitions of the DataFrame.
    // Note that for both reads and writes every partition opens its own JDBC connection,
    // so the partition count should not be too high, otherwise it can overwhelm the database.
    // For writes, the DataFrame's coalesce method can be used to reduce the number of partitions.

    spark.stop()
  }
}
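The listing above only hints at the option-based "API way" of doing the same read. Below is a minimal sketch of that style, assuming the same mysqlUrl, table name, and credentials as the listing; the options used (url, dbtable, user, password, numPartitions, partitionColumn, lowerBound, upperBound, fetchsize) are standard Spark JDBC data source options.

    // Sketch only: equivalent partitioned read via DataFrameReader options
    // instead of the jdbc(url, table, properties) shortcut.
    val jdbcDFWithOptions = spark.read
      .format("jdbc")
      .option("url", mysqlUrl)
      .option("dbtable", "person")
      .option("user", "root")
      .option("password", "root")
      .option("numPartitions", "3")
      .option("partitionColumn", "id")
      .option("lowerBound", "3")
      .option("upperBound", "8")
      .option("fetchsize", "100")
      .load()
    jdbcDFWithOptions.show()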