1. Create a Java Maven project and add the dependencies
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.11</artifactId>
    <version>2.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.47</version>
</dependency>
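Note that the _2.11 suffix on the Spark artifacts is the Scala version and must match across all Spark dependencies. The driver class used throughout this post, com.mysql.jdbc.Driver, belongs to the 5.1.x connector declared above; the 8.x connector renames it to com.mysql.cj.jdbc.Driver. Before wiring Spark in, it can help to confirm the driver and credentials with plain JDBC first; a minimal sketch, assuming the same localhost URL and credentials used in step 2:
import java.sql.Connection;
import java.sql.DriverManager;

public class JdbcSmokeTest {
    public static void main(String[] args) throws Exception {
        // Driver class for mysql-connector-java 5.1.x; use
        // com.mysql.cj.jdbc.Driver if you upgrade to the 8.x connector.
        Class.forName("com.mysql.jdbc.Driver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/demo", "root", "123456")) {
            System.out.println("connected: " + !conn.isClosed());
        }
    }
}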
2. Writing data
SparkConf conf = new SparkConf().setAppName("HelloWorld").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
SQLContext sqlContext = new SQLContext(sc);
// the rows to write
JavaRDD<String> personData = sc.parallelize(Arrays.asList("java chinese 5", "c++ chinese 6"));
// database connection settings
String url = "jdbc:mysql://localhost:3306/demo";
Properties connectionProperties = new Properties();
connectionProperties.put("user", "root");
connectionProperties.put("password", "123456");
connectionProperties.put("driver", "com.mysql.jdbc.Driver");
/**
 * Step 1: build an RDD of Row on top of the raw RDD.
 */
// Convert the RDD into an RDD of Row; a Row is simply one record of the table.
JavaRDD<Row> personsRDD = personData.map(new Function<String, Row>() {
    @Override
    public Row call(String line) throws Exception {
        String[] splited = line.split(" ");
        return RowFactory.create(splited[0], splited[1], Integer.valueOf(splited[2]));
    }
});
/**
 * Step 2: construct the DataFrame's metadata dynamically.
 */
List<StructField> structFields = new ArrayList<>();
structFields.add(DataTypes.createStructField("search_word", DataTypes.StringType, true));
structFields.add(DataTypes.createStructField("lang", DataTypes.StringType, true));
structFields.add(DataTypes.createStructField("hot_index", DataTypes.IntegerType, true));
// Build the StructType that describes the DataFrame's schema.
StructType structType = DataTypes.createStructType(structFields);
/**
 * Step 3: construct the DataFrame from the existing metadata and the RDD<Row>.
 */
Dataset<Row> personsDF = sqlContext.createDataFrame(personsRDD, structType);
/**
 * Step 4: write the data into the person table.
 */
personsDF.write().mode("append").jdbc(url, "person", connectionProperties);
sc.close();
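SQLContext still works in Spark 2.3 but is kept mainly for backward compatibility; SparkSession is the current entry point. A sketch of the same write through SparkSession, reusing the schema, RDD, and connection properties from above:
SparkSession spark = SparkSession.builder()
        .appName("HelloWorld")
        .master("local")
        .getOrCreate();
Dataset<Row> df = spark.createDataFrame(personsRDD, structType);
// SaveMode.Append is the typed equivalent of mode("append")
df.write().mode(SaveMode.Append).jdbc(url, "person", connectionProperties);
spark.stop();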
3. Reading data
private static void getTagByDay(SQLContext sqlContext) {
    String url = "jdbc:mysql://192.168.1.87:3306/tnoat_news_bz";
    // table to query
    String table = "news";
    // set the database user name (user), password (password) and JDBC driver (driver)
    Properties connectionProperties = new Properties();
    connectionProperties.put("user", "dbuser");
    connectionProperties.put("password", "asdQWE!@#");
    connectionProperties.put("driver", "com.mysql.jdbc.Driver");
    System.out.println("reading the news table from the tnoat_news_bz database");
    // load the whole table and register it as a temporary view
    sqlContext.read().jdbc(url, table, connectionProperties).createOrReplaceTempView("news");
    Dataset<Row> jd = sqlContext.sql("SELECT * FROM news");
    // display the data
    jd.show();
}
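read().jdbc(url, table, connectionProperties) pulls the whole table through a single connection. For larger tables, DataFrameReader has an overload that splits the scan across partitions on a numeric column; a sketch, assuming news has an integer primary key id spanning roughly 1 to 100000:
// Split the scan into 4 partitions on the numeric column `id`
// (assumption: `id` exists and roughly spans 1..100000).
Dataset<Row> partitioned = sqlContext.read().jdbc(
        url, table,
        "id",      // partition column
        1,         // lower bound
        100000,    // upper bound
        4,         // number of partitions
        connectionProperties);
partitioned.show();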
The second approach: reading via JdbcRDD
DbConnection dbConnection = new DbConnection(MYSQL_DRIVER, MYSQL_CONNECTION_URL, MYSQL_USERNAME, MYSQL_PWD);
// 1. data input
SparkConf conf = new SparkConf().setAppName("SearchWordRecommend").setMaster("local").setSparkHome("/usr/lib/spark/spark-2.1.1-bin-hadoop2.7");
JavaSparkContext sparkContext = new JavaSparkContext(conf);
SQLContext sqlContext = new SQLContext(sparkContext);
// Load data from MySQL. JdbcRDD requires a query with exactly two '?'
// placeholders for the partition bounds; the "0 >= ? and 0 <= ?" predicate
// deliberately neutralizes them so one partition fetches the whole table.
JdbcRDD<JSONObject> jdbcRDD = new JdbcRDD<>(sparkContext.sc(), dbConnection, "select * from news where 0 >= ? and 0 <= ? ", 0,
        10, 2, new MapResult(), ClassManifestFactory$.MODULE$.fromClass(JSONObject.class));
// Convert to JavaRDD
JavaRDD<JSONObject> javaRDD = JavaRDD.fromRDD(jdbcRDD, ClassManifestFactory$.MODULE$.fromClass(JSONObject.class));
javaRDD.foreach(new VoidFunction<JSONObject>() {
    @Override
    public void call(JSONObject jsonObject) throws Exception {
        System.out.println(jsonObject);
    }
});
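If the table has a numeric key, the bound placeholders can do real work instead of being neutralized: JdbcRDD substitutes each partition's lower and upper bound into the two '?' markers, so the partitions read disjoint ranges in parallel. A sketch, assuming news has an integer id column covering 0..10:
// Each of the 2 partitions receives a disjoint slice of [0, 10]
// via the two '?' placeholders (assumption: integer column `id`).
JdbcRDD<JSONObject> boundedRDD = new JdbcRDD<>(sparkContext.sc(), dbConnection,
        "select * from news where id >= ? and id <= ?",
        0, 10, 2, new MapResult(),
        ClassManifestFactory$.MODULE$.fromClass(JSONObject.class));
System.out.println("rows: " + boundedRDD.count());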
// JdbcRDD expects a Scala Function0<Connection> as its connection factory,
// so this helper extends AbstractFunction0 and must be Serializable.
static class DbConnection extends AbstractFunction0<Connection> implements Serializable {
    private String driverClassName;
    private String connectionUrl;
    private String userName;
    private String password;

    public DbConnection(String driverClassName, String connectionUrl, String userName, String password) {
        this.driverClassName = driverClassName;
        this.connectionUrl = connectionUrl;
        this.userName = userName;
        this.password = password;
    }

    @Override
    public Connection apply() {
        try {
            Class.forName(driverClassName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        Properties properties = new Properties();
        properties.setProperty("user", userName);
        properties.setProperty("password", password);
        Connection connection = null;
        try {
            connection = DriverManager.getConnection(connectionUrl, properties);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return connection;
    }
}
// Row mapper passed to JdbcRDD: converts each ResultSet row into a JSONObject.
static class MapResult extends AbstractFunction1<ResultSet, JSONObject> implements Serializable {
    @Override
    public JSONObject apply(ResultSet resultSet) {
        ResultSetMetaData metaData = null;
        JSONObject jsonObj = new JSONObject();
        try {
            metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            // iterate over every column of the current row
            for (int i = 1; i <= columnCount; i++) {
                String columnName = metaData.getColumnLabel(i);
                String value = resultSet.getString(columnName);
                jsonObj.put(columnName, value);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return jsonObj;
    }
}