Partition Discovery
parquet数据源自动分区推断
For example, when path/to/table/gender=male is the path of the data and users set basePath to path/to/table/, gender will be a partitioning column.
关键理解上面这段话
package sql;
import java.util.ArrayList;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
 * Demonstrates automatic partition inference (partition discovery) for the
 * Parquet data source.
 *
 * <p>A single Parquet file located under {@code gender=male/country=US} is read
 * while the {@code basePath} option points at the table root. According to the
 * Spark partition discovery documentation, directory names of the form
 * {@code key=value} below {@code basePath} are then reported as partitioning
 * columns, so {@code gender} and {@code country} are expected to show up in the
 * printed schema.
 */
public class SparkSqlLocalPartition1 {

    /** Root directory of the partitioned table; handed to Spark as the {@code basePath} option. */
    private static final String BASE_PATH = "D://learning_bigdata/sparkDemo/data2/users";

    /** The one partition file that is read, addressed relative to {@link #BASE_PATH}. */
    private static final String PARTITION_FILE =
            BASE_PATH + "/gender=male/country=US/users.parquet";

    /**
     * Builds a local Spark session, runs the demo and stops the session afterwards.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .master("local")
                .appName("Java Spark SQL data sources example")
                // NOTE(review): looks like a placeholder option carried over from the
                // Spark example programs; kept unchanged.
                .config("spark.some.config.option", "some-value")
                .getOrCreate();
        try {
            runProgram(spark);
        } finally {
            // Stop the session even if the read fails so the local Spark context
            // is shut down instead of being left running.
            spark.stop();
        }
    }

    /**
     * Reads the partition file with {@code basePath} set to the table root, then
     * prints the resulting schema and the rows.
     *
     * @param spark the active Spark session used for reading
     */
    private static void runProgram(SparkSession spark) {
        // Read an existing Parquet file from inside a partition directory. Setting
        // basePath to the table root is what lets Spark treat the key=value
        // directories between the root and the file as partition columns.
        Dataset<Row> userDF = spark.read()
                .option("basePath", BASE_PATH)
                .parquet(PARTITION_FILE);
        userDF.printSchema();
        userDF.show();
    }
}