SparkSQL: reading a JSON file from HDFS

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

public class SparkSqlBathLog {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local[2]").setAppName("jsonfile");
        // Spark context
        SparkContext sc = new SparkContext(conf);
        // create the SQLContext from the SparkContext
        SQLContext sqlContext = new SQLContext(sc);
        // load the file from HDFS as a plain RDD of lines (alternative, left commented out)
        // RDD<String> stringRDD = sc.textFile("hdfs://192.168.48.11:9000/aaa/ssooo.txt");

        // read the file from HDFS as line-delimited JSON into a DataFrame
        DataFrame df = sqlContext.read().json("hdfs://node1:9000/aaa/ssooo.txt");
        df.show(30);

        // generic load() defaults to Parquet, so it cannot parse a JSON text file directly
        // DataFrame dfa = sqlContext.read().load("hdfs://192.168.48.11:9000/aaa/ssooo.txt");
        // dfa.show(30);
        // sqlContext.sql("use hive");
        // df.show(20);

        // the same JSON read, from a local file, via the explicit format() API
        DataFrame df1 = sqlContext.read().format("json").load("./test.txt");
        df1.show(30);

        // write out as a single file ("String" is not a built-in datasource format; use e.g. json or parquet)
        // df1.coalesce(1).write().format("String").save("hive path");

        // rows that fail to parse land in the _corrupt_record column; drop it, then
        // select the "table" field and drop rows where it is entirely null
        // DataFrame corrupt_record = df.drop("_corrupt_record");
        // corrupt_record.show(20);
        // DataFrame table = corrupt_record.select("table");
        // table.na().drop("all").show(20);
        // JavaRDD<Row> rowJavaRDD = table.toJavaRDD();
        // df1.show(20);
        // df.show();
        sc.stop();
    }
}
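
A note on the input format: sqlContext.read().json(...) expects line-delimited JSON, meaning every line of the file must be a complete JSON object, regardless of the .txt extension. Lines that fail to parse are collected into the _corrupt_record column, which is what the commented-out df.drop("_corrupt_record") cleanup above is for. A hypothetical ssooo.txt (the field names here are only for illustration, except "table", which the commented-out select() above refers to) might look like:

{"table": "batch_log", "id": 1, "msg": "job started"}
{"table": "batch_log", "id": 2, "msg": "job finished"}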

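For reference, on Spark 2.x and later, SQLContext and DataFrame have been superseded by SparkSession and Dataset<Row>. A minimal sketch of the same JSON read, assuming the same HDFS path (the class name here is hypothetical):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SparkSqlBathLogV2 {
    public static void main(String[] args) {
        // SparkSession replaces both SparkContext and SQLContext
        SparkSession spark = SparkSession.builder()
                .master("local[2]")
                .appName("jsonfile")
                .getOrCreate();
        // same line-delimited JSON read as above
        Dataset<Row> df = spark.read().json("hdfs://node1:9000/aaa/ssooo.txt");
        df.show(30);
        spark.stop();
    }
}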
Origin: www.cnblogs.com/Mr--zhao/p/12212746.html