Problems encountered when using Spark to read data from HBase via newAPIHadoopRDD

The code is as follows:
import com.dataexa.data.neo4j.hbase.HbaseConf;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.io.IOException;
import java.util.*;

/**
 * @Author: liuyg
 * @Date: 19-1-5 2:20 PM
 */
public class hbaseTest01 {

    public static void main(String[] args) {

        // System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "localhost");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        HbaseConf a = new HbaseConf();
        String filterOperator = "pass_all";
        List<Map<String, String>> filterParams = new ArrayList<>();
        Map<String, String> map = new HashMap<>();
        String FILTER = "fileter";
        String COMPAREOP = "compareop";
        String COMPARATOR = "comparator";
        String FAMILY = "family";
        String QUALIFIER = "qualifier";
        String VALUE = "value";
        String VALUE_TYPE = "valueType";
        String QUALIFIERPREFIXS = "qualifierprefixs";
        map.put(FILTER, "multipleColumn");
        map.put(COMPAREOP, "EQUAL");
        map.put(COMPARATOR, "BinaryComparator");
        map.put(FAMILY, "info");
        map.put(QUALIFIER, "");
        map.put(VALUE, "");
        map.put(VALUE_TYPE, "");
        map.put(QUALIFIERPREFIXS, "name;age;addr");
        filterParams.add(map);
        // build the filter list
        FilterList filterList = a.filterOperator(filterOperator, filterParams);
        // create the SparkContext
        SparkConf sparkconf = new SparkConf();
        sparkconf.setAppName("test").setMaster("local[2]");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkconf);
        // add the filter to the HBase configuration and set the table name
        Scan scan = new Scan();
        scan.setFilter(filterList);
        try {
            conf.set(TableInputFormat.SCAN, convertScanToString(scan));
        } catch (IOException e) {
            e.printStackTrace();
        }
        conf.set(TableInputFormat.INPUT_TABLE, "person");
        // read HBase directly with newAPIHadoopRDD
        JavaPairRDD<ImmutableBytesWritable, Result> hbaseRDD = sparkContext.newAPIHadoopRDD(
                conf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);

        // System.out.println(hbaseRDD.toString());
        // Map<ImmutableBytesWritable, Result> immutableBytesWritableResultMap = hbaseRDD.collectAsMap();
        // Iterator<Map.Entry<ImmutableBytesWritable, Result>> iterator = immutableBytesWritableResultMap.entrySet().iterator();
        // while (iterator.hasNext()) {
        //     Map.Entry<ImmutableBytesWritable, Result> next = iterator.next();
        //     System.out.println(next.getKey() + "-" + next.getValue());
        // }
        List<Tuple2<ImmutableBytesWritable, Result>> collect = hbaseRDD.collect();
        System.out.println("####################################");
        for (Tuple2<ImmutableBytesWritable, Result> t : collect) {
            System.out.println(t._1 + "—" + t._2);
        }
        collect.spliterator().forEachRemaining(x -> {
            System.out.println(x._1 + "----" + x._2);
        });
    }

    static String convertScanToString(Scan scan) throws IOException {
        ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
        String scanToString = Base64.encodeBytes(proto.toByteArray());
        return scanToString;
    }
}
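A side note on the convertScanToString helper: TableInputFormat expects its SCAN property to contain the Scan as a Base64-encoded protobuf, which is exactly what the helper produces. Depending on the HBase version in use, the equivalent utility shipped with HBase may be used instead (worth verifying that your version exposes it as a public method):

conf.set(TableInputFormat.SCAN,
        org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.convertScanToString(scan));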
The error encountered:
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0 in stage 0.0 (TID 0) had a not serializable result: org.apache.hadoop.hbase.io.ImmutableBytesWritable
Serialization stack:
- object not serializable (class: org.apache.hadoop.hbase.io.ImmutableBytesWritable, value: 33)
- field (class: scala.Tuple2, name: _1, type: class java.lang.Object)
- object (class scala.Tuple2, (33,keyvalues={1/info:addr/1546667724725/Put/vlen=2/seqid=0, 1/info:age/1546667714480/Put/vlen=2/seqid=0, 1/info:name/1546667704515/Put/vlen=2/seqid=0}))
- element of array (index: 0)
Solution:

The collected key and value types (ImmutableBytesWritable and Result) do not implement java.io.Serializable, so Spark's default Java serializer cannot ship the task results back to the driver. Switching to the Kryo serializer fixes this. Add one line to the code, before the SparkConf is created (the commented-out line near the top of main):

System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
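Setting the same property directly on the SparkConf, before the JavaSparkContext is created, works as well and keeps the configuration in one place. A minimal sketch (registering the HBase classes with Kryo is optional and only makes their serialized form more compact):

SparkConf sparkconf = new SparkConf()
        .setAppName("test")
        .setMaster("local[2]")
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        // optional: pre-register the HBase types so Kryo does not write full class names
        .registerKryoClasses(new Class<?>[]{ImmutableBytesWritable.class, Result.class});
JavaSparkContext sparkContext = new JavaSparkContext(sparkconf);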

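An alternative that avoids touching the serializer at all is to convert the non-serializable ImmutableBytesWritable/Result pairs into plain Strings on the executors before calling collect(), so nothing non-serializable is ever sent back to the driver. A rough sketch building on the hbaseRDD above (it assumes the info:name column from the example data and additionally needs import org.apache.hadoop.hbase.util.Bytes):

JavaPairRDD<String, String> stringRdd = hbaseRDD.mapToPair(t -> new Tuple2<>(
        Bytes.toString(t._1.get()),                                 // row key as a String
        Bytes.toString(t._2.getValue(Bytes.toBytes("info"),         // one cell value as a String
                                     Bytes.toBytes("name")))));
List<Tuple2<String, String>> rows = stringRdd.collect();
rows.forEach(r -> System.out.println(r._1 + "----" + r._2));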
Reposted from blog.csdn.net/love_zy0216/article/details/85860468