【原创】大叔经验分享(35)lzo格式支持

建表语句

-- External Hive table over LZO-compressed, tab-delimited text files.
-- Reads go through hadoop-lzo's DeprecatedLzoTextInputFormat, so the hadoop-lzo
-- jar must be on the classpath of whatever engine queries this table.
-- No LOCATION clause is given; the table uses Hive's default location for it.
CREATE EXTERNAL TABLE IF NOT EXISTS `my_lzo_table` (
    `something` string
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';


1 安装 lzo

# yum install lzo lzop

手工安装:http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz

2 安装 hadoop-lzo

# wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/hadoop-gpl-packing/hadoop-gpl-packaging-0.6.1-1.x86_64.rpm
# rpm -ivh hadoop-gpl-packaging-0.6.1-1.x86_64.rpm

# ls /opt/hadoopgpl/lib
cdh4.0.1 guava-12.0.jar hadoop-lzo-0.4.17.jar hadoop-lzo.jar pig-0.10.0 pig-0.6.0 pig-0.7.0 pig-0.8.0 protobuf-java-2.4.1.jar slf4j-api-1.5.8.jar slf4j-log4j12-1.5.10.jar yamlbeans-0.9.3.jar
# ls /opt/hadoopgpl/native/Linux-amd64-64/
libgplcompression.a libgplcompression.la libgplcompression.so libgplcompression.so.0 libgplcompression.so.0.0.0 LzoCompressor.lo LzoCompressor.o LzoDecompressor.lo LzoDecompressor.o

手工安装:https://github.com/twitter/hadoop-lzo/

3 常见报错及解决方法

1)报错:IOException: No LZO codec found, cannot run.

解决:在 core-site.xml 中添加以下配置

    <property>

        <name>io.compression.codecs</name>

        <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec</value>

    </property>

    <property>

        <name>io.compression.codec.lzo.class</name>

        <value>com.hadoop.compression.lzo.LzoCodec</value>

    </property>

2)报错:Error: java.io.IOException: cannot find class com.hadoop.mapred.DeprecatedLzoTextInputFormat

hive

# Put the hadoop-lzo jar on Hadoop's classpath so that
# com.hadoop.mapred.DeprecatedLzoTextInputFormat can be found.
# Appends instead of overwriting, so entries already in HADOOP_CLASSPATH survive.
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH:+${HADOOP_CLASSPATH}:}/opt/hadoopgpl/lib/hadoop-lzo.jar"

spark

# Put the hadoop-lzo jar on Spark's classpath.
# Appends instead of overwriting, so entries already in SPARK_CLASSPATH survive.
# NOTE(review): Spark 1.0+ reports SPARK_CLASSPATH as deprecated; the documented
# replacements are spark.driver.extraClassPath / spark.executor.extraClassPath
# (or --driver-class-path) - confirm which applies to the Spark version in use.
export SPARK_CLASSPATH="${SPARK_CLASSPATH:+${SPARK_CLASSPATH}:}/opt/hadoopgpl/lib/hadoop-lzo.jar"

3)报错:IOException:java.lang.RuntimeException: native-lzo library not available

hive

# Make the native LZO library (libgplcompression) directory visible via
# JAVA_LIBRARY_PATH. Appends instead of overwriting, so directories already
# listed in JAVA_LIBRARY_PATH survive.
export JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH:+${JAVA_LIBRARY_PATH}:}/opt/hadoopgpl/native/Linux-amd64-64/"

spark

# Let the dynamic loader find the native LZO library (libgplcompression.so).
# Appends instead of overwriting, so directories already listed in
# LD_LIBRARY_PATH survive.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/hadoopgpl/native/Linux-amd64-64/"

4)mr报错:Error: java.io.IOException: cannot find class com.hadoop.mapred.DeprecatedLzoTextInputFormat
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:689)
at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)

$ cp /opt/hadoopgpl/lib/hadoop-lzo.jar $HADOOP_HOME/share/hadoop/common/lib/

5)mr报错:Caused by: java.lang.RuntimeException: native-lzo library not available

解决:在 mapred-site.xml 中添加以下配置

    <property>

        <name>mapred.child.java.opts</name>

        <value>-Djava.library.path=/opt/hadoopgpl/native/Linux-amd64-64</value>

    </property>

猜你喜欢

转载自www.cnblogs.com/barneywill/p/10439181.html