hadoop大数据文件压缩Gzip代码实现

文件压缩有两大好处：减少存储文件所需要的磁盘空间，并加速数据在网络和磁盘上的传输。

不同的压缩工具有不同的压缩特性。

gzip是一个通用的压缩工具，在空间/时间性能的权衡中，居于另外两类压缩方法之间：它比bzip2压缩得快但压缩率较低，比LZO、LZ4、Snappy压缩率高但速度较慢。

bzip2的压缩能力强于gzip，但压缩速度更慢一些。

LZO、LZ4和Snappy均优化压缩速度，其速度比 gzip快一个数量级，但压缩效率稍逊一筹。Snappy和LZ4的解压缩速度比LZO高出很多。

测试前准备：

在d:/codec目录下放一个名为hadoop.pdf的文件，然后就可以粘贴以下代码块进行测试

package com.jr.compress;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;


/**
 * Round-trip demo for Hadoop's gzip codec: compresses {@code d:/codec/hadoop.pdf}
 * into a file named with the codec's default extension, then decompresses that
 * file again, printing the elapsed milliseconds of each step.
 *
 * <p>Every stream is closed in a {@code finally} block so that a failure in
 * the middle of a copy does not leak file handles.
 */
public class testCompress {

	/** Buffer size, in bytes, handed to {@link IOUtils#copyBytes}. */
	private static final int BUFFER_SIZE = 1024;

	/**
	 * Runs compression followed by decompression, in that order, because the
	 * decompression step reads the file produced by the compression step.
	 *
	 * @throws FileNotFoundException if an input file is missing or an output file cannot be created
	 * @throws IOException if reading, writing or (de)compressing fails
	 */
	@Test
	public void Compress() throws FileNotFoundException, IOException{
		testCompress();
		testUnGzip();
	}

	/**
	 * Compresses {@code d:/codec/hadoop.pdf} to {@code d:/codec/hadoop<ext>},
	 * where {@code <ext>} is the codec's default extension, and prints the
	 * elapsed time in milliseconds.
	 *
	 * @throws FileNotFoundException if the source file is missing or the target cannot be created
	 * @throws IOException if reading, compressing or writing fails
	 */
	public void testCompress() throws FileNotFoundException, IOException {
		CompressionCodec codec = newGzipCodec();
		// File extension the codec uses for its output.
		String ext = codec.getDefaultExtension();
		long start = System.currentTimeMillis();

		InputStream in = null;
		OutputStream out = null;
		try {
			// Open the source first so a missing input does not leave an empty output file behind.
			in = new FileInputStream("d:/codec/hadoop.pdf");
			// Keep the raw stream in 'out' while wrapping it: if the codec fails to
			// wrap it, the finally block still closes the underlying file.
			out = new FileOutputStream("d:/codec/hadoop" + ext);
			out = codec.createOutputStream(out);
			IOUtils.copyBytes(in, out, BUFFER_SIZE);
			// Close explicitly on the success path so that a failure while finishing
			// the compressed stream is reported instead of being swallowed.
			out.close();
			out = null;
		} finally {
			// closeStream ignores null and suppresses secondary close errors.
			IOUtils.closeStream(out);
			IOUtils.closeStream(in);
		}
		System.out.println(ext + " compress time : " + (System.currentTimeMillis() - start));
	}


	/**
	 * Decompresses {@code d:/codec/hadoop<ext>} into
	 * {@code d:/codec/hadoop_ext<ext>.pdf} and prints the elapsed time in
	 * milliseconds.
	 *
	 * <p>NOTE(review): this method is itself a JUnit test, yet it needs the file
	 * written by {@link #testCompress()}; run on its own before that file
	 * exists, it fails with {@link FileNotFoundException}.
	 *
	 * @throws FileNotFoundException if the compressed file is missing or the target cannot be created
	 * @throws IOException if reading, decompressing or writing fails
	 */
	@Test
	public void testUnGzip() throws FileNotFoundException, IOException {
		CompressionCodec codec = newGzipCodec();
		// File extension the codec uses for compressed files.
		String ext = codec.getDefaultExtension();
		long start = System.currentTimeMillis();

		// Decompressor handed to the codec's input stream; released in finally.
		Decompressor dcor = codec.createDecompressor();
		InputStream in = null;
		OutputStream out = null;
		try {
			// Keep the raw stream in 'in' while wrapping it so it is closed even
			// if the codec fails to create the decompressing stream.
			in = new FileInputStream("d:/codec/hadoop" + ext);
			// Decompressing input stream over the compressed file.
			in = codec.createInputStream(in, dcor);
			out = new FileOutputStream("d:/codec/hadoop_ext" + ext + ".pdf");
			IOUtils.copyBytes(in, out, BUFFER_SIZE);
			// Close explicitly on the success path so write/flush errors surface.
			out.close();
			out = null;
		} finally {
			IOUtils.closeStream(out);
			IOUtils.closeStream(in);
			// The codec may not hand out a decompressor at all, hence the null check.
			if (dcor != null) {
				dcor.end();
			}
		}
		System.out.println(System.currentTimeMillis() - start);
	}

	/**
	 * Creates a gzip codec configured with a fresh default {@link Configuration}.
	 * Passing the class literal directly keeps the call type-safe (no raw
	 * {@code Class}).
	 */
	private static CompressionCodec newGzipCodec() {
		return ReflectionUtils.newInstance(GzipCodec.class, new Configuration());
	}

}

hadoop大数据文件压缩Gzip代码实现

猜你喜欢