GZip是常用的无损压缩格式(基于DEFLATE算法),在Linux中较为常见,例如在Linux上安装软件时,安装包大多是.tar.gz格式。生成.tar.gz文件需要先使用tar将目录内的文件打包(归档)成单个.tar文件(tar本身并不压缩),然后再使用GZip对该.tar文件进行压缩。
本文针对磁盘上和内存中两种方式进行压缩和解压演示,演示只针对一层目录结构进行,多层目录只需递归操作进行即可。
· Maven依赖
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.19</version>
</dependency>
· 磁盘压缩和解压
无特殊情况下,操作都是在磁盘上进行,将所有文件存放在某一目录中,然后对目录进行压缩,工具类代码如下:
package com.arhorchin.securitit.compress.gzip;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
/**
 * Disk-based helpers for producing and consuming {@code .tar} and {@code .gz} files.
 *
 * <p>A {@code .tar.gz} file is produced by calling {@link #tarCompress} followed by
 * {@link #gzipCompress}, and consumed by calling {@link #gzipDecompress} followed by
 * {@link #tarDecompress}. Only a single directory level is packed.
 */
public class GZipDiskUtil {

    /**
     * Name of the UTF-8 charset; used to encode entry names in the tar headers.
     */
    public static final String UTF8_CHARSET = "UTF-8";

    /**
     * Size in bytes of the buffer used when copying between streams.
     */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Packs the regular files located directly inside a directory into a tar archive.
     *
     * <p>Subdirectories are skipped (single-level packing), and so is the archive file itself
     * when it happens to live inside the source directory. Entry names are the plain file names.
     *
     * @param srcDirPath  directory whose files are archived.
     * @param tarFilePath path of the tar file to create (overwritten if it exists).
     * @return always {@code true}; failures are reported by exception.
     * @throws Exception if the source is not a readable directory or any I/O operation fails.
     */
    public static boolean tarCompress(String srcDirPath, String tarFilePath) throws Exception {
        // Validate the source before creating the archive so a bad path leaves no empty tar behind.
        File[] children = new File(srcDirPath).listFiles();
        if (children == null) {
            throw new IOException("Not a readable directory: " + srcDirPath);
        }
        File tarFile = new File(tarFilePath).getCanonicalFile();
        try (OutputStream fos = new FileOutputStream(tarFile);
                OutputStream bos = new BufferedOutputStream(fos);
                TarArchiveOutputStream taos = new TarArchiveOutputStream(bos, UTF8_CHARSET)) {
            // The default mode rejects names longer than 100 bytes; PAX headers lift that limit
            // and also carry non-ASCII names independently of the platform charset.
            taos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
            taos.setAddPaxHeadersForNonAsciiNames(true);
            for (File child : children) {
                if (!child.isFile() || child.getCanonicalFile().equals(tarFile)) {
                    continue;
                }
                taos.putArchiveEntry(new TarArchiveEntry(child, child.getName()));
                try (InputStream in = new FileInputStream(child)) {
                    copy(in, taos);
                }
                // Only reached on success, so a failed copy is not masked by a second exception.
                taos.closeArchiveEntry();
            }
        }
        return true;
    }

    /**
     * Unpacks a tar archive into a directory, creating the directory if necessary.
     *
     * <p>Entries whose names would resolve outside the target directory (for example names
     * containing {@code ../}) are rejected instead of being written.
     *
     * @param srcFilePath path of the tar file to read.
     * @param targDirPath directory that receives the unpacked entries.
     * @return always {@code true}; failures are reported by exception.
     * @throws Exception if an entry escapes the target directory or any I/O operation fails.
     */
    public static boolean tarDecompress(String srcFilePath, String targDirPath) throws Exception {
        try (InputStream fis = new FileInputStream(new File(srcFilePath));
                InputStream bis = new BufferedInputStream(fis);
                TarArchiveInputStream tais = new TarArchiveInputStream(bis)) {
            File targetDir = new File(targDirPath).getCanonicalFile();
            ensureDirectory(targetDir);
            String targetPrefix = targetDir.getPath();
            if (!targetPrefix.endsWith(File.separator)) {
                targetPrefix += File.separator;
            }
            TarArchiveEntry entry;
            while ((entry = tais.getNextTarEntry()) != null) {
                File target = new File(targetDir, entry.getName()).getCanonicalFile();
                boolean inside = target.getPath().startsWith(targetPrefix);
                if (entry.isDirectory()) {
                    // A directory entry such as "./" may legitimately denote the target itself.
                    if (!inside && !target.equals(targetDir)) {
                        throw new IOException("Tar entry escapes target directory: " + entry.getName());
                    }
                    ensureDirectory(target);
                    continue;
                }
                if (!inside) {
                    throw new IOException("Tar entry escapes target directory: " + entry.getName());
                }
                ensureDirectory(target.getParentFile());
                try (OutputStream fos = new FileOutputStream(target);
                        OutputStream bos = new BufferedOutputStream(fos)) {
                    // Reading from the tar stream stops at the end of the current entry.
                    copy(tais, bos);
                }
            }
        }
        return true;
    }

    /**
     * Compresses a single file with gzip.
     *
     * @param srcFilePath path of the file to compress.
     * @param tarFilePath path of the gzip file to create (overwritten if it exists).
     * @return always {@code true}; failures are reported by exception.
     * @throws IOException if reading the source or writing the target fails.
     */
    public static boolean gzipCompress(String srcFilePath, String tarFilePath) throws IOException {
        try (InputStream in = Files.newInputStream(Paths.get(srcFilePath));
                OutputStream fos = new FileOutputStream(tarFilePath);
                OutputStream bos = new BufferedOutputStream(fos);
                GzipCompressorOutputStream gcos = new GzipCompressorOutputStream(bos)) {
            copy(in, gcos);
        }
        return true;
    }

    /**
     * Decompresses a gzip file into a single output file.
     *
     * @param srcFilePath path of the gzip file to read.
     * @param tarFilePath path of the decompressed file to create (overwritten if it exists).
     * @return always {@code true}; failures are reported by exception.
     * @throws IOException if the source is not valid gzip data or any I/O operation fails.
     */
    public static boolean gzipDecompress(String srcFilePath, String tarFilePath) throws IOException {
        // The output is opened last so a missing or non-gzip source does not truncate the target.
        try (InputStream fin = Files.newInputStream(Paths.get(srcFilePath));
                InputStream bin = new BufferedInputStream(fin);
                GzipCompressorInputStream gcis = new GzipCompressorInputStream(bin);
                OutputStream out = Files.newOutputStream(Paths.get(tarFilePath))) {
            copy(gcis, out);
        }
        return true;
    }

    /**
     * Copies everything that can be read from {@code in} to {@code out}; closes neither stream.
     */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buffer = new byte[BUFFER_SIZE];
        int read;
        while ((read = in.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
    }

    /**
     * Makes sure {@code dir} exists as a directory, creating missing parents as needed.
     */
    private static void ensureDirectory(File dir) throws IOException {
        if (dir == null || dir.isDirectory()) {
            return;
        }
        // Re-check after mkdirs() so a concurrent creation is not reported as a failure.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir);
        }
    }
}
测试代码如下:
package com.arhorchin.securitit.com.compress;
import com.arhorchin.securitit.compress.gzip.GZipDiskUtil;
/**
 * Manual demo for {@link GZipDiskUtil}: packs a directory into {@code disk.tar.gz} and unpacks
 * it again next to the original.
 */
public class GZipDiskUtilTester {

    /**
     * Working directory used when no argument is given; it must contain a {@code files} folder.
     */
    private static final String DEFAULT_BASE_DIR = "C:/Users/Administrator/Downloads/个人文件/2020-07-13";

    /**
     * Runs tar, gzip, gunzip and untar in sequence.
     *
     * @param args optional; {@code args[0]} overrides the default working directory.
     * @throws Exception if any step fails.
     */
    public static void main(String[] args) throws Exception {
        String baseDir = (args != null && args.length > 0) ? args[0] : DEFAULT_BASE_DIR;
        String tarPath = baseDir + "/disk.tar";
        String gzipPath = baseDir + "/disk.tar.gz";
        String untarTarPath = baseDir + "/disk-untar.tar";
        String untarDirPath = baseDir + "/disk-untar";

        GZipDiskUtil.tarCompress(baseDir + "/files", tarPath);
        GZipDiskUtil.gzipCompress(tarPath, gzipPath);
        GZipDiskUtil.gzipDecompress(gzipPath, untarTarPath);

        // The unpack target does not exist on a fresh run; create it so the last step cannot
        // fail merely because the directory is missing.
        java.io.File untarDir = new java.io.File(untarDirPath);
        if (!untarDir.isDirectory() && !untarDir.mkdirs()) {
            throw new java.io.IOException("Unable to create directory: " + untarDirPath);
        }
        GZipDiskUtil.tarDecompress(untarTarPath, untarDirPath);
    }
}
· 内存压缩和解压
在实际应用中,根据不同需求,可能需要先生成若干文件,然后将其压缩。在某些应用中,文件较小、文件数量较少且较为固定,此时频繁读写磁盘会带来不必要的性能开销。这种情况下,可以直接在内存中对文件进行打包和压缩,得到.tar.gz文件,工具类代码如下:
package com.arhorchin.securitit.compress.gzip;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
/**
 * In-memory helpers for producing and consuming {@code .tar} and {@code .gz} content.
 *
 * <p>A {@code .tar.gz} byte array is produced by calling {@link #tarCompress} followed by
 * {@link #gzipCompress}, and consumed by calling {@link #gzipDecompress} followed by
 * {@link #tarDecompress}.
 */
public class GZipMemoryUtil {

    /**
     * Charset used to encode entry names in the tar headers.
     */
    private static final String ENTRY_NAME_CHARSET = "UTF-8";

    /**
     * Size in bytes of the buffer used when draining streams.
     */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Packs a set of named byte arrays into a tar archive.
     *
     * @param fileBytesMap entry name to entry content.
     * @return the bytes of the tar archive.
     * @throws Exception if writing an entry fails.
     */
    public static byte[] tarCompress(Map<String, byte[]> fileBytesMap) throws Exception {
        ByteArrayOutputStream tarBytes = new ByteArrayOutputStream();
        try (TarArchiveOutputStream taos = new TarArchiveOutputStream(tarBytes, ENTRY_NAME_CHARSET)) {
            // The default mode rejects names longer than 100 bytes; PAX headers lift that limit
            // and also carry non-ASCII names independently of the platform charset.
            taos.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);
            taos.setAddPaxHeadersForNonAsciiNames(true);
            for (Map.Entry<String, byte[]> fileEntry : fileBytesMap.entrySet()) {
                byte[] content = fileEntry.getValue();
                TarArchiveEntry tae = new TarArchiveEntry(fileEntry.getKey());
                // The size must be set before the entry is put, as it is written into the header.
                tae.setSize(content.length);
                taos.putArchiveEntry(tae);
                taos.write(content);
                taos.closeArchiveEntry();
            }
        }
        // Read the buffer only after the archive stream is closed, i.e. after the trailer is written.
        return tarBytes.toByteArray();
    }

    /**
     * Unpacks a tar archive held in memory.
     *
     * @param fileBytes the bytes of the tar archive.
     * @return entry name to entry content, one mapping per archive entry.
     * @throws Exception if the archive cannot be read.
     */
    public static Map<String, byte[]> tarDecompress(byte[] fileBytes) throws Exception {
        Map<String, byte[]> fileBytesMap = new HashMap<String, byte[]>();
        try (TarArchiveInputStream tais = new TarArchiveInputStream(new ByteArrayInputStream(fileBytes))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            TarArchiveEntry tae;
            while ((tae = tais.getNextTarEntry()) != null) {
                ByteArrayOutputStream content = new ByteArrayOutputStream();
                int read;
                // Reading from the tar stream stops at the end of the current entry.
                while ((read = tais.read(buffer, 0, buffer.length)) != -1) {
                    content.write(buffer, 0, read);
                }
                fileBytesMap.put(tae.getName(), content.toByteArray());
            }
        }
        return fileBytesMap;
    }

    /**
     * Compresses a byte array with gzip.
     *
     * @param fileBytes the content to compress.
     * @return the gzip-compressed content.
     * @throws IOException if compression fails.
     */
    public static byte[] gzipCompress(byte[] fileBytes) throws IOException {
        ByteArrayOutputStream gzipBytes = new ByteArrayOutputStream();
        try (GzipCompressorOutputStream gcos = new GzipCompressorOutputStream(gzipBytes)) {
            gcos.write(fileBytes);
        }
        // Read the buffer only after the gzip stream is closed, i.e. after the trailer is written.
        return gzipBytes.toByteArray();
    }

    /**
     * Decompresses gzip content held in memory.
     *
     * @param fileBytes the gzip-compressed content.
     * @return the decompressed content.
     * @throws IOException if the input is not valid gzip data.
     */
    public static byte[] gzipDecompress(byte[] fileBytes) throws IOException {
        ByteArrayOutputStream plainBytes = new ByteArrayOutputStream();
        try (GzipCompressorInputStream gcis = new GzipCompressorInputStream(new ByteArrayInputStream(fileBytes))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = gcis.read(buffer)) != -1) {
                plainBytes.write(buffer, 0, read);
            }
        }
        return plainBytes.toByteArray();
    }
}
测试代码如下(其中FileUtils来自commons-io,需要在上述Maven依赖之外另行引入commons-io依赖):
package com.arhorchin.securitit.com.compress;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import com.arhorchin.securitit.compress.gzip.GZipMemoryUtil;
/**
 * Manual demo for {@link GZipMemoryUtil}: loads the files of a directory into memory, builds
 * {@code memory.tar.gz} from them, then unpacks the result again and checks the round trip.
 */
public class GZipMemoryUtilTester {

    /**
     * Working directory used when no argument is given; it must contain a {@code files} folder.
     */
    private static final String DEFAULT_BASE_DIR = "C:/Users/Administrator/Downloads/个人文件/2020-07-13";

    /**
     * Runs tar, gzip, gunzip and untar in memory and prints the number of restored entries.
     *
     * @param args optional; {@code args[0]} overrides the default working directory.
     * @throws Exception if any step fails or the restored content differs from the source.
     */
    public static void main(String[] args) throws Exception {
        String baseDir = (args != null && args.length > 0) ? args[0] : DEFAULT_BASE_DIR;

        // Collect the files to pack.
        File dirFile = new File(baseDir + "/files");
        File[] children = dirFile.listFiles();
        if (children == null) {
            // listFiles() returns null when the path is missing or is not a directory.
            throw new IllegalArgumentException("Not a readable directory: " + dirFile);
        }
        Map<String, byte[]> sourceFiles = new HashMap<String, byte[]>();
        for (File file : children) {
            // Single-level demo: subdirectories cannot be read as byte arrays, so skip them.
            if (file.isFile()) {
                sourceFiles.put(file.getName(), FileUtils.readFileToByteArray(file));
            }
        }

        byte[] memoryBytes = GZipMemoryUtil.tarCompress(sourceFiles);
        memoryBytes = GZipMemoryUtil.gzipCompress(memoryBytes);
        FileUtils.writeByteArrayToFile(new File(baseDir + "/memory.tar.gz"), memoryBytes);

        memoryBytes = GZipMemoryUtil.gzipDecompress(memoryBytes);
        Map<String, byte[]> restoredFiles = GZipMemoryUtil.tarDecompress(memoryBytes);
        System.out.println(restoredFiles.size());

        // Verify that every source file came back byte-for-byte.
        for (Map.Entry<String, byte[]> source : sourceFiles.entrySet()) {
            byte[] restored = restoredFiles.get(source.getKey());
            if (!java.util.Arrays.equals(source.getValue(), restored)) {
                throw new IllegalStateException("Round trip mismatch for entry: " + source.getKey());
            }
        }
    }
}
· 总结
1) 在文件较小、文件数量较少且较为固定时,提倡使用内存压缩和解压方式。以内存换时间,减少频繁的磁盘读写。
2) 在文件较大、文件数量较多时,提倡使用磁盘压缩和解压方式。将过大的文件整体读入内存会给服务带来过高的内存负载,以流的方式在磁盘上进行压缩和解压可以缓解这种压力。