Java GZip 磁盘与内存压缩实现

  GZip是常用的无损压缩格式(基于DEFLATE算法),在Linux中较为常见,例如在Linux上安装软件时,安装包大多是.tar.gz格式。生成.tar.gz文件需要先用tar将目录内的文件打包成单个.tar文件(tar本身只打包、不压缩),再使用GZip对该.tar文件进行压缩。

  本文针对磁盘上和内存中两种方式,分别演示压缩和解压。演示只处理一层目录结构,多层目录递归处理即可。

  · Maven依赖

<dependency>
	<groupId>org.apache.commons</groupId>
	<artifactId>commons-compress</artifactId>
	<version>1.19</version>
</dependency>

  · 磁盘压缩和解压

  无特殊情况下,操作都是在磁盘上进行,将所有文件存放在某一目录中,然后对目录进行压缩,工具类代码如下:

package com.arhorchin.securitit.compress.gzip;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;

/**
 * Disk based helpers for producing and consuming {@code .tar} and {@code .gz} files
 * with Apache Commons Compress.
 *
 * <p>A {@code .tar.gz} file is produced by calling {@link #tarCompress(String, String)}
 * followed by {@link #gzipCompress(String, String)}, and consumed by calling
 * {@link #gzipDecompress(String, String)} followed by {@link #tarDecompress(String, String)}.
 */
public class GZipDiskUtil {

    /**
     * Name of the UTF-8 charset. No longer used inside this class; retained so that
     * existing code reading the constant keeps compiling.
     */
    public static final String UTF8_CHARSET = "UTF-8";

    /**
     * Size in bytes of the buffer used when copying between streams.
     */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Packs the regular files found directly inside a directory into a tar archive.
     *
     * <p>Only one directory level is handled: children that are not regular files
     * (for example sub-directories) are skipped, and so is the archive file itself
     * when it is located inside the source directory. Entries are named after the
     * plain file name, without any directory prefix.
     *
     * @param srcDirPath directory whose files are archived.
     * @param tarFilePath path of the tar file to create (overwritten if present).
     * @return always {@code true}; failures are reported through exceptions.
     * @throws Exception if the source cannot be listed as a directory or any read/write fails.
     */
    public static boolean tarCompress(String srcDirPath, String tarFilePath) throws Exception {
        File tarFile = new File(tarFilePath);
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] children = new File(srcDirPath).listFiles();
        if (children == null) {
            throw new IOException("Not a readable directory: " + srcDirPath);
        }
        File canonicalTarFile = tarFile.getCanonicalFile();

        try (FileOutputStream fos = new FileOutputStream(tarFile);
                TarArchiveOutputStream taos = new TarArchiveOutputStream(fos)) {
            for (File file : children) {
                // A directory cannot be opened as a stream, and the archive must not swallow itself.
                if (!file.isFile() || file.getCanonicalFile().equals(canonicalTarFile)) {
                    continue;
                }
                TarArchiveEntry tae = new TarArchiveEntry(file);
                tae.setName(file.getName());
                taos.putArchiveEntry(tae);
                try (FileInputStream fis = new FileInputStream(file);
                        BufferedInputStream bis = new BufferedInputStream(fis)) {
                    copy(bis, taos);
                }
                // Closed only after a successful copy, so a copy failure is not
                // replaced by a secondary error about the incomplete entry.
                taos.closeArchiveEntry();
            }
        }
        return true;
    }

    /**
     * Unpacks a tar archive into a directory.
     *
     * <p>The target directory and any missing parent directories of an entry are
     * created on demand. Entries whose resolved path would fall outside the target
     * directory (for example names containing {@code ../}) are rejected.
     *
     * @param srcFilePath path of the tar file to read.
     * @param targDirPath directory that receives the extracted entries.
     * @return always {@code true}; failures are reported through exceptions.
     * @throws Exception if an entry escapes the target directory, a directory cannot
     *         be created, or any read/write fails.
     */
    public static boolean tarDecompress(String srcFilePath, String targDirPath) throws Exception {
        File targetDir = new File(targDirPath);

        try (FileInputStream fis = new FileInputStream(new File(srcFilePath));
                TarArchiveInputStream tais = new TarArchiveInputStream(fis)) {
            TarArchiveEntry tae;
            while ((tae = tais.getNextTarEntry()) != null) {
                File outFile = new File(targDirPath + File.separator + tae.getName());
                if (!isInside(targetDir, outFile)) {
                    throw new IOException("Tar entry is outside of the target directory: " + tae.getName());
                }
                if (tae.isDirectory()) {
                    ensureDirectory(outFile);
                    continue;
                }
                ensureDirectory(outFile.getParentFile());
                try (FileOutputStream fos = new FileOutputStream(outFile);
                        BufferedOutputStream bos = new BufferedOutputStream(fos)) {
                    // Reading from the tar stream stops at the end of the current entry.
                    copy(tais, bos);
                }
            }
        }
        return true;
    }

    /**
     * Compresses a single file with gzip.
     *
     * @param srcFilePath path of the file to compress.
     * @param tarFilePath path of the gzip file to create (overwritten if present).
     * @return always {@code true}; failures are reported through exceptions.
     * @throws IOException if any read/write fails.
     */
    public static boolean gzipCompress(String srcFilePath, String tarFilePath) throws IOException {
        try (InputStream fin = Files.newInputStream(Paths.get(srcFilePath));
                BufferedInputStream bis = new BufferedInputStream(fin);
                FileOutputStream fos = new FileOutputStream(tarFilePath);
                BufferedOutputStream bos = new BufferedOutputStream(fos);
                GzipCompressorOutputStream gcos = new GzipCompressorOutputStream(bos)) {
            copy(bis, gcos);
        }
        return true;
    }

    /**
     * Decompresses a gzip file.
     *
     * <p>The source is opened and wrapped in the gzip stream before the target is
     * created, so a missing source does not leave behind an empty or truncated target.
     *
     * <p>NOTE(review): the single-argument {@code GzipCompressorInputStream} constructor
     * is used; per the Commons Compress documentation it stops after the first gzip
     * member, so concatenated gzip files are not fully decoded.
     *
     * @param srcFilePath path of the gzip file to read.
     * @param tarFilePath path of the decompressed file to create (overwritten if present).
     * @return always {@code true}; failures are reported through exceptions.
     * @throws IOException if any read/write fails.
     */
    public static boolean gzipDecompress(String srcFilePath, String tarFilePath) throws IOException {
        try (InputStream fin = Files.newInputStream(Paths.get(srcFilePath));
                BufferedInputStream in = new BufferedInputStream(fin);
                GzipCompressorInputStream gcis = new GzipCompressorInputStream(in);
                OutputStream out = Files.newOutputStream(Paths.get(tarFilePath))) {
            copy(gcis, out);
        }
        return true;
    }

    /**
     * Copies everything that can be read from {@code in} to {@code out}; closes neither stream.
     */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buffer = new byte[BUFFER_SIZE];
        int count;
        while ((count = in.read(buffer)) != -1) {
            out.write(buffer, 0, count);
        }
    }

    /**
     * Tells whether {@code candidate} resolves to {@code dir} itself or to a path below it.
     */
    private static boolean isInside(File dir, File candidate) throws IOException {
        String dirPath = dir.getCanonicalPath();
        String candidatePath = candidate.getCanonicalPath();
        if (candidatePath.equals(dirPath)) {
            return true;
        }
        // Compare against "dir/" so that a sibling such as "dir-evil" is not accepted.
        String prefix = dirPath.endsWith(File.separator) ? dirPath : dirPath + File.separator;
        return candidatePath.startsWith(prefix);
    }

    /**
     * Creates {@code dir} and its missing parents; a {@code null} or existing directory is a no-op.
     */
    private static void ensureDirectory(File dir) throws IOException {
        if (dir == null || dir.isDirectory()) {
            return;
        }
        // Re-check after mkdirs(): it also returns false when the directory appeared concurrently.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir);
        }
    }

}

  测试代码如下:

package com.arhorchin.securitit.com.compress;

import com.arhorchin.securitit.compress.gzip.GZipDiskUtil;

/**
 * Manual demo for {@link GZipDiskUtil}: packs and gzips a directory, then reverses both steps.
 */
public class GZipDiskUtilTester {

    /**
     * Directory that holds the demo input and receives every generated file.
     */
    private static final String BASE_DIR = "C:/Users/Administrator/Downloads/个人文件/2020-07-13";

    /**
     * Runs the round trip: directory -> tar -> tar.gz -> tar -> directory.
     *
     * @param args unused.
     * @throws Exception if any step fails.
     */
    public static void main(String[] args) throws Exception {
        final String sourceDir = BASE_DIR + "/files";
        final String packedTar = BASE_DIR + "/disk.tar";
        final String packedTarGz = BASE_DIR + "/disk.tar.gz";
        final String restoredTar = BASE_DIR + "/disk-untar.tar";
        final String restoredDir = BASE_DIR + "/disk-untar";

        // Forward direction: archive the directory, then gzip the archive.
        GZipDiskUtil.tarCompress(sourceDir, packedTar);
        GZipDiskUtil.gzipCompress(packedTar, packedTarGz);

        // Reverse direction: gunzip back to a tar, then unpack it.
        GZipDiskUtil.gzipDecompress(packedTarGz, restoredTar);
        GZipDiskUtil.tarDecompress(restoredTar, restoredDir);
    }

}

  · 内存压缩和解压

  在实际应用中,针对不同需求,可能需要先生成若干文件,再将其压缩。在某些应用中,文件较小、文件数量较少且较为固定,频繁读写磁盘会带来不必要的效率损耗。此时,可以直接在内存中将文件打包压缩,得到.tar.gz文件,工具类代码如下:

package com.arhorchin.securitit.compress.gzip;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;

/**
 * In-memory helpers for tar and gzip: every method works on byte arrays and never touches the disk.
 */
public class GZipMemoryUtil {

    /**
     * Builds a tar archive from a set of named byte arrays.
     *
     * @param fileBytesMap entry name mapped to that entry's content.
     * @return bytes of the resulting tar archive.
     * @throws Exception if writing the archive fails.
     */
    public static byte[] tarCompress(Map<String, byte[]> fileBytesMap) throws Exception {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (TarArchiveOutputStream tarOut = new TarArchiveOutputStream(sink)) {
            for (Map.Entry<String, byte[]> item : fileBytesMap.entrySet()) {
                final String name = item.getKey();
                final byte[] content = item.getValue();

                final TarArchiveEntry header = new TarArchiveEntry(name);
                header.setName(name);
                // The entry size is declared before the entry is opened.
                header.setSize(content.length);

                tarOut.putArchiveEntry(header);
                tarOut.write(content);
                tarOut.closeArchiveEntry();
            }
        }
        // Snapshot only after the tar stream is closed, so anything it emits on close is included.
        return sink.toByteArray();
    }

    /**
     * Reads every entry of a tar archive into memory.
     *
     * @param fileBytes bytes of a tar archive.
     * @return entry name mapped to that entry's content.
     * @throws Exception if reading the archive fails.
     */
    public static Map<String, byte[]> tarDecompress(byte[] fileBytes) throws Exception {
        final Map<String, byte[]> extracted = new HashMap<>();
        try (TarArchiveInputStream tarIn = new TarArchiveInputStream(new ByteArrayInputStream(fileBytes))) {
            for (TarArchiveEntry header = tarIn.getNextTarEntry(); header != null; header = tarIn
                    .getNextTarEntry()) {
                final ByteArrayOutputStream content = new ByteArrayOutputStream();
                final byte[] chunk = new byte[1024];
                // Drain the current entry chunk by chunk until the stream reports its end.
                for (int got = tarIn.read(chunk, 0, 1024); got != -1; got = tarIn.read(chunk, 0, 1024)) {
                    content.write(chunk, 0, got);
                }
                extracted.put(header.getName(), content.toByteArray());
            }
        }
        return extracted;
    }

    /**
     * Compresses a byte array with gzip.
     *
     * @param fileBytes raw bytes to compress.
     * @return gzip-compressed bytes.
     * @throws IOException if compression fails.
     */
    public static byte[] gzipCompress(byte[] fileBytes) throws IOException {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(sink)) {
            gzipOut.write(fileBytes);
        }
        // Snapshot only after the gzip stream is closed, so anything it emits on close is included.
        return sink.toByteArray();
    }

    /**
     * Decompresses gzip-compressed bytes.
     *
     * @param fileBytes gzip-compressed bytes.
     * @return decompressed bytes.
     * @throws IOException if decompression fails.
     */
    public static byte[] gzipDecompress(byte[] fileBytes) throws IOException {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(
                new ByteArrayInputStream(fileBytes))) {
            final byte[] chunk = new byte[1024];
            for (int got = gzipIn.read(chunk); got != -1; got = gzipIn.read(chunk)) {
                sink.write(chunk, 0, got);
            }
        }
        return sink.toByteArray();
    }

}

  测试代码如下:

package com.arhorchin.securitit.com.compress;

import java.io.File;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.io.FileUtils;

import com.arhorchin.securitit.compress.gzip.GZipMemoryUtil;

/**
 * Manual demo for {@link GZipMemoryUtil}: builds a tar.gz in memory, writes it to disk,
 * then unpacks the same bytes again and prints how many entries came back.
 */
public class GZipMemoryUtilTester {

    /**
     * Runs the in-memory round trip.
     *
     * @param args unused.
     * @throws Exception if any step fails.
     */
    public static void main(String[] args) throws Exception {
        final String baseDir = "C:/Users/Administrator/Downloads/个人文件/2020-07-13";

        // Load every child of the input directory into memory, keyed by file name.
        final Map<String, byte[]> inputs = new HashMap<>();
        for (File child : new File(baseDir + "/files").listFiles()) {
            final byte[] content = FileUtils.readFileToByteArray(child);
            inputs.put(child.getName(), content);
        }

        // Forward direction: tar, then gzip, then persist the result.
        final byte[] tarBytes = GZipMemoryUtil.tarCompress(inputs);
        final byte[] tarGzBytes = GZipMemoryUtil.gzipCompress(tarBytes);
        final File output = new File(baseDir + "/memory.tar.gz");
        FileUtils.writeByteArrayToFile(output, tarGzBytes);

        // Reverse direction: gunzip, then untar, then report the entry count.
        final byte[] restoredTar = GZipMemoryUtil.gzipDecompress(tarGzBytes);
        final Map<String, byte[]> restored = GZipMemoryUtil.tarDecompress(restoredTar);
        System.out.println(restored.size());
    }

}

  · 总结

  1) 在文件较小、文件数量较少且较为固定时,建议使用内存压缩和解压方式,以内存换时间,减少频繁的磁盘操作。

  2) 在文件较大或文件数量较多时,建议使用磁盘压缩和解压方式。将过大的文件整体载入内存会给服务带来过重的内存负担,磁盘压缩和解压以流的方式分块处理,可以缓解这种压力。

猜你喜欢

转载自blog.csdn.net/securitit/article/details/107328132