Java 之Excel 转HTML 篇

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URISyntaxException;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

/**
 * java将excel转换为HTML
 * 
 * @author [email protected]
 */
public class Excel2Html {

    private static final org.slf4j.Logger logger = LoggerFactory.getLogger(Excel2Html.class.getName());
    private static final String DEFAULT_PICTURE_FOLDER = "picturess";
    private static final String DEFAULT_HTML_TYPE = ".html";// 默认转换的HTML文件后缀

    public static void main(String[] args) throws URISyntaxException {
        File outputFolder = null;
        File outputPictureFolder = null;
        // 转换后HTML文件存放位置 Excel2Html.class.getResource("").toURI()
        outputFolder = new File("G:\\");
        if (null != outputFolder) {
            // 转换后原excel中图片存放位置
            String outputPictureFolderPath = outputFolder.getAbsolutePath() + File.separator + DEFAULT_PICTURE_FOLDER;
            outputPictureFolder = new File(outputPictureFolderPath);
            outputPictureFolder.mkdir();
        }
        try {
            // 被转换的excel文件
            File convertedWordFile = new File(
                    "G:\\test.xls");
            convert2Html(convertedWordFile, outputFolder, outputPictureFolder);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void excel2Html(String htmlUrl,String excelUrl) {
        File outputFolder = null;
        File outputPictureFolder = null;
        // 转换后HTML文件存放位置 Excel2Html.class.getResource("").toURI()
        outputFolder = new File(htmlUrl);
        if (null != outputFolder) {
            // 转换后原excel中图片存放位置
            String outputPictureFolderPath = outputFolder.getAbsolutePath() + File.separator + DEFAULT_PICTURE_FOLDER;
            outputPictureFolder = new File(outputPictureFolderPath);
            outputPictureFolder.mkdir();
        }
        try {
            // 被转换的excel文件
            File convertedWordFile = new File(excelUrl);
            convert2Html(convertedWordFile, outputFolder, outputPictureFolder);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void writeFile(String content, String path) {
        FileOutputStream fos = null;
        BufferedWriter bw = null;
        try {
            File file = new File(path);
            fos = new FileOutputStream(file);
            bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
            bw.write(content);
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            try {
                if (bw != null)
                    bw.close();
                if (fos != null)
                    fos.close();
            } catch (IOException ie) {
            }
        }
    }

    public static Workbook getWorkbook(File file) {
        Workbook workbook = null;
        try {
            if (null != file && file.exists()) {
                workbook = WorkbookFactory.create(file);
            }
        } catch (IOException e) {
            logger.error("IOException in getWorkbook:", e);
        } catch (InvalidFormatException e) {
            logger.error("InvalidFormatException in getWorkbook:", e);
        }
        return workbook;
    }

    /**
     * @param excelFile
     *            被转换的word文件
     * @param outputFolder
     *            转换后HTML文件存放位置
     * @param outputPictureFolder
     *            转换后原word中图片存放位置
     * @throws TransformerException
     * @throws IOException
     * @throws ParserConfigurationException
     */
    @SuppressWarnings("deprecation")
    public static void convert2Html(File excelFile, File outputFolder, final File outputPictureFolder)
            throws TransformerException, IOException, ParserConfigurationException {
        // 创建excel ExcelToHtmlConverter对象
        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        excelToHtmlConverter.setOutputColumnHeaders(false);
        excelToHtmlConverter.setOutputRowNumbers(false);

        // 创建POI工作薄对象
        HSSFWorkbook workbook = (HSSFWorkbook) getWorkbook(excelFile);
        excelToHtmlConverter.processWorkbook(workbook);

        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(out);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        
        String html = new String(out.toByteArray());
        
        int tbodya = html.indexOf("<tbody>");
        int tbodyb = html.indexOf("</tbody>");
        
        //tbody 转义之前的String
        String tbodyQ = html.substring(tbodya, tbodyb+8);
        //tbody 转义之后的String
        String tbodyH = StringEscapeUtils.unescapeXml(tbodyQ);
        html = html.replace(tbodyQ,tbodyH);
        
//        writePicures(workbook.getAllPictures(), outputPictureFolder.getAbsolutePath() + File.separator);
        writeFile(html,
                outputFolder.getAbsolutePath() + File.separator + excelFile.getName().replaceAll(".xls","") + DEFAULT_HTML_TYPE);
        out.close();
    }

    public static void writePicures(List<HSSFPictureData> pics, String picturesFolder) throws IOException {
        if (pics != null) {
            int count = 0;
            for (int i = 0; i < pics.size(); i++) {
                HSSFPictureData picData = pics.get(i);
                
                if (null == picData) {
                    continue;
                }
                byte[] bytes = picData.getData();
                FileOutputStream output = new FileOutputStream(
                        picturesFolder + count + "." + picData.suggestFileExtension());
                BufferedOutputStream writer = new BufferedOutputStream(output);
                writer.write(bytes);
                writer.flush();
                writer.close();
                output.close();
                count++;
            }
        }
    }

}
 

结果如下:

研究了两个小时,本以为是实现不了的,想尽了各种办法,最终还是实现了,满满的成就感与收获,高级工程师是没有这个耐心的,我离高级又近了一步了!哈哈哈~

猜你喜欢

转载自blog.csdn.net/m0_37598953/article/details/81460710
今日推荐