解析 Word（.docx）文档并提取其中的图片；在这段代码的基础上稍作扩展，还可以定位每张图片在文档中的位置。

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import com.google.common.collect.Maps;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
 
public class Test {
    
    public static void main(String[] args){
        String importPath = "D:/zyk_import/习题模板V1.4(2).docx";
        String absolutePath = "";
        try {
            FileInputStream inputStream = new FileInputStream(importPath);
            XWPFDocument xDocument = new XWPFDocument(inputStream);
            List<XWPFParagraph> paragraphs = xDocument.getParagraphs();
            List<XWPFPictureData> pictures = xDocument.getAllPictures();
            Map<String, String> map = Maps.newHashMap();
            for(XWPFPictureData picture : pictures){
                String id = picture.getPackageRelationship().getId();
                File folder = new File(absolutePath);
                if (!folder.exists()) {
                    folder.mkdirs();
                }
                String rawName = picture.getFileName();
                String fileExt = rawName.substring(rawName.lastIndexOf("."));
                String newName = System.currentTimeMillis() + UUID.randomUUID().toString() + fileExt;
                File saveFile = new File(absolutePath + File.separator + newName);
                @SuppressWarnings("resource")
                FileOutputStream fos = new FileOutputStream(saveFile); 
                fos.write(picture.getData());
                System.out.println(saveFile.getAbsolutePath());
                                map.put(id, saveFile.getAbsolutePath());
            }
String text = "";
int count = 0;
int paranum = 1;
int paraIndex = 0;
//默认先处理一个文档300道题目

String[] newresultStr = new String[30000];
for (XWPFParagraph paragraph : paragraphs) {

    paranum++;
    //System.out.println(paragraph.getParagraphText());
    List<XWPFRun> runs = paragraph.getRuns();
    String textTest = "";
    for (int i = 0; i < runs.size(); i++) {
        if (runs.get(i).toString().length() == 0) {
            textTest += tempImagesName[count++];
        } else {
            textTest += runs.get(i).toString();
        }
    }

    //这个textTest包含了题干，选项，答案，解析，其中有图片的都可以替换成「XXXXXXX_X」这种格式了
    /* System.out.println(textTest);*/
    //录入题目到数据库中去，一般一次性都是录入很多道题目的，因为有引入题库questionbankid，
    //录入后就完成了，然后前端显示，就在题目里增加images列，以逗号隔开[img1],[img2]等等
    // 显示的时候获取显示就好
    String[] resultStr = new String[1000];
    resultStr = textTest.split("\n");
    String[] oldresultStr = new String[1000];
    int countSum = 0;
    for(int i=0;i<resultStr.length;i++){
        if(resultStr[i].length()>0){
            oldresultStr[countSum++]=resultStr[i];
        }
    }
    newresultStr[paraIndex] = oldresultStr[0];
    paraIndex++;
    //所有题目的每一行段落都放置在了newresultStr 字符串数组里了
    //下面循环处理每一行段落
    //处理word模板解析每一个段落
    for (XWPFRun run : runs) {
        if (run.getCTR().xmlText().indexOf("<w:pict>") != -1) {
            String runXmlText = run.getCTR().xmlText();
            int rIdIndex = runXmlText.indexOf("r:id");
            int rIdEndIndex = runXmlText.indexOf("/>", rIdIndex);
            String rIdText = runXmlText.substring(rIdIndex, rIdEndIndex);
            System.out.println(rIdText.split("\"")[1].substring("rId".length()));
            String id = rIdText.split("\"")[1];
            text = text + "<img src = '" + map.get(id) + "'/>";
        } else {
            text = text + run;
        }
    }
}
//先将newresultStr[i]为null的去掉
String[] newresultStrWithoutNull = new String[30000];
int countSum = 0;
for(int i=0;i<paraIndex;i++){
    if(newresultStr[i]!=null){
        newresultStrWithoutNull[countSum++] = newresultStr[i];
    }
}

String[][] titlesInfo = new String[300][1000];
int titleNumber = 1;
int perTitleIndex = -1;
for(int i=0;i<300;i++){
    for(int k=0;k<1000;k++){
        titlesInfo[i][k]="";
    }
}
} catch (
IOException e) 
{ // TODO Auto-generated catch block e.printStackTrace(); } 
} 
}
解析 Word（.docx）文档并提取其中的图片；在这段代码的基础上稍作扩展，还可以定位每张图片在文档中的位置。

猜你喜欢