Java:使用 POI 4.0.1 读取 Word 文档中的文本和图片
import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.List; import java.util.UUID; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.model.PicturesTable; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.ooxml.extractor.POIXMLTextExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFPictureData; import org.junit.Test; public class WordTest { @Test public void testWord() { // String path = "D:\\temp\\temp\\test.doc"; String path = "D:\\temp\\temp\\test.docx"; String content = null; File file = new File(path); if (file.exists() && file.isFile()) { InputStream is = null; HWPFDocument doc = null; XWPFDocument docx = null; POIXMLTextExtractor extractor = null; try { is = new FileInputStream(file); if (path.endsWith(".doc")) { doc = new HWPFDocument(is); // 文档文本内容 content = doc.getDocumentText(); // 文档图片内容 PicturesTable picturesTable = doc.getPicturesTable(); List<Picture> pictures = picturesTable.getAllPictures(); for (Picture picture : pictures) { // 输出图片到磁盘 OutputStream out = new FileOutputStream( new File("D:\\temp\\" + UUID.randomUUID() + "." 
+ picture.suggestFileExtension())); picture.writeImageContent(out); out.close(); } } else if (path.endsWith("docx")) { docx = new XWPFDocument(is); extractor = new XWPFWordExtractor(docx); // 文档文本内容 content = extractor.getText(); // 文档图片内容 List<XWPFPictureData> pictures = docx.getAllPictures(); for (XWPFPictureData picture : pictures) { byte[] bytev = picture.getData(); // 输出图片到磁盘 FileOutputStream out = new FileOutputStream( "D:\\temp\\temp\\" + UUID.randomUUID() + picture.getFileName()); out.write(bytev); out.close(); } } else { System.out.println("此文件不是word文件!"); } System.out.println(content); } catch (FileNotFoundException e) { } catch (IOException e) { } finally { try { if (doc != null) { doc.close(); } if (extractor != null) { extractor.close(); } if (docx != null) { docx.close(); } if (is != null) { is.close(); } } catch (IOException e) { } } } } }
解析 String 中多段被「」包裹的图片名字,并把图片存储到 Spring Boot 的 resources/static 文件夹下。
import com.example.tsfunproj.entity.test.ImageTest; import com.example.tsfunproj.service.test.ImageTestService; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.junit4.SpringRunner; import org.springframework.util.ClassUtils; import org.springframework.util.ResourceUtils; import java.io.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @RunWith(SpringRunner.class) @SpringBootTest public class ImageUtilTest { @Autowired private ImageTestService imageTestService; @Test public void testStorageImage() throws IOException { //开始写一段代码简单测试一下图片存储问题 //存储+取出来都要测试一下 //先不去管如何存入的,先测试一下已经存入后取出是否可以取出 //以下这段话,是存入数据库的,取出来,解析出图片名字,然后这道题目有questionbankid, //获取questionbankid,间接获取subjectid,titletypeid,floorid,titlenumber,这些数字, //拼凑成 subjectid/titletypeid/floorid/titlenumber String titleStem = "根据《工程岩体试验方法标准》(GB/T 50266—1999)第2. 2. 6条," + "岩石颗粒密度为:\\n「2454683_1」\\n式中,m1、m2、ms分别" + "为瓶及试液总质量,瓶、试液及岩粉总质量,干岩粉的质量。" + "ρ0为与试验温度同温的试液密度。"; String titleStem2 = "根据《工程岩体试验方法标准》(GB/T 50266—1999)第2. 2. 6条," + "岩石颗粒密度为:「2454683_1」式中,m1、m2、ms分别" + "为瓶及试液总质量,瓶、试液及岩粉总质量,干岩粉的质量。" + "ρ0为与试验温度同温的试液密度。根据《工程岩体试验方法标准》(GB/T 50266—1999)第2. 2. 
6条,\" +\n" + " 岩石颗粒密度为:「2222683_1」式中,m1、m2、ms分别\" +\n" + " 为瓶及试液总质量,瓶、试液及岩粉总质量,干岩粉的质量。\" +\n" + " ρ0为与试验温度同温的试液密度。"; String questionbankid = "20200418104401abcdef"; String subjectid = "000006"; String titletypeid = 1+""; String floorid = 22+""; String titlenumber=1+""; //这样题目网页访问路径就定义出来了 //题目里单个图片解析 int begin = titleStem.indexOf("「"); int end = titleStem.indexOf("」"); String imageName = titleStem.substring(begin+1,end); System.out.println(imageName); ImageTest imageTest = new ImageTest(); imageTest.setImageAddr(imageName); imageTest.setContent("2020-04-18 测试部分代码"); imageTestService.saveImageTest(imageTest); //数据库存储的是整个可以打开的图片链接吗?还是仅仅是名字。 //应该存储可以打开的整个图片链接吧。 //questionIMG/26/1/11/2454698_0.jpg 这个图片路径是我们自己创建的,有两步,一部是存入这个链接到数据库 //一步是真实创建文件夹,存入图片。 //获取项目根目录 + 拼接 questionIMG/ + subjectid/ titletypeid/floorid/titlenubmer/ imageName.jpg //存储到项目target根节点的部分,没有处理好跟localhost:8080的映射 String rootPath1 = ClassUtils.getDefaultClassLoader().getResource("static/questionIMG/").getPath(); String rootPath = "src\\main\\resources\\static\\image\\"; String realfilePath1 = rootPath1+subjectid+"/"+titletypeid+"/"+floorid+"/"+titlenumber+"/"; String realfilePath = rootPath+subjectid+"/"+titletypeid+"/"+floorid+"/"+titlenumber+"/"; File upload = new File(realfilePath); if(!upload.exists()) { upload.mkdirs(); } System.out.println("upload url:"+upload.getAbsolutePath()); File imageFile = new File(realfilePath+"/"+imageName+".jpg"); InputStream is=new FileInputStream(realfilePath1+"/"+1+".jpg"); //本地图片,到时候替换成上传的题目内部图片 OutputStream os=new FileOutputStream(realfilePath+"/"+imageName+".jpg");//存储图片 byte[] buffer=new byte[1024]; int len=0; while((len=is.read(buffer))!=-1){ os.write(buffer, 0, len); } os.close(); is.close(); //------------------------------------------------------------------ //题目里多个图片解析,解析出问题,先放着,以后处理 String[] imageNames = new String[100 ]; int count = countStr(titleStem2,"「"); for(int i=1;i<=count;i++){ imageNames[i] = getIndexOfJson(titleStem2,i); 
System.out.println(imageNames[i]); } } /** * 获取项目根路径 * * @return */ private static String getResourceBasePath() { // 获取跟目录 File path = null; try { path = new File(ResourceUtils.getURL("classpath:").getPath()); } catch (FileNotFoundException e) { // nothing to do } if (path == null || !path.exists()) { path = new File(""); } String pathStr = path.getAbsolutePath(); // 如果是在eclipse中运行,则和target同级目录,如果是jar部署到服务器,则默认和jar包同级 pathStr = pathStr.replace("\\target\\classes", ""); return pathStr; }
// 获取第index位置的指定字符串 public String getIndexOfJson(String str, int index) { int beginindex = findStrIndex(str, "「", index); int endindex = findStrIndex(str, "」", index); str = str.substring(beginindex + 1, endindex); return str; }
//查找String下的某符号的第num次出现的位置 public int findStrIndex(String str, String cha, int num) { int x = str.indexOf(cha); for (int i = 0; i < num - 1; i++) { x = str.indexOf(cha, x + 1); } return x; }
//返回某个符号再Str中的出现次数 private int countStr(String str, String sToFind) { int num = 0; while (str.contains(sToFind)) { str = str.substring(str.indexOf(sToFind) + sToFind.length()); num++; } return num; } }