我这个实例是在我开发过程中用到的,本人是菜鸟,希望高手勿喷!
首先,使用dom4j需要在项目中加入dom4j的jar包;另外,调用XPath还需要额外加入jaxen的jar包。导包的过程这里就不多说了,直接说一下这段代码的具体思路:
1、根据一定的规则,创建读取xml的url (由于我这个代码是根据用户不同的选择读取不同的xml)
2、检查url是否可用,主要是看请求该链接能否正常返回(HTTP状态码为200)
3、创建saxReader实例,接着创建doc实例
4、通过selectNodes按XPath表达式读取出数据(selectNodes其实是dom4j的一个方法,但需要jaxen包提供XPath支持)
5、把数据封装成Academic对象,放入ArrayList中,最后写入数据库(写入数据库的部分不在下面的代码里)
代码如下:
public class ParseXML { private ImportNameForm dataform; private int type; private Academic ac; private int count = 0; // 返回文章的数量 private StringBuffer finalStrBuffer = new StringBuffer(); ArrayList<Academic> ar = new ArrayList<Academic>(); // 把年份和账号通过构造函数传过来 public ParseXML(ImportNameForm dataform, int type) { this.dataform = dataform; this.type = type; } // 返回文章的数量 public int getCount() { return count; } public ArrayList<Academic> xml2arraylist() { this.parsing(1); return ar; } public String xml2string() { this.parsing(2); return finalStrBuffer.toString(); } // 根据地址在xml中把数据读出,当flag等于1,把数据放在academic中,然后再放在arraylist中 // 当flag=2 把数据转化成html放在buffer中 //flag等于1是为了方便存储,flag等于2是为了在选择名字后的显示 public void parsing(int flag) { switch (type) { case 1: this.getPaperXML(flag); break; case 2: this.getPublicationXML(flag); break; case 3: this.getProjectXML(flag); break; case 4: this.getPatentXML(flag); break; } } public String xzmTemplate(int i) { String str; String[] xzm = dataform.getXzm(); str = "<tr><td bgcolor='#dddddd'>学者:"; str = str + xzm[i] + " 的相关信息</td></tr>"; return str; } public String checkboxTemplate(Academic ac) { String str; str = "<tr><td><input type='checkbox' name='ckb' value='"; str = str + ac.getId() + "'/>"; str = str + ac.getTitle() + "</td></tr>"; return str; } // 根据用户的账号和选择的年份生成URL public String[] createURL() { String year1 = dataform.getYear1(); String year2 = dataform.getYear2(); String[] ckb = dataform.getCkb(); String[] str = new String[ckb.length]; String strType; switch (type) { case 1: strType = "Paper"; break; case 2: strType = "Publication"; break; case 3: strType = "Project"; break; case 4: strType = "Patent"; break; default: strType = "Paper"; break; } for (int i = 0; i < ckb.length; i++) { str[i] = "http://www.XXXXXX.com/rest/"; str[i] = str[i] + strType + "/" + ckb[i].trim() + "/" + year1 + "-" + year2; } return str; } // 检查URL是否有效 private boolean isConnect(String url) { boolean flag = false; int counts = 0; if (url == null || url.length() <= 
0) { return flag; } while (counts < 5) { try { HttpURLConnection connection = (HttpURLConnection) new URL(url) .openConnection(); int state = connection.getResponseCode(); if (state == 200) { flag = true; } break; } catch (Exception ex) { counts++; continue; } } return flag; } public void getPaperXML(int flag) { System.out.println("Paper_parsing"); String[] strURL = this.createURL(); SAXReader saxReader = new SAXReader(); try { for (int i = 0; i < strURL.length; i++) { if (isConnect(strURL[i])) { URL url; url = new URL(strURL[i]); Document doc = saxReader.read(url); List paperids = doc .selectNodes("/scholarPapers/scholarPaper/id"); List titles = doc .selectNodes("/scholarPapers/scholarPaper/title"); List authors = doc .selectNodes("/scholarPapers/scholarPaper/authors"); List dates = doc .selectNodes("/scholarPapers/scholarPaper/date"); List sources = doc .selectNodes("/scholarPapers/scholarPaper/source"); List types = doc .selectNodes("/scholarPapers/scholarPaper/type"); List citations = doc.selectNodes("/scholarPapers/scholarPaper/citation"); if (flag == 2) { finalStrBuffer.append(xzmTemplate(i)); } for (int j = 0; j < paperids.size(); j++) { count++; ac = new Academic(); ac.setAcademic_class(String.valueOf(type)); ac.setAuthor(((Element) authors.get(j)).getText()); ac.setPaper(((Element) sources.get(j)).getText()); ac.setPtype(((Element) types.get(j)).getText()); ac.setTitle(((Element) titles.get(j)).getText()); ac.setId(Integer.parseInt(((Element) paperids.get(j)) .getText())); ac.setYear(Integer.parseInt(((Element) dates.get(j)) .getText().substring(0, 4))); ac.setContent(((Element)citations.get(j)).getText()); if (flag == 1) { ar.add(ac); } else finalStrBuffer.append(checkboxTemplate(ac)); } } } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (DocumentException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }