最近的工作总是在跟 I/O 流打交道。最新的任务是:给定一个 XML 的 URL,读取其中的数据并存入 CSV 文件。哼,小仙女像是怕事的人吗?看爸爸给你写出来。老规矩,贴代码:
public static void xmlWrite(String urlString, String filePath,
Boolean judgeAppend) throws DocumentException {
BufferedWriter writer = null;
try {
writer = new BufferedWriter(
new FileWriter(new File(filePath), true));
SAXReader saxReader = new SAXReader(); //xml解析有sax解析和dom解析,我采用的是sax解析
Document document = saxReader.read(new URL(urlString));
Element root = document.getRootElement();
List<Element> elist = root.elements("entry");
System.out.println("elist.size:" + elist.size());
//如果是第一次写入数据,则写上表头,如果是追加,就不写入(judgeAppend唯一的一丢丢用处) if (judgeAppend == false) {
String head = "id,title1,title2,title3,summary,updated,link,georss:line(georss:point)";
writer.write(head);
writer.newLine();
}
int line = 0;
int point = 0;
for (Element entry : elist) {
StringBuffer sb = new StringBuffer();
Boolean flag = false;
if (entry.element("id") != null) {
String id = dealWithData(entry.element("id").getText()); //dealWithData(String)处理数据,数据中有逗号分隔符,就整体加上引号
List<String> ids = readCSVGetIds("D:/zcx/xmldata.csv"); //追加的情况下,去重,获取已存在数据的所有id
for (String idItem : ids) {
if (idItem.equals(id)) {
flag = true;
break;
}
}
if (flag == false) {
sb.append(id).append(",");
}()
} else {
sb.append(" ").append(",");
}
if (flag == true) {
continue;
}
if (entry.element("title") != null) {
String title = dealWithData(entry.element("title")
.getText());
if (title.charAt(0) == '"'
&& title.charAt(title.length() - 1) == '"') {
title = title.substring(1, title.length() - 2); // remove
// ""
}
int firstIndex = title.indexOf('-', 0);
sb.append(dealWithData(title.substring(0, firstIndex - 1)))
.append(",");
int secondIndex = title.lastIndexOf('(');
sb.append(
dealWithData(title.substring(firstIndex + 2,
secondIndex - 1))).append(",");
sb.append(
dealWithData(title.substring(secondIndex + 1,
title.length() - 1))).append(",");
} else {
sb.append(" ").append(",");
}
if (entry.element("summary") != null) {
String summary = dealWithData(entry.element("summary")
.getText());
summary = summary.substring(1, summary.length() - 1);
Pattern p1 = Pattern.compile("<strong>");
Matcher m1 = p1.matcher(summary);
String a1 = m1.replaceAll("; ");
Pattern p2 = Pattern.compile("</strong>");
Matcher m2 = p2.matcher(a1);
String afterSummary = m2.replaceAll("");
afterSummary = dealWithData(afterSummary.substring(1,
afterSummary.length()));
// System.out.println(afterSummary);
sb.append(afterSummary).append(",");
} else {
sb.append(" ").append(",");
}
if (entry.element("updated") != null) {
String updated = dealWithData(entry.element("updated")
.getText());
sb.append(updated).append(",");
} else {
sb.append(" ").append(",");
}
if (entry.element("link") != null) {
String link = dealWithData(entry.element("link").getText());
sb.append(link).append(",");
} else {
sb.append(" ").append(",");
}
if (entry.element("line") != null) { //这里需要注意的是xml解析中的标签,一个<entry></entry中>有<georss:line>或者<georss:point>,这样的数据获取就分为如下两种情况:
line++;
String georssLine = entry.element("line").getText();
sb.append(georssLine).append(",");
} else if (entry.element("point") != null) {
point++;
String georssPoint = entry.element("point").getText();
sb.append(georssPoint).append(",");
} else {
sb.append(" ").append(",");
}
writer.write(sb.toString());
writer.write(sb.toString());
writer.newLine();
writer.flush();
}
System.out.println("point:" + point);
System.out.println("line:" + line);
writer.close(); //这里必须爆粗口,特别气,上次这里忘记写,数据直接没写进去,这次写进去了,但是数据有丢失,缓冲果然还是不能忘记关啊
} catch (Exception e) {
e.printStackTrace();
}
}
路过的大佬,如果代码有误或者有你认为不合理的地方,请给出意见。谢谢,手动笔芯!