public List<String> readFile(String dirPath) { // 建立当前目录中文件的File对象 File file = new File(dirPath); // 取得代表目录中所有文件的File对象数组 File[] list = file.listFiles(); // 遍历file数组 for (int i = 0; i < list.length; i++) { if(list[i].isDirectory()) { readFile(list[i].getPath());//递归 } else{ if(list[i].getName().endsWith("htm")){ dirPath1.add(list[i].getPath()); } } } return dirPath1; }
2、一行一行的读取文本内容
/**
 * Reads every entry of the hard-coded directory line by line and returns the
 * concatenated text.
 *
 * <p>Each line is followed by {@code "\n"}; files are appended in the order
 * {@link File#listFiles()} returns them. Files are decoded with the platform
 * default charset.
 *
 * @return the text of all files in the directory, or an empty string when the
 *         directory cannot be listed
 * @throws IOException if a file cannot be opened or read
 */
public String Reader() throws IOException {
    // Declared outside the loop so it is still in scope at the return.
    StringBuffer content = new StringBuffer();
    File file = new File("C:\\Users\\Administrator\\Desktop\\tests");
    File[] list = file.listFiles();
    // listFiles() yields null when the directory is missing or unreadable.
    if (list == null) {
        return content.toString();
    }
    for (int i = 0; i < list.length; i++) {
        InputStreamReader isr = new InputStreamReader(new FileInputStream(list[i].getAbsolutePath()));
        BufferedReader read = new BufferedReader(isr);
        try {
            String temp;
            while ((temp = read.readLine()) != null) {
                content.append(temp);
                content.append("\n");
            }
        } finally {
            // Release the file handle even when reading fails part-way.
            read.close();
        }
    }
    return content.toString();
}
3、将整个内容写到文本
public static void FileWrite(String fileName, String content) { FileWriter writer = null; try { // 打开一个写文件器,构造函数中的第二个参数true表示以追加形式写文件 writer = new FileWriter(fileName, true); writer = new FileWriter(fileName);//不追加文件内容 writer.write(content); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer != null){ writer.close(); } } catch (IOException e) { e.printStackTrace(); } } }
4、将字符串内容循环写到文本中
public String WriteTopic(ArrayList result) throws FileNotFoundException{ Book book=new Book(); String id=null; String body=null; String document=null; String qrel=null; for(int i=0;i<result.size();i++){ Pattern p0=Pattern.compile("id=(.*?)bioasq"); Matcher m0=p0.matcher(result.get(i).toString()); while(m0.find()){ id=m0.group(1); book.setId(id); } Pattern p1=Pattern.compile("body:\"(.*?)\""); Matcher m1=p1.matcher(result.get(i).toString()); while(m1.find()){ body=m1.group(1); book.setBody(body); } Pattern p2=Pattern.compile("\"http://www.ncbi.nlm.nih.gov/pubmed/(.*?)\""); Matcher m2=p2.matcher(result.get(i).toString()); while(m2.find()){ document=m2.group(1); // book.setDocument(document); qrel=id+" "+"0"+" "+document+" "+"1"; System.out.println(qrel); FileOutputStream fos=new FileOutputStream("C:\\Users\\Administrator\\Desktop\\BioASQ_2013_TaskB\\BioASQ_PhaseB.qrel",true); PrintStream ps=new PrintStream(fos); Scanner in=new Scanner(qrel); String s=in.next(); System.setOut(ps); // System.out.println(s); } } return qrel; }
BioASQ_PhaseB.qrel格式
1 0 23220349 1 1 0 19582169 1 1 0 22161322 1 1 0 18025684 1 1 0 15701682 1 1 0 15215406 1 1 0 18081932 1 1 0 18629289 1 1 0 21729286 1 1 0 11438739 1 1 0 10573422 1 2 0 22975810 1 2 0 22852678 1 2 0 22278059 1 2 0 21500720 1 2 0 23331310 1 2 0 23250067 1 2 0 23003992 1 2 0 22714377 1 2 0 22665786 1 2 0 22653729 1 2 0 22330507 1 2 0 22286383 1 2 0 21293374 1 3 0 22921312 1 3 0 22480152 1 3 0 22315491 1 3 0 22258533 1 3 0 21415082 1 3 0 21068339 1 3 0 20569258 1 4 0 23297037 1 4 0 22987359 1 4 0 22540951 1 4 0 22247276 1 ......
5、写xml
public void getTopics(String title,String narrative) throws IOException{ Document document=DocumentHelper.createDocument(); OutputFormat format=OutputFormat.createPrettyPrint(); Element root=document.addElement("parameters"); Element topic=root.addElement("topic"); topic.setAttributeValue("id", "1"); // topic.setAttributeValue("id",1 ); Element titleElement=topic.addElement("title"); Element groupElement=topic.addElement("group"); Element narrativeElement=topic.addElement("narrative"); if(title!=null){ titleElement.setText(title); } if(narrative!=null){ narrativeElement.setText(narrative); } /*File file=new File("/home/zzj/test.topics"); file.mkdirs();*/ XMLWriter writer=new XMLWriter(new FileWriter(new File("/home/douban/test.xml")),format); writer.write(document); writer.flush(); writer.close(); }
test.xml格式
<?xml version="1.0" encoding="UTF-8"?> <parameters> <topic id="1"> <title>Give examples of next-generation sequencing applications in mutation screening?</title> <group/> <narrative>Give examples of next-generation sequencing applications in mutation screening?</narrative> </topic> </parameters>
6、写XML文件,如何在同一个节点下循环加入多个节点
public void WrtierTopics(ArrayList result) throws IOException { String body=null; Document doc=DocumentHelper.createDocument(); OutputFormat format=OutputFormat.createPrettyPrint(); Element root=doc.addElement("parameters"); for(int i=0;i<result.size();i++){ Pattern p0=Pattern.compile("id=(.*?)bioasq"); Matcher m0=p0.matcher(result.get(i).toString()); Pattern p1=Pattern.compile("body:\"(.*?)\""); Matcher m1=p1.matcher(result.get(i).toString()); if(m1.find()){ body=m1.group(1); } Element topic=root.addElement("topic"); topic.setAttributeValue("id", "1");//topic中的id值固定为1 Element titleElement=topic.addElement("title"); Element groupElement=topic.addElement("group"); Element narrativeElement=topic.addElement("narrative"); titleElement.setText(body); narrativeElement.setText(body); } XMLWriter writer=new XMLWriter(new FileWriter(new File("C:\\Users\\Administrator\\Desktop\\BioASQ_2013_TaskB\\BioASQ_phaseB.topic")),format); writer.write(doc); writer.flush(); writer.close(); }
BioASQ_phaseB.topic格式:
<?xml version="1.0" encoding="UTF-8"?> <parameters> <topic id="1"> <title>How could we infer functional associations from gene fusion events?</title> <group/> <narrative>How could we infer functional associations from gene fusion events?</narrative> </topic> <topic id="1"> <title>Where is X-ray free electron laser used?</title> <group/> <narrative>Where is X-ray free electron laser used?</narrative> </topic> <topic id="1"> <title>Give examples of next-generation sequencing applications in mutation screening?</title> <group/> <narrative>Give examples of next-generation sequencing applications in mutation screening?</narrative> </topic> <topic id="1"> <title>What are the computational methods for the prediction of beta-barrel transmembrane proteins?</title> <group/> <narrative>What are the computational methods for the prediction of beta-barrel transmembrane proteins?</narrative> </topic> ...... </parameters>