在需要对一个大文件的可排序内容进行排序时,可能无法一次性把整个文件加载到内存中处理。这时可以把文件分块加载到内存:每次加载一块,在内存中排好序后保存为一个新的分块文件;所有分块都处理完后再合并。合并时采用归并排序的思路,把多个有序序列合并为一个有序序列,并且边读边写。
下面的代码先生成一个内容为无序数字的文件(每行一个数字),然后采用上面所说的方法对其进行排序。
LargeDataSortTest.java
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;

/**
 * Demonstrates an external merge sort on a text file holding one integer per line.
 *
 * <p>The pipeline run by {@link #main(String[])} is:
 * <ol>
 *   <li>{@link #createDataFile()} writes random integers to {@code data.txt};</li>
 *   <li>{@link #separateFile()} distributes its lines round-robin over the chunk files
 *       {@code data0.txt} .. {@code data19.txt};</li>
 *   <li>{@link #allSingleFileSort()} sorts every chunk file in memory and rewrites it;</li>
 *   <li>{@link #mergeFile()} k-way merges the sorted chunks into {@code dataSorted.txt}.</li>
 * </ol>
 * All paths are relative to the current working directory.
 */
public class LargeDataSortTest {

    /** Unsorted input file. */
    static File file = new File("data.txt");

    /** Sorted output file. */
    static File file1 = new File("dataSorted.txt");

    /** Number of chunk files the input is split into. */
    private static final int CHUNK_COUNT = 20;

    /** Number of random integers written by {@link #createDataFile()}. */
    private static final int NUMBER_COUNT = 1000000;

    /** Line terminator used for every file this class writes. */
    private static final String LINE_END = "\r\n";

    /**
     * Runs the whole pipeline: generate, split, sort chunks, merge.
     *
     * @param args ignored
     * @throws Exception if any file operation or number parsing fails
     */
    public static void main(String[] args) throws Exception {
        createDataFile();
        System.out.println("大文件写入成功");
        separateFile();
        System.out.println("文件拆分成功");
        allSingleFileSort();
        System.out.println("小文件排序完成");
        mergeFile();
        System.out.println("所有排序都已完成");
    }

    /**
     * Writes {@code NUMBER_COUNT} random non-negative integers to {@link #file}, one per line.
     *
     * @throws IOException if the file cannot be written
     */
    public static void createDataFile() throws IOException {
        Random random = new Random();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            for (int i = 0; i < NUMBER_COUNT; i++) {
                writer.write(random.nextInt(Integer.MAX_VALUE) + LINE_END);
            }
        }
    }

    /**
     * Splits {@link #file} into the chunk files, handing lines out round-robin.
     *
     * <p>End of input is detected by {@code readLine()} returning {@code null}, so an input
     * whose line count is not a multiple of the chunk count is split correctly. Blank lines
     * are dropped because they cannot be parsed as numbers later on.
     *
     * @throws IOException if the input cannot be read or a chunk file cannot be written
     */
    public static void separateFile() throws IOException {
        List<BufferedWriter> writers = new ArrayList<BufferedWriter>(CHUNK_COUNT);
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            for (int i = 0; i < CHUNK_COUNT; i++) {
                writers.add(new BufferedWriter(new FileWriter(chunkPath(i))));
            }
            int target = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                writers.get(target).write(line + LINE_END);
                target = (target + 1) % CHUNK_COUNT;
            }
        } finally {
            // Runs on the failure path too, so no chunk writer is leaked.
            closeAll(writers);
        }
    }

    /**
     * Sorts every chunk file in ascending order, in place.
     *
     * <p>Each chunk is read fully into memory, sorted, and written back through
     * {@link #numbersWrite(LinkedList, String)}. The reader is closed before the file is
     * rewritten.
     *
     * @throws Exception if a chunk cannot be read or written, or a line is not an integer
     */
    public static void allSingleFileSort() throws Exception {
        for (int i = 0; i < CHUNK_COUNT; i++) {
            String path = chunkPath(i);
            LinkedList<Integer> numbers = new LinkedList<Integer>();
            try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    numbers.add(Integer.parseInt(line.trim()));
                }
            }
            Collections.sort(numbers);
            numbersWrite(numbers, path);
        }
    }

    /**
     * Writes the given numbers to {@code path}, one per line, replacing any existing content.
     *
     * @param numbers values to write, in iteration order
     * @param path    destination file path
     * @throws IOException if the file cannot be written
     */
    public static void numbersWrite(LinkedList<Integer> numbers, String path) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
            for (Integer number : numbers) {
                writer.write(number + LINE_END);
            }
        }
    }

    /**
     * Merges the sorted chunk files into {@link #file1} with a k-way merge.
     *
     * <p>A priority queue holds the next unread value of each chunk. The smallest value is
     * written out and replaced by the next value from the same chunk, until all chunks are
     * exhausted.
     *
     * @throws Exception if a chunk cannot be read, the output cannot be written, or a line is
     *                   not an integer
     */
    public static void mergeFile() throws Exception {
        List<BufferedReader> readers = new ArrayList<BufferedReader>(CHUNK_COUNT);
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file1))) {
            for (int i = 0; i < CHUNK_COUNT; i++) {
                readers.add(new BufferedReader(new FileReader(chunkPath(i))));
            }

            // Prime the queue with the first value of every non-empty chunk.
            PriorityQueue<Obj> queue = new PriorityQueue<Obj>(CHUNK_COUNT, new Obj());
            for (int i = 0; i < CHUNK_COUNT; i++) {
                offerNext(queue, readers.get(i), i);
            }

            System.out.println("开始合并");
            while (!queue.isEmpty()) {
                Obj smallest = queue.poll();
                out.write(smallest.a + LINE_END);
                // Refill from the chunk that just supplied the smallest value.
                offerNext(queue, readers.get(smallest.b), smallest.b);
            }
        } finally {
            closeAll(readers);
        }
    }

    /** Returns the path of chunk file number {@code index}. */
    private static String chunkPath(int index) {
        return "data" + index + ".txt";
    }

    /** Reads the next line of {@code reader}, if any, and queues it tagged with {@code index}. */
    private static void offerNext(PriorityQueue<Obj> queue, BufferedReader reader, int index)
            throws IOException {
        String line = reader.readLine();
        if (line != null) {
            queue.add(new Obj(Integer.parseInt(line.trim()), index));
        }
    }

    /** Closes every resource, then rethrows the first failure so write errors are not lost. */
    private static void closeAll(List<? extends Closeable> resources) throws IOException {
        IOException first = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (first == null) {
                    first = e;
                }
            }
        }
        if (first != null) {
            throw first;
        }
    }
}

/**
 * A value read from a chunk file together with the index of that chunk.
 *
 * <p>An instance also serves as the comparator for the merge queue; it orders entries by
 * {@link #a} only.
 */
class Obj implements Comparator<Obj> {

    /** {@code a} is the number itself; {@code b} is the index of the chunk it was read from. */
    int a, b;

    /** Creates an instance intended to be used only as a comparator. */
    Obj() {
    }

    /**
     * @param a the number
     * @param b index of the chunk file the number was read from
     */
    Obj(int a, int b) {
        this.a = a;
        this.b = b;
    }

    /**
     * Orders entries by ascending value.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, which overflows when
     * the operands have opposite signs.
     */
    @Override
    public int compare(Obj o1, Obj o2) {
        return Integer.compare(o1.a, o2.a);
    }
}