100万个数据里面找出100个最大的

参考博客:

https://blog.csdn.net/beiyetengqing/article/details/8011792

/*基于quicksort实现的原理如下:

1. 假设数组为 array[N] (N = 1 亿),首先利用quicksort的原理把array分成两个部分,左边部分比 array[N - 1] (array中的最后一个值,即pivot) 大, 右边部分比pivot 小。然后,可以得到 array[array.length - 1] (即 pivot) 在整个数组中的位置,假设是 k.
2. 如果 k 比 99 大,原数组变成了 array [0, ...  k - 1], 然后在数组里找前 100 最大值。 (继续递归)
3. 如果 k 比 99 小, 原数组变成了 array [k + 1, ..., N - 1], 然后在数组里找前 100 - (k + 1) 个最大值。(继续递归)
4. 如果 k == 99, 那么数组的前 100 个值一定是最大的。(退出)*/


import java.util.Random;


public class TopHundred {
    /**
     * Rearranges {@code array[start..end]} in place so that the {@code k} largest values of
     * that range occupy its first {@code k} slots, {@code array[start .. start + k - 1]}.
     * The order inside those slots (and inside the remainder) is unspecified.
     *
     * <p>This is a quickselect that partitions in descending order around the last element of
     * the current window. The target index is fixed once as an absolute position and the
     * window is narrowed iteratively, so the selection cannot mix relative counts with
     * absolute indices and does not consume stack. Because the pivot is always the last
     * element, already-ordered input degrades towards quadratic time.
     *
     * <p>If {@code k <= 0}, the range is empty, or {@code k} is at least the size of the range,
     * the array is left untouched (in the last case the whole range already is the answer).
     *
     * @param array the values to rearrange in place
     * @param start first index of the range (inclusive)
     * @param end   last index of the range (inclusive)
     * @param k     how many of the largest values to move to the front of the range
     * @throws ArrayIndexOutOfBoundsException if a non-empty range reaches outside the array
     */
    public void tophundred(int[] array, int start, int end, int k) {
        int size = end - start + 1;
        if (k <= 0 || k >= size) {
            return;
        }

        // Absolute index that must end up holding the k-th largest value of the range.
        int target = start + k - 1;
        int lo = start;
        int hi = end;

        // Invariant: lo <= target <= hi, everything left of lo is >= everything in the window,
        // and everything right of hi is <= everything in the window.
        while (lo < hi) {
            int pivotIndex = partition(array, lo, hi);
            if (pivotIndex == target) {
                return;
            }
            if (pivotIndex < target) {
                lo = pivotIndex + 1; // not enough large values yet: keep looking on the right
            } else {
                hi = pivotIndex - 1; // too many candidates: keep looking on the left
            }
        }
    }

    /**
     * Partitions {@code array[lo..hi]} in descending order around {@code array[hi]}: values
     * greater than or equal to the pivot are moved to the left of it, smaller values stay on
     * the right.
     *
     * @return the final index of the pivot
     */
    private int partition(int[] array, int lo, int hi) {
        int pivot = array[hi];
        int boundary = lo; // next slot for a value that is >= pivot
        for (int i = lo; i < hi; i++) {
            if (array[i] >= pivot) {
                swap(array, boundary, i);
                boundary++;
            }
        }
        swap(array, hi, boundary);
        return boundary;
    }

    /**
     * Exchanges the elements at indices {@code i} and {@code j} of {@code array}.
     */
    public void swap(int[] array, int i, int j) {
        int temp = array[i];
        array[i] = array[j];
        array[j] = temp;
    }

    /**
     * Fills an array with random values, times the quickselect-based top-k selection, and
     * prints the elapsed time followed by the k selected values.
     */
    public static void main(String[] args) {

        // the size of the array
        int number = 100000000;
        // the top k values
        int k = 100;
        // the range of the values in the array (exclusive upper bound for nextInt)
        int range = 1000000001;

        // input for the quickselect based method
        int[] array = new int[number];

        Random random = new Random();
        for (int i = 0; i < number; i++) {
            array[i] = random.nextInt(range);
        }

        TopHundred topHundred = new TopHundred();

        // start time
        long t1 = System.currentTimeMillis();
        topHundred.tophundred(array, 0, array.length - 1, k);
        // end time
        long t2 = System.currentTimeMillis();

        System.out.println("The total execution time " +
                "of quicksort based method is " + (t2 - t1) +" millisecond!");

        // after selection the k largest values sit in array[0..k-1]
        System.out.println("The top "+ k + " largest values are:");
        for (int i = 0; i < k; i++) {
            System.out.println(array[i]);
        }

    }
}

在 Eclipse 中直接运行可能会内存溢出(1 亿个 int 约占 400 MB):右击该 class,选择 Run As -> Run Configurations -> Arguments,在 VM arguments 中输入 -Xmx800m,然后保存并运行即可。

用堆来维护的做法:

/*
 * 建立大小为100的最小堆,然后遍历余下的所有数字,大于堆顶的数字放入堆中,不断进行维护
 * */


import java.util.Random;


public class findKelem {
    /**
     * Fills an array with random values, times the min-heap based top-k selection, and prints
     * the elapsed time followed by the selected values.
     */
    public static void main(String[] args) {
        // the size of the array
        int number = 100000000;
        // the top k values
        int k = 100;
        // the range of the values in the array (exclusive upper bound for nextInt)
        int range = 1000000001;

        // input for minHeap based method
        int[] array = new int[number];

        Random random = new Random();
        for (int i = 0; i < number; i++) {
            array[i] = random.nextInt(range);
        }

        findKelem thh = new findKelem();

        long t1, t2;
        // start time
        t1 = System.currentTimeMillis();
        int[] top = thh.topHundred(array, k);

        // end time
        t2 = System.currentTimeMillis();
        System.out.println("The total execution time of " +
                "min-heap based method is " + (t2 - t1) +" millisecond!");

        // print out the selected values; they are in heap order, not sorted
        System.out.println("The top "+ k + " largest values are:");
        for (int value : top) {
            System.out.println(value);
        }
    }

    /**
     * Returns the {@code k} largest values of {@code array}, in min-heap order (not sorted).
     *
     * <p>A min-heap is seeded with the first {@code k} elements; every later element that is
     * larger than the heap root replaces the root and is sifted down. The input array is not
     * modified.
     *
     * @param array the values to select from
     * @param k     how many of the largest values to return
     * @return a new array holding the {@code min(k, array.length)} largest values; empty when
     *         {@code k} is 0 or {@code array} is empty
     * @throws IllegalArgumentException if {@code k} is negative
     */
    public int[] topHundred(int[] array, int k) {
        if (k < 0) {
            throw new IllegalArgumentException("k must be non-negative: " + k);
        }

        // Never ask for more values than the input holds.
        int heapSize = Math.min(k, array.length);
        int[] top = new int[heapSize];
        if (heapSize == 0) {
            return top;
        }

        // Seed the heap with the first heapSize elements.
        System.arraycopy(array, 0, top, 0, heapSize);
        buildMinHeap(top);

        for (int i = heapSize; i < array.length; i++) {
            // top[0] is the smallest value kept so far; only a larger value can displace it.
            if (array[i] > top[0]) {
                top[0] = array[i];
                minHeapify(top, 0, heapSize);
            }
        }

        return top;
    }

    /**
     * Turns {@code array} into a min-heap in place by sifting down every non-leaf node,
     * starting from the last one and moving up to the root.
     */
    public void buildMinHeap(int[] array) {
        int heapSize = array.length;
        for (int i = heapSize / 2 - 1; i >= 0; i--) {
            minHeapify(array, i, heapSize);
        }
    }

    /**
     * Sifts the element at {@code position} down until neither child inside the first
     * {@code heapSize} slots is smaller than it, restoring the min-heap property for the
     * subtree rooted at {@code position} (whose child subtrees must already be heaps).
     *
     * @param array    the array backing the heap
     * @param position index of the node to sift down
     * @param heapSize number of leading array slots that belong to the heap
     */
    public void minHeapify(int[] array, int position, int heapSize)
    {
        int current = position;
        while (true) {
            int left = left(current);
            int right = right(current);
            int smallest = current;

            if (left < heapSize && array[left] < array[smallest]) {
                smallest = left;
            }
            if (right < heapSize && array[right] < array[smallest]) {
                smallest = right;
            }
            if (smallest == current) {
                return; // parent is no larger than both children: done
            }

            swap(array, current, smallest);
            current = smallest; // keep sifting down from the child that was swapped
        }
    }

    /**
     * Exchanges the elements at indices {@code i} and {@code j} of {@code array}.
     */
    public void swap(int[] array, int i, int j) {
        int temp = array[i];
        array[i] = array[j];
        array[j] = temp;
    }

    /** Returns the index of the left child of node {@code i}. */
    public int left(int i)
    {
        return 2 * i + 1;
    }

    /** Returns the index of the right child of node {@code i}. */
    public int right(int i)
    {
        return 2 * i + 2;
    }
}

猜你喜欢

转载自blog.csdn.net/cyanchen666/article/details/81940904