Java data structure and algorithm (7): search

Four commonly used search algorithms:

Sequential (linear) search
Binary search
Interpolation search
Fibonacci search

1. Linear search

Given the number sequence {1,8,10,89,1000,1234}, determine whether a given number is contained in the sequence (sequential search). Requirement: if it is found, report that it was found and give its subscript (index).
Code

package com.lele.search;

/**
 * author: hwl
 * date: 2020/10/20 21:36
 * version: 1.0.0
 * modified by:
 * description:
 */
public class SeqSearch {

    /**
     * Demo entry point: looks up the value 11 in a small unsorted array and
     * prints either the index where it was found or a "not found" message.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Linear search does not require the data to be sorted.
        int[] samples = {1, 9, 11, -1, 34, 89};
        int position = seqSearch(samples, 11);
        String message = (position == -1) ? "没有找到" : "找到，下标为：" + position;
        System.out.println(message);
    }

    /**
     * Scans the array from the front and stops at the first element equal to
     * {@code value}.
     *
     * @param arr   the array to scan; it does not have to be sorted
     * @param value the value to look for
     * @return the index of the first matching element, or {@code -1} if no
     *         element equals {@code value}
     */
    public static int seqSearch(int[] arr, int value) {
        int i = 0;
        // Compare the elements one by one; the first match wins.
        while (i < arr.length) {
            if (arr[i] == value) {
                return i;
            }
            i++;
        }
        return -1;
    }
}

2. Binary search

Perform a binary search on the ordered array {1,8,10,89,1000,1234}: given a number, determine whether it exists in the array and, if so, report its subscript (index). If the number is not present, print "there is no such number".
Idea
Insert picture description here
code implementation

package com.lele.search;

import java.util.ArrayList;
import java.util.List;

/**
 * author: hwl
 * date: 2020/10/21 21:08
 * version: 1.0.0
 * modified by:
 * description:
 */
public class BinarySearch {

    /**
     * Demo entry point: finds every index holding 1000 in a sorted array that
     * contains duplicates and prints the resulting index list.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        int[] arr = {1, 8, 10, 89, 1000, 1000, 1000, 1234};

        List<Integer> resIndexList = binarySearch2(arr, 0, arr.length - 1, 1000);
        System.out.println("resIndexList=" + resIndexList);
    }

    /**
     * Binary search for a single occurrence of {@code findVal}.
     *
     * @param arr     the array to search; must be sorted in ascending order
     * @param left    lowest index of the range to search (inclusive)
     * @param right   highest index of the range to search (inclusive)
     * @param findVal the value to look for
     * @return an index whose element equals {@code findVal}, or {@code -1} if
     *         the range does not contain it (including when {@code left > right})
     */
    public static int binarySearch(int[] arr, int left, int right, int findVal) {
        int lo = left;
        int hi = right;
        // An empty window (lo > hi) means the whole range was examined without a hit.
        while (lo <= hi) {
            // Written as lo + (hi - lo) / 2 so that the sum of two large indices
            // cannot overflow int, which (lo + hi) / 2 would.
            int mid = lo + (hi - lo) / 2;
            int midVal = arr[mid];
            if (findVal > midVal) {
                lo = mid + 1;   // continue in the right half
            } else if (findVal < midVal) {
                hi = mid - 1;   // continue in the left half
            } else {
                return mid;
            }
        }
        return -1;
    }

    /**
     * Finds every index whose element equals {@code findVal} in a sorted array
     * that may contain duplicates.
     *
     * <p>Approach: locate one occurrence with {@link #binarySearch}, then widen
     * to the left and to the right while the neighbouring elements still equal
     * {@code findVal}. The widening is bounded by the array, not by
     * {@code [left, right]}, so neighbouring duplicates outside the requested
     * range are reported as well.
     *
     * @param arr     the array to search; must be sorted in ascending order
     * @param left    lowest index of the range to search (inclusive)
     * @param right   highest index of the range to search (inclusive)
     * @param findVal the value to look for
     * @return a new list with the matching indices in ascending order; empty if
     *         the value is not found
     */
    public static List<Integer> binarySearch2(int[] arr, int left, int right, int findVal) {
        List<Integer> resIndexList = new ArrayList<>();
        int hit = binarySearch(arr, left, right, findVal);
        if (hit == -1) {
            return resIndexList;
        }

        // Walk outwards from the hit to the first and last equal neighbours.
        int first = hit;
        while (first > 0 && arr[first - 1] == findVal) {
            first--;
        }
        int last = hit;
        while (last < arr.length - 1 && arr[last + 1] == findVal) {
            last++;
        }

        // Emit the run front to back so the result is always sorted.
        for (int i = first; i <= last; i++) {
            resIndexList.add(i);
        }
        return resIndexList;
    }
}

3. Interpolation search

The interpolation search algorithm is similar to the binary search, the difference is that the interpolation search starts from the adaptive mid each time.

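In binary search the middle index is computed as $mid = \frac{low + high}{2} = low + \frac{1}{2}(high - low)$. Interpolation search replaces the fixed factor $\frac{1}{2}$ with an adaptive one: $mid = low + \frac{key - a[low]}{a[high] - a[low]}\,(high - low)$. Here low is the left index (left), high is the right index (right), and key is the value being searched for (findVal).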
Insert picture description here

Application case
Perform an interpolation search on the ordered array {1,8,10,89,1000,1234}: given a number, determine whether it exists in the array and, if so, report its subscript (index). If it is not present, print "There is no such number".

Code

package com.lele.search;

/**
 * author: hwl
 * date: 2020/10/23 7:30
 * version: 1.0.0
 * modified by:
 * description:
 */
public class InsertValueSearch {

    /**
     * Demo entry point: fills an array with 1..100 and prints the index at
     * which interpolation search finds the value 34.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        int[] arr = new int[100];
        for (int i = 0; i < 100; i++) {
            arr[i] = i + 1;
        }

        int index = insertValueSearch(arr, 0, arr.length - 1, 34);
        System.out.println("index = " + index);
    }

    /**
     * Interpolation search: like binary search, but the probe position is
     * estimated from where {@code findVal} lies between the values at the two
     * ends of the current window.
     *
     * @param arr     the array to search; must be sorted in ascending order
     * @param left    lowest index of the range to search (inclusive)
     * @param right   highest index of the range to search (inclusive)
     * @param findVal the value to look for
     * @return an index in {@code [left, right]} whose element equals
     *         {@code findVal}, or {@code -1} if there is none
     */
    public static int insertValueSearch(int[] arr, int left, int right, int findVal) {
        int lo = left;
        int hi = right;
        // Requiring arr[lo] <= findVal <= arr[hi] keeps the interpolated probe
        // inside [lo, hi]; a value outside the window cannot be present in it.
        while (lo <= hi && findVal >= arr[lo] && findVal <= arr[hi]) {
            if (arr[lo] == arr[hi]) {
                // All values in the window are equal, and by the loop condition
                // they equal findVal. Returning here also avoids dividing by
                // zero in the interpolation formula below.
                return lo;
            }
            // Use long arithmetic: both the value differences and their product
            // with the window width can exceed the int range.
            long valueSpan = (long) arr[hi] - arr[lo];
            long offset = (long) (hi - lo) * ((long) findVal - arr[lo]) / valueSpan;
            int mid = lo + (int) offset;
            int midVal = arr[mid];
            if (findVal > midVal) {
                lo = mid + 1;
            } else if (findVal < midVal) {
                hi = mid - 1;
            } else {
                return mid;
            }
        }
        return -1;
    }
}

Note:

For look-up tables with a large amount of data and relatively uniform keyword distribution, interpolation search is used, which is faster;
In the case of uneven distribution of keywords, this method is not necessarily better than binary search;

4. Fibonacci (Golden Section) search algorithm

The golden section refers to dividing a line segment into two parts so that the ratio of the larger part to the whole length is equal to the ratio of the smaller part to the larger part. Rounded to three decimal places, this ratio is 0.618. Because shapes designed according to this ratio look very pleasing, it is called the golden ratio, also known as the extreme and mean ratio. It is a remarkable number that produces unexpectedly good results.

In the Fibonacci sequence {1,1,2,3,5,8,13,21,34,55}, the ratio of two adjacent numbers (the smaller divided by the larger) approaches the golden section value 0.618 as the sequence goes on.

The Fibonacci search principle is similar to the first two, only changing the position of the middle node (mid), mid is no longer in the middle or interpolated, but is located near the golden section point, that is, mid = low + F(k- 1)-1 (F stands for Fibonacci sequence), as shown in the figure below.
Insert picture description here
Understanding of F(k-1)-1:

From the Fibonacci property F[k] = F[k-1] + F[k-2] we get (F[k]-1) = (F[k-1]-1) + (F[k-2]-1) + 1. This formula says: as long as the length of the sequence table is F[k]-1, the table can be divided into two sections of lengths F[k-1]-1 and F[k-2]-1, with one element (mid) between them, as shown in the figure above. Thus the middle position is mid = low + F[k-1] - 1.
Similarly, each sub-segment can also be divided in the same way;
However, the sequence length n is not necessarily equal to F[k]-1, so the original sequence table has to be extended to length F[k]-1. Here k only needs to be the smallest index for which F[k]-1 is greater than or equal to n, which can be obtained with the following code. After the table has been extended, all newly added positions (from position n+1 to position F[k]-1) are assigned the value at position n.

// Advance k until F[k] - 1 is at least the table length n.
while (fib(k) - 1 < n) {
    k++;
}

Code

package com.lele.search;

import java.util.Arrays;

/**
 * author: hwl
 * date: 2020/10/24 11:13
 * version: 1.0.0
 * modified by:
 * description:
 */
public class FibonacciSearch {
    
    

    public static int maxSize = 20;
    public static void main(String[] args) {
    
    
        int[] arr = {
    
    1,8,10,89,1000,1234};

        System.out.println("index=" + fibSearch(arr, 1234));
    }

    /**
     * 因为后面我们 mid = low + F(k-1)-1,需要使用到斐波那契数列，因此需要先获取到一个斐波那契数列
     * 非递归方法得到一个斐波那契数列
     * @return
     */
    public static int[] fib() {
    
    
        int[] f = new int[maxSize];
        f[0] = 1;
        f[1] = 1;
        for (int i = 2; i < maxSize; i++) {
    
    
            f[i] = f[i - 1] + f[i - 2];
        }
        return f;
    }

    /**
     * 编写斐波那契查找算法
     * 使用非递归的方式编写算法
     * @param a 数组
     * @param key 需要查找的关键码（值）
     * @return 返回对应的下标，如果没有，则返回-1
     */
    public static int fibSearch(int[] a, int key) {
    
    
        int low = 0;
        int high = a.length - 1;
        int k = 0;// 表示斐波那契分割数值的下标
        int mid = 0;// 存放mid值
        int f[] = fib(); // 获取到斐波那契数列
        // 获取到斐波那契分割数值的下标
        while(high > f[k] - 1) {
    
    
            k++;
        }
        // 因为f[k]值 可能大于a的长度，因此我们需要使用Arrays类，构造一个新的数组，并指向temp[]
        // 不足的部分会使用0填充
        int[] temp = Arrays.copyOf(a, f[k]);
        // 实际上需求使用a数组最后的数填充temp
        // temp = {1,8,10,89,1000,1234,0,0} => {1,8,10,89,1000,1234,1234,1234}
        for (int i = high + 1; i < temp.length; i++) {
    
    
            temp[i] = a[high];
        }
        while (low <= high) {
    
    
            mid = low + f[k - 1] - 1;
            if (key < temp[mid]) {
    
     // 向左查找
                high = mid - 1;
                /**
                 * 说明：
                 * 1. 全部元素 = 前面的元素 + 后面元素
                 * 2. f[k] = f[k-1]+f[k-2]
                 * 因为 前面有 f[k-1]个元素，所以可以继续拆分 f[k-1] = f[k-2]+f[k-3], 即 在 f[k-1] 的前面继续查找 k--
                 * 即 下次循环 mid = f[k-1-1]-1
                 */
                k--;
            } else if (key > temp[mid]) {
    
     // 继续向右查找
                low = mid + 1;
                /**
                 * 说明
                 * 1.全部元素 = 前面的元素 + 后面的元素
                 * 2.f[k] = f[k-1]+f[k-2]
                 * 3.因为后面我们有 f[k-2] 所以可以继续拆分 f[k-1] = f[k-3]+f[k-4]
                 * 4.即 在 f[k-2] 的前面进行查找 k-=2
                 * 5.即下次循环 mid = f[k-1-2] - 1
                 */
                k -= 2;
            } else {
    
    
                if (mid <= high) {
    
    
                    return mid;
                } else {
    
    
                    return high;
                }
            }
        }
        return -1;
    }
}