总结Java中的字符串匹配算法

学习Java几个月了。
做ACM时突然对字符串匹配感兴趣。
就总结一下Java字符串匹配的几种方法。
但是从记录的运行时间来看，结果不是很理想。
可能是字符串的长度有点短。
导致算法区别不是很大。
但是发现indexOf是比较快的。
下面有注释。

package one;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Compares five ways of counting how many times a pattern string ("son")
 * occurs in a text string ("mother"), recording how long each one takes.
 *
 * <p>Every method counts <em>non-overlapping</em> occurrences, scanning from
 * left to right, treats the pattern as literal text (never as a regular
 * expression), and returns 0 for an empty pattern, so all five results are
 * directly comparable.
 *
 * @author 轩xuan
 */
public class OK {

	// Elapsed time of the most recent call to each counting method, in nanoseconds.
	static long time_暴力 = 0;
	static long time_KMP = 0;
	static long time_JavaIndexOf = 0;
	static long time_正则表达式 = 0;
	static long time_split = 0;

	/**
	 * Runs every counting method on a fixed sample input and prints the
	 * match count and elapsed time of each.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// Sample text and the pattern to look for.
		String mother = "94189123456469584185165156415615616894156132513202195609818651864165165486741874948464184861564984864894616479278678675527737975634465615878646565845894175691556473152321418434161555648164616546165411415153151"
				+ "123456123456122311234457867123456867878675781056098156191234568121031544854507374682341456234589167664865567162341125824333115155416841561494185469484161864648641658461468468418642161581211132"
				+ "16156156151654156156454187496184156468123456931482309815609871950320464563644589345123498491123354911311123456121312368584861641849641684846849816541741749498491513161068489114";
		String son = "123456";

		// Each call also stores its elapsed time in the matching static field.
		int bruteForceCount = Violence(mother, son);
		int kmpCount = KMP(mother, son);
		int indexOfCount = JavaIndexOf(mother, son);
		int regexCount = Regular_expression(mother, son);
		int splitCount = JavaSplit(mother, son);

		// Print one row per method: name, match count, elapsed nanoseconds.
		System.out.println("\t匹配算法\t\t匹配个数\t\t匹配时间(纳秒)");
		System.out.println("\t暴力查找:\t\t" + bruteForceCount + "\t\t" + time_暴力);
		System.out.println("\tKMP 算法:\t" + kmpCount + "\t\t" + time_KMP);
		System.out.println("\t字符串IndexOf:\t" + indexOfCount + "\t\t" + time_JavaIndexOf);
		System.out.println("\t正则表达式:\t" + regexCount + "\t\t" + time_正则表达式);
		System.out.println("\t字符串Split:\t" + splitCount + "\t\t" + time_split);
	}

	/**
	 * 1. Brute-force matching, O(n*m) in the worst case.
	 *
	 * @param mother the text to search in
	 * @param son    the literal pattern to count
	 * @return the number of non-overlapping occurrences; 0 if {@code son} is empty
	 */
	public static int Violence(String mother, String son) {
		char[] text = mother.toCharArray();
		char[] pattern = son.toCharArray();
		long begin = System.nanoTime();
		int ans = countBruteForce(text, pattern);
		time_暴力 = System.nanoTime() - begin;
		return ans;
	}

	// Tries every start position in turn; after a full match, jumps past it.
	private static int countBruteForce(char[] text, char[] pattern) {
		if (pattern.length == 0) {
			return 0;
		}
		int count = 0;
		int lastStart = text.length - pattern.length;
		int start = 0;
		while (start <= lastStart) {
			int j = 0;
			while (j < pattern.length && text[start + j] == pattern[j]) {
				j++;
			}
			if (j == pattern.length) {
				count++;
				// Skip the whole match so occurrences never overlap.
				start += pattern.length;
			} else {
				start++;
			}
		}
		return count;
	}

	/**
	 * 2. Knuth-Morris-Pratt matching.
	 *
	 * @param mother the text to search in
	 * @param son    the literal pattern to count
	 * @return the number of non-overlapping occurrences; 0 if {@code son} is empty
	 */
	public static int KMP(String mother, String son) {
		char[] text = mother.toCharArray();
		char[] pattern = son.toCharArray();
		long begin = System.nanoTime();
		int ans = countKmp(text, pattern);
		time_KMP = System.nanoTime() - begin;
		return ans;
	}

	// Scans the text once, falling back through the pattern's failure table on mismatch.
	private static int countKmp(char[] text, char[] pattern) {
		if (pattern.length == 0) {
			return 0;
		}
		int[] next = buildFailureTable(pattern);
		int count = 0;
		for (int i = 0, j = 0; i < text.length; i++) {
			while (j > 0 && text[i] != pattern[j]) {
				j = next[j - 1];
			}
			if (text[i] == pattern[j]) {
				j++;
			}
			if (j == pattern.length) {
				count++;
				// Restart from scratch so occurrences never overlap.
				j = 0;
			}
		}
		return count;
	}

	// next[i] = length of the longest proper prefix of pattern[0..i] that is also its suffix.
	// The table must be derived from the pattern (not the text): it tells the matcher how
	// much of the pattern is still matched after a mismatch.
	private static int[] buildFailureTable(char[] pattern) {
		int[] next = new int[pattern.length];
		for (int i = 1, j = 0; i < pattern.length; i++) {
			while (j > 0 && pattern[i] != pattern[j]) {
				j = next[j - 1];
			}
			if (pattern[i] == pattern[j]) {
				j++;
			}
			next[i] = j;
		}
		return next;
	}

	/**
	 * 3. Matching with {@link String#indexOf(String, int)}.
	 *
	 * @param mother the text to search in
	 * @param son    the literal pattern to count
	 * @return the number of non-overlapping occurrences; 0 if {@code son} is empty
	 */
	public static int JavaIndexOf(String mother, String son) {
		int ans = 0;
		long begin = System.nanoTime();
		// An empty pattern is found at every index and never advances the cursor,
		// so it must be excluded or the loop would never terminate.
		if (!son.isEmpty()) {
			int from = 0;
			while ((from = mother.indexOf(son, from)) != -1) {
				from += son.length();
				ans++;
			}
		}
		time_JavaIndexOf = System.nanoTime() - begin;
		return ans;
	}

	/**
	 * 4. Matching with {@link Pattern} / {@link Matcher}.
	 *
	 * @param mother the text to search in
	 * @param son    the literal pattern to count
	 * @return the number of non-overlapping occurrences; 0 if {@code son} is empty
	 */
	public static int Regular_expression(String mother, String son) {
		int ans = 0;
		long begin = System.nanoTime();
		if (!son.isEmpty()) {
			// LITERAL stops characters such as '.' or '+' in son from acting as regex operators.
			Pattern p = Pattern.compile(son, Pattern.LITERAL);
			Matcher m = p.matcher(mother);
			while (m.find()) {
				ans++;
			}
		}
		time_正则表达式 = System.nanoTime() - begin;
		return ans;
	}

	/**
	 * 5. Matching with {@link String#split(String, int)}: splitting on the
	 * pattern yields one more piece than there are occurrences.
	 *
	 * @param mother the text to search in
	 * @param son    the literal pattern to count
	 * @return the number of non-overlapping occurrences; 0 if {@code son} is empty
	 */
	public static int JavaSplit(String mother, String son) {
		int ans = 0;
		long begin = System.nanoTime();
		if (!son.isEmpty()) {
			// Pattern.quote: split() takes a regex, so the pattern must be escaped.
			// Limit -1: keep trailing empty pieces, otherwise matches at the very end
			// of the text would be dropped from the count.
			ans = mother.split(Pattern.quote(son), -1).length - 1;
		}
		time_split = System.nanoTime() - begin;
		return ans;
	}
}

猜你喜欢

转载自blog.csdn.net/qq_44009311/article/details/101453576