前几天做了个报名秒杀功能,用户需要输入:
姓,名,姓拼音,名拼音
中间使用到了 pinyin4j,在此记录一下。
- 导入pinyin4j-2.5.0.jar
- 创建 Pinyin4jUtil。以下是具体代码
package com.kewei.framework.common;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import com.alibaba.fastjson.JSON;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
/**
 * Static helpers that turn Chinese text into pinyin using pinyin4j.
 *
 * <p>Two families of conversion are offered:
 * <ul>
 *   <li>{@link #converterToSpell(String)} / {@link #converterToFirstSpell(String)} return
 *       <em>every</em> combination of readings, comma separated, so that user input can be
 *       checked against all pronunciations of a polyphonic character;</li>
 *   <li>{@link #convertChineseToPinyin(String, boolean)} returns a <em>single</em> reading per
 *       character, optionally disambiguated by the dictionary loaded through
 *       {@link #initPinyin(String)}.</li>
 * </ul>
 */
public class Pinyin4jUtil {

    /**
     * Polyphone dictionary filled by {@link #initPinyin(String)}: maps a pinyin reading to the
     * words/phrases listed for that reading in the dictionary file. It is looked up with
     * lowercase, toneless readings in which "u:" has been replaced by "v". Empty until
     * {@code initPinyin} is called.
     */
    private static Map<String, List<String>> pinyinMap = new HashMap<String, List<String>>();

    static {
        // The dictionary is not loaded by default. Re-enable the call below (or call initPinyin
        // explicitly) to turn on phrase-based disambiguation in convertChineseToPinyin.
        //initPinyin("/config/duoyinzi_dic.txt");
    }

    /**
     * Converts Chinese text to the initials of its pinyin, listing every combination produced by
     * polyphonic characters (the original documentation gives 长沙市长 -> cssc,zssz,zssc,cssz).
     *
     * <p>Characters with code point value &lt;= 128 are copied unchanged, except that ASCII space
     * and comma are dropped because they are used internally as separators. Other characters for
     * which pinyin4j has no reading are dropped. The order of the combinations is unspecified.
     *
     * @param chines text to convert; {@code null} yields an empty string
     * @return comma-separated combinations of initials, never {@code null}
     */
    public static String converterToFirstSpell(String chines) {
        return combineReadings(chines, true);
    }

    /**
     * Converts Chinese text to full pinyin (lowercase, no tone), listing every combination
     * produced by polyphonic characters (the original documentation gives 重当参 ->
     * zhongdangcen,zhongdangcan,chongdangcen,chongdangshen,zhongdangshen,chongdangcan).
     *
     * <p>Characters with code point value &lt;= 128 are copied unchanged, except that ASCII space
     * and comma are dropped because they are used internally as separators. Other characters for
     * which pinyin4j has no reading are dropped. The order of the combinations is unspecified.
     * The number of combinations is the product of the number of distinct readings of each
     * character, so this is meant for short inputs such as names.
     *
     * @param chines text to convert; {@code null} yields an empty string
     * @return comma-separated full-pinyin combinations, never {@code null}
     */
    public static String converterToSpell(String chines) {
        return combineReadings(chines, false);
    }

    /** Builds the lowercase, toneless output format shared by every conversion in this class. */
    private static HanyuPinyinOutputFormat newLowercaseTonelessFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        return format;
    }

    /**
     * Shared implementation of {@link #converterToSpell} and {@link #converterToFirstSpell}:
     * encodes the readings of each character as "r1,r2 " and hands the result to the
     * de-duplication and combination steps.
     */
    private static String combineReadings(String chines, boolean initialsOnly) {
        if (chines == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = newLowercaseTonelessFormat();
        StringBuilder encoded = new StringBuilder();
        for (int i = 0; i < chines.length(); i++) {
            char ch = chines.charAt(i);
            if (ch > 128) {
                try {
                    // All readings of the current character; null when pinyin4j knows none.
                    String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, format);
                    if (readings != null) {
                        for (int j = 0; j < readings.length; j++) {
                            if (j > 0) {
                                encoded.append(",");
                            }
                            if (initialsOnly) {
                                encoded.append(readings[j].charAt(0));
                            } else {
                                encoded.append(readings[j]);
                            }
                        }
                    }
                } catch (BadHanyuPinyinOutputFormatCombination e) {
                    // Best effort: the character is skipped and conversion continues.
                    e.printStackTrace();
                }
            } else {
                encoded.append(ch);
            }
            // One space-terminated group per input character.
            encoded.append(" ");
        }
        return parseTheChineseByObject(discountTheChinese(encoded.toString()));
    }

    /**
     * Removes duplicate readings of each character.
     *
     * @param theStr groups separated by spaces, readings inside a group separated by commas
     * @return one map per group; keys are the distinct readings, values their occurrence count
     */
    private static List<Map<String, Integer>> discountTheChinese(String theStr) {
        List<Map<String, Integer>> mapList = new ArrayList<Map<String, Integer>>();
        for (String group : theStr.split(" ")) {
            Map<String, Integer> onlyOne = new Hashtable<String, Integer>();
            for (String reading : group.split(",")) {
                Integer count = onlyOne.get(reading);
                if (count == null) {
                    onlyOne.put(reading, Integer.valueOf(1));
                } else {
                    // Re-inserting instead of overwriting is deliberate: it keeps the map's
                    // iteration order, and with it the order of the combinations returned to
                    // callers, the same as in earlier versions of this class.
                    onlyOne.remove(reading);
                    onlyOne.put(reading, Integer.valueOf(count.intValue() + 1));
                }
            }
            mapList.add(onlyOne);
        }
        return mapList;
    }

    /**
     * Combines the per-character readings into every possible whole-string reading.
     *
     * @param list one map per character, as produced by {@link #discountTheChinese(String)}
     * @return the combinations joined by commas; empty string when there is none
     */
    private static String parseTheChineseByObject(List<Map<String, Integer>> list) {
        // Combinations built from the characters processed so far.
        Map<String, Integer> combined = null;
        for (Map<String, Integer> readings : list) {
            Map<String, Integer> next = new Hashtable<String, Integer>();
            if (combined == null) {
                for (String reading : readings.keySet()) {
                    next.put(reading, 1);
                }
            } else {
                for (String prefix : combined.keySet()) {
                    for (String reading : readings.keySet()) {
                        next.put(prefix + reading, 1);
                    }
                }
            }
            // A character without any reading leaves the previous combinations untouched.
            if (!next.isEmpty()) {
                combined = next;
            }
        }
        if (combined == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String combination : combined.keySet()) {
            if (!first) {
                joined.append(",");
            }
            joined.append(combination);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Upper-cases the first character of a string, independent of the default locale.
     *
     * @param str input string, may be {@code null}
     * @return {@code str} with its first character upper-cased; empty string for {@code null}
     */
    public static String convertInitialToUpperCase(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        return str.substring(0, 1).toUpperCase(Locale.ROOT) + str.substring(1);
    }

    /**
     * Returns the first character of a string.
     *
     * @param str input string, may be {@code null}
     * @return a one-character string, or an empty string when {@code str} is null or empty
     */
    public static String getInitialChar(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        return str.substring(0, 1);
    }

    /**
     * Converts Chinese text to pinyin, choosing one reading per character.
     *
     * <p>Characters with value &lt;= 128 are copied unchanged; other characters without a pinyin
     * reading are dropped. "u:" in a reading is written as "v". For a polyphonic character the
     * readings are tried in the order pinyin4j returns them; the first reading whose dictionary
     * entry contains one of the surrounding phrases (see {@link #chooseReading}) wins. Without a
     * match, and always while the dictionary is empty, the first reading is used.
     *
     * @param chinese text to convert; {@code null} yields an empty string
     * @param shortTerm {@code true} to emit only the initial letter of each reading,
     *     {@code false} to emit the full reading with its first letter capitalised
     * @return the converted text, never {@code null}
     */
    public static String convertChineseToPinyin(String chinese, boolean shortTerm) {
        if (chinese == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = newLowercaseTonelessFormat();
        StringBuilder pinyin = new StringBuilder();
        for (int i = 0; i < chinese.length(); i++) {
            char ch = chinese.charAt(i);
            if (ch <= 128) {
                pinyin.append(ch);
                continue;
            }
            String[] results;
            try {
                results = PinyinHelper.toHanyuPinyinStringArray(ch, format);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // Best effort: the character is skipped and conversion continues.
                e.printStackTrace();
                continue;
            }
            if (results == null || results.length == 0) {
                // Not a character pinyin4j can read.
                continue;
            }
            List<String> readings = distinctReadings(results);
            // The toneless format makes readings that differ only by tone identical, so a
            // character is polyphonic only when more than one distinct reading remains.
            String chosen = readings.size() == 1
                    ? readings.get(0)
                    : chooseReading(chinese, i, readings);
            pinyin.append(shortTerm ? getInitialChar(chosen) : convertInitialToUpperCase(chosen));
        }
        return pinyin.toString();
    }

    /** Replaces "u:" by "v" in each reading and removes duplicates, keeping the first-seen order. */
    private static List<String> distinctReadings(String[] results) {
        List<String> distinct = new ArrayList<String>(results.length);
        for (String reading : results) {
            String normalized = reading.replace("u:", "v");
            if (!distinct.contains(normalized)) {
                distinct.add(normalized);
            }
        }
        return distinct;
    }

    /**
     * Picks the reading of the polyphonic character at {@code index} using the phrases around it.
     *
     * @param chinese the full input text
     * @param index position of the character inside {@code chinese}
     * @param readings distinct, normalised readings of that character, in pinyin4j order
     * @return the first reading whose dictionary entry contains a surrounding phrase; otherwise
     *     the first reading whose entry contains the character alone; otherwise the first reading
     */
    private static String chooseReading(String chinese, int index, List<String> readings) {
        int length = chinese.length();
        // Candidate phrases, in the order they are tested for each reading.
        List<String> contexts = new ArrayList<String>(5);
        if (index + 3 <= length) { // character + 2 following, e.g. 大西洋
            contexts.add(chinese.substring(index, index + 3));
        }
        if (index + 2 <= length) { // character + 1 following, e.g. 大西
            contexts.add(chinese.substring(index, index + 2));
        }
        if (index - 2 >= 0) { // 2 preceding + character, e.g. 龙固大
            contexts.add(chinese.substring(index - 2, index + 1));
        }
        if (index - 1 >= 0) { // 1 preceding + character, e.g. 固大
            contexts.add(chinese.substring(index - 1, index + 1));
        }
        if (index - 1 >= 0 && index + 2 <= length) { // 1 preceding + character + 1 following, e.g. 固大西
            contexts.add(chinese.substring(index - 1, index + 2));
        }

        for (String reading : readings) {
            List<String> phrases = pinyinMap.get(reading);
            if (phrases == null) {
                continue;
            }
            for (String context : contexts) {
                if (phrases.contains(context)) {
                    return reading;
                }
            }
        }

        // No phrase matched: look for a default reading registered for the character itself.
        String single = String.valueOf(chinese.charAt(index));
        for (String reading : readings) {
            List<String> phrases = pinyinMap.get(reading);
            if (phrases != null && phrases.contains(single)) {
                return reading;
            }
        }
        return readings.get(0);
    }

    /**
     * Loads the polyphone dictionary into memory.
     *
     * <p>Each line is expected to look like {@code pinyin#word1 word2 ...}; lines without a
     * {@code #} separator (including blank lines) are skipped. A reading that occurs on several
     * lines keeps the phrases of the last one. If reading fails midway, the stack trace is
     * printed and the entries read so far are kept.
     *
     * <p>NOTE(review): the file is decoded with the platform default charset, and the resource
     * is resolved through {@code PinyinHelper.class}; confirm both match the deployment.
     *
     * @param fileName resource path of the dictionary, e.g. {@code /config/duoyinzi_dic.txt}
     * @throws IllegalArgumentException if the resource cannot be found
     */
    public static void initPinyin(String fileName) {
        InputStream file = PinyinHelper.class.getResourceAsStream(fileName);
        if (file == null) {
            throw new IllegalArgumentException("Polyphone dictionary not found: " + fileName);
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(file));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] parts = line.split("#");
                if (parts.length < 2) {
                    // Blank or malformed line: nothing to register.
                    continue;
                }
                pinyinMap.put(parts[0], Arrays.asList(parts[1].split(" ")));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
main方法测试
// Usage example from the article.
// NOTE(review): this listing joins two fragments: a quick console check followed by an excerpt
// of the signup validation. As written it declares surName and name twice and reads
// stuExamInfo, which is not defined anywhere in this listing, so it does not compile as a
// single method; treat the two halves as separate snippets.
public static void main(String[] args){
// Fragment 1: expected pinyin (upper case) for the surname and the given name.
String surName = "JIE";
String name = "QIAN";
// Prints the position of the expected pinyin inside the comma-separated list of all readings,
// or -1 when it does not occur.
System.out.println(Pinyin4jUtil.converterToSpell("解").toUpperCase().indexOf(surName));
System.out.println(Pinyin4jUtil.converterToSpell("茜").toUpperCase().indexOf(name));
// Fragment 2: check whether the pinyin entered by the user is one of the possible readings.
String surName = stuExamInfo.getSurNamePy();
String name = stuExamInfo.getNamePy();
// Set to 1 when the entered surname pinyin equals one of the generated combinations.
int pysurnameFlag = 0;
String py4jsurNameStr =
Pinyin4jUtil.converterToSpell(stuExamInfo.getSurName()).toUpperCase();
String[] py4jsurNameArr = py4jsurNameStr.split(",");
for (String py4jsurName : py4jsurNameArr) {
if(surName.equals(py4jsurName)){
pysurnameFlag = 1;
break;
}
}
// Set to 1 when the entered given-name pinyin equals one of the generated combinations.
int pynameFlag = 0;
String py4jNameStr =
Pinyin4jUtil.converterToSpell(stuExamInfo.getName()).toUpperCase();
String[] py4jNameArr = py4jNameStr.split(",");
for (String py4jName : py4jNameArr) {
if(name.equals(py4jName)){
pynameFlag = 1;
break;
}
}
}
最开始的时候没有考虑多音字,直接判断姓名转换出的拼音字符串与用户输入是否相等。后来修改成了上文的方式:
将汉字的所有拼音组合与传入的拼音依次对比。当 pysurnameFlag 与 pynameFlag 都等于 1 时,则输入正确。
注意:在默认情况下 吕(LV) 使用工具类得到的拼音是LU
需要使用 defaultFormat.setVCharType(HanyuPinyinVCharType.WITH_V);进行单独处理。