Java汉语转拼音首字母并大写利用Pinyin4j(多音字识别)(中文括号识别)

        <!-- Maven coordinates for pinyin4j, the library PinyinUtil below calls (PinyinHelper) to look up pinyin readings. -->
        <dependency>
            <groupId>com.belerweb</groupId>
            <artifactId>pinyin4j</artifactId>
            <version>2.5.1</version>
        </dependency>

引入pinyin4j依赖

直接上Util类

/**
 * Converts Chinese text into upper-case pinyin initials using pinyin4j.
 * <p>
 * Each Chinese character is replaced by the upper-cased first letter of its first
 * pinyin reading. Full-width Chinese punctuation is replaced by the value configured
 * for it in the classpath resource {@code cn_en_punc.txt}; every other character is
 * copied through unchanged.
 * <p>
 * The punctuation table is loaded once, when the class is initialized, and is only
 * read afterwards.
 *
 * @author shangxichen
 * @date 2020-03-13
 */
public class PinyinUtil {
    private static final Logger logger = LoggerFactory.getLogger(PinyinUtil.class);

    /** Classpath resource holding one "chinesePunctuation replacement" pair per line, separated by a single space. */
    private static final String PUNCTUATION_RESOURCE = "cn_en_punc.txt";

    /**
     * Matches a string made up solely of characters in U+4E00..U+9FFF (simplified and
     * traditional Chinese). Alternatives: U+2E80..U+9FFF covers all East Asian scripts,
     * U+4E00..U+9FA5 covers the commonly used simplified range.
     */
    private static final Pattern CHINESE_CHAR = Pattern.compile("^[\u4E00-\u9FFF]+$");

    /** Matches the full-width Chinese punctuation marks that are looked up in the replacement table. */
    private static final Pattern CHINESE_PUNCTUATION =
            Pattern.compile("[\\u3002\\uff1b\\uff0c\\uff1a\\u201c\\u201d\\uff08\\uff09\\u3001\\uff1f\\u300a\\u300b]");

    /** Punctuation replacement table: Chinese punctuation mark -> replacement text. */
    private static Map<String, String> map = new HashMap<>();

    // Load the table exactly once instead of re-reading the resource on every conversion.
    // Must stay below the field declarations above, which it depends on.
    static {
        initMap();
    }

    /**
     * Loads the punctuation replacement table from {@code cn_en_punc.txt} on the classpath.
     * <p>
     * A missing resource or an I/O failure is logged and leaves the table as it is, in
     * which case punctuation is passed through unchanged. Lines that do not contain at
     * least two space-separated fields are skipped.
     */
    private static void initMap() {
        InputStream is = PinyinUtil.class.getClassLoader().getResourceAsStream(PUNCTUATION_RESOURCE);
        if (is == null) {
            // getResourceAsStream returns null (it does not throw) when the resource is absent.
            logger.error("加载cn_en_punc.txt失败: classpath resource not found");
            return;
        }
        // NOTE(review): the resource is decoded as UTF-8 rather than the platform default
        // charset so the CJK keys load identically on every machine; confirm the file is
        // actually stored as UTF-8.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(" ");
                if (parts.length < 2) {
                    // Blank or malformed line: nothing to map.
                    continue;
                }
                map.put(parts[0], parts[1]);
            }
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.error("加载cn_en_punc.txt失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Converts a string to its upper-case pinyin initials.
     * <p>
     * Chinese characters become the upper-cased first letter of their pinyin; Chinese
     * punctuation becomes its configured replacement (or stays as-is when none is
     * configured); all other characters are kept. A Chinese character for which no
     * pinyin is available is also kept as-is.
     *
     * @param chinese the text to convert; may be {@code null}
     * @return the converted text, or an empty string when {@code chinese} is blank
     */
    private static String getUpperInitials(String chinese) {
        if (StringUtils.isBlank(chinese)) {
            return "";
        }
        StringBuilder sb = new StringBuilder(chinese.length());
        for (int i = 0; i < chinese.length(); i++) {
            char unit = chinese.charAt(i);
            String unitText = String.valueOf(unit);
            if (match(unitText, CHINESE_CHAR)) {
                String pinyin = convertSingleChinese2Pinyin(unit);
                if (pinyin.isEmpty()) {
                    // No reading available: keep the character rather than failing on charAt(0).
                    sb.append(unit);
                } else {
                    // Character.toUpperCase is locale-independent, unlike String.toUpperCase().
                    sb.append(Character.toUpperCase(pinyin.charAt(0)));
                }
            } else if (match(unitText, CHINESE_PUNCTUATION)) {
                String value = map.get(unitText);
                sb.append(StringUtils.isBlank(value) ? unitText : value);
            } else {
                sb.append(unit);
            }
        }
        return sb.toString();
    }

    /**
     * Converts a single Chinese character to toneless pinyin.
     * <p>
     * For characters with several readings only the first one returned by pinyin4j is used.
     *
     * @param chinese the character to convert
     * @return the pinyin, or an empty string when pinyin4j has no reading for the
     *         character or the conversion fails
     */
    private static String convertSingleChinese2Pinyin(char chinese) {
        HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
        outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        try {
            String[] res = PinyinHelper.toHanyuPinyinStringArray(chinese, outputFormat);
            // Guard against a missing reading explicitly instead of relying on a caught NPE.
            if (res == null || res.length == 0 || res[0] == null) {
                return "";
            }
            return res[0];
        } catch (Exception e) {
            logger.error("Failed to convert character '{}' to pinyin", chinese, e);
            return "";
        }
    }

    /**
     * Tests whether a pre-compiled pattern is found in a string.
     *
     * @param str     the string to test
     * @param pattern the compiled regular expression
     * @return {@code true} if the pattern is found in {@code str}, {@code false} otherwise
     */
    private static boolean match(String str, Pattern pattern) {
        return pattern.matcher(str).find();
    }

    /**
     * Manual smoke test: prints the conversion of a mixed Chinese/ASCII sample.
     */
    public static void main(String[] args) {
        System.out.println(getUpperInitials("音调 good ((0),,音。.乐 乐 调侃"));
    }
}
发布了18 篇原创文章 · 获赞 0 · 访问量 1992

猜你喜欢

转载自blog.csdn.net/rye1009/article/details/104849386