How to truncate mixed Chinese/English strings to a byte limit without producing garbled characters

public static void main(String[] args) {
        // Demo: truncate the same mixed Chinese/ASCII sample with each of the
        // three strategies and print every result on its own line.
        final int byteLimit = 30;
        final String dispatchSite = "差不奥xxcnubis那里是身材好cU币a我吃完子额鹅湖撤回";

        // Binary-search based truncation.
        System.out.println(subGB2312(dispatchSite, byteLimit));

        // Character-by-character truncation.
        System.out.println(sub(dispatchSite, byteLimit));

        // Byte-array based truncation.
        System.out.println(subStringFix(dispatchSite, byteLimit));
    }


    /**
     * Truncates {@code source} to the longest prefix whose GB2312-encoded size
     * does not exceed {@code target} bytes, using a binary search over the
     * prefix length. Whole characters are kept, so a double-byte character is
     * never split in half.
     *
     * @param source the text to truncate; may be {@code null}
     * @param target the maximum number of encoded bytes allowed in the result
     * @return {@code null} if {@code source} is {@code null}; {@code source}
     *         itself if it already fits (or if the GB2312 charset is not
     *         available); otherwise the longest fitting prefix, which is the
     *         empty string when {@code target} is zero or negative
     */
    public static String subGB2312(String source, int target) {
        if (source == null) {
            return null;
        }
        try {
            // Fast path: nothing to cut.
            if (source.getBytes("GB2312").length <= target) {
                return source;
            }
            if (target <= 0) {
                return "";
            }

            // Invariant: the prefix of length `low` always fits; every prefix
            // longer than `high` is too large. The whole string is already
            // known not to fit, so the search starts one character short of it.
            int low = 0;
            int high = source.length() - 1;
            while (low < high) {
                // Round the midpoint up so that `low = mid` always makes progress
                // (otherwise low=18, high=19 would probe 18 forever).
                int mid = low + (high - low + 1) / 2;
                if (source.substring(0, mid).getBytes("GB2312").length <= target) {
                    low = mid;
                } else {
                    high = mid - 1;
                }
            }
            return source.substring(0, low);
        } catch (UnsupportedEncodingException e) {
            // Charset missing on this JVM: fall back to the untouched input.
            return source;
        }
    }
    /**
     * Truncates {@code s} to the longest prefix whose GB2312-encoded size does
     * not exceed {@code num} bytes.
     *
     * <p>The text is scanned once from the front, one code point at a time,
     * adding up each code point's encoded size. This replaces the earlier
     * approach of dropping the last character and re-encoding the whole string
     * recursively, which was quadratic and could exhaust the call stack on
     * long inputs.
     *
     * @param s   the text to truncate; may be {@code null}
     * @param num the maximum number of encoded bytes allowed in the result
     * @return {@code null} if {@code s} is {@code null}; {@code s} itself if it
     *         already fits (or if the GB2312 charset is not available);
     *         otherwise the longest fitting prefix, which is the empty string
     *         when {@code num} is zero or negative
     */
    public static String sub(String s, int num) {
        if (s == null) {
            return null;
        }
        try {
            // Fast path: nothing to cut.
            if (s.getBytes("GB2312").length <= num) {
                return s;
            }

            int usedBytes = 0;
            int end = 0;
            while (end < s.length()) {
                // Step over a full code point so a surrogate pair is never split.
                int next = s.offsetByCodePoints(end, 1);
                int cost = s.substring(end, next).getBytes("GB2312").length;
                if (usedBytes + cost > num) {
                    break;
                }
                usedBytes += cost;
                end = next;
            }
            return s.substring(0, end);
        } catch (UnsupportedEncodingException e) {
            // Charset missing on this JVM: fall back to the untouched input.
            return s;
        }
    }

    /**
     * Truncates {@code source} to the longest prefix whose GBK-encoded size
     * does not exceed {@code target} bytes.
     *
     * <p>The text is scanned from the front one code point at a time, adding
     * up each code point's encoded size, so the cut always lands on a
     * character boundary. The earlier implementation sliced the encoded byte
     * array at {@code target - 2} and decoded the slice, which could cut a
     * double-byte character in half (yielding a replacement character in the
     * result), could return fewer characters than fit, and returned the
     * untruncated input when {@code target} was smaller than 2.
     *
     * @param source the text to truncate
     * @param target the maximum number of encoded bytes allowed in the result
     * @return {@code null} if {@code source} is {@code null}, empty or
     *         whitespace-only; {@code source} itself if it already fits (or if
     *         the GBK charset is not available); otherwise the longest fitting
     *         prefix, which is the empty string when {@code target} is zero
     *         or negative
     */
    public static String subStringFix(String source, int target) {
        if (isBlankText(source)) {
            return null;
        }
        try {
            // Fast path: nothing to cut.
            if (source.getBytes("GBK").length <= target) {
                return source;
            }

            int usedBytes = 0;
            int end = 0;
            while (end < source.length()) {
                // Step over a full code point so a surrogate pair is never split.
                int next = source.offsetByCodePoints(end, 1);
                int cost = source.substring(end, next).getBytes("GBK").length;
                if (usedBytes + cost > target) {
                    break;
                }
                usedBytes += cost;
                end = next;
            }
            return source.substring(0, end);
        } catch (UnsupportedEncodingException e) {
            // Charset missing on this JVM: fall back to the untouched input.
            return source;
        }
    }

    /**
     * Reports whether {@code text} is {@code null}, empty, or made up solely
     * of characters for which {@link Character#isWhitespace(char)} is true.
     */
    private static boolean isBlankText(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }
   

Output (each method keeps the first 30 bytes of the sample string):
subGB2312(): 差不奥xxcnubis那里是身材好cU币
sub(): 差不奥xxcnubis那里是身材好cU币
subStringFix(): 差不奥xxcnubis那里是身材好cU币

Guess you like

Origin blog.csdn.net/eluanshi12/article/details/110235109