/**
 * Demo entry point: truncates one mixed Chinese/ASCII sample string to a 30-byte budget
 * with each of the three truncation methods and prints every result on its own line.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String sample = "差不奥xxcnubis那里是身材好cU币a我吃完子额鹅湖撤回";
    final int byteLimit = 30;

    // Same call order as before: binary search, one-char-at-a-time trimming, GBK byte cut.
    System.out.println(subGB2312(sample, byteLimit));
    System.out.println(sub(sample, byteLimit));
    System.out.println(subStringFix(sample, byteLimit));
}
/**
 * Returns the longest prefix of {@code source} whose GB2312 encoding occupies at most
 * {@code target} bytes. The cut always falls between characters, so a double-byte
 * character is either kept whole or dropped.
 *
 * <p>The cut position is located by binary search over the prefix length; the encoded
 * size of a prefix never shrinks as the prefix grows, which makes the search valid for
 * any mix of ASCII, punctuation and Chinese characters.
 *
 * @param source the text to truncate; {@code null} or empty input is returned unchanged
 * @param target the maximum number of GB2312-encoded bytes allowed in the result
 * @return {@code source} itself when it already fits, or when the runtime does not
 *         support GB2312; an empty string when {@code target <= 0} and the source does
 *         not fit; otherwise the longest prefix that fits
 */
public static String subGB2312(String source, int target) {
    if (source == null || source.isEmpty()) {
        return source;
    }
    try {
        if (source.getBytes("GB2312").length <= target) {
            return source;
        }
        if (target <= 0) {
            // A non-empty string cannot be squeezed into a non-positive budget.
            return "";
        }
        // Invariant: the first `fits` characters are within budget, the first `tooLong`
        // characters are not (the whole string was just shown not to fit).
        int fits = 0;
        int tooLong = source.length();
        while (tooLong - fits > 1) {
            int mid = fits + (tooLong - fits) / 2;
            if (source.substring(0, mid).getBytes("GB2312").length <= target) {
                fits = mid;
            } else {
                tooLong = mid;
            }
        }
        return source.substring(0, fits);
    } catch (UnsupportedEncodingException e) {
        // Best effort, as before: without the charset the text is returned untouched.
        return source;
    }
}
/**
 * Returns the longest prefix of {@code s} whose GB2312 encoding occupies at most
 * {@code num} bytes.
 *
 * <p>The result is the same prefix that repeatedly dropping the last character would
 * reach, but it is found by binary search over the prefix length. This avoids one
 * recursive call (and one full re-encoding) per dropped character, which was slow and
 * could exhaust the call stack on long inputs.
 *
 * @param s   the text to truncate; {@code null} or empty input is returned unchanged
 * @param num the maximum number of GB2312-encoded bytes allowed in the result
 * @return {@code s} itself when it already fits, or when the runtime does not support
 *         GB2312; an empty string when {@code num <= 0} and {@code s} does not fit;
 *         otherwise the longest prefix that fits
 */
public static String sub(String s, int num) {
    if (s == null || s.isEmpty()) {
        return s;
    }
    try {
        if (s.getBytes("GB2312").length <= num) {
            return s;
        }
        if (num <= 0) {
            // Nothing but the empty prefix can satisfy a non-positive budget.
            return "";
        }
        // keep: a prefix length known to fit; drop: a prefix length known not to fit.
        int keep = 0;
        int drop = s.length();
        while (drop - keep > 1) {
            int middle = keep + (drop - keep) / 2;
            if (s.substring(0, middle).getBytes("GB2312").length <= num) {
                keep = middle;
            } else {
                drop = middle;
            }
        }
        return s.substring(0, keep);
    } catch (UnsupportedEncodingException e) {
        // Best effort, as before: without the charset the text is returned untouched.
        return s;
    }
}
/**
 * Truncates text containing Chinese characters so that its GBK encoding occupies at most
 * {@code target} bytes.
 *
 * <p>The cut is chosen on character boundaries (binary search over the prefix length)
 * instead of slicing the encoded byte array, so a double-byte character is never split
 * in half and the result never ends in a garbled character. The result is always the
 * longest prefix that fits, regardless of whether the tail is ASCII, punctuation or
 * Chinese.
 *
 * @param source the text to truncate
 * @param target the maximum number of GBK-encoded bytes allowed in the result
 * @return {@code null} when {@code source} is {@code null}, empty or whitespace-only;
 *         {@code source} itself when it already fits or when the runtime does not
 *         support GBK; an empty string when {@code target <= 0}; otherwise the longest
 *         prefix that fits
 */
public static String subStringFix(String source, int target) {
    if (isNullOrWhitespace(source)) {
        return null;
    }
    try {
        if (source.getBytes("GBK").length <= target) {
            return source;
        }
        if (target <= 0) {
            // A non-blank string cannot fit into a non-positive byte budget.
            return "";
        }
        // Invariant: the first `fits` characters are within budget, the first `tooLong`
        // characters are not (the whole string was just shown not to fit).
        int fits = 0;
        int tooLong = source.length();
        while (tooLong - fits > 1) {
            int mid = fits + (tooLong - fits) / 2;
            if (source.substring(0, mid).getBytes("GBK").length <= target) {
                fits = mid;
            } else {
                tooLong = mid;
            }
        }
        return source.substring(0, fits);
    } catch (UnsupportedEncodingException e) {
        // Best effort, as before: without the charset the text is returned untouched.
        return source;
    }
}

/** Returns true for null, empty, or whitespace-only text (per Character.isWhitespace). */
private static boolean isNullOrWhitespace(String text) {
    if (text == null) {
        return true;
    }
    for (int i = 0; i < text.length(); i++) {
        if (!Character.isWhitespace(text.charAt(i))) {
            return false;
        }
    }
    return true;
}
// Sample results for the 30-byte limit used in main() (all three methods return the same prefix):
// subGB2312():    差不奥xxcnubis那里是身材好cU币
// sub():          差不奥xxcnubis那里是身材好cU币
// subStringFix(): 差不奥xxcnubis那里是身材好cU币