#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <string>
#include <typeinfo>
#include <vector>
/*
** \brief Counts the characters in a NUL-terminated UTF-8 string.
**
** Every byte is classified through a 256-entry table that gives the length
** of the sequence that byte would start (1 to 6 bytes, covering the legacy
** 5- and 6-byte forms). Bytes that cannot start a sequence (continuation
** bytes 0x80-0xBF, and 0xFE/0xFF) are counted as one character each, so
** malformed input still advances.
**
** \param str NUL-terminated byte string; a null pointer is treated as empty.
** \return Number of complete characters. A trailing sequence that is cut
**         short by the terminator is not counted.
*/
int getUtf8NameLen(const char* str)
{
    // Sequence length indexed by the value of the first byte.
    static const unsigned char utf8_look_for_table[] =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    if (!str)
    {
        return 0;
    }

    const char* const ptrend = str + std::strlen(str);
    const char* ptr = str;
    int num = 0;
    while (ptr < ptrend)
    {
        const std::size_t step = utf8_look_for_table[static_cast<unsigned char>(*ptr)];
        // Compare against the bytes that remain instead of forming
        // `ptr + step`, which could point beyond the end of the buffer.
        if (step > static_cast<std::size_t>(ptrend - ptr))
        {
            break; // truncated trailing sequence: not a complete character
        }
        ptr += step;
        ++num;
    }
    return num;
}
/*
 * Tests whether a byte has its most significant bit set, i.e. whether it is
 * part of a multi-byte (non-ASCII) character such as a double-byte Chinese
 * character.
 *
 * The byte is inspected as `unsigned char`, so the answer does not depend on
 * whether plain `char` is signed on the target platform.
 *
 * Returns 1 when the high bit is set, -1 for a plain 7-bit ASCII byte.
 */
int is_zh_ch(char p)
{
    return (static_cast<unsigned char>(p) & 0x80u) ? 1 : -1;
}
/*
 * Extracts a range of characters from a string in a double-byte encoding.
 *
 * The string is split into units: a byte with its high bit set starts a
 * two-byte unit, any other byte is a one-byte unit. A dangling high-bit byte
 * at the very end forms a one-byte unit.
 *
 * str   - text to slice; an empty string yields an empty result.
 * start - 1-based index of the first unit to return.
 * end   - 1-based index of the last unit to return (inclusive). Values <= 0
 *         mean "through the last unit"; values beyond the last unit are
 *         clamped to it.
 *
 * Returns the selected units concatenated. When start is below 1, beyond the
 * last unit, or greater than end, "start is wrong" is printed to stdout and
 * an empty string is returned.
 */
std::string sub(std::string str, int start, int end = -1)
{
    typedef std::string::size_type size_type;

    if (str.empty())
    {
        return std::string();
    }

    const size_type byteCount = str.size();
    const size_type notFound = std::string::npos;
    // npos is the largest size_type value, so it doubles as "no upper limit".
    const size_type lastUnit = end > 0 ? static_cast<size_type>(end) : notFound;

    size_type firstByte = notFound;
    if (start >= 1)
    {
        const size_type firstUnit = static_cast<size_type>(start);
        size_type pos = 0;
        size_type unit = 0;
        while (pos < byteCount)
        {
            ++unit; // 1-based index of the unit that begins at `pos`
            if (unit == firstUnit)
            {
                firstByte = pos;
            }
            const bool highBit = (static_cast<unsigned char>(str[pos]) & 0x80u) != 0;
            pos += highBit ? 2 : 1;
            if (unit == lastUnit)
            {
                break;
            }
        }
        if (pos > byteCount)
        {
            pos = byteCount; // dangling lead byte at the end of the string
        }
        if (firstByte != notFound)
        {
            return str.substr(firstByte, pos - firstByte);
        }
    }

    printf("start is wrong");
    return std::string();
}
// Length in bytes of the UTF-8 sequence introduced by `lead`; bytes that
// cannot start a sequence (continuation bytes, 0xFE, 0xFF) count as 1.
static std::string::size_type utf8LeadByteSpan(unsigned char lead)
{
    if (lead < 0xC0) return 1;
    if (lead < 0xE0) return 2;
    if (lead < 0xF0) return 3;
    if (lead < 0xF8) return 4;
    if (lead < 0xFC) return 5;
    if (lead < 0xFE) return 6;
    return 1;
}

/*
 * \brief Truncates a UTF-8 string to a fixed number of characters.
 * \param str the string to truncate
 * \param len maximum number of characters to keep (default 3)
 * \return str unchanged when it holds at most len complete characters;
 *         otherwise its first len characters, cut on a character boundary.
 *         A non-positive len yields an empty string.
 *
 * Characters are measured as UTF-8 sequences of 1 to 6 bytes. An incomplete
 * sequence at the very end of str is not counted as a character.
 */
std::string substr(std::string str, int len = 3)
{
    typedef std::string::size_type size_type;

    if (len <= 0)
    {
        return std::string();
    }

    const size_type byteCount = str.size();
    size_type pos = 0;
    int taken = 0;
    while (pos < byteCount)
    {
        const size_type span = utf8LeadByteSpan(static_cast<unsigned char>(str[pos]));
        if (span > byteCount - pos)
        {
            // Only an incomplete tail remains, so there is no further
            // character to drop and the string already fits.
            return str;
        }
        if (taken == len)
        {
            // One more complete character exists beyond the limit: cut here.
            return str.substr(0, pos);
        }
        pos += span;
        ++taken;
    }
    return str;
}