中英文混合的字符串截取指定长度的字符

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <string>
#include <typeinfo>
#include <vector>

/**
 * \brief Counts the characters in a NUL-terminated UTF-8 string.
 *
 * The length of every character is taken from its first byte only; the
 * continuation bytes are not validated. A byte that cannot start a sequence
 * (0x80-0xBF, 0xFE, 0xFF) is counted as one character of its own. A sequence
 * that is cut off by the end of the string is not counted.
 *
 * \param str NUL-terminated string; a null pointer is treated as empty.
 * \return Number of characters found.
 */
int getUtf8NameLen(const char* str)
{
    // Sequence length in bytes, indexed by the value of the first byte.
    // The 5- and 6-byte entries cover the legacy (pre-RFC 3629) forms.
    static const unsigned char kLeadByteLength[256] =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
    };

    if (!str)
    {
        return 0;
    }

    const char* const end = str + std::strlen(str);
    int count = 0;
    const char* cursor = str;
    while (cursor < end)
    {
        const std::size_t step = kLeadByteLength[static_cast<unsigned char>(*cursor)];

        // Compare against the bytes that remain instead of advancing the
        // pointer first, so it never moves past the end of the buffer.
        if (static_cast<std::size_t>(end - cursor) < step)
        {
            break; // incomplete trailing sequence: not counted
        }

        cursor += step;
        ++count;
    }

    return count;
}

/**
 * \brief Tests whether a byte has its most significant bit set.
 *
 * Bytes of multi-byte (non-ASCII) characters, such as Chinese text, have the
 * top bit set, while plain ASCII bytes do not.
 *
 * \param p Byte to inspect.
 * \return 1 when the top bit of \p p is set, -1 otherwise.
 */
int is_zh_ch(char p)
{
    // Test the bit on an unsigned value: whether plain char is signed, and how
    // a negative value is shifted right, both depend on the platform, so the
    // result must not rely on sign extension.
    if ((static_cast<unsigned char>(p) & 0x80u) != 0)
    {
        return 1;
    }

    return -1;
}

/**
 * \brief Extracts a range of characters from a UTF-8 string.
 *
 * Positions are 1-based and count whole characters, not bytes, so a
 * multi-byte character is never split. The length of each character is taken
 * from its first byte (2-6 bytes for 0xC0-0xFD, otherwise 1); continuation
 * bytes are not validated, and a sequence cut off by the end of the string is
 * taken as far as it goes.
 *
 * \param str   Source string.
 * \param start Position of the first character to keep (1-based).
 * \param end   Position of the last character to keep (inclusive). A value
 *              <= 0 means "up to the end of the string"; a value beyond the
 *              last character is clamped to it.
 * \return The selected characters. An empty string is returned when \p str is
 *         empty (after printing "str is not string") or when \p start is below
 *         1, above \p end, or beyond the last character (after printing
 *         "start is wrong").
 */
std::string sub(std::string str, int start, int end = -1)
{
    if (str.empty())
    {
        // Message kept as-is: this is how empty input has always been reported.
        printf("str is not string\n");
        return "";
    }

    const bool toEnd = (end <= 0);
    if (start < 1 || (!toEnd && start > end))
    {
        printf("start is wrong");
        return "";
    }

    const std::size_t first = static_cast<std::size_t>(start);
    // npos is larger than any reachable character position, so it doubles as
    // the "no upper bound" marker.
    const std::size_t last = toEnd ? std::string::npos : static_cast<std::size_t>(end);

    const std::size_t size = str.size();
    std::size_t beginByte = std::string::npos; // byte offset of character `first`
    std::size_t endByte = size;                // byte offset just past character `last`
    std::size_t pos = 0;

    for (std::size_t index = 1; pos < size; ++index)
    {
        const unsigned char lead = static_cast<unsigned char>(str[pos]);

        // Default of 1 covers ASCII, stray continuation bytes and 0xFE/0xFF.
        std::size_t step = 1;
        if (lead >= 0xC0 && lead < 0xE0)      step = 2;
        else if (lead >= 0xE0 && lead < 0xF0) step = 3;
        else if (lead >= 0xF0 && lead < 0xF8) step = 4;
        else if (lead >= 0xF8 && lead < 0xFC) step = 5;
        else if (lead >= 0xFC && lead < 0xFE) step = 6;

        // Never step past the end when the final sequence is truncated.
        if (step > size - pos)
        {
            step = size - pos;
        }

        if (index == first)
        {
            beginByte = pos;
        }

        pos += step;

        if (index == last)
        {
            endByte = pos;
            break;
        }
    }

    if (beginByte == std::string::npos)
    {
        // start lies beyond the last character of the string.
        printf("start is wrong");
        return "";
    }

    return str.substr(beginByte, endByte - beginByte);
}

   /**
    * \brief Truncates a string to at most \p len characters.
    *
    * Characters are counted with getUtf8NameLen(). When the string already
    * fits it is returned unchanged; otherwise characters 1..len are taken
    * with sub().
    *
    * \param str String to truncate.
    * \param len Maximum number of characters to keep; values <= 0 yield an
    *            empty string.
    * \return The truncated string.
    */
   std::string substr(std::string str, int len = 3)
   {
       // sub() interprets an end position <= 0 as "to the end of the string",
       // so a non-positive length has to be answered here or the whole string
       // would come back.
       if (len <= 0)
       {
           return "";
       }

       const int charCount = getUtf8NameLen(str.c_str());
       if (charCount <= len)
       {
           return str;
       }

       return sub(str, 1, len);
   }

中英文混合的字符串截取指定长度的字符

猜你喜欢

中英文混合的字符串截取指定长度的字符