Jsoncpp 中文汉字unicode乱码解决及标点问题解决

由于项目中用到了JSON数据结构,客户端采用VC编写,服务端数据接口采用PHP编写。服务端返回的JSON中,中文被编码成Unicode转义序列(\uXXXX),客户端用Jsoncpp解析之后,中文出现了乱码。

网上有一个方法比较好用,就是修改json_reader.cpp中的codePointToUTF8函数。

修改为以下:

// Encodes a single Unicode code point as a byte string.
//
// Code points are normally encoded as UTF-8 (1 to 4 bytes). The exception is
// the two CJK ranges U+4E00..U+9FA5 and U+F900..U+FA2D: those are converted
// with wcstombs_s under the "chs" C locale, so the bytes returned for them are
// in that locale's multibyte encoding, NOT UTF-8, despite the function name.
// NOTE(review): on MSVC "chs" presumably selects Simplified Chinese (code page
// 936 / GBK) -- confirm against the CRT locale documentation.
//
// If the locale cannot be queried or switched, or the conversion fails, the
// code point is encoded as regular 3-byte UTF-8 instead of returning an empty
// or partial string.
//
// NOTE(review): setlocale changes the process-wide C locale for the duration
// of the conversion; confirm no other thread depends on the locale meanwhile.
//
// @param cp  Unicode code point to encode.
// @return    The encoded bytes; empty when cp is greater than 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    bool converted = false;

    if ((cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D)) {
      // setlocale may return NULL; constructing a std::string from NULL is
      // undefined behaviour, so check before copying.
      const char* current = setlocale(LC_ALL, NULL);
      if (current != NULL) {
        // Copy the name: the buffer returned by setlocale may be overwritten
        // by the next setlocale call.
        const std::string savedLocale(current);
        if (setlocale(LC_ALL, "chs") != NULL) {
          wchar_t src[2] = { static_cast<wchar_t>(cp), 0 };
          char dest[5] = { 0 };
          // wcstombs_s returns zero on success; require a non-empty result too.
          if (wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0') {
            result = dest;
            converted = true;
          }
          // Always restore the caller's locale once it has been switched.
          setlocale(LC_ALL, savedLocale.c_str());
        }
      }
    }

    if (!converted) {
      // Standard 3-byte UTF-8 encoding.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}

经过以上修改,中文汉字确实能正常显示了,但中文标点符号仍然是乱码。于是查了中文标点的Unicode编码,只要在上面处理中文汉字的条件判断中,加入这些标点编码的处理即可。

句号    0x3002  。   
问号    0xFF1F  ?   
叹号    0xFF01  !   
逗号    0xFF0C  ,
顿号    0x3001  、   
分号    0xFF1B  ;   
冒号    0xFF1A  :   
引号    0x300C  「   
      0x300D  」   
引号    0x300E  『   
      0x300F  』   
引号    0x2018  ‘   
      0x2019  ’   
引号    0x201C  “   
      0x201D  ”   
括号    0xFF08  (   
      0xFF09  )   
括号    0x3014  〔   
      0x3015  〕   
括号    0x3010  【   
      0x3011  】   
破折号      0x2014  —   
省略号      0x2026  …   
连接号      0x2013  –   
间隔号      0xFF0E  .   
书名号      0x300A  《   
       0x300B  》   
书名号      0x3008  〈   
       0x3009  〉


最终修改后的函数为:


// Returns true for the BMP code points that codePointToUTF8 converts through
// the "chs" C locale instead of encoding as UTF-8: the CJK ranges
// U+4E00..U+9FA5 and U+F900..U+FA2D plus a fixed list of Chinese punctuation.
static inline bool usesLocaleEncoding(unsigned int cp) {
  if ((cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D)) {
    return true;
  }
  switch (cp) {
  case 0x2013: // en dash
  case 0x2014: // em dash
  case 0x2018: // left single quotation mark
  case 0x2019: // right single quotation mark
  case 0x201C: // left double quotation mark
  case 0x201D: // right double quotation mark
  case 0x2026: // horizontal ellipsis
  case 0x3001: // ideographic comma
  case 0x3002: // ideographic full stop
  case 0x3008: // left angle bracket
  case 0x3009: // right angle bracket
  case 0x300A: // left double angle bracket
  case 0x300B: // right double angle bracket
  case 0x300C: // left corner bracket
  case 0x300D: // right corner bracket
  case 0x300E: // left white corner bracket
  case 0x300F: // right white corner bracket
  case 0x3010: // left black lenticular bracket
  case 0x3011: // right black lenticular bracket
  case 0x3014: // left tortoise shell bracket
  case 0x3015: // right tortoise shell bracket
  case 0xFF01: // fullwidth exclamation mark
  case 0xFF08: // fullwidth left parenthesis
  case 0xFF09: // fullwidth right parenthesis
  case 0xFF0C: // fullwidth comma
  case 0xFF0E: // fullwidth full stop
  case 0xFF1A: // fullwidth colon
  case 0xFF1B: // fullwidth semicolon
  case 0xFF1F: // fullwidth question mark
    return true;
  default:
    return false;
  }
}

// Encodes a single Unicode code point as a byte string.
//
// Code points are normally encoded as UTF-8 (1 to 4 bytes). The exception is
// the set accepted by usesLocaleEncoding (CJK ideograph ranges and Chinese
// punctuation): those are converted with wcstombs_s under the "chs" C locale,
// so the bytes returned for them are in that locale's multibyte encoding, NOT
// UTF-8, despite the function name.
// NOTE(review): on MSVC "chs" presumably selects Simplified Chinese (code page
// 936 / GBK) -- confirm against the CRT locale documentation.
//
// If the locale cannot be queried or switched, or the conversion fails, the
// code point is encoded as regular 3-byte UTF-8 instead of returning an empty
// or partial string.
//
// NOTE(review): setlocale changes the process-wide C locale for the duration
// of the conversion; confirm no other thread depends on the locale meanwhile.
//
// @param cp  Unicode code point to encode.
// @return    The encoded bytes; empty when cp is greater than 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    bool converted = false;

    if (usesLocaleEncoding(cp)) {
      // setlocale may return NULL; constructing a std::string from NULL is
      // undefined behaviour, so check before copying.
      const char* current = setlocale(LC_ALL, NULL);
      if (current != NULL) {
        // Copy the name: the buffer returned by setlocale may be overwritten
        // by the next setlocale call.
        const std::string savedLocale(current);
        if (setlocale(LC_ALL, "chs") != NULL) {
          wchar_t src[2] = { static_cast<wchar_t>(cp), 0 };
          char dest[5] = { 0 };
          // wcstombs_s returns zero on success; require a non-empty result too.
          if (wcstombs_s(NULL, dest, 5, src, 2) == 0 && dest[0] != '\0') {
            result = dest;
            converted = true;
          }
          // Always restore the caller's locale once it has been switched.
          setlocale(LC_ALL, savedLocale.c_str());
        }
      }
    }

    if (!converted) {
      // Standard 3-byte UTF-8 encoding.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}


即可完美解决中文乱码问题。

猜你喜欢

转载自blog.csdn.net/paopaoqqcbg/article/details/72673993