项目中用到了JSON数据结构:客户端采用VC编写,服务端数据接口采用PHP编写。服务端返回的JSON中,中文被编码为Unicode转义序列(\uXXXX),客户端解析后中文显示为乱码。
网上有一个比较好用的方法:修改jsoncpp的json_reader.cpp中的codePointToUTF8函数,让汉字不再输出为UTF-8字节,而是通过wcstombs_s按"chs"区域设置转换为本地多字节编码,这样VC客户端即可直接正确显示。
修改为以下:
/// Converts one Unicode code point into the byte string stored by the JSON
/// reader.
///
/// Based on the description from http://en.wikipedia.org/wiki/UTF-8.
///
/// On MSVC builds, code points in U+4E00..U+9FA5 and U+F900..U+FA2D are NOT
/// emitted as UTF-8: they are converted with wcstombs_s() under the "chs"
/// locale so that the result is in that locale's multibyte encoding. If any
/// step of that conversion fails, the code point is emitted as regular UTF-8
/// instead of being silently dropped. On other compilers the function is a
/// plain UTF-8 encoder.
///
/// NOTE: the MSVC path temporarily switches the C locale with setlocale(),
/// so it must not run concurrently with other locale-dependent code.
///
/// @param cp  Unicode code point to encode.
/// @return    Encoded bytes; empty when cp is greater than 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    bool converted = false;
#if defined(_MSC_VER)
    if ((cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D)) {
      // setlocale() may return NULL, and a std::string must never be built
      // from a null pointer; without a saved name the locale could not be
      // restored either, so skip the conversion in that case.
      const char* const previous = setlocale(LC_ALL, NULL);
      if (previous != NULL) {
        // Copy the name: the next setlocale() call may invalidate `previous`.
        const std::string savedLocale(previous);
        if (setlocale(LC_ALL, "chs") != NULL) {
          const wchar_t src[2] = {static_cast<wchar_t>(cp), L'\0'};
          char dest[5] = {0};
          // wcstombs_s() returns non-zero and leaves dest empty when the
          // character has no representation in the active locale.
          if (wcstombs_s(NULL, dest, sizeof(dest), src, 2) == 0 &&
              dest[0] != '\0') {
            result = dest;
            converted = true;
          }
          setlocale(LC_ALL, savedLocale.c_str());
        }
      }
    }
#endif
    if (!converted) {
      // Standard three-byte UTF-8 sequence.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}
经过以上修改,中文确实能显示了,但是中文的标点符号仍为乱码。于是查找了中文标点的Unicode编码,只要在上面处理中文汉字的地方加入对标点编码的处理即可。
句号 0x3002 。
问号 0xFF1F ?
叹号 0xFF01 !
逗号 0xFF0C ,
顿号 0x3001 、
分号 0xFF1B ;
冒号 0xFF1A :
引号 0x300C 「
0x300D 」
引号 0x300E 『
0x300F 』
引号 0x2018 ‘
0x2019 ’
引号 0x201C “
0x201D ”
括号 0xFF08 (
0xFF09 )
括号 0x3014 〔
0x3015 〕
括号 0x3010 【
0x3011 】
破折号 0x2014 —
省略号 0x2026 …
连接号 0x2013 –
间隔号 0xFF0E .
书名号 0x300A 《
0x300B 》
书名号 0x3008 〈
0x3009 〉
最终修改后的函数为:
/// Returns true when `cp` is one of the code points that codePointToUTF8()
/// converts through the "chs" locale instead of encoding as UTF-8: the
/// ideograph ranges U+4E00..U+9FA5 and U+F900..U+FA2D, plus the listed
/// Chinese punctuation marks.
static inline bool isChsConvertedCodePoint(unsigned int cp) {
  if ((cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D)) {
    return true;
  }
  switch (cp) {
  case 0x3002: // ideographic full stop
  case 0xFF1F: // full-width question mark
  case 0xFF01: // full-width exclamation mark
  case 0xFF0C: // full-width comma
  case 0x3001: // ideographic comma
  case 0xFF1B: // full-width semicolon
  case 0xFF1A: // full-width colon
  case 0x300C: // corner brackets
  case 0x300D:
  case 0x300E: // white corner brackets
  case 0x300F:
  case 0x2018: // single quotation marks
  case 0x2019:
  case 0x201C: // double quotation marks
  case 0x201D:
  case 0xFF08: // full-width parentheses
  case 0xFF09:
  case 0x3014: // tortoise shell brackets
  case 0x3015:
  case 0x3010: // black lenticular brackets
  case 0x3011:
  case 0x2014: // em dash
  case 0x2026: // horizontal ellipsis
  case 0x2013: // en dash
  case 0xFF0E: // full-width full stop
  case 0x300A: // double angle brackets
  case 0x300B:
  case 0x3008: // angle brackets
  case 0x3009:
    return true;
  default:
    return false;
  }
}

/// Converts one Unicode code point into the byte string stored by the JSON
/// reader.
///
/// Based on the description from http://en.wikipedia.org/wiki/UTF-8.
///
/// On MSVC builds, the code points accepted by isChsConvertedCodePoint()
/// are NOT emitted as UTF-8: they are converted with wcstombs_s() under the
/// "chs" locale so that the result is in that locale's multibyte encoding.
/// If any step of that conversion fails, the code point is emitted as regular
/// UTF-8 instead of being silently dropped. On other compilers the function
/// is a plain UTF-8 encoder.
///
/// NOTE: the MSVC path temporarily switches the C locale with setlocale(),
/// so it must not run concurrently with other locale-dependent code.
///
/// @param cp  Unicode code point to encode.
/// @return    Encoded bytes; empty when cp is greater than 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    bool converted = false;
#if defined(_MSC_VER)
    if (isChsConvertedCodePoint(cp)) {
      // setlocale() may return NULL, and a std::string must never be built
      // from a null pointer; without a saved name the locale could not be
      // restored either, so skip the conversion in that case.
      const char* const previous = setlocale(LC_ALL, NULL);
      if (previous != NULL) {
        // Copy the name: the next setlocale() call may invalidate `previous`.
        const std::string savedLocale(previous);
        if (setlocale(LC_ALL, "chs") != NULL) {
          const wchar_t src[2] = {static_cast<wchar_t>(cp), L'\0'};
          char dest[5] = {0};
          // wcstombs_s() returns non-zero and leaves dest empty when the
          // character has no representation in the active locale.
          if (wcstombs_s(NULL, dest, sizeof(dest), src, 2) == 0 &&
              dest[0] != '\0') {
            result = dest;
            converted = true;
          }
          setlocale(LC_ALL, savedLocale.c_str());
        }
      }
    }
#endif
    if (!converted) {
      // Standard three-byte UTF-8 sequence.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}
即可完美解决中文乱码问题。