C++：UTF-8、UTF-16、UTF-32之间的编码转换

开发语言：C++

功能描述：

Unicode内码转换器。用于UTF-8、UTF-16（UCS2）、UTF-32（UCS4）之间的编码转换。

下载地址：

UnicodeConverter.zip

版本历史：

V1.0 2010年03月12日

完成正式版本。

源代码：

UnicodeConverter.h

[cpp]view plain copy 
    
 /* ---------------------------------------------------------- 
 文件名称：UnicodeConverter.h 
  
 作者：秦建辉 
  
 MSN：[email protected] 
  
 当前版本：V1.0 
  
 历史版本： 
     V1.0    2010年03月12日 
             完成正式版本。 
  
 功能描述： 
     Unicode内码转换器。用于utf-8、utf-16（UCS2）、utf-32（UCS4）之间的编码转换 
  ------------------------------------------------------------ */  
 #pragma once  
   
 #include <windows.h>  
 #include <stdio.h>  
 #include <ostream>  
   
 using namespace std;  
   
 // Collection of static helpers that convert between UTF-8, UTF-16 and UCS-4
 // (UTF-32), and that write the converted bytes to a C FILE or a C++ ostream.
 class CUnicodeConverter
 {
 /* -------------------------------------------------------------
                     Code conversion
    ------------------------------------------------------------- */
 public:
     /*
     Purpose: convert a UCS-4 code to its UTF-8 encoding.
     Parameters:
         dwUCS4: the UCS-4 code to convert
         pbUTF8: receives the UTF-8 bytes. Pass NULL to obtain only the length (in bytes)
     Return value:
           0: invalid UCS-4 code
         1-6: length of the UTF-8 encoding, in bytes
     */
     static INT UCS4_To_UTF8( DWORD dwUCS4, BYTE* pbUTF8 );

     /*
     Purpose: convert a UTF-8 encoding to a UCS-4 code.
     Parameters:
         pbUTF8: the UTF-8 bytes to convert
         dwUCS4: receives the resulting UCS-4 code
     Return value:
           0: bad parameter or invalid UTF-8 encoding
         1-6: number of UTF-8 bytes that were consumed
     */
     static INT UTF8_To_UCS4( const BYTE* pbUTF8, DWORD& dwUCS4 );

     /*
     Purpose: convert a UCS-4 code to its UTF-16 encoding.
     Parameters:
         dwUCS4: the UCS-4 code to convert
         pwUTF16: receives the UTF-16 units. Pass NULL to obtain only the length (in 16-bit units)
     Return value:
         0: invalid UCS-4 code
         1: converted to one UTF-16 unit
         2: converted to two UTF-16 units
     */
     static INT UCS4_To_UTF16( DWORD dwUCS4, WORD* pwUTF16 );

     /*
     Purpose: convert a UTF-16 encoding to a UCS-4 code.
     Parameters:
         pwUTF16: the UTF-16 units to convert
         dwUCS4: receives the resulting UCS-4 code
     Return value:
         0: bad parameter or invalid UTF-16 encoding
         1: one UTF-16 unit was converted
         2: two UTF-16 units were converted
     */
     static INT UTF16_To_UCS4( const WORD* pwUTF16, DWORD& dwUCS4 );

     /*
     Purpose: convert a UTF-8 string to a UTF-16 string.
     Parameters:
         pbszUTF8Str: the NUL-terminated UTF-8 string to convert
         pwszUTF16Str: receives the UTF-16 string. Pass NULL to obtain only the required length (in 16-bit units)
     Return value:
          0: conversion failed
         >0: length of the UTF-16 string
     */
     static INT UTF8Str_To_UTF16Str( const BYTE* pbszUTF8Str, WORD* pwszUTF16Str );

     /*
     Purpose: convert a UTF-16 string to a UTF-8 string.
     Parameters:
         pwszUTF16Str: the NUL-terminated UTF-16 string to convert
         pbszUTF8Str: receives the UTF-8 string. Pass NULL to obtain only the required length (in bytes)
     Return value:
          0: conversion failed
         >0: length of the UTF-8 string (not counting the terminating NUL)
     */
     static INT UTF16Str_To_UTF8Str( const WORD* pwszUTF16Str, BYTE* pbszUTF8Str );

 /* -------------------------------------------------------------
                     Output to a C FILE
    ------------------------------------------------------------- */
 public:
     /*
     Purpose: write the UTF-8 encoding of a UCS-4 code to a file.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF8_By_UCS4( FILE* out, DWORD dwUCS4 );

     /*
     Purpose: write the UTF-16 encoding of a UCS-4 code to a file.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF16_By_UCS4( FILE* out, DWORD dwUCS4, BOOL isBigEndian = FALSE );

     /*
     Purpose: write a UTF-16 string to a file as UTF-8.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF8Str_By_UTF16Str( FILE* out, const WORD* pwszUTF16Str );

     /*
     Purpose: write a UTF-8 string to a file as UTF-16.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF16Str_By_UTF8Str( FILE* out, const BYTE* pbszUTF8Str, BOOL isBigEndian = FALSE );

     /*
     Purpose: write the UTF-8 byte order mark to a file.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF8_BOM( FILE* out );

     /*
     Purpose: write the UTF-16 byte order mark to a file.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF16_BOM( FILE* out, BOOL isBigEndian = FALSE );

 /* -------------------------------------------------------------
                     Output to a C++ stream
    ------------------------------------------------------------- */
 public:
     /*
     Purpose: write the UTF-8 encoding of a UCS-4 code to a stream.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF8_By_UCS4( ostream& os, DWORD dwUCS4 );

     /*
     Purpose: write the UTF-16 encoding of a UCS-4 code to a stream.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF16_By_UCS4( ostream& os, DWORD dwUCS4, BOOL isBigEndian = FALSE );

     /*
     Purpose: write a UTF-16 string to a stream as UTF-8.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF8Str_By_UTF16Str( ostream& os, const WORD* pwszUTF16Str );

     /*
     Purpose: write a UTF-8 string to a stream as UTF-16.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF16Str_By_UTF8Str( ostream& os, const BYTE* pbszUTF8Str, BOOL isBigEndian = FALSE );

     /*
     Purpose: write the UTF-8 byte order mark to a stream.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF8_BOM( ostream& os );

     /*
     Purpose: write the UTF-16 byte order mark to a stream.
     Return value:
         number of bytes written
     */
     static UINT Print_UTF16_BOM( ostream& os, BOOL isBigEndian = FALSE );
 };
   
 /* ------------------------------ 
                 END 
    ------------------------------ */  

UnicodeConverter.cpp

[cpp]view plain copy 
    
 #include "UnicodeConverter.h"  
   
 /* ------------------------------------------------------------- 
                     内码转换 
    ------------------------------------------------------------- */  
   
 // 转换UCS4编码到UTF8编码  
 INT CUnicodeConverter::UCS4_To_UTF8( DWORD dwUCS4, BYTE* pbUTF8 )  
 {  
     const BYTE  abPrefix[] = {0, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC};  
     const DWORD adwCodeUp[] = {  
         0x80,           // U+00000000 ～ U+0000007F  
         0x800,          // U+00000080 ～ U+000007FF  
         0x10000,        // U+00000800 ～ U+0000FFFF  
         0x200000,       // U+00010000 ～ U+001FFFFF  
         0x4000000,      // U+00200000 ～ U+03FFFFFF  
         0x80000000      // U+04000000 ～ U+7FFFFFFF  
     };  
   
     INT i, iLen;  
   
     // 根据UCS4编码范围确定对应的UTF-8编码字节数  
     iLen = sizeof(adwCodeUp) / sizeof(DWORD);  
     for( i = 0; i < iLen; i++ )  
     {  
         if( dwUCS4 < adwCodeUp[i] )  
         {  
             break;  
         }  
     }  
   
     if( i == iLen )return 0;    // 无效的UCS4编码  
           
     iLen = i + 1;   // UTF-8编码字节数  
     if( pbUTF8 != NULL )  
     {   // 转换为UTF-8编码  
         for( ; i > 0; i-- )  
         {  
             pbUTF8[i] = static_cast<BYTE>((dwUCS4 & 0x3F) | 0x80);  
             dwUCS4 >>= 6;  
         }  
   
         pbUTF8[0] = static_cast<BYTE>(dwUCS4 | abPrefix[iLen - 1]);  
     }  
   
     return iLen;  
 }  
   
 // 转换UTF8编码到UCS4编码  
 INT CUnicodeConverter::UTF8_To_UCS4( const BYTE* pbUTF8, DWORD& dwUCS4 )  
 {  
     INT     i, iLen;  
     BYTE    b;  
   
     if( pbUTF8 == NULL )  
     {   // 参数错误  
         return 0;  
     }  
   
     b = *pbUTF8++;  
     if( b < 0x80 )  
     {  
         dwUCS4 = b;  
         return 1;  
     }  
   
     if( b < 0xC0 || b > 0xFD )  
     {   // 非法UTF8  
         return 0;   
     }  
   
     if( b < 0xE0 )  
     {  
         dwUCS4 = b & 0x1F;  
         iLen = 2;  
     }  
     else if( b < 0xF0 )  
     {  
         dwUCS4 = b & 0x0F;  
         iLen = 3;  
     }  
     else if( b < 0xF8 )  
     {  
         dwUCS4 = b & 7;  
         iLen = 4;  
     }  
     else if( b < 0xFC )  
     {  
         dwUCS4 = b & 3;  
         iLen = 5;  
     }  
     else  
     {  
         dwUCS4 = b & 1;  
         iLen = 6;  
     }  
   
     for( i = 1; i < iLen; i++ )  
     {  
         b = *pbUTF8++;  
         if( b < 0x80 || b > 0xBF )  
         {   // 非法UTF8  
             break;  
         }  
   
         dwUCS4 = (dwUCS4 << 6) + (b & 0x3F);  
     }  
   
     if( i < iLen )  
     {   // 非法UTF8  
         return 0;  
     }  
     else  
     {  
         return iLen;  
     }  
 }  
   
 // 转换UCS4编码到UCS2编码  
 INT CUnicodeConverter::UCS4_To_UTF16( DWORD dwUCS4, WORD* pwUTF16 )  
 {  
     if( dwUCS4 <= 0xFFFF )  
     {  
         if( pwUTF16 != NULL )  
         {  
             *pwUTF16 = static_cast<WORD>(dwUCS4);  
         }  
   
         return 1;  
     }  
     else if( dwUCS4 <= 0xEFFFF )  
     {  
         if( pwUTF16 != NULL )  
         {  
             pwUTF16[0] = static_cast<WORD>( 0xD800 + (dwUCS4 >> 10) - 0x40 );   // 高10位  
             pwUTF16[1] = static_cast<WORD>( 0xDC00 + (dwUCS4 & 0x03FF) );     // 低10位  
         }  
   
         return 2;  
     }  
     else  
     {  
         return 0;  
     }  
 }  
   
 // 转换UCS2编码到UCS4编码  
 INT CUnicodeConverter::UTF16_To_UCS4( const WORD* pwUTF16, DWORD& dwUCS4 )  
 {  
     WORD    w1, w2;  
   
     if( pwUTF16 == NULL )  
     {   // 参数错误  
         return 0;  
     }  
   
     w1 = pwUTF16[0];  
     if( w1 >= 0xD800 && w1 <= 0xDFFF )  
     {   // 编码在替代区域（Surrogate Area）  
         if( w1 < 0xDC00 )  
         {  
             w2 = pwUTF16[1];  
             if( w2 >= 0xDC00 && w2 <= 0xDFFF )  
             {  
                 dwUCS4 = (w2 & 0x03FF) + (((w1 & 0x03FF) + 0x40) << 10);  
                 return 2;  
             }  
         }  
   
         return 0;   // 非法UTF16编码      
     }  
     else  
     {  
         dwUCS4 = w1;  
         return 1;  
     }  
 }  
   
 // 转换UTF8字符串到UTF16字符串  
 INT CUnicodeConverter::UTF8Str_To_UTF16Str( const BYTE* pbszUTF8Str, WORD* pwszUTF16Str )  
 {  
     INT     iNum, iLen;  
     DWORD   dwUCS4;  
   
     if( pbszUTF8Str == NULL )  
     {   // 参数错误  
         return 0;  
     }  
   
     iNum = 0;   // 统计有效字符个数  
     while( *pbszUTF8Str )  
     {   // UTF8编码转换为UCS4编码  
         iLen = UTF8_To_UCS4( pbszUTF8Str, dwUCS4 );  
         if( iLen == 0 )  
         {   // 非法的UTF8编码  
             return 0;  
         }  
   
         pbszUTF8Str += iLen;  
   
         // UCS4编码转换为UTF16编码  
         iLen = UCS4_To_UTF16( dwUCS4, pwszUTF16Str );  
         if( iLen == 0 )  
         {  
             return 0;  
         }  
   
         if( pwszUTF16Str != NULL )  
         {  
             pwszUTF16Str += iLen;  
         }  
           
         iNum += iLen;  
     }  
   
     if( pwszUTF16Str != NULL )  
     {  
         *pwszUTF16Str = 0;  // 写入字符串结束标记  
     }  
   
     return iNum;  
 }  
   
 // 转换UTF16字符串到UTF8字符串  
 INT CUnicodeConverter::UTF16Str_To_UTF8Str( const WORD* pwszUTF16Str, BYTE* pbszUTF8Str )  
 {  
     INT     iNum, iLen;  
     DWORD   dwUCS4;  
   
     if( pwszUTF16Str == NULL )  
     {   // 参数错误  
         return 0;  
     }  
   
     iNum = 0;  
     while( *pwszUTF16Str )  
     {   // UTF16编码转换为UCS4编码  
         iLen = UTF16_To_UCS4( pwszUTF16Str, dwUCS4 );  
         if( iLen == 0 )  
         {   // 非法的UTF16编码  
             return 0;     
         }  
           
         pwszUTF16Str += iLen;  
   
         // UCS4编码转换为UTF8编码  
         iLen = UCS4_To_UTF8( dwUCS4, pbszUTF8Str );  
         if( iLen == 0 )  
         {  
             return 0;  
         }  
   
         if( pbszUTF8Str != NULL )  
         {  
             pbszUTF8Str += iLen;  
         }  
           
         iNum += iLen;  
     }  
   
     if( pbszUTF8Str != NULL )  
     {  
         *pbszUTF8Str = 0;   // 写入字符串结束标记  
     }  
   
     return iNum;  
 }  
   
 /* ------------------------------------------------------------- 
                     C文件写入操作 
    ------------------------------------------------------------- */  
   
 // 向文件中输出UTF8编码  
 UINT CUnicodeConverter::Print_UTF8_By_UCS4( FILE* out, DWORD dwUCS4 )  
 {  
     INT     iLen;  
     BYTE    abUTF8[8];  
   
     if( out == NULL )  
     {  
         return 0;  
     }  
   
     iLen = UCS4_To_UTF8( dwUCS4, abUTF8 );  
     if( iLen == 0 )return 0;  
   
     fwrite( abUTF8, 1, iLen, out );  
   
     return iLen;  
 }  
   
 // 向文件中输出UTF16编码  
 UINT CUnicodeConverter::Print_UTF16_By_UCS4( FILE* out, DWORD dwUCS4, BOOL isBigEndian )  
 {  
     INT     i, iLen;  
     WORD    wCode, awUTF16[2];  
   
     if( out == NULL )  
     {  
         return 0;  
     }  
   
     iLen = UCS4_To_UTF16( dwUCS4, awUTF16 );  
     if( iLen == 0 )return 0;  
   
     for( i = 0; i < iLen; i++ )  
     {  
         wCode = awUTF16[i];  
         if( isBigEndian )  
         {  
             fputc( wCode >> 8, out ); // 输出高位  
             fputc( wCode & 0xFF, out ); // 输出低位  
         }  
         else  
         {  
             fputc( wCode & 0xFF, out ); // 输出低位  
             fputc( wCode >> 8, out ); // 输出高位  
         }  
     }  
   
     return (iLen << 1);  
 }  
   
 // 将UTF16字符串以UTF8编码输出到文件中  
 UINT CUnicodeConverter::Print_UTF8Str_By_UTF16Str( FILE* out, const WORD* pwszUTF16Str )  
 {  
     INT     iCount, iLen;  
     DWORD   dwUCS4;  
   
     if( (out == NULL) || (pwszUTF16Str == NULL) )  
     {  
         return 0;  
     }  
   
     iCount = 0;  
     while( *pwszUTF16Str )  
     {   // 将UTF16编码转换成UCS4编码  
         iLen = UTF16_To_UCS4( pwszUTF16Str, dwUCS4 );  
         if( iLen == 0 )  
         {  
             break;  
         }  
   
         pwszUTF16Str += iLen;  
   
         // 向文件中输出UTF8编码  
         iCount += Print_UTF8_By_UCS4( out, dwUCS4 );  
     }  
   
     return iCount;  // 输出的字节数  
 }  
   
 // 将UTF8字符串以UTF16编码输出到文件中  
 UINT CUnicodeConverter::Print_UTF16Str_By_UTF8Str( FILE* out, const BYTE* pbszUTF8Str, BOOL isBigEndian )  
 {  
     INT     iCount, iLen;  
     DWORD   dwUCS4;  
   
     if( (out == NULL) || (pbszUTF8Str == NULL) )  
     {  
         return 0;  
     }  
   
     iCount = 0;  
     while( *pbszUTF8Str )  
     {   // 将UTF16编码转换成UCS4编码  
         iLen = UTF8_To_UCS4( pbszUTF8Str, dwUCS4 );  
         if( iLen == 0 )  
         {  
             break;  
         }  
   
         pbszUTF8Str += iLen;  
   
         // 向文件中输出UTF8编码  
         iCount += Print_UTF16_By_UCS4( out, dwUCS4, isBigEndian );  
     }  
   
     return iCount;  // 输出的字节数  
 }  
   
 // 向文件中输出UTF8字节序标记  
 UINT CUnicodeConverter::Print_UTF8_BOM( FILE* out )  
 {  
     if( out == NULL )  
     {  
         return 0;  
     }  
   
     fputc( 0xEF, out );  
     fputc( 0xBB, out );  
     fputc( 0xBF, out );  
   
     return 3;  
 }  
   
 // 向文件中输出UTF16字节序标记  
 UINT CUnicodeConverter::Print_UTF16_BOM( FILE* out, BOOL isBigEndian )  
 {  
     if( out == NULL )  
     {  
         return 0;  
     }  
   
     if( isBigEndian )  
     {  
         fputc( 0xFE, out );  
         fputc( 0xFF, out );  
     }  
     else  
     {  
         fputc( 0xFF, out );  
         fputc( 0xFE, out );  
     }  
   
     return 2;  
 }  
   
 /* ------------------------------------------------------------- 
                     C++流输出操作 
    ------------------------------------------------------------- */  
   
 // 向流中输出UTF8编码  
 UINT CUnicodeConverter::Print_UTF8_By_UCS4( ostream& os, DWORD dwUCS4 )  
 {  
     INT     iLen;  
     BYTE    abUTF8[8];  
   
     if( !os )return 0;  
       
     iLen = UCS4_To_UTF8( dwUCS4, abUTF8 );  
     if( iLen == 0 )return 0;  
   
     os.write( reinterpret_cast<CHAR*>(abUTF8), iLen );  
   
     return iLen;      
 }  
   
 // 向流中输出UTF16编码  
 UINT CUnicodeConverter::Print_UTF16_By_UCS4( ostream& os, DWORD dwUCS4, BOOL isBigEndian )  
 {  
     INT     i, iLen;  
     WORD    wCode, awUTF16[2];  
   
     if( !os )return 0;  
       
     iLen = UCS4_To_UTF16( dwUCS4, awUTF16 );  
     if( iLen == 0 )return 0;  
   
     for( i = 0; i < iLen; i++ )  
     {  
         wCode = awUTF16[i];  
         if( isBigEndian )  
         {  
             os.put( wCode >> 8 );     // 输出高位  
             os.put( wCode & 0xFF );     // 输出低位  
         }  
         else  
         {  
             os.put( wCode & 0xFF );     // 输出低位  
             os.put( wCode >> 8 );     // 输出高位  
         }  
     }  
   
     return (iLen << 1);  
 }  
   
 // 将UTF16字符串以UTF8编码输出到流中  
 UINT CUnicodeConverter::Print_UTF8Str_By_UTF16Str( ostream& os, const WORD* pwszUTF16Str )  
 {  
     INT     iCount, iLen;  
     DWORD   dwUCS4;  
   
     if( !os || (pwszUTF16Str == NULL) )return 0;  
       
     iCount = 0;  
     while( *pwszUTF16Str )  
     {   // 将UTF16编码转换成UCS4编码  
         iLen = UTF16_To_UCS4( pwszUTF16Str, dwUCS4 );  
         if( iLen == 0 )  
         {  
             break;  
         }  
   
         pwszUTF16Str += iLen;  
   
         // 向流中输出UTF8编码  
         iCount += Print_UTF8_By_UCS4( os, dwUCS4 );  
     }  
   
     return iCount;  // 输出的字节数  
 }  
   
 // 将UTF8字符串以UTF16编码输出到流中  
 UINT CUnicodeConverter::Print_UTF16Str_By_UTF8Str( ostream& os, const BYTE* pbszUTF8Str, BOOL isBigEndian )  
 {  
     INT     iCount, iLen;  
     DWORD   dwUCS4;  
   
     if( !os || (pbszUTF8Str == NULL) )return 0;  
   
     iCount = 0;  
     while( *pbszUTF8Str )  
     {   // 将UTF16编码转换成UCS4编码  
         iLen = UTF8_To_UCS4( pbszUTF8Str, dwUCS4 );  
         if( iLen == 0 )  
         {  
             break;  
         }  
   
         pbszUTF8Str += iLen;  
   
         // 向流中输出UTF8编码  
         iCount += Print_UTF16_By_UCS4( os, dwUCS4, isBigEndian );  
     }  
   
     return iCount;  // 输出的字节数  
 }  
   
 // 向流中输出UTF8字节序标记  
 UINT CUnicodeConverter::Print_UTF8_BOM( ostream& os )  
 {  
     if( !os )return 0;  
       
     os.put( 0xEF );  
     os.put( 0xBB );  
     os.put( 0xBF );  
   
     return 3;     
 }  
   
 // 向流中输出UTF16字节序标记  
 UINT CUnicodeConverter::Print_UTF16_BOM( ostream& os, BOOL isBigEndian )  
 {  
     if( !os )return 0;  
       
     if( isBigEndian )  
     {  
         os.put( 0xFE );  
         os.put( 0xFF );  
     }  
     else  
     {  
         os.put( 0xFF );  
         os.put( 0xFE );  
     }  
   
     return 2;  
 }  
   
 /* ------------------------------ 
                 END 
    ------------------------------ */  

其他：

1，将UTF-8转换为ANSI编码
// Convert a NUL-terminated UTF-8 string to the system ANSI code page (CP_ACP).
//   pstrUTF8 : source string; must not be NULL.
//   pstrAnsi : destination buffer supplied by the caller. Its size is not passed
//              in, so the caller must make it large enough for the converted
//              string including the terminating NUL.
// Returns 0 on success and -1 on failure. On failure an empty string is stored
// in pstrAnsi when it is not NULL.
//
// The intermediate UTF-16 text lives in a buffer sized from the API's own length
// query. The earlier fixed char[200] buffer had the wrong element type for the
// wide-character parameters and overflowed on longer input.
int Utf8ToAnsi(const char *pstrUTF8, char *pstrAnsi)
{
    if (pstrAnsi == NULL)
    {
        return -1;
    }
    pstrAnsi[0] = '\0';     // defined output even if a later step fails

    if (pstrUTF8 == NULL)
    {
        return -1;
    }

    // Length query: number of wide characters needed, terminator included.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, pstrUTF8, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return -1;
    }

    // Only Win32 C calls run between new[] and delete[], so nothing can throw
    // past the cleanup below.
    wchar_t *wide = new wchar_t[wideLen];
    int result = -1;

    if (MultiByteToWideChar(CP_UTF8, 0, pstrUTF8, -1, wide, wideLen) > 0)
    {
        // Length query for the ANSI form, then the real conversion.
        const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
        if (ansiLen > 0 &&
            WideCharToMultiByte(CP_ACP, 0, wide, -1, pstrAnsi, ansiLen, NULL, NULL) > 0)
        {
            result = 0;
        }
        else
        {
            pstrAnsi[0] = '\0';
        }
    }

    delete[] wide;
    return result;
}
2，其他的编码转换
// Convert a NUL-terminated string in the system ANSI code page (CP_ACP) to a
// wide string. Returns an empty string when the length query reports 0.
qp::StringW Global::AnsiToUnicode(const char* buf)
{
    // First call only asks how many wide characters are needed.
    const int required = ::MultiByteToWideChar(CP_ACP, 0, buf, -1, NULL, 0);
    if (required == 0)
    {
        return L"";
    }

    // Second call performs the conversion into a buffer of exactly that size.
    std::vector<wchar_t> wide(required);
    wchar_t* dest = &wide[0];
    ::MultiByteToWideChar(CP_ACP, 0, buf, -1, dest, required);
    return dest;
}
// Convert a NUL-terminated wide string to the system ANSI code page (CP_ACP).
// Returns an empty string when the length query reports 0.
qp::StringA Global::UnicodeToAnsi(const wchar_t* buf)
{
    // First call only asks how many bytes are needed.
    const int required = ::WideCharToMultiByte(CP_ACP, 0, buf, -1, NULL, 0, NULL, NULL);
    if (required == 0)
    {
        return "";
    }

    // Second call performs the conversion into a buffer of exactly that size.
    std::vector<char> narrow(required);
    char* dest = &narrow[0];
    ::WideCharToMultiByte(CP_ACP, 0, buf, -1, dest, required, NULL, NULL);
    return dest;
}
// Convert a NUL-terminated UTF-8 string to a wide string.
// Returns an empty string when the length query reports 0.
qp::StringW Global::Utf8ToUnicode(const char* buf)
{
    // First call only asks how many wide characters are needed.
    const int required = ::MultiByteToWideChar(CP_UTF8, 0, buf, -1, NULL, 0);
    if (required == 0)
    {
        return L"";
    }

    // Second call performs the conversion into a buffer of exactly that size.
    std::vector<wchar_t> wide(required);
    wchar_t* dest = &wide[0];
    ::MultiByteToWideChar(CP_UTF8, 0, buf, -1, dest, required);
    return dest;
}
// Convert a NUL-terminated wide string to UTF-8.
// Returns an empty string when the length query reports 0.
qp::StringA Global::UnicodeToUtf8(const wchar_t* buf)
{
    // First call only asks how many bytes are needed.
    const int required = ::WideCharToMultiByte(CP_UTF8, 0, buf, -1, NULL, 0, NULL, NULL);
    if (required == 0)
    {
        return "";
    }

    // Second call performs the conversion into a buffer of exactly that size.
    std::vector<char> narrow(required);
    char* dest = &narrow[0];
    ::WideCharToMultiByte(CP_UTF8, 0, buf, -1, dest, required, NULL, NULL);
    return dest;
}
程序来自http://topic.csdn.net/u/20101008/12/895e5aaa-3db5-4c9f-8d06-ae7b04913132.html。

C++：UTF-8、UTF-16、UTF-32之间的编码转换

猜你喜欢