解决中文字符乱码问题，如何将gbk格式转换utf-8格式

中文乱码的原因是：数据本身的编码格式是gbk，而显示时却按utf-8格式解析，所以需要先将其转换为utf-8格式，才能正常显示出来。

1.gbk生成utf8程序需要经过两次转化。

1.gbk->unicode

2.unicode->utf8

2.gbkuni30_gen.h中是gbk转unicode的映射数组：数组元素只保存unicode值，以gbk编码作为数组下标（索引）。该文件可通过另一个程序生成（此文件可在我的下载资源中找到）。

源码如下：

#include <string.h>

#include "gbkuni30_gen.h"

/*
 * Convert a GBK byte sequence into 16-bit Unicode code units.
 *
 * unicode: output array; it receives one element per decoded character, so
 *          room for `len` elements is always enough.
 * gbk:     input bytes (read as unsigned char).
 * len:     number of input bytes to consume.
 *
 * Returns the number of elements stored in `unicode`. Returns 0 when either
 * pointer is null or `len` is not positive.
 *
 * Bytes <= 0x80 are copied through unchanged as single-byte characters.
 * Any other byte is taken as the lead byte of a two-byte sequence; the
 * 16-bit value (lead << 8 | trail) is used as the index into the gbkuni30
 * table. NOTE(review): this presumes gbkuni30 covers every index up to
 * 0xFFFF -- confirm against gbkuni30_gen.h.
 */
int gbk_to_unicode(unsigned short int* unicode, const char* gbk, int len)
{
    const unsigned char *src = (const unsigned char *)gbk;
    int in = 0;   // index of the next input byte
    int out = 0;  // number of code units written so far

    if (!unicode || !gbk || len <= 0) {
        return 0;
    }

    while (in < len) {
        unsigned char lead = src[in];

        if (lead <= 0x80) {
            // Single-byte character: value maps to itself.
            unicode[out++] = lead;
            in += 1;
        } else {
            // A lead byte with no trail byte inside the buffer is a
            // truncated sequence; stop instead of reading past `len`.
            if (in + 1 >= len) {
                break;
            }

            unsigned short code = (unsigned short)((lead << 8) | src[in + 1]);

            unicode[out++] = gbkuni30[code];
            in += 2;
        }
    }

    // Return only after the whole input has been consumed.
    return out;
}

/*
 * Encode one code point as UTF-8 using the original 1..6 byte scheme.
 *
 * unic:    code point value to encode (0 .. 0x7FFFFFFF).
 * pOutput: destination buffer; between 1 and 6 bytes are written, so the
 *          caller must provide at least 6 bytes to cover every case.
 *
 * Returns the number of bytes written, or 0 (nothing written) when `unic`
 * is larger than 0x7FFFFFFF. No terminator is appended.
 *
 * Layout by range:
 *   U-00000000 - U-0000007F: 0xxxxxxx
 *   U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
 *   U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 *   U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 */
int enc_unicode_to_utf8_one(unsigned long unic, unsigned char *pOutput)
{
    int count;                // total bytes in the encoded sequence
    unsigned char lead_tag;   // fixed high bits of the first byte
    unsigned char lead_mask;  // payload bits carried by the first byte

    if (unic <= 0x7FUL) {
        pOutput[0] = (unsigned char)(unic & 0x7F);
        return 1;
    }

    if (unic <= 0x7FFUL) {
        count = 2; lead_tag = 0xC0; lead_mask = 0x1F;
    } else if (unic <= 0xFFFFUL) {
        count = 3; lead_tag = 0xE0; lead_mask = 0x0F;
    } else if (unic <= 0x1FFFFFUL) {
        count = 4; lead_tag = 0xF0; lead_mask = 0x07;
    } else if (unic <= 0x3FFFFFFUL) {
        count = 5; lead_tag = 0xF8; lead_mask = 0x03;
    } else if (unic <= 0x7FFFFFFFUL) {
        count = 6; lead_tag = 0xFC; lead_mask = 0x01;
    } else {
        return 0;
    }

    // Fill continuation bytes from the tail, consuming 6 payload bits each.
    for (int pos = count - 1; pos > 0; pos--) {
        pOutput[pos] = (unsigned char)((unic & 0x3F) | 0x80);
        unic >>= 6;
    }

    // The remaining high bits go into the lead byte.
    pOutput[0] = (unsigned char)((unic & lead_mask) | lead_tag);
    return count;
}

/*
 * Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string.
 *
 * gbk:  input string; a null pointer is treated as an empty string.
 * utf8: destination buffer supplied by the caller; nothing is done when it
 *       is null. Each input character (1 or 2 bytes) becomes one 16-bit
 *       code unit, which encodes to at most 3 UTF-8 bytes, so a buffer of
 *       3 * strlen(gbk) + 1 bytes is always large enough.
 *
 * The conversion runs in two passes: GBK -> 16-bit Unicode via
 * gbk_to_unicode(), then each unit -> UTF-8 via enc_unicode_to_utf8_one().
 */
void bgk_to_utf8( const char* gbk, unsigned char *utf8)
{
    if (!utf8) {
        return;
    }

    int len = gbk ? (int)strlen(gbk) : 0;

    // A zero-length variable-length array is undefined behaviour, so the
    // empty case is handled before the scratch buffer is declared.
    if (len <= 0) {
        *utf8 = '\0';
        return;
    }

    // One slot per input byte is an upper bound on the decoded length.
    unsigned short unicode[len];
    int unicode_len = gbk_to_unicode(unicode, gbk, len);

    unsigned char *out = utf8;

    for (int i = 0; i < unicode_len; i++) {
        out += enc_unicode_to_utf8_one(unicode[i], out);
    }

    // `out` already points just past the last byte written; terminate
    // there rather than skipping a byte first.
    *out = '\0';
}

解决中文字符乱码问题，如何将gbk格式转换utf-8格式

猜你喜欢