VC++关于UNICODE版本的开发

关于UNICODE版本的开发

代码转换方案
1. 概述

在VC6.0中，提供了一组宏来代替ANSI的函数、宏或数据类型。这些宏在ANSI编译条件下按单字节处理字符串，而在UNICODE编译条件下按双字节处理字符串，请在编写程序时使用这些宏。

数据类型

表-1.1中黑体字部分为替代数据类型的宏。

Generic-Text Data Type Mappings

Generic-Text Data Type Name	SBCS (_UNICODE, _MBCS Not Defined)	_MBCS Defined	_UNICODE Defined
_TCHAR	char	char	wchar_t
_TINT	int	int	wint_t
_TSCHAR	signed char	signed char	wchar_t
_TUCHAR	unsigned char	unsigned char	wchar_t
_TXCHAR	char	unsigned char	wchar_t
_T or _TEXT	No effect (removed by preprocessor)	No effect (removed by preprocessor)	L (converts following character or string to its Unicode counterpart)

表-1.1

函数

表-1.2中黑体字部分为替代函数的宏

Generic-Text Routine Mappings

Generic-Text Routine Name	SBCS (_UNICODE, _MBCS Not Defined)	_MBCS Defined	_UNICODE Defined
_fgettc	fgetc	fgetc	fgetwc
_fgettchar	fgetchar	fgetchar	_fgetwchar
_fgetts	fgets	fgets	fgetws
_fputtc	fputc	fputc	fputwc
_fputtchar	fputchar	fputchar	_fputwchar
_fputts	fputs	fputs	fputws
_ftprintf	fprintf	fprintf	fwprintf
_ftscanf	fscanf	fscanf	fwscanf
_gettc	getc	getc	getwc
_gettchar	getchar	getchar	getwchar
_getts	gets	gets	getws
_istalnum	isalnum	_ismbcalnum	iswalnum
_istalpha	isalpha	_ismbcalpha	iswalpha
_istascii	__isascii	__isascii	iswascii
_istcntrl	iscntrl	iscntrl	iswcntrl
_istdigit	isdigit	_ismbcdigit	iswdigit
_istgraph	isgraph	_ismbcgraph	iswgraph
_istlead	Always returns false	_ismbblead	Always returns false
_istleadbyte	Always returns false	isleadbyte	Always returns false
_istlegal	Always returns true	_ismbclegal	Always returns true
_istlower	islower	_ismbclower	iswlower
_istprint	isprint	_ismbcprint	iswprint
_istpunct	ispunct	_ismbcpunct	iswpunct
_istspace	isspace	_ismbcspace	iswspace
_istupper	isupper	_ismbcupper	iswupper
_istxdigit	isxdigit	isxdigit	iswxdigit
_itot	_itoa	_itoa	_itow
_ltot	_ltoa	_ltoa	_ltow
_puttc	putc	putc	putwc
_puttchar	putchar	putchar	putwchar
_putts	puts	puts	putws
_tmain	main	main	wmain
_sntprintf	_snprintf	_snprintf	_snwprintf
_stprintf	sprintf	sprintf	swprintf
_stscanf	sscanf	sscanf	swscanf
_taccess	_access	_access	_waccess
_tasctime	asctime	asctime	_wasctime
_tccpy	Maps to macro or inline function	_mbccpy	Maps to macro or inline function
_tchdir	_chdir	_chdir	_wchdir
_tclen	Maps to macro or inline function	_mbclen	Maps to macro or inline function
_tchmod	_chmod	_chmod	_wchmod
_tcreat	_creat	_creat	_wcreat
_tcscat	strcat	_mbscat	wcscat
_tcschr	strchr	_mbschr	wcschr
_tcsclen	strlen	_mbslen	wcslen
_tcscmp	strcmp	_mbscmp	wcscmp
_tcscoll	strcoll	_mbscoll	wcscoll
_tcscpy	strcpy	_mbscpy	wcscpy
_tcscspn	strcspn	_mbscspn	wcscspn
_tcsdec	_strdec	_mbsdec	_wcsdec
_tcsdup	_strdup	_mbsdup	_wcsdup
_tcsftime	strftime	strftime	wcsftime
_tcsicmp	_stricmp	_mbsicmp	_wcsicmp
_tcsicoll	_stricoll	_stricoll	_wcsicoll
_tcsinc	_strinc	_mbsinc	_wcsinc
_tcslen	strlen	strlen	wcslen
_tcslwr	_strlwr	_mbslwr	_wcslwr
_tcsnbcnt	_strncnt	_mbsnbcnt	_wcsncnt
_tcsncat	strncat	_mbsnbcat	wcsncat
_tcsnccat	strncat	_mbsncat	wcsncat
_tcsncmp	strncmp	_mbsnbcmp	wcsncmp
_tcsnccmp	strncmp	_mbsncmp	wcsncmp
_tcsnccnt	_strncnt	_mbsnccnt	_wcsncnt
_tcsnccpy	strncpy	_mbsncpy	wcsncpy
_tcsncicmp	_strnicmp	_mbsnicmp	_wcsnicmp
_tcsncpy	strncpy	_mbsnbcpy	wcsncpy
_tcsncset	_strnset	_mbsnset	_wcsnset
_tcsnextc	_strnextc	_mbsnextc	_wcsnextc
_tcsnicmp	_strnicmp	_mbsnicmp	_wcsnicmp
_tcsnicoll	_strnicoll	_strnicoll	_wcsnicoll
_tcsninc	_strninc	_mbsninc	_wcsninc
_tcsnccnt	_strncnt	_mbsnccnt	_wcsncnt
_tcsnset	_strnset	_mbsnbset	_wcsnset
_tcspbrk	strpbrk	_mbspbrk	wcspbrk
_tcsspnp	_strspnp	_mbsspnp	_wcsspnp
_tcsrchr	strrchr	_mbsrchr	wcsrchr
_tcsrev	_strrev	_mbsrev	_wcsrev
_tcsset	_strset	_mbsset	_wcsset
_tcsspn	strspn	_mbsspn	wcsspn
_tcsstr	strstr	_mbsstr	wcsstr
_tcstod	strtod	strtod	wcstod
_tcstok	strtok	_mbstok	wcstok
_tcstol	strtol	strtol	wcstol
_tcstoul	strtoul	strtoul	wcstoul
_tcsupr	_strupr	_mbsupr	_wcsupr
_tcsxfrm	strxfrm	strxfrm	wcsxfrm
_tctime	ctime	ctime	_wctime
_texecl	_execl	_execl	_wexecl
_texecle	_execle	_execle	_wexecle
_texeclp	_execlp	_execlp	_wexeclp
_texeclpe	_execlpe	_execlpe	_wexeclpe
_texecv	_execv	_execv	_wexecv
_texecve	_execve	_execve	_wexecve
_texecvp	_execvp	_execvp	_wexecvp
_texecvpe	_execvpe	_execvpe	_wexecvpe
_tfdopen	_fdopen	_fdopen	_wfdopen
_tfindfirst	_findfirst	_findfirst	_wfindfirst
_tfindnext	_findnext	_findnext	_wfindnext
_tfopen	fopen	fopen	_wfopen
_tfreopen	freopen	freopen	_wfreopen
_tfsopen	_fsopen	_fsopen	_wfsopen
_tfullpath	_fullpath	_fullpath	_wfullpath
_tgetcwd	_getcwd	_getcwd	_wgetcwd
_tgetenv	getenv	getenv	_wgetenv
_tmain	main	main	wmain
_tmakepath	_makepath	_makepath	_wmakepath
_tmkdir	_mkdir	_mkdir	_wmkdir
_tmktemp	_mktemp	_mktemp	_wmktemp
_tperror	perror	perror	_wperror
_topen	_open	_open	_wopen
_totlower	tolower	_mbctolower	towlower
_totupper	toupper	_mbctoupper	towupper
_tpopen	_popen	_popen	_wpopen
_tprintf	printf	printf	wprintf
_tremove	remove	remove	_wremove
_trename	rename	rename	_wrename
_trmdir	_rmdir	_rmdir	_wrmdir
_tsearchenv	_searchenv	_searchenv	_wsearchenv
_tscanf	scanf	scanf	wscanf
_tsetlocale	setlocale	setlocale	_wsetlocale
_tsopen	_sopen	_sopen	_wsopen
_tspawnl	_spawnl	_spawnl	_wspawnl
_tspawnle	_spawnle	_spawnle	_wspawnle
_tspawnlp	_spawnlp	_spawnlp	_wspawnlp
_tspawnlpe	_spawnlpe	_spawnlpe	_wspawnlpe
_tspawnv	_spawnv	_spawnv	_wspawnv
_tspawnve	_spawnve	_spawnve	_wspawnve
_tspawnvp	_spawnvp	_spawnvp	_wspawnvp
_tspawnvpe	_spawnvpe	_spawnvpe	_wspawnvpe
_tsplitpath	_splitpath	_splitpath	_wsplitpath
_tstat	_stat	_stat	_wstat
_tstrdate	_strdate	_strdate	_wstrdate
_tstrtime	_strtime	_strtime	_wstrtime
_tsystem	system	system	_wsystem
_ttempnam	_tempnam	_tempnam	_wtempnam
_ttmpnam	tmpnam	tmpnam	_wtmpnam
_ttoi	atoi	atoi	_wtoi
_ttol	atol	atol	_wtol
_tutime	_utime	_utime	_wutime
_tWinMain	WinMain	WinMain	wWinMain
_ultot	_ultoa	_ultoa	_ultow
_ungettc	ungetc	ungetc	ungetwc
_vftprintf	vfprintf	vfprintf	vfwprintf
_vsntprintf	_vsnprintf	_vsnprintf	_vsnwprintf
_vstprintf	vsprintf	vsprintf	vswprintf
_vtprintf	vprintf	vprintf	vwprintf

表1.2

表1.3中黑体字部分为替代宏的宏

Generic-Text Macro Mappings

Generic-Text – Object Name	SBCS (_UNICODE, _MBCS Not Defined)	_MBCS Defined	_UNICODE Defined
LPCTSTR	LPCSTR(CONST CHAR*)	LPCSTR(CONST CHAR*)	LPCWSTR(CONST WCHAR*)
LPTSTR	LPSTR(CHAR*)	LPSTR(CHAR*)	LPWSTR(WCHAR*)

表1.3

常量和全局变量

表1.4中黑体字部分为替代常量和全局变量的宏

Generic-Text Constant and Global Variable Mappings

Generic-Text – Object Name	SBCS (_UNICODE, _MBCS Not Defined)	_MBCS Defined	_UNICODE Defined
_TEOF	EOF	EOF	WEOF
_tenviron	_environ	_environ	_wenviron
_tfinddata_t	_finddata_t	_finddata_t	_wfinddata_t

表-1.4

不能转换的函数

atof替换方案

创建函数my_atof，如下：

/*
 * Generic-text replacement for atof(), which has no _t-prefixed mapping.
 *
 * string - NUL-terminated text to convert; must not be NULL (asserted).
 *
 * Returns the floating-point value parsed from the start of the string at
 * full double precision. As with atof(), 0.0 is returned when no conversion
 * can be performed.
 */
double my_atof( LPCTSTR string )
{
    assert( string != NULL );

    // _tcstod maps to strtod in ANSI/MBCS builds and to wcstod in Unicode
    // builds, so a single definition serves both configurations. Unlike the
    // former _stscanf("%f") approach it parses straight into a double and
    // never returns an uninitialised value when the text is not a number.
    return _tcstod( string, NULL );
}

fcvt替换方案

创建函数my_fcvt，如下：（注：FLT_EPSILON在float.h中）

#ifdef _UNICODE

/*
 * Wide-character stand-in for _fcvt(): converts a floating-point number to a
 * string of digits with no sign and no decimal point.
 *
 * value - number to convert.
 * count - number of digits to produce after the decimal point.
 * dec   - receives the position of the decimal point relative to the start
 *         of the returned digits (zero or negative when the value is < 1).
 * sign  - receives 0 for a non-negative value, 1 for a negative value.
 *
 * Values whose magnitude is below FLT_EPSILON are treated as zero.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 * If the formatted number does not fit in the buffer, an empty string is
 * returned and *dec is set to 0.
 */
LPCTSTR my_fcvt( double value, int count, int *dec, int *sign )
{
    static TCHAR szValue[512];
    const int nCapacity = sizeof(szValue) / sizeof(szValue[0]);

    *dec = 0;
    *sign = 0;

    if ( value < FLT_EPSILON && value > -FLT_EPSILON )
    {
        // Zero: emit `count` zero digits. _sntprintf returns a negative value
        // on truncation and may leave the buffer unterminated, so both cases
        // are handled explicitly.
        if ( _sntprintf(szValue, nCapacity - 1, _T("%0*d"), count, 0) < 0 )
            szValue[0] = _T('\0');
        szValue[nCapacity - 1] = _T('\0');
        return szValue;
    }

    // |value| >= FLT_EPSILON here, so the sign is simply the sign of value.
    if ( value < 0 )
    {
        *sign = 1;
        value = -value;
    }

    // Bounded formatting: a large value or count must not overrun szValue.
    int nSize = _sntprintf(szValue, nCapacity - 1, _T("%.*f"), count, value);
    szValue[nCapacity - 1] = _T('\0');
    if ( nSize <= 0 )
    {
        szValue[0] = _T('\0');
        return szValue;
    }

    // Remove the "." in place, remembering where it stood.
    LPTSTR pstrSource = szValue;
    LPTSTR pstrDest = szValue;
    LPTSTR pstrEnd = szValue + nSize;
    LPTSTR pstrPoint = NULL;

    for ( ; pstrSource < pstrEnd; ++pstrSource )
    {
        if ( *pstrSource == _T('.') )
            pstrPoint = pstrDest;
        else
            *pstrDest++ = *pstrSource;
    }
    *pstrDest = _T('\0');

    // No "." was printed (count == 0): every digit belongs to the integer
    // part, so the decimal point sits just past the last digit.
    if ( pstrPoint == NULL )
        pstrPoint = pstrDest;

    // Trim leading "0" digits, as _fcvt does for values below 1.
    LPTSTR pstrFirst = szValue;
    while ( *pstrFirst == _T('0') )
        ++pstrFirst;

    if ( pstrFirst != szValue )
    {
        // +1 copies the terminating NUL along with the remaining digits.
        memmove(szValue, pstrFirst, (pstrDest - pstrFirst + 1) * sizeof(TCHAR));
    }

    *dec = (int)(pstrPoint - pstrFirst);

    return szValue;
}

#else

/*
 * ANSI/MBCS builds: forward directly to the CRT's _fcvt().
 */
inline LPCTSTR my_fcvt( double value, int count, int *dec, int *sign )
{
    return _fcvt( value, count, dec, sign );
}

#endif

UNICODE文本文件的读写

UNICODE文本文件实际上就是双字节（UTF-16）的二进制文件。该文件以两个字节的文件头开始，即byte-order mark (BOM)，用来表示字节的顺序：文件中的字节序列为0xFF 0xFE表示低字节在前（little-endian），0xFE 0xFF表示高字节在前（big-endian）。在这里使用的是0xFF 0xFE。

下面是一个写、读文件的例程：

#ifdef UNICODE

// Sample: write a UTF-16 text file with a BOM, then read it back.
// The whole sample is inside the UNICODE guard so that fp is never used
// without being declared.

FILE* fp;

// Binary mode: the CRT must not translate line endings in UTF-16 data.
fp = _tfopen(_T("c:\\1.txt"), _T("w+b"));

if( fp != NULL )
{
    LPCTSTR buf = _T("abc\r\n");

    const BYTE head[] = {0xff, 0xfe};//BOM(byte-order mark), little-endian

    fwrite(head, sizeof(BYTE), sizeof(head)/sizeof(BYTE), fp);

    fwrite(buf, sizeof(TCHAR), _tcslen(buf), fp);

    fclose(fp);
}

// Read back in binary mode as well, for the same reason as above.
fp = _tfopen(_T("c:\\1.txt"), _T("rb"));

if( fp != NULL )
{
    TCHAR text[512];

    memset(text, 0, sizeof(text));

    // fread counts elements, not bytes: leave one TCHAR for the terminator.
    size_t nRead = fread(text, sizeof(TCHAR), sizeof(text)/sizeof(TCHAR) - 1, fp);

    fclose(fp);

    // ret is an in/out flag set: on input it selects the tests to run.
    int ret = IS_TEXT_UNICODE_SIGNATURE;

    // IsTextUnicode takes the buffer size in bytes.
    IsTextUnicode(text, (int)(nRead * sizeof(TCHAR)), &ret);

    LPTSTR pBuf = text;

    if( ret & IS_TEXT_UNICODE_SIGNATURE )
    {
        pBuf ++;// skip the BOM; pBuf now points at the text content
    }
}

#endif

工程转换方案

如果VC6.0工程选择的是中文，在菜单Project->Settings的C/C++属性页中，把Preprocessor definitions中的_MBCS改为_UNICODE；如果是英文的工程则直接加入_UNICODE。

如果工程是EXE文件，则须在Link属性页中，选择Category为Output，把Entry-point symbol:设置为wWinMainCRTStartup，如果工程不是EXE文件，则不需要改变。

最后点击OK即可。

附：ANSI、DBCS 和 Unicode 的定义

Unicode 是一种用两个字节表示一个字符的字符集。另外一些程序，如 Windows 95 API，使用 ANSI (American National Standards Institute) 或 DBCS 存储和操作字符串。

ANSI

ANSI 是个人计算机使用得最普遍的字符集。由于 ANSI 标准使用单一字节表示每个字符，因此最多只能有 256 个字符和标点符号代码。虽然对英语来说已经足够了，但不能完全支持其它语言。

DBCS

发行在亚洲大部分地区的 Microsoft Windows 系统使用 DBCS。它支持很多不同的东亚语言字母，如汉语、日语和朝鲜语。DBCS 使用数字 0–128 表示 ASCII 字符集。其它大于 128 的数字作为前导字节字符，它并不是真正的字符，只是简单的表明下一个字符属于非拉丁字符集。在 DBCS 中，ASCII 字符的长度是一个字节，而日语、朝鲜语和其它东亚字符的长度是 2 个字节。

Unicode

Unicode 是用两个字节表示每个字符的字符编码方案。国际标准组织 (ISO) 几乎为每种语言的每个字符和符号在 0 到 65,535 (2¹⁶ – 1) 范围内定义了一个数字（再加上为将来发展保留的一些空余空间）。在所有 32 位版本的 Windows 中，部件对象模型 (COM) 都使用 Unicode，它是 OLE 和 ActiveX 技术的基础。Windows NT 全部支持 Unicode。虽然 Unicode 和 DBCS 都是双字节字符，但它们的编码方案完全不同。

VC++关于UNICODE版本的开发

关于UNICODE版本的开发

代码转换方案

概述

数据类型

函数

宏

常量和全局变量

不能转换的函数

UNICODE文本文件的读写

工程转换方案

附：ANSI、DBCS 和 Unicode 的定义

ANSI

DBCS

Unicode

猜你喜欢