第二章: Unicode
Unicode(Windows 采用的 UTF-16 形式)用的是16位编码单元;
ASCII字符:这个图是网上找的,二进制,十进制,十六进制都有!
(图一)
26个字母:
大写字母 + 0x20(十进制32)== 小写字母 (对照"图一"看看)
====================================================
这里引入一个函数,功能就是打印指定内存存储的内容!
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <string>
// Builds a hex dump of a memory range, prints it to stdout and returns it.
//
// content: start address of the memory to dump; when it is nullptr an empty
//          string is returned and nothing is printed.
// length:  number of bytes to dump; a value <= 0 produces just "\n".
//
// Each row covers up to 16 bytes and is laid out as:
//   '\n' + byte offset (decimal, zero-padded to at least 5 digits) + '\t'
//   + 16 three-character cells "XX " (upper-case hex, blank when past the end)
//   + 3 spaces
//   + 16 text cells (the byte itself when it is printable 7-bit ASCII,
//     '.' otherwise, blank when past the end).
// The dump is terminated by a single '\n'.
std::string printBin(const char* content, int length)
{
    std::string message;
    if (nullptr == content)
    {
        return message;
    }

    const int kBytesPerRow = 16;
    const char kHexDigits[] = "0123456789ABCDEF";
    const std::string kColumnGap(3, ' ');

    if (length > 0)
    {
        // One row is 7 + 48 + 3 + 16 = 74 characters for offsets below 100000;
        // the extra slack covers wider offsets and the trailing newline.
        const size_t rowCount = (static_cast<size_t>(length) + kBytesPerRow - 1) / kBytesPerRow;
        message.reserve(rowCount * 80 + 1);
    }

    int offset = 0;
    while (offset < length)
    {
        const int remaining = length - offset;
        const int rowLen = remaining < kBytesPerRow ? remaining : kBytesPerRow;

        // Offset prefix. It is built without a fixed-size buffer so that
        // offsets of six or more digits simply widen the field.
        const std::string offsetText = std::to_string(offset);
        message += '\n';
        if (offsetText.size() < 5)
        {
            message.append(5 - offsetText.size(), '0');
        }
        message += offsetText;
        message += '\t';

        // Both columns start out blank so a short final row stays aligned.
        std::string hexColumn(kBytesPerRow * 3, ' ');
        std::string textColumn(kBytesPerRow, ' ');
        for (int j = 0; j < rowLen; ++j)
        {
            // Work on the unsigned value: no sign extension when shifting and
            // no negative argument passed to isprint().
            const unsigned char byte = static_cast<unsigned char>(content[offset + j]);
            hexColumn[j * 3] = kHexDigits[byte >> 4];
            hexColumn[j * 3 + 1] = kHexDigits[byte & 0x0f];
            textColumn[j] = (byte < 0x80 && isprint(byte)) ? static_cast<char>(byte) : '.';
        }

        message += hexColumn;
        message += kColumnGap;
        message += textColumn;

        // Advancing by rowLen stops exactly at length, so offset cannot overflow.
        offset += rowLen;
    }

    message += '\n';
    printf("%s", message.c_str());
    return message;
}
测试代码:
// Demo: dump the in-memory bytes of two ints and of a C string with printBin.
int main(int argc, char* argv[])
{
    const int a = 100;
    printBin(reinterpret_cast<const char*>(&a), static_cast<int>(sizeof(a)));

    const int b = 0x12345678;
    // Use b's own size rather than a's so the call stays correct if the types diverge.
    printBin(reinterpret_cast<const char*>(&b), static_cast<int>(sizeof(b)));

    // String literals are const in C++, so bind them to a const pointer.
    const char* p = "123456789";
    // +1 so that the terminating '\0' is part of the dump as well.
    printBin(p, static_cast<int>(strlen(p) + 1));

    // Runs the shell command PAUSE (on Windows this waits for a key press).
    system("PAUSE");
    return 0;
}
(注:使用的是vs 2015 编译器)
结果:
说明:64 00 00 00 是100(十六进制 0x64)在内存中的存储,即变量a的值; 注意看变量b的存储:0x12345678 在内存中是 78 56 34 12,低位字节在前(小端序)!
====================================================
继续书本上的内容......
char 数据类型:
// Demo: sizes and in-memory bytes of a char, a char pointer and a char array.
int main(int argc, char* argv[])
{
    char c = 'A';
    // String literals are const in C++, so bind them to a const pointer.
    const char* p = "Hello";
    char a[10] = {0};

    // sizeof yields size_t, which must be printed with %zu rather than %d.
    printf("%zu\n", sizeof(c));
    printBin(&c, static_cast<int>(sizeof(c)));

    // sizeof(p) is the size of the pointer itself, not of the string it points
    // to (4 bytes in a 32-bit build, 8 bytes in a 64-bit build).
    printf("%zu\n", sizeof(p));
    // Dump the first sizeof(p) bytes p points to, but never read beyond the
    // literal's terminating '\0' (an 8-byte pointer is larger than "Hello").
    const size_t literalBytes = strlen(p) + 1;
    const size_t headBytes = sizeof(p) < literalBytes ? sizeof(p) : literalBytes;
    printBin(p, static_cast<int>(headBytes));
    // Dump every character of the string, without the terminating '\0'.
    printBin(p, static_cast<int>(strlen(p)));

    printf("%zu\n", sizeof(a));
    printBin(a, static_cast<int>(sizeof(a)));

    // Runs the shell command PAUSE (on Windows this waits for a key press).
    system("PAUSE");
    return 0;
}
结果:
更宽的字符:
wchar_t : 在 C 中定义为 typedef unsigned short wchar_t;在 C++ 中 wchar_t 是内置类型(在 Windows 上同样是16位)
// Demo: compares a narrow character/string with its wide (wchar_t) counterpart,
// printing their lengths and dumping their in-memory bytes.
int main(int argc, char* argv[])
{
    char c = 'A';
    // String literals are const in C++, so bind them to const pointers.
    const char* p = "Hello";
    wchar_t wc = L'A';
    const wchar_t* p_w = L"Hello";

    // strlen/wcslen return size_t, which must be printed with %zu rather than %d.
    printf("strlen(p)=%zu\n", strlen(p));
    // Deliberately misreads the wide string as bytes: strlen stops at the first
    // zero byte, which lies inside the first wide character when that
    // character's value fits in a single byte.
    printf("strlen(p_w)=%zu\n", strlen(reinterpret_cast<const char*>(p_w)));
    printf("wcslen(p_w)=%zu\n", wcslen(p_w));

    printBin(&c, static_cast<int>(sizeof(c)));
    printBin(reinterpret_cast<const char*>(&wc), static_cast<int>(sizeof(wc)));
    printBin(p, static_cast<int>(strlen(p)));
    // Byte count = character count * sizeof(wchar_t), not a hard-coded 2.
    printBin(reinterpret_cast<const char*>(p_w),
             static_cast<int>(wcslen(p_w) * sizeof(wchar_t)));

    // Runs the shell command PAUSE (on Windows this waits for a key press).
    system("PAUSE");
    return 0;
}
结果:
下面是strlen的代码,在碰到'\0'就结束。宽字符 L'H' 在内存中占两个字节 48 00(低位字节在前),第二个字节就是'\0',所以在strlen((char *)p_w) 进行求其长度的时候,返回的值为1;
// Illustrative strlen: counts the bytes in front of the first '\0' in s.
// The terminator itself is not included in the returned count.
size_t strlen(const char *s)
{
    size_t count = 0;
    while (s[count] != '\0')
    {
        ++count;
    }
    return count;
}