利用位运算统计文本文档中的汉字字数

在做C++教材练习题时,其中一题要求统计文本文档中的汉字个数。当我直接用 char ch 逐个读取并计数时,发现统计出的字数和文本文档的字节数相同。为什么?
也就是说,常规方法下统计的实际不是文本文档中的字符个数,而是这些字符所占的字节总数:char 只占一个字节,而一个汉字在文件中占多个字节(例如 GBK 编码下占 2 个字节),所以每个汉字被重复计数。
下面是我原先的代码:

#include <iostream>
#include <strstream>
#include <fstream>
#include <io.h>
#include <string.h>

using namespace std;

// Forward declaration; the definition follows main().
// Takes a file name and returns the count computed for that file.
int FrequencyCounts(char *);

int main()
{
    long handle;
    struct _finddata_t info;
    handle = _findfirst("*.txt", &info);
    if (handle == -1)
        return 0;
    int count[3];
    int i = 0;
    do
    {
        count[i] = FrequencyCounts(info.name);
        ++i;
    } while (_findnext(handle, &info) == 0);
    char output_name[100];
    strcpy(output_name, "多个文件汉字字频统计结果.txt");
    ofstream out;
    out.open(output_name, ios::app);
    if (!out)
    {
        cout << "Can't open the file!" << output_name << endl;
        exit(0);
    }
    for (i = 0; i < 3; ++i)
    {
        out << "test" << i+1 << ".txt" << "中共有" << count[i] << "个字" << endl;
    }
    out.close();

    cout << "统计完毕" << endl;

    return 0;
}

// Opens the file named by `a` and returns how many chars could be read from
// it one at a time. Every successful single-char read adds one, so a
// character stored as several bytes contributes more than one to the result.
// If the file cannot be opened, a message is printed and the process is
// terminated via exit(0).
int FrequencyCounts(char *a)
{
    ifstream input(a);
    if (input.fail())
    {
        cout << "Can't open the file!" << a << endl;
        exit(0);
    }

    int total = 0;
    for (char byte; input.get(byte); )
        ++total;

    input.close();
    return total;
}

最后如何改正呢?参考 https://blog.csdn.net/bufanq/article/details/51034156 这篇文章,
将 FrequencyCounts 中的读取循环改为:

    while (in.get(ch))
    {
        // A byte with the high bit set is not ASCII. Assuming the text is
        // stored in a double-byte encoding such as GB2312/GBK (TODO confirm
        // the encoding of the input files), such a byte is the lead byte of a
        // two-byte character. Counting every high-bit byte would count each
        // Chinese character twice, so the trail byte is consumed here and the
        // character is counted once.
        if ((ch & 0x80) == 0x80)
        {
            count++;
            in.get(ch); // skip the trail byte of the same character
        }
    }

仅此记录。

猜你喜欢

转载自blog.csdn.net/t11383/article/details/80687543