#include<cstdlib>
#include<fstream>
#include<iostream>
using namespace std;
// Note on encodings (ANSI vs. Unicode vs. UTF-8): the code below is only valid
// for GB2312-encoded input, i.e. a text file saved in the "ANSI" format.

// Counts the GB2312 Chinese characters (hanzi) found in a byte stream.
//
// GB2312 double-byte codes span 0xA1A1-0xFEFE. Hanzi occupy 0xB0A1-0xF7FE:
// lead byte 0xB0-0xF7 (rows 16-87), trail byte 0xA1-0xFE (cells 01-94).
// Any other byte is treated as a single-byte character.
//
// @param in  Stream positioned at the first byte to examine; read until EOF.
// @return    Number of complete hanzi byte pairs encountered.
static int count_gb2312_hanzi(std::istream& in)
{
    const int kEof = std::istream::traits_type::eof();
    int count = 0;

    // Unformatted get() is used on purpose: operator>> would skip whitespace
    // bytes and leave its target untouched on failure.
    for (int lead = in.get(); lead != kEof; lead = in.get())
    {
        // Outside the double-byte lead range: single-byte character.
        if (lead < 0xa1 || lead > 0xfe)
            continue;

        // Always consume the trail byte of a double-byte code, even when the
        // pair is a symbol rather than a hanzi; otherwise the trail byte
        // would be mistaken for the lead byte of the next character.
        const int trail = in.get();
        if (trail == kEof)
            break; // Truncated pair at end of input: nothing to count.

        const bool hanzi_lead = lead >= 0xb0 && lead <= 0xf7;
        const bool valid_trail = trail >= 0xa1 && trail <= 0xfe;
        if (hanzi_lead && valid_trail)
            ++count;
    }
    return count;
}

// Opens srcFile, counts the GB2312 Chinese characters it contains and prints
// the total to standard output.
//
// @param srcFile  Path of the GB2312-encoded file to scan.
//
// If the file cannot be opened, an error is written to standard error and the
// process terminates with exit code -1.
void calculate(const char* srcFile)
{
    // Binary mode: the scan works on raw bytes, so no newline translation or
    // other text-mode processing must be applied.
    std::ifstream inFile(srcFile, std::ios::in | std::ios::binary);
    if (!inFile)
    {
        std::cerr << "File could not be open." << std::endl;
        std::exit(-1);
    }

    const int all_num = count_gb2312_hanzi(inFile);
    std::cout << "总中文数" << all_num << std::endl;
    // inFile is closed by its destructor.
}
// Program entry point: prints the number of GB2312 Chinese characters in a
// text file.
//
// Usage: <program> [path]
// When no path is given on the command line, the original hard-coded location
// is used so existing invocations keep working.
int main(int argc, char* argv[])
{
    const char* const kDefaultPath = "C:\\Users\\Lenovo\\Desktop\\1.txt";
    calculate(argc > 1 ? argv[1] : kDefaultPath);
    return 0;
}