// 使用 C++ 代码区分 UTF-8 和 GB2312 编码
#include <cstddef>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
namespace {

/// @brief Checks whether the bytes of `data` from offset `pos` to the end are
///        well-formed UTF-8 as defined by RFC 3629.
///
/// Rejects stray continuation bytes, truncated sequences, overlong encodings
/// (lead bytes 0xC0/0xC1, 0xE0 followed by < 0xA0, 0xF0 followed by < 0x90),
/// UTF-16 surrogates (0xED followed by > 0x9F) and code points above U+10FFFF
/// (0xF4 followed by > 0x8F, or any lead byte >= 0xF5).
///
/// @param data Raw bytes to inspect.
/// @param pos  Offset of the first byte to validate.
/// @return true when every byte in [pos, data.size()) belongs to a valid sequence
///         (an empty range is considered valid).
bool isWellFormedUtf8(const std::string& data, std::size_t pos) {
    const std::size_t size = data.size();
    while (pos < size) {
        const unsigned char lead = static_cast<unsigned char>(data[pos]);
        if (lead <= 0x7F) {  // single-byte (ASCII) code point
            ++pos;
            continue;
        }

        std::size_t trailing = 0;
        // Allowed range for the FIRST continuation byte; it is narrower than
        // 0x80..0xBF for a few lead bytes to exclude overlongs/surrogates/out-of-range.
        unsigned char low = 0x80;
        unsigned char high = 0xBF;
        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) {
                low = 0xA0;
            } else if (lead == 0xED) {
                high = 0x9F;
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) {
                low = 0x90;
            } else if (lead == 0xF4) {
                high = 0x8F;
            }
        } else {
            // 0x80..0xC1 and 0xF5..0xFF can never start a sequence.
            return false;
        }

        // The sequence needs `trailing` more bytes after the lead byte.
        if (size - pos <= trailing) {
            return false;
        }
        for (std::size_t i = 1; i <= trailing; ++i) {
            const unsigned char next = static_cast<unsigned char>(data[pos + i]);
            if (next < low || next > high) {
                return false;
            }
            // Only the first continuation byte has a restricted range.
            low = 0x80;
            high = 0xBF;
        }
        pos += trailing + 1;
    }
    return true;
}

}  // namespace

/// @brief Reports whether the file at `filePath` contains well-formed UTF-8.
///
/// The whole file is read into memory and validated byte by byte. A leading
/// UTF-8 BOM (0xEF 0xBB 0xBF) is recognised and skipped; the remainder must
/// still be valid UTF-8. Pure-ASCII and empty files count as UTF-8 without BOM.
/// The verdict is printed to std::cout; an open failure is printed to std::cerr.
///
/// @param filePath Path of the file to inspect.
/// @return true if the file could be opened and its content is valid UTF-8,
///         false otherwise (including when the file cannot be opened).
bool isUTF8(const std::string& filePath) {
    std::ifstream file(filePath, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open file: " << filePath << '\n';
        return false;
    }

    // Inspect the full content: looking only at the first bytes misclassifies
    // files that start with ASCII or with a multi-byte character.
    const std::string data((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

    // compare() yields non-zero when the file is shorter than the BOM.
    const bool hasBom = data.compare(0, 3, "\xEF\xBB\xBF") == 0;

    if (!isWellFormedUtf8(data, hasBom ? 3 : 0)) {
        std::cout << filePath << " is not UTF-8 encoding." << '\n';
        return false;
    }

    std::cout << filePath << (hasBom ? " is UTF-8 with BOM." : " is UTF-8 without BOM.") << '\n';
    return true;
}
/// @brief Reports whether the file at `filePath` looks like GB2312 (EUC-CN) text.
///
/// The whole file is read and scanned: bytes below 0x80 are taken as ASCII, and
/// every other byte must be the lead of a two-byte pair whose bytes both lie in
/// 0xA1..0xFE (the 0xA1A1..0xFEFE code space). At least one such pair must be
/// present, so empty and pure-ASCII files return false. Unassigned cells inside
/// that code space are not rejected.
///
/// NOTE: some valid UTF-8 byte sequences also fit this pattern, so callers that
/// need to tell the two encodings apart should test for UTF-8 first.
///
/// The verdict is printed to std::cout; an open failure is printed to std::cerr.
///
/// @param filePath Path of the file to inspect.
/// @return true if the file could be opened and matches the pattern above,
///         false otherwise (including when the file cannot be opened).
bool isGB2312(const std::string& filePath) {
    std::ifstream file(filePath, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open file: " << filePath << '\n';
        return false;
    }

    const std::string data((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

    // Both bytes of a GB2312 double-byte character share the same valid range.
    const auto inCodeSpace = [](char c) {
        const unsigned char byte = static_cast<unsigned char>(c);
        return byte >= 0xA1 && byte <= 0xFE;
    };

    bool wellFormed = true;
    bool sawDoubleByte = false;
    std::size_t pos = 0;
    while (pos < data.size()) {
        if (static_cast<unsigned char>(data[pos]) < 0x80) {  // ASCII byte
            ++pos;
            continue;
        }
        // A non-ASCII byte must start a complete, in-range two-byte pair.
        const bool hasTrail = pos + 1 < data.size();
        if (!inCodeSpace(data[pos]) || !hasTrail || !inCodeSpace(data[pos + 1])) {
            wellFormed = false;
            break;
        }
        sawDoubleByte = true;
        pos += 2;
    }

    if (wellFormed && sawDoubleByte) {
        std::cout << filePath << " is GB2312 encoding." << '\n';
        return true;
    }

    std::cout << filePath << " is not GB2312 encoding." << '\n';
    return false;
}
/// @brief Classifies one file as UTF-8, GB2312, or neither.
///
/// The path is taken from the first command-line argument when one is given and
/// defaults to "example.txt" otherwise. isUTF8 is tried first; isGB2312 is only
/// called when that returns false. Both helpers print their own verdict, so the
/// branches below are placeholders for encoding-specific handling.
///
/// @param argc Number of command-line arguments.
/// @param argv Command-line arguments; argv[1], if present, is the file path.
/// @return Always 0.
int main(int argc, char* argv[]) {
    const std::string filePath = (argc > 1) ? argv[1] : "example.txt";

    if (isUTF8(filePath)) {
        // The file is UTF-8 encoded.
    } else if (isGB2312(filePath)) {
        // The file is GB2312 encoded.
    } else {
        // The file is neither UTF-8 nor GB2312 encoded.
    }
    return 0;
}
// 原文地址: https://www.cveoy.top/t/topic/p8f9 著作权归作者所有。请勿转载和采集!