#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>

// Reports whether `data` begins with the UTF-16 little-endian byte order
// mark (FF FE).
//
// Only the two-byte signature is inspected. The UTF-32LE mark (FF FE 00 00)
// starts with the same two bytes, so a caller that needs to tell the two
// apart has to test for UTF-32LE first.
//
// The buffer is taken by const reference so the file contents are not copied
// on every call.
bool isUTF16LE(const std::vector<char>& data) {
    if (data.size() < 2) {
        return false;
    }
    // Compare as unsigned bytes so the result does not depend on whether
    // plain `char` is signed on this platform.
    return static_cast<unsigned char>(data[0]) == 0xFF &&
           static_cast<unsigned char>(data[1]) == 0xFE;
}

// Reports whether `data` begins with the UTF-16 big-endian byte order
// mark (FE FF).
//
// Only the two-byte signature is inspected; the rest of the buffer is not
// validated. The buffer is taken by const reference so the file contents are
// not copied on every call.
bool isUTF16BE(const std::vector<char>& data) {
    if (data.size() < 2) {
        return false;
    }
    // Compare as unsigned bytes so the result does not depend on whether
    // plain `char` is signed on this platform.
    return static_cast<unsigned char>(data[0]) == 0xFE &&
           static_cast<unsigned char>(data[1]) == 0xFF;
}

// Reports whether `data` begins with the UTF-32 little-endian byte order
// mark (FF FE 00 00).
//
// Only the four-byte signature is inspected; the rest of the buffer is not
// validated. The buffer is taken by const reference so the file contents are
// not copied on every call.
bool isUTF32LE(const std::vector<char>& data) {
    if (data.size() < 4) {
        return false;
    }
    // Compare as unsigned bytes so the result does not depend on whether
    // plain `char` is signed on this platform.
    return static_cast<unsigned char>(data[0]) == 0xFF &&
           static_cast<unsigned char>(data[1]) == 0xFE &&
           static_cast<unsigned char>(data[2]) == 0x00 &&
           static_cast<unsigned char>(data[3]) == 0x00;
}

// Reports whether `data` begins with the UTF-32 big-endian byte order
// mark (00 00 FE FF).
//
// Only the four-byte signature is inspected; the rest of the buffer is not
// validated. The buffer is taken by const reference so the file contents are
// not copied on every call.
bool isUTF32BE(const std::vector<char>& data) {
    if (data.size() < 4) {
        return false;
    }
    // Compare as unsigned bytes so the result does not depend on whether
    // plain `char` is signed on this platform.
    return static_cast<unsigned char>(data[0]) == 0x00 &&
           static_cast<unsigned char>(data[1]) == 0x00 &&
           static_cast<unsigned char>(data[2]) == 0xFE &&
           static_cast<unsigned char>(data[3]) == 0xFF;
}

// Reports whether `data` begins with the UTF-8 signature (EF BB BF).
//
// Only the three-byte signature is inspected; the rest of the buffer is not
// validated as UTF-8. The buffer is taken by const reference so the file
// contents are not copied on every call.
bool isUTF8(const std::vector<char>& data) {
    if (data.size() < 3) {
        return false;
    }
    // Compare as unsigned bytes so the result does not depend on whether
    // plain `char` is signed on this platform.
    return static_cast<unsigned char>(data[0]) == 0xEF &&
           static_cast<unsigned char>(data[1]) == 0xBB &&
           static_cast<unsigned char>(data[2]) == 0xBF;
}

// Reports whether `data` is structurally valid GB2312 text in its usual
// EUC-CN byte encoding and contains at least one two-byte character.
//
// Accepted byte sequences:
//   - a single byte 0x00-0x7F (ASCII), or
//   - a lead byte 0xA1-0xF7 followed by a trail byte 0xA1-0xFE.
//
// Any other byte, or a lead byte with no trail byte after it, makes the
// buffer invalid. A buffer made only of ASCII bytes (including an empty one)
// yields false: nothing in it is specific to GB2312.
//
// This is a byte-range check only. It does not verify that each two-byte
// pair maps to an assigned GB2312 character; a conversion library such as
// iconv is needed for that.
bool isGB2312(const std::vector<char>& data) {
    bool sawDoubleByte = false;

    for (std::size_t i = 0; i < data.size(); ++i) {
        const unsigned lead = static_cast<unsigned char>(data[i]);
        if (lead <= 0x7F) {
            continue;  // single-byte ASCII
        }
        if (lead < 0xA1 || lead > 0xF7) {
            return false;  // not a legal lead byte
        }
        if (i + 1 >= data.size()) {
            return false;  // lead byte truncated at end of buffer
        }
        const unsigned trail = static_cast<unsigned char>(data[i + 1]);
        if (trail < 0xA1 || trail > 0xFE) {
            return false;  // not a legal trail byte
        }
        sawDoubleByte = true;
        ++i;  // the trail byte has been consumed as well
    }

    return sawDoubleByte;
}

// Heuristically reports whether `data` looks like UTF-16 little-endian text
// that has no byte order mark.
//
// All of the following must hold:
//   - the size is a non-zero multiple of two;
//   - the first code unit is not U+FEFF (that would be a BOM);
//   - surrogates are well formed: every high surrogate (D800-DBFF) is
//     immediately followed by a low surrogate (DC00-DFFF), and no low
//     surrogate appears on its own;
//   - more code units fall in U+0001-U+00FF when read little-endian than
//     would when read big-endian. This is the byte-order evidence: ASCII and
//     Latin-1 characters, including line breaks, have a zero high byte.
//
// Being a heuristic, it returns false for UTF-16LE text that contains no
// character below U+0100, because such text carries no byte-order evidence
// this check can use.
bool isUTF16LENoBOM(const std::vector<char>& data) {
    const std::size_t size = data.size();
    if (size < 2 || size % 2 != 0) {
        return false;
    }

    std::size_t littleEndianVotes = 0;  // units that are U+0001-U+00FF as LE
    std::size_t bigEndianVotes = 0;     // units that would be U+0001-U+00FF as BE
    bool pendingHighSurrogate = false;

    for (std::size_t i = 0; i < size; i += 2) {
        const unsigned lowByte = static_cast<unsigned char>(data[i]);
        const unsigned highByte = static_cast<unsigned char>(data[i + 1]);
        const unsigned unit = (highByte << 8) | lowByte;

        if (i == 0 && unit == 0xFEFF) {
            return false;  // the buffer carries a BOM
        }

        const bool isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
        const bool isLowSurrogate = unit >= 0xDC00 && unit <= 0xDFFF;
        // A low surrogate is required exactly when a high one precedes it.
        if (pendingHighSurrogate != isLowSurrogate) {
            return false;
        }
        pendingHighSurrogate = isHighSurrogate;

        if (highByte == 0 && lowByte != 0) {
            ++littleEndianVotes;
        } else if (lowByte == 0 && highByte != 0) {
            ++bigEndianVotes;
        }
    }

    return !pendingHighSurrogate && littleEndianVotes > bigEndianVotes;
}

// Heuristically reports whether `data` looks like UTF-16 big-endian text
// that has no byte order mark.
//
// All of the following must hold:
//   - the size is a non-zero multiple of two;
//   - the first code unit is not U+FEFF (that would be a BOM);
//   - surrogates are well formed: every high surrogate (D800-DBFF) is
//     immediately followed by a low surrogate (DC00-DFFF), and no low
//     surrogate appears on its own;
//   - more code units fall in U+0001-U+00FF when read big-endian than would
//     when read little-endian. This is the byte-order evidence: ASCII and
//     Latin-1 characters, including line breaks, have a zero high byte.
//
// Being a heuristic, it returns false for UTF-16BE text that contains no
// character below U+0100, because such text carries no byte-order evidence
// this check can use.
bool isUTF16BENoBOM(const std::vector<char>& data) {
    const std::size_t size = data.size();
    if (size < 2 || size % 2 != 0) {
        return false;
    }

    std::size_t bigEndianVotes = 0;     // units that are U+0001-U+00FF as BE
    std::size_t littleEndianVotes = 0;  // units that would be U+0001-U+00FF as LE
    bool pendingHighSurrogate = false;

    for (std::size_t i = 0; i < size; i += 2) {
        const unsigned highByte = static_cast<unsigned char>(data[i]);
        const unsigned lowByte = static_cast<unsigned char>(data[i + 1]);
        const unsigned unit = (highByte << 8) | lowByte;

        if (i == 0 && unit == 0xFEFF) {
            return false;  // the buffer carries a BOM
        }

        const bool isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
        const bool isLowSurrogate = unit >= 0xDC00 && unit <= 0xDFFF;
        // A low surrogate is required exactly when a high one precedes it.
        if (pendingHighSurrogate != isLowSurrogate) {
            return false;
        }
        pendingHighSurrogate = isHighSurrogate;

        if (highByte == 0 && lowByte != 0) {
            ++bigEndianVotes;
        } else if (lowByte == 0 && highByte != 0) {
            ++littleEndianVotes;
        }
    }

    return !pendingHighSurrogate && bigEndianVotes > littleEndianVotes;
}

// Reports whether `data` is well-formed UTF-32 little-endian text that has
// no byte order mark.
//
// All of the following must hold:
//   - the size is a non-zero multiple of four;
//   - the first code unit is not U+FEFF (that would be a BOM);
//   - every 32-bit unit, read little-endian, is a Unicode scalar value:
//     at most 0x10FFFF and outside the surrogate range D800-DFFF.
bool isUTF32LENoBOM(const std::vector<char>& data) {
    const std::size_t size = data.size();
    if (size < 4 || size % 4 != 0) {
        return false;
    }

    for (std::size_t i = 0; i < size; i += 4) {
        // Assemble the unit from unsigned bytes, least significant first.
        const std::uint32_t codePoint =
            static_cast<std::uint32_t>(static_cast<unsigned char>(data[i])) |
            (static_cast<std::uint32_t>(static_cast<unsigned char>(data[i + 1])) << 8) |
            (static_cast<std::uint32_t>(static_cast<unsigned char>(data[i + 2])) << 16) |
            (static_cast<std::uint32_t>(static_cast<unsigned char>(data[i + 3])) << 24);

        if (i == 0 && codePoint == 0xFEFF) {
            return false;  // the buffer carries a BOM
        }
        if (codePoint > 0x10FFFF) {
            return false;  // beyond the Unicode code space
        }
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
            return false;  // surrogates are not scalar values
        }
    }

    return true;
}

// Reports whether `data` is well-formed UTF-32 big-endian text that has no
// byte order mark.
//
// All of the following must hold:
//   - the size is a non-zero multiple of four;
//   - the first code unit is not U+FEFF (that would be a BOM);
//   - every 32-bit unit, read big-endian, is a Unicode scalar value:
//     at most 0x10FFFF and outside the surrogate range D800-DFFF.
bool isUTF32BENoBOM(const std::vector<char>& data) {
    const std::size_t size = data.size();
    if (size < 4 || size % 4 != 0) {
        return false;
    }

    for (std::size_t i = 0; i < size; i += 4) {
        // Assemble the unit from unsigned bytes, most significant first.
        const std::uint32_t codePoint =
            (static_cast<std::uint32_t>(static_cast<unsigned char>(data[i])) << 24) |
            (static_cast<std::uint32_t>(static_cast<unsigned char>(data[i + 1])) << 16) |
            (static_cast<std::uint32_t>(static_cast<unsigned char>(data[i + 2])) << 8) |
            static_cast<std::uint32_t>(static_cast<unsigned char>(data[i + 3]));

        if (i == 0 && codePoint == 0xFEFF) {
            return false;  // the buffer carries a BOM
        }
        if (codePoint > 0x10FFFF) {
            return false;  // beyond the Unicode code space
        }
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
            return false;  // surrogates are not scalar values
        }
    }

    return true;
}

// Reports whether `data` is non-empty, well-formed UTF-8 that does not start
// with the UTF-8 signature (EF BB BF).
//
// Well-formedness follows the standard UTF-8 byte-sequence table: overlong
// encodings (lead bytes C0/C1, E0 followed by 80-9F, F0 followed by 80-8F),
// encoded surrogates (ED followed by A0-BF), values above U+10FFFF (F4
// followed by 90-BF, lead bytes F5-FF), stray continuation bytes and
// truncated sequences are all rejected.
//
// Pure ASCII input is accepted, since ASCII is a subset of UTF-8.
bool isUTF8NoBOM(const std::vector<char>& data) {
    const std::size_t size = data.size();
    if (size == 0) {
        return false;
    }

    // Read bytes as unsigned so range comparisons behave the same whether or
    // not plain `char` is signed.
    const auto byteAt = [&data](std::size_t index) -> unsigned {
        return static_cast<unsigned char>(data[index]);
    };

    if (size >= 3 && byteAt(0) == 0xEF && byteAt(1) == 0xBB && byteAt(2) == 0xBF) {
        return false;  // the buffer carries the UTF-8 signature
    }

    std::size_t pos = 0;
    while (pos < size) {
        const unsigned lead = byteAt(pos);
        if (lead <= 0x7F) {
            ++pos;  // single-byte ASCII
            continue;
        }

        // Number of continuation bytes, and the allowed range of the first
        // one; that range is narrowed for a few lead bytes to exclude
        // overlong forms, surrogates and values above U+10FFFF.
        std::size_t trailing = 0;
        unsigned firstMin = 0x80;
        unsigned firstMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) {
                firstMin = 0xA0;
            } else if (lead == 0xED) {
                firstMax = 0x9F;
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) {
                firstMin = 0x90;
            } else if (lead == 0xF4) {
                firstMax = 0x8F;
            }
        } else {
            return false;  // 80-BF, C0, C1 and F5-FF can never start a sequence
        }

        if (size - pos <= trailing) {
            return false;  // sequence runs past the end of the buffer
        }

        const unsigned first = byteAt(pos + 1);
        if (first < firstMin || first > firstMax) {
            return false;
        }
        for (std::size_t k = 2; k <= trailing; ++k) {
            const unsigned next = byteAt(pos + k);
            if (next < 0x80 || next > 0xBF) {
                return false;
            }
        }

        pos += trailing + 1;
    }

    return true;
}

// Reads "example.txt" as raw bytes and prints one line naming the encoding
// that the detection helpers attribute to it.
//
// Returns 0 after printing a result, or 1 (with a message on stderr) when the
// file cannot be opened. Without that check an unreadable file would yield an
// empty buffer and be classified like a real, empty file.
int main() {
    const char* const path = "example.txt";

    std::ifstream file(path, std::ios::binary);
    if (!file) {
        std::cerr << "无法打开文件: " << path << '\n';
        return 1;
    }

    // Extra parentheses around the first argument keep this from being
    // parsed as a function declaration.
    const std::vector<char> data((std::istreambuf_iterator<char>(file)),
                                 std::istreambuf_iterator<char>());

    const char* label = "未知编码";

    // Signature checks come first. UTF-32LE has to be tested before UTF-16LE
    // because the UTF-16LE mark (FF FE) is a prefix of the UTF-32LE mark
    // (FF FE 00 00).
    if (isUTF32LE(data)) {
        label = "UTF32LE";
    } else if (isUTF32BE(data)) {
        label = "UTF32BE";
    } else if (isUTF8(data)) {
        label = "UTF8";
    } else if (isUTF16LE(data)) {
        label = "UTF16LE";
    } else if (isUTF16BE(data)) {
        label = "UTF16BE";
    }
    // Content-based guesses follow, widest code units first; GB2312 is tried
    // last as the legacy fallback.
    else if (isUTF32LENoBOM(data)) {
        label = "UTF32LE无签名";
    } else if (isUTF32BENoBOM(data)) {
        label = "UTF32BE无签名";
    } else if (isUTF16LENoBOM(data)) {
        label = "UTF16LE无签名";
    } else if (isUTF16BENoBOM(data)) {
        label = "UTF16BE无签名";
    } else if (isUTF8NoBOM(data)) {
        label = "UTF8无签名";
    } else if (isGB2312(data)) {
        label = "GB2312";
    }

    std::cout << label << '\n';
    return 0;
}

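// C++ code for detecting a file's encoding (UTF-16, UTF-32, UTF-8, GB2312)
//
// Original article: https://www.cveoy.top/t/topic/p8gT — copyright belongs to the author; do not repost or scrape.
//
// Free AI, click here — no registration or login required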