#include <cstddef>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

namespace {

/// @brief Checks that `data[pos..]` is well-formed UTF-8.
///
/// Rejects stray continuation bytes, truncated sequences, overlong encodings,
/// UTF-16 surrogate code points and anything above U+10FFFF.
///
/// @param data Raw bytes to inspect.
/// @param pos  Offset of the first byte to validate.
/// @return true when every byte from `pos` to the end belongs to a valid sequence.
bool isWellFormedUtf8(const std::string& data, std::size_t pos) {
    const std::size_t size = data.size();
    while (pos < size) {
        const unsigned char lead = static_cast<unsigned char>(data[pos]);
        if (lead < 0x80) {  // single-byte ASCII
            ++pos;
            continue;
        }

        // Number of continuation bytes, plus the allowed range of the first
        // one (narrowed for the lead bytes that could otherwise encode
        // overlong forms, surrogates or values above U+10FFFF).
        std::size_t trailing = 0;
        unsigned char secondMin = 0x80;
        unsigned char secondMax = 0xBF;
        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) secondMin = 0xA0;  // below this is overlong
            if (lead == 0xED) secondMax = 0x9F;  // above this is a surrogate
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) secondMin = 0x90;  // below this is overlong
            if (lead == 0xF4) secondMax = 0x8F;  // above this exceeds U+10FFFF
        } else {
            // 0x80-0xC1 and 0xF5-0xFF can never start a sequence.
            return false;
        }

        if (size - pos <= trailing) {
            return false;  // sequence is cut off by end of data
        }

        const unsigned char second = static_cast<unsigned char>(data[pos + 1]);
        if (second < secondMin || second > secondMax) {
            return false;
        }
        for (std::size_t i = 2; i <= trailing; ++i) {
            const unsigned char cont = static_cast<unsigned char>(data[pos + i]);
            if ((cont & 0xC0) != 0x80) {
                return false;
            }
        }
        pos += trailing + 1;
    }
    return true;
}

}  // namespace

/// @brief Reports whether the file at `filePath` contains well-formed UTF-8.
///
/// The whole file is read and validated, so the verdict no longer depends on
/// only the first few bytes. A leading UTF-8 BOM (EF BB BF) is recognised and
/// skipped before validation. An empty file and a pure-ASCII file are both
/// valid UTF-8 without BOM.
///
/// Prints the verdict to std::cout, or an error to std::cerr when the file
/// cannot be opened.
///
/// @param filePath Path of the file to inspect.
/// @return true if the content is valid UTF-8; false otherwise or on open failure.
bool isUTF8(const std::string& filePath) {
    std::ifstream file(filePath, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open file: " << filePath << std::endl;
        return false;
    }

    const std::string data((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

    // UTF-8 BOM (Byte Order Mark) is encoded as 0xEF, 0xBB, 0xBF
    const bool hasBom = data.size() >= 3 &&
                        static_cast<unsigned char>(data[0]) == 0xEF &&
                        static_cast<unsigned char>(data[1]) == 0xBB &&
                        static_cast<unsigned char>(data[2]) == 0xBF;

    if (!isWellFormedUtf8(data, hasBom ? 3 : 0)) {
        std::cout << filePath << " is not UTF-8 encoding." << std::endl;
        return false;
    }

    if (hasBom) {
        std::cout << filePath << " is UTF-8 with BOM." << std::endl;
    } else {
        std::cout << filePath << " is UTF-8 without BOM." << std::endl;
    }
    return true;
}

/// @brief Reports whether the file at `filePath` looks like GB2312 (EUC-CN) text.
///
/// The whole file is scanned. Every byte must be either 7-bit ASCII or part of
/// a two-byte character whose lead and trail bytes both lie in 0xA1..0xFE
/// (the 0xA1A1..0xFEFE range), and at least one such two-byte character must
/// be present. Pure-ASCII or empty content carries no GB2312 evidence and is
/// reported as not GB2312.
///
/// This is a structural check only: some byte sequences are valid in both
/// UTF-8 and this range, so callers that need to tell the two apart should
/// test for UTF-8 first.
///
/// Prints the verdict to std::cout, or an error to std::cerr when the file
/// cannot be opened.
///
/// @param filePath Path of the file to inspect.
/// @return true if the content matches the GB2312 byte structure; false
///         otherwise or on open failure.
bool isGB2312(const std::string& filePath) {
    std::ifstream file(filePath, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open file: " << filePath << std::endl;
        return false;
    }

    const std::string data((std::istreambuf_iterator<char>(file)),
                           std::istreambuf_iterator<char>());

    // GB2312 characters have the range of 0xA1A1 to 0xFEFE
    const auto inGbRange = [](unsigned char byte) {
        return byte >= 0xA1 && byte <= 0xFE;
    };

    bool wellFormed = true;
    bool sawDoubleByte = false;
    const std::size_t size = data.size();
    for (std::size_t pos = 0; pos < size;) {
        const unsigned char lead = static_cast<unsigned char>(data[pos]);
        if (lead < 0x80) {  // ASCII passes through unchanged
            ++pos;
            continue;
        }
        if (pos + 1 >= size) {  // lead byte with no trail byte
            wellFormed = false;
            break;
        }
        const unsigned char trail = static_cast<unsigned char>(data[pos + 1]);
        if (!inGbRange(lead) || !inGbRange(trail)) {
            wellFormed = false;
            break;
        }
        sawDoubleByte = true;
        pos += 2;
    }

    if (wellFormed && sawDoubleByte) {
        std::cout << filePath << " is GB2312 encoding." << std::endl;
        return true;
    }

    std::cout << filePath << " is not GB2312 encoding." << std::endl;
    return false;
}

/// @brief Entry point: classifies one file as UTF-8, GB2312 or neither.
///
/// The file to inspect is taken from the first command-line argument; when no
/// argument is given it falls back to "example.txt". The detection functions
/// print their own verdicts, so the branches below are placeholders for
/// encoding-specific handling. UTF-8 is tested first and GB2312 is only tried
/// when the UTF-8 check fails.
///
/// @param argc Number of command-line arguments.
/// @param argv Command-line arguments; argv[1], if present, is the file path.
/// @return Always 0.
int main(int argc, char* argv[]) {
    const std::string filePath = (argc > 1) ? argv[1] : "example.txt";

    if (isUTF8(filePath)) {
        // The file is UTF-8 encoded.
    } else if (isGB2312(filePath)) {
        // The file is GB2312 encoded.
    } else {
        // The file is neither UTF-8 nor GB2312 encoded.
    }

    return 0;
}

// C++ 代码区分 UTF-8 和 GB2312 编码
//
// 原文地址: https://www.cveoy.top/t/topic/p8f9 著作权归作者所有。请勿转载和采集!
//
// 免费AI点我,无需注册和登录