C++ 代码:根据列分组计算 MIC 系数
#include
namespace po = boost::program_options; using namespace boost::numeric::ublas;
// 读取文件
matrix<double, row_major> read_tsv(const std::string& filename, std::vectorstd::string& col_names, int& group_index) {
std::ifstream in(filename);
std::string line;
std::vector<std::vector
// 读取第一行并解析为列名
if (std::getline(in, line)) {
boost::algorithm::split(col_names, line, boost::is_any_of("\t"));
col_names.erase(col_names.begin(), col_names.begin() + 1); // 去除第一个空列
// 查找名为'group'的列索引
for (size_t i = 0; i < col_names.size(); ++i) {
if (col_names[i] == "group") {
group_index = i;
break;
}
}
}
while (std::getline(in, line)) {
std::vector<std::string> str_vec;
boost::algorithm::split(str_vec, line, boost::is_any_of("\t"));
std::vector<double> num_vec(str_vec.size() - 1);
std::transform(str_vec.begin() + 1, str_vec.end(), num_vec.begin(),
[](const std::string& str) { return std::stod(str); });
data.push_back(num_vec);
}
matrix<double, row_major> mat(data.size(), data[0].size());
for (size_t i = 0; i < mat.size1(); ++i)
for (size_t j = 0; j < mat.size2(); ++j)
mat(i, j) = data[i][j];
return mat;
}
//计算mic系数 std::map<std::string, double> calc_mic(const matrix<double, row_major>& mat, int threads, int group_index) { // 当样本量小于6个时,终止程序 if (mat.size1() < 6) { std::cerr << "error: the number of data rows should be greater than 5." << std::endl; } if (mat.size2() < 2) { std::cerr << "error: the number of data columns should be greater than 1." << std::endl; }
// 创建空的对称矩阵
matrix<double, row_major> result(mat.size2(), mat.size2());
// 创建MINE对象
MINE mine(0.5, 15, EST_MIC_APPROX);
// 创建map对象,用于存储每个组的mic系数
std::map<std::string, double> mic_map;
// 计算每列两两之间的mic系数,并填充对称矩阵
omp_set_num_threads(threads);
#pragma omp parallel for
for (size_t i = 0; i < mat.size2(); i++) {
for (size_t j = i + 1; j < mat.size2(); j++) { // 计算上三角部分
double x[mat.size1()];
double y[mat.size1()];
for (size_t k = 0; k < mat.size1(); k++) {
x[k] = mat(k, i);
y[k] = mat(k, j);
}
mine.compute_score(x, y, mat.size1());
result(i, j) = mine.mic();
result(j, i) = result(i, j); // 填充下三角部分
// 根据当前的组名称将mic系数存储到map对象中
std::string group_name = col_names[group_index];
mic_map[group_name] = mine.mic();
}
}
return mic_map;
}
//导出结果 void write_tsv(const std::string& filename, const std::map<std::string, double>& mic_map) { std::ofstream out(filename);
// 输出列名
out << "Group\tMIC\n";
// 输出每个组的mic系数
for (const auto& pair : mic_map) {
out << pair.first << "\t" << std::setprecision(3) << std::fixed << pair.second << "\n";
}
}
//main函数
int main(int argc, char** argv) {
// 声明命令行参数选项
int default_threads = 4;
int threads;
po::options_description desc(" Multi-threaded computation of maximum information coefficient.\n"
" For more detailed information, please refer to https://minepy.readthedocs.io/en/latest/");
desc.add_options()
("help,h", "help message")
("input,i", po::valuestd::string(), "input.txt")
("output,o", po::valuestd::string(), "output.txt,The first column is the name of the indicator, such as ASV/OTU or gene")
("threads,t", po::value
// 解析命令行参数选项
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
if (vm.count("help")) {
std::cout << desc << "\n";
return 0;
}
if (vm.empty()) {
std::cout << desc << "\n";
return 0;
}
int coreNum = omp_get_num_procs(); // 获得处理器个数
if (coreNum > 1 && threads > coreNum) {
fprintf(stderr, "\n%s: error: only %d threads are available\n", argv[0], coreNum);
exit(1);
}
if (vm.count("input")) {
std::string filename = vm["input"].as<std::string>();
std::ifstream file(filename);
if (file) {
// 文件存在,执行你的操作
if (vm.count("output")) {
std::vector<std::string> col_names;
int group_index = -1;
auto mat = read_tsv(vm["input"].as<std::string>(), col_names, group_index);
auto mic_map = calc_mic(mat, threads, group_index);
write_tsv(vm["output"].as<std::string>(), mic_map);
} else {
std::cerr << "output is not specified!" << std::endl;
return 1;
}
} else {
// 文件不存在
std::cerr << vm["input"].as<std::string>() << " not found!" << std::endl;
return 1;
}
} else {
std::cerr << "No file provided!" << std::endl;
return 1;
}
return 0;
}
原文地址: https://www.cveoy.top/t/topic/fU4F 著作权归作者所有。请勿转载和采集!