C++多线程计算最大互信息系数(MIC)并按列分组
#include
// 读取文件
matrix<double, row_major> read_tsv(const std::string& filename, std::vectorstd::string& col_names) {
std::ifstream in(filename);
std::string line;
std::vector<std::vector
// 读取第一行并解析为列名
if (std::getline(in, line)) {
boost::algorithm::split(col_names, line, boost::is_any_of('\t'));
col_names.erase(col_names.begin()); // 去除第一个空列
}
while (std::getline(in, line)) {
std::vector<std::string> str_vec;
boost::algorithm::split(str_vec, line, boost::is_any_of('\t'));
std::vector<double> num_vec(str_vec.size() - 1);
std::transform(str_vec.begin() + 1, str_vec.end(), num_vec.begin(),
[](const std::string& str) { return std::stod(str); });
data.push_back(num_vec);
}
matrix<double, row_major> mat(data.size(), data[0].size());
for (size_t i = 0; i < mat.size1(); ++i)
for (size_t j = 0; j < mat.size2(); ++j)
mat(i, j) = data[i][j];
return mat;
}
//计算mic系数
matrix<double, row_major> calc_mic(const matrix<double, row_major>& mat, const std::vectorstd::string& col_names, int threads) { //当样本量小于6个时,终止程序 if(mat.size1()<6){ std::cerr << 'error: the number of data rows should be greater than 5.' << std::endl; exit(1); } if(mat.size2()<2){ std::cerr << 'error: the number of data columns should be greater than 1.' << std::endl; exit(1); } // 创建空的对称矩阵 matrix<double, row_major> result(mat.size2(), mat.size2()); // 根据列名分组 std::map<std::string, std::vector<size_t>> groups; for (size_t i = 0; i < col_names.size(); ++i) { groups[col_names[i]].push_back(i); } // 计算每列两两之间的mic系数,并填充对称矩阵 omp_set_num_threads(threads); #pragma omp parallel for for (size_t g = 0; g < groups.size(); ++g) { for (size_t i = 0; i < groups[col_names[g]].size(); ++i) { MINE mine(0.5, 15, EST_MIC_APPROX); for (size_t j = i + 1; j < groups[col_names[g]].size(); ++j) { // 计算上三角部分 double x[mat.size1()]; double y[mat.size1()]; for (size_t k = 0; k < mat.size1(); ++k) { x[k] = mat(k,groups[col_names[g]][i]); y[k] = mat(k,groups[col_names[g]][j]); } mine.compute_score(x, y, mat.size1()); result(groups[col_names[g]][i],groups[col_names[g]][j]) = mine.mic(); result(groups[col_names[g]][j],groups[col_names[g]][i]) = result(groups[col_names[g]][i],groups[col_names[g]][j]); // 填充下三角部分 } } } return result; }
//导出结果
void write_tsv(const std::string& filename, const matrix<double, row_major>& mat, const std::vectorstd::string& col_names) { std::ofstream out(filename); // 输出列名 out << '\t'; for (const auto& col_name : col_names) out << col_name << '\t'; out << '\n'; // 输出数据行 for (size_t i = 0; i < mat.size1(); ++i) { out << col_names[i] << '\t'; for (size_t j = 0; j < mat.size2(); ++j) { out << std::setprecision(3) <<std::fixed << mat(i, j); if (j < mat.size2() - 1) out << '\t'; } out << '\n'; } }
//main函数
int main(int argc, char **argv) {
// 声明命令行参数选项
int default_threads = 4;
int threads;
po::options_description desc(" Multi-threaded computation of maximum information coefficient.\n"
" For more detailed information, please refer to https://minepy.readthedocs.io/en/latest/");
desc.add_options()
("help,h", "help message")
("input,i", po::valuestd::string(), "input.txt")
("output,o", po::valuestd::string(), "output.txt,The first column is the name of the indicator, such as ASV/OTU or gene")
("threads,t", po::value
if (vm.count("help")) {
std::cout << desc << '\n';
return 0;
}
if (vm.empty()) {
std::cout << desc << '\n';
return 0;
}
//
int coreNum = omp_get_num_procs();//获得处理器个数
// thread_ct = vm.count["threads"].as<int>();
if(coreNum > 1 && threads > coreNum){
fprintf(stderr, "\n%s: error: only %d threads are available\n", argv[0], coreNum);
exit(1);
}
if (vm.count("input")) {
std::string filename = vm["input"].as<std::string>();
std::ifstream file(filename);
if (file) {
// 文件存在,执行你的操作
if (vm.count("output")){
std::vector<std::string> col_names;
auto mat = read_tsv(vm["input"].as<std::string>(), col_names);
auto result = calc_mic(mat,col_names,threads);
write_tsv(vm["output"].as<std::string>(), result, col_names);
}else{
std::cerr << "output is not specified!" << std::endl;
return 1;
}
} else {
// 文件不存在
std::cerr << vm["input"].as<std::string>() << " not found!" << std::endl;
return 1;
}
} else {
std::cerr << "No file provided!" << std::endl;
return 1;
}
return 0;
}
原文地址: https://www.cveoy.top/t/topic/fU4X 著作权归作者所有。请勿转载和采集!