// 决策树训练 C++ 代码 — a simple decision-tree training example in C++.
#include <cmath>
#include <iostream>
#include <unordered_map>
#include <vector>

using namespace std;
// A binary tree node. Internal nodes store the index of the feature split
// on; leaf nodes store a class label (the two uses share `val`).
struct TreeNode {
    int val;          // feature index (internal node) or class label (leaf)
    TreeNode *left;   // subtree where the split feature == 0
    TreeNode *right;  // subtree where the split feature != 0
    // nullptr (not NULL) — children are absent until the tree is grown.
    TreeNode(int x) : val(x), left(nullptr), right(nullptr) {}
};
// ID3-style decision tree for binary (0/1) features and integer class
// labels. Splits greedily on the feature with maximal information gain.
// NOTE(review): the source was truncated mid-signature for train/buildTree;
// the signatures below are reconstructed from the call sites and loop bodies.
class DecisionTree {
public:
    // Train a tree on `data` (one row per sample, values 0 or 1) with one
    // label per row in `labels`. `n` is the number of features to consider.
    // Returns the root of a newly allocated tree; the caller owns it.
    TreeNode* train(vector<vector<int>>& data, vector<int>& labels, int n) {
        return buildTree(data, labels, n);
    }

private:
    // Recursive tree construction. Internal nodes carry the feature index
    // split on; leaves carry the (majority) class label.
    TreeNode* buildTree(vector<vector<int>>& data, vector<int>& labels, int n) {
        // No samples left: nothing to represent.
        if (labels.empty()) {
            return nullptr;
        }
        // Base case: all labels identical -> pure leaf. Without this check
        // (and the empty-split check below) the recursion never terminates.
        bool pure = true;
        for (int label : labels) {
            if (label != labels[0]) { pure = false; break; }
        }
        if (pure) {
            return new TreeNode(labels[0]);
        }

        // Pick the feature with the largest information gain.
        int bestFeature = 0;
        double bestGain = -1.0;
        for (int i = 0; i < n; i++) {
            double gain = getInformationGain(data, labels, i);
            if (gain > bestGain) {
                bestGain = gain;
                bestFeature = i;
            }
        }

        // Partition the samples: feature value 0 goes left, anything else right.
        vector<vector<int>> leftData, rightData;
        vector<int> leftLabels, rightLabels;
        for (size_t i = 0; i < data.size(); i++) {
            if (data[i][bestFeature] == 0) {
                leftData.push_back(data[i]);
                leftLabels.push_back(labels[i]);
            } else {
                rightData.push_back(data[i]);
                rightLabels.push_back(labels[i]);
            }
        }

        // A split that separates nothing (one side empty, or zero gain) would
        // recurse on the identical sample set forever; stop with a majority leaf.
        if (leftLabels.empty() || rightLabels.empty() || bestGain <= 0.0) {
            return new TreeNode(majorityLabel(labels));
        }

        TreeNode* root = new TreeNode(bestFeature);
        root->left = buildTree(leftData, leftLabels, n);
        root->right = buildTree(rightData, rightLabels, n);
        return root;
    }

    // Most frequent label in `labels` (ties broken arbitrarily).
    // Precondition: labels is non-empty.
    int majorityLabel(vector<int>& labels) {
        unordered_map<int, int> count;
        int best = labels[0];
        int bestCount = 0;
        for (int label : labels) {
            if (++count[label] > bestCount) {
                bestCount = count[label];
                best = label;
            }
        }
        return best;
    }

    // Information gain of splitting on `feature`:
    //   H(labels) - (|L|*H(L) + |R|*H(R)) / |labels|
    // where L/R are the labels of samples with feature value 0 / non-zero.
    double getInformationGain(vector<vector<int>>& data, vector<int>& labels, int feature) {
        // Entropy of the whole node before the split.
        double entropy = getEntropy(labels);
        // Entropy after partitioning on the current feature.
        size_t n = data.size();
        vector<int> leftLabels, rightLabels;
        for (size_t i = 0; i < n; i++) {
            if (data[i][feature] == 0) {
                leftLabels.push_back(labels[i]);
            } else {
                rightLabels.push_back(labels[i]);
            }
        }
        double leftEntropy = getEntropy(leftLabels);
        double rightEntropy = getEntropy(rightLabels);
        // Size-weighted average of the child entropies, subtracted from the parent's.
        return entropy - (leftLabels.size() * leftEntropy + rightLabels.size() * rightEntropy) / n;
    }

    // Shannon entropy (base 2) of the label distribution; 0 for empty input.
    double getEntropy(vector<int>& labels) {
        size_t n = labels.size();
        if (n == 0) {
            return 0.0;
        }
        // Count occurrences of each distinct label.
        unordered_map<int, int> count;
        for (int label : labels) {
            count[label]++;
        }
        // H = -sum_i p_i * log2(p_i)
        double entropy = 0.0;
        for (const auto& kv : count) {
            double p = (double)kv.second / n;
            entropy -= p * log2(p);
        }
        return entropy;
    }
};
int main() {
vector<vector
// 原文地址: https://www.cveoy.top/t/topic/e9bR 著作权归作者所有。请勿转载和采集!