下面是一个用C语言编写的示例代码,用于计算五个子集的AUROC和AUPR,并将ROC曲线和PR曲线的数据点写入文本文件,以便用外部工具绘图。

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/**
 * Compute the area under the ROC curve (AUROC) by trapezoidal integration.
 *
 * Side effect: `labels` and `scores` are reordered IN PLACE so that scores
 * are in descending order (each label moves together with its score).
 *
 * Samples sharing the same score are treated as one threshold step, so the
 * result does not depend on the order of tied samples (a tie between one
 * positive and one negative contributes 0.5).
 *
 * @param labels       ground-truth labels; a value equal to 1 is positive,
 *                     anything else is negative
 * @param scores       predicted scores, higher means "more positive"
 * @param num_samples  number of entries in both arrays
 * @return the AUROC in [0, 1]; 0.0 when num_samples <= 0 or when only one
 *         class is present (the ROC curve is undefined in that case)
 */
double calculate_auroc(double* labels, double* scores, int num_samples) {
    if (num_samples <= 0) {
        return 0.0;
    }

    // Sort samples by descending score, keeping labels paired with scores.
    for (int i = 0; i < num_samples; i++) {
        for (int j = i + 1; j < num_samples; j++) {
            if (scores[j] > scores[i]) {
                double temp_score = scores[i];
                scores[i] = scores[j];
                scores[j] = temp_score;

                double temp_label = labels[i];
                labels[i] = labels[j];
                labels[j] = temp_label;
            }
        }
    }

    // Count the positive and negative samples.
    int num_positives = 0;
    for (int i = 0; i < num_samples; i++) {
        if (labels[i] == 1) {
            num_positives++;
        }
    }
    int num_negatives = num_samples - num_positives;
    if (num_positives == 0 || num_negatives == 0) {
        // TPR or FPR has a zero denominator; report 0.0 instead of dividing.
        return 0.0;
    }

    // Walk the ranking one distinct score at a time and add one trapezoid
    // per step. Rates are derived from integer counts to avoid accumulating
    // rounding error from repeated additions of 1/num_positives.
    double auroc = 0.0;
    double prev_tpr = 0.0;
    double prev_fpr = 0.0;
    int true_positives = 0;
    int false_positives = 0;
    int i = 0;
    while (i < num_samples) {
        double threshold = scores[i];
        // do/while guarantees progress even if a score is NaN (NaN != NaN).
        do {
            if (labels[i] == 1) {
                true_positives++;
            } else {
                false_positives++;
            }
            i++;
        } while (i < num_samples && scores[i] == threshold);

        double tpr = (double)true_positives / num_positives;
        double fpr = (double)false_positives / num_negatives;
        auroc += (fpr - prev_fpr) * (tpr + prev_tpr) / 2.0;
        prev_tpr = tpr;
        prev_fpr = fpr;
    }

    return auroc;
}

/**
 * Compute the area under the precision-recall curve (AUPR) with the
 * right-endpoint rectangle rule: sum of (recall_k - recall_{k-1}) * precision_k
 * over successive thresholds.
 *
 * Side effect: `labels` and `scores` are reordered IN PLACE so that scores
 * are in descending order (each label moves together with its score).
 *
 * Samples sharing the same score are treated as one threshold step, so the
 * result does not depend on the order of tied samples.
 *
 * @param labels       ground-truth labels; a value equal to 1 is positive,
 *                     anything else is negative
 * @param scores       predicted scores, higher means "more positive"
 * @param num_samples  number of entries in both arrays
 * @return the AUPR in [0, 1]; 0.0 when num_samples <= 0 or when there are
 *         no positive samples (recall is undefined in that case)
 */
double calculate_aupr(double* labels, double* scores, int num_samples) {
    if (num_samples <= 0) {
        return 0.0;
    }

    // Sort samples by descending score, keeping labels paired with scores.
    for (int i = 0; i < num_samples; i++) {
        for (int j = i + 1; j < num_samples; j++) {
            if (scores[j] > scores[i]) {
                double temp_score = scores[i];
                scores[i] = scores[j];
                scores[j] = temp_score;

                double temp_label = labels[i];
                labels[i] = labels[j];
                labels[j] = temp_label;
            }
        }
    }

    // Recall needs the total number of positives as its denominator.
    int num_positives = 0;
    for (int i = 0; i < num_samples; i++) {
        if (labels[i] == 1) {
            num_positives++;
        }
    }
    if (num_positives == 0) {
        return 0.0;
    }

    double aupr = 0.0;
    double prev_recall = 0.0;
    int true_positives = 0;
    int i = 0;
    while (i < num_samples) {
        double threshold = scores[i];
        // do/while guarantees progress even if a score is NaN (NaN != NaN).
        do {
            if (labels[i] == 1) {
                true_positives++;
            }
            i++;
        } while (i < num_samples && scores[i] == threshold);

        // After the step, the top `i` samples are predicted positive.
        double recall = (double)true_positives / num_positives;
        double precision = (double)true_positives / i;
        aupr += (recall - prev_recall) * precision;
        prev_recall = recall;
    }

    return aupr;
}

/* Dataset limits: five subset files, at most MAX_SAMPLES rows per file. */
enum { NUM_SUBSETS = 5, MAX_SAMPLES = 100 };

/* Signature shared by the per-point curve metrics below. */
typedef double (*curve_metric_fn)(const double *labels, const double *scores,
                                  int num_samples, int j);

/* Count how many of the first `count` labels equal 1 (the positive class). */
static int count_positive_labels(const double *labels, int count) {
    int positives = 0;
    for (int i = 0; i < count; i++) {
        if (labels[i] == 1) {
            positives++;
        }
    }
    return positives;
}

/*
 * True-positive rate when the first `j` samples are predicted positive.
 * Expects the arrays to be ordered by descending score. Returns 0.0 when
 * there are no positive samples at all.
 */
static double calculate_tpr(const double *labels, const double *scores,
                            int num_samples, int j) {
    (void)scores;
    int total_positives = count_positive_labels(labels, num_samples);
    if (total_positives == 0) {
        return 0.0;
    }
    return (double)count_positive_labels(labels, j) / total_positives;
}

/*
 * False-positive rate when the first `j` samples are predicted positive.
 * Expects the arrays to be ordered by descending score. Returns 0.0 when
 * there are no negative samples at all.
 */
static double calculate_fpr(const double *labels, const double *scores,
                            int num_samples, int j) {
    (void)scores;
    int total_negatives = num_samples - count_positive_labels(labels, num_samples);
    if (total_negatives == 0) {
        return 0.0;
    }
    int false_positives = j - count_positive_labels(labels, j);
    return (double)false_positives / total_negatives;
}

/* Recall is the same quantity as the true-positive rate. */
static double calculate_recall(const double *labels, const double *scores,
                               int num_samples, int j) {
    return calculate_tpr(labels, scores, num_samples, j);
}

/*
 * Precision when the first `j` samples are predicted positive. With j == 0
 * nothing is predicted positive; 1.0 is used by convention for that point.
 */
static double calculate_precision(const double *labels, const double *scores,
                                  int num_samples, int j) {
    (void)scores;
    (void)num_samples;
    if (j == 0) {
        return 1.0;
    }
    return (double)count_positive_labels(labels, j) / j;
}

/*
 * A cut after `j` samples is a real threshold only at the ends of the
 * ranking or where the score changes; cuts inside a run of tied scores
 * would make the curve depend on the arbitrary order of the ties.
 */
static int is_curve_point(const double *scores, int num_samples, int j) {
    return j == 0 || j == num_samples || scores[j - 1] != scores[j];
}

/*
 * Write one "x y" line per threshold to `path`, using `x_metric` and
 * `y_metric` for the two columns. Prints a message and returns -1 on I/O
 * failure, 0 on success.
 */
static int write_curve(const char *path, const double *labels,
                       const double *scores, int num_samples,
                       curve_metric_fn x_metric, curve_metric_fn y_metric) {
    FILE *out = fopen(path, "w");
    if (out == NULL) {
        printf("Failed to create file %s\n", path);
        return -1;
    }

    for (int j = 0; j <= num_samples; j++) {
        if (!is_curve_point(scores, num_samples, j)) {
            continue;
        }
        fprintf(out, "%lf %lf\n",
                x_metric(labels, scores, num_samples, j),
                y_metric(labels, scores, num_samples, j));
    }

    // fclose flushes buffered output, so a failure here means lost data.
    if (fclose(out) != 0) {
        printf("Failed to write file %s\n", path);
        return -1;
    }
    return 0;
}

/*
 * Read subset0.txt .. subset4.txt ("label score" per row), report AUROC and
 * AUPR for each subset, their mean, and the metrics of the merged dataset.
 * Curve data points are written to subset<i>_roc_curve.txt,
 * subset<i>_pr_curve.txt, whole_dataset_roc_curve.txt and
 * whole_dataset_pr_curve.txt; the merged samples go to merged_data.txt.
 * Returns 0 on success and 1 if any file cannot be opened or written.
 */
int main(void) {
    double labels[NUM_SUBSETS][MAX_SAMPLES];
    double scores[NUM_SUBSETS][MAX_SAMPLES];
    int num_samples[NUM_SUBSETS];

    // Load every subset file.
    for (int i = 0; i < NUM_SUBSETS; i++) {
        char filename[32];
        snprintf(filename, sizeof filename, "subset%d.txt", i);

        FILE *file = fopen(filename, "r");
        if (file == NULL) {
            printf("Failed to open file %s\n", filename);
            return 1;
        }

        // Stop at MAX_SAMPLES so an oversized file cannot overflow the arrays.
        int j = 0;
        while (j < MAX_SAMPLES &&
               fscanf(file, "%lf %lf", &labels[i][j], &scores[i][j]) == 2) {
            j++;
        }
        if (j == MAX_SAMPLES) {
            double extra_label;
            double extra_score;
            if (fscanf(file, "%lf %lf", &extra_label, &extra_score) == 2) {
                fprintf(stderr,
                        "Warning: %s has more than %d samples; extra rows ignored\n",
                        filename, MAX_SAMPLES);
            }
        }

        num_samples[i] = j;
        fclose(file);
    }

    // Per-subset metrics and curves; the sums feed the mean below.
    double mean_auroc = 0.0;
    double mean_aupr = 0.0;
    for (int i = 0; i < NUM_SUBSETS; i++) {
        // calculate_auroc/calculate_aupr sort the arrays by descending score
        // in place, which is the ordering the curve helpers rely on.
        double auroc = calculate_auroc(labels[i], scores[i], num_samples[i]);
        double aupr = calculate_aupr(labels[i], scores[i], num_samples[i]);

        printf("Subset %d - AUROC: %lf, AUPR: %lf\n", i, auroc, aupr);
        mean_auroc += auroc;
        mean_aupr += aupr;

        // One pair of curve files per subset so that no subset overwrites
        // another one's output.
        char path[48];
        snprintf(path, sizeof path, "subset%d_roc_curve.txt", i);
        if (write_curve(path, labels[i], scores[i], num_samples[i],
                        calculate_fpr, calculate_tpr) != 0) {
            return 1;
        }
        snprintf(path, sizeof path, "subset%d_pr_curve.txt", i);
        if (write_curve(path, labels[i], scores[i], num_samples[i],
                        calculate_recall, calculate_precision) != 0) {
            return 1;
        }
    }
    mean_auroc /= NUM_SUBSETS;
    mean_aupr /= NUM_SUBSETS;

    printf("Mean AUROC: %lf, Mean AUPR: %lf\n", mean_auroc, mean_aupr);

    // Merge all subsets. The merged arrays are filled directly from memory;
    // reading the text file back would truncate scores to six decimals.
    double merged_labels[NUM_SUBSETS * MAX_SAMPLES];
    double merged_scores[NUM_SUBSETS * MAX_SAMPLES];
    int total_samples = 0;

    FILE *merged_file = fopen("merged_data.txt", "w");
    if (merged_file == NULL) {
        printf("Failed to create file merged_data.txt\n");
        return 1;
    }
    for (int i = 0; i < NUM_SUBSETS; i++) {
        for (int j = 0; j < num_samples[i]; j++) {
            fprintf(merged_file, "%lf %lf\n", labels[i][j], scores[i][j]);
            merged_labels[total_samples] = labels[i][j];
            merged_scores[total_samples] = scores[i][j];
            total_samples++;
        }
    }
    if (fclose(merged_file) != 0) {
        printf("Failed to write file merged_data.txt\n");
        return 1;
    }

    double auroc = calculate_auroc(merged_labels, merged_scores, total_samples);
    double aupr = calculate_aupr(merged_labels, merged_scores, total_samples);

    printf("Whole Dataset - AUROC: %lf, AUPR: %lf\n", auroc, aupr);

    // Curves for the merged dataset.
    if (write_curve("whole_dataset_roc_curve.txt", merged_labels, merged_scores,
                    total_samples, calculate_fpr, calculate_tpr) != 0) {
        return 1;
    }
    if (write_curve("whole_dataset_pr_curve.txt", merged_labels, merged_scores,
                    total_samples, calculate_recall, calculate_precision) != 0) {
        return 1;
    }

    return 0;
}

请注意,上述代码调用了 calculate_fpr、calculate_tpr、calculate_recall 和 calculate_precision 四个函数,它们需要根据具体的计算方法进行实现。这些函数的实现取决于您所使用的具体评估指标和算法。

在代码中,每个子集的真实标签和预测值分别存储在 labels 和 scores 数组中,子集中的样本数量存储在 num_samples 数组中。代码会计算每个子集的AUROC和AUPR,并将结果打印出来。然后,将所有子集的数据合并到一个文件中,并计算整个大数据集的AUROC和AUPR,并将结果打印出来。最后,代码会将每个子集和整个大数据集的ROC曲线和PR曲线保存到文件中。

要运行此代码,需要将五个子集的数据分别保存在名为 subset0.txt、subset1.txt、subset2.txt、subset3.txt 和 subset4.txt 的文件中,并确保文件中的数据格式正确(每行依次为真实标签和预测值,以空白分隔)。另外,需要根据具体的计算方法实现缺失的函数。

文件中储存着样本的真实标签和预测值。根据我们所讲的计算方法,用C语言编程求解这五个子集的AUROC和AUPR,并画图(注意:不要直接调用现成的计算AUROC和AUPR的函数),并计算这五个子集的AUROC和AUPR的均值。将这五个文件合并到一起,计算整个大数据集的AUROC和AUPR并画图,并查看它跟前面的均值是否相同。

原文地址: https://www.cveoy.top/t/topic/i3Qu 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录