#include <cstddef>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>

#include <curl/curl.h>

/// libcurl write callback: appends each received chunk to a std::string.
///
/// @param contents Pointer to the bytes delivered by libcurl (not NUL-terminated).
/// @param size     Size of one element.
/// @param nmemb    Number of elements; the chunk holds size * nmemb bytes.
/// @param output   Destination string registered through CURLOPT_WRITEDATA.
/// @return Number of bytes consumed. Returns 0 when `output` is null, which
///         differs from the chunk size for any non-empty chunk and therefore
///         makes libcurl abort the transfer instead of dereferencing null.
size_t WriteCallback(void* contents, size_t size, size_t nmemb, std::string* output) {
    if (output == nullptr) {
        return 0;
    }
    const size_t total_size = size * nmemb;
    output->append(static_cast<const char*>(contents), total_size);
    return total_size;
}

int main() { // 输入基因名字 std::string gene_name; std::cout << "请输入基因名字:"; std::cin >> gene_name;

// 构建搜索URL
std::string search_url = "https://www.ncbi.nlm.nih.gov/bioproject/?term=" + gene_name;

// 初始化libcurl
curl_global_init(CURL_GLOBAL_ALL);
CURL* curl = curl_easy_init();
if (curl) {
    // 设置libcurl选项
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

    // 创建回调函数的输出字符串
    std::string response;

    // 设置回调函数
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);

    // 执行第一页的HTTP请求
    curl_easy_setopt(curl, CURLOPT_URL, search_url.c_str());
    CURLcode res = curl_easy_perform(curl);

    // 检查请求是否成功
    if (res != CURLE_OK) {
        std::cerr << "请求失败:" << curl_easy_strerror(res) << std::endl;
        return 1;
    }

    // 关闭libcurl
    curl_easy_cleanup(curl);

    // 将结果写入txt文档
    std::ofstream output_file("accession_numbers.txt");
    if (output_file.is_open()) {
        // 在response中查找Accession号
        size_t start_pos = 0;
        while ((start_pos = response.find("Accession:", start_pos)) != std::string::npos) {
            size_t accession_start = start_pos + 10;
            size_t accession_end = response.find("</dd>", accession_start);
            std::string accession_number = response.substr(accession_start, accession_end - accession_start);
            output_file << accession_number << std::endl;
            start_pos = accession_end;
        }

        // 获取其他页的结果
        size_t page_start_pos = response.find("Page 1 of ");
        if (page_start_pos != std::string::npos) {
            size_t page_end_pos = response.find("</div>", page_start_pos);
            std::string page_info = response.substr(page_start_pos, page_end_pos - page_start_pos);
            size_t total_pages = std::stoi(page_info.substr(10));
            for (size_t page = 2; page <= total_pages; page++) {
                std::string page_url = search_url + "&page=" + std::to_string(page);
                curl = curl_easy_init();
                if (curl) {
                    // 设置libcurl选项
                    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

                    // 重设回调函数的输出字符串
                    response = "";

                    // 设置回调函数
                    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
                    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);

                    // 执行其他页的HTTP请求
                    curl_easy_setopt(curl, CURLOPT_URL, page_url.c_str());
                    res = curl_easy_perform(curl);

                    // 检查请求是否成功
                    if (res != CURLE_OK) {
                        std::cerr << "请求失败:" << curl_easy_strerror(res) << std::endl;
                        return 1;
                    }

                    // 在response中查找Accession号
                    start_pos = 0;
                    while ((start_pos = response.find("Accession:", start_pos)) != std::string::npos) {
                        size_t accession_start = start_pos + 10;
                        size_t accession_end = response.find("</dd>", accession_start);
                        std::string accession_number = response.substr(accession_start, accession_end - accession_start);
                        output_file << accession_number << std::endl;
                        start_pos = accession_end;
                    }

                    // 关闭libcurl
                    curl_easy_cleanup(curl);
                } else {
                    std::cerr << "无法初始化libcurl" << std::endl;
                    return 1;
                }
            }
        }

        output_file.close();
        std::cout << "Accession号已写入accession_numbers.txt" << std::endl;
    } else {
        std::cerr << "无法打开文件" << std::endl;
        return 1;
    }
} else {
    std::cerr << "无法初始化libcurl" << std::endl;
    return 1;
}

// 清理libcurl
curl_global_cleanup();

return 0;
// C++ 使用libcurl 获取NCBI BioProject页面所有Accession号
//
// 原文地址: https://www.cveoy.top/t/topic/fUdy 著作权归作者所有。请勿转载和采集!
//
// 免费AI点我,无需注册和登录