下面是一个使用 C++ Boost 库中的 Asio 从 NCBI 下载 fastq 文件、并支持断点续传的示例代码:

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>

#include <boost/asio.hpp>
#include <boost/bind.hpp>
#include <boost/format.hpp>

using boost::asio::ip::tcp;

class NCBIFileDownloader {
public:
    NCBIFileDownloader(boost::asio::io_service& io_service, const std::string& accession)
        : io_service_(io_service), socket_(io_service), accession_(accession), file_size_(0), bytes_transferred_(0) {
        std::string hostname = "ftp.ncbi.nlm.nih.gov";
        std::string path = "/sra/sra-instant/reads/ByRun/sra/" + accession_.substr(0, 3) + "/" + accession_.substr(0, 6) + "/" + accession_ + "/" + accession_ + ".fastq.gz";
        tcp::resolver resolver(io_service_);
        tcp::resolver::query query(hostname, "ftp");
        tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
        tcp::resolver::iterator end;
        boost::system::error_code error = boost::asio::error::host_not_found;
        while (error && endpoint_iterator != end) {
            socket_.close();
            socket_.connect(*endpoint_iterator++, error);
        }
        if (error) {
            throw boost::system::system_error(error);
        }
        request_ = boost::str(boost::format("GET %1% HTTP/1.1\r\nHost: %2%\r\nRange: bytes=%3%-\r\nConnection: close\r\n\r\n") % path % hostname % bytes_transferred_);
    }

    void start() {
        boost::asio::async_write(socket_, boost::asio::buffer(request_), boost::bind(&NCBIFileDownloader::handle_write, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
    }

    void handle_write(const boost::system::error_code& error, std::size_t /*bytes_transferred*/) {
        if (!error) {
            boost::asio::async_read_until(socket_, response_, "\r\n\r\n", boost::bind(&NCBIFileDownloader::handle_header, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
        } else {
            std::cout << "Error: " << error.message() << std::endl;
            socket_.close();
        }
    }

    void handle_header(const boost::system::error_code& error, std::size_t /*bytes_transferred*/) {
        if (!error) {
            std::istream response_stream(&response_);
            std::string http_version;
            response_stream >> http_version;
            unsigned int status_code;
            response_stream >> status_code;
            std::string status_message;
            std::getline(response_stream, status_message);
            if (status_code == 200 || status_code == 206) {
                std::string header;
                while (std::getline(response_stream, header) && header != "\r") {
                    if (header.find("Content-Length: ") == 0) {
                        file_size_ = std::stoull(header.substr(16));
                    }
                }
                std::cout << "File size: " << file_size_ << " bytes" << std::endl;
                std::string output_filename = accession_ + ".fastq.gz";
                output_file_.open(output_filename, std::ios::app | std::ios::binary);
                if (!output_file_.is_open()) {
                    throw std::runtime_error("Failed to open file for writing: " + output_filename);
                }
                boost::asio::async_read(socket_, response_, boost::asio::transfer_at_least(1), boost::bind(&NCBIFileDownloader::handle_read, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
            } else if (status_code == 206) {
                // Resume download from the last transferred byte
                std::cout << "Resuming download from byte: " << bytes_transferred_ << std::endl;
                boost::asio::async_read(socket_, response_, boost::asio::transfer_at_least(1), boost::bind(&NCBIFileDownloader::handle_read, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
            } else {
                std::cout << "Download failed with status code: " << status_code << std::endl;
                socket_.close();
            }
        } else {
            std::cout << "Error: " << error.message() << std::endl;
            socket_.close();
        }
    }

    void handle_read(const boost::system::error_code& error, std::size_t bytes_transferred) {
        if (!error) {
            bytes_transferred_ += bytes_transferred;
            output_file_.write(boost::asio::buffer_cast<const char*>(response_.data()), bytes_transferred);
            response_.consume(bytes_transferred);
            if (bytes_transferred_ < file_size_) {
                boost::asio::async_read(socket_, response_, boost::asio::transfer_at_least(1), boost::bind(&NCBIFileDownloader::handle_read, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
            } else {
                std::cout << "Download completed" << std::endl;
                output_file_.close();
                socket_.close();
            }
        } else {
            std::cout << "Error: " << error.message() << std::endl;
            output_file_.close();
            socket_.close();
        }
    }

private:
    boost::asio::io_service& io_service_;
    tcp::socket socket_;
    std::string accession_;
    std::string request_;
    boost::asio::streambuf response_;
    std::ofstream output_file_;
    std::size_t file_size_;
    std::size_t bytes_transferred_;
};

/// Program entry point: downloads the fastq file for one accession.
///
/// The accession is taken from the first command-line argument when given and
/// defaults to "SRR000000" otherwise.
///
/// @return 0 when the io_service ran to completion, 1 if an exception escaped
///         (for example the connection could not be established or the output
///         file could not be opened). Errors that the downloader only prints
///         from its handlers do not change the exit code.
int main(int argc, char* argv[]) {
    const std::string accession = (argc > 1) ? argv[1] : "SRR000000";
    try {
        boost::asio::io_service io_service;
        NCBIFileDownloader downloader(io_service, accession);
        downloader.start();
        io_service.run();
    } catch (const std::exception& e) {
        // The downloader's constructor and handlers may throw; report the
        // reason instead of letting the exception terminate the process.
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}

请确保已正确安装和配置 Boost 库,并使用适当的命令编译和链接代码,例如:g++ -std=c++11 downloader.cpp -o downloader -lboost_system -lpthread。

根据给定的 accession 号,使用 C++ 的 Boost 库中的 Asio 从 NCBI 高速下载对应的 fastq 文件;要求支持断点续传,并以原文件名作为输出文件名。

原文地址: https://www.cveoy.top/t/topic/inaH 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录