根据给定的 accession 号,使用 C++ Boost 库的 asio 从 NCBI 高速下载对应的 fastq 文件;要求具有断点续传功能,并以原始文件名作为输出文件名。
下面是一个使用C++ Boost库的asio实现从NCBI高速下载fastq文件并具有断点续下载功能的示例代码:
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>

#include <boost/asio.hpp>
#include <boost/bind.hpp>
#include <boost/format.hpp>
using boost::asio::ip::tcp;
class NCBIFileDownloader {
public:
NCBIFileDownloader(boost::asio::io_service& io_service, const std::string& accession)
: io_service_(io_service), socket_(io_service), accession_(accession), file_size_(0), bytes_transferred_(0) {
std::string hostname = "ftp.ncbi.nlm.nih.gov";
std::string path = "/sra/sra-instant/reads/ByRun/sra/" + accession_.substr(0, 3) + "/" + accession_.substr(0, 6) + "/" + accession_ + "/" + accession_ + ".fastq.gz";
tcp::resolver resolver(io_service_);
tcp::resolver::query query(hostname, "ftp");
tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
tcp::resolver::iterator end;
boost::system::error_code error = boost::asio::error::host_not_found;
while (error && endpoint_iterator != end) {
socket_.close();
socket_.connect(*endpoint_iterator++, error);
}
if (error) {
throw boost::system::system_error(error);
}
request_ = boost::str(boost::format("GET %1% HTTP/1.1\r\nHost: %2%\r\nRange: bytes=%3%-\r\nConnection: close\r\n\r\n") % path % hostname % bytes_transferred_);
}
void start() {
boost::asio::async_write(socket_, boost::asio::buffer(request_), boost::bind(&NCBIFileDownloader::handle_write, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
}
void handle_write(const boost::system::error_code& error, std::size_t /*bytes_transferred*/) {
if (!error) {
boost::asio::async_read_until(socket_, response_, "\r\n\r\n", boost::bind(&NCBIFileDownloader::handle_header, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
} else {
std::cout << "Error: " << error.message() << std::endl;
socket_.close();
}
}
void handle_header(const boost::system::error_code& error, std::size_t /*bytes_transferred*/) {
if (!error) {
std::istream response_stream(&response_);
std::string http_version;
response_stream >> http_version;
unsigned int status_code;
response_stream >> status_code;
std::string status_message;
std::getline(response_stream, status_message);
if (status_code == 200 || status_code == 206) {
std::string header;
while (std::getline(response_stream, header) && header != "\r") {
if (header.find("Content-Length: ") == 0) {
file_size_ = std::stoull(header.substr(16));
}
}
std::cout << "File size: " << file_size_ << " bytes" << std::endl;
std::string output_filename = accession_ + ".fastq.gz";
output_file_.open(output_filename, std::ios::app | std::ios::binary);
if (!output_file_.is_open()) {
throw std::runtime_error("Failed to open file for writing: " + output_filename);
}
boost::asio::async_read(socket_, response_, boost::asio::transfer_at_least(1), boost::bind(&NCBIFileDownloader::handle_read, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
} else if (status_code == 206) {
// Resume download from the last transferred byte
std::cout << "Resuming download from byte: " << bytes_transferred_ << std::endl;
boost::asio::async_read(socket_, response_, boost::asio::transfer_at_least(1), boost::bind(&NCBIFileDownloader::handle_read, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
} else {
std::cout << "Download failed with status code: " << status_code << std::endl;
socket_.close();
}
} else {
std::cout << "Error: " << error.message() << std::endl;
socket_.close();
}
}
void handle_read(const boost::system::error_code& error, std::size_t bytes_transferred) {
if (!error) {
bytes_transferred_ += bytes_transferred;
output_file_.write(boost::asio::buffer_cast<const char*>(response_.data()), bytes_transferred);
response_.consume(bytes_transferred);
if (bytes_transferred_ < file_size_) {
boost::asio::async_read(socket_, response_, boost::asio::transfer_at_least(1), boost::bind(&NCBIFileDownloader::handle_read, this, boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
} else {
std::cout << "Download completed" << std::endl;
output_file_.close();
socket_.close();
}
} else {
std::cout << "Error: " << error.message() << std::endl;
output_file_.close();
socket_.close();
}
}
private:
boost::asio::io_service& io_service_;
tcp::socket socket_;
std::string accession_;
std::string request_;
boost::asio::streambuf response_;
std::ofstream output_file_;
std::size_t file_size_;
std::size_t bytes_transferred_;
};
/// Entry point: downloads the fastq archive for one accession.
///
/// The accession is taken from the first command-line argument; when none is
/// given the placeholder "SRR000000" is used. Returns 0 when the event loop
/// finishes without an exception and 1 when connecting or downloading throws.
int main(int argc, char* argv[]) {
    const std::string accession = (argc > 1) ? argv[1] : "SRR000000";
    try {
        boost::asio::io_service io_service;
        NCBIFileDownloader downloader(io_service, accession);
        downloader.start();
        // Blocks until every pending asynchronous operation has completed.
        io_service.run();
    } catch (const std::exception& e) {
        // The downloader's constructor and its handlers may throw; report the
        // failure instead of letting the process terminate abnormally.
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
请确保已正确安装和配置 Boost 库,并使用适当的命令编译和链接代码(例如:g++ -std=c++11 main.cpp -o downloader -lboost_system -lpthread)。
原文地址: https://www.cveoy.top/t/topic/inaH 著作权归作者所有。请勿转载和采集!