Files
OpenSpace/include/openspace/util/httprequest.h
2024-02-06 15:53:24 +01:00

476 lines
21 KiB
C++

/*****************************************************************************************
* *
* OpenSpace *
* *
* Copyright (c) 2014-2024 *
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy of this *
* software and associated documentation files (the "Software"), to deal in the Software *
* without restriction, including without limitation the rights to use, copy, modify, *
* merge, publish, distribute, sublicense, and/or sell copies of the Software, and to *
* permit persons to whom the Software is furnished to do so, subject to the following *
* conditions: *
* *
* The above copyright notice and this permission notice shall be included in all copies *
* or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A *
* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT *
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF *
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE *
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *
****************************************************************************************/
#ifndef __OPENSPACE_CORE___HTTPREQUEST___H__
#define __OPENSPACE_CORE___HTTPREQUEST___H__
#include <ghoul/misc/boolean.h>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <filesystem>
#include <fstream>
#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <vector>
namespace openspace {
/**
 * A synchronous HTTP request to a single URL. Everything that the request produces is
 * handed back to the caller through up to three callback functions, which are registered
 * using the #onHeader, #onProgress, and #onData functions. Registering a callback
 * replaces whichever handler was registered before. The ProgressCallback can be used to
 * stop the download by returning `false`.
 *
 * Intended usage:
 *   1. Construct an object with the URL from which the data should be loaded
 *   2. Register the callbacks that are needed (at the very least the #onData callback)
 *   3. Run the request by calling #perform
 */
class HttpRequest {
public:
    /**
     * Progress handler; invoked each time there is progress to report for the download.
     * It receives the number of bytes downloaded so far (0 if the download has not
     * started yet) and, if it is known, the total number of bytes. The download
     * continues if the handler returns `true` and is aborted if it returns `false`.
     *
     * \param downloadedBytes The number of bytes downloaded thus far, or 0 if the
     *        download has not started yet
     * \param totalBytes The total number of bytes of the download if it is known, or
     *        `std::nullopt` if the total size is unknown
     * \return `true` to continue the download, `false` to abort it
     */
    using ProgressCallback =
        std::function<bool(size_t downloadedBytes, std::optional<size_t> totalBytes)>;

    /**
     * Data handler; invoked each time a chunk of data has arrived for the download. The
     * arguments describe the buffer holding the newly arrived data and how many bytes it
     * contains. The buffer will most likely only be valid while the callback is running,
     * so it is the callback's responsibility to store the contents before it returns.
     * Returning `true` lets the download continue; returning `false` signals to the
     * library that an error has occurred from which recovery is not possible.
     *
     * \param buffer Pointer to the start of the buffer holding the new incoming data.
     *        The buffer is only valid during the execution of the callback, so its
     *        contents should be copied to a different location
     * \param size The number of bytes contained in the \p buffer
     * \return `true` to continue the download, `false` to abort it
     */
    using DataCallback = std::function<bool(char* buffer, size_t size)>;

    /**
     * Header handler; invoked when the header of an HTTP request has been received
     * successfully. The buffer and size describe the contents of the header field of the
     * response. The buffer will most likely only be valid while the callback is running,
     * so it is the callback's responsibility to store the contents before it returns.
     * Returning `true` lets the download continue; returning `false` signals to the
     * library that an error has occurred from which recovery is not possible and causes
     * the main download to not start at all.
     *
     * \param buffer Pointer to the start of the buffer holding the header information.
     *        The buffer is only valid during the execution of the callback, so its
     *        contents should be copied to a different location
     * \param size The number of bytes contained in the \p buffer
     * \return `true` to continue the download, `false` to abort it
     */
    using HeaderCallback = std::function<bool(char* buffer, size_t size)>;

    /**
     * Constructs an HttpRequest that will try to download the contents located at the
     * provided \p url.
     *
     * \param url The URL that this HttpRequest should request
     *
     * \pre \p url must not be empty
     */
    explicit HttpRequest(std::string url);

    /**
     * Sets the callback that is called once the header for the request has been
     * transmitted successfully. The callback receives the contents of the header and
     * its return value decides whether the request proceeds or is aborted.
     *
     * \param cb The callback to register. Any previously registered callback is
     *        silently replaced
     */
    void onHeader(HeaderCallback cb);

    /**
     * Sets the callback that is called whenever there is progress to report on the
     * transfer of the request's body. The callback receives the number of bytes that
     * have been downloaded and the total number of bytes that have to be downloaded to
     * complete the request; that information might not always be available. The
     * callback's return value decides whether the request continues or is aborted.
     *
     * \param cb The callback to register. Any previously registered callback is
     *        silently replaced
     */
    void onProgress(ProgressCallback cb);

    /**
     * Sets the callback that is called whenever new data has been received from the
     * request. The callback is responsible for storing that data somewhere that persists
     * across multiple invocations, usually by appending it to an external buffer. The
     * callback returns `true` if the download should proceed and `false` if it should
     * be aborted.
     *
     * \param cb The callback to register. Any previously registered callback is
     *        silently replaced
     */
    void onData(DataCallback cb);

    /**
     * Executes the request to the URL that was provided in the constructor. The request
     * is handled synchronously, so this function only returns once the request has
     * either completed successfully or failed. While it runs, the registered callbacks
     * are called repeatedly until the request finishes.
     *
     * \param timeout The amount of time the request waits before aborting because the
     *        server is not responding. A value of 0 means that there is no timeout on
     *        the request
     * \return `true` if the request completed successfully, `false` otherwise
     */
    bool perform(std::chrono::milliseconds timeout = std::chrono::milliseconds::zero());

    /**
     * Provides access to the URL that was passed into the constructor of this
     * HttpRequest.
     *
     * \return The URL that was passed into the constructor of this HttpRequest
     */
    const std::string& url() const;

private:
    /// Handler that is called when the header was received successfully
    HeaderCallback _onHeader;

    /// Handler that is called when there is progress to be reported
    ProgressCallback _onProgress;

    /// Handler that is called when there is data to be stored away
    DataCallback _onData;

    /// The URL that is requested by this HttpRequest
    std::string _url;
};
/**
 * Abstract base class that uses the HttpRequest class to perform an asynchronous
 * download. Every subclass has to implement at least the #handleData function, which is
 * called each time a chunk of data has been received from the request. The download is
 * started with the #start function; calling the #wait function directly afterwards turns
 * it into a synchronous download. If an HttpRequest::ProgressCallback has been
 * registered through the #onProgress function, that callback is called every time the
 * download has some progress to report.
 */
class HttpDownload {
public:
    /**
     * Constructs an HttpDownload that will begin to download the file pointed to by the
     * \p url parameter as soon as the download is #start ed.
     *
     * \param url The URL that this HttpDownload should download
     *
     * \pre \p url must not be empty
     */
    explicit HttpDownload(std::string url);

    /**
     * Virtual destructor that cancels the ongoing download and blocks until the download
     * has been canceled successfully.
     */
    virtual ~HttpDownload();

    /**
     * Sets the callback that is called whenever there is progress to report on the
     * file's download. The callback receives the number of bytes that have been
     * downloaded and the total number of bytes that should be downloaded; that
     * information might not always be available. The callback's return value decides
     * whether the request continues or is aborted.
     *
     * \param progressCallback The callback to register. Any previously registered
     *        callback is silently replaced
     */
    void onProgress(HttpRequest::ProgressCallback progressCallback);

    /**
     * Begins the asynchronous download of the file by starting a new thread that takes
     * care of the download, which means that this function returns almost
     * instantaneously. Nothing happens if the HttpDownload is already downloading a
     * file.
     *
     * \param timeout The number of milliseconds that the download is kept alive while
     *        waiting for a reply from the server. A value of 0 means that the connection
     *        never times out
     */
    void start(std::chrono::milliseconds timeout = std::chrono::milliseconds::zero());

    /**
     * Cancels the ongoing download. Because of the underlying library that is used, the
     * transfer is only aborted the next time a piece of data is received or the library
     * reports any progress.
     */
    void cancel();

    /**
     * Blocks until the download has completed and reports the success of the download
     * back to the caller.
     *
     * \return `true` if the download succeeded or `false` if the download failed
     */
    bool wait();

    /**
     * Returns `true` if the download has completed and failed, or `false` if the
     * download is either still ongoing or has finished and succeeded.
     *
     * \return Whether the download has completed and failed
     */
    bool hasFailed() const;

    /**
     * Returns `true` if the download has completed successfully, or `false` if the
     * download is either still ongoing or has finished and failed.
     *
     * \return Whether the download has completed successfully
     */
    bool hasSucceeded() const;

    /**
     * Provides access to the URL that was passed into the constructor of this
     * HttpDownload.
     *
     * \return The URL that was passed into the constructor of this HttpDownload
     */
    const std::string& url() const;

protected:
    /**
     * Has to be implemented by every concrete subclass to handle an incoming chunk of
     * data from the download. The arguments describe the \p buffer holding the newly
     * arrived data and the number of bytes that have arrived. The buffer will most
     * likely only be valid while this function is running, so it is the implementation's
     * responsibility to store the contents of the buffer before returning. Returning
     * `true` lets the download continue; returning `false` signals to the library that
     * an error has occurred from which recovery is not possible. This function is called
     * on a different thread from the one that called the #start method.
     *
     * \param buffer The beginning of the buffer of this chunk of data
     * \param size The number of bytes that the \p buffer contains
     * \return `true` if the downloading should continue and `false` if handling the data
     *         caused an error that the subclass is incapable of recovering from
     */
    virtual bool handleData(char* buffer, size_t size) = 0;

    /**
     * Called before the downloading starts; subclasses can use it to perform one-time
     * setup work, such as opening a file, reserving a block of storage, etc. This
     * function is guaranteed to be called only once per HttpDownload. The return value
     * states whether the setup completed successfully or whether an error occurred that
     * causes the download to be terminated. This function is called on a different
     * thread from the one that called the #start method.
     *
     * \return `true` if the setup completed successfully and `false` if the setup failed
     *         unrecoverably
     */
    virtual bool setup();

    /**
     * Called after the downloading has finished and before a potential call to #wait is
     * performed. Subclasses can use it to perform one-time operations that are required
     * when the downloading finishes, such as closing file handles, committing some
     * memory, etc. The return value states whether the teardown completed successfully.
     * This function is called on a different thread from the one that called the #start
     * method.
     *
     * \return `true` if the teardown completed successfully and `false` if it failed
     */
    virtual bool teardown();

private:
    /// Handler that is called whenever there is some progress to be reported
    HttpRequest::ProgressCallback _onProgress;

    // NOTE(review): the four status flags below are plain `bool`s although the download
    // is documented to run on a separate thread -- confirm that the implementation
    // synchronizes all accesses to them

    /// Whether the HttpDownload is currently downloading a file
    bool _isDownloading = false;

    /// Whether the download is finished
    bool _isFinished = false;

    /// Whether the download was successful
    bool _isSuccessful = false;

    /// Marker that tells the downloading thread that the download should be cancelled
    bool _shouldCancel = false;

    /// The HttpRequest object that is used for the download
    HttpRequest _httpRequest;

    /// The thread that contains the HttpRequest to download the file
    std::thread _downloadThread;

    /// Condition variable that the #wait function uses to be able to wait for the
    /// completion of the downloading thread
    std::condition_variable _downloadFinishCondition;
};
/**
* This specific subclass of the HttpDownload downloads the contents of the provided URL
* into a file on disk. By default, an existing file will not be overwritten and will
* cause the download to fail. This behavior can be overwritten through a parameter in the
* constructor of this class.
*/
class HttpFileDownload : public HttpDownload {
public:
BooleanType(Overwrite);
/**
* Constructor that will create a HttpFileDownload which will download the contents of
* the provided \p url to the \p destinationPath. If the \p destinationPath already
* contains a file and \p overwrite is Overwrite::No, the download will fail; if it is
* Overwrite::Yes, the existing content at the \p destinationPath will be overwritten.
*/
HttpFileDownload(std::string url, std::filesystem::path destinationPath,
Overwrite overwrite = Overwrite::No);
/**
* This destructor will cancel any ongoing download and wait for its completion, so it
* might not block for a short amount of time.
*/
virtual ~HttpFileDownload() override = default;
/**
* Returns the path where the contents of the URL provided in the constructor will be
* saved to.
*
* \return The path where URL will be downloaded to
*/
std::filesystem::path destination() const;
private:
/**
* Will create all directories that are necessary to reach _destination and then
* fight with other HttpFileDownloads to get one of a limited number of file handles
* to open _file.
*/
bool setup() override;
/**
* Closes the _file and returns the handle back to the pool of available handles.
*/
bool teardown() override;
/**
* Stores the chunk of data into the _file handle.
*/
bool handleData(char* buffer, size_t size) override;
/// A flag whether this HttpFileDownload got a handle from the limited supply of
/// handles. This limit is used to prevent all HttpFileDownloads from getting file
/// handles from the operating system as that resource is limited and downloads would
/// fail unrecoverably if no handles are available. So we limit the maximum number and
/// if that number is exceeded, the HttpFileDownload will wait until a handle is
/// available
std::atomic_bool _hasHandle = false;
/// The destination path where the contents of the URL provided in the constructor
/// will be saved to
std::filesystem::path _destination;
/// The file handle to the _destination used to save incoming chunks
std::ofstream _file;
/// Mutex that will be prevent multiple HttpFileDownloads to simultaneously try to
/// create the necessary intermediate directories, which would cause issues
static std::mutex _directoryCreationMutex;
/// The maximum number of file handles that all HttpFileDownloads combined can use up
static constexpr int MaxFileHandles = 32;
/// Stores the number of currently open file handles across all HttpFileDownloads
static std::atomic_int nCurrentFileHandles;
};
/**
* This concerete HttpDownload subclass downloads the contents of the URL passed into the
* constructor into a buffer of memory that can be retrieve. Please note that that buffer
* should only be used accessed once the HttpDownload::hasFinished function returns
* `true`.
*/
class HttpMemoryDownload : public HttpDownload {
public:
/**
* Creates an instance of a HttpMemoryDownload that will download the contents of the
* \p url into memory.
*
* \param url The URL whose contents should be downloaded
*/
explicit HttpMemoryDownload(std::string url);
/**
* This destructor will cancel any ongoing download and wait for its completion, so it
* might not block for a short amount of time.
*/
virtual ~HttpMemoryDownload() override = default;
/**
* Returns a reference to the buffer that is used to store the contents of the URL
* passed in the constructor. Please observe that while the HttpDownload::hasFinished
* method returns `false`, this buffer will be changed by a different thread and
* access is not thread-safe. After that function returns `true`, it is safe to access
* the buffer.
*
* \return A reference to the buffer used to hold the contents of the URL
*/
const std::vector<char>& downloadedData() const;
private:
/**
* Stores each downloaded chunk into the stored buffer.
*/
bool handleData(char* buffer, size_t size) override;
/// The buffer where the downloaded chunks are accumulated
std::vector<char> _buffer;
};
} // namespace openspace
#endif // __OPENSPACE_CORE___HTTPREQUEST___H__