Added a ScraperIgnoreHTTP404Errors option to specifically ignore 404 errors (resource not found)

Leon Styhre 2024-01-11 00:46:11 +01:00
parent 88e4ed38fe
commit ee0f2be819
4 changed files with 26 additions and 7 deletions
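
The commit hinges on a libcurl behavior: when CURLOPT_FAILONERROR is set (which the CURLE_HTTP_RETURNED_ERROR check in the diff below implies), an HTTP status of 400 or higher fails the transfer, and the concrete status code must then be read separately via CURLINFO_RESPONSE_CODE. A minimal standalone sketch of that mechanism (not ES-DE code; the URL is a placeholder):

#include <curl/curl.h>
#include <iostream>

int main()
{
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL* handle {curl_easy_init()};
    if (handle == nullptr)
        return 1;

    // Placeholder URL; any server path that returns a 404 will do.
    curl_easy_setopt(handle, CURLOPT_URL, "https://example.com/missing");
    // With CURLOPT_FAILONERROR set, HTTP responses >= 400 abort the
    // transfer with CURLE_HTTP_RETURNED_ERROR instead of succeeding.
    curl_easy_setopt(handle, CURLOPT_FAILONERROR, 1L);

    const CURLcode result {curl_easy_perform(handle)};
    if (result == CURLE_HTTP_RETURNED_ERROR) {
        long responseCode {0};
        curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &responseCode);
        if (responseCode == 404)
            std::cout << "404 (resource not found), can be ignored\n";
        else
            std::cout << "HTTP error " << responseCode << "\n";
    }

    curl_easy_cleanup(handle);
    curl_global_cleanup();
    return 0;
}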

es-app/src/scrapers/Scraper.cpp

@@ -168,6 +168,13 @@ void ScraperHttpRequest::update()
     if (status == HttpReq::REQ_IN_PROGRESS)
         return;
 
+    if (status == HttpReq::REQ_RESOURCE_NOT_FOUND) {
+        LOG(LogWarning)
+            << "ScraperHttpRequest: Server returned HTTP error code 404 (resource not found)";
+        setStatus(ASYNC_DONE);
+        return;
+    }
+
     // Everything else is some sort of error.
     LOG(LogError) << "ScraperHttpRequest network error (status: " << status << ") - "
                   << mReq->getErrorMsg();
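
In effect, a 404 now completes the request as an empty search result instead of surfacing an error. A short sketch of that control flow, with hypothetical names (pollScraperRequest is not an ES-DE function), for illustration:

#include <iostream>

enum Status { REQ_IN_PROGRESS, REQ_SUCCESS, REQ_RESOURCE_NOT_FOUND, REQ_BAD_STATUS_CODE };

// Hypothetical per-frame poll mirroring the flow of update() above;
// returns true once the request has reached a terminal state.
bool pollScraperRequest(Status status)
{
    if (status == REQ_IN_PROGRESS)
        return false; // Transfer still running, poll again next frame.

    if (status == REQ_RESOURCE_NOT_FOUND) {
        // With ScraperIgnoreHTTP404Errors enabled, a 404 finishes the
        // request with no results rather than being treated as an error.
        std::cout << "No entry found on the server, continuing\n";
        return true;
    }

    if (status != REQ_SUCCESS)
        std::cout << "Network error\n";
    return true;
}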

es-core/src/HttpReq.cpp

@@ -41,6 +41,7 @@ HttpReq::HttpReq(const std::string& url, bool scraperRequest)
     , mHandle(nullptr)
     , mTotalBytes {0}
     , mDownloadedBytes {0}
+    , mScraperRequest {scraperRequest}
 {
     // The multi-handle is cleaned up via a call from GuiScraperSearch after the scraping
     // has been completed for a game, meaning the handle is valid for all curl requests
@@ -81,7 +82,7 @@ HttpReq::HttpReq(const std::string& url, bool scraperRequest)
     long connectionTimeout;
 
-    if (scraperRequest) {
+    if (mScraperRequest) {
         connectionTimeout =
             static_cast<long>(Settings::getInstance()->getInt("ScraperConnectionTimeout"));
@@ -103,7 +104,7 @@ HttpReq::HttpReq(const std::string& url, bool scraperRequest)
     long transferTimeout;
 
-    if (scraperRequest) {
+    if (mScraperRequest) {
         transferTimeout =
             static_cast<long>(Settings::getInstance()->getInt("ScraperTransferTimeout"));
@@ -259,10 +260,18 @@ HttpReq::Status HttpReq::status()
                 req->onError(curl_easy_strerror(msg->data.result));
             }
             else if (msg->data.result == CURLE_HTTP_RETURNED_ERROR) {
-                req->mStatus = REQ_BAD_STATUS_CODE;
                 long responseCode;
                 curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE, &responseCode);
-                req->onError("Server returned HTTP error code " + std::to_string(responseCode));
+
+                if (responseCode == 404 && mScraperRequest &&
+                    Settings::getInstance()->getBool("ScraperIgnoreHTTP404Errors")) {
+                    req->mStatus = REQ_RESOURCE_NOT_FOUND;
+                }
+                else {
+                    req->onError("Server returned HTTP error code " +
+                                 std::to_string(responseCode));
+                    req->mStatus = REQ_BAD_STATUS_CODE;
+                }
             }
             else {
                 req->mStatus = REQ_IO_ERROR;
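
The branch added above reduces to a small pure function; this sketch (names assumed for illustration, not part of the commit) restates the classification:

// Mirrors the two HttpReq::Status values involved.
enum class HttpError { BadStatusCode, ResourceNotFound };

// ignore404 stands in for Settings::getBool("ScraperIgnoreHTTP404Errors").
HttpError classifyHttpError(long responseCode, bool scraperRequest, bool ignore404)
{
    if (responseCode == 404 && scraperRequest && ignore404)
        return HttpError::ResourceNotFound; // Tolerated for scraper requests.
    return HttpError::BadStatusCode;        // Reported via onError() as before.
}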

es-core/src/HttpReq.h

@@ -25,10 +25,11 @@ public:
     enum Status {
         // clang-format off
         REQ_IN_PROGRESS,         // Request is in progress.
-        REQ_SUCCESS,             // Request completed successfully, get it with getContent().
-        REQ_IO_ERROR,            // Some error happened, get it with getErrorMsg().
+        REQ_SUCCESS,             // Request completed successfully.
+        REQ_IO_ERROR,            // An error occurred.
         REQ_FAILED_VERIFICATION, // Peer's certificate or fingerprint wasn't verified correctly.
-        REQ_BAD_STATUS_CODE,     // Some invalid HTTP response status code happened (non-200).
+        REQ_BAD_STATUS_CODE,     // HTTP error response >= 400.
+        REQ_RESOURCE_NOT_FOUND,  // HTTP error code 404 specifically.
         REQ_INVALID_RESPONSE,    // The HTTP response was invalid.
         REQ_UNDEFINED_ERROR
         // clang-format on
@@ -69,6 +70,7 @@ private:
     std::string mErrorMsg;
     std::atomic<long> mTotalBytes;
     std::atomic<long> mDownloadedBytes;
+    bool mScraperRequest;
 };
 
 #endif // ES_CORE_HTTP_REQ_H

es-core/src/Settings.cpp

@@ -150,6 +150,7 @@ void Settings::setDefaults()
     mIntMap["ScraperSearchFileHashMaxSize"] = {384, 384};
     mBoolMap["ScraperOverwriteData"] = {true, true};
     mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true};
+    mBoolMap["ScraperIgnoreHTTP404Errors"] = {true, true};
     mBoolMap["ScraperSearchFileHash"] = {true, true};
     mBoolMap["ScraperSearchMetadataName"] = {true, true};
     mBoolMap["ScraperIncludeFolders"] = {true, true};