Added a ScraperIgnoreHTTP404Errors option to specifically ignore 404 errors (resource not found)

This commit is contained in:
Leon Styhre 2024-01-11 00:46:11 +01:00
parent 88e4ed38fe
commit ee0f2be819
4 changed files with 26 additions and 7 deletions

View file

@@ -168,6 +168,13 @@ void ScraperHttpRequest::update()
if (status == HttpReq::REQ_IN_PROGRESS) if (status == HttpReq::REQ_IN_PROGRESS)
return; return;
if (status == HttpReq::REQ_RESOURCE_NOT_FOUND) {
LOG(LogWarning)
<< "ScraperHttpRequest: Server returned HTTP error code 404 (resource not found)";
setStatus(ASYNC_DONE);
return;
}
// Everything else is some sort of error. // Everything else is some sort of error.
LOG(LogError) << "ScraperHttpRequest network error (status: " << status << ") - " LOG(LogError) << "ScraperHttpRequest network error (status: " << status << ") - "
<< mReq->getErrorMsg(); << mReq->getErrorMsg();

View file

@@ -41,6 +41,7 @@ HttpReq::HttpReq(const std::string& url, bool scraperRequest)
, mHandle(nullptr) , mHandle(nullptr)
, mTotalBytes {0} , mTotalBytes {0}
, mDownloadedBytes {0} , mDownloadedBytes {0}
, mScraperRequest {scraperRequest}
{ {
// The multi-handle is cleaned up via a call from GuiScraperSearch after the scraping // The multi-handle is cleaned up via a call from GuiScraperSearch after the scraping
// has been completed for a game, meaning the handle is valid for all curl requests // has been completed for a game, meaning the handle is valid for all curl requests
@@ -81,7 +82,7 @@ HttpReq::HttpReq(const std::string& url, bool scraperRequest)
long connectionTimeout; long connectionTimeout;
if (scraperRequest) { if (mScraperRequest) {
connectionTimeout = connectionTimeout =
static_cast<long>(Settings::getInstance()->getInt("ScraperConnectionTimeout")); static_cast<long>(Settings::getInstance()->getInt("ScraperConnectionTimeout"));
@@ -103,7 +104,7 @@ HttpReq::HttpReq(const std::string& url, bool scraperRequest)
long transferTimeout; long transferTimeout;
if (scraperRequest) { if (mScraperRequest) {
transferTimeout = transferTimeout =
static_cast<long>(Settings::getInstance()->getInt("ScraperTransferTimeout")); static_cast<long>(Settings::getInstance()->getInt("ScraperTransferTimeout"));
@@ -259,10 +260,18 @@ HttpReq::Status HttpReq::status()
req->onError(curl_easy_strerror(msg->data.result)); req->onError(curl_easy_strerror(msg->data.result));
} }
else if (msg->data.result == CURLE_HTTP_RETURNED_ERROR) { else if (msg->data.result == CURLE_HTTP_RETURNED_ERROR) {
req->mStatus = REQ_BAD_STATUS_CODE;
long responseCode; long responseCode;
curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE, &responseCode); curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE, &responseCode);
req->onError("Server returned HTTP error code " + std::to_string(responseCode));
if (responseCode == 404 && mScraperRequest &&
Settings::getInstance()->getBool("ScraperIgnoreHTTP404Errors")) {
req->mStatus = REQ_RESOURCE_NOT_FOUND;
}
else {
req->onError("Server returned HTTP error code " +
std::to_string(responseCode));
req->mStatus = REQ_BAD_STATUS_CODE;
}
} }
else { else {
req->mStatus = REQ_IO_ERROR; req->mStatus = REQ_IO_ERROR;

View file

@@ -25,10 +25,11 @@ public:
enum Status { enum Status {
// clang-format off // clang-format off
REQ_IN_PROGRESS, // Request is in progress. REQ_IN_PROGRESS, // Request is in progress.
REQ_SUCCESS, // Request completed successfully, get it with getContent(). REQ_SUCCESS, // Request completed successfully.
REQ_IO_ERROR, // Some error happened, get it with getErrorMsg(). REQ_IO_ERROR, // An error occurred.
REQ_FAILED_VERIFICATION, // Peer's certificate or fingerprint wasn't verified correctly. REQ_FAILED_VERIFICATION, // Peer's certificate or fingerprint wasn't verified correctly.
REQ_BAD_STATUS_CODE, // Some invalid HTTP response status code happened (non-200). REQ_BAD_STATUS_CODE, // HTTP error response >= 400.
REQ_RESOURCE_NOT_FOUND, // HTTP error code 404 specifically.
REQ_INVALID_RESPONSE, // The HTTP response was invalid. REQ_INVALID_RESPONSE, // The HTTP response was invalid.
REQ_UNDEFINED_ERROR REQ_UNDEFINED_ERROR
// clang-format on // clang-format on
@@ -69,6 +70,7 @@ private:
std::string mErrorMsg; std::string mErrorMsg;
std::atomic<long> mTotalBytes; std::atomic<long> mTotalBytes;
std::atomic<long> mDownloadedBytes; std::atomic<long> mDownloadedBytes;
bool mScraperRequest;
}; };
#endif // ES_CORE_HTTP_REQ_H #endif // ES_CORE_HTTP_REQ_H

View file

@@ -150,6 +150,7 @@ void Settings::setDefaults()
mIntMap["ScraperSearchFileHashMaxSize"] = {384, 384}; mIntMap["ScraperSearchFileHashMaxSize"] = {384, 384};
mBoolMap["ScraperOverwriteData"] = {true, true}; mBoolMap["ScraperOverwriteData"] = {true, true};
mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true}; mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true};
mBoolMap["ScraperIgnoreHTTP404Errors"] = {true, true};
mBoolMap["ScraperSearchFileHash"] = {true, true}; mBoolMap["ScraperSearchFileHash"] = {true, true};
mBoolMap["ScraperSearchMetadataName"] = {true, true}; mBoolMap["ScraperSearchMetadataName"] = {true, true};
mBoolMap["ScraperIncludeFolders"] = {true, true}; mBoolMap["ScraperIncludeFolders"] = {true, true};