From 4ad8017c9f2ca4edecb7337f0528e4d7333b700d Mon Sep 17 00:00:00 2001
From: Aloshi
Date: Fri, 6 Jun 2014 16:06:10 -0500
Subject: [PATCH] Restored error propagation to scrapers.

Now you have to use subclasses because I didn't want to pass "setError"
function pointers in the arguments.

Fixed tricky timing bug in ScraperHttpRequest::update that could cause
misreported network errors.
---
 src/scrapers/GamesDBScraper.cpp    | 13 +++++---
 src/scrapers/GamesDBScraper.h      |  8 ++++-
 src/scrapers/Scraper.cpp           | 53 +++++++++++++++++++++++-------
 src/scrapers/Scraper.h             | 16 ++++-----
 src/scrapers/TheArchiveScraper.cpp | 10 ++++--
 src/scrapers/TheArchiveScraper.h   |  8 +++++
 6 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/src/scrapers/GamesDBScraper.cpp b/src/scrapers/GamesDBScraper.cpp
index 2ce893995..db3c6635a 100644
--- a/src/scrapers/GamesDBScraper.cpp
+++ b/src/scrapers/GamesDBScraper.cpp
@@ -78,7 +78,7 @@ void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std
 	if(params.system->getPlatformIds().empty())
 	{
 		// no platform specified, we're done
-		requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thegamesdb_process_httpreq)));
+		requests.push(std::unique_ptr<ScraperRequest>(new TheGamesDBRequest(results, path)));
 	}else{
 		// go through the list, we need to split this into multiple requests
 		// because TheGamesDB API either sucks or I don't know how to use it properly...
@@ -95,12 +95,13 @@
 			}else{
 				LOG(LogWarning) << "TheGamesDB scraper warning - no support for platform " << getPlatformName(*platformIt);
 			}
-			requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thegamesdb_process_httpreq)));
+
+			requests.push(std::unique_ptr<ScraperRequest>(new TheGamesDBRequest(results, path)));
 		}
 	}
 }
 
-void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
+void TheGamesDBRequest::process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
 {
 	assert(req->status() == HttpReq::REQ_SUCCESS);
 
@@ -108,7 +109,11 @@
 	pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
 	if(!parseResult)
 	{
-		LOG(LogError) << "GamesDBRequest - Error parsing XML. \n\t" << parseResult.description() << "";
+		std::stringstream ss;
+		ss << "GamesDBRequest - Error parsing XML. \n\t" << parseResult.description() << "";
+		std::string err = ss.str();
+		setError(err);
+		LOG(LogError) << err;
 		return;
 	}
 
diff --git a/src/scrapers/GamesDBScraper.h b/src/scrapers/GamesDBScraper.h
index 83ac525fa..0d9e49cf5 100644
--- a/src/scrapers/GamesDBScraper.h
+++ b/src/scrapers/GamesDBScraper.h
@@ -5,4 +5,10 @@
 void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
 	std::vector<ScraperSearchResult>& results);
 
-void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
+class TheGamesDBRequest : public ScraperHttpRequest
+{
+public:
+	TheGamesDBRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url) : ScraperHttpRequest(resultsWrite, url) {}
+protected:
+	void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) override;
+};
diff --git a/src/scrapers/Scraper.cpp b/src/scrapers/Scraper.cpp
index be87d0397..6f468c257 100644
--- a/src/scrapers/Scraper.cpp
+++ b/src/scrapers/Scraper.cpp
@@ -44,9 +44,34 @@ void ScraperSearchHandle::update()
 	if(mStatus == ASYNC_DONE)
 		return;
 
-	while(!mRequestQueue.empty() && mRequestQueue.front()->update())
-		mRequestQueue.pop();
+	while(!mRequestQueue.empty())
+	{
+		auto& req = mRequestQueue.front();
+		AsyncHandleStatus status = req->status();
+
+		if(status == ASYNC_ERROR)
+		{
+			// propagate error
+			setError(req->getStatusString());
+
+			// empty our queue
+			while(!mRequestQueue.empty())
+				mRequestQueue.pop();
+
+			return;
+		}
+
+		// finished this one, see if we have any more
+		if(status == ASYNC_DONE)
+		{
+			mRequestQueue.pop();
+			continue;
+		}
+
+		// status == ASYNC_IN_PROGRESS
+	}
 
+	// we finished without any errors!
 	if(mRequestQueue.empty())
 	{
 		setStatus(ASYNC_DONE);
@@ -63,26 +88,30 @@ ScraperRequest::ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite) :
 
 // ScraperHttpRequest
 
-ScraperHttpRequest::ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc)
-	: ScraperRequest(resultsWrite), mProcessFunc(processFunc)
+ScraperHttpRequest::ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url)
+	: ScraperRequest(resultsWrite)
 {
+	setStatus(ASYNC_IN_PROGRESS);
 	mReq = std::unique_ptr<HttpReq>(new HttpReq(url));
 }
 
-bool ScraperHttpRequest::update()
+void ScraperHttpRequest::update()
 {
-	if(mReq->status() == HttpReq::REQ_SUCCESS)
+	HttpReq::Status status = mReq->status();
+	if(status == HttpReq::REQ_SUCCESS)
 	{
-		mProcessFunc(mReq, mResults);
-		return true;
+		setStatus(ASYNC_DONE); // if process() has an error, status will be changed to ASYNC_ERROR
+		process(mReq, mResults);
+		return;
 	}
 
-	if(mReq->status() == HttpReq::REQ_IN_PROGRESS)
-		return false;
+	// not ready yet
+	if(status == HttpReq::REQ_IN_PROGRESS)
+		return;
 
 	// everything else is some sort of error
-	LOG(LogError) << "ScraperHttpRequest network error - " << mReq->getErrorMsg();
-	return true;
+	LOG(LogError) << "ScraperHttpRequest network error (status: " << status << ") - " << mReq->getErrorMsg();
+	setError(mReq->getErrorMsg());
 }
 
diff --git a/src/scrapers/Scraper.h b/src/scrapers/Scraper.h
index 328256989..1e87f4c5d 100644
--- a/src/scrapers/Scraper.h
+++ b/src/scrapers/Scraper.h
@@ -53,30 +53,30 @@ struct ScraperSearchResult
 
 // a scraper search gathers results from (potentially multiple) ScraperRequests
-class ScraperRequest
+class ScraperRequest : public AsyncHandle
 {
 public:
 	ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite);
 
 	// returns "true" once we're done
-	virtual bool update() = 0;
-
+	virtual void update() = 0;
+
 protected:
 	std::vector<ScraperSearchResult>& mResults;
 };
 
-typedef void (*scraper_process_httpreq)(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
-
 // a single HTTP request that needs to be processed to get the results
 class ScraperHttpRequest : public ScraperRequest
 {
 public:
-	ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc);
-	bool update() override;
+	ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url);
+	virtual void update() override;
+
+protected:
+	virtual void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) = 0;
 
 private:
-	scraper_process_httpreq mProcessFunc;
 	std::unique_ptr<HttpReq> mReq;
 };
 
diff --git a/src/scrapers/TheArchiveScraper.cpp b/src/scrapers/TheArchiveScraper.cpp
index cf1c722bc..336de2346 100644
--- a/src/scrapers/TheArchiveScraper.cpp
+++ b/src/scrapers/TheArchiveScraper.cpp
@@ -16,10 +16,10 @@ void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std
 	path += HttpReq::urlEncode(cleanName);
 	//platform TODO, should use some params.system get method
 
-	requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thearchive_process_httpreq)));
+	requests.push(std::unique_ptr<ScraperRequest>(new TheArchiveRequest(results, path)));
 }
 
-void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
+void TheArchiveRequest::process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
 {
 	assert(req->status() == HttpReq::REQ_SUCCESS);
 
@@ -27,7 +27,11 @@
 	pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
 	if(!parseResult)
 	{
-		LOG(LogError) << "TheArchiveRequest - error parsing XML.\n\t" << parseResult.description();
+		std::stringstream ss;
+		ss << "TheArchiveRequest - error parsing XML.\n\t" << parseResult.description();
+		std::string err = ss.str();
+		setError(err);
+		LOG(LogError) << err;
 		return;
 	}
 
diff --git a/src/scrapers/TheArchiveScraper.h b/src/scrapers/TheArchiveScraper.h
index 89105e0cc..93f640770 100644
--- a/src/scrapers/TheArchiveScraper.h
+++ b/src/scrapers/TheArchiveScraper.h
@@ -6,3 +6,11 @@ void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std
 	std::vector<ScraperSearchResult>& results);
 
 void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
+
+class TheArchiveRequest : public ScraperHttpRequest
+{
+public:
+	TheArchiveRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url) : ScraperHttpRequest(resultsWrite, url) {}
+protected:
+	void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) override;
+};
\ No newline at end of file
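
A note on the new interface, for anyone porting a scraper to it: instead of handing ScraperHttpRequest a scraper_process_httpreq function pointer, you now subclass it, override process(), and report failures through setError(). Below is a minimal sketch of what such a subclass might look like. ExampleRequest, its empty-response check, and its error message are illustrative assumptions, not part of this commit; the types and override points are the ones declared in Scraper.h above.

    #include "Scraper.h"

    // Hypothetical scraper request built on the subclass-based API from this patch.
    class ExampleRequest : public ScraperHttpRequest
    {
    public:
        ExampleRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url)
            : ScraperHttpRequest(resultsWrite, url) {}

    protected:
        // Only called by ScraperHttpRequest::update() once the HTTP request
        // reaches HttpReq::REQ_SUCCESS; network failures never get this far.
        void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) override
        {
            if(req->getContent().empty())
            {
                // flips this handle from ASYNC_DONE back to ASYNC_ERROR, so
                // ScraperSearchHandle::update() copies the message and drains
                // the rest of its request queue
                setError("ExampleRequest - empty response");
                return;
            }

            ScraperSearchResult result;
            // a real scraper would parse req->getContent() into result here
            results.push_back(result);
        }
    };

Two details worth noting. ScraperHttpRequest::update() deliberately sets ASYNC_DONE before calling process(), so a setError() inside process() wins; that is what lets parse errors surface through ScraperSearchHandle::update() the same way network errors do. And the "tricky timing bug" from the commit message is the single cached HttpReq::Status snapshot: the old update() called mReq->status() twice, so a status change between the REQ_SUCCESS and REQ_IN_PROGRESS checks could fall through to the error branch and be misreported as a network error.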