Restored error propagation to scrapers.

Scrapers now have to use subclasses, because I didn't want to pass "setError" function pointers through the arguments.
Fixed a tricky timing bug in ScraperHttpRequest::update that could cause misreported network errors: the HTTP status was queried more than once per update, so a request that finished between the checks could fall through to the error path. The status is now read once into a local.
Aloshi 2014-06-06 16:06:10 -05:00
parent 447b63f1e0
commit 4ad8017c9f
6 changed files with 80 additions and 28 deletions
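The new extension point ends up looking like this (a minimal sketch; "MyScraperRequest" and its error text are hypothetical and only illustrate the pattern the diff below applies to TheGamesDB and TheArchive):

// Hypothetical subclass showing the new pattern: parse the response in
// process() and report failures through the inherited setError(), which
// replaces the old function-pointer callback.
// (Assumes pugixml and the Scraper.h declarations shown below.)
class MyScraperRequest : public ScraperHttpRequest
{
public:
	MyScraperRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url)
		: ScraperHttpRequest(resultsWrite, url) {}
protected:
	void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) override
	{
		pugi::xml_document doc;
		pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
		if(!parseResult)
		{
			setError("MyScraperRequest - error parsing XML"); // marks this handle ASYNC_ERROR
			return;
		}
		// ...fill `results` from `doc` here...
	}
};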

GamesDBScraper.cpp

@@ -78,7 +78,7 @@ void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std
if(params.system->getPlatformIds().empty())
{
// no platform specified, we're done
-requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thegamesdb_process_httpreq)));
+requests.push(std::unique_ptr<ScraperRequest>(new TheGamesDBRequest(results, path)));
}else{
// go through the list, we need to split this into multiple requests
// because TheGamesDB API either sucks or I don't know how to use it properly...
@@ -95,12 +95,13 @@ void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std
}else{
LOG(LogWarning) << "TheGamesDB scraper warning - no support for platform " << getPlatformName(*platformIt);
}
-requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thegamesdb_process_httpreq)));
+requests.push(std::unique_ptr<ScraperRequest>(new TheGamesDBRequest(results, path)));
}
}
}
-void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
+void TheGamesDBRequest::process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
{
assert(req->status() == HttpReq::REQ_SUCCESS);
@@ -108,7 +109,11 @@ void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector
pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
if(!parseResult)
{
LOG(LogError) << "GamesDBRequest - Error parsing XML. \n\t" << parseResult.description() << "";
std::stringstream ss;
ss << "GamesDBRequest - Error parsing XML. \n\t" << parseResult.description() << "";
std::string err = ss.str();
setError(err);
LOG(LogError) << err;
return;
}

GamesDBScraper.h

@@ -5,4 +5,10 @@
void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
std::vector<ScraperSearchResult>& results);
-void thegamesdb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
+class TheGamesDBRequest : public ScraperHttpRequest
+{
+public:
+TheGamesDBRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url) : ScraperHttpRequest(resultsWrite, url) {}
+protected:
+void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) override;
+};

Scraper.cpp

@@ -44,9 +44,34 @@ void ScraperSearchHandle::update()
if(mStatus == ASYNC_DONE)
return;
-while(!mRequestQueue.empty() && mRequestQueue.front()->update())
+while(!mRequestQueue.empty())
+{
+auto& req = mRequestQueue.front();
+AsyncHandleStatus status = req->status();
+if(status == ASYNC_ERROR)
+{
+// propagate error
+setError(req->getStatusString());
+// empty our queue
+while(!mRequestQueue.empty())
+mRequestQueue.pop();
+return;
+}
+// finished this one, see if we have any more
+if(status == ASYNC_DONE)
+{
+mRequestQueue.pop();
+continue;
+}
+// status == ASYNC_IN_PROGRESS - wait for the next update() call
+break;
+}
+// we finished without any errors!
if(mRequestQueue.empty())
{
setStatus(ASYNC_DONE);
@@ -63,26 +88,30 @@ ScraperRequest::ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite) :
// ScraperHttpRequest
-ScraperHttpRequest::ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc)
-: ScraperRequest(resultsWrite), mProcessFunc(processFunc)
+ScraperHttpRequest::ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url)
+: ScraperRequest(resultsWrite)
{
setStatus(ASYNC_IN_PROGRESS);
mReq = std::unique_ptr<HttpReq>(new HttpReq(url));
}
-bool ScraperHttpRequest::update()
+void ScraperHttpRequest::update()
{
-if(mReq->status() == HttpReq::REQ_SUCCESS)
+HttpReq::Status status = mReq->status();
+if(status == HttpReq::REQ_SUCCESS)
{
-mProcessFunc(mReq, mResults);
-return true;
+setStatus(ASYNC_DONE); // if process() has an error, status will be changed to ASYNC_ERROR
+process(mReq, mResults);
+return;
}
-if(mReq->status() == HttpReq::REQ_IN_PROGRESS)
-return false;
+// not ready yet
+if(status == HttpReq::REQ_IN_PROGRESS)
+return;
// everything else is some sort of error
LOG(LogError) << "ScraperHttpRequest network error - " << mReq->getErrorMsg();
return true;
LOG(LogError) << "ScraperHttpRequest network error (status: " << status << ") - " << mReq->getErrorMsg();
setError(mReq->getErrorMsg());
}
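For context, a caller would surface the propagated error roughly like this (a hypothetical usage sketch, not part of this commit; pollSearch is made up, and it assumes AsyncHandle::status() pumps update() internally, as in the AsyncHandle sketch after Scraper.h below):

// Hypothetical per-frame poll of a running search.
void pollSearch(ScraperSearchHandle& handle)
{
	// read the status once per poll - the same "read it once" idiom the
	// timing fix above applies to HttpReq::Status
	AsyncHandleStatus status = handle.status();
	if(status == ASYNC_ERROR)
		LOG(LogError) << "scraper error: " << handle.getStatusString(); // message set via setError()
	else if(status == ASYNC_DONE)
		LOG(LogInfo) << "scraper finished";
	// else ASYNC_IN_PROGRESS: poll again next frame
}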

Scraper.h

@@ -53,30 +53,30 @@ struct ScraperSearchResult
// a scraper search gathers results from (potentially multiple) ScraperRequests
-class ScraperRequest
+class ScraperRequest : public AsyncHandle
{
public:
ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite);
// returns "true" once we're done
virtual bool update() = 0;
virtual void update() = 0;
protected:
std::vector<ScraperSearchResult>& mResults;
};
-typedef void (*scraper_process_httpreq)(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
// a single HTTP request that needs to be processed to get the results
class ScraperHttpRequest : public ScraperRequest
{
public:
-ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc);
-bool update() override;
+ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url);
+virtual void update() override;
protected:
+virtual void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) = 0;
private:
-scraper_process_httpreq mProcessFunc;
std::unique_ptr<HttpReq> mReq;
};
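AsyncHandle itself is not part of this diff; reconstructed from the calls used above (status(), setStatus(), setError(), getStatusString(), and the ASYNC_* values), it presumably looks something like the sketch below. The assumption that status() pumps update() before reporting is what lets the ScraperSearchHandle loop in Scraper.cpp drive each queued request, and what the polling sketch above relies on:

#include <string>

// Reconstructed sketch of the assumed AsyncHandle base - the real
// definition lives elsewhere in the codebase and may differ.
enum AsyncHandleStatus
{
	ASYNC_IN_PROGRESS,
	ASYNC_ERROR,
	ASYNC_DONE
};

class AsyncHandle
{
public:
	AsyncHandle() : mStatus(ASYNC_IN_PROGRESS) {}
	virtual ~AsyncHandle() {}

	virtual void update() = 0;

	// assumption: pump the operation, then report the latest status
	inline AsyncHandleStatus status() { update(); return mStatus; }

	// error message set by setError(), meaningful when status() == ASYNC_ERROR
	inline std::string getStatusString() { return mStatusString; }

protected:
	inline void setStatus(AsyncHandleStatus status) { mStatus = status; }
	inline void setError(const std::string& error) { mStatusString = error; setStatus(ASYNC_ERROR); }

private:
	AsyncHandleStatus mStatus;
	std::string mStatusString;
};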

TheArchiveScraper.cpp

@@ -16,10 +16,10 @@ void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std
path += HttpReq::urlEncode(cleanName);
//platform TODO, should use some params.system get method
-requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thearchive_process_httpreq)));
+requests.push(std::unique_ptr<ScraperRequest>(new TheArchiveRequest(results, path)));
}
-void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
+void TheArchiveRequest::process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
{
assert(req->status() == HttpReq::REQ_SUCCESS);
@@ -27,7 +27,11 @@ void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector
pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
if(!parseResult)
{
LOG(LogError) << "TheArchiveRequest - error parsing XML.\n\t" << parseResult.description();
std::stringstream ss;
ss << "TheArchiveRequest - error parsing XML.\n\t" << parseResult.description();
std::string err = ss.str();
setError(err);
LOG(LogError) << err;
return;
}

TheArchiveScraper.h

@@ -6,3 +6,11 @@ void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std
std::vector<ScraperSearchResult>& results);
-void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
+class TheArchiveRequest : public ScraperHttpRequest
+{
+public:
+TheArchiveRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url) : ScraperHttpRequest(resultsWrite, url) {}
+protected:
+void process(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results) override;
+};