From 8c0a40cebb75534c0cf3db8a634d23e632b388fa Mon Sep 17 00:00:00 2001
From: Aloshi
Date: Tue, 3 Jun 2014 18:30:03 -0500
Subject: [PATCH] Refactored scraper code to support multiple requests, even
 multiple requests mixed between scrapers.

---
 CMakeLists.txt                            |  2 +
 src/Settings.cpp                          | 24 +------
 src/Settings.h                            | 11 +--
 src/components/ScraperSearchComponent.cpp |  2 +-
 src/guis/GuiMenu.cpp                      | 11 ++-
 src/guis/GuiSettings.cpp                  |  1 +
 src/scrapers/GamesDBScraper.cpp           | 60 ++++------------
 src/scrapers/GamesDBScraper.h             | 22 +-----
 src/scrapers/Scraper.cpp                  | 85 ++++++++++++++++++---
 src/scrapers/Scraper.h                    | 81 ++++++++++++++++++---
 src/scrapers/TheArchiveScraper.cpp        | 52 +++-----------
 src/scrapers/TheArchiveScraper.h          | 22 +-----
 12 files changed, 187 insertions(+), 186 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb94a4405..1bf833229 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,7 @@
 cmake_minimum_required(VERSION 2.6)
 
+INCLUDE(CPack)
+
 project(emulationstation)
 
 #-------------------------------------------------------------------------------
diff --git a/src/Settings.cpp b/src/Settings.cpp
index 96053a431..f9e6549b1 100644
--- a/src/Settings.cpp
+++ b/src/Settings.cpp
@@ -3,7 +3,6 @@
 #include "pugiXML/pugixml.hpp"
 #include "platform.h"
 #include <boost/filesystem.hpp>
-#include "scrapers/GamesDBScraper.h"
 
 Settings* Settings::sInstance = NULL;
 
@@ -50,8 +49,7 @@ void Settings::setDefaults()
     mStringMap["TransitionStyle"] = "fade";
     mStringMap["ThemeSet"] = "";
     mStringMap["ScreenSaverBehavior"] = "dim";
-
-    mScraper = std::shared_ptr<Scraper>(new GamesDBScraper());
+    mStringMap["Scraper"] = "TheGamesDB";
 }
 
 template <typename K, typename V>
@@ -83,9 +81,6 @@ void Settings::saveFile()
         node.append_attribute("value").set_value(iter->second.c_str());
     }
 
-    pugi::xml_node scraperNode = doc.append_child("scraper");
-    scraperNode.append_attribute("value").set_value(mScraper->getName());
-
     doc.save_file(path.c_str());
 }
 
@@ -112,23 +107,6 @@ void Settings::loadFile()
         setFloat(node.attribute("name").as_string(), node.attribute("value").as_float());
     for(pugi::xml_node node = doc.child("string"); node; node = node.next_sibling("string"))
         setString(node.attribute("name").as_string(), node.attribute("value").as_string());
-
-    if(doc.child("scraper"))
-    {
-        std::shared_ptr<Scraper> scr = createScraperByName(doc.child("scraper").attribute("value").as_string());
-        if(scr)
-            mScraper = scr;
-    }
-}
-
-std::shared_ptr<Scraper> Settings::getScraper()
-{
-    return mScraper;
-}
-
-void Settings::setScraper(std::shared_ptr<Scraper> scraper)
-{
-    mScraper = scraper;
 }
 
 //Print a warning message if the setting we're trying to get doesn't already exist in the map, then return the value in the map.
diff --git a/src/Settings.h b/src/Settings.h
index d4670801f..6fb82f9a1 100644
--- a/src/Settings.h
+++ b/src/Settings.h
@@ -1,9 +1,6 @@
-#ifndef _SETTINGS_H_
-#define _SETTINGS_H_
-
+#pragma once
 #include <string>
 #include <map>
-#include "scrapers/Scraper.h"
 
 //This is a singleton for storing settings.
 class Settings
@@ -25,9 +22,6 @@ public:
     void setFloat(const std::string& name, float value);
     void setString(const std::string& name, const std::string& value);
 
-    std::shared_ptr<Scraper> getScraper();
-    void setScraper(std::shared_ptr<Scraper> scraper);
-
 private:
     static Settings* sInstance;
 
@@ -41,8 +35,5 @@ private:
     std::map<std::string, float> mFloatMap;
     std::map<std::string, std::string> mStringMap;
 
-    std::shared_ptr<Scraper> mScraper;
     std::string mHomePathOverride;
 };
-
-#endif
diff --git a/src/components/ScraperSearchComponent.cpp b/src/components/ScraperSearchComponent.cpp
index 0fcbc7ad9..3ba5c8625 100644
--- a/src/components/ScraperSearchComponent.cpp
+++ b/src/components/ScraperSearchComponent.cpp
@@ -212,7 +212,7 @@ void ScraperSearchComponent::search(const ScraperSearchParams& params)
     updateInfoPane();
 
     mLastSearch = params;
-    mSearchHandle = Settings::getInstance()->getScraper()->getResultsAsync(params);
+    mSearchHandle = startScraperSearch(params);
 }
 
 void ScraperSearchComponent::stop()
diff --git a/src/guis/GuiMenu.cpp b/src/guis/GuiMenu.cpp
index ce1597788..e4c7b8a4b 100644
--- a/src/guis/GuiMenu.cpp
+++ b/src/guis/GuiMenu.cpp
@@ -38,16 +38,13 @@ GuiMenu::GuiMenu(Window* window) : GuiComponent(window), mMenu(window, "MAIN MEN
         auto s = new GuiSettings(mWindow, "SCRAPER");
 
         // scrape from
-        auto scraper_list = std::make_shared< OptionListComponent< std::shared_ptr<Scraper> > >(mWindow, "SCRAPE FROM", false);
-        std::vector< std::shared_ptr<Scraper> > scrapers;
-        scrapers.push_back(std::make_shared<GamesDBScraper>());
-        scrapers.push_back(std::make_shared<TheArchiveScraper>());
-
+        auto scraper_list = std::make_shared< OptionListComponent< std::string > >(mWindow, "SCRAPE FROM", false);
+        std::vector<std::string> scrapers = getScraperList();
         for(auto it = scrapers.begin(); it != scrapers.end(); it++)
-            scraper_list->add((*it)->getName(), *it, (*it)->getName() == Settings::getInstance()->getScraper()->getName());
+            scraper_list->add(*it, *it, *it == Settings::getInstance()->getString("Scraper"));
 
         s->addWithLabel("SCRAPE FROM", scraper_list);
-        s->addSaveFunc([scraper_list] { Settings::getInstance()->setScraper(scraper_list->getSelected()); });
+        s->addSaveFunc([scraper_list] { Settings::getInstance()->setString("Scraper", scraper_list->getSelected()); });
 
         // scrape ratings
         auto scrape_ratings = std::make_shared<SwitchComponent>(mWindow);
diff --git a/src/guis/GuiSettings.cpp b/src/guis/GuiSettings.cpp
index 29ec79249..8b5d777d0 100644
--- a/src/guis/GuiSettings.cpp
+++ b/src/guis/GuiSettings.cpp
@@ -1,4 +1,5 @@
 #include "GuiSettings.h"
+#include "../Window.h"
 #include "../Settings.h"
 #include "../views/ViewController.h"
 
diff --git a/src/scrapers/GamesDBScraper.cpp b/src/scrapers/GamesDBScraper.cpp
index fd8ea2299..fa35b1eca 100644
--- a/src/scrapers/GamesDBScraper.cpp
+++ b/src/scrapers/GamesDBScraper.cpp
@@ -1,14 +1,12 @@
 #include "GamesDBScraper.h"
 #include "../components/ScraperSearchComponent.h"
-#include "../components/AsyncReqComponent.h"
+#include "Scraper.h"
 #include "../Log.h"
 #include "../pugiXML/pugixml.hpp"
 #include "../MetaData.h"
 #include "../Settings.h"
 #include <boost/assign.hpp>
 
-const char* GamesDBScraper::getName() { return "TheGamesDB"; }
-
 using namespace PlatformIds;
 const std::map<PlatformId, const char*> gamesdb_platformid_map = boost::assign::map_list_of
     (THREEDO, "3DO")
@@ -61,14 +59,15 @@
     (ZX_SPECTRUM, "Sinclair ZX Spectrum");
 
 
-std::unique_ptr<ScraperSearchHandle> GamesDBScraper::getResultsAsync(const ScraperSearchParams& params)
+void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
+    std::vector<ScraperSearchResult>& results)
 {
-    std::string path = "/api/GetGame.php?";
+    std::string path = "thegamesdb.net/api/GetGame.php?";
std::string path = "thegamesdb.net/api/GetGame.php?"; std::string cleanName = params.nameOverride; if(cleanName.empty()) cleanName = params.game->getCleanName(); - + path += "name=" + HttpReq::urlEncode(cleanName); if(params.system->getPlatformId() != PLATFORM_UNKNOWN) @@ -78,58 +77,33 @@ std::unique_ptr GamesDBScraper::getResultsAsync(const Scrap { path += "&platform="; path += HttpReq::urlEncode(platformIt->second); - }else{ + } + else{ LOG(LogWarning) << "TheGamesDB scraper warning - no support for platform " << getPlatformName(params.system->getPlatformId()); } } - path = "thegamesdb.net" + path; - - return std::unique_ptr(new GamesDBHandle(params, path)); + requests.push(std::unique_ptr(new ScraperHttpRequest(results, path, &thegamesdb_process_httpreq))); } -GamesDBHandle::GamesDBHandle(const ScraperSearchParams& params, const std::string& url) : - mReq(std::unique_ptr(new HttpReq(url))) +void thegamesdb_process_httpreq(const std::unique_ptr& req, std::vector& results) { - setStatus(ASYNC_IN_PROGRESS); -} - -void GamesDBHandle::update() -{ - if(mStatus == ASYNC_DONE) - return; - - if(mReq->status() == HttpReq::REQ_IN_PROGRESS) - return; - - if(mReq->status() != HttpReq::REQ_SUCCESS) - { - std::stringstream ss; - ss << "Network error - " << mReq->getErrorMsg(); - setError(ss.str()); - return; - } - - // our HTTP request was successful - // try to build our result list - - std::vector results; + assert(req->status() == HttpReq::REQ_SUCCESS); pugi::xml_document doc; - pugi::xml_parse_result parseResult = doc.load(mReq->getContent().c_str()); + pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str()); if(!parseResult) { - setError("Error parsing XML"); + LOG(LogError) << "GamesDBRequest - Error parsing XML. \n\t" << parseResult.description() << ""; return; } pugi::xml_node data = doc.child("Data"); std::string baseImageUrl = data.child("baseImgUrl").text().get(); - - unsigned int resultNum = 0; + pugi::xml_node game = data.child("Game"); - while(game && resultNum < MAX_SCRAPER_RESULTS) + while(game && results.size() < MAX_SCRAPER_RESULTS) { ScraperSearchResult result; @@ -166,12 +140,6 @@ void GamesDBHandle::update() } results.push_back(result); - - resultNum++; game = game.next_sibling("Game"); } - - setStatus(ASYNC_DONE); - setResults(results); - return; } diff --git a/src/scrapers/GamesDBScraper.h b/src/scrapers/GamesDBScraper.h index f94671114..83ac525fa 100644 --- a/src/scrapers/GamesDBScraper.h +++ b/src/scrapers/GamesDBScraper.h @@ -1,24 +1,8 @@ #pragma once #include "Scraper.h" -#include "../HttpReq.h" -class GamesDBHandle : public ScraperSearchHandle -{ -public: - GamesDBHandle(const ScraperSearchParams& params, const std::string& url); +void thegamesdb_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr >& requests, + std::vector& results); - void update() override; - -private: - std::unique_ptr mReq; - ScraperSearchParams mParams; -}; - -class GamesDBScraper : public Scraper -{ -public: - std::unique_ptr getResultsAsync(const ScraperSearchParams& params) override; - - const char* getName(); -}; +void thegamesdb_process_httpreq(const std::unique_ptr& req, std::vector& results); diff --git a/src/scrapers/Scraper.cpp b/src/scrapers/Scraper.cpp index c60a16e4d..be87d0397 100644 --- a/src/scrapers/Scraper.cpp +++ b/src/scrapers/Scraper.cpp @@ -4,21 +4,90 @@ #include "../Settings.h" #include #include -#include +#include #include "GamesDBScraper.h" #include "TheArchiveScraper.h" -std::shared_ptr createScraperByName(const 
std::string& name) -{ - if(name == "TheGamesDB") - return std::shared_ptr(new GamesDBScraper()); - else if(name == "TheArchive") - return std::shared_ptr(new TheArchiveScraper()); +const std::map scraper_request_funcs = boost::assign::map_list_of + ("TheGamesDB", &thegamesdb_generate_scraper_requests) + ("TheArchive", &thearchive_generate_scraper_requests); - return nullptr; +std::unique_ptr startScraperSearch(const ScraperSearchParams& params) +{ + const std::string& name = Settings::getInstance()->getString("Scraper"); + + std::unique_ptr handle(new ScraperSearchHandle()); + scraper_request_funcs.at(name)(params, handle->mRequestQueue, handle->mResults); + return handle; } +std::vector getScraperList() +{ + std::vector list; + for(auto it = scraper_request_funcs.begin(); it != scraper_request_funcs.end(); it++) + { + list.push_back(it->first); + } + + return list; +} + +// ScraperSearchHandle +ScraperSearchHandle::ScraperSearchHandle() +{ + setStatus(ASYNC_IN_PROGRESS); +} + +void ScraperSearchHandle::update() +{ + if(mStatus == ASYNC_DONE) + return; + + while(!mRequestQueue.empty() && mRequestQueue.front()->update()) + mRequestQueue.pop(); + + if(mRequestQueue.empty()) + { + setStatus(ASYNC_DONE); + return; + } +} + + + +// ScraperRequest +ScraperRequest::ScraperRequest(std::vector& resultsWrite) : mResults(resultsWrite) +{ +} + + +// ScraperHttpRequest +ScraperHttpRequest::ScraperHttpRequest(std::vector& resultsWrite, const std::string& url, scraper_process_httpreq processFunc) + : ScraperRequest(resultsWrite), mProcessFunc(processFunc) +{ + mReq = std::unique_ptr(new HttpReq(url)); +} + +bool ScraperHttpRequest::update() +{ + if(mReq->status() == HttpReq::REQ_SUCCESS) + { + mProcessFunc(mReq, mResults); + return true; + } + + if(mReq->status() == HttpReq::REQ_IN_PROGRESS) + return false; + + // everything else is some sort of error + LOG(LogError) << "ScraperHttpRequest network error - " << mReq->getErrorMsg(); + return true; +} + + +// metadata resolving stuff + std::unique_ptr resolveMetaDataAssets(const ScraperSearchResult& result, const ScraperSearchParams& search) { return std::unique_ptr(new MDResolveHandle(result, search)); diff --git a/src/scrapers/Scraper.h b/src/scrapers/Scraper.h index 88422e7bd..38fd0365d 100644 --- a/src/scrapers/Scraper.h +++ b/src/scrapers/Scraper.h @@ -6,6 +6,7 @@ #include "../AsyncHandle.h" #include #include +#include struct ScraperSearchParams { @@ -24,29 +25,87 @@ struct ScraperSearchResult std::string thumbnailUrl; }; +// So let me explain why I've abstracted this so heavily. +// There are two ways I can think of that you'd want to write a scraper. + +// 1. Do some HTTP request(s) -> process it -> return the results +// 2. Do some local filesystem queries (an offline scraper) -> return the results + +// The first way needs to be asynchronous while it's waiting for the HTTP request to return. +// The second doesn't. + +// It would be nice if we could write it like this: +// search = generate_http_request(searchparams); +// wait_until_done(search); +// ... process search ... +// return results; + +// We could do this if we used threads. Right now ES doesn't because I'm pretty sure I'll fuck it up, +// and I'm not sure of the performance of threads on the Pi (single-core ARM). +// We could also do this if we used coroutines. +// I can't find a really good cross-platform coroutine library (x86/64/ARM Linux + Windows), +// and I don't want to spend more time chasing libraries than just writing it the long way once. + +// So, I did it the "long" way. 
+// ScraperSearchHandle - one logical search, e.g. "search for mario"
+// ScraperRequest - encapsulates some sort of asynchronous request that will ultimately return some results
+// ScraperHttpRequest - implementation of ScraperRequest that waits on an HttpReq, then processes it with some processing function.
+
+
+// a scraper search gathers results from (potentially multiple) ScraperRequests
+class ScraperRequest
+{
+public:
+    ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite);
+
+    // returns "true" once we're done
+    virtual bool update() = 0;
+
+protected:
+    std::vector<ScraperSearchResult>& mResults;
+};
+
+
+typedef void (*scraper_process_httpreq)(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
+
+// a single HTTP request that needs to be processed to get the results
+class ScraperHttpRequest : public ScraperRequest
+{
+public:
+    ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url, scraper_process_httpreq processFunc);
+    bool update() override;
+
+private:
+    scraper_process_httpreq mProcessFunc;
+    std::unique_ptr<HttpReq> mReq;
+};
+
+// a request to get a list of results
 class ScraperSearchHandle : public AsyncHandle
 {
 public:
-    virtual void update() = 0;
+    ScraperSearchHandle();
+
+    void update();
 
     inline const std::vector<ScraperSearchResult>& getResults() const { assert(mStatus != ASYNC_IN_PROGRESS); return mResults; }
 
 protected:
-    inline void setResults(const std::vector<ScraperSearchResult>& results) { mResults = results; }
+    friend std::unique_ptr<ScraperSearchHandle> startScraperSearch(const ScraperSearchParams& params);
 
-private:
+    std::queue< std::unique_ptr<ScraperRequest> > mRequestQueue;
     std::vector<ScraperSearchResult> mResults;
 };
 
-class Scraper
-{
-public:
-    //Get a list of potential results.
-    virtual std::unique_ptr<ScraperSearchHandle> getResultsAsync(const ScraperSearchParams& params) = 0;
+// will use the current scraper settings to pick the result source
+std::unique_ptr<ScraperSearchHandle> startScraperSearch(const ScraperSearchParams& params);
 
-    virtual const char* getName() = 0;
-};
+// returns a list of valid scraper names
+std::vector<std::string> getScraperList();
+
+typedef void (*generate_scraper_requests_func)(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests, std::vector<ScraperSearchResult>& results);
+
+// -------------------------------------------------------------------------
 
-std::shared_ptr<Scraper> createScraperByName(const std::string& name);
 
 // Meta data asset downloading stuff.
diff --git a/src/scrapers/TheArchiveScraper.cpp b/src/scrapers/TheArchiveScraper.cpp
index 9302ea528..cf1c722bc 100644
--- a/src/scrapers/TheArchiveScraper.cpp
+++ b/src/scrapers/TheArchiveScraper.cpp
@@ -4,64 +4,37 @@
 #include "../Log.h"
 #include "../pugiXML/pugixml.hpp"
 
-const char* TheArchiveScraper::getName() { return "TheArchive"; }
-
-std::unique_ptr<ScraperSearchHandle> TheArchiveScraper::getResultsAsync(const ScraperSearchParams& params)
+void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
+    std::vector<ScraperSearchResult>& results)
 {
-    std::string path = "/2.0/Archive.search/xml/7TTRM4MNTIKR2NNAGASURHJOZJ3QXQC5/";
+    std::string path = "api.archive.vg/2.0/Archive.search/xml/7TTRM4MNTIKR2NNAGASURHJOZJ3QXQC5/";
 
     std::string cleanName = params.nameOverride;
     if(cleanName.empty())
        cleanName = params.game->getCleanName();
-    
+
     path += HttpReq::urlEncode(cleanName);
     //platform TODO, should use some params.system get method
 
-    path = "api.archive.vg" + path;
-
-    return std::unique_ptr<ScraperSearchHandle>(new TheArchiveHandle(params, path));
+    requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &thearchive_process_httpreq)));
 }
 
-TheArchiveHandle::TheArchiveHandle(const ScraperSearchParams& params, const std::string& url) :
-    mReq(std::unique_ptr<HttpReq>(new HttpReq(url)))
+void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
 {
-    setStatus(ASYNC_IN_PROGRESS);
-}
-
-void TheArchiveHandle::update()
-{
-    if(mStatus == ASYNC_DONE)
-        return;
-
-    if(mReq->status() == HttpReq::REQ_IN_PROGRESS)
-        return;
-
-    if(mReq->status() != HttpReq::REQ_SUCCESS)
-    {
-        std::stringstream ss;
-        ss << "Network error: " << mReq->getErrorMsg();
-        setError(ss.str());
-        return;
-    }
-
-    // if we're here, our HTTP request finished successfully
-
-    // so, let's try building our result list
-    std::vector<ScraperSearchResult> results;
+    assert(req->status() == HttpReq::REQ_SUCCESS);
 
     pugi::xml_document doc;
-    pugi::xml_parse_result parseResult = doc.load(mReq->getContent().c_str());
+    pugi::xml_parse_result parseResult = doc.load(req->getContent().c_str());
     if(!parseResult)
     {
-        setError("Error parsing XML");
+        LOG(LogError) << "TheArchiveRequest - error parsing XML.\n\t" << parseResult.description();
         return;
     }
 
     pugi::xml_node data = doc.child("OpenSearchDescription").child("games");
 
-    unsigned int resultNum = 0;
     pugi::xml_node game = data.child("game");
-    while(game && resultNum < MAX_SCRAPER_RESULTS)
+    while(game && results.size() < MAX_SCRAPER_RESULTS)
     {
         ScraperSearchResult result;
 
@@ -86,11 +59,6 @@ void TheArchiveHandle::update()
         result.thumbnailUrl = thumbnail.text().get();
 
         results.push_back(result);
-
-        resultNum++;
         game = game.next_sibling("game");
     }
-
-    setStatus(ASYNC_DONE);
-    setResults(results);
 }
diff --git a/src/scrapers/TheArchiveScraper.h b/src/scrapers/TheArchiveScraper.h
index 60724c946..89105e0cc 100644
--- a/src/scrapers/TheArchiveScraper.h
+++ b/src/scrapers/TheArchiveScraper.h
@@ -1,24 +1,8 @@
 #pragma once
 
 #include "Scraper.h"
-#include "../HttpReq.h"
 
-class TheArchiveHandle : public ScraperSearchHandle
-{
-public:
-    TheArchiveHandle(const ScraperSearchParams& params, const std::string& url);
+void thearchive_generate_scraper_requests(const ScraperSearchParams& params, std::queue< std::unique_ptr<ScraperRequest> >& requests,
+    std::vector<ScraperSearchResult>& results);
 
-    void update() override;
-
-private:
-    std::unique_ptr<HttpReq> mReq;
-    ScraperSearchParams mParams;
-};
-
-class TheArchiveScraper : public Scraper
-{
-public:
-    std::unique_ptr<ScraperSearchHandle> getResultsAsync(const ScraperSearchParams& params) override;
-
-    const char* getName();
-};
+void thearchive_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);
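
A few usage sketches follow (commentary, not part of the patch; anything not visible in the diff above is flagged as an assumption).

First, the consumer side. ScraperSearchComponent only shows the kickoff (mSearchHandle = startScraperSearch(params)); the other half is pumping the handle once per frame until it finishes. A minimal sketch, assuming AsyncHandle exposes a status() accessor returning the ASYNC_* states (inferred from mStatus and the assert in getResults(), not shown in this diff):

    #include "scrapers/Scraper.h"
    #include "Log.h"

    void pumpSearch(std::unique_ptr<ScraperSearchHandle>& handle)
    {
        // drains the request queue; flips the handle to ASYNC_DONE once empty
        handle->update();

        if(handle->status() == ASYNC_DONE)
        {
            // getResults() asserts the search is no longer in progress
            for(auto it = handle->getResults().begin(); it != handle->getResults().end(); it++)
                LOG(LogInfo) << "result thumbnail: " << it->thumbnailUrl;
        }
    }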
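Second, the extension seam. Adding a scraper under this design is a pair of free functions plus one row in Scraper.cpp's scraper_request_funcs table; getScraperList() and the GuiMenu option list then pick the new entry up with no further changes. Everything named "exampledb"/"ExampleDB" below is hypothetical, including the URL:

    // hypothetical ExampleDBScraper.h
    #pragma once
    #include "Scraper.h"

    void exampledb_generate_scraper_requests(const ScraperSearchParams& params,
        std::queue< std::unique_ptr<ScraperRequest> >& requests,
        std::vector<ScraperSearchResult>& results);
    void exampledb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results);

    // hypothetical ExampleDBScraper.cpp
    void exampledb_generate_scraper_requests(const ScraperSearchParams& params,
        std::queue< std::unique_ptr<ScraperRequest> >& requests,
        std::vector<ScraperSearchResult>& results)
    {
        std::string cleanName = params.nameOverride.empty() ? params.game->getCleanName() : params.nameOverride;
        std::string path = "exampledb.example.com/api/search?name=" + HttpReq::urlEncode(cleanName);

        // a single search may queue any number of requests; all of them write into 'results'
        requests.push(std::unique_ptr<ScraperRequest>(new ScraperHttpRequest(results, path, &exampledb_process_httpreq)));
    }

    void exampledb_process_httpreq(const std::unique_ptr<HttpReq>& req, std::vector<ScraperSearchResult>& results)
    {
        assert(req->status() == HttpReq::REQ_SUCCESS); // ScraperHttpRequest only calls this on success

        // parse req->getContent() and append up to MAX_SCRAPER_RESULTS entries to 'results'
    }

    // and the registration, one more map_list_of row in Scraper.cpp:
    //     ("ExampleDB", &exampledb_generate_scraper_requests)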
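Finally, the subject line's "requests mixed between scrapers" falls out of the data model rather than any extra code: every ScraperRequest in the queue writes into the same results vector, so one handle can hold requests generated by different scrapers. A sketch of a hypothetical combined search; note it would have to live in Scraper.cpp or be friended like startScraperSearch, since mRequestQueue and mResults are protected:

    std::unique_ptr<ScraperSearchHandle> startMixedSearch(const ScraperSearchParams& params)
    {
        std::unique_ptr<ScraperSearchHandle> handle(new ScraperSearchHandle());

        // update() drains these in order; both append to the same result list
        thegamesdb_generate_scraper_requests(params, handle->mRequestQueue, handle->mResults);
        thearchive_generate_scraper_requests(params, handle->mRequestQueue, handle->mResults);
        return handle;
    }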