2020-09-21 17:17:34 +00:00
|
|
|
// SPDX-License-Identifier: MIT
|
2020-05-26 16:34:33 +00:00
|
|
|
//
|
2020-09-21 17:17:34 +00:00
|
|
|
// EmulationStation Desktop Edition
|
2020-06-21 12:25:28 +00:00
|
|
|
// ScreenScraper.cpp
|
2020-05-26 16:34:33 +00:00
|
|
|
//
|
2020-06-21 12:25:28 +00:00
|
|
|
// Functions specifically for scraping from screenscraper.fr
|
|
|
|
// Called from Scraper.
|
2020-05-26 16:34:33 +00:00
|
|
|
//
|
|
|
|
|
2019-01-11 19:51:05 +00:00
|
|
|
#include "scrapers/ScreenScraper.h"
|
|
|
|
|
2020-07-30 18:11:45 +00:00
|
|
|
#include "math/Misc.h"
|
2019-01-11 19:51:05 +00:00
|
|
|
#include "utils/StringUtil.h"
|
2020-07-30 18:11:45 +00:00
|
|
|
#include "utils/TimeUtil.h"
|
2019-01-11 19:51:05 +00:00
|
|
|
#include "FileData.h"
|
|
|
|
#include "Log.h"
|
|
|
|
#include "PlatformId.h"
|
|
|
|
#include "Settings.h"
|
|
|
|
#include "SystemData.h"
|
2020-07-30 18:11:45 +00:00
|
|
|
|
2020-12-28 10:29:32 +00:00
|
|
|
#include <cmath>
|
2019-01-11 19:51:05 +00:00
|
|
|
#include <cstring>
|
2020-07-30 18:11:45 +00:00
|
|
|
#include <pugixml.hpp>
|
2019-01-11 19:51:05 +00:00
|
|
|
|
|
|
|
using namespace PlatformIds;
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// List of systems and their IDs from:
|
|
|
|
// https://www.screenscraper.fr/api/systemesListe.php?devid=xxx&devpassword=yyy&softname=zzz&output=XML
|
|
|
|
const std::map<PlatformId, unsigned short> screenscraper_platformid_map {
|
2020-06-21 12:25:28 +00:00
|
|
|
{ THREEDO, 29 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ COMMODORE_AMIGA, 64 },
|
|
|
|
{ COMMODORE_AMIGA_CD32, 130 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ AMSTRAD_CPC, 65 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ AMSTRAD_GX4000, 87 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ APPLE_II, 86 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ APPLE_IIGS, 217 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ ARCADE, 75 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ ATARI_800, 43 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ ATARI_2600, 26 },
|
|
|
|
{ ATARI_5200, 40 },
|
|
|
|
{ ATARI_7800, 41 },
|
|
|
|
{ ATARI_JAGUAR, 27 },
|
|
|
|
{ ATARI_JAGUAR_CD, 171 },
|
|
|
|
{ ATARI_LYNX, 28 },
|
|
|
|
{ ATARI_ST, 42},
|
2021-02-05 16:38:21 +00:00
|
|
|
{ ATARI_XE, 43 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ ATOMISWAVE, 53 },
|
2021-02-05 16:38:21 +00:00
|
|
|
{ BBC_MICRO, 37 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ CAVESTORY, 135 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ COLECOVISION, 48 },
|
|
|
|
{ COMMODORE_64, 66 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ COMMODORE_CDTV, 129 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ DAPHNE, 49 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ INTELLIVISION, 115 },
|
2021-02-07 23:17:01 +00:00
|
|
|
{ GAMEENGINE_LUTRO, 206 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ APPLE_MACINTOSH, 146 },
|
|
|
|
{ MICROSOFT_XBOX, 32 },
|
|
|
|
{ MICROSOFT_XBOX_360, 33 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ MOONLIGHT, 138 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ MSX, 113 },
|
2021-02-07 21:35:34 +00:00
|
|
|
{ MSX2, 116 },
|
|
|
|
{ MSX_TURBO_R, 118 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ SNK_NEO_GEO, 142 },
|
|
|
|
{ SNK_NEO_GEO_CD, 142 },
|
|
|
|
{ SNK_NEO_GEO_POCKET, 25},
|
|
|
|
{ SNK_NEO_GEO_POCKET_COLOR, 82 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ NINTENDO_3DS, 17 },
|
|
|
|
{ NINTENDO_64, 14 },
|
|
|
|
{ NINTENDO_DS, 15 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ NINTENDO_FAMICOM_DISK_SYSTEM, 106 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ NINTENDO_ENTERTAINMENT_SYSTEM, 3 },
|
|
|
|
{ FAIRCHILD_CHANNELF, 80 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ NINTENDO_GAME_BOY, 9 },
|
|
|
|
{ NINTENDO_GAME_BOY_ADVANCE, 12 },
|
|
|
|
{ NINTENDO_GAME_BOY_COLOR, 10 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ NINTENDO_GAMECUBE, 13 },
|
|
|
|
{ NINTENDO_WII, 16 },
|
|
|
|
{ NINTENDO_WII_U, 18 },
|
|
|
|
{ NINTENDO_VIRTUAL_BOY, 11 },
|
|
|
|
{ NINTENDO_GAME_AND_WATCH, 52 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ NINTENDO_POKEMON_MINI, 211 },
|
|
|
|
{ NINTENDO_SATELLAVIEW, 107 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ BANDAI_SUFAMI_TURBO, 108 },
|
2020-07-29 09:19:26 +00:00
|
|
|
{ DOS, 135 },
|
2020-07-30 20:19:12 +00:00
|
|
|
{ PC, 135 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ NEC_PCFX, 72 },
|
2021-02-07 23:17:01 +00:00
|
|
|
{ GAMEENGINE_OPENBOR, 214 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ TANGERINE_ORIC, 131 },
|
2021-02-07 23:17:01 +00:00
|
|
|
{ GAMEENGINE_SCUMMVM, 123},
|
2020-06-21 12:25:28 +00:00
|
|
|
{ SEGA_32X, 19 },
|
|
|
|
{ SEGA_CD, 20 },
|
|
|
|
{ SEGA_DREAMCAST, 23 },
|
|
|
|
{ SEGA_GAME_GEAR, 21 },
|
|
|
|
{ SEGA_GENESIS, 1 },
|
|
|
|
{ SEGA_MASTER_SYSTEM, 2 },
|
|
|
|
{ SEGA_MEGA_DRIVE, 1 },
|
|
|
|
{ SEGA_SATURN, 22 },
|
|
|
|
{ SEGA_SG1000, 109 },
|
2021-02-05 20:35:51 +00:00
|
|
|
{ SHARP_X1, 220},
|
2020-07-25 14:18:41 +00:00
|
|
|
{ SHARP_X68000, 79},
|
2021-02-07 23:17:01 +00:00
|
|
|
{ GAMEENGINE_SOLARUS, 223 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ SONY_PLAYSTATION, 57 },
|
|
|
|
{ SONY_PLAYSTATION_2, 58 },
|
|
|
|
{ SONY_PLAYSTATION_3, 59 },
|
|
|
|
{ SONY_PLAYSTATION_VITA, 62 },
|
|
|
|
{ SONY_PLAYSTATION_PORTABLE, 61 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ SAMCOUPE, 213 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ SUPER_NINTENDO, 4 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ NEC_SUPERGRAFX, 105 },
|
2021-02-07 23:17:01 +00:00
|
|
|
{ GAMEENGINE_TIC80, 222 },
|
2021-02-07 20:36:33 +00:00
|
|
|
{ NEC_PC_8800, 221},
|
|
|
|
{ NEC_PC_9800, 208},
|
2021-02-07 19:49:52 +00:00
|
|
|
{ NEC_PC_ENGINE, 31 },
|
|
|
|
{ NEC_PC_ENGINE_CD, 114 },
|
|
|
|
{ BANDAI_WONDERSWAN, 45 },
|
|
|
|
{ BANDAI_WONDERSWAN_COLOR, 46 },
|
|
|
|
{ SINCLAIR_ZX_SPECTRUM, 76 },
|
|
|
|
{ SINCLAIR_ZX81_SINCLAR, 77 },
|
2020-06-21 12:25:28 +00:00
|
|
|
{ VIDEOPAC_ODYSSEY2, 104 },
|
|
|
|
{ VECTREX, 102 },
|
2021-02-07 19:49:52 +00:00
|
|
|
{ TANDY_TRS80, 144 },
|
|
|
|
{ TANDY_COLOR_COMPUTER, 144 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ SEGA_NAOMI, 56 },
|
2021-02-05 16:27:55 +00:00
|
|
|
{ THOMSON_MOTO, 141 },
|
2020-08-05 08:45:27 +00:00
|
|
|
{ UZEBOX, 216 },
|
|
|
|
{ SPECTRAVIDEO, 218 },
|
|
|
|
{ PALM_OS, 219 }
|
2019-01-11 19:51:05 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Helper XML parsing method, finding a node-by-name recursively.
|
2020-05-26 16:34:33 +00:00
|
|
|
pugi::xml_node find_node_by_name_re(const pugi::xml_node& node,
|
2020-06-21 12:25:28 +00:00
|
|
|
const std::vector<std::string> node_names) {
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
for (const std::string& _val : node_names) {
|
|
|
|
pugi::xpath_query query_node_name((static_cast<std::string>("//") + _val).c_str());
|
|
|
|
pugi::xpath_node_set results = node.select_nodes(query_node_name);
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
if (results.size() > 0)
|
|
|
|
return results.first().node();
|
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
return pugi::xml_node();
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Help XML parsing method, finding an direct child XML node starting from the parent and
|
|
|
|
// filtering by an attribute value list.
|
|
|
|
pugi::xml_node find_child_by_attribute_list(const pugi::xml_node& node_parent,
|
2020-06-21 12:25:28 +00:00
|
|
|
const std::string& node_name, const std::string& attribute_name,
|
|
|
|
const std::vector<std::string> attribute_values)
|
2019-01-11 19:51:05 +00:00
|
|
|
{
|
2020-06-21 12:25:28 +00:00
|
|
|
for (auto _val : attribute_values) {
|
|
|
|
for (pugi::xml_node node : node_parent.children(node_name.c_str())) {
|
2020-12-16 22:59:00 +00:00
|
|
|
if (node.attribute(attribute_name.c_str()).value() == _val)
|
2020-06-21 12:25:28 +00:00
|
|
|
return node;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-23 18:07:00 +00:00
|
|
|
return pugi::xml_node(nullptr);
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void screenscraper_generate_scraper_requests(const ScraperSearchParams& params,
|
2020-11-14 14:30:49 +00:00
|
|
|
std::queue<std::unique_ptr<ScraperRequest>>& requests,
|
2020-06-21 12:25:28 +00:00
|
|
|
std::vector<ScraperSearchResult>& results)
|
2019-01-11 19:51:05 +00:00
|
|
|
{
|
2020-06-21 12:25:28 +00:00
|
|
|
std::string path;
|
|
|
|
|
|
|
|
ScreenScraperRequest::ScreenScraperConfig ssConfig;
|
|
|
|
|
2020-11-14 14:30:49 +00:00
|
|
|
if (params.game->isArcadeGame())
|
|
|
|
ssConfig.isArcadeSystem = true;
|
|
|
|
else
|
|
|
|
ssConfig.isArcadeSystem = false;
|
|
|
|
|
2020-10-10 11:05:12 +00:00
|
|
|
if (params.nameOverride == "") {
|
|
|
|
if (Settings::getInstance()->getBool("ScraperSearchMetadataName"))
|
2020-11-14 14:30:49 +00:00
|
|
|
path = ssConfig.getGameSearchUrl(
|
|
|
|
Utils::String::removeParenthesis(params.game->metadata.get("name")));
|
2020-10-10 11:05:12 +00:00
|
|
|
else
|
|
|
|
path = ssConfig.getGameSearchUrl(params.game->getCleanName());
|
|
|
|
}
|
|
|
|
else {
|
2020-06-21 12:25:28 +00:00
|
|
|
path = ssConfig.getGameSearchUrl(params.nameOverride);
|
2020-10-10 11:05:12 +00:00
|
|
|
}
|
2020-06-21 12:25:28 +00:00
|
|
|
|
|
|
|
auto& platforms = params.system->getPlatformIds();
|
|
|
|
std::vector<unsigned short> p_ids;
|
|
|
|
|
|
|
|
// Get the IDs of each platform from the ScreenScraper list.
|
|
|
|
for (auto platformIt = platforms.cbegin(); platformIt != platforms.cend(); platformIt++) {
|
|
|
|
auto mapIt = screenscraper_platformid_map.find(*platformIt);
|
|
|
|
|
|
|
|
if (mapIt != screenscraper_platformid_map.cend()) {
|
|
|
|
p_ids.push_back(mapIt->second);
|
|
|
|
}
|
|
|
|
else {
|
2021-02-08 19:56:11 +00:00
|
|
|
LOG(LogWarning) << "ScreenScraper: No support for platform \"" <<
|
|
|
|
getPlatformName(*platformIt) << "\", search will be inaccurate";
|
2020-06-21 12:25:28 +00:00
|
|
|
// Add the scrape request without a platform/system ID.
|
|
|
|
requests.push(std::unique_ptr<ScraperRequest>
|
|
|
|
(new ScreenScraperRequest(requests, results, path)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-08 19:56:11 +00:00
|
|
|
if (p_ids.size() == 0) {
|
|
|
|
LOG(LogWarning) << "ScreenScraper: No platform defined, search will be inaccurate";
|
|
|
|
// Add the scrape request without a platform/system ID.
|
|
|
|
requests.push(std::unique_ptr<ScraperRequest>
|
|
|
|
(new ScreenScraperRequest(requests, results, path)));
|
|
|
|
}
|
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
// Sort the platform IDs and remove duplicates.
|
|
|
|
std::sort(p_ids.begin(), p_ids.end());
|
|
|
|
auto last = std::unique(p_ids.begin(), p_ids.end());
|
|
|
|
p_ids.erase(last, p_ids.end());
|
|
|
|
|
2020-07-03 18:23:51 +00:00
|
|
|
for (auto platform = p_ids.cbegin(); platform != p_ids.cend(); platform++) {
|
2020-06-21 12:25:28 +00:00
|
|
|
path += "&systemeid=";
|
|
|
|
path += HttpReq::urlEncode(std::to_string(*platform));
|
|
|
|
requests.push(std::unique_ptr<ScraperRequest>
|
|
|
|
(new ScreenScraperRequest(requests, results, path)));
|
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
void ScreenScraperRequest::process(const std::unique_ptr<HttpReq>& req,
|
2020-06-21 12:25:28 +00:00
|
|
|
std::vector<ScraperSearchResult>& results)
|
2019-01-11 19:51:05 +00:00
|
|
|
{
|
2020-06-21 12:25:28 +00:00
|
|
|
assert(req->status() == HttpReq::REQ_SUCCESS);
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
pugi::xml_document doc;
|
2020-07-28 21:15:35 +00:00
|
|
|
|
|
|
|
// It seems as if screenscraper.fr has changed their API slightly and now just returns
|
|
|
|
// a simple text messsage upon not finding any matching game. If we don't return here,
|
|
|
|
// we will get a pugixml error trying to process this string as an XML message.
|
|
|
|
if (req->getContent().find("Erreur : Rom") == 0)
|
|
|
|
return;
|
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
pugi::xml_parse_result parseResult = doc.load_string(req->getContent().c_str());
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
if (!parseResult) {
|
|
|
|
std::stringstream ss;
|
2020-07-26 21:30:45 +00:00
|
|
|
ss << "ScreenScraperRequest - Error parsing XML: " << parseResult.description();
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
std::string err = ss.str();
|
|
|
|
LOG(LogError) << err;
|
2020-08-05 08:51:02 +00:00
|
|
|
setError("ScreenScraper error: \n" + req->getContent());
|
2020-07-30 18:11:45 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
return;
|
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
processGame(doc, results);
|
2020-09-26 11:07:52 +00:00
|
|
|
|
|
|
|
// For some files, screenscraper.fr consistently responds with the game name 'ZZZ(notgame)',
|
|
|
|
// or sometimes in the longer format 'ZZZ(notgame):Fichier Annexes - Non Jeux'. For instance
|
|
|
|
// this can happen for configuration files for DOS games such as 'setup.exe' and similar.
|
|
|
|
// We definitely don't want to save these to our gamelists, so we simply skip these
|
|
|
|
// responses. There also seems to be some cases where this type of response is randomly
|
|
|
|
// returned instead of a valid game name, and retrying a second time returns the proper
|
|
|
|
// name. But it's basically impossible to know which is the case, and we really can't
|
|
|
|
// compensate for errors in the scraper service.
|
|
|
|
for (auto it = results.cbegin(); it != results.cend(); it++) {
|
|
|
|
std::string gameName = Utils::String::toUpper((*it).mdl.get("name"));
|
|
|
|
if (gameName.substr(0, 12) == "ZZZ(NOTGAME)") {
|
|
|
|
LOG(LogWarning) << "ScreenScraperRequest - Received \"ZZZ(notgame)\" as game name, "
|
2021-01-26 16:28:54 +00:00
|
|
|
"ignoring response";
|
2020-09-26 11:07:52 +00:00
|
|
|
results.pop_back();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Converts every <jeu> (game) node of a ScreenScraper XML response into a
// ScraperSearchResult and appends it to out_results. The games are read either directly
// from the <Data> node or from its <jeux> child if that is present.
// For each game the name, rating, description, release date, developer, publisher, genre,
// player count and media URLs are extracted, using the configured scraper region and
// language with a fixed fallback order. Account and daily request allowance information
// from the <ssuser> node is logged, and the remaining allowance is stored in each result.
void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc,
        std::vector<ScraperSearchResult>& out_results)
{
    pugi::xml_node data = xmldoc.child("Data");
    const pugi::xml_node ssUser = data.child("ssuser");

    // Check if our username was included in the response (assuming an account is used).
    // It seems as if this information is randomly missing from the server response, which
    // also seems to correlate with missing scraper allowance data. This is however a scraper
    // service issue so we're not attempting to compensate for it here.
    if (Settings::getInstance()->getBool("ScraperUseAccountScreenScraper") &&
            Settings::getInstance()->getString("ScraperUsernameScreenScraper") != "" &&
            Settings::getInstance()->getString("ScraperPasswordScreenScraper") != "") {
        std::string userID = ssUser.child("id").text().get();
        if (userID != "") {
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Scraping using account \"" <<
                    userID << "\"";
        }
        else {
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): The configured account '" <<
                    Settings::getInstance()->getString("ScraperUsernameScreenScraper") <<
                    "' was not included in the scraper response, wrong username or password?";
        }
    }

    // Find how many more requests we can make before the scraper request
    // allowance counter is reset. For some strange reason the ssuser information
    // is not provided for all games even though the request looks identical apart
    // from the game name.
    const unsigned int requestsToday = ssUser.child("requeststoday").text().as_uint();
    const unsigned int maxRequestsPerDay = ssUser.child("maxrequestsperday").text().as_uint();
    // Clamp to zero, as an unsigned subtraction would otherwise wrap around to a huge
    // value if the server reports more requests than the daily maximum.
    const unsigned int scraperRequestAllowance =
            (maxRequestsPerDay > requestsToday ? maxRequestsPerDay - requestsToday : 0);

    // Scraping allowance.
    if (maxRequestsPerDay > 0) {
        LOG(LogDebug) << "ScreenScraperRequest::processGame(): Daily scraping allowance: " <<
                requestsToday << "/" << maxRequestsPerDay << " (" <<
                scraperRequestAllowance << " remaining)";
    }
    else {
        LOG(LogDebug) << "ScreenScraperRequest::processGame(): Daily scraping allowance: "
                "No statistics were provided with the response";
    }

    if (data.child("jeux"))
        data = data.child("jeux");

    // These are identical for all games in the response, so set them up only once.
    ScreenScraperRequest::ScreenScraperConfig ssConfig;
    const std::string region =
            Utils::String::toLower(Settings::getInstance()->getString("ScraperRegion"));
    const std::string language =
            Utils::String::toLower(Settings::getInstance()->getString("ScraperLanguage"));

    for (pugi::xml_node game = data.child("jeu"); game; game = game.next_sibling("jeu")) {
        ScraperSearchResult result;

        result.scraperRequestAllowance = scraperRequestAllowance;
        result.gameID = game.attribute("id").as_string();

        // Name fallback: configured region, WOR(LD), US, SS, EU, JP.
        // (Xpath: Data/jeu[0]/noms/nom[*]).
        result.mdl.set("name", find_child_by_attribute_list(game.child("noms"),
                "nom", "region", { region, "wor", "us" , "ss", "eu", "jp" }).text().get());
        LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name");

        // Validate rating.
        // Process the rating even if the setting to scrape ratings has been disabled.
        // This is required so that the rating can still be shown in the scraper GUI.
        // GuiScraperSearch::saveMetadata() will take care of skipping the rating saving
        // if this option has been set as such.
        if (game.child("note")) {
            // The raw value is divided by 20 to get the rating that is stored.
            float ratingVal = (game.child("note").text().as_int() / 20.0f);
            // Round up to the closest .1 value, i.e. to the closest half-star.
            ratingVal = ceilf(ratingVal / 0.1f) / 10;
            std::stringstream ss;
            ss << ratingVal;
            if (ratingVal > 0) {
                result.mdl.set("rating", ss.str());
                LOG(LogDebug) << "ScreenScraperRequest::processGame(): Rating: " <<
                        result.mdl.get("rating");
            }
        }

        // Description fallback language: EN, WOR(LD).
        std::string description = find_child_by_attribute_list(game.child("synopsis"),
                "synopsis", "langue", { language, "en", "wor" }).text().get();

        // Translate some HTML character codes to UTF-8 characters. The replacements are
        // chained on the same string so that each one builds on the previous result.
        if (!description.empty()) {
            description = Utils::String::replace(description, "&nbsp;", " ");
            description = Utils::String::replace(description, "&copy;", "©");
            result.mdl.set("desc", description);
        }

        // Get the date proper. The API returns multiple 'date' children nodes to the 'dates'
        // main child of 'jeu'.
        // Date fallback: WOR(LD), US, SS, JP, EU.
        std::string _date = find_child_by_attribute_list(game.child("dates"), "date", "region",
                { region, "wor", "us", "ss", "jp", "eu" }).text().get();

        // Date can be YYYY-MM-DD or just YYYY.
        if (_date.length() > 4) {
            result.mdl.set("releasedate", Utils::Time::DateTime(
                    Utils::Time::stringToTime(_date, "%Y-%m-%d")));
        }
        else if (_date.length() > 0) {
            result.mdl.set("releasedate", Utils::Time::DateTime(
                    Utils::Time::stringToTime(_date, "%Y")));
        }

        if (_date.length() > 0) {
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Release Date (unparsed): " <<
                    _date;
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Release Date (parsed): " <<
                    result.mdl.get("releasedate");
        }

        // Developer for the game (Xpath: Data/jeu[0]/developpeur).
        std::string developer = game.child("developpeur").text().get();
        if (!developer.empty()) {
            result.mdl.set("developer", Utils::String::replace(developer, "&nbsp;", " "));
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Developer: " <<
                    result.mdl.get("developer");
        }

        // Publisher for the game (Xpath: Data/jeu[0]/editeur).
        std::string publisher = game.child("editeur").text().get();
        if (!publisher.empty()) {
            result.mdl.set("publisher", Utils::String::replace(publisher, "&nbsp;", " "));
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Publisher: " <<
                    result.mdl.get("publisher");
        }

        // Genre fallback language: EN. (Xpath: Data/jeu[0]/genres/genre[*]).
        std::string genre = find_child_by_attribute_list(game.child("genres"),
                "genre", "langue", { language, "en" }).text().get();
        if (!genre.empty()) {
            result.mdl.set("genre", genre);
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Genre: " <<
                    result.mdl.get("genre");
        }

        // Players.
        std::string players = game.child("joueurs").text().get();
        if (!players.empty()) {
            result.mdl.set("players", players);
            LOG(LogDebug) << "ScreenScraperRequest::processGame(): Players: " <<
                    result.mdl.get("players");
        }

        // Media super-node.
        pugi::xml_node media_list = game.child("medias");

        if (media_list) {
            // 3D box
            processMedia(result, media_list, ssConfig.media_3dbox,
                    result.box3dUrl, result.box3dFormat, region);
            // Cover
            processMedia(result, media_list, ssConfig.media_cover,
                    result.coverUrl, result.coverFormat, region);
            // Marquee (wheel)
            processMedia(result, media_list, ssConfig.media_marquee,
                    result.marqueeUrl, result.marqueeFormat, region);
            // Screenshot
            processMedia(result, media_list, ssConfig.media_screenshot,
                    result.screenshotUrl, result.screenshotFormat, region);
            // Video
            processMedia(result, media_list, ssConfig.media_video,
                    result.videoUrl, result.videoFormat, region);
        }
        result.mediaURLFetch = COMPLETED;
        out_results.push_back(result);
    } // Game.

    if (out_results.size() == 0) {
        LOG(LogDebug) << "ScreenScraperRequest::processGame(): No games found";
    }
}
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-06 11:10:33 +00:00
|
|
|
void ScreenScraperRequest::processMedia(
|
2020-06-21 12:25:28 +00:00
|
|
|
ScraperSearchResult& result,
|
|
|
|
const pugi::xml_node& media_list,
|
|
|
|
std::string mediaType,
|
|
|
|
std::string& fileURL,
|
|
|
|
std::string& fileFormat,
|
|
|
|
std::string region)
|
2020-06-06 11:10:33 +00:00
|
|
|
{
|
2020-06-23 18:07:00 +00:00
|
|
|
pugi::xml_node art = pugi::xml_node(nullptr);
|
2020-06-21 12:25:28 +00:00
|
|
|
|
|
|
|
// Do an XPath query for media[type='$media_type'], then filter by region.
|
|
|
|
// We need to do this because any child of 'medias' has the form
|
|
|
|
// <media type="..." region="..." format="...">
|
|
|
|
// and we need to find the right media for the region.
|
|
|
|
pugi::xpath_node_set results = media_list.select_nodes((static_cast<std::string>
|
|
|
|
("media[@type='") + mediaType + "']").c_str());
|
|
|
|
|
|
|
|
if (results.size()) {
|
2020-08-05 20:38:44 +00:00
|
|
|
// Videos don't have any region attributes, so just take the first entry
|
|
|
|
// (which should be the only entry as well).
|
|
|
|
if (mediaType == "video" || mediaType == "video-normalized") {
|
|
|
|
art = results.first().node();
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// Region fallback: WOR(LD), US, CUS(TOM?), JP, EU.
|
|
|
|
for (auto _region : std::vector<std::string>{
|
|
|
|
region, "wor", "us", "cus", "jp", "eu" }) {
|
|
|
|
if (art)
|
2020-06-21 12:25:28 +00:00
|
|
|
break;
|
2020-08-05 20:38:44 +00:00
|
|
|
|
|
|
|
for (auto node : results) {
|
|
|
|
if (node.node().attribute("region").value() == _region) {
|
|
|
|
art = node.node();
|
|
|
|
break;
|
|
|
|
}
|
2020-06-21 12:25:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (art) {
|
2020-08-05 20:38:44 +00:00
|
|
|
// Sending a 'softname' containing space will make the media URLs returned
|
2020-06-21 12:25:28 +00:00
|
|
|
// by the API also contain the space. Escape any spaces in the URL here.
|
|
|
|
fileURL = Utils::String::replace(art.text().get(), " ", "%20");
|
|
|
|
|
|
|
|
// Get the media type returned by ScreenScraper.
|
|
|
|
std::string media_type = art.attribute("format").value();
|
|
|
|
if (!media_type.empty())
|
|
|
|
fileFormat = "." + media_type;
|
|
|
|
}
|
|
|
|
else {
|
2020-08-06 13:12:04 +00:00
|
|
|
LOG(LogDebug) << "ScreenScraperRequest::processMedia(): "
|
2021-01-26 16:28:54 +00:00
|
|
|
"Failed to find media XML node with name '" << mediaType << "'";
|
2020-06-21 12:25:28 +00:00
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Currently not used in this module.
|
|
|
|
void ScreenScraperRequest::processList(const pugi::xml_document& xmldoc,
|
2020-06-21 12:25:28 +00:00
|
|
|
std::vector<ScraperSearchResult>& results)
|
2019-01-11 19:51:05 +00:00
|
|
|
{
|
2020-06-21 12:25:28 +00:00
|
|
|
assert(mRequestQueue != nullptr);
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2021-01-26 16:28:54 +00:00
|
|
|
LOG(LogDebug) << "ScreenScraperRequest::processList(): Processing a list of results";
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
pugi::xml_node data = xmldoc.child("Data");
|
|
|
|
pugi::xml_node game = data.child("jeu");
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-25 17:52:38 +00:00
|
|
|
if (!game) {
|
2021-01-26 16:28:54 +00:00
|
|
|
LOG(LogDebug) << "ScreenScraperRequest::processList(): Found nothing";
|
2020-06-25 17:52:38 +00:00
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
ScreenScraperRequest::ScreenScraperConfig ssConfig;
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
// Limit the number of results per platform, not in total.
|
|
|
|
// Otherwise if the first platform returns >= 7 games
|
|
|
|
// but the second platform contains the relevant game,
|
|
|
|
// the relevant result would not be shown.
|
|
|
|
for (int i = 0; game && i < MAX_SCRAPER_RESULTS; i++) {
|
|
|
|
std::string id = game.child("id").text().get();
|
|
|
|
std::string name = game.child("nom").text().get();
|
|
|
|
std::string platformId = game.child("systemeid").text().get();
|
|
|
|
std::string path = ssConfig.getGameSearchUrl(name) + "&systemeid=" +
|
|
|
|
platformId + "&gameid=" + id;
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
mRequestQueue->push(std::unique_ptr<ScraperRequest>
|
|
|
|
(new ScreenScraperRequest(results, path)));
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
game = game.next_sibling("jeu");
|
|
|
|
}
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(
|
2020-06-21 12:25:28 +00:00
|
|
|
const std::string gameName) const
|
2019-01-11 19:51:05 +00:00
|
|
|
{
|
2020-11-14 14:30:49 +00:00
|
|
|
std::string screenScraperURL;
|
2021-02-01 18:00:38 +00:00
|
|
|
std::string searchName = gameName;
|
2021-01-26 16:31:27 +00:00
|
|
|
bool singleSearch = false;
|
2020-11-14 14:30:49 +00:00
|
|
|
|
2021-02-01 18:00:38 +00:00
|
|
|
// Trim leading and trailing whitespaces.
|
|
|
|
searchName.erase(searchName.begin(),
|
|
|
|
std::find_if(searchName.begin(), searchName.end(), [](char c) {
|
|
|
|
return !std::isspace(static_cast<unsigned char>(c));
|
|
|
|
}));
|
|
|
|
searchName.erase(std::find_if(searchName.rbegin(), searchName.rend(), [](char c) {
|
|
|
|
return !std::isspace(static_cast<unsigned char>(c));
|
|
|
|
}).base(), searchName.end());
|
|
|
|
|
|
|
|
// If only whitespaces were entered as the search string, then search using a random string
|
|
|
|
// that will not return any results. This is a quick and dirty way to avoid french error
|
|
|
|
// messages about malformed URLs that would surely confuse the user.
|
|
|
|
if (searchName == "")
|
|
|
|
searchName = "zzzzzz";
|
|
|
|
|
2021-01-08 19:34:11 +00:00
|
|
|
// If the game is an arcade game and we're not searching using the metadata name, then
|
|
|
|
// search using the individual ROM name rather than running a wider text matching search.
|
|
|
|
// Also run this search mode if the game name is shorter than four characters, as
|
|
|
|
// screenscraper.fr will otherwise throw an error that the necessary search parameters
|
|
|
|
// were not provided with the search. Possibly this is because a search using less than
|
|
|
|
// four characters would return too many results. But there are some games with really
|
|
|
|
// short names, so it's annoying that they can't be searched using this method.
|
2021-01-26 16:31:27 +00:00
|
|
|
if (isArcadeSystem && !Settings::getInstance()->getBool("ScraperSearchMetadataName")) {
|
|
|
|
singleSearch = true;
|
|
|
|
}
|
2021-02-01 18:00:38 +00:00
|
|
|
else if (searchName.size() < 4) {
|
2021-01-26 16:31:27 +00:00
|
|
|
singleSearch = true;
|
|
|
|
}
|
2021-02-01 18:00:38 +00:00
|
|
|
else if (searchName.back() == '+') {
|
2021-01-26 16:31:27 +00:00
|
|
|
// Special case where ScreenScraper will apparently strip trailing plus characters
|
|
|
|
// from the search strings, and if we don't handle this we could end up with less
|
|
|
|
// than four characters which would break the wide search.
|
2021-02-01 18:00:38 +00:00
|
|
|
std::string trimTrailingPluses = searchName;
|
|
|
|
trimTrailingPluses.erase(std::find_if(trimTrailingPluses.rbegin(),
|
|
|
|
trimTrailingPluses.rend(), [](char c) {
|
|
|
|
return c != '+';
|
|
|
|
}).base(), trimTrailingPluses.end());
|
|
|
|
|
2021-01-26 16:31:27 +00:00
|
|
|
if (trimTrailingPluses.size() < 4)
|
|
|
|
singleSearch = true;
|
|
|
|
}
|
2021-02-01 18:00:38 +00:00
|
|
|
// Another issue is that ScreenScraper removes the word "the" from the search string, which
|
|
|
|
// could also lead to an error for short game names.
|
|
|
|
if (!singleSearch) {
|
|
|
|
std::string removeThe =
|
|
|
|
Utils::String::replace(Utils::String::toUpper(searchName), "THE ", "");
|
|
|
|
// Any additional spaces must also be removed.
|
|
|
|
removeThe.erase(removeThe.begin(),
|
|
|
|
std::find_if(removeThe.begin(), removeThe.end(), [](char c) {
|
|
|
|
return !std::isspace(static_cast<unsigned char>(c));
|
|
|
|
}));
|
|
|
|
// If "the" is placed at the end of the search string, ScreenScraper also removes it.
|
|
|
|
if (removeThe.size() > 4) {
|
|
|
|
if (removeThe.substr(removeThe.size() - 4, 4) == " THE")
|
|
|
|
removeThe = removeThe.substr(0, removeThe.size() - 4);
|
|
|
|
}
|
|
|
|
if (removeThe.size() < 4)
|
|
|
|
singleSearch = true;
|
|
|
|
}
|
2021-01-26 16:31:27 +00:00
|
|
|
|
|
|
|
if (singleSearch) {
|
2020-11-14 14:30:49 +00:00
|
|
|
screenScraperURL = API_URL_BASE
|
|
|
|
+ "/jeuInfos.php?devid=" + Utils::String::scramble(API_DEV_U, API_DEV_KEY)
|
|
|
|
+ "&devpassword=" + Utils::String::scramble(API_DEV_P, API_DEV_KEY)
|
|
|
|
+ "&softname=" + HttpReq::urlEncode(API_SOFT_NAME)
|
|
|
|
+ "&output=xml"
|
2021-02-01 18:00:38 +00:00
|
|
|
+ "&romnom=" + HttpReq::urlEncode(searchName);
|
2020-11-14 14:30:49 +00:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
screenScraperURL = API_URL_BASE
|
|
|
|
+ "/jeuRecherche.php?devid=" + Utils::String::scramble(API_DEV_U, API_DEV_KEY)
|
|
|
|
+ "&devpassword=" + Utils::String::scramble(API_DEV_P, API_DEV_KEY)
|
|
|
|
+ "&softname=" + HttpReq::urlEncode(API_SOFT_NAME)
|
|
|
|
+ "&output=xml"
|
2021-02-01 18:00:38 +00:00
|
|
|
+ "&recherche=" + HttpReq::urlEncode(searchName);
|
2020-11-14 14:30:49 +00:00
|
|
|
}
|
2020-11-07 14:34:15 +00:00
|
|
|
|
|
|
|
// Username / password, if this has been setup and activated.
|
|
|
|
if (Settings::getInstance()->getBool("ScraperUseAccountScreenScraper")) {
|
|
|
|
std::string username = Settings::getInstance()->getString("ScraperUsernameScreenScraper");
|
|
|
|
std::string password = Settings::getInstance()->getString("ScraperPasswordScreenScraper");
|
|
|
|
if (!username.empty() && !password.empty())
|
|
|
|
screenScraperURL += "&ssid=" + HttpReq::urlEncode(username) + "&sspassword=" +
|
|
|
|
HttpReq::urlEncode(password);
|
|
|
|
}
|
|
|
|
|
|
|
|
return screenScraperURL;
|
2019-01-11 19:51:05 +00:00
|
|
|
}
|