From 40d27ff772d36f0898e9f5facb8d0784c566d746 Mon Sep 17 00:00:00 2001 From: Leon Styhre Date: Thu, 3 Aug 2023 20:48:54 +0200 Subject: [PATCH] Added MD5 file hash searching support to the scraper when using ScreenScraper --- es-app/src/guis/GuiScraperMenu.cpp | 74 +++++++++++++++++++++++++++ es-app/src/guis/GuiScraperSearch.cpp | 56 ++++++++++++++++++-- es-app/src/guis/GuiScraperSearch.h | 13 +++++ es-app/src/scrapers/Scraper.h | 5 +- es-app/src/scrapers/ScreenScraper.cpp | 55 ++++++-------------- es-app/src/scrapers/ScreenScraper.h | 5 +- es-core/src/Settings.cpp | 2 + 7 files changed, 166 insertions(+), 44 deletions(-) diff --git a/es-app/src/guis/GuiScraperMenu.cpp b/es-app/src/guis/GuiScraperMenu.cpp index 0ed2b75ab..585a1f12c 100644 --- a/es-app/src/guis/GuiScraperMenu.cpp +++ b/es-app/src/guis/GuiScraperMenu.cpp @@ -145,6 +145,12 @@ GuiScraperMenu::GuiScraperMenu(std::string title) setSize(mMenu.getSize()); setPosition((Renderer::getScreenWidth() - mSize.x) / 2.0f, Renderer::getScreenHeight() * 0.13f); + + // Make sure that the hash searching max file size is within the allowed range. + if (Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") < 32) + Settings::getInstance()->setInt("ScraperSearchFileHashMaxSize", 32); + else if (Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") > 800) + Settings::getInstance()->setInt("ScraperSearchFileHashMaxSize", 800); } GuiScraperMenu::~GuiScraperMenu() @@ -902,6 +908,33 @@ void GuiScraperMenu::openOtherOptions() ->setOpacity(DISABLED_OPACITY); } + // Maximum file size for non-interactive mode file hash searching. 
+ auto scraperSearchFileHashMaxSize = + std::make_shared(32.0f, 800.0f, 32.0f, "MiB"); + scraperSearchFileHashMaxSize->setValue( + static_cast(Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize"))); + s->addWithLabel("HASH SEARCHES MAX FILE SIZE", scraperSearchFileHashMaxSize); + s->addSaveFunc([scraperSearchFileHashMaxSize, s] { + if (scraperSearchFileHashMaxSize->getValue() != + static_cast(Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize"))) { + Settings::getInstance()->setInt( + "ScraperSearchFileHashMaxSize", + static_cast(scraperSearchFileHashMaxSize->getValue())); + s->setNeedsSaving(); + } + }); + + // File hash searching is not supported by TheGamesDB, so gray out the option if this scraper + // is selected. Also gray it out for ScreenScraper if file hash searching has been disabled. + if (Settings::getInstance()->getString("Scraper") == "thegamesdb" || + !Settings::getInstance()->getBool("ScraperSearchFileHash")) { + scraperSearchFileHashMaxSize->setEnabled(false); + scraperSearchFileHashMaxSize->setOpacity(DISABLED_OPACITY); + scraperSearchFileHashMaxSize->getParent() + ->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1) + ->setOpacity(DISABLED_OPACITY); + } + // Overwrite files and data. auto scraperOverwriteData = std::make_shared(); scraperOverwriteData->setState(Settings::getInstance()->getBool("ScraperOverwriteData")); @@ -929,6 +962,29 @@ void GuiScraperMenu::openOtherOptions() } }); + // Search using file hashes for non-interactive mode. 
+ auto scraperSearchFileHash = std::make_shared(); + scraperSearchFileHash->setState(Settings::getInstance()->getBool("ScraperSearchFileHash")); + s->addWithLabel("SEARCH USING FILE HASHES (NON-INTERACTIVE MODE)", scraperSearchFileHash); + s->addSaveFunc([scraperSearchFileHash, s] { + if (scraperSearchFileHash->getState() != + Settings::getInstance()->getBool("ScraperSearchFileHash")) { + Settings::getInstance()->setBool("ScraperSearchFileHash", + scraperSearchFileHash->getState()); + s->setNeedsSaving(); + } + }); + + // File hash searching is not supported by TheGamesDB, so gray out the option if this scraper + // is selected. + if (Settings::getInstance()->getString("Scraper") == "thegamesdb") { + scraperSearchFileHash->setEnabled(false); + scraperSearchFileHash->setOpacity(DISABLED_OPACITY); + scraperSearchFileHash->getParent() + ->getChild(scraperSearchFileHash->getChildIndex() - 1) + ->setOpacity(DISABLED_OPACITY); + } + // Search using metadata names. auto scraperSearchMetadataName = std::make_shared(); scraperSearchMetadataName->setState( @@ -1105,6 +1161,23 @@ void GuiScraperMenu::openOtherOptions() }; // Switch callbacks. 
+ auto hashSearchToggleFunc = [scraperSearchFileHashMaxSize]() { + if (scraperSearchFileHashMaxSize->getEnabled()) { + scraperSearchFileHashMaxSize->setEnabled(false); + scraperSearchFileHashMaxSize->setOpacity(DISABLED_OPACITY); + scraperSearchFileHashMaxSize->getParent() + ->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1) + ->setOpacity(DISABLED_OPACITY); + } + else { + scraperSearchFileHashMaxSize->setEnabled(true); + scraperSearchFileHashMaxSize->setOpacity(1.0f); + scraperSearchFileHashMaxSize->getParent() + ->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1) + ->setOpacity(1.0f); + } + }; + auto interactiveToggleFunc = [scraperSemiautomatic]() { if (scraperSemiautomatic->getEnabled()) { scraperSemiautomatic->setEnabled(false); @@ -1140,6 +1213,7 @@ void GuiScraperMenu::openOtherOptions() }; mScraperRetryOnErrorCount->setCallback(scraperRetryCountFunc); + scraperSearchFileHash->setCallback(hashSearchToggleFunc); scraperInteractive->setCallback(interactiveToggleFunc); scraperRespectExclusions->setCallback(excludeRecursivelyToggleFunc); diff --git a/es-app/src/guis/GuiScraperSearch.cpp b/es-app/src/guis/GuiScraperSearch.cpp index 75bca7a2b..73ef9c6c9 100644 --- a/es-app/src/guis/GuiScraperSearch.cpp +++ b/es-app/src/guis/GuiScraperSearch.cpp @@ -40,6 +40,8 @@ GuiScraperSearch::GuiScraperSearch(SearchType type, unsigned int scrapeCount, in , mSearchType {type} , mRowCount {rowCount} , mScrapeCount {scrapeCount} + , mNextSearch {false} + , mHashSearch {false} , mRefinedSearch {false} , mBlockAccept {false} , mAcceptedResult {false} @@ -164,14 +166,17 @@ GuiScraperSearch::~GuiScraperSearch() // scraping when the miximage was getting generated. if (Settings::getInstance()->getBool("MiximageGenerate") && mMiximageGeneratorThread.joinable()) { - mScrapeResult.savedNewMedia = true; // We always let the miximage generator thread complete. 
mMiximageGeneratorThread.join(); mMiximageGenerator.reset(); + mScrapeResult.savedNewMedia = true; TextureResource::manualUnload(mLastSearch.game->getMiximagePath(), false); ViewController::getInstance()->onFileChanged(mLastSearch.game, true); } + if (mCalculateMD5HashThread.joinable()) + mCalculateMD5HashThread.join(); + mWindow->setAllowTextScrolling(false); } @@ -335,6 +340,7 @@ void GuiScraperSearch::updateView() void GuiScraperSearch::search(ScraperSearchParams& params) { + mHashSearch = false; mBlockAccept = true; mAcceptedResult = false; mMiximageResult = false; @@ -358,8 +364,30 @@ void GuiScraperSearch::search(ScraperSearchParams& params) else params.automaticMode = false; + params.md5Hash = ""; + if (!Utils::FileSystem::isDirectory(params.game->getPath())) + params.fileSize = Utils::FileSystem::getFileSize(params.game->getPath()); + + // Only use MD5 file hash searching when in non-interactive mode. + if (mSearchType == ALWAYS_ACCEPT_FIRST_RESULT && + Settings::getInstance()->getBool("ScraperSearchFileHash") && + Settings::getInstance()->getString("Scraper") == "screenscraper" && params.fileSize != 0 && + params.fileSize <= + Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") * 1024 * 1024) { + + // Run the MD5 hash calculation in a separate thread as it may take a long time to + // complete and we don't want to freeze the UI in the meantime. 
+ std::promise().swap(mMD5HashPromise); + mMD5HashFuture = mMD5HashPromise.get_future(); + + mHashSearch = true; + mCalculateMD5HashThread = + std::thread(&GuiScraperSearch::calculateMD5Hash, this, params.game->getPath()); + } + mLastSearch = params; - mSearchHandle = startScraperSearch(params); + mSearchHandle = nullptr; + mNextSearch = true; } void GuiScraperSearch::stop() @@ -691,6 +719,27 @@ void GuiScraperSearch::returnResult(ScraperSearchResult result) void GuiScraperSearch::update(int deltaTime) { + // The only reason for calling startScraperSearch() here instead of in search() is that + // the optional MD5 hash calculation needs to run in a separate thread to not lock the UI. + if (mNextSearch && mHashSearch) { + if (mMD5HashFuture.valid()) { + // Only wait one millisecond as this update() function runs very frequently. + if (mMD5HashFuture.wait_for(std::chrono::milliseconds(1)) == + std::future_status::ready) { + if (mCalculateMD5HashThread.joinable()) + mCalculateMD5HashThread.join(); + mLastSearch.md5Hash = mMD5Hash; + mSearchHandle = startScraperSearch(mLastSearch); + mMD5Hash = ""; + mNextSearch = false; + } + } + } + else if (mNextSearch) { + mSearchHandle = startScraperSearch(mLastSearch); + mNextSearch = false; + } + GuiComponent::update(deltaTime); if (mBlockAccept) @@ -789,7 +838,8 @@ void GuiScraperSearch::update(int deltaTime) if (mGeneratorFuture.wait_for(std::chrono::milliseconds(1)) == std::future_status::ready) { mMDResolveHandle.reset(); // We always let the miximage generator thread complete. 
- mMiximageGeneratorThread.join(); + if (mMiximageGeneratorThread.joinable()) + mMiximageGeneratorThread.join(); if (!mGeneratorFuture.get()) mScrapeResult.savedNewMedia = true; returnResult(mScrapeResult); diff --git a/es-app/src/guis/GuiScraperSearch.h b/es-app/src/guis/GuiScraperSearch.h index 808c3d3de..7353429bb 100644 --- a/es-app/src/guis/GuiScraperSearch.h +++ b/es-app/src/guis/GuiScraperSearch.h @@ -114,6 +114,12 @@ private: int getSelectedIndex(); + void calculateMD5Hash(std::string path) + { + mMD5Hash = Utils::Math::md5Hash(path, true); + mMD5HashPromise.set_value(true); + } + // For TheGamesDB, retrieve URLs for the additional metadata assets // that need to be downloaded. void retrieveMediaURLs(ScraperSearchResult result); @@ -166,6 +172,8 @@ private: std::function mRefineCallback; int mRowCount; unsigned int mScrapeCount; + bool mNextSearch; + bool mHashSearch; bool mRefinedSearch; bool mBlockAccept; bool mAcceptedResult; @@ -183,6 +191,11 @@ private: std::vector mScraperResults; std::map> mThumbnailReqMap; + std::string mMD5Hash; + std::thread mCalculateMD5HashThread; + std::promise mMD5HashPromise; + std::future mMD5HashFuture; + std::unique_ptr mMiximageGenerator; std::thread mMiximageGeneratorThread; std::promise mGeneratorPromise; diff --git a/es-app/src/scrapers/Scraper.h b/es-app/src/scrapers/Scraper.h index a4c7c7c69..9af2b0240 100644 --- a/es-app/src/scrapers/Scraper.h +++ b/es-app/src/scrapers/Scraper.h @@ -36,12 +36,15 @@ enum downloadStatus { struct ScraperSearchParams { SystemData* system; FileData* game; + std::string md5Hash; + long fileSize; std::string nameOverride; bool automaticMode; ScraperSearchParams() - : automaticMode {false} + : fileSize {0} + , automaticMode {false} { } }; diff --git a/es-app/src/scrapers/ScreenScraper.cpp b/es-app/src/scrapers/ScreenScraper.cpp index cce4c2360..034f796c1 100644 --- a/es-app/src/scrapers/ScreenScraper.cpp +++ b/es-app/src/scrapers/ScreenScraper.cpp @@ -194,7 +194,8 @@ void 
screenscraper_generate_scraper_requests(const ScraperSearchParams& params, if (params.nameOverride == "") { if (Settings::getInstance()->getBool("ScraperSearchMetadataName")) { path = ssConfig.getGameSearchUrl( - Utils::String::removeParenthesis(params.game->metadata.get("name"))); + Utils::String::removeParenthesis(params.game->metadata.get("name")), params.md5Hash, + params.fileSize); } else { std::string cleanName; @@ -208,11 +209,11 @@ void screenscraper_generate_scraper_requests(const ScraperSearchParams& params, cleanName = params.game->getCleanName(); } - path = ssConfig.getGameSearchUrl(cleanName); + path = ssConfig.getGameSearchUrl(cleanName, params.md5Hash, params.fileSize); } } else { - path = ssConfig.getGameSearchUrl(params.nameOverride); + path = ssConfig.getGameSearchUrl(params.nameOverride, params.md5Hash, params.fileSize); } auto& platforms = params.system->getPlatformIds(); @@ -697,44 +698,16 @@ bool ScreenScraperRequest::processMedia(ScraperSearchResult& result, return regionFallback; } -// Currently not used in this module. -void ScreenScraperRequest::processList(const pugi::xml_document& xmldoc, - std::vector& results) +std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(const std::string& gameName, + const std::string& md5Hash, + const long fileSize) const { - assert(mRequestQueue != nullptr); - - LOG(LogDebug) << "ScreenScraperRequest::processList(): Processing a list of results"; - - pugi::xml_node data {xmldoc.child("Data")}; - pugi::xml_node game {data.child("jeu")}; - - if (!game) { - LOG(LogDebug) << "ScreenScraperRequest::processList(): Found nothing"; + if (md5Hash != "") { + LOG(LogDebug) + << "ScreenScraper::getGameSearchUrl(): Performing MD5 file hash search using digest \"" + << md5Hash << "\""; } - ScreenScraperRequest::ScreenScraperConfig ssConfig; - - // Limit the number of results per platform, not in total. 
- // Otherwise if the first platform returns >= 7 games - // but the second platform contains the relevant game, - // the relevant result would not be shown. - for (int i {0}; game && i < MAX_SCRAPER_RESULTS; ++i) { - std::string id {game.child("id").text().get()}; - std::string name {game.child("nom").text().get()}; - std::string platformId {game.child("systemeid").text().get()}; - std::string path {ssConfig.getGameSearchUrl(name) + "&systemeid=" + platformId + - "&gameid=" + id}; - - mRequestQueue->push( - std::unique_ptr(new ScreenScraperRequest(results, path))); - - game = game.next_sibling("jeu"); - } -} - -std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl( - const std::string gameName) const -{ std::string searchName {gameName}; bool singleSearch {false}; @@ -811,6 +784,12 @@ std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl( .append("&output=xml") .append("&romnom=") .append(HttpReq::urlEncode(searchName)); + if (md5Hash != "") { + screenScraperURL.append("&md5=") + .append(md5Hash) + .append("&romtaille=") + .append(std::to_string(fileSize)); + } } else { screenScraperURL.append(API_URL_BASE) diff --git a/es-app/src/scrapers/ScreenScraper.h b/es-app/src/scrapers/ScreenScraper.h index 365f616c8..dc995cefd 100644 --- a/es-app/src/scrapers/ScreenScraper.h +++ b/es-app/src/scrapers/ScreenScraper.h @@ -41,7 +41,9 @@ public: // Settings for the scraper. static const struct ScreenScraperConfig { - std::string getGameSearchUrl(const std::string gameName) const; + std::string getGameSearchUrl(const std::string& gameName, + const std::string& md5Hash, + const long fileSize) const; // Access to the API. 
const std::string API_DEV_U = {15, 21, 39, 22, 42, 40}; @@ -119,7 +121,6 @@ protected: void process(const std::unique_ptr& req, std::vector& results) override; - void processList(const pugi::xml_document& xmldoc, std::vector& results); void processGame(const pugi::xml_document& xmldoc, std::vector& results); bool processMedia(ScraperSearchResult& result, const pugi::xml_node& media_list, diff --git a/es-core/src/Settings.cpp b/es-core/src/Settings.cpp index 75e0af55b..99cda2e72 100644 --- a/es-core/src/Settings.cpp +++ b/es-core/src/Settings.cpp @@ -142,8 +142,10 @@ void Settings::setDefaults() mStringMap["ScraperLanguage"] = {"en", "en"}; mIntMap["ScraperRetryOnErrorCount"] = {3, 3}; mIntMap["ScraperRetryOnErrorTimer"] = {3, 3}; + mIntMap["ScraperSearchFileHashMaxSize"] = {128, 128}; mBoolMap["ScraperOverwriteData"] = {true, true}; mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true}; + mBoolMap["ScraperSearchFileHash"] = {true, true}; mBoolMap["ScraperSearchMetadataName"] = {true, true}; mBoolMap["ScraperIncludeFolders"] = {true, true}; mBoolMap["ScraperInteractive"] = {false, false};