Added MD5 file hash searching support to the scraper when using ScreenScraper

This commit is contained in:
Leon Styhre 2023-08-03 20:48:54 +02:00
parent 590591d0fb
commit 40d27ff772
7 changed files with 166 additions and 44 deletions

View file

@ -145,6 +145,12 @@ GuiScraperMenu::GuiScraperMenu(std::string title)
setSize(mMenu.getSize()); setSize(mMenu.getSize());
setPosition((Renderer::getScreenWidth() - mSize.x) / 2.0f, Renderer::getScreenHeight() * 0.13f); setPosition((Renderer::getScreenWidth() - mSize.x) / 2.0f, Renderer::getScreenHeight() * 0.13f);
// Make sure that the hash searching max file size is within the allowed range.
if (Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") < 32)
Settings::getInstance()->setInt("ScraperSearchFileHashMaxSize", 32);
else if (Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") > 800)
Settings::getInstance()->setInt("ScraperSearchFileHashMaxSize", 800);
} }
GuiScraperMenu::~GuiScraperMenu() GuiScraperMenu::~GuiScraperMenu()
@ -902,6 +908,33 @@ void GuiScraperMenu::openOtherOptions()
->setOpacity(DISABLED_OPACITY); ->setOpacity(DISABLED_OPACITY);
} }
// Maximum file size for non-interactive mode file hash searching.
auto scraperSearchFileHashMaxSize =
std::make_shared<SliderComponent>(32.0f, 800.0f, 32.0f, "MiB");
scraperSearchFileHashMaxSize->setValue(
static_cast<float>(Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize")));
s->addWithLabel("HASH SEARCHES MAX FILE SIZE", scraperSearchFileHashMaxSize);
s->addSaveFunc([scraperSearchFileHashMaxSize, s] {
if (scraperSearchFileHashMaxSize->getValue() !=
static_cast<float>(Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize"))) {
Settings::getInstance()->setInt(
"ScraperSearchFileHashMaxSize",
static_cast<int>(scraperSearchFileHashMaxSize->getValue()));
s->setNeedsSaving();
}
});
// File hash searching is not supported by TheGamesDB, so gray out the option if this scraper
// is selected. Also gray it out for ScreenScraper if file hash searching has been disabled.
if (Settings::getInstance()->getString("Scraper") == "thegamesdb" ||
!Settings::getInstance()->getBool("ScraperSearchFileHash")) {
scraperSearchFileHashMaxSize->setEnabled(false);
scraperSearchFileHashMaxSize->setOpacity(DISABLED_OPACITY);
scraperSearchFileHashMaxSize->getParent()
->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1)
->setOpacity(DISABLED_OPACITY);
}
// Overwrite files and data. // Overwrite files and data.
auto scraperOverwriteData = std::make_shared<SwitchComponent>(); auto scraperOverwriteData = std::make_shared<SwitchComponent>();
scraperOverwriteData->setState(Settings::getInstance()->getBool("ScraperOverwriteData")); scraperOverwriteData->setState(Settings::getInstance()->getBool("ScraperOverwriteData"));
@ -929,6 +962,29 @@ void GuiScraperMenu::openOtherOptions()
} }
}); });
// Search using file hashes for non-interactive mode.
auto scraperSearchFileHash = std::make_shared<SwitchComponent>();
scraperSearchFileHash->setState(Settings::getInstance()->getBool("ScraperSearchFileHash"));
s->addWithLabel("SEARCH USING FILE HASHES (NON-INTERACTIVE MODE)", scraperSearchFileHash);
s->addSaveFunc([scraperSearchFileHash, s] {
if (scraperSearchFileHash->getState() !=
Settings::getInstance()->getBool("ScraperSearchFileHash")) {
Settings::getInstance()->setBool("ScraperSearchFileHash",
scraperSearchFileHash->getState());
s->setNeedsSaving();
}
});
// File hash searching is not supported by TheGamesDB, so gray out the option if this scraper
// is selected.
if (Settings::getInstance()->getString("Scraper") == "thegamesdb") {
scraperSearchFileHash->setEnabled(false);
scraperSearchFileHash->setOpacity(DISABLED_OPACITY);
scraperSearchFileHash->getParent()
->getChild(scraperSearchFileHash->getChildIndex() - 1)
->setOpacity(DISABLED_OPACITY);
}
// Search using metadata names. // Search using metadata names.
auto scraperSearchMetadataName = std::make_shared<SwitchComponent>(); auto scraperSearchMetadataName = std::make_shared<SwitchComponent>();
scraperSearchMetadataName->setState( scraperSearchMetadataName->setState(
@ -1105,6 +1161,23 @@ void GuiScraperMenu::openOtherOptions()
}; };
// Switch callbacks. // Switch callbacks.
// Switch callback that flips the availability of the hash search max file size slider each
// time it is invoked. The new state is the inverse of the slider's current enabled state, and
// the matching opacity is applied to both the slider and the child located directly before it
// in its parent (presumably the row label, TODO confirm).
auto hashSearchToggleFunc = [scraperSearchFileHashMaxSize]() {
    const bool enableSlider = !scraperSearchFileHashMaxSize->getEnabled();
    const float opacity = enableSlider ? 1.0f : DISABLED_OPACITY;

    scraperSearchFileHashMaxSize->setEnabled(enableSlider);
    scraperSearchFileHashMaxSize->setOpacity(opacity);
    scraperSearchFileHashMaxSize->getParent()
        ->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1)
        ->setOpacity(opacity);
};
auto interactiveToggleFunc = [scraperSemiautomatic]() { auto interactiveToggleFunc = [scraperSemiautomatic]() {
if (scraperSemiautomatic->getEnabled()) { if (scraperSemiautomatic->getEnabled()) {
scraperSemiautomatic->setEnabled(false); scraperSemiautomatic->setEnabled(false);
@ -1140,6 +1213,7 @@ void GuiScraperMenu::openOtherOptions()
}; };
mScraperRetryOnErrorCount->setCallback(scraperRetryCountFunc); mScraperRetryOnErrorCount->setCallback(scraperRetryCountFunc);
scraperSearchFileHash->setCallback(hashSearchToggleFunc);
scraperInteractive->setCallback(interactiveToggleFunc); scraperInteractive->setCallback(interactiveToggleFunc);
scraperRespectExclusions->setCallback(excludeRecursivelyToggleFunc); scraperRespectExclusions->setCallback(excludeRecursivelyToggleFunc);

View file

@ -40,6 +40,8 @@ GuiScraperSearch::GuiScraperSearch(SearchType type, unsigned int scrapeCount, in
, mSearchType {type} , mSearchType {type}
, mRowCount {rowCount} , mRowCount {rowCount}
, mScrapeCount {scrapeCount} , mScrapeCount {scrapeCount}
, mNextSearch {false}
, mHashSearch {false}
, mRefinedSearch {false} , mRefinedSearch {false}
, mBlockAccept {false} , mBlockAccept {false}
, mAcceptedResult {false} , mAcceptedResult {false}
@ -164,14 +166,17 @@ GuiScraperSearch::~GuiScraperSearch()
// scraping when the miximage was getting generated. // scraping when the miximage was getting generated.
if (Settings::getInstance()->getBool("MiximageGenerate") && if (Settings::getInstance()->getBool("MiximageGenerate") &&
mMiximageGeneratorThread.joinable()) { mMiximageGeneratorThread.joinable()) {
mScrapeResult.savedNewMedia = true;
// We always let the miximage generator thread complete. // We always let the miximage generator thread complete.
mMiximageGeneratorThread.join(); mMiximageGeneratorThread.join();
mMiximageGenerator.reset(); mMiximageGenerator.reset();
mScrapeResult.savedNewMedia = true;
TextureResource::manualUnload(mLastSearch.game->getMiximagePath(), false); TextureResource::manualUnload(mLastSearch.game->getMiximagePath(), false);
ViewController::getInstance()->onFileChanged(mLastSearch.game, true); ViewController::getInstance()->onFileChanged(mLastSearch.game, true);
} }
if (mCalculateMD5HashThread.joinable())
mCalculateMD5HashThread.join();
mWindow->setAllowTextScrolling(false); mWindow->setAllowTextScrolling(false);
} }
@ -335,6 +340,7 @@ void GuiScraperSearch::updateView()
void GuiScraperSearch::search(ScraperSearchParams& params) void GuiScraperSearch::search(ScraperSearchParams& params)
{ {
mHashSearch = false;
mBlockAccept = true; mBlockAccept = true;
mAcceptedResult = false; mAcceptedResult = false;
mMiximageResult = false; mMiximageResult = false;
@ -358,8 +364,30 @@ void GuiScraperSearch::search(ScraperSearchParams& params)
else else
params.automaticMode = false; params.automaticMode = false;
params.md5Hash = "";
if (!Utils::FileSystem::isDirectory(params.game->getPath()))
params.fileSize = Utils::FileSystem::getFileSize(params.game->getPath());
// Only use MD5 file hash searching when in non-interactive mode.
if (mSearchType == ALWAYS_ACCEPT_FIRST_RESULT &&
Settings::getInstance()->getBool("ScraperSearchFileHash") &&
Settings::getInstance()->getString("Scraper") == "screenscraper" && params.fileSize != 0 &&
params.fileSize <=
Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") * 1024 * 1024) {
// Run the MD5 hash calculation in a separate thread as it may take a long time to
// complete and we don't want to freeze the UI in the meanwhile.
std::promise<bool>().swap(mMD5HashPromise);
mMD5HashFuture = mMD5HashPromise.get_future();
mHashSearch = true;
mCalculateMD5HashThread =
std::thread(&GuiScraperSearch::calculateMD5Hash, this, params.game->getPath());
}
mLastSearch = params; mLastSearch = params;
mSearchHandle = startScraperSearch(params); mSearchHandle = nullptr;
mNextSearch = true;
} }
void GuiScraperSearch::stop() void GuiScraperSearch::stop()
@ -691,6 +719,27 @@ void GuiScraperSearch::returnResult(ScraperSearchResult result)
void GuiScraperSearch::update(int deltaTime) void GuiScraperSearch::update(int deltaTime)
{ {
// startScraperSearch() is called here rather than in search() only because the optional
// MD5 hash calculation needs to run in a separate thread so that it does not lock the UI.
if (mNextSearch && mHashSearch) {
if (mMD5HashFuture.valid()) {
// Only wait one millisecond as this update() function runs very frequently.
if (mMD5HashFuture.wait_for(std::chrono::milliseconds(1)) ==
std::future_status::ready) {
if (mCalculateMD5HashThread.joinable())
mCalculateMD5HashThread.join();
mLastSearch.md5Hash = mMD5Hash;
mSearchHandle = startScraperSearch(mLastSearch);
mMD5Hash = "";
mNextSearch = false;
}
}
}
else if (mNextSearch) {
mSearchHandle = startScraperSearch(mLastSearch);
mNextSearch = false;
}
GuiComponent::update(deltaTime); GuiComponent::update(deltaTime);
if (mBlockAccept) if (mBlockAccept)
@ -789,7 +838,8 @@ void GuiScraperSearch::update(int deltaTime)
if (mGeneratorFuture.wait_for(std::chrono::milliseconds(1)) == std::future_status::ready) { if (mGeneratorFuture.wait_for(std::chrono::milliseconds(1)) == std::future_status::ready) {
mMDResolveHandle.reset(); mMDResolveHandle.reset();
// We always let the miximage generator thread complete. // We always let the miximage generator thread complete.
mMiximageGeneratorThread.join(); if (mMiximageGeneratorThread.joinable())
mMiximageGeneratorThread.join();
if (!mGeneratorFuture.get()) if (!mGeneratorFuture.get())
mScrapeResult.savedNewMedia = true; mScrapeResult.savedNewMedia = true;
returnResult(mScrapeResult); returnResult(mScrapeResult);

View file

@ -114,6 +114,12 @@ private:
int getSelectedIndex(); int getSelectedIndex();
// Calculates the MD5 digest for `path` and stores it in mMD5Hash. This is the entry point
// used for mCalculateMD5HashThread, so it does not run on the thread that calls update().
// NOTE(review): the second argument to md5Hash() presumably indicates that the first argument
// is a file path rather than the data to hash; confirm against Utils::Math.
void calculateMD5Hash(std::string path)
{
mMD5Hash = Utils::Math::md5Hash(path, true);
// Fulfill the promise only after mMD5Hash has been written, as update() polls the matching
// future and reads mMD5Hash once it reports ready.
mMD5HashPromise.set_value(true);
}
// For TheGamesDB, retrieve URLs for the additional metadata assets // For TheGamesDB, retrieve URLs for the additional metadata assets
// that need to be downloaded. // that need to be downloaded.
void retrieveMediaURLs(ScraperSearchResult result); void retrieveMediaURLs(ScraperSearchResult result);
@ -166,6 +172,8 @@ private:
std::function<void()> mRefineCallback; std::function<void()> mRefineCallback;
int mRowCount; int mRowCount;
unsigned int mScrapeCount; unsigned int mScrapeCount;
bool mNextSearch;
bool mHashSearch;
bool mRefinedSearch; bool mRefinedSearch;
bool mBlockAccept; bool mBlockAccept;
bool mAcceptedResult; bool mAcceptedResult;
@ -183,6 +191,11 @@ private:
std::vector<ScraperSearchResult> mScraperResults; std::vector<ScraperSearchResult> mScraperResults;
std::map<std::string, std::unique_ptr<HttpReq>> mThumbnailReqMap; std::map<std::string, std::unique_ptr<HttpReq>> mThumbnailReqMap;
std::string mMD5Hash;
std::thread mCalculateMD5HashThread;
std::promise<bool> mMD5HashPromise;
std::future<bool> mMD5HashFuture;
std::unique_ptr<MiximageGenerator> mMiximageGenerator; std::unique_ptr<MiximageGenerator> mMiximageGenerator;
std::thread mMiximageGeneratorThread; std::thread mMiximageGeneratorThread;
std::promise<bool> mGeneratorPromise; std::promise<bool> mGeneratorPromise;

View file

@ -36,12 +36,15 @@ enum downloadStatus {
struct ScraperSearchParams { struct ScraperSearchParams {
SystemData* system; SystemData* system;
FileData* game; FileData* game;
std::string md5Hash;
long fileSize;
std::string nameOverride; std::string nameOverride;
bool automaticMode; bool automaticMode;
ScraperSearchParams() ScraperSearchParams()
: automaticMode {false} : fileSize {0}
, automaticMode {false}
{ {
} }
}; };

View file

@ -194,7 +194,8 @@ void screenscraper_generate_scraper_requests(const ScraperSearchParams& params,
if (params.nameOverride == "") { if (params.nameOverride == "") {
if (Settings::getInstance()->getBool("ScraperSearchMetadataName")) { if (Settings::getInstance()->getBool("ScraperSearchMetadataName")) {
path = ssConfig.getGameSearchUrl( path = ssConfig.getGameSearchUrl(
Utils::String::removeParenthesis(params.game->metadata.get("name"))); Utils::String::removeParenthesis(params.game->metadata.get("name")), params.md5Hash,
params.fileSize);
} }
else { else {
std::string cleanName; std::string cleanName;
@ -208,11 +209,11 @@ void screenscraper_generate_scraper_requests(const ScraperSearchParams& params,
cleanName = params.game->getCleanName(); cleanName = params.game->getCleanName();
} }
path = ssConfig.getGameSearchUrl(cleanName); path = ssConfig.getGameSearchUrl(cleanName, params.md5Hash, params.fileSize);
} }
} }
else { else {
path = ssConfig.getGameSearchUrl(params.nameOverride); path = ssConfig.getGameSearchUrl(params.nameOverride, params.md5Hash, params.fileSize);
} }
auto& platforms = params.system->getPlatformIds(); auto& platforms = params.system->getPlatformIds();
@ -697,44 +698,16 @@ bool ScreenScraperRequest::processMedia(ScraperSearchResult& result,
return regionFallback; return regionFallback;
} }
// Currently not used in this module. std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(const std::string& gameName,
void ScreenScraperRequest::processList(const pugi::xml_document& xmldoc, const std::string& md5Hash,
std::vector<ScraperSearchResult>& results) const long fileSize) const
{ {
assert(mRequestQueue != nullptr); if (md5Hash != "") {
LOG(LogDebug)
LOG(LogDebug) << "ScreenScraperRequest::processList(): Processing a list of results"; << "ScreenScraper::getGameSearchUrl(): Performing MD5 file hash search using digest \""
<< md5Hash << "\"";
pugi::xml_node data {xmldoc.child("Data")};
pugi::xml_node game {data.child("jeu")};
if (!game) {
LOG(LogDebug) << "ScreenScraperRequest::processList(): Found nothing";
} }
ScreenScraperRequest::ScreenScraperConfig ssConfig;
// Limit the number of results per platform, not in total.
// Otherwise if the first platform returns >= 7 games
// but the second platform contains the relevant game,
// the relevant result would not be shown.
for (int i {0}; game && i < MAX_SCRAPER_RESULTS; ++i) {
std::string id {game.child("id").text().get()};
std::string name {game.child("nom").text().get()};
std::string platformId {game.child("systemeid").text().get()};
std::string path {ssConfig.getGameSearchUrl(name) + "&systemeid=" + platformId +
"&gameid=" + id};
mRequestQueue->push(
std::unique_ptr<ScraperRequest>(new ScreenScraperRequest(results, path)));
game = game.next_sibling("jeu");
}
}
std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(
const std::string gameName) const
{
std::string searchName {gameName}; std::string searchName {gameName};
bool singleSearch {false}; bool singleSearch {false};
@ -811,6 +784,12 @@ std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(
.append("&output=xml") .append("&output=xml")
.append("&romnom=") .append("&romnom=")
.append(HttpReq::urlEncode(searchName)); .append(HttpReq::urlEncode(searchName));
if (md5Hash != "") {
screenScraperURL.append("&md5=")
.append(md5Hash)
.append("&romtaille=")
.append(std::to_string(fileSize));
}
} }
else { else {
screenScraperURL.append(API_URL_BASE) screenScraperURL.append(API_URL_BASE)

View file

@ -41,7 +41,9 @@ public:
// Settings for the scraper. // Settings for the scraper.
static const struct ScreenScraperConfig { static const struct ScreenScraperConfig {
std::string getGameSearchUrl(const std::string gameName) const; std::string getGameSearchUrl(const std::string& gameName,
const std::string& md5Hash,
const long fileSize) const;
// Access to the API. // Access to the API.
const std::string API_DEV_U = {15, 21, 39, 22, 42, 40}; const std::string API_DEV_U = {15, 21, 39, 22, 42, 40};
@ -119,7 +121,6 @@ protected:
void process(const std::unique_ptr<HttpReq>& req, void process(const std::unique_ptr<HttpReq>& req,
std::vector<ScraperSearchResult>& results) override; std::vector<ScraperSearchResult>& results) override;
void processList(const pugi::xml_document& xmldoc, std::vector<ScraperSearchResult>& results);
void processGame(const pugi::xml_document& xmldoc, std::vector<ScraperSearchResult>& results); void processGame(const pugi::xml_document& xmldoc, std::vector<ScraperSearchResult>& results);
bool processMedia(ScraperSearchResult& result, bool processMedia(ScraperSearchResult& result,
const pugi::xml_node& media_list, const pugi::xml_node& media_list,

View file

@ -142,8 +142,10 @@ void Settings::setDefaults()
mStringMap["ScraperLanguage"] = {"en", "en"}; mStringMap["ScraperLanguage"] = {"en", "en"};
mIntMap["ScraperRetryOnErrorCount"] = {3, 3}; mIntMap["ScraperRetryOnErrorCount"] = {3, 3};
mIntMap["ScraperRetryOnErrorTimer"] = {3, 3}; mIntMap["ScraperRetryOnErrorTimer"] = {3, 3};
mIntMap["ScraperSearchFileHashMaxSize"] = {128, 128};
mBoolMap["ScraperOverwriteData"] = {true, true}; mBoolMap["ScraperOverwriteData"] = {true, true};
mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true}; mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true};
mBoolMap["ScraperSearchFileHash"] = {true, true};
mBoolMap["ScraperSearchMetadataName"] = {true, true}; mBoolMap["ScraperSearchMetadataName"] = {true, true};
mBoolMap["ScraperIncludeFolders"] = {true, true}; mBoolMap["ScraperIncludeFolders"] = {true, true};
mBoolMap["ScraperInteractive"] = {false, false}; mBoolMap["ScraperInteractive"] = {false, false};