Added MD5 file hash searching support to the scraper when using ScreenScraper

This commit is contained in:
Leon Styhre 2023-08-03 20:48:54 +02:00
parent 590591d0fb
commit 40d27ff772
7 changed files with 166 additions and 44 deletions

View file

@ -145,6 +145,12 @@ GuiScraperMenu::GuiScraperMenu(std::string title)
setSize(mMenu.getSize());
setPosition((Renderer::getScreenWidth() - mSize.x) / 2.0f, Renderer::getScreenHeight() * 0.13f);
// Make sure that the hash searching max file size is within the allowed range.
if (Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") < 32)
Settings::getInstance()->setInt("ScraperSearchFileHashMaxSize", 32);
else if (Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") > 800)
Settings::getInstance()->setInt("ScraperSearchFileHashMaxSize", 800);
}
GuiScraperMenu::~GuiScraperMenu()
@ -902,6 +908,33 @@ void GuiScraperMenu::openOtherOptions()
->setOpacity(DISABLED_OPACITY);
}
// Maximum file size for non-interactive mode file hash searching.
auto scraperSearchFileHashMaxSize =
std::make_shared<SliderComponent>(32.0f, 800.0f, 32.0f, "MiB");
scraperSearchFileHashMaxSize->setValue(
static_cast<float>(Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize")));
s->addWithLabel("HASH SEARCHES MAX FILE SIZE", scraperSearchFileHashMaxSize);
s->addSaveFunc([scraperSearchFileHashMaxSize, s] {
if (scraperSearchFileHashMaxSize->getValue() !=
static_cast<float>(Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize"))) {
Settings::getInstance()->setInt(
"ScraperSearchFileHashMaxSize",
static_cast<int>(scraperSearchFileHashMaxSize->getValue()));
s->setNeedsSaving();
}
});
// File hash searching is not supported by TheGamesDB, so gray out the option if this scraper
// is selected. Also gray it out for ScreenScraper if file hash searching has been disabled.
if (Settings::getInstance()->getString("Scraper") == "thegamesdb" ||
!Settings::getInstance()->getBool("ScraperSearchFileHash")) {
scraperSearchFileHashMaxSize->setEnabled(false);
scraperSearchFileHashMaxSize->setOpacity(DISABLED_OPACITY);
scraperSearchFileHashMaxSize->getParent()
->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1)
->setOpacity(DISABLED_OPACITY);
}
// Overwrite files and data.
auto scraperOverwriteData = std::make_shared<SwitchComponent>();
scraperOverwriteData->setState(Settings::getInstance()->getBool("ScraperOverwriteData"));
@ -929,6 +962,29 @@ void GuiScraperMenu::openOtherOptions()
}
});
// Search using file hashes for non-interactive mode.
auto scraperSearchFileHash = std::make_shared<SwitchComponent>();
scraperSearchFileHash->setState(Settings::getInstance()->getBool("ScraperSearchFileHash"));
s->addWithLabel("SEARCH USING FILE HASHES (NON-INTERACTIVE MODE)", scraperSearchFileHash);
s->addSaveFunc([scraperSearchFileHash, s] {
if (scraperSearchFileHash->getState() !=
Settings::getInstance()->getBool("ScraperSearchFileHash")) {
Settings::getInstance()->setBool("ScraperSearchFileHash",
scraperSearchFileHash->getState());
s->setNeedsSaving();
}
});
// File hash searching is not supported by TheGamesDB, so gray out the option if this scraper
// is selected.
if (Settings::getInstance()->getString("Scraper") == "thegamesdb") {
scraperSearchFileHash->setEnabled(false);
scraperSearchFileHash->setOpacity(DISABLED_OPACITY);
scraperSearchFileHash->getParent()
->getChild(scraperSearchFileHash->getChildIndex() - 1)
->setOpacity(DISABLED_OPACITY);
}
// Search using metadata names.
auto scraperSearchMetadataName = std::make_shared<SwitchComponent>();
scraperSearchMetadataName->setState(
@ -1105,6 +1161,23 @@ void GuiScraperMenu::openOtherOptions()
};
// Switch callbacks.
// Flips the hash search max file size slider between its enabled and disabled state.
// The slider and the component preceding it in the parent (presumably its row label)
// are dimmed when disabling and restored to full opacity when enabling.
auto hashSearchToggleFunc = [scraperSearchFileHashMaxSize]() {
    // The new state is simply the inverse of the current one.
    const bool enableSlider = !scraperSearchFileHashMaxSize->getEnabled();
    const float rowOpacity = (enableSlider ? 1.0f : DISABLED_OPACITY);

    scraperSearchFileHashMaxSize->setEnabled(enableSlider);
    scraperSearchFileHashMaxSize->setOpacity(rowOpacity);
    scraperSearchFileHashMaxSize->getParent()
        ->getChild(scraperSearchFileHashMaxSize->getChildIndex() - 1)
        ->setOpacity(rowOpacity);
};
auto interactiveToggleFunc = [scraperSemiautomatic]() {
if (scraperSemiautomatic->getEnabled()) {
scraperSemiautomatic->setEnabled(false);
@ -1140,6 +1213,7 @@ void GuiScraperMenu::openOtherOptions()
};
mScraperRetryOnErrorCount->setCallback(scraperRetryCountFunc);
scraperSearchFileHash->setCallback(hashSearchToggleFunc);
scraperInteractive->setCallback(interactiveToggleFunc);
scraperRespectExclusions->setCallback(excludeRecursivelyToggleFunc);

View file

@ -40,6 +40,8 @@ GuiScraperSearch::GuiScraperSearch(SearchType type, unsigned int scrapeCount, in
, mSearchType {type}
, mRowCount {rowCount}
, mScrapeCount {scrapeCount}
, mNextSearch {false}
, mHashSearch {false}
, mRefinedSearch {false}
, mBlockAccept {false}
, mAcceptedResult {false}
@ -164,14 +166,17 @@ GuiScraperSearch::~GuiScraperSearch()
// scraping when the miximage was getting generated.
if (Settings::getInstance()->getBool("MiximageGenerate") &&
mMiximageGeneratorThread.joinable()) {
mScrapeResult.savedNewMedia = true;
// We always let the miximage generator thread complete.
mMiximageGeneratorThread.join();
mMiximageGenerator.reset();
mScrapeResult.savedNewMedia = true;
TextureResource::manualUnload(mLastSearch.game->getMiximagePath(), false);
ViewController::getInstance()->onFileChanged(mLastSearch.game, true);
}
if (mCalculateMD5HashThread.joinable())
mCalculateMD5HashThread.join();
mWindow->setAllowTextScrolling(false);
}
@ -335,6 +340,7 @@ void GuiScraperSearch::updateView()
void GuiScraperSearch::search(ScraperSearchParams& params)
{
mHashSearch = false;
mBlockAccept = true;
mAcceptedResult = false;
mMiximageResult = false;
@ -358,8 +364,30 @@ void GuiScraperSearch::search(ScraperSearchParams& params)
else
params.automaticMode = false;
params.md5Hash = "";
if (!Utils::FileSystem::isDirectory(params.game->getPath()))
params.fileSize = Utils::FileSystem::getFileSize(params.game->getPath());
// Only use MD5 file hash searching when in non-interactive mode.
if (mSearchType == ALWAYS_ACCEPT_FIRST_RESULT &&
Settings::getInstance()->getBool("ScraperSearchFileHash") &&
Settings::getInstance()->getString("Scraper") == "screenscraper" && params.fileSize != 0 &&
params.fileSize <=
Settings::getInstance()->getInt("ScraperSearchFileHashMaxSize") * 1024 * 1024) {
// Run the MD5 hash calculation in a separate thread as it may take a long time to
// complete and we don't want to freeze the UI in the meantime.
std::promise<bool>().swap(mMD5HashPromise);
mMD5HashFuture = mMD5HashPromise.get_future();
mHashSearch = true;
mCalculateMD5HashThread =
std::thread(&GuiScraperSearch::calculateMD5Hash, this, params.game->getPath());
}
mLastSearch = params;
mSearchHandle = startScraperSearch(params);
mSearchHandle = nullptr;
mNextSearch = true;
}
void GuiScraperSearch::stop()
@ -691,6 +719,27 @@ void GuiScraperSearch::returnResult(ScraperSearchResult result)
void GuiScraperSearch::update(int deltaTime)
{
// The only reason startScraperSearch() is called here instead of in search() is that
// the optional MD5 hash calculation needs to run in a separate thread to not lock the UI.
if (mNextSearch && mHashSearch) {
if (mMD5HashFuture.valid()) {
// Only wait one millisecond as this update() function runs very frequently.
if (mMD5HashFuture.wait_for(std::chrono::milliseconds(1)) ==
std::future_status::ready) {
if (mCalculateMD5HashThread.joinable())
mCalculateMD5HashThread.join();
mLastSearch.md5Hash = mMD5Hash;
mSearchHandle = startScraperSearch(mLastSearch);
mMD5Hash = "";
mNextSearch = false;
}
}
}
else if (mNextSearch) {
mSearchHandle = startScraperSearch(mLastSearch);
mNextSearch = false;
}
GuiComponent::update(deltaTime);
if (mBlockAccept)
@ -789,6 +838,7 @@ void GuiScraperSearch::update(int deltaTime)
if (mGeneratorFuture.wait_for(std::chrono::milliseconds(1)) == std::future_status::ready) {
mMDResolveHandle.reset();
// We always let the miximage generator thread complete.
if (mMiximageGeneratorThread.joinable())
mMiximageGeneratorThread.join();
if (!mGeneratorFuture.get())
mScrapeResult.savedNewMedia = true;

View file

@ -114,6 +114,12 @@ private:
int getSelectedIndex();
// Calculates the MD5 hash for the supplied path and stores the result in mMD5Hash.
// search() runs this function in mCalculateMD5HashThread, and update() polls
// mMD5HashFuture to find out when the result is available.
// NOTE(review): the second argument to md5Hash() presumably indicates that the first
// argument is a file path rather than the data to hash, confirm against Utils::Math.
void calculateMD5Hash(std::string path)
{
mMD5Hash = Utils::Math::md5Hash(path, true);
// Fulfill the promise only after mMD5Hash has been written, as update() reads the
// member as soon as the future is reported as ready.
mMD5HashPromise.set_value(true);
}
// For TheGamesDB, retrieve URLs for the additional metadata assets
// that need to be downloaded.
void retrieveMediaURLs(ScraperSearchResult result);
@ -166,6 +172,8 @@ private:
std::function<void()> mRefineCallback;
int mRowCount;
unsigned int mScrapeCount;
bool mNextSearch;
bool mHashSearch;
bool mRefinedSearch;
bool mBlockAccept;
bool mAcceptedResult;
@ -183,6 +191,11 @@ private:
std::vector<ScraperSearchResult> mScraperResults;
std::map<std::string, std::unique_ptr<HttpReq>> mThumbnailReqMap;
std::string mMD5Hash;
std::thread mCalculateMD5HashThread;
std::promise<bool> mMD5HashPromise;
std::future<bool> mMD5HashFuture;
std::unique_ptr<MiximageGenerator> mMiximageGenerator;
std::thread mMiximageGeneratorThread;
std::promise<bool> mGeneratorPromise;

View file

@ -36,12 +36,15 @@ enum downloadStatus {
struct ScraperSearchParams {
SystemData* system;
FileData* game;
std::string md5Hash;
long fileSize;
std::string nameOverride;
bool automaticMode;
ScraperSearchParams()
: automaticMode {false}
: fileSize {0}
, automaticMode {false}
{
}
};

View file

@ -194,7 +194,8 @@ void screenscraper_generate_scraper_requests(const ScraperSearchParams& params,
if (params.nameOverride == "") {
if (Settings::getInstance()->getBool("ScraperSearchMetadataName")) {
path = ssConfig.getGameSearchUrl(
Utils::String::removeParenthesis(params.game->metadata.get("name")));
Utils::String::removeParenthesis(params.game->metadata.get("name")), params.md5Hash,
params.fileSize);
}
else {
std::string cleanName;
@ -208,11 +209,11 @@ void screenscraper_generate_scraper_requests(const ScraperSearchParams& params,
cleanName = params.game->getCleanName();
}
path = ssConfig.getGameSearchUrl(cleanName);
path = ssConfig.getGameSearchUrl(cleanName, params.md5Hash, params.fileSize);
}
}
else {
path = ssConfig.getGameSearchUrl(params.nameOverride);
path = ssConfig.getGameSearchUrl(params.nameOverride, params.md5Hash, params.fileSize);
}
auto& platforms = params.system->getPlatformIds();
@ -697,44 +698,16 @@ bool ScreenScraperRequest::processMedia(ScraperSearchResult& result,
return regionFallback;
}
// Currently not used in this module.
void ScreenScraperRequest::processList(const pugi::xml_document& xmldoc,
std::vector<ScraperSearchResult>& results)
std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(const std::string& gameName,
const std::string& md5Hash,
const long fileSize) const
{
assert(mRequestQueue != nullptr);
LOG(LogDebug) << "ScreenScraperRequest::processList(): Processing a list of results";
pugi::xml_node data {xmldoc.child("Data")};
pugi::xml_node game {data.child("jeu")};
if (!game) {
LOG(LogDebug) << "ScreenScraperRequest::processList(): Found nothing";
if (md5Hash != "") {
LOG(LogDebug)
<< "ScreenScraper::getGameSearchUrl(): Performing MD5 file hash search using digest \""
<< md5Hash << "\"";
}
ScreenScraperRequest::ScreenScraperConfig ssConfig;
// Limit the number of results per platform, not in total.
// Otherwise if the first platform returns >= 7 games
// but the second platform contains the relevant game,
// the relevant result would not be shown.
for (int i {0}; game && i < MAX_SCRAPER_RESULTS; ++i) {
std::string id {game.child("id").text().get()};
std::string name {game.child("nom").text().get()};
std::string platformId {game.child("systemeid").text().get()};
std::string path {ssConfig.getGameSearchUrl(name) + "&systemeid=" + platformId +
"&gameid=" + id};
mRequestQueue->push(
std::unique_ptr<ScraperRequest>(new ScreenScraperRequest(results, path)));
game = game.next_sibling("jeu");
}
}
std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(
const std::string gameName) const
{
std::string searchName {gameName};
bool singleSearch {false};
@ -811,6 +784,12 @@ std::string ScreenScraperRequest::ScreenScraperConfig::getGameSearchUrl(
.append("&output=xml")
.append("&romnom=")
.append(HttpReq::urlEncode(searchName));
if (md5Hash != "") {
screenScraperURL.append("&md5=")
.append(md5Hash)
.append("&romtaille=")
.append(std::to_string(fileSize));
}
}
else {
screenScraperURL.append(API_URL_BASE)

View file

@ -41,7 +41,9 @@ public:
// Settings for the scraper.
static const struct ScreenScraperConfig {
std::string getGameSearchUrl(const std::string gameName) const;
std::string getGameSearchUrl(const std::string& gameName,
const std::string& md5Hash,
const long fileSize) const;
// Access to the API.
const std::string API_DEV_U = {15, 21, 39, 22, 42, 40};
@ -119,7 +121,6 @@ protected:
void process(const std::unique_ptr<HttpReq>& req,
std::vector<ScraperSearchResult>& results) override;
void processList(const pugi::xml_document& xmldoc, std::vector<ScraperSearchResult>& results);
void processGame(const pugi::xml_document& xmldoc, std::vector<ScraperSearchResult>& results);
bool processMedia(ScraperSearchResult& result,
const pugi::xml_node& media_list,

View file

@ -142,8 +142,10 @@ void Settings::setDefaults()
mStringMap["ScraperLanguage"] = {"en", "en"};
mIntMap["ScraperRetryOnErrorCount"] = {3, 3};
mIntMap["ScraperRetryOnErrorTimer"] = {3, 3};
mIntMap["ScraperSearchFileHashMaxSize"] = {128, 128};
mBoolMap["ScraperOverwriteData"] = {true, true};
mBoolMap["ScraperHaltOnInvalidMedia"] = {true, true};
mBoolMap["ScraperSearchFileHash"] = {true, true};
mBoolMap["ScraperSearchMetadataName"] = {true, true};
mBoolMap["ScraperIncludeFolders"] = {true, true};
mBoolMap["ScraperInteractive"] = {false, false};