From 6e863a82ecdf472c224447d726d6d88dad44bc93 Mon Sep 17 00:00:00 2001 From: Leon Styhre Date: Sat, 9 Sep 2023 09:48:56 +0200 Subject: [PATCH] Added conversion of some additional HTML character codes when scraping using ScreenScraper --- es-app/src/guis/GuiScraperSearch.cpp | 5 ----- es-app/src/scrapers/ScreenScraper.cpp | 18 +++++++++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/es-app/src/guis/GuiScraperSearch.cpp b/es-app/src/guis/GuiScraperSearch.cpp index c30992c6e..52351a2bc 100644 --- a/es-app/src/guis/GuiScraperSearch.cpp +++ b/es-app/src/guis/GuiScraperSearch.cpp @@ -1104,11 +1104,6 @@ bool GuiScraperSearch::saveMetadata(const ScraperSearchResult& result, metadata.set(key, result.mdl.get(key)); metadataUpdated = true; } - - // For the description, expand any escaped HTML quotation marks to literal - // quotation marks. - if (key == "desc" && metadataUpdated) - metadata.set(key, Utils::String::replace(metadata.get(key), """, "\"")); } return metadataUpdated; diff --git a/es-app/src/scrapers/ScreenScraper.cpp b/es-app/src/scrapers/ScreenScraper.cpp index 2892832f6..51f8f4380 100644 --- a/es-app/src/scrapers/ScreenScraper.cpp +++ b/es-app/src/scrapers/ScreenScraper.cpp @@ -382,12 +382,15 @@ void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc, {region, "wor", "us", "ss", "eu", "jp"}) .text() .get()}; - // Game names sometimes contain ampersands encoded as hex codes. + + // Translate some HTML character codes to UTF-8 characters for the game name. + gameName = Utils::String::replace(gameName, " ", " "); gameName = Utils::String::replace(gameName, "&", "&"); + // In some very rare cases game names contain newline characters that we need to remove. result.mdl.set("name", Utils::String::replace(gameName, "\n", "")); - LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name"); + LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name"); LOG(LogDebug) << "ScreenScraperRequest::processGame(): Game ID: " << result.gameID; pugi::xml_node system {game.child("systeme")}; @@ -431,10 +434,15 @@ void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc, .text() .get()}; - // Translate some HTML character codes to UTF-8 characters. + // Translate some HTML character codes to UTF-8 characters for the description. + // This does not capture all such characters in the ScreenScraper database but these + // are the most common ones. if (!description.empty()) { - result.mdl.set("desc", Utils::String::replace(description, " ", " ")); - result.mdl.set("desc", Utils::String::replace(description, "©", "©")); + description = Utils::String::replace(description, " ", " "); + description = Utils::String::replace(description, """, "\""); + description = Utils::String::replace(description, "©", "©"); + description = Utils::String::replace(description, "'", "'"); + result.mdl.set("desc", description); } // Get the date proper. The API returns multiple 'date' children nodes to the 'dates'