Added conversion of some additional HTML character codes when scraping using ScreenScraper

This commit is contained in:
Leon Styhre 2023-09-09 09:48:56 +02:00
parent 87ed152c5a
commit 6e863a82ec
2 changed files with 13 additions and 10 deletions

View file

@ -1104,11 +1104,6 @@ bool GuiScraperSearch::saveMetadata(const ScraperSearchResult& result,
metadata.set(key, result.mdl.get(key));
metadataUpdated = true;
}
// For the description, expand any escaped HTML quotation marks to literal
// quotation marks.
if (key == "desc" && metadataUpdated)
metadata.set(key, Utils::String::replace(metadata.get(key), "&quot;", "\""));
}
return metadataUpdated;

View file

@ -382,12 +382,15 @@ void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc,
{region, "wor", "us", "ss", "eu", "jp"})
.text()
.get()};
// Game names sometimes contain ampersands encoded as hex codes.
// Translate some HTML character codes to UTF-8 characters for the game name.
gameName = Utils::String::replace(gameName, "&nbsp;", " ");
gameName = Utils::String::replace(gameName, "&#x26;", "&");
// In some very rare cases game names contain newline characters that we need to remove.
result.mdl.set("name", Utils::String::replace(gameName, "\n", ""));
LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name");
LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name");
LOG(LogDebug) << "ScreenScraperRequest::processGame(): Game ID: " << result.gameID;
pugi::xml_node system {game.child("systeme")};
@ -431,10 +434,15 @@ void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc,
.text()
.get()};
// Translate some HTML character codes to UTF-8 characters.
// Translate some HTML character codes to UTF-8 characters for the description.
// This does not capture all such characters in the ScreenScraper database but these
// are the most common ones.
if (!description.empty()) {
result.mdl.set("desc", Utils::String::replace(description, "&nbsp;", " "));
result.mdl.set("desc", Utils::String::replace(description, "&copy;", "©"));
description = Utils::String::replace(description, "&nbsp;", " ");
description = Utils::String::replace(description, "&quot;", "\"");
description = Utils::String::replace(description, "&copy;", "©");
description = Utils::String::replace(description, "&#039;", "'");
result.mdl.set("desc", description);
}
// Get the date proper. The API returns multiple 'date' children nodes to the 'dates'