mirror of
https://github.com/RetroDECK/ES-DE.git
synced 2024-11-25 23:55:38 +00:00
Added conversion of some additional HTML character codes when scraping using ScreenScraper
This commit is contained in:
parent
87ed152c5a
commit
6e863a82ec
|
@ -1104,11 +1104,6 @@ bool GuiScraperSearch::saveMetadata(const ScraperSearchResult& result,
|
|||
metadata.set(key, result.mdl.get(key));
|
||||
metadataUpdated = true;
|
||||
}
|
||||
|
||||
// For the description, expand any escaped HTML quotation marks to literal
|
||||
// quotation marks.
|
||||
if (key == "desc" && metadataUpdated)
|
||||
metadata.set(key, Utils::String::replace(metadata.get(key), "&quot;", "\""));
|
||||
}
|
||||
|
||||
return metadataUpdated;
|
||||
|
|
|
@ -382,12 +382,15 @@ void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc,
|
|||
{region, "wor", "us", "ss", "eu", "jp"})
|
||||
.text()
|
||||
.get()};
|
||||
// Game names sometimes contain ampersands encoded as hex codes.
|
||||
|
||||
// Translate some HTML character codes to UTF-8 characters for the game name.
|
||||
gameName = Utils::String::replace(gameName, "&nbsp;", " ");
|
||||
gameName = Utils::String::replace(gameName, "&#x26;", "&");
|
||||
|
||||
// In some very rare cases game names contain newline characters that we need to remove.
|
||||
result.mdl.set("name", Utils::String::replace(gameName, "\n", ""));
|
||||
LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name");
|
||||
|
||||
LOG(LogDebug) << "ScreenScraperRequest::processGame(): Name: " << result.mdl.get("name");
|
||||
LOG(LogDebug) << "ScreenScraperRequest::processGame(): Game ID: " << result.gameID;
|
||||
|
||||
pugi::xml_node system {game.child("systeme")};
|
||||
|
@ -431,10 +434,15 @@ void ScreenScraperRequest::processGame(const pugi::xml_document& xmldoc,
|
|||
.text()
|
||||
.get()};
|
||||
|
||||
// Translate some HTML character codes to UTF-8 characters.
|
||||
// Translate some HTML character codes to UTF-8 characters for the description.
|
||||
// This does not capture all such characters in the ScreenScraper database but these
|
||||
// are the most common ones.
|
||||
if (!description.empty()) {
|
||||
result.mdl.set("desc", Utils::String::replace(description, "&nbsp;", " "));
|
||||
result.mdl.set("desc", Utils::String::replace(description, "&copy;", "©"));
|
||||
description = Utils::String::replace(description, "&nbsp;", " ");
|
||||
description = Utils::String::replace(description, "&quot;", "\"");
|
||||
description = Utils::String::replace(description, "&copy;", "©");
|
||||
description = Utils::String::replace(description, "&#039;", "'");
|
||||
result.mdl.set("desc", description);
|
||||
}
|
||||
|
||||
// Get the date proper. The API returns multiple 'date' children nodes to the 'dates'
|
||||
|
|
Loading…
Reference in a new issue