//
// Scraper.h
//
// Main scraper logic.
// Called from GuiScraperSearch.
// Calls either GamesDBJSONScraper or ScreenScraper.
//
|
|
|
|
|
2014-06-25 16:29:58 +00:00
|
|
|
#pragma once
|
2017-10-31 17:12:50 +00:00
|
|
|
#ifndef ES_APP_SCRAPERS_SCRAPER_H
|
|
|
|
#define ES_APP_SCRAPERS_SCRAPER_H
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
#include "AsyncHandle.h"
|
2017-11-01 22:21:10 +00:00
|
|
|
#include "HttpReq.h"
|
|
|
|
#include "MetaData.h"
|
2018-01-29 22:50:10 +00:00
|
|
|
#include <functional>
|
|
|
|
#include <memory>
|
2014-06-25 16:29:58 +00:00
|
|
|
#include <queue>
|
2018-01-29 22:50:10 +00:00
|
|
|
#include <utility>
|
|
|
|
#include <assert.h>
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
// NOTE(review): presumably the maximum number of results kept per scraper search.
// The macro is not referenced anywhere in this header - confirm at its use sites.
#define MAX_SCRAPER_RESULTS 7
|
|
|
|
|
2017-11-01 22:21:10 +00:00
|
|
|
// Forward declarations: this header only stores pointers to these types
// (see ScraperSearchParams), so their full definitions are not needed here.
class FileData;
class SystemData;
|
|
|
|
|
2020-06-06 11:10:33 +00:00
|
|
|
// Progress of one fetch/download step. ScraperSearchResult keeps one value of this
// type per step (media URL fetch, thumbnail download, media files download).
// Deliberately left as an unscoped enum: the enumerators are used unqualified
// elsewhere in this header (e.g. "= NOT_STARTED").
enum eDownloadStatus {
    NOT_STARTED,
    IN_PROGRESS,
    COMPLETED
};
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
struct ScraperSearchParams {
|
2020-06-21 12:25:28 +00:00
|
|
|
SystemData* system;
|
|
|
|
FileData* game;
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
std::string nameOverride;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
struct ScraperSearchResult {
|
2020-06-21 12:25:28 +00:00
|
|
|
ScraperSearchResult() : mdl(GAME_METADATA) {};
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
MetaDataList mdl;
|
|
|
|
std::string gameID;
|
2020-06-06 11:10:33 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
// How many more objects the scraper service allows to be downloaded
|
|
|
|
// within a given time period.
|
|
|
|
unsigned int scraperRequestAllowance;
|
2020-06-06 11:10:33 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
enum eDownloadStatus mediaURLFetch = NOT_STARTED;
|
|
|
|
enum eDownloadStatus thumbnailDownloadStatus = NOT_STARTED;
|
|
|
|
enum eDownloadStatus mediaFilesDownloadStatus = NOT_STARTED;
|
2020-06-06 11:10:33 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
std::string ThumbnailImageData; // Thumbnail cache, will containe entire image.
|
|
|
|
std::string ThumbnailImageUrl;
|
2020-06-06 11:10:33 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
std::string box3dUrl;
|
|
|
|
std::string coverUrl;
|
|
|
|
std::string marqueeUrl;
|
|
|
|
std::string screenshotUrl;
|
2019-01-11 19:51:05 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
// Needed to pre-set the image type.
|
|
|
|
std::string box3dFormat;
|
|
|
|
std::string coverFormat;
|
|
|
|
std::string marqueeFormat;
|
|
|
|
std::string screenshotFormat;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// So let me explain why I've abstracted this so heavily.
// There are two ways I can think of that you'd want to write a scraper.
//
// 1. Do some HTTP request(s) -> process them -> return the results.
// 2. Do some local filesystem queries (an offline scraper) -> return the results.
//
// The first way needs to be asynchronous while it's waiting for the HTTP request to return.
// The second doesn't.
//
// It would be nice if we could write it like this:
//     search = generate_http_request(searchparams);
//     wait_until_done(search);
//     ... process search ...
//     return results;
//
// We could do this if we used threads. Right now ES doesn't because I'm pretty sure I'd
// get it wrong, and I'm not sure of the performance of threads on the Pi (single-core ARM).
// We could also do this if we used coroutines.
// I can't find a really good cross-platform coroutine library (x86/64/ARM Linux + Windows),
// and I don't want to spend more time chasing libraries than just writing it the long way once.
//
// So, I did it the "long" way.
// ScraperSearchHandle - one logical search, e.g. "search for mario".
// ScraperRequest - encapsulates some sort of asynchronous request that will ultimately
//     return some results.
// ScraperHttpRequest - implementation of ScraperRequest that waits on an HttpReq, then
//     processes it with some processing function.
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// A scraper search gathers results from (potentially multiple) ScraperRequests.
|
2014-06-25 16:29:58 +00:00
|
|
|
class ScraperRequest : public AsyncHandle
|
|
|
|
{
|
|
|
|
public:
|
2020-06-21 12:25:28 +00:00
|
|
|
ScraperRequest(std::vector<ScraperSearchResult>& resultsWrite);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
// Returns "true" once we're done.
|
|
|
|
virtual void update() = 0;
|
2019-08-25 15:23:02 +00:00
|
|
|
|
2014-06-25 16:29:58 +00:00
|
|
|
protected:
|
2020-06-21 12:25:28 +00:00
|
|
|
std::vector<ScraperSearchResult>& mResults;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// A single HTTP request that needs to be processed to get the results.
|
2014-06-25 16:29:58 +00:00
|
|
|
class ScraperHttpRequest : public ScraperRequest
|
|
|
|
{
|
|
|
|
public:
|
2020-06-21 12:25:28 +00:00
|
|
|
ScraperHttpRequest(std::vector<ScraperSearchResult>& resultsWrite, const std::string& url);
|
|
|
|
virtual void update() override;
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
protected:
|
2020-06-21 12:25:28 +00:00
|
|
|
virtual void process(const std::unique_ptr<HttpReq>& req,
|
|
|
|
std::vector<ScraperSearchResult>& results) = 0;
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
private:
|
2020-06-21 12:25:28 +00:00
|
|
|
std::unique_ptr<HttpReq> mReq;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// A request to get a list of results.
|
2014-06-25 16:29:58 +00:00
|
|
|
class ScraperSearchHandle : public AsyncHandle
|
|
|
|
{
|
|
|
|
public:
|
2020-06-21 12:25:28 +00:00
|
|
|
ScraperSearchHandle();
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
void update();
|
|
|
|
inline const std::vector<ScraperSearchResult>& getResults() const {
|
|
|
|
assert(mStatus != ASYNC_IN_PROGRESS); return mResults; }
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
protected:
|
2020-06-21 12:25:28 +00:00
|
|
|
friend std::unique_ptr<ScraperSearchHandle>
|
|
|
|
startScraperSearch(const ScraperSearchParams& params);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
friend std::unique_ptr<ScraperSearchHandle>
|
|
|
|
startMediaURLsFetch(const std::string& gameIDs);
|
2020-06-06 11:10:33 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
std::queue< std::unique_ptr<ScraperRequest> > mRequestQueue;
|
|
|
|
std::vector<ScraperSearchResult> mResults;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Will use the current scraper settings to pick the result source.
|
2014-06-25 16:29:58 +00:00
|
|
|
std::unique_ptr<ScraperSearchHandle> startScraperSearch(const ScraperSearchParams& params);
|
|
|
|
|
2020-06-06 11:10:33 +00:00
|
|
|
std::unique_ptr<ScraperSearchHandle> startMediaURLsFetch(const std::string& gameIDs);
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Returns a list of valid scraper names.
std::vector<std::string> getScraperList();

// Returns true if the scraper configured in the settings is still valid.
bool isValidConfiguredScraper();
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
typedef void (*generate_scraper_requests_func)(const ScraperSearchParams& params,
|
2020-06-21 12:25:28 +00:00
|
|
|
std::queue<std::unique_ptr<ScraperRequest>>& requests,
|
|
|
|
std::vector<ScraperSearchResult>& results);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
// -------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// Meta data asset downloading stuff.
|
|
|
|
class MDResolveHandle : public AsyncHandle
|
|
|
|
{
|
|
|
|
public:
|
2020-06-21 12:25:28 +00:00
|
|
|
MDResolveHandle(const ScraperSearchResult& result, const ScraperSearchParams& search);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
void update() override;
|
|
|
|
inline const ScraperSearchResult& getResult() const
|
|
|
|
{ assert(mStatus == ASYNC_DONE); return mResult; }
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
private:
|
2020-06-21 12:25:28 +00:00
|
|
|
ScraperSearchResult mResult;
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
typedef std::pair<std::unique_ptr<AsyncHandle>, std::function<void()>> ResolvePair;
|
|
|
|
std::vector<ResolvePair> mFuncs;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
class ImageDownloadHandle : public AsyncHandle
|
|
|
|
{
|
|
|
|
public:
|
2020-06-21 12:25:28 +00:00
|
|
|
ImageDownloadHandle(
|
|
|
|
const std::string& url,
|
|
|
|
const std::string& path,
|
|
|
|
const std::string& existingMediaPath,
|
|
|
|
int maxWidth,
|
|
|
|
int maxHeight);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-06-21 12:25:28 +00:00
|
|
|
void update() override;
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
private:
|
2020-06-21 12:25:28 +00:00
|
|
|
std::unique_ptr<HttpReq> mReq;
|
|
|
|
std::string mSavePath;
|
|
|
|
std::string mExistingMediaFile;
|
|
|
|
int mMaxWidth;
|
|
|
|
int mMaxHeight;
|
2014-06-25 16:29:58 +00:00
|
|
|
};
|
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// About the same as:
|
|
|
|
// "~/.emulationstation/downloaded_images/[system_name]/[game_name].[url's extension]".
|
|
|
|
// Will create the "downloaded_images" and "subdirectory" directories if they do not exist.
|
2020-06-06 11:10:33 +00:00
|
|
|
std::string getSaveAsPath(const ScraperSearchParams& params,
|
2020-06-21 12:25:28 +00:00
|
|
|
const std::string& filetypeSubdirectory, const std::string& url);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Will resize according to Settings::getInt("ScraperResizeWidth") and
|
|
|
|
// Settings::getInt("ScraperResizeHeight").
|
|
|
|
std::unique_ptr<ImageDownloadHandle> downloadImageAsync(const std::string& url,
|
2020-06-21 12:25:28 +00:00
|
|
|
const std::string& saveAs, const std::string& existingMediaPath);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
|
|
|
// Resolves all metadata assets that need to be downloaded.
|
2020-05-26 16:34:33 +00:00
|
|
|
std::unique_ptr<MDResolveHandle> resolveMetaDataAssets(const ScraperSearchResult& result,
|
2020-06-21 12:25:28 +00:00
|
|
|
const ScraperSearchParams& search);
|
2014-06-25 16:29:58 +00:00
|
|
|
|
2020-05-26 16:34:33 +00:00
|
|
|
// Resizes the image stored at path.
// You can pass 0 for maxWidth or maxHeight to automatically keep the aspect ratio.
// It will overwrite the image at [path] with the new resized one.
// Returns true if successful, false otherwise.
bool resizeImage(const std::string& path, int maxWidth, int maxHeight);
|
2017-10-31 17:12:50 +00:00
|
|
|
|
|
|
|
#endif // ES_APP_SCRAPERS_SCRAPER_H
|