Duckstation/dep/reshadefx/src/effect_preprocessor.cpp

/*
* Copyright (C) 2014 Patrick Mours
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "effect_lexer.hpp"
#include "effect_preprocessor.hpp"
#include <cassert>
#include <fstream>
#include <algorithm> // std::find_if
#ifndef _WIN32
// On Linux systems the native path encoding is UTF-8 already, so no conversion necessary
#define u8path(p) path(p)
#define u8string() string()
#endif
enum op_type
{
op_none = -1,
op_or,
op_and,
op_bitor,
op_bitxor,
op_bitand,
op_not_equal,
op_equal,
op_less,
op_greater,
op_less_equal,
op_greater_equal,
op_leftshift,
op_rightshift,
op_add,
op_subtract,
op_modulo,
op_divide,
op_multiply,
op_plus,
op_negate,
op_not,
op_bitnot,
op_parentheses
};
enum macro_replacement
{
macro_replacement_start = '\x00',
macro_replacement_argument = '\xFD',
macro_replacement_concat = '\xFF',
macro_replacement_stringize = '\xFE',
};
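// Replacement lists use an in-band encoding: a 'macro_replacement_start' byte is
// followed by one of the marker bytes above plus a single byte holding the parameter
// index (see 'create_macro_replacement_list' and 'expand_macro' further below).
// As an illustrative example, "#define F(x) #x" should produce the three-byte
// replacement list { '\x00', '\xFE', '\x00' } (start, stringize, parameter 0).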
static const int precedence_lookup[] = {
0, 1, 2, 3, 4, // logical || and &&, bitwise |, ^ and &
5, 6, 7, 7, 7, 7, // equality and relational operators
8, 8, // left shift, right shift
9, 9, // add, subtract
10, 10, 10, // modulo, divide, multiply
11, 11, 11, 11 // unary operators
};
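// The table above holds one entry per operator in 'op_type', from 'op_or' up to and
// including 'op_bitnot'; 'op_parentheses' is only used as a stack marker and is never
// looked up. A consistency check along these lines (illustrative sketch) would
// capture that invariant:
static_assert(sizeof(precedence_lookup) / sizeof(precedence_lookup[0]) == static_cast<size_t>(op_parentheses),
"one precedence entry per operator, parentheses marker excluded");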
static bool read_file(const std::string &path, std::string &data, reshadefx::preprocessor::include_read_file_callback &cb)
{
std::string file_data;
if (!cb(path, file_data))
return false;
// Append a new line feed to the end of the input string to avoid issues with parsing
file_data.push_back('\n');
// Remove the UTF-8 BOM (the byte sequence 0xEF 0xBB 0xBF encodes U+FEFF)
if (file_data.size() >= 3 &&
static_cast<unsigned char>(file_data[0]) == 0xef &&
static_cast<unsigned char>(file_data[1]) == 0xbb &&
static_cast<unsigned char>(file_data[2]) == 0xbf)
file_data.erase(0, 3);
data = std::move(file_data);
return true;
}
bool reshadefx::preprocessor::stdfs_read_file_callback(const std::string &path, std::string &data)
{
std::ifstream file(std::filesystem::path(path), std::ios::binary);
if (!file)
return false;
// Read file contents into memory
std::error_code ec;
const uintmax_t file_size = std::filesystem::file_size(path, ec);
if (ec)
return false;
data.reserve(file_size + 1);
data.resize(static_cast<size_t>(file_size), '\0');
if (!file.read(data.data(), file_size))
return false;
// No longer need to have a handle open to the file, since all data was read, so can safely close it
file.close();
return true;
}
bool reshadefx::preprocessor::stdfs_file_exists_callback(const std::string &path)
{
return std::filesystem::exists(std::filesystem::path(path));
}
template <char ESCAPE_CHAR = '\\'>
static std::string escape_string(std::string s)
{
for (size_t offset = 0; (offset = s.find(ESCAPE_CHAR, offset)) != std::string::npos; offset += 2)
s.insert(offset, "\\", 1);
return '\"' + s + '\"';
}
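// Example: given the path C:\dir\file.fx, escape_string returns "C:\\dir\\file.fx",
// i.e. the input wrapped in double quotes with every occurrence of ESCAPE_CHAR
// escaped by a preceding backslash (used for __FILE__ and the # stringize operator).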
reshadefx::preprocessor::preprocessor()
: _file_exists_cb(stdfs_file_exists_callback)
, _read_file_cb(stdfs_read_file_callback)
{
}
reshadefx::preprocessor::~preprocessor()
{
}
void reshadefx::preprocessor::set_include_callbacks(include_file_exists_callback file_exists,
include_read_file_callback read_file)
{
_file_exists_cb = file_exists;
_read_file_cb = read_file;
}
void reshadefx::preprocessor::add_include_path(const std::string &path)
{
assert(!path.empty());
_include_paths.push_back(std::filesystem::path(path));
}
bool reshadefx::preprocessor::add_macro_definition(const std::string &name, const macro &macro)
{
assert(!name.empty());
return _macros.emplace(name, macro).second;
}
bool reshadefx::preprocessor::append_file(const std::string &path)
{
std::string source_code;
if (!read_file(path, source_code, _read_file_cb))
return false;
return append_string(std::move(source_code), path);
}
bool reshadefx::preprocessor::append_string(std::string source_code, const std::string &path /* = std::string() */)
{
// Enforce all input strings to end with a line feed
assert(!source_code.empty() && source_code.back() == '\n');
_success = true; // Clear success flag before parsing a new string
// Give this push a name, so that lexer location starts at a new line
// This is necessary in case this string starts with a preprocessor directive, since the lexer only reports those as such if they appear at the beginning of a new line
// But without a name, the lexer location is set to the last token location, which most likely will not be at the start of the line
push(std::move(source_code), path.empty() ? "unknown" : path);
parse();
return _success;
}
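// Typical driver code looks roughly like the sketch below; 'output()' and 'errors()'
// are assumed to be the accessors declared in effect_preprocessor.hpp, and 'compile'
// and 'log' stand in for whatever the caller does with the results:
//
//   reshadefx::preprocessor pp;
//   pp.add_include_path("shaders");
//   if (pp.append_file("shaders/Example.fx"))
//       compile(pp.output());
//   else
//       log(pp.errors()); // "file(line, column): preprocessor error: ..." per line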
std::vector<std::filesystem::path> reshadefx::preprocessor::included_files() const
{
std::vector<std::filesystem::path> files;
files.reserve(_file_cache.size());
for (const auto &it : _file_cache)
files.push_back(std::filesystem::u8path(it.first));
return files;
}
std::vector<std::pair<std::string, std::string>> reshadefx::preprocessor::used_macro_definitions() const
{
std::vector<std::pair<std::string, std::string>> defines;
defines.reserve(_used_macros.size());
for (const std::string &name : _used_macros)
if (const auto it = _macros.find(name);
// Do not include function-like macros, since they are more likely to contain a complex replacement list
it != _macros.end() && !it->second.is_function_like)
defines.emplace_back(name, it->second.replacement_list);
return defines;
}
void reshadefx::preprocessor::error(const location &location, const std::string &message)
{
_errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor error: " + message + '\n';
_success = false; // Unset success flag
}
void reshadefx::preprocessor::warning(const location &location, const std::string &message)
{
_errors += location.source + '(' + std::to_string(location.line) + ", " + std::to_string(location.column) + ')' + ": preprocessor warning: " + message + '\n';
}
void reshadefx::preprocessor::push(std::string input, const std::string &name)
{
location start_location = !name.empty() ?
// Start at the beginning of the file when pushing a new file
location(name, 1) :
// Start with last known token location when pushing an unnamed string
_token.location;
input_level level = { name };
level.lexer.reset(new lexer(
std::move(input),
true /* ignore_comments */,
false /* ignore_whitespace */,
false /* ignore_pp_directives */,
false /* ignore_line_directives */,
true /* ignore_keywords */,
false /* escape_string_literals */,
start_location));
level.next_token.id = tokenid::unknown;
level.next_token.location = start_location; // This is used in 'consume' to initialize the output location
// Inherit hidden macros from parent
if (!_input_stack.empty())
level.hidden_macros = _input_stack.back().hidden_macros;
_input_stack.push_back(std::move(level));
_next_input_index = _input_stack.size() - 1;
// Advance into the input stack to update next token
consume();
}
bool reshadefx::preprocessor::peek(tokenid tokid) const
{
if (_input_stack.empty())
return tokid == tokenid::end_of_file;
return _input_stack[_next_input_index].next_token == tokid;
}
void reshadefx::preprocessor::consume()
{
_current_input_index = _next_input_index;
if (_input_stack.empty())
{
// End of input has been reached already (this can happen when the input text is not terminated with a new line)
assert(_current_input_index == 0);
return;
}
// Clear out input stack, now that the current token is overwritten
while (_input_stack.size() > (_current_input_index + 1))
_input_stack.pop_back();
// Update location information after switching input levels
input_level &input = _input_stack[_current_input_index];
if (!input.name.empty() && input.name != _output_location.source)
{
_output += "#line " + std::to_string(input.next_token.location.line) + " \"" + input.name + "\"\n";
// Line number is increased before checking against next token in 'tokenid::end_of_line' handling in 'parse' function below, so compensate for that here
_output_location.line = input.next_token.location.line - 1;
_output_location.source = input.name;
}
// Set current token
_token = std::move(input.next_token);
_current_token_raw_data = input.lexer->input_string().substr(_token.offset, _token.length);
// Get the next token
input.next_token = input.lexer->lex();
// Verify string literals (since the lexer cannot throw errors itself)
if (_token == tokenid::string_literal && _current_token_raw_data.back() != '\"')
error(_token.location, "unterminated string literal");
// Pop input level if lexical analysis has reached the end of it
// This ensures the EOF token is not consumed until the very last file
while (peek(tokenid::end_of_file))
{
// Remove any unterminated blocks from the stack
for (; !_if_stack.empty() && _if_stack.back().input_index >= _next_input_index; _if_stack.pop_back())
error(_if_stack.back().pp_token.location, "unterminated #if");
if (_next_input_index == 0)
{
// End of input has been reached, so cannot pop further and this is the last token
_input_stack.pop_back();
return;
}
else
{
_next_input_index -= 1;
}
}
}
void reshadefx::preprocessor::consume_until(tokenid tokid)
{
while (!accept(tokid) && !peek(tokenid::end_of_file))
{
consume();
}
}
bool reshadefx::preprocessor::accept(tokenid tokid, bool ignore_whitespace)
{
if (ignore_whitespace)
{
while (peek(tokenid::space))
{
consume();
}
}
if (peek(tokid))
{
consume();
return true;
}
return false;
}
bool reshadefx::preprocessor::expect(tokenid tokid)
{
if (!accept(tokid))
{
if (_input_stack.empty())
return tokid == tokenid::end_of_line || tokid == tokenid::end_of_file;
token actual_token = _input_stack[_next_input_index].next_token;
actual_token.location.source = _output_location.source;
if (actual_token == tokenid::end_of_line)
error(actual_token.location, "syntax error: unexpected new line");
else
error(actual_token.location, "syntax error: unexpected token '" +
_input_stack[_next_input_index].lexer->input_string().substr(actual_token.offset, actual_token.length) + '\'');
return false;
}
return true;
}
void reshadefx::preprocessor::parse()
{
std::string line;
// Consume all tokens in the input
while (!peek(tokenid::end_of_file))
{
consume();
_recursion_count = 0;
const bool skip = !_if_stack.empty() && _if_stack.back().skipping;
switch (_token)
{
case tokenid::hash_if:
parse_if();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_ifdef:
parse_ifdef();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_ifndef:
parse_ifndef();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_else:
parse_else();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_elif:
parse_elif();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_endif:
parse_endif();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
default:
// All other tokens are handled below
break;
}
if (skip)
// Ignore token since the current section is disabled
continue;
switch (_token)
{
case tokenid::hash_def:
parse_def();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_undef:
parse_undef();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_error:
parse_error();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_warning:
parse_warning();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_pragma:
parse_pragma();
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
case tokenid::hash_include:
parse_include();
continue;
case tokenid::hash_unknown:
// Standalone "#" is valid and should be ignored
if (_token.length != 0)
error(_token.location, "unrecognized preprocessing directive '" + _token.literal_as_string + '\'');
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
continue;
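// Flush the accumulated line to the output. A "#line" directive is emitted first
// whenever the output line number has drifted away from the source line number
// (for example after a skipped conditional block), so that errors reported by the
// downstream compiler still map back to the original file and line.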
case tokenid::end_of_line:
if (line.empty())
continue; // Do not append empty lines to output, instead emit "#line" statements
_output_location.line++;
if (_token.location.line != _output_location.line)
{
_output += "#line " + std::to_string(_token.location.line) + '\n';
_output_location.line = _token.location.line;
}
_output += line;
_output += '\n';
line.clear();
continue;
case tokenid::identifier:
if (evaluate_identifier_as_macro())
continue;
[[fallthrough]];
default:
line += _current_token_raw_data;
break;
}
}
// Append the last line after the EOF token was reached to the output
_output += line;
_output += '\n';
}
void reshadefx::preprocessor::parse_def()
{
if (!expect(tokenid::identifier))
return;
if (_token.literal_as_string == "defined")
return warning(_token.location, "macro name 'defined' is reserved");
macro m;
const location location = std::move(_token.location);
const std::string macro_name = std::move(_token.literal_as_string);
// Only create function-like macro if the parenthesis follows the macro name without any whitespace between
if (accept(tokenid::parenthesis_open, false))
{
m.is_function_like = true;
while (accept(tokenid::identifier))
{
m.parameters.push_back(_token.literal_as_string);
if (!accept(tokenid::comma))
break;
}
if (accept(tokenid::ellipsis))
m.is_variadic = true;
if (!expect(tokenid::parenthesis_close))
return;
}
create_macro_replacement_list(m);
if (!add_macro_definition(macro_name, m))
return error(location, "redefinition of '" + macro_name + "'");
}
void reshadefx::preprocessor::parse_undef()
{
if (!expect(tokenid::identifier))
return;
if (_token.literal_as_string == "defined")
return warning(_token.location, "macro name 'defined' is reserved");
_macros.erase(_token.literal_as_string);
}
void reshadefx::preprocessor::parse_if()
{
if_level level;
level.pp_token = _token;
level.input_index = _current_input_index;
// Evaluate expression after updating 'pp_token', so that it points at the beginning # token
level.value = evaluate_expression();
const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
level.skipping = parent_skipping || !level.value;
_if_stack.push_back(std::move(level));
}
void reshadefx::preprocessor::parse_ifdef()
{
if_level level;
level.pp_token = _token;
level.input_index = _current_input_index;
if (!expect(tokenid::identifier))
return;
level.value = is_defined(_token.literal_as_string);
const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
level.skipping = parent_skipping || !level.value;
_if_stack.push_back(std::move(level));
// Only add to used macro list if this #ifdef is active and the macro was not defined before
if (!parent_skipping)
if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined)
_used_macros.emplace(_token.literal_as_string);
}
void reshadefx::preprocessor::parse_ifndef()
{
if_level level;
level.pp_token = _token;
level.input_index = _current_input_index;
if (!expect(tokenid::identifier))
return;
level.value = !is_defined(_token.literal_as_string);
const bool parent_skipping = !_if_stack.empty() && _if_stack.back().skipping;
level.skipping = parent_skipping || !level.value;
_if_stack.push_back(std::move(level));
// Only add to used macro list if this #ifndef is active and the macro was not defined before
if (!parent_skipping)
if (const auto it = _macros.find(_token.literal_as_string); it == _macros.end() || it->second.is_predefined)
_used_macros.emplace(_token.literal_as_string);
}
void reshadefx::preprocessor::parse_elif()
{
if (_if_stack.empty())
return error(_token.location, "missing #if for #elif");
if_level &level = _if_stack.back();
if (level.pp_token == tokenid::hash_else)
return error(_token.location, "#elif is not allowed after #else");
// Update 'pp_token' before evaluating expression, so that it points at the beginning # token
level.pp_token = _token;
level.input_index = _current_input_index;
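// The branch body is emitted only if the enclosing region is not already skipping,
// no earlier branch of this #if chain was taken ('value' tracks that) and the
// condition below evaluates to a non-zero value.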
const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping;
const bool condition_result = evaluate_expression();
level.skipping = parent_skipping || level.value || !condition_result;
if (!level.value) level.value = condition_result;
}
void reshadefx::preprocessor::parse_else()
{
if (_if_stack.empty())
return error(_token.location, "missing #if for #else");
if_level &level = _if_stack.back();
if (level.pp_token == tokenid::hash_else)
return error(_token.location, "#else is not allowed after #else");
level.pp_token = _token;
level.input_index = _current_input_index;
const bool parent_skipping = _if_stack.size() > 1 && _if_stack[_if_stack.size() - 2].skipping;
level.skipping = parent_skipping || level.value;
if (!level.value) level.value = true;
}
void reshadefx::preprocessor::parse_endif()
{
if (_if_stack.empty())
error(_token.location, "missing #if for #endif");
else
_if_stack.pop_back();
}
void reshadefx::preprocessor::parse_error()
{
const location keyword_location = std::move(_token.location);
if (!expect(tokenid::string_literal))
return;
error(keyword_location, _token.literal_as_string);
}
void reshadefx::preprocessor::parse_warning()
{
const location keyword_location = std::move(_token.location);
if (!expect(tokenid::string_literal))
return;
warning(keyword_location, _token.literal_as_string);
}
void reshadefx::preprocessor::parse_pragma()
{
const location keyword_location = std::move(_token.location);
if (!expect(tokenid::identifier))
return;
std::string pragma = std::move(_token.literal_as_string);
std::string pragma_args;
// Ignore whitespace preceding the argument list
accept(tokenid::space);
while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
{
consume();
if (_token == tokenid::identifier && evaluate_identifier_as_macro())
continue;
// Collapse all whitespace down to a single space
if (_token == tokenid::space)
pragma_args += ' ';
else
pragma_args += _current_token_raw_data;
}
if (pragma == "once")
{
// Clear file contents, so that future include statements simply push an empty string instead of these file contents again
if (const auto it = _file_cache.find(_output_location.source); it != _file_cache.end())
it->second.clear();
return;
}
if (pragma == "warning" || pragma == "reshade")
{
_used_pragmas.emplace_back(std::move(pragma), std::move(pragma_args));
return;
}
warning(keyword_location, "unknown pragma ignored");
}
void reshadefx::preprocessor::parse_include()
{
const location keyword_location = std::move(_token.location);
while (accept(tokenid::identifier))
{
if (!evaluate_identifier_as_macro())
{
error(_token.location, "syntax error: unexpected identifier in #include");
consume_until(tokenid::end_of_line);
return;
}
}
if (!expect(tokenid::string_literal))
{
consume_until(tokenid::end_of_line);
return;
}
std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string);
std::filesystem::path file_path = std::filesystem::u8path(_output_location.source);
file_path.replace_filename(file_name);
if (!_file_exists_cb(file_path.u8string()))
for (const std::filesystem::path &include_path : _include_paths)
if (_file_exists_cb((file_path = include_path / file_name).u8string()))
break;
const std::string file_path_string = file_path.u8string();
// Detect recursive include and abort to avoid infinite loop
if (std::find_if(_input_stack.begin(), _input_stack.end(),
[&file_path_string](const input_level &level) { return level.name == file_path_string; }) != _input_stack.end())
return error(_token.location, "recursive #include");
std::string input;
if (const auto it = _file_cache.find(file_path_string); it != _file_cache.end())
{
input = it->second;
}
else
{
if (!read_file(file_path_string, input, _read_file_cb))
return error(keyword_location, "could not open included file '" + file_name.u8string() + '\'');
_file_cache.emplace(file_path_string, input);
}
// Skip end of line character following the include statement before pushing, so that the line number is already pointing to the next line when popping out of it again
if (!expect(tokenid::end_of_line))
consume_until(tokenid::end_of_line);
// Clear out input stack before pushing include, so that hidden macros do not bleed into the include
while (_input_stack.size() > (_next_input_index + 1))
_input_stack.pop_back();
push(std::move(input), file_path_string);
}
bool reshadefx::preprocessor::evaluate_expression()
{
struct rpn_token
{
int value;
bool is_op;
};
size_t rpn_index = 0;
size_t stack_index = 0;
const size_t STACK_SIZE = 128;
rpn_token rpn[STACK_SIZE];
int stack[STACK_SIZE];
// Keep track of previous token to figure out data type of expression
tokenid previous_token = _token;
// Run shunting-yard algorithm
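// As a quick example, the expression "1 + 2 * 3" is converted into the RPN sequence
// [1, 2, 3, *, +] here, which the evaluation loop further below reduces to 7 and
// therefore to a 'true' result.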
while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
{
if (stack_index >= STACK_SIZE || rpn_index >= STACK_SIZE)
return error(_token.location, "expression evaluator ran out of stack space"), false;
consume();
auto op = op_none;
bool left_associative = true;
bool parenthesis_matched = false;
switch (_token)
{
case tokenid::space:
continue;
case tokenid::exclaim:
op = op_not;
left_associative = false;
break;
case tokenid::percent:
op = op_modulo;
break;
case tokenid::ampersand:
op = op_bitand;
break;
case tokenid::star:
op = op_multiply;
break;
case tokenid::plus:
left_associative =
previous_token == tokenid::int_literal ||
previous_token == tokenid::uint_literal ||
previous_token == tokenid::identifier ||
previous_token == tokenid::parenthesis_close;
op = left_associative ? op_add : op_plus;
break;
case tokenid::minus:
left_associative =
previous_token == tokenid::int_literal ||
previous_token == tokenid::uint_literal ||
previous_token == tokenid::identifier ||
previous_token == tokenid::parenthesis_close;
op = left_associative ? op_subtract : op_negate;
break;
case tokenid::slash:
op = op_divide;
break;
case tokenid::less:
op = op_less;
break;
case tokenid::greater:
op = op_greater;
break;
case tokenid::caret:
op = op_bitxor;
break;
case tokenid::pipe:
op = op_bitor;
break;
case tokenid::tilde:
op = op_bitnot;
left_associative = false;
break;
case tokenid::exclaim_equal:
op = op_not_equal;
break;
case tokenid::ampersand_ampersand:
op = op_and;
break;
case tokenid::less_less:
op = op_leftshift;
break;
case tokenid::less_equal:
op = op_less_equal;
break;
case tokenid::equal_equal:
op = op_equal;
break;
case tokenid::greater_greater:
op = op_rightshift;
break;
case tokenid::greater_equal:
op = op_greater_equal;
break;
case tokenid::pipe_pipe:
op = op_or;
break;
default:
// This is not an operator token
break;
}
switch (_token)
{
case tokenid::parenthesis_open:
stack[stack_index++] = op_parentheses;
break;
case tokenid::parenthesis_close:
parenthesis_matched = false;
while (stack_index > 0)
{
const int op2 = stack[--stack_index];
if (op2 == op_parentheses)
{
parenthesis_matched = true;
break;
}
rpn[rpn_index++] = { op2, true };
}
if (!parenthesis_matched)
return error(_token.location, "unmatched ')'"), false;
break;
case tokenid::identifier:
if (evaluate_identifier_as_macro())
continue;
if (_token.literal_as_string == "exists")
{
const bool has_parentheses = accept(tokenid::parenthesis_open);
while (accept(tokenid::identifier))
{
if (!evaluate_identifier_as_macro())
{
error(_token.location, "syntax error: unexpected identifier after 'exists'");
return false;
}
}
if (!expect(tokenid::string_literal))
return false;
std::filesystem::path file_name = std::filesystem::u8path(_token.literal_as_string);
std::filesystem::path file_path = std::filesystem::u8path(_output_location.source);
file_path.replace_filename(file_name);
if (has_parentheses && !expect(tokenid::parenthesis_close))
return false;
if (!_file_exists_cb(file_path.u8string()))
for (const std::filesystem::path &include_path : _include_paths)
if (_file_exists_cb((file_path = include_path / file_name).u8string()))
break;
rpn[rpn_index++] = { _file_exists_cb(file_path.u8string()) ? 1 : 0, false };
continue;
}
if (_token.literal_as_string == "defined")
{
const bool has_parentheses = accept(tokenid::parenthesis_open);
if (!expect(tokenid::identifier))
return false;
const std::string macro_name = std::move(_token.literal_as_string);
if (has_parentheses && !expect(tokenid::parenthesis_close))
return false;
rpn[rpn_index++] = { is_defined(macro_name) ? 1 : 0, false };
continue;
}
// An identifier that cannot be replaced with a number becomes zero
rpn[rpn_index++] = { 0, false };
break;
case tokenid::int_literal:
case tokenid::uint_literal:
rpn[rpn_index++] = { _token.literal_as_int, false };
break;
default:
if (op == op_none)
return error(_token.location, "invalid expression"), false;
while (stack_index > 0)
{
const int prev_op = stack[stack_index - 1];
if (prev_op == op_parentheses)
break;
if (left_associative ?
(precedence_lookup[op] > precedence_lookup[prev_op]) :
(precedence_lookup[op] >= precedence_lookup[prev_op]))
break;
stack_index--;
rpn[rpn_index++] = { prev_op, true };
}
stack[stack_index++] = op;
break;
}
previous_token = _token;
}
while (stack_index > 0)
{
const int op = stack[--stack_index];
if (op == op_parentheses)
return error(_token.location, "unmatched ')'"), false;
rpn[rpn_index++] = { op, true };
}
#define UNARY_OPERATION(op) { \
if (stack_index < 1) \
return error(_token.location, "invalid expression"), 0; \
stack[stack_index - 1] = op stack[stack_index - 1]; \
}
#define BINARY_OPERATION(op) { \
if (stack_index < 2) \
return error(_token.location, "invalid expression"), 0; \
stack[stack_index - 2] = stack[stack_index - 2] op stack[stack_index - 1]; \
stack_index--; \
}
// Evaluate reverse polish notation output
for (rpn_token *token = rpn; rpn_index--; token++)
{
if (token->is_op)
{
switch (token->value)
{
case op_or:
BINARY_OPERATION(||);
break;
case op_and:
BINARY_OPERATION(&&);
break;
case op_bitor:
BINARY_OPERATION(|);
break;
case op_bitxor:
BINARY_OPERATION(^);
break;
case op_bitand:
BINARY_OPERATION(&);
break;
case op_not_equal:
BINARY_OPERATION(!=);
break;
case op_equal:
BINARY_OPERATION(==);
break;
case op_less:
BINARY_OPERATION(<);
break;
case op_greater:
BINARY_OPERATION(>);
break;
case op_less_equal:
BINARY_OPERATION(<=);
break;
case op_greater_equal:
BINARY_OPERATION(>=);
break;
case op_leftshift:
BINARY_OPERATION(<<);
break;
case op_rightshift:
BINARY_OPERATION(>>);
break;
case op_add:
BINARY_OPERATION(+);
break;
case op_subtract:
BINARY_OPERATION(-);
break;
case op_modulo:
BINARY_OPERATION(%);
break;
case op_divide:
BINARY_OPERATION(/);
break;
case op_multiply:
BINARY_OPERATION(*);
break;
case op_plus:
UNARY_OPERATION(+);
break;
case op_negate:
UNARY_OPERATION(-);
break;
case op_not:
UNARY_OPERATION(!);
break;
case op_bitnot:
UNARY_OPERATION(~);
break;
}
}
else
{
stack[stack_index++] = token->value;
}
}
if (stack_index != 1)
return error(_token.location, "invalid expression"), false;
return stack[0] != 0;
}
bool reshadefx::preprocessor::evaluate_identifier_as_macro()
{
if (_token.literal_as_string == "__LINE__")
{
push(std::to_string(_token.location.line));
return true;
}
if (_token.literal_as_string == "__FILE__")
{
push(escape_string(_token.location.source));
return true;
}
if (_token.literal_as_string == "__FILE_STEM__")
{
const std::filesystem::path file_stem = std::filesystem::u8path(_token.location.source).stem();
push(escape_string(file_stem.u8string()));
return true;
}
if (_token.literal_as_string == "__FILE_NAME__")
{
const std::filesystem::path file_name = std::filesystem::u8path(_token.location.source).filename();
push(escape_string(file_name.u8string()));
return true;
}
const auto it = _macros.find(_token.literal_as_string);
if (it == _macros.end())
return false;
if (!_input_stack.empty())
{
const std::unordered_set<std::string> &hidden_macros = _input_stack[_current_input_index].hidden_macros;
if (hidden_macros.find(_token.literal_as_string) != hidden_macros.end())
return false;
}
const location macro_location = _token.location;
if (_recursion_count++ >= 256)
return error(macro_location, "macro recursion too high"), false;
std::vector<std::string> arguments;
if (it->second.is_function_like)
{
if (!accept(tokenid::parenthesis_open))
return false; // Function like macro used without arguments, handle that like a normal identifier instead
while (true)
{
int parentheses_level = 0;
std::string argument;
// Ignore whitespace preceding the argument
accept(tokenid::space);
if (accept(tokenid::parenthesis_close))
break; // Special case for when there are no arguments
while (true)
{
if (peek(tokenid::end_of_file))
return error(macro_location, "unexpected end of file in macro expansion"), false;
// Consume all tokens of the argument
consume();
if (_token == tokenid::comma && parentheses_level == 0 && !(it->second.is_variadic && arguments.size() == it->second.parameters.size()))
break; // Comma marks end of an argument (unless this is the last argument in a variadic macro invocation)
if (_token == tokenid::parenthesis_open)
parentheses_level++;
if (_token == tokenid::parenthesis_close && --parentheses_level < 0)
break;
// Collapse all whitespace down to a single space
if (_token == tokenid::space)
argument += ' ';
else
argument += _current_token_raw_data;
}
// Trim whitespace following the argument
if (argument.size() && argument.back() == ' ')
argument.pop_back();
arguments.push_back(std::move(argument));
if (parentheses_level < 0)
break;
}
}
expand_macro(it->first, it->second, arguments);
return true;
}
bool reshadefx::preprocessor::is_defined(const std::string &name) const
{
return _macros.find(name) != _macros.end() ||
// Check built-in macros as well
name == "__LINE__" ||
name == "__FILE__" ||
name == "__FILE_NAME__" ||
name == "__FILE_STEM__";
}
void reshadefx::preprocessor::expand_macro(const std::string &name, const macro &macro, const std::vector<std::string> &arguments)
{
if (macro.replacement_list.empty())
return;
// Verify argument count for function-like macros
if (arguments.size() < macro.parameters.size())
return warning(_token.location, "not enough arguments for function-like macro invocation '" + name + "'");
if (arguments.size() > macro.parameters.size() && !macro.is_variadic)
return warning(_token.location, "too many arguments for function-like macro invocation '" + name + "'");
std::string input;
input.reserve(macro.replacement_list.size());
for (size_t offset = 0; offset < macro.replacement_list.size(); ++offset)
{
if (macro.replacement_list[offset] != macro_replacement_start)
{
input += macro.replacement_list[offset];
continue;
}
// This is a special replacement sequence
const char type = macro.replacement_list[++offset];
const char index = macro.replacement_list[++offset];
if (static_cast<size_t>(index) >= arguments.size())
{
if (macro.is_variadic)
{
// The concatenation operator has a special meaning when placed between a comma and a variable argument, deleting the preceding comma
if (type == macro_replacement_concat && input.back() == ',')
input.pop_back();
if (type == macro_replacement_stringize)
input += "\"\"";
}
continue;
}
switch (type)
{
case macro_replacement_argument:
// Argument prescan
push(arguments[index] + static_cast<char>(macro_replacement_argument));
while (true)
{
// Consume all tokens of the argument (until the end marker is reached)
consume();
if (_token == tokenid::unknown) // 'macro_replacement_argument' is 'tokenid::unknown'
break;
if (_token == tokenid::identifier && evaluate_identifier_as_macro())
continue;
input += _current_token_raw_data;
}
assert(_current_token_raw_data[0] == macro_replacement_argument);
break;
case macro_replacement_concat:
input += arguments[index];
break;
case macro_replacement_stringize:
// Adds backslashes to escape quotes
input += escape_string<'\"'>(arguments[index]);
break;
}
}
push(std::move(input));
// Avoid expanding macros again that are referencing themselves
_input_stack[_current_input_index].hidden_macros.insert(name);
}
void reshadefx::preprocessor::create_macro_replacement_list(macro &macro)
{
// Since the number of parameters is encoded in the string, it may not exceed the available size of a char
if (macro.parameters.size() >= std::numeric_limits<unsigned char>::max())
return error(_token.location, "too many macro parameters");
// Ignore whitespace preceding the replacement list
accept(tokenid::space);
bool next_concat = false;
while (!peek(tokenid::end_of_line) && !peek(tokenid::end_of_file))
{
consume();
switch (_token)
{
case tokenid::hash:
if (accept(tokenid::hash, false))
{
if (macro.replacement_list.empty())
return error(_token.location, "## cannot appear at start of macro expansion");
if (peek(tokenid::end_of_line))
return error(_token.location, "## cannot appear at end of macro expansion");
// Remove any whitespace preceding or following the concatenation operator (so "a ## b" becomes "ab")
if (macro.replacement_list.back() == ' ')
macro.replacement_list.pop_back();
accept(tokenid::space);
// Disable macro expansion for any argument preceding or following the ## token concatenation operator
if (macro.replacement_list.size() > 2 && macro.replacement_list[macro.replacement_list.size() - 2] == macro_replacement_argument)
macro.replacement_list[macro.replacement_list.size() - 2] = macro_replacement_concat;
next_concat = true;
continue;
}
if (macro.is_function_like)
{
if (!expect(tokenid::identifier))
return;
const auto it = std::find(macro.parameters.begin(), macro.parameters.end(), _token.literal_as_string);
if (it == macro.parameters.end() && !(macro.is_variadic && _token.literal_as_string == "__VA_ARGS__"))
return error(_token.location, "# must be followed by parameter name");
// Start a # stringize operator
macro.replacement_list += macro_replacement_start;
macro.replacement_list += macro_replacement_stringize;
macro.replacement_list += static_cast<char>(std::distance(macro.parameters.begin(), it));
next_concat = false;
continue;
}
break;
case tokenid::space:
// Collapse all whitespace down to a single space
macro.replacement_list += ' ';
continue;
case tokenid::minus:
// Special case to handle things like "#define NUM -1\n -NUM", which would otherwise result in "--1", making parsing fail
if (macro.replacement_list.empty())
macro.replacement_list += ' ';
break;
case tokenid::identifier:
if (const auto it = std::find(macro.parameters.begin(), macro.parameters.end(), _token.literal_as_string);
it != macro.parameters.end() || (macro.is_variadic && _token.literal_as_string == "__VA_ARGS__"))
{
macro.replacement_list += macro_replacement_start;
macro.replacement_list += static_cast<char>(next_concat ? macro_replacement_concat : macro_replacement_argument);
macro.replacement_list += static_cast<char>(std::distance(macro.parameters.begin(), it));
next_concat = false;
continue;
}
break;
default:
// Token needs no special handling, raw data is added to macro below
break;
}
macro.replacement_list += _current_token_raw_data;
next_concat = false;
}
// Trim whitespace following the replacement list
if (macro.replacement_list.size() && macro.replacement_list.back() == ' ')
macro.replacement_list.pop_back();
}