mirror of
https://github.com/WerWolv/ImHex.git
synced 2026-04-02 21:47:40 -05:00
patterns: Use standalone pattern language library instead of built-in one
This commit is contained in:
@@ -220,7 +220,7 @@ namespace hex {
|
||||
|
||||
namespace ContentRegistry::PatternLanguage {
|
||||
|
||||
static std::string getFunctionName(const Namespace &ns, const std::string &name) {
|
||||
static std::string getFunctionName(const pl::api::Namespace &ns, const std::string &name) {
|
||||
std::string functionName;
|
||||
for (auto &scope : ns)
|
||||
functionName += scope + "::";
|
||||
@@ -230,63 +230,67 @@ namespace hex {
|
||||
return functionName;
|
||||
}
|
||||
|
||||
void addFunction(const Namespace &ns, const std::string &name, ParameterCount parameterCount, const Callback &func) {
|
||||
std::unique_ptr<pl::PatternLanguage> createDefaultRuntime(prv::Provider *provider) {
|
||||
auto runtime = std::make_unique<pl::PatternLanguage>();
|
||||
|
||||
runtime->setDataSource([provider](u64 offset, u8 *buffer, size_t size) {
|
||||
provider->read(offset, buffer, size);
|
||||
}, 0, 0);
|
||||
|
||||
runtime->setIncludePaths(fs::getDefaultPaths(fs::ImHexPath::PatternsInclude));
|
||||
|
||||
for (const auto &func : getFunctions()) {
|
||||
if (func.dangerous)
|
||||
runtime->addDangerousFunction(func.ns, func.name, func.parameterCount, func.callback);
|
||||
else
|
||||
runtime->addFunction(func.ns, func.name, func.parameterCount, func.callback);
|
||||
}
|
||||
|
||||
for (const auto &[name, callback] : getPragmas()) {
|
||||
runtime->addPragma(name, callback);
|
||||
}
|
||||
|
||||
return runtime;
|
||||
}
|
||||
|
||||
// Registers `handler` under `name`. A handler that was previously stored
// under the same name is replaced.
void addPragma(const std::string &name, const pl::api::PragmaHandler &handler) {
    log::info("Registered new pattern language pragma: {}", name);

    auto &registeredPragmas = getPragmas();
    registeredPragmas[name] = handler;
}
|
||||
|
||||
// Registers a regular (non-dangerous) pattern language function.
//
// The definition is appended to the registry returned by getFunctions();
// the namespace and the plain name are stored separately, the fully
// qualified name is only built for the log message.
void addFunction(const pl::api::Namespace &ns, const std::string &name, pl::api::FunctionParameterCount parameterCount, const pl::api::FunctionCallback &func) {
    log::info("Registered new pattern language function: {}", getFunctionName(ns, name));

    getFunctions().push_back({
        ns, name,
        parameterCount, func,
        false
    });
}
|
||||
|
||||
void addDangerousFunction(const Namespace &ns, const std::string &name, ParameterCount parameterCount, const Callback &func) {
|
||||
void addDangerousFunction(const pl::api::Namespace &ns, const std::string &name, pl::api::FunctionParameterCount parameterCount, const pl::api::FunctionCallback &func) {
|
||||
log::info("Registered new dangerous pattern language function: {}", getFunctionName(ns, name));
|
||||
|
||||
getFunctions()[getFunctionName(ns, name)] = Function { parameterCount, { }, func, true };
|
||||
getFunctions().push_back({
|
||||
ns, name,
|
||||
parameterCount, func,
|
||||
true
|
||||
});
|
||||
}
|
||||
|
||||
// Returns the registry of pragma handlers, keyed by pragma name.
// The map is a function-local static, so it is created on first use.
std::map<std::string, pl::api::PragmaHandler> &getPragmas() {
    static std::map<std::string, pl::api::PragmaHandler> pragmas;

    return pragmas;
}
|
||||
|
||||
// Returns the list of all registered pattern language function definitions.
// The list is a function-local static, so it is created on first use.
std::vector<impl::FunctionDefinition> &getFunctions() {
    static std::vector<impl::FunctionDefinition> registeredFunctions;

    return registeredFunctions;
}
|
||||
|
||||
|
||||
static std::vector<impl::ColorPalette> s_colorPalettes;
|
||||
static u32 s_colorIndex;
|
||||
static u32 s_selectedColorPalette;
|
||||
|
||||
std::vector<impl::ColorPalette> &getPalettes() {
|
||||
return s_colorPalettes;
|
||||
}
|
||||
|
||||
void addColorPalette(const std::string &unlocalizedName, const std::vector<u32> &colors) {
|
||||
s_colorPalettes.push_back({ unlocalizedName,
|
||||
colors });
|
||||
}
|
||||
|
||||
// Selects the palette at `index` when such a palette exists; an out of range
// index leaves the selection untouched. The colour cursor is rewound in
// either case.
void setSelectedPalette(u32 index) {
    const bool indexIsValid = index < s_colorPalettes.size();

    if (indexIsValid) {
        s_selectedColorPalette = index;
    }

    resetPalette();
}
|
||||
|
||||
// Returns the next colour of the selected palette and advances the cursor,
// wrapping around at the end of the palette.
//
// Returns 0x00 when no palette has been registered, or when the selected
// palette has no colours (previously this case read out of bounds and
// performed a modulo by zero).
u32 getNextColor() {
    if (s_colorPalettes.empty())
        return 0x00;

    auto &currColors = s_colorPalettes[s_selectedColorPalette].colors;
    if (currColors.empty())
        return 0x00;

    // The modulo on the read keeps the access in bounds even if the cursor
    // ever ends up past the end of the current palette.
    const u32 color = currColors[s_colorIndex % currColors.size()];

    s_colorIndex = (s_colorIndex + 1) % currColors.size();

    return color;
}
|
||||
|
||||
// Rewinds the colour cursor so that getNextColor() starts over with the
// first colour of the selected palette.
void resetPalette() {
    s_colorIndex = 0U;
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,254 +0,0 @@
|
||||
#include <hex/pattern_language/evaluator.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern.hpp>
|
||||
|
||||
#include <hex/pattern_language/ast/ast_node.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_type_decl.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_variable_decl.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_function_call.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_function_definition.hpp>
|
||||
|
||||
#include <hex/pattern_language/patterns/pattern_unsigned.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern_signed.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern_float.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern_boolean.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern_character.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern_string.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern_enum.hpp>
|
||||
|
||||
#include <hex/helpers/logger.hpp>
|
||||
|
||||
namespace hex::pl {
|
||||
|
||||
// Stores a parameter pack made of `name` and `values` in the scope returned
// by getScope(0), replacing whatever pack that scope held before.
void Evaluator::createParameterPack(const std::string &name, const std::vector<Token::Literal> &values) {
    auto &currentScope = this->getScope(0);

    currentScope.parameterPack = ParameterPack { name, values };
}
|
||||
|
||||
// Creates a new variable called `name` in the scope returned by getScope(0).
//
// The variable's pattern is taken from `type` when that yields a pattern.
// Otherwise (auto variables) the pattern kind is derived from the alternative
// held by `value`; a held Pattern pointer is cloned and treated as a
// reference. Aborts evaluation when the name is already taken or when no type
// can be determined.
//
// Non-reference variables get a fresh stack slot and are marked local. When
// `outVariable` is set, the variable's offset is recorded in m_outVariables.
void Evaluator::createVariable(const std::string &name, ASTNode *type, const std::optional<Token::Literal> &value, bool outVariable) {
    auto &scopeVariables = *this->getScope(0).scope;

    for (auto &existing : scopeVariables) {
        if (existing->getVariableName() == name)
            LogConsole::abortEvaluation(hex::format("variable with name '{}' already exists", name));
    }

    // Creating the type's patterns may move the data offset, so it is saved
    // here and restored right afterwards.
    const auto savedOffset = this->dataOffset();
    auto typePatterns      = type->createPatterns(this);
    this->dataOffset()     = savedOffset;

    std::unique_ptr<Pattern> variable;
    bool isReference = false;

    if (!typePatterns.empty()) {
        variable = std::move(typePatterns.front());
    } else {
        // Handle auto variables
        if (!value.has_value())
            LogConsole::abortEvaluation("cannot determine type of auto variable", type);

        const auto &literal = *value;

        if (std::holds_alternative<u128>(literal)) {
            variable = std::unique_ptr<Pattern>(new PatternUnsigned(this, 0, sizeof(u128)));
        } else if (std::holds_alternative<i128>(literal)) {
            variable = std::unique_ptr<Pattern>(new PatternSigned(this, 0, sizeof(i128)));
        } else if (std::holds_alternative<double>(literal)) {
            variable = std::unique_ptr<Pattern>(new PatternFloat(this, 0, sizeof(double)));
        } else if (std::holds_alternative<bool>(literal)) {
            variable = std::unique_ptr<Pattern>(new PatternBoolean(this, 0));
        } else if (std::holds_alternative<char>(literal)) {
            variable = std::unique_ptr<Pattern>(new PatternCharacter(this, 0));
        } else if (std::holds_alternative<std::string>(literal)) {
            variable = std::unique_ptr<Pattern>(new PatternString(this, 0, 1));
        } else if (auto referenced = std::get_if<Pattern *>(&literal); referenced != nullptr) {
            variable    = (*referenced)->clone();
            isReference = true;
        } else {
            LogConsole::abortEvaluation("cannot determine type of auto variable", type);
        }
    }

    variable->setVariableName(name);

    if (!isReference) {
        // The variable's offset is the index of its slot on the stack
        variable->setOffset(this->getStack().size());
        variable->setLocal(true);
        this->getStack().emplace_back();
    }

    if (outVariable)
        this->m_outVariables[name] = variable->getOffset();

    scopeVariables.push_back(std::move(variable));
}
|
||||
|
||||
// Assigns `value` to the variable called `name`.
//
// The variable is looked up in the scope returned by getScope(0) first and in
// the global scope second. Evaluation is aborted when no such variable exists
// or when the match in the global scope is not a local variable. A non-local
// match in the first scope is silently left unchanged.
//
// Before it is written to the variable's stack slot, `value` is converted to
// fit the variable's pattern type; conversions that are not supported abort
// evaluation.
void Evaluator::setVariable(const std::string &name, const Token::Literal &value) {
    std::unique_ptr<Pattern> pattern = nullptr;

    {
        auto &variables = *this->getScope(0).scope;
        for (auto &variable : variables) {
            if (variable->getVariableName() == name) {
                pattern = variable->clone();
                break;
            }
        }
    }

    if (pattern == nullptr) {
        auto &variables = *this->getGlobalScope().scope;
        for (auto &variable : variables) {
            if (variable->getVariableName() == name) {
                if (!variable->isLocal())
                    LogConsole::abortEvaluation(hex::format("cannot modify global variable '{}' which has been placed in memory", name));

                pattern = variable->clone();
                break;
            }
        }
    }

    if (pattern == nullptr)
        LogConsole::abortEvaluation(hex::format("no variable with name '{}' found", name));

    if (!pattern->isLocal()) return;

    Token::Literal castedLiteral = std::visit(overloaded {
        // `value` is visited as a const variant, so the alternative has to be
        // taken by const reference. With a non-const `double &` parameter this
        // overload could never be selected and doubles ended up in the generic
        // overload at the bottom instead.
        [&](const double &value) -> Token::Literal {
            if (dynamic_cast<PatternUnsigned *>(pattern.get()))
                return u128(value) & bitmask(pattern->getSize() * 8);
            else if (dynamic_cast<PatternSigned *>(pattern.get()))
                return i128(value) & bitmask(pattern->getSize() * 8);
            else if (dynamic_cast<PatternFloat *>(pattern.get()))
                return pattern->getSize() == sizeof(float) ? double(float(value)) : value;
            else
                LogConsole::abortEvaluation(hex::format("cannot cast type 'double' to type '{}'", pattern->getTypeName()));
        },
        [&](const std::string &value) -> Token::Literal {
            if (dynamic_cast<PatternString *>(pattern.get()))
                return value;
            else
                LogConsole::abortEvaluation(hex::format("cannot cast type 'string' to type '{}'", pattern->getTypeName()));
        },
        [&](Pattern *value) -> Token::Literal {
            if (value->getTypeName() == pattern->getTypeName())
                return value;
            else
                LogConsole::abortEvaluation(hex::format("cannot cast type '{}' to type '{}'", value->getTypeName(), pattern->getTypeName()));
        },
        // Every remaining alternative of the literal
        [&](auto &&value) -> Token::Literal {
            if (dynamic_cast<PatternUnsigned *>(pattern.get()) || dynamic_cast<PatternEnum *>(pattern.get()))
                return u128(value) & bitmask(pattern->getSize() * 8);
            else if (dynamic_cast<PatternSigned *>(pattern.get()))
                return i128(value) & bitmask(pattern->getSize() * 8);
            else if (dynamic_cast<PatternCharacter *>(pattern.get()))
                return char(value);
            else if (dynamic_cast<PatternBoolean *>(pattern.get()))
                return bool(value);
            else if (dynamic_cast<PatternFloat *>(pattern.get()))
                return pattern->getSize() == sizeof(float) ? double(float(value)) : value;
            else
                LogConsole::abortEvaluation(hex::format("cannot cast integer literal to type '{}'", pattern->getTypeName()));
        } },
        value);

    this->getStack()[pattern->getOffset()] = castedLiteral;
}
|
||||
|
||||
std::optional<std::vector<std::shared_ptr<Pattern>>> Evaluator::evaluate(const std::vector<std::shared_ptr<ASTNode>> &ast) {
|
||||
this->m_stack.clear();
|
||||
this->m_customFunctions.clear();
|
||||
this->m_scopes.clear();
|
||||
this->m_mainResult.reset();
|
||||
this->m_aborted = false;
|
||||
|
||||
if (this->m_allowDangerousFunctions == DangerousFunctionPermission::Deny)
|
||||
this->m_allowDangerousFunctions = DangerousFunctionPermission::Ask;
|
||||
|
||||
this->m_dangerousFunctionCalled = false;
|
||||
|
||||
ON_SCOPE_EXIT {
|
||||
this->m_envVariables.clear();
|
||||
};
|
||||
|
||||
this->dataOffset() = 0x00;
|
||||
this->m_currPatternCount = 0;
|
||||
|
||||
this->m_customFunctionDefinitions.clear();
|
||||
|
||||
std::vector<std::shared_ptr<Pattern>> patterns;
|
||||
|
||||
try {
|
||||
this->setCurrentControlFlowStatement(ControlFlowStatement::None);
|
||||
pushScope(nullptr, patterns);
|
||||
ON_SCOPE_EXIT {
|
||||
popScope();
|
||||
};
|
||||
|
||||
for (auto &node : ast) {
|
||||
if (dynamic_cast<ASTNodeTypeDecl *>(node.get())) {
|
||||
; // Don't create patterns from type declarations
|
||||
} else if (dynamic_cast<ASTNodeFunctionCall *>(node.get())) {
|
||||
(void)node->evaluate(this);
|
||||
} else if (dynamic_cast<ASTNodeFunctionDefinition *>(node.get())) {
|
||||
this->m_customFunctionDefinitions.push_back(node->evaluate(this));
|
||||
} else if (auto varDeclNode = dynamic_cast<ASTNodeVariableDecl *>(node.get())) {
|
||||
for (auto &pattern : node->createPatterns(this)) {
|
||||
if (varDeclNode->getPlacementOffset() == nullptr) {
|
||||
auto type = varDeclNode->getType()->evaluate(this);
|
||||
|
||||
auto &name = pattern->getVariableName();
|
||||
this->createVariable(name, type.get(), std::nullopt, varDeclNode->isOutVariable());
|
||||
|
||||
if (varDeclNode->isInVariable() && this->m_inVariables.contains(name))
|
||||
this->setVariable(name, this->m_inVariables[name]);
|
||||
} else {
|
||||
patterns.push_back(std::move(pattern));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto newPatterns = node->createPatterns(this);
|
||||
std::move(newPatterns.begin(), newPatterns.end(), std::back_inserter(patterns));
|
||||
}
|
||||
}
|
||||
|
||||
if (this->m_customFunctions.contains("main")) {
|
||||
auto mainFunction = this->m_customFunctions["main"];
|
||||
|
||||
if (mainFunction.parameterCount.max > 0)
|
||||
LogConsole::abortEvaluation("main function may not accept any arguments");
|
||||
|
||||
this->m_mainResult = mainFunction.func(this, {});
|
||||
}
|
||||
} catch (PatternLanguageError &error) {
|
||||
if (error.getLineNumber() != 0)
|
||||
this->m_console.setHardError(error);
|
||||
|
||||
patterns.clear();
|
||||
|
||||
this->m_currPatternCount = 0;
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Remove global local variables
|
||||
std::erase_if(patterns, [](const std::shared_ptr<Pattern> &pattern) {
|
||||
return pattern->isLocal();
|
||||
});
|
||||
|
||||
return patterns;
|
||||
}
|
||||
|
||||
void Evaluator::patternCreated() {
|
||||
if (this->m_currPatternCount > this->m_patternLimit)
|
||||
LogConsole::abortEvaluation(hex::format("exceeded maximum number of patterns: {}", this->m_patternLimit));
|
||||
this->m_currPatternCount++;
|
||||
}
|
||||
|
||||
// Bookkeeping hook for a destroyed pattern: decrements the pattern counter.
void Evaluator::patternDestroyed() {
    this->m_currPatternCount -= 1;
}
|
||||
|
||||
}
|
||||
@@ -1,534 +0,0 @@
|
||||
#include <hex/pattern_language/lexer.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <charconv>
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
namespace hex::pl {
|
||||
|
||||
#define TOKEN(type, value) Token::Type::type, Token::type::value, lineNumber
|
||||
#define VALUE_TOKEN(type, value) Token::Type::type, value, lineNumber
|
||||
|
||||
// Collects characters from the NUL-terminated string `characters`.
//
// The first character is always taken (unless the string is empty); every
// following character is taken for as long as `predicate` accepts it and the
// end of the string has not been reached.
std::string matchTillInvalid(const char *characters, const std::function<bool(char)> &predicate) {
    std::string matched;

    if (*characters == 0x00)
        return matched;

    matched.push_back(*characters);

    for (const char *cursor = characters + 1; predicate(*cursor) && *cursor != 0x00; ++cursor)
        matched.push_back(*cursor);

    return matched;
}
|
||||
|
||||
// Returns true when `c` may be part of an identifier, i.e. when it is
// alphanumeric or an underscore.
bool isIdentifierCharacter(char c) {
    // The <cctype> functions require a value representable as unsigned char;
    // passing a negative char (any byte >= 0x80 where char is signed) is
    // undefined behaviour.
    return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
}
|
||||
|
||||
// Returns the length of the leading run of `string` that consists only of
// characters which may appear in a numeric literal (digits, hex digits, the
// separator ', prefix letters, '.', and the unsigned suffix).
size_t getIntegerLiteralLength(std::string_view string) {
    const auto firstForeign = string.find_first_not_of("0123456789ABCDEFabcdef'xXoOpP.uU");

    // npos is the largest possible value, so taking the minimum maps
    // "no foreign character found" to the full length of the string.
    return std::min(string.size(), firstForeign);
}
|
||||
|
||||
std::optional<Token::Literal> lexIntegerLiteral(std::string_view string) {
|
||||
bool hasFloatSuffix = string.ends_with('D') || string.ends_with('F') || string.ends_with('d') || string.ends_with('f');
|
||||
bool isFloat = std::count(string.begin(), string.end(), '.') == 1 || (!string.starts_with("0x") && hasFloatSuffix);
|
||||
|
||||
if (isFloat) {
|
||||
// Parse double
|
||||
char suffix = 0x00;
|
||||
if (hasFloatSuffix) {
|
||||
suffix = string.back();
|
||||
string = string.substr(0, string.length() - 1);
|
||||
}
|
||||
|
||||
char *end = nullptr;
|
||||
double value = std::strtod(string.begin(), &end);
|
||||
|
||||
if (end == string.end()) {
|
||||
switch (suffix) {
|
||||
case 'd':
|
||||
case 'D':
|
||||
return double(value);
|
||||
case 'f':
|
||||
case 'F':
|
||||
return float(value);
|
||||
default:
|
||||
return value;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bool isUnsigned = false;
|
||||
if (string.ends_with('U') || string.ends_with('u')) {
|
||||
isUnsigned = true;
|
||||
string = string.substr(0, string.length() - 1);
|
||||
}
|
||||
|
||||
u8 prefixOffset = 0;
|
||||
u8 base = 10;
|
||||
|
||||
if (string.starts_with("0x") || string.starts_with("0X")) {
|
||||
// Parse hexadecimal
|
||||
prefixOffset = 2;
|
||||
base = 16;
|
||||
} else if (string.starts_with("0o") || string.starts_with("0O")) {
|
||||
// Parse octal
|
||||
prefixOffset = 2;
|
||||
base = 8;
|
||||
} else if (string.starts_with("0b") || string.starts_with("0B")) {
|
||||
// Parse binary
|
||||
prefixOffset = 2;
|
||||
base = 2;
|
||||
} else {
|
||||
// Parse decimal
|
||||
prefixOffset = 0;
|
||||
base = 10;
|
||||
}
|
||||
|
||||
u128 value = 0x00;
|
||||
for (char c : string.substr(prefixOffset)) {
|
||||
value *= base;
|
||||
value += [&] {
|
||||
if (c >= '0' && c <= '9') return c - '0';
|
||||
else if (c >= 'A' && c <= 'F') return 0xA + (c - 'A');
|
||||
else if (c >= 'a' && c <= 'f') return 0xA + (c - 'a');
|
||||
else return 0x00;
|
||||
}();
|
||||
}
|
||||
|
||||
if (isUnsigned)
|
||||
return value;
|
||||
else
|
||||
return i128(value);
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::optional<Token::Literal> lexIntegerLiteralWithSeparator(std::string_view string) {
|
||||
|
||||
if (string.starts_with('\'') || string.ends_with('\''))
|
||||
return std::nullopt;
|
||||
else if (string.find('\'') == std::string_view::npos)
|
||||
return lexIntegerLiteral(string);
|
||||
else {
|
||||
auto preprocessedString = std::string(string);
|
||||
preprocessedString.erase(std::remove(preprocessedString.begin(), preprocessedString.end(), '\''), preprocessedString.end());
|
||||
return lexIntegerLiteral(preprocessedString);
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes the single, possibly escaped, character at the start of `string`.
//
// Supported forms:
//  - any character other than a backslash (length 1)
//  - \a \b \f \n \r \t \v \\ \' \"           (length 2)
//  - \xHH  with exactly two hexadecimal digits (length 4)
//  - \oOOO with exactly three octal digits     (length 5)
//
// Returns the decoded character together with the number of input characters
// it occupies, or std::nullopt when the input is empty or starts with an
// invalid / incomplete escape sequence. Anything that follows the character
// is ignored; the hex and octal forms used to be accepted only when nothing
// followed them at all.
std::optional<std::pair<char, size_t>> getCharacter(const std::string &string) {
    using Result = std::pair<char, size_t>;

    if (string.empty())
        return std::nullopt;

    // Plain character
    if (string[0] != '\\')
        return Result { string[0], 1 };

    if (string.length() < 2)
        return std::nullopt;

    // Handle simple escape sequences
    switch (string[1]) {
        case 'a':  return Result { '\a', 2 };
        case 'b':  return Result { '\b', 2 };
        case 'f':  return Result { '\f', 2 };
        case 'n':  return Result { '\n', 2 };
        case 'r':  return Result { '\r', 2 };
        case 't':  return Result { '\t', 2 };
        case 'v':  return Result { '\v', 2 };
        case '\\': return Result { '\\', 2 };
        case '\'': return Result { '\'', 2 };
        case '\"': return Result { '\"', 2 };
        default:   break;
    }

    // Value of a hexadecimal digit, -1 for every other character
    const auto digitValue = [](char c) -> int {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return 0xA + (c - 'a');
        if (c >= 'A' && c <= 'F') return 0xA + (c - 'A');
        return -1;
    };

    // Hexadecimal number
    if (string[1] == 'x') {
        if (string.length() < 4)
            return std::nullopt;

        const int high = digitValue(string[2]);
        const int low  = digitValue(string[3]);
        if (high < 0 || low < 0)
            return std::nullopt;

        return Result { static_cast<char>(high * 16 + low), 4 };
    }

    // Octal number
    if (string[1] == 'o') {
        if (string.length() < 5)
            return std::nullopt;

        int value = 0;
        for (size_t i = 2; i < 5; i++) {
            const int digit = digitValue(string[i]);
            if (digit < 0 || digit > 7)
                return std::nullopt;

            value = value * 8 + digit;
        }

        // Values above 0xFF are truncated to the low byte
        return Result { static_cast<char>(value), 5 };
    }

    return std::nullopt;
}
|
||||
|
||||
std::optional<std::pair<std::string, size_t>> getStringLiteral(const std::string &string) {
|
||||
if (!string.starts_with('\"'))
|
||||
return {};
|
||||
|
||||
size_t size = 1;
|
||||
|
||||
std::string result;
|
||||
while (string[size] != '\"') {
|
||||
auto character = getCharacter(string.substr(size));
|
||||
|
||||
if (!character.has_value())
|
||||
return {};
|
||||
|
||||
auto &[c, charSize] = character.value();
|
||||
|
||||
result += c;
|
||||
size += charSize;
|
||||
|
||||
if (size >= string.length())
|
||||
return {};
|
||||
}
|
||||
|
||||
return {
|
||||
{result, size + 1}
|
||||
};
|
||||
}
|
||||
|
||||
std::optional<std::pair<char, size_t>> getCharacterLiteral(const std::string &string) {
|
||||
if (string.empty())
|
||||
return {};
|
||||
|
||||
if (string[0] != '\'')
|
||||
return {};
|
||||
|
||||
|
||||
auto character = getCharacter(string.substr(1));
|
||||
|
||||
if (!character.has_value())
|
||||
return {};
|
||||
|
||||
auto &[c, charSize] = character.value();
|
||||
|
||||
if (string.length() >= charSize + 2 && string[charSize + 1] != '\'')
|
||||
return {};
|
||||
|
||||
return {
|
||||
{c, charSize + 2}
|
||||
};
|
||||
}
|
||||
|
||||
std::optional<std::vector<Token>> Lexer::lex(const std::string &code) {
|
||||
std::vector<Token> tokens;
|
||||
u32 offset = 0;
|
||||
|
||||
u32 lineNumber = 1;
|
||||
|
||||
try {
|
||||
|
||||
while (offset < code.length()) {
|
||||
const char &c = code[offset];
|
||||
|
||||
if (c == 0x00)
|
||||
break;
|
||||
|
||||
if (std::isblank(c) || std::isspace(c)) {
|
||||
if (code[offset] == '\n') lineNumber++;
|
||||
offset += 1;
|
||||
} else if (c == ';') {
|
||||
tokens.emplace_back(TOKEN(Separator, EndOfExpression));
|
||||
offset += 1;
|
||||
} else if (c == '(') {
|
||||
tokens.emplace_back(TOKEN(Separator, RoundBracketOpen));
|
||||
offset += 1;
|
||||
} else if (c == ')') {
|
||||
tokens.emplace_back(TOKEN(Separator, RoundBracketClose));
|
||||
offset += 1;
|
||||
} else if (c == '{') {
|
||||
tokens.emplace_back(TOKEN(Separator, CurlyBracketOpen));
|
||||
offset += 1;
|
||||
} else if (c == '}') {
|
||||
tokens.emplace_back(TOKEN(Separator, CurlyBracketClose));
|
||||
offset += 1;
|
||||
} else if (c == '[') {
|
||||
tokens.emplace_back(TOKEN(Separator, SquareBracketOpen));
|
||||
offset += 1;
|
||||
} else if (c == ']') {
|
||||
tokens.emplace_back(TOKEN(Separator, SquareBracketClose));
|
||||
offset += 1;
|
||||
} else if (c == ',') {
|
||||
tokens.emplace_back(TOKEN(Separator, Comma));
|
||||
offset += 1;
|
||||
} else if (c == '.') {
|
||||
tokens.emplace_back(TOKEN(Separator, Dot));
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 2) == "::") {
|
||||
tokens.emplace_back(TOKEN(Operator, ScopeResolution));
|
||||
offset += 2;
|
||||
} else if (c == '@') {
|
||||
tokens.emplace_back(TOKEN(Operator, AtDeclaration));
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 2) == "==") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolEquals));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == "!=") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolNotEquals));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == ">=") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolGreaterThanOrEquals));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == "<=") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolLessThanOrEquals));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == "&&") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolAnd));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == "||") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolOr));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == "^^") {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolXor));
|
||||
offset += 2;
|
||||
} else if (c == '=') {
|
||||
tokens.emplace_back(TOKEN(Operator, Assignment));
|
||||
offset += 1;
|
||||
} else if (c == ':') {
|
||||
tokens.emplace_back(TOKEN(Operator, Inherit));
|
||||
offset += 1;
|
||||
} else if (c == '+') {
|
||||
tokens.emplace_back(TOKEN(Operator, Plus));
|
||||
offset += 1;
|
||||
} else if (c == '-') {
|
||||
tokens.emplace_back(TOKEN(Operator, Minus));
|
||||
offset += 1;
|
||||
} else if (c == '*') {
|
||||
tokens.emplace_back(TOKEN(Operator, Star));
|
||||
offset += 1;
|
||||
} else if (c == '/') {
|
||||
tokens.emplace_back(TOKEN(Operator, Slash));
|
||||
offset += 1;
|
||||
} else if (c == '%') {
|
||||
tokens.emplace_back(TOKEN(Operator, Percent));
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 2) == "<<") {
|
||||
tokens.emplace_back(TOKEN(Operator, ShiftLeft));
|
||||
offset += 2;
|
||||
} else if (code.substr(offset, 2) == ">>") {
|
||||
tokens.emplace_back(TOKEN(Operator, ShiftRight));
|
||||
offset += 2;
|
||||
} else if (c == '>') {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolGreaterThan));
|
||||
offset += 1;
|
||||
} else if (c == '<') {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolLessThan));
|
||||
offset += 1;
|
||||
} else if (c == '!') {
|
||||
tokens.emplace_back(TOKEN(Operator, BoolNot));
|
||||
offset += 1;
|
||||
} else if (c == '|') {
|
||||
tokens.emplace_back(TOKEN(Operator, BitOr));
|
||||
offset += 1;
|
||||
} else if (c == '&') {
|
||||
tokens.emplace_back(TOKEN(Operator, BitAnd));
|
||||
offset += 1;
|
||||
} else if (c == '^') {
|
||||
tokens.emplace_back(TOKEN(Operator, BitXor));
|
||||
offset += 1;
|
||||
} else if (c == '~') {
|
||||
tokens.emplace_back(TOKEN(Operator, BitNot));
|
||||
offset += 1;
|
||||
} else if (c == '?') {
|
||||
tokens.emplace_back(TOKEN(Operator, TernaryConditional));
|
||||
offset += 1;
|
||||
} else if (c == '$') {
|
||||
tokens.emplace_back(TOKEN(Operator, Dollar));
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 9) == "addressof" && !isIdentifierCharacter(code[offset + 9])) {
|
||||
tokens.emplace_back(TOKEN(Operator, AddressOf));
|
||||
offset += 9;
|
||||
} else if (code.substr(offset, 6) == "sizeof" && !isIdentifierCharacter(code[offset + 6])) {
|
||||
tokens.emplace_back(TOKEN(Operator, SizeOf));
|
||||
offset += 6;
|
||||
} else if (c == '\'') {
|
||||
auto lexedCharacter = getCharacterLiteral(code.substr(offset));
|
||||
|
||||
if (!lexedCharacter.has_value())
|
||||
throwLexerError("invalid character literal", lineNumber);
|
||||
|
||||
auto [character, charSize] = lexedCharacter.value();
|
||||
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(character)));
|
||||
offset += charSize;
|
||||
} else if (c == '\"') {
|
||||
auto string = getStringLiteral(code.substr(offset));
|
||||
|
||||
if (!string.has_value())
|
||||
throwLexerError("invalid string literal", lineNumber);
|
||||
|
||||
auto [s, stringSize] = string.value();
|
||||
|
||||
tokens.emplace_back(VALUE_TOKEN(String, Token::Literal(s)));
|
||||
offset += stringSize;
|
||||
} else if (isIdentifierCharacter(c) && !std::isdigit(c)) {
|
||||
std::string identifier = matchTillInvalid(&code[offset], isIdentifierCharacter);
|
||||
|
||||
// Check for reserved keywords
|
||||
|
||||
if (identifier == "struct")
|
||||
tokens.emplace_back(TOKEN(Keyword, Struct));
|
||||
else if (identifier == "union")
|
||||
tokens.emplace_back(TOKEN(Keyword, Union));
|
||||
else if (identifier == "using")
|
||||
tokens.emplace_back(TOKEN(Keyword, Using));
|
||||
else if (identifier == "enum")
|
||||
tokens.emplace_back(TOKEN(Keyword, Enum));
|
||||
else if (identifier == "bitfield")
|
||||
tokens.emplace_back(TOKEN(Keyword, Bitfield));
|
||||
else if (identifier == "be")
|
||||
tokens.emplace_back(TOKEN(Keyword, BigEndian));
|
||||
else if (identifier == "le")
|
||||
tokens.emplace_back(TOKEN(Keyword, LittleEndian));
|
||||
else if (identifier == "if")
|
||||
tokens.emplace_back(TOKEN(Keyword, If));
|
||||
else if (identifier == "else")
|
||||
tokens.emplace_back(TOKEN(Keyword, Else));
|
||||
else if (identifier == "false")
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(false)));
|
||||
else if (identifier == "true")
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(true)));
|
||||
else if (identifier == "parent")
|
||||
tokens.emplace_back(TOKEN(Keyword, Parent));
|
||||
else if (identifier == "this")
|
||||
tokens.emplace_back(TOKEN(Keyword, This));
|
||||
else if (identifier == "while")
|
||||
tokens.emplace_back(TOKEN(Keyword, While));
|
||||
else if (identifier == "for")
|
||||
tokens.emplace_back(TOKEN(Keyword, For));
|
||||
else if (identifier == "fn")
|
||||
tokens.emplace_back(TOKEN(Keyword, Function));
|
||||
else if (identifier == "return")
|
||||
tokens.emplace_back(TOKEN(Keyword, Return));
|
||||
else if (identifier == "namespace")
|
||||
tokens.emplace_back(TOKEN(Keyword, Namespace));
|
||||
else if (identifier == "in")
|
||||
tokens.emplace_back(TOKEN(Keyword, In));
|
||||
else if (identifier == "out")
|
||||
tokens.emplace_back(TOKEN(Keyword, Out));
|
||||
else if (identifier == "break")
|
||||
tokens.emplace_back(TOKEN(Keyword, Break));
|
||||
else if (identifier == "continue")
|
||||
tokens.emplace_back(TOKEN(Keyword, Continue));
|
||||
|
||||
// Check for built-in types
|
||||
else if (identifier == "u8")
|
||||
tokens.emplace_back(TOKEN(ValueType, Unsigned8Bit));
|
||||
else if (identifier == "s8")
|
||||
tokens.emplace_back(TOKEN(ValueType, Signed8Bit));
|
||||
else if (identifier == "u16")
|
||||
tokens.emplace_back(TOKEN(ValueType, Unsigned16Bit));
|
||||
else if (identifier == "s16")
|
||||
tokens.emplace_back(TOKEN(ValueType, Signed16Bit));
|
||||
else if (identifier == "u32")
|
||||
tokens.emplace_back(TOKEN(ValueType, Unsigned32Bit));
|
||||
else if (identifier == "s32")
|
||||
tokens.emplace_back(TOKEN(ValueType, Signed32Bit));
|
||||
else if (identifier == "u64")
|
||||
tokens.emplace_back(TOKEN(ValueType, Unsigned64Bit));
|
||||
else if (identifier == "s64")
|
||||
tokens.emplace_back(TOKEN(ValueType, Signed64Bit));
|
||||
else if (identifier == "u128")
|
||||
tokens.emplace_back(TOKEN(ValueType, Unsigned128Bit));
|
||||
else if (identifier == "s128")
|
||||
tokens.emplace_back(TOKEN(ValueType, Signed128Bit));
|
||||
else if (identifier == "float")
|
||||
tokens.emplace_back(TOKEN(ValueType, Float));
|
||||
else if (identifier == "double")
|
||||
tokens.emplace_back(TOKEN(ValueType, Double));
|
||||
else if (identifier == "char")
|
||||
tokens.emplace_back(TOKEN(ValueType, Character));
|
||||
else if (identifier == "char16")
|
||||
tokens.emplace_back(TOKEN(ValueType, Character16));
|
||||
else if (identifier == "bool")
|
||||
tokens.emplace_back(TOKEN(ValueType, Boolean));
|
||||
else if (identifier == "str")
|
||||
tokens.emplace_back(TOKEN(ValueType, String));
|
||||
else if (identifier == "padding")
|
||||
tokens.emplace_back(TOKEN(ValueType, Padding));
|
||||
else if (identifier == "auto")
|
||||
tokens.emplace_back(TOKEN(ValueType, Auto));
|
||||
|
||||
// If it's not a keyword and a builtin type, it has to be an identifier
|
||||
|
||||
else
|
||||
tokens.emplace_back(VALUE_TOKEN(Identifier, Token::Identifier(identifier)));
|
||||
|
||||
offset += identifier.length();
|
||||
} else if (std::isdigit(c)) {
|
||||
auto integerLength = getIntegerLiteralLength(&code[offset]);
|
||||
auto integer = lexIntegerLiteralWithSeparator(std::string_view(&code[offset], integerLength));
|
||||
|
||||
if (!integer.has_value())
|
||||
throwLexerError("invalid integer literal", lineNumber);
|
||||
|
||||
|
||||
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(integer.value())));
|
||||
offset += integerLength;
|
||||
} else
|
||||
throwLexerError("unknown token", lineNumber);
|
||||
}
|
||||
|
||||
tokens.emplace_back(TOKEN(Separator, EndOfProgram));
|
||||
} catch (PatternLanguageError &e) {
|
||||
this->m_error = e;
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
#include <hex/pattern_language/log_console.hpp>
|
||||
|
||||
#include <hex/pattern_language/ast/ast_node.hpp>
|
||||
|
||||
namespace hex::pl {
|
||||
|
||||
[[noreturn]] void LogConsole::abortEvaluation(const std::string &message, const ASTNode *node) {
|
||||
if (node == nullptr)
|
||||
throw PatternLanguageError(0, "Evaluator: " + message);
|
||||
else
|
||||
throw PatternLanguageError(node->getLineNumber(), "Evaluator: " + message);
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,255 +0,0 @@
|
||||
#include <hex/pattern_language/pattern_language.hpp>
|
||||
|
||||
#include <hex/helpers/file.hpp>
|
||||
#include <hex/providers/provider.hpp>
|
||||
#include <hex/helpers/logger.hpp>
|
||||
|
||||
#include <hex/pattern_language/preprocessor.hpp>
|
||||
#include <hex/pattern_language/lexer.hpp>
|
||||
#include <hex/pattern_language/parser.hpp>
|
||||
#include <hex/pattern_language/validator.hpp>
|
||||
#include <hex/pattern_language/evaluator.hpp>
|
||||
|
||||
namespace hex::pl {
|
||||
|
||||
class Pattern;
|
||||
|
||||
/**
 * Creates the individual pipeline stages (preprocessor, lexer, parser,
 * validator, evaluator) and registers the pragma handlers that configure the
 * evaluator.
 *
 * Each handler returns true when the pragma value was accepted and false
 * otherwise.
 */
PatternLanguage::PatternLanguage() {
    this->m_preprocessor = new Preprocessor();
    this->m_lexer        = new Lexer();
    this->m_parser       = new Parser();
    this->m_validator    = new Validator();
    this->m_evaluator    = new Evaluator();

    this->m_preprocessor->addDefaultPragmaHandlers();

    // Shared by all "*_limit" style pragmas: parses the value with automatic
    // base detection and rejects anything that is not strictly positive
    // (strtol yields 0 for unparsable input, which is rejected as well).
    const auto parsePositiveLimit = [](const std::string &value) -> std::optional<long> {
        const auto limit = strtol(value.c_str(), nullptr, 0);
        if (limit <= 0)
            return std::nullopt;

        return limit;
    };

    this->m_preprocessor->addPragmaHandler("endian", [this](const std::string &value) {
        if (value == "big") {
            this->m_evaluator->setDefaultEndian(std::endian::big);
            return true;
        } else if (value == "little") {
            this->m_evaluator->setDefaultEndian(std::endian::little);
            return true;
        } else if (value == "native") {
            this->m_evaluator->setDefaultEndian(std::endian::native);
            return true;
        } else
            return false;
    });

    this->m_preprocessor->addPragmaHandler("eval_depth", [this, parsePositiveLimit](const std::string &value) {
        const auto limit = parsePositiveLimit(value);
        if (!limit.has_value())
            return false;

        this->m_evaluator->setEvaluationDepth(*limit);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("array_limit", [this, parsePositiveLimit](const std::string &value) {
        const auto limit = parsePositiveLimit(value);
        if (!limit.has_value())
            return false;

        this->m_evaluator->setArrayLimit(*limit);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("pattern_limit", [this, parsePositiveLimit](const std::string &value) {
        const auto limit = parsePositiveLimit(value);
        if (!limit.has_value())
            return false;

        this->m_evaluator->setPatternLimit(*limit);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("loop_limit", [this, parsePositiveLimit](const std::string &value) {
        const auto limit = parsePositiveLimit(value);
        if (!limit.has_value())
            return false;

        this->m_evaluator->setLoopLimit(*limit);
        return true;
    });

    // Applies to the currently selected provider, not to a specific one.
    this->m_preprocessor->addPragmaHandler("base_address", [](const std::string &value) {
        auto baseAddress = strtoull(value.c_str(), nullptr, 0);

        ImHexApi::Provider::get()->setBaseAddress(baseAddress);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("bitfield_order", [this](const std::string &value) {
        if (value == "left_to_right") {
            this->m_evaluator->setBitfieldOrder(BitfieldOrder::LeftToRight);
            return true;
        } else if (value == "right_to_left") {
            this->m_evaluator->setBitfieldOrder(BitfieldOrder::RightToLeft);
            return true;
        } else {
            return false;
        }
    });
}
|
||||
|
||||
/**
 * Releases every pipeline stage that the constructor allocated with `new`,
 * including the evaluator.
 */
PatternLanguage::~PatternLanguage() {
    // Drop the generated patterns and the AST before the evaluator goes away.
    // NOTE(review): done defensively in case either still refers to the
    // evaluator while being destroyed - confirm against Pattern / ASTNode.
    this->m_patterns.clear();
    this->m_currAST.clear();

    delete this->m_preprocessor;
    delete this->m_lexer;
    delete this->m_parser;
    delete this->m_validator;
    delete this->m_evaluator;
}
|
||||
|
||||
std::optional<std::vector<std::shared_ptr<ASTNode>>> PatternLanguage::parseString(const std::string &code) {
|
||||
auto preprocessedCode = this->m_preprocessor->preprocess(code);
|
||||
if (!preprocessedCode.has_value()) {
|
||||
this->m_currError = this->m_preprocessor->getError();
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto tokens = this->m_lexer->lex(preprocessedCode.value());
|
||||
if (!tokens.has_value()) {
|
||||
this->m_currError = this->m_lexer->getError();
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto ast = this->m_parser->parse(tokens.value());
|
||||
if (!ast.has_value()) {
|
||||
this->m_currError = this->m_parser->getError();
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (!this->m_validator->validate(*ast)) {
|
||||
this->m_currError = this->m_validator->getError();
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return ast;
|
||||
}
|
||||
|
||||
/**
 * Parses and evaluates the given source against a provider.
 *
 * @param provider    Provider handed to the evaluator.
 * @param code        Pattern language source text.
 * @param envVars     Environment variables set on the evaluator.
 * @param inVariables Values passed to the evaluator as in-variables.
 * @param checkResult When true, a non-zero value returned from main is
 *                    treated as a failure.
 * @return true on success, false otherwise (the error is kept in m_currError
 *         and written to the evaluator console when this function exits).
 */
bool PatternLanguage::executeString(prv::Provider *provider, const std::string &code, const std::map<std::string, Token::Literal> &envVars, const std::map<std::string, Token::Literal> &inVariables, bool checkResult) {
    this->m_running = true;
    ON_SCOPE_EXIT { this->m_running = false; };

    // Registered second, so it is expected to run before the guard above.
    ON_SCOPE_EXIT {
        if (const auto &currError = this->m_currError; currError.has_value()) {
            auto &console = this->m_evaluator->getConsole();

            if (currError->getLineNumber() > 0)
                console.log(LogConsole::Level::Error, hex::format("{}: {}", currError->getLineNumber(), currError->what()));
            else
                console.log(LogConsole::Level::Error, currError->what());
        }
    };

    // Reset all state left over from a previous run
    this->m_currError.reset();
    this->m_evaluator->getConsole().clear();
    this->m_evaluator->setProvider(provider);
    this->m_evaluator->setDefaultEndian(std::endian::native);
    this->m_evaluator->setEvaluationDepth(32);
    this->m_evaluator->setArrayLimit(0x1000);
    this->m_evaluator->setPatternLimit(0x2000);
    this->m_evaluator->setLoopLimit(0x1000);
    this->m_evaluator->setInVariables(inVariables);

    for (const auto &entry : envVars)
        this->m_evaluator->setEnvVariable(entry.first, entry.second);

    this->m_currAST.clear();

    if (auto parsed = this->parseString(code); parsed)
        this->m_currAST = std::move(parsed.value());
    else
        return false;

    auto evaluated = this->m_evaluator->evaluate(this->m_currAST);
    if (!evaluated.has_value()) {
        this->m_currError = this->m_evaluator->getConsole().getLastHardError();
        return false;
    }

    auto mainResult = this->m_evaluator->getMainResult();
    if (checkResult && mainResult.has_value()) {
        auto returnCode = Token::literalToSigned(*mainResult);

        if (returnCode != 0) {
            this->m_currError = PatternLanguageError(0, hex::format("non-success value returned from main: {}", returnCode));
            return false;
        }
    }

    this->m_patterns = std::move(evaluated.value());

    return true;
}
|
||||
|
||||
/**
 * Reads the file at `path` and executes its content via executeString() with
 * result checking enabled.
 *
 * @return The value returned by executeString().
 */
bool PatternLanguage::executeFile(prv::Provider *provider, const std::fs::path &path, const std::map<std::string, Token::Literal> &envVars, const std::map<std::string, Token::Literal> &inVariables) {
    fs::File sourceFile(path, fs::File::Mode::Read);
    const auto sourceCode = sourceFile.readString();

    return executeString(provider, sourceCode, envVars, inVariables, true);
}
|
||||
|
||||
/**
 * Wraps `code` into a `main` function, executes it without checking main's
 * return value, and hands back what main returned.
 *
 * @return Pair of (execution succeeded, main result reported by the evaluator).
 */
std::pair<bool, std::optional<Token::Literal>> PatternLanguage::executeFunction(prv::Provider *provider, const std::string &code) {
    const auto wrappedCode = hex::format("fn main() {{ {0} }};", code);

    // The main result has to be queried after execution finished.
    const bool succeeded = executeString(provider, wrappedCode, {}, {}, false);
    auto mainResult      = m_evaluator->getMainResult();

    return { succeeded, std::move(mainResult) };
}
|
||||
|
||||
void PatternLanguage::abort() {
|
||||
this->m_evaluator->abort();
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<ASTNode>> &PatternLanguage::getCurrentAST() const {
|
||||
return this->m_currAST;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::map<std::string, Token::Literal> PatternLanguage::getOutVariables() const {
|
||||
return this->m_evaluator->getOutVariables();
|
||||
}
|
||||
|
||||
|
||||
/** @return The log entries held by the evaluator's console. */
const std::vector<std::pair<LogConsole::Level, std::string>> &PatternLanguage::getConsoleLog() {
    auto &&console = m_evaluator->getConsole();
    return console.getLog();
}
|
||||
|
||||
const std::optional<PatternLanguageError> &PatternLanguage::getError() {
|
||||
return this->m_currError;
|
||||
}
|
||||
|
||||
/** @return The pattern count reported by the evaluator. */
u32 PatternLanguage::getCreatedPatternCount() {
    return m_evaluator->getPatternCount();
}
|
||||
|
||||
/** @return The pattern limit currently configured on the evaluator. */
u32 PatternLanguage::getMaximumPatternCount() {
    return m_evaluator->getPatternLimit();
}
|
||||
|
||||
|
||||
void PatternLanguage::allowDangerousFunctions(bool allow) {
|
||||
this->m_evaluator->allowDangerousFunctions(allow);
|
||||
}
|
||||
|
||||
bool PatternLanguage::hasDangerousFunctionBeenCalled() const {
|
||||
return this->m_evaluator->hasDangerousFunctionBeenCalled();
|
||||
}
|
||||
|
||||
void PatternLanguage::reset() {
|
||||
this->m_patterns.clear();
|
||||
|
||||
this->m_currAST.clear();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,276 +0,0 @@
|
||||
#include <hex/pattern_language/preprocessor.hpp>
|
||||
|
||||
#include <hex/helpers/fmt.hpp>
|
||||
#include <hex/helpers/fs.hpp>
|
||||
#include <hex/helpers/file.hpp>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace hex::pl {
|
||||
|
||||
std::optional<std::string> Preprocessor::preprocess(std::string code, bool initialRun) {
|
||||
u32 offset = 0;
|
||||
u32 lineNumber = 1;
|
||||
bool isInString = false;
|
||||
|
||||
if (initialRun) {
|
||||
this->m_defines.clear();
|
||||
this->m_pragmas.clear();
|
||||
}
|
||||
|
||||
std::string output;
|
||||
output.reserve(code.length());
|
||||
|
||||
try {
|
||||
while (offset < code.length()) {
|
||||
if (code.substr(offset, 2) == "//") {
|
||||
while (code[offset] != '\n' && offset < code.length())
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 2) == "/*") {
|
||||
while (code.substr(offset, 2) != "*/" && offset < code.length()) {
|
||||
if (code[offset] == '\n') {
|
||||
output += '\n';
|
||||
lineNumber++;
|
||||
}
|
||||
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
offset += 2;
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("unterminated comment", lineNumber - 1);
|
||||
} else {
|
||||
output += code[offset];
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
offset = 0;
|
||||
code = output;
|
||||
output.clear();
|
||||
output.reserve(code.size());
|
||||
|
||||
bool startOfLine = true;
|
||||
while (offset < code.length()) {
|
||||
if (offset > 0 && code[offset - 1] != '\\' && code[offset] == '\"')
|
||||
isInString = !isInString;
|
||||
else if (isInString) {
|
||||
output += code[offset];
|
||||
offset += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (code[offset] == '#' && startOfLine) {
|
||||
offset += 1;
|
||||
|
||||
if (code.substr(offset, 7) == "include") {
|
||||
offset += 7;
|
||||
|
||||
while (std::isblank(code[offset]) || std::isspace(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
if (code[offset] != '<' && code[offset] != '"')
|
||||
throwPreprocessorError("expected '<' or '\"' before file name", lineNumber);
|
||||
|
||||
char endChar = code[offset];
|
||||
if (endChar == '<') endChar = '>';
|
||||
|
||||
offset += 1;
|
||||
|
||||
std::string includeFile;
|
||||
while (code[offset] != endChar && code[offset] != '\n') {
|
||||
includeFile += code[offset];
|
||||
offset += 1;
|
||||
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError(hex::format("missing terminating '{0}' character", endChar), lineNumber);
|
||||
}
|
||||
offset += 1;
|
||||
|
||||
std::fs::path includePath = includeFile;
|
||||
|
||||
if (includeFile[0] != '/') {
|
||||
for (const auto &dir : fs::getDefaultPaths(fs::ImHexPath::PatternsInclude)) {
|
||||
std::fs::path tempPath = dir / includePath;
|
||||
if (fs::isRegularFile(tempPath)) {
|
||||
includePath = tempPath;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!fs::isRegularFile(includePath)) {
|
||||
if (includePath.parent_path().filename().string() == "std")
|
||||
throwPreprocessorError(hex::format("{0}: No such file.\n\nThis file might be part of the standard library.\nYou can install the standard library though\nthe Content Store found under Help -> Content Store.", includeFile.c_str()), lineNumber);
|
||||
else
|
||||
throwPreprocessorError(hex::format("{0}: No such file", includeFile.c_str()), lineNumber);
|
||||
}
|
||||
|
||||
fs::File file(includePath, fs::File::Mode::Read);
|
||||
if (!file.isValid()) {
|
||||
throwPreprocessorError(hex::format("{0}: Failed to open file", includeFile.c_str()), lineNumber);
|
||||
}
|
||||
|
||||
Preprocessor preprocessor;
|
||||
preprocessor.addDefaultPragmaHandlers();
|
||||
preprocessor.m_defines = this->m_defines;
|
||||
preprocessor.m_onceIncludedFiles = this->m_onceIncludedFiles;
|
||||
|
||||
auto preprocessedInclude = preprocessor.preprocess(file.readString(), /*initialRun =*/false);
|
||||
|
||||
if (!preprocessedInclude.has_value()) {
|
||||
auto error = *preprocessor.m_error;
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (preprocessor.shouldOnlyIncludeOnce()) {
|
||||
auto [iter, added] = this->m_onceIncludedFiles.insert(includePath);
|
||||
if (added) {
|
||||
auto content = preprocessedInclude.value();
|
||||
|
||||
std::replace(content.begin(), content.end(), '\n', ' ');
|
||||
std::replace(content.begin(), content.end(), '\r', ' ');
|
||||
|
||||
output += content;
|
||||
}
|
||||
}
|
||||
|
||||
this->m_defines = preprocessor.m_defines;
|
||||
this->m_onceIncludedFiles = preprocessor.m_onceIncludedFiles;
|
||||
} else if (code.substr(offset, 6) == "define") {
|
||||
offset += 6;
|
||||
|
||||
while (std::isblank(code[offset])) {
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
std::string defineName;
|
||||
while (!std::isblank(code[offset])) {
|
||||
defineName += code[offset];
|
||||
|
||||
if (offset >= code.length() || code[offset] == '\n' || code[offset] == '\r')
|
||||
throwPreprocessorError("no value given in #define directive", lineNumber);
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
while (std::isblank(code[offset])) {
|
||||
offset += 1;
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("no value given in #define directive", lineNumber);
|
||||
}
|
||||
|
||||
std::string replaceValue;
|
||||
while (code[offset] != '\n' && code[offset] != '\r') {
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("missing new line after #define directive", lineNumber);
|
||||
|
||||
replaceValue += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
if (replaceValue.empty())
|
||||
throwPreprocessorError("no value given in #define directive", lineNumber);
|
||||
|
||||
this->m_defines.emplace(defineName, replaceValue, lineNumber);
|
||||
} else if (code.substr(offset, 6) == "pragma") {
|
||||
offset += 6;
|
||||
|
||||
while (std::isblank(code[offset])) {
|
||||
offset += 1;
|
||||
|
||||
if (code[offset] == '\n' || code[offset] == '\r')
|
||||
throwPreprocessorError("no instruction given in #pragma directive", lineNumber);
|
||||
}
|
||||
|
||||
std::string pragmaKey;
|
||||
while (!std::isblank(code[offset]) && code[offset] != '\n' && code[offset] != '\r') {
|
||||
pragmaKey += code[offset];
|
||||
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("no instruction given in #pragma directive", lineNumber);
|
||||
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string pragmaValue;
|
||||
while (code[offset] != '\n' && code[offset] != '\r') {
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("missing new line after #pragma directive", lineNumber);
|
||||
|
||||
pragmaValue += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
this->m_pragmas.emplace(pragmaKey, pragmaValue, lineNumber);
|
||||
} else
|
||||
throwPreprocessorError("unknown preprocessor directive", lineNumber);
|
||||
}
|
||||
|
||||
if (code[offset] == '\n') {
|
||||
lineNumber++;
|
||||
startOfLine = true;
|
||||
} else if (!std::isspace(code[offset]))
|
||||
startOfLine = false;
|
||||
|
||||
output += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
// Apply defines
|
||||
std::vector<std::tuple<std::string, std::string, u32>> sortedDefines;
|
||||
std::copy(this->m_defines.begin(), this->m_defines.end(), std::back_inserter(sortedDefines));
|
||||
std::sort(sortedDefines.begin(), sortedDefines.end(), [](const auto &left, const auto &right) {
|
||||
return std::get<0>(left).size() > std::get<0>(right).size();
|
||||
});
|
||||
|
||||
for (const auto &[define, value, defineLine] : sortedDefines) {
|
||||
size_t index = 0;
|
||||
while ((index = output.find(define, index)) != std::string::npos) {
|
||||
output.replace(index, define.length(), value);
|
||||
index += value.length();
|
||||
}
|
||||
}
|
||||
|
||||
// Handle pragmas
|
||||
for (const auto &[type, value, pragmaLine] : this->m_pragmas) {
|
||||
if (this->m_pragmaHandlers.contains(type)) {
|
||||
if (!this->m_pragmaHandlers[type](value))
|
||||
throwPreprocessorError(hex::format("invalid value provided to '{0}' #pragma directive", type.c_str()), pragmaLine);
|
||||
} else
|
||||
throwPreprocessorError(hex::format("no #pragma handler registered for type {0}", type.c_str()), pragmaLine);
|
||||
}
|
||||
} catch (PatternLanguageError &e) {
|
||||
this->m_error = e;
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
void Preprocessor::addPragmaHandler(const std::string &pragmaType, const std::function<bool(const std::string &)> &function) {
|
||||
this->m_pragmaHandlers[pragmaType] = function;
|
||||
}
|
||||
|
||||
void Preprocessor::removePragmaHandler(const std::string &pragmaType) {
|
||||
this->m_pragmaHandlers.erase(pragmaType);
|
||||
}
|
||||
|
||||
void Preprocessor::addDefaultPragmaHandlers() {
|
||||
this->addPragmaHandler("MIME", [](const std::string &value) {
|
||||
return !std::all_of(value.begin(), value.end(), isspace) && !value.ends_with('\n') && !value.ends_with('\r');
|
||||
});
|
||||
this->addPragmaHandler("endian", [](const std::string &value) {
|
||||
return value == "big" || value == "little" || value == "native";
|
||||
});
|
||||
this->addPragmaHandler("once", [this](const std::string &value) {
|
||||
this->m_onlyIncludeOnce = true;
|
||||
|
||||
return value.empty();
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
#include <hex/pattern_language/validator.hpp>
|
||||
|
||||
#include <hex/pattern_language/ast/ast_node.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_variable_decl.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_type_decl.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_struct.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_union.hpp>
|
||||
#include <hex/pattern_language/ast/ast_node_enum.hpp>
|
||||
|
||||
#include <hex/helpers/fmt.hpp>
|
||||
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
|
||||
namespace hex::pl {
|
||||
|
||||
bool Validator::validate(const std::vector<std::shared_ptr<ASTNode>> &ast) {
|
||||
std::unordered_set<std::string> identifiers;
|
||||
std::unordered_set<std::string> types;
|
||||
|
||||
try {
|
||||
|
||||
for (const auto &node : ast) {
|
||||
if (node == nullptr)
|
||||
throwValidatorError("nullptr in AST. This is a bug!", 1);
|
||||
|
||||
if (auto variableDeclNode = dynamic_cast<ASTNodeVariableDecl *>(node.get()); variableDeclNode != nullptr) {
|
||||
if (!identifiers.insert(variableDeclNode->getName().data()).second)
|
||||
throwValidatorError(hex::format("redefinition of identifier '{0}'", variableDeclNode->getName().data()), variableDeclNode->getLineNumber());
|
||||
|
||||
this->validate(hex::moveToVector<std::shared_ptr<ASTNode>>(variableDeclNode->getType()->clone()));
|
||||
} else if (auto typeDeclNode = dynamic_cast<ASTNodeTypeDecl *>(node.get()); typeDeclNode != nullptr) {
|
||||
if (!types.insert(typeDeclNode->getName().data()).second)
|
||||
throwValidatorError(hex::format("redefinition of type '{0}'", typeDeclNode->getName().data()), typeDeclNode->getLineNumber());
|
||||
|
||||
if (!typeDeclNode->isForwardDeclared())
|
||||
this->validate(hex::moveToVector<std::shared_ptr<ASTNode>>(typeDeclNode->getType()->clone()));
|
||||
} else if (auto structNode = dynamic_cast<ASTNodeStruct *>(node.get()); structNode != nullptr) {
|
||||
this->validate(structNode->getMembers());
|
||||
} else if (auto unionNode = dynamic_cast<ASTNodeUnion *>(node.get()); unionNode != nullptr) {
|
||||
this->validate(unionNode->getMembers());
|
||||
} else if (auto enumNode = dynamic_cast<ASTNodeEnum *>(node.get()); enumNode != nullptr) {
|
||||
std::unordered_set<std::string> enumIdentifiers;
|
||||
for (auto &[name, value] : enumNode->getEntries()) {
|
||||
if (!enumIdentifiers.insert(name).second)
|
||||
throwValidatorError(hex::format("redefinition of enum constant '{0}'", name.c_str()), value->getLineNumber());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (PatternLanguageError &e) {
|
||||
this->m_error = e;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,8 @@
|
||||
#include <hex/providers/provider.hpp>
|
||||
|
||||
#include <hex.hpp>
|
||||
#include <hex/api/content_registry.hpp>
|
||||
#include <hex/api/event.hpp>
|
||||
#include <hex/pattern_language/patterns/pattern.hpp>
|
||||
#include <hex/pattern_language/pattern_language.hpp>
|
||||
|
||||
#include <hex/ui/view.hpp>
|
||||
|
||||
#include <cmath>
|
||||
@@ -13,11 +11,13 @@
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
#include <pl/pattern_language.hpp>
|
||||
|
||||
namespace hex::prv {
|
||||
|
||||
Provider::Provider() {
|
||||
this->m_patches.emplace_back();
|
||||
this->m_patternLanguageRuntime = std::make_unique<pl::PatternLanguage>();
|
||||
this->m_patternLanguageRuntime = ContentRegistry::PatternLanguage::createDefaultRuntime(this);
|
||||
|
||||
if (this->hasLoadInterface())
|
||||
EventManager::post<RequestOpenPopup>(View::toWindowName("hex.builtin.view.provider_settings.load_popup"));
|
||||
@@ -44,7 +44,7 @@ namespace hex::prv {
|
||||
}
|
||||
|
||||
void Provider::resize(size_t newSize) {
|
||||
hex::unused(newSize);
|
||||
this->m_patternLanguageRuntime->setDataSize(newSize);
|
||||
}
|
||||
|
||||
void Provider::insert(u64 offset, size_t size) {
|
||||
@@ -129,6 +129,7 @@ namespace hex::prv {
|
||||
|
||||
/**
 * Stores the new base address and passes it on to this provider's pattern
 * language runtime.
 */
void Provider::setBaseAddress(u64 address) {
    m_baseAddress = address;

    m_patternLanguageRuntime->setDataBaseAddress(address);
}
|
||||
|
||||
u64 Provider::getBaseAddress() const {
|
||||
|
||||
Reference in New Issue
Block a user