patterns: Use standalone pattern language library instead of built-in one

This commit is contained in:
WerWolv
2022-04-17 16:57:30 +02:00
parent f5fe49923b
commit 17383083fb
92 changed files with 416 additions and 8336 deletions

View File

@@ -220,7 +220,7 @@ namespace hex {
namespace ContentRegistry::PatternLanguage {
static std::string getFunctionName(const Namespace &ns, const std::string &name) {
static std::string getFunctionName(const pl::api::Namespace &ns, const std::string &name) {
std::string functionName;
for (auto &scope : ns)
functionName += scope + "::";
@@ -230,63 +230,67 @@ namespace hex {
return functionName;
}
void addFunction(const Namespace &ns, const std::string &name, ParameterCount parameterCount, const Callback &func) {
/**
 * @brief Creates a pattern language runtime preconfigured with everything registered so far.
 *
 * The runtime reads its data through @p provider, uses the default pattern include paths and
 * receives every function and pragma currently stored in the registry.
 *
 * @param provider Provider the runtime's data source callback reads from. The pointer is
 *                 captured by the callback, so it has to stay valid while the runtime is used.
 * @return The newly created runtime
 */
std::unique_ptr<pl::PatternLanguage> createDefaultRuntime(prv::Provider *provider) {
    auto result = std::make_unique<pl::PatternLanguage>();

    // Forward all data reads of the runtime to the provider
    result->setDataSource(
        [provider](u64 address, u8 *destination, size_t length) {
            provider->read(address, destination, length);
        },
        0, 0);

    result->setIncludePaths(fs::getDefaultPaths(fs::ImHexPath::PatternsInclude));

    // Hand over all registered functions, keeping dangerous ones separate
    for (const auto &entry : getFunctions()) {
        if (!entry.dangerous)
            result->addFunction(entry.ns, entry.name, entry.parameterCount, entry.callback);
        else
            result->addDangerousFunction(entry.ns, entry.name, entry.parameterCount, entry.callback);
    }

    // Hand over all registered pragmas
    for (const auto &pragma : getPragmas())
        result->addPragma(pragma.first, pragma.second);

    return result;
}
/**
 * @brief Registers a pragma handler under the given name.
 *
 * A handler that was registered earlier under the same name is replaced.
 *
 * @param name Name of the pragma
 * @param handler Handler stored for that pragma
 */
void addPragma(const std::string &name, const pl::api::PragmaHandler &handler) {
    log::info("Registered new pattern language pragma: {}", name);

    auto &registry = getPragmas();
    registry[name] = handler;
}
// Registers a regular (non-dangerous) pattern language function.
// Logs the fully qualified name built by getFunctionName() and stores the definition
// in the container returned by getFunctions(), with the dangerous flag set to false.
// NOTE(review): this span is taken from a diff view and appears to contain both the removed
// map-based store and the added vector push_back - confirm which statement is in the final file.
void addFunction(const pl::api::Namespace &ns, const std::string &name, pl::api::FunctionParameterCount parameterCount, const pl::api::FunctionCallback &func) {
log::info("Registered new pattern language function: {}", getFunctionName(ns, name));
getFunctions()[getFunctionName(ns, name)] = Function { parameterCount, { }, func, false };
getFunctions().push_back({
ns, name,
parameterCount, func,
false
});
}
// Registers a pattern language function that is flagged as dangerous.
// Logs the fully qualified name built by getFunctionName() and stores the definition
// in the container returned by getFunctions(), with the dangerous flag set to true.
// NOTE(review): this span is taken from a diff view and appears to contain the removed and the
// added signature as well as the removed and the added store statement - confirm the final form.
void addDangerousFunction(const Namespace &ns, const std::string &name, ParameterCount parameterCount, const Callback &func) {
void addDangerousFunction(const pl::api::Namespace &ns, const std::string &name, pl::api::FunctionParameterCount parameterCount, const pl::api::FunctionCallback &func) {
log::info("Registered new dangerous pattern language function: {}", getFunctionName(ns, name));
getFunctions()[getFunctionName(ns, name)] = Function { parameterCount, { }, func, true };
getFunctions().push_back({
ns, name,
parameterCount, func,
true
});
}
std::map<std::string, Function> &getFunctions() {
static std::map<std::string, Function> functions;
/**
 * @brief Gives access to the registry of pragma handlers, keyed by pragma name.
 * @return Mutable reference to the function-local static registry
 */
std::map<std::string, pl::api::PragmaHandler> &getPragmas() {
    static std::map<std::string, pl::api::PragmaHandler> registeredPragmas;

    return registeredPragmas;
}
/**
 * @brief Gives access to the list of registered pattern language functions.
 * @return Mutable reference to the function-local static list
 */
std::vector<impl::FunctionDefinition> &getFunctions() {
    static std::vector<impl::FunctionDefinition> registeredFunctions;

    return registeredFunctions;
}
// All registered color palettes, in the order addColorPalette() appended them
static std::vector<impl::ColorPalette> s_colorPalettes;
// Position inside the selected palette of the color getNextColor() returns next
static u32 s_colorIndex;
// Index into s_colorPalettes of the palette getNextColor() reads from
static u32 s_selectedColorPalette;
/**
 * @brief Gives access to all registered color palettes.
 * @return Mutable reference to the palette list
 */
std::vector<impl::ColorPalette> &getPalettes() {
    auto &palettes = s_colorPalettes;

    return palettes;
}
void addColorPalette(const std::string &unlocalizedName, const std::vector<u32> &colors) {
s_colorPalettes.push_back({ unlocalizedName,
colors });
}
/**
 * @brief Selects the palette colors are taken from.
 *
 * An index that does not refer to a registered palette leaves the selection unchanged.
 * The color position is reset in either case.
 *
 * @param index Index of the palette to select
 */
void setSelectedPalette(u32 index) {
    const bool indexValid = index < s_colorPalettes.size();
    if (indexValid)
        s_selectedColorPalette = index;

    resetPalette();
}
/**
 * @brief Returns the next color of the selected palette and advances the color position.
 *
 * The position wraps around at the end of the palette.
 *
 * @return The next color, or 0x00 if no palette is registered or the selected palette
 *         does not contain any colors
 */
u32 getNextColor() {
    if (s_colorPalettes.empty())
        return 0x00;

    const auto &currColors = s_colorPalettes[s_selectedColorPalette].colors;

    // A palette without colors would otherwise be indexed out of bounds
    // and cause a modulo by zero below
    if (currColors.empty())
        return 0x00;

    // Palettes are handed out by mutable reference, so make sure the position
    // is still inside the palette before using it
    s_colorIndex %= currColors.size();

    const u32 color = currColors[s_colorIndex];

    s_colorIndex = (s_colorIndex + 1) % currColors.size();

    return color;
}
/**
 * @brief Restarts color selection at the first color of the palette.
 */
void resetPalette() {
    s_colorIndex = 0U;
}
}

View File

@@ -1,254 +0,0 @@
#include <hex/pattern_language/evaluator.hpp>
#include <hex/pattern_language/patterns/pattern.hpp>
#include <hex/pattern_language/ast/ast_node.hpp>
#include <hex/pattern_language/ast/ast_node_type_decl.hpp>
#include <hex/pattern_language/ast/ast_node_variable_decl.hpp>
#include <hex/pattern_language/ast/ast_node_function_call.hpp>
#include <hex/pattern_language/ast/ast_node_function_definition.hpp>
#include <hex/pattern_language/patterns/pattern_unsigned.hpp>
#include <hex/pattern_language/patterns/pattern_signed.hpp>
#include <hex/pattern_language/patterns/pattern_float.hpp>
#include <hex/pattern_language/patterns/pattern_boolean.hpp>
#include <hex/pattern_language/patterns/pattern_character.hpp>
#include <hex/pattern_language/patterns/pattern_string.hpp>
#include <hex/pattern_language/patterns/pattern_enum.hpp>
#include <hex/helpers/logger.hpp>
namespace hex::pl {
/**
 * @brief Stores a parameter pack in the scope returned by getScope(0).
 *
 * @param name Name of the parameter pack
 * @param values Values contained in the parameter pack
 */
void Evaluator::createParameterPack(const std::string &name, const std::vector<Token::Literal> &values) {
    this->getScope(0).parameterPack = ParameterPack { name, values };
}
// Creates a new variable called `name` in the scope returned by getScope(0).
//   name        - name of the variable; evaluation is aborted if the scope already has one with that name
//   type        - AST node used to create the pattern that represents the variable
//   value       - only consulted when `type` yields no pattern (auto variable) to pick a pattern type
//   outVariable - when true, the variable's offset is recorded in m_outVariables under `name`
void Evaluator::createVariable(const std::string &name, ASTNode *type, const std::optional<Token::Literal> &value, bool outVariable) {
auto &variables = *this->getScope(0).scope;
for (auto &variable : variables) {
if (variable->getVariableName() == name) {
LogConsole::abortEvaluation(hex::format("variable with name '{}' already exists", name));
}
}
// Remember the data offset so it can be restored after creating the type's patterns
auto startOffset = this->dataOffset();
std::unique_ptr<Pattern> pattern;
bool referenceType = false;
auto typePattern = type->createPatterns(this);
this->dataOffset() = startOffset;
if (typePattern.empty()) {
// Handle auto variables
if (!value.has_value())
LogConsole::abortEvaluation("cannot determine type of auto variable", type);
// Pick the pattern type that matches the alternative currently held by the value
if (std::get_if<u128>(&value.value()) != nullptr)
pattern = std::unique_ptr<Pattern>(new PatternUnsigned(this, 0, sizeof(u128)));
else if (std::get_if<i128>(&value.value()) != nullptr)
pattern = std::unique_ptr<Pattern>(new PatternSigned(this, 0, sizeof(i128)));
else if (std::get_if<double>(&value.value()) != nullptr)
pattern = std::unique_ptr<Pattern>(new PatternFloat(this, 0, sizeof(double)));
else if (std::get_if<bool>(&value.value()) != nullptr)
pattern = std::unique_ptr<Pattern>(new PatternBoolean(this, 0));
else if (std::get_if<char>(&value.value()) != nullptr)
pattern = std::unique_ptr<Pattern>(new PatternCharacter(this, 0));
else if (std::get_if<std::string>(&value.value()) != nullptr)
pattern = std::unique_ptr<Pattern>(new PatternString(this, 0, 1));
else if (auto patternValue = std::get_if<Pattern *>(&value.value()); patternValue != nullptr) {
// A pattern value is cloned and treated as a reference, so it gets no stack slot below
pattern = (*patternValue)->clone();
referenceType = true;
} else
LogConsole::abortEvaluation("cannot determine type of auto variable", type);
} else {
// Only the first pattern created from the type is used
pattern = std::move(typePattern.front());
}
pattern->setVariableName(name);
if (!referenceType) {
// The variable's offset is the index of a newly appended stack slot
pattern->setOffset(this->getStack().size());
pattern->setLocal(true);
this->getStack().emplace_back();
}
if (outVariable)
this->m_outVariables[name] = pattern->getOffset();
variables.push_back(std::move(pattern));
}
// Assigns `value` to the variable called `name`.
// The variable is looked up in the scope returned by getScope(0) first and in the global scope
// afterwards. Evaluation is aborted if no such variable exists, if it is a global variable that
// is not local, or if the value cannot be cast to the variable's type. The cast value is
// written to the stack slot at the variable's offset.
void Evaluator::setVariable(const std::string &name, const Token::Literal &value) {
std::unique_ptr<Pattern> pattern = nullptr;
{
// First lookup: scope returned by getScope(0)
auto &variables = *this->getScope(0).scope;
for (auto &variable : variables) {
if (variable->getVariableName() == name) {
pattern = variable->clone();
break;
}
}
}
if (pattern == nullptr) {
// Second lookup: global scope
auto &variables = *this->getGlobalScope().scope;
for (auto &variable : variables) {
if (variable->getVariableName() == name) {
if (!variable->isLocal())
LogConsole::abortEvaluation(hex::format("cannot modify global variable '{}' which has been placed in memory", name));
pattern = variable->clone();
break;
}
}
}
if (pattern == nullptr)
LogConsole::abortEvaluation(hex::format("no variable with name '{}' found", name));
// Nothing is written for a variable that is not local
if (!pattern->isLocal()) return;
// Convert the value to the type of the variable, one handler per kind of literal
Token::Literal castedLiteral = std::visit(overloaded {
[&](double &value) -> Token::Literal {
if (dynamic_cast<PatternUnsigned *>(pattern.get()))
return u128(value) & bitmask(pattern->getSize() * 8);
else if (dynamic_cast<PatternSigned *>(pattern.get()))
return i128(value) & bitmask(pattern->getSize() * 8);
else if (dynamic_cast<PatternFloat *>(pattern.get()))
return pattern->getSize() == sizeof(float) ? double(float(value)) : value;
else
LogConsole::abortEvaluation(hex::format("cannot cast type 'double' to type '{}'", pattern->getTypeName()));
},
[&](const std::string &value) -> Token::Literal {
if (dynamic_cast<PatternString *>(pattern.get()))
return value;
else
LogConsole::abortEvaluation(hex::format("cannot cast type 'string' to type '{}'", pattern->getTypeName()));
},
[&](Pattern *value) -> Token::Literal {
// Patterns can only be assigned when the type names match
if (value->getTypeName() == pattern->getTypeName())
return value;
else
LogConsole::abortEvaluation(hex::format("cannot cast type '{}' to type '{}'", value->getTypeName(), pattern->getTypeName()));
},
// Every remaining alternative of the literal ends up here
[&](auto &&value) -> Token::Literal {
if (dynamic_cast<PatternUnsigned *>(pattern.get()) || dynamic_cast<PatternEnum *>(pattern.get()))
return u128(value) & bitmask(pattern->getSize() * 8);
else if (dynamic_cast<PatternSigned *>(pattern.get()))
return i128(value) & bitmask(pattern->getSize() * 8);
else if (dynamic_cast<PatternCharacter *>(pattern.get()))
return char(value);
else if (dynamic_cast<PatternBoolean *>(pattern.get()))
return bool(value);
else if (dynamic_cast<PatternFloat *>(pattern.get()))
return pattern->getSize() == sizeof(float) ? double(float(value)) : value;
else
LogConsole::abortEvaluation(hex::format("cannot cast integer literal to type '{}'", pattern->getTypeName()));
} },
value);
this->getStack()[pattern->getOffset()] = castedLiteral;
}
// Evaluates the given AST and returns the patterns it produced.
// All evaluation state is reset first. Returns std::nullopt if a PatternLanguageError was
// thrown during evaluation; otherwise returns the created patterns with all local ones removed.
// If the code defined a function called "main", it is called and its result is stored in m_mainResult.
std::optional<std::vector<std::shared_ptr<Pattern>>> Evaluator::evaluate(const std::vector<std::shared_ptr<ASTNode>> &ast) {
this->m_stack.clear();
this->m_customFunctions.clear();
this->m_scopes.clear();
this->m_mainResult.reset();
this->m_aborted = false;
// A denial from a previous run is turned back into "Ask" for this run
if (this->m_allowDangerousFunctions == DangerousFunctionPermission::Deny)
this->m_allowDangerousFunctions = DangerousFunctionPermission::Ask;
this->m_dangerousFunctionCalled = false;
// Environment variables are cleared when this function is left
ON_SCOPE_EXIT {
this->m_envVariables.clear();
};
this->dataOffset() = 0x00;
this->m_currPatternCount = 0;
this->m_customFunctionDefinitions.clear();
std::vector<std::shared_ptr<Pattern>> patterns;
try {
this->setCurrentControlFlowStatement(ControlFlowStatement::None);
pushScope(nullptr, patterns);
ON_SCOPE_EXIT {
popScope();
};
for (auto &node : ast) {
if (dynamic_cast<ASTNodeTypeDecl *>(node.get())) {
; // Don't create patterns from type declarations
} else if (dynamic_cast<ASTNodeFunctionCall *>(node.get())) {
// Function calls are evaluated and their result is discarded
(void)node->evaluate(this);
} else if (dynamic_cast<ASTNodeFunctionDefinition *>(node.get())) {
this->m_customFunctionDefinitions.push_back(node->evaluate(this));
} else if (auto varDeclNode = dynamic_cast<ASTNodeVariableDecl *>(node.get())) {
for (auto &pattern : node->createPatterns(this)) {
// Declarations without a placement offset become variables, all others become patterns
if (varDeclNode->getPlacementOffset() == nullptr) {
auto type = varDeclNode->getType()->evaluate(this);
auto &name = pattern->getVariableName();
this->createVariable(name, type.get(), std::nullopt, varDeclNode->isOutVariable());
// "in" variables are initialized from the values provided by the caller
if (varDeclNode->isInVariable() && this->m_inVariables.contains(name))
this->setVariable(name, this->m_inVariables[name]);
} else {
patterns.push_back(std::move(pattern));
}
}
} else {
auto newPatterns = node->createPatterns(this);
std::move(newPatterns.begin(), newPatterns.end(), std::back_inserter(patterns));
}
}
if (this->m_customFunctions.contains("main")) {
auto mainFunction = this->m_customFunctions["main"];
if (mainFunction.parameterCount.max > 0)
LogConsole::abortEvaluation("main function may not accept any arguments");
this->m_mainResult = mainFunction.func(this, {});
}
} catch (PatternLanguageError &error) {
// Only errors that carry a line number other than 0 are stored as hard error
if (error.getLineNumber() != 0)
this->m_console.setHardError(error);
patterns.clear();
this->m_currPatternCount = 0;
return std::nullopt;
}
// Remove global local variables
std::erase_if(patterns, [](const std::shared_ptr<Pattern> &pattern) {
return pattern->isLocal();
});
return patterns;
}
/**
 * @brief Accounts for a newly created pattern.
 *
 * Aborts the evaluation if the current pattern count is already greater than the
 * pattern limit, otherwise increments the pattern count.
 */
void Evaluator::patternCreated() {
    const bool limitExceeded = this->m_currPatternCount > this->m_patternLimit;
    if (limitExceeded)
        LogConsole::abortEvaluation(hex::format("exceeded maximum number of patterns: {}", this->m_patternLimit));

    this->m_currPatternCount += 1;
}
/**
 * @brief Accounts for a destroyed pattern by decrementing the pattern count.
 */
void Evaluator::patternDestroyed() {
    this->m_currPatternCount -= 1;
}
}

View File

@@ -1,534 +0,0 @@
#include <hex/pattern_language/lexer.hpp>
#include <algorithm>
#include <charconv>
#include <functional>
#include <optional>
#include <vector>
namespace hex::pl {
// Expands to the constructor arguments of a token whose value is an enumerator of Token::<type>.
// Relies on a variable called `lineNumber` being in scope where the macro is used.
#define TOKEN(type, value) Token::Type::type, Token::type::value, lineNumber
// Same as TOKEN, but the value is passed through unchanged instead of being looked up in Token::<type>
#define VALUE_TOKEN(type, value) Token::Type::type, value, lineNumber
/**
 * @brief Collects characters from a null-terminated string for as long as they are accepted.
 *
 * The first character is always taken without being checked. Every following character is
 * passed to @p predicate (including the terminating null character) and collecting stops at
 * the first character the predicate rejects or at the end of the string.
 *
 * @param characters Null-terminated string to read from
 * @param predicate Decides whether the character following an accepted one is accepted too
 * @return The collected characters; empty if @p characters is an empty string
 */
std::string matchTillInvalid(const char *characters, const std::function<bool(char)> &predicate) {
    std::string matched;

    for (const char *cursor = characters; *cursor != 0x00;) {
        matched.push_back(*cursor);
        ++cursor;

        if (!predicate(*cursor))
            break;
    }

    return matched;
}
/**
 * @brief Checks whether a character may be part of an identifier.
 *
 * @param c Character to check
 * @return true if @p c is alphanumeric or an underscore
 */
bool isIdentifierCharacter(char c) {
    // The <cctype> functions require a value representable as unsigned char (or EOF),
    // passing a negative char directly is undefined behavior
    return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
}
/**
 * @brief Determines how many leading characters of a string can belong to a numeric literal.
 *
 * @param string Text starting at the literal
 * @return Number of leading characters that are digits, hex digits, digit separators,
 *         prefix letters, dots or unsigned suffixes; the full length if all characters qualify
 */
size_t getIntegerLiteralLength(std::string_view string) {
    constexpr std::string_view literalCharacters = "0123456789ABCDEFabcdef'xXoOpP.uU";

    const auto firstForeign = string.find_first_not_of(literalCharacters);

    return firstForeign == std::string_view::npos ? string.size() : firstForeign;
}
std::optional<Token::Literal> lexIntegerLiteral(std::string_view string) {
bool hasFloatSuffix = string.ends_with('D') || string.ends_with('F') || string.ends_with('d') || string.ends_with('f');
bool isFloat = std::count(string.begin(), string.end(), '.') == 1 || (!string.starts_with("0x") && hasFloatSuffix);
if (isFloat) {
// Parse double
char suffix = 0x00;
if (hasFloatSuffix) {
suffix = string.back();
string = string.substr(0, string.length() - 1);
}
char *end = nullptr;
double value = std::strtod(string.begin(), &end);
if (end == string.end()) {
switch (suffix) {
case 'd':
case 'D':
return double(value);
case 'f':
case 'F':
return float(value);
default:
return value;
}
}
} else {
bool isUnsigned = false;
if (string.ends_with('U') || string.ends_with('u')) {
isUnsigned = true;
string = string.substr(0, string.length() - 1);
}
u8 prefixOffset = 0;
u8 base = 10;
if (string.starts_with("0x") || string.starts_with("0X")) {
// Parse hexadecimal
prefixOffset = 2;
base = 16;
} else if (string.starts_with("0o") || string.starts_with("0O")) {
// Parse octal
prefixOffset = 2;
base = 8;
} else if (string.starts_with("0b") || string.starts_with("0B")) {
// Parse binary
prefixOffset = 2;
base = 2;
} else {
// Parse decimal
prefixOffset = 0;
base = 10;
}
u128 value = 0x00;
for (char c : string.substr(prefixOffset)) {
value *= base;
value += [&] {
if (c >= '0' && c <= '9') return c - '0';
else if (c >= 'A' && c <= 'F') return 0xA + (c - 'A');
else if (c >= 'a' && c <= 'f') return 0xA + (c - 'a');
else return 0x00;
}();
}
if (isUnsigned)
return value;
else
return i128(value);
}
return std::nullopt;
}
/**
 * @brief Parses a numeric literal that may contain ' as digit separator.
 *
 * @param string Text of the literal
 * @return The value parsed by lexIntegerLiteral() after all separators were removed, or
 *         std::nullopt if the literal starts or ends with a separator or is otherwise invalid
 */
std::optional<Token::Literal> lexIntegerLiteralWithSeparator(std::string_view string) {
    // A separator must not be the first or the last character of a literal
    const bool separatorAtEdge = string.starts_with('\'') || string.ends_with('\'');
    if (separatorAtEdge)
        return std::nullopt;

    // Without any separators the text can be parsed as is
    if (string.find('\'') == std::string_view::npos)
        return lexIntegerLiteral(string);

    std::string withoutSeparators;
    withoutSeparators.reserve(string.size());
    for (const char c : string) {
        if (c != '\'')
            withoutSeparators += c;
    }

    return lexIntegerLiteral(withoutSeparators);
}
/**
 * @brief Reads a single, possibly escaped, character from the start of a string.
 *
 * Supported escape sequences are the simple ones (\a \b \f \n \r \t \v \\ \' \"), \xHH with
 * exactly two hexadecimal digits and \oOOO with exactly three octal digits. Anything following
 * the character is ignored, so the function can be used on the remainder of a longer input.
 *
 * @param string Text starting at the character to read
 * @return The character and the number of input characters it occupies, or std::nullopt if
 *         the string is empty or starts with an invalid or incomplete escape sequence
 */
std::optional<std::pair<char, size_t>> getCharacter(const std::string &string) {
    if (string.empty())
        return std::nullopt;

    // Anything that doesn't start an escape sequence is taken literally
    if (string[0] != '\\')
        return std::pair<char, size_t> { string[0], 1 };

    if (string.length() < 2)
        return std::nullopt;

    // Handle simple escape sequences
    switch (string[1]) {
        case 'a':  return std::pair<char, size_t> { '\a', 2 };
        case 'b':  return std::pair<char, size_t> { '\b', 2 };
        case 'f':  return std::pair<char, size_t> { '\f', 2 };
        case 'n':  return std::pair<char, size_t> { '\n', 2 };
        case 'r':  return std::pair<char, size_t> { '\r', 2 };
        case 't':  return std::pair<char, size_t> { '\t', 2 };
        case 'v':  return std::pair<char, size_t> { '\v', 2 };
        case '\\': return std::pair<char, size_t> { '\\', 2 };
        case '\'': return std::pair<char, size_t> { '\'', 2 };
        case '\"': return std::pair<char, size_t> { '\"', 2 };
        default:   break;
    }

    // Hexadecimal number
    if (string[1] == 'x') {
        // The input usually continues after the escape sequence (closing quote, more text),
        // so only require that enough characters are available
        if (string.length() < 4)
            return std::nullopt;

        if (!std::isxdigit(static_cast<unsigned char>(string[2])) || !std::isxdigit(static_cast<unsigned char>(string[3])))
            return std::nullopt;

        // Convert exactly the two digits, following hex digits are not part of the escape sequence
        const auto value = std::strtoul(string.substr(2, 2).c_str(), nullptr, 16);
        return std::pair<char, size_t> { static_cast<char>(value), 4 };
    }

    // Octal number
    if (string[1] == 'o') {
        if (string.length() < 5)
            return std::nullopt;

        for (size_t i = 2; i < 5; i++) {
            if (string[i] < '0' || string[i] > '7')
                return std::nullopt;
        }

        // Convert exactly the three digits
        const auto value = std::strtoul(string.substr(2, 3).c_str(), nullptr, 8);
        return std::pair<char, size_t> { static_cast<char>(value), 5 };
    }

    return std::nullopt;
}
std::optional<std::pair<std::string, size_t>> getStringLiteral(const std::string &string) {
if (!string.starts_with('\"'))
return {};
size_t size = 1;
std::string result;
while (string[size] != '\"') {
auto character = getCharacter(string.substr(size));
if (!character.has_value())
return {};
auto &[c, charSize] = character.value();
result += c;
size += charSize;
if (size >= string.length())
return {};
}
return {
{result, size + 1}
};
}
std::optional<std::pair<char, size_t>> getCharacterLiteral(const std::string &string) {
if (string.empty())
return {};
if (string[0] != '\'')
return {};
auto character = getCharacter(string.substr(1));
if (!character.has_value())
return {};
auto &[c, charSize] = character.value();
if (string.length() >= charSize + 2 && string[charSize + 1] != '\'')
return {};
return {
{c, charSize + 2}
};
}
// Splits the given source code into tokens.
// Returns the token list, terminated by an EndOfProgram separator, or std::nullopt if a
// PatternLanguageError was thrown while lexing; the error is stored in m_error in that case.
// The order of the checks below matters: operators made of two characters are matched before
// the single-character operators they start with (e.g. "==" before '=', "::" before ':').
std::optional<std::vector<Token>> Lexer::lex(const std::string &code) {
std::vector<Token> tokens;
u32 offset = 0;
// Line of the token currently being lexed, picked up by the TOKEN / VALUE_TOKEN macros
u32 lineNumber = 1;
try {
while (offset < code.length()) {
const char &c = code[offset];
// A null character ends the input early
if (c == 0x00)
break;
// NOTE(review): c is a plain char here; the <cctype> functions expect a value
// representable as unsigned char - confirm non-ASCII input cannot reach these calls
if (std::isblank(c) || std::isspace(c)) {
if (code[offset] == '\n') lineNumber++;
offset += 1;
} else if (c == ';') {
tokens.emplace_back(TOKEN(Separator, EndOfExpression));
offset += 1;
} else if (c == '(') {
tokens.emplace_back(TOKEN(Separator, RoundBracketOpen));
offset += 1;
} else if (c == ')') {
tokens.emplace_back(TOKEN(Separator, RoundBracketClose));
offset += 1;
} else if (c == '{') {
tokens.emplace_back(TOKEN(Separator, CurlyBracketOpen));
offset += 1;
} else if (c == '}') {
tokens.emplace_back(TOKEN(Separator, CurlyBracketClose));
offset += 1;
} else if (c == '[') {
tokens.emplace_back(TOKEN(Separator, SquareBracketOpen));
offset += 1;
} else if (c == ']') {
tokens.emplace_back(TOKEN(Separator, SquareBracketClose));
offset += 1;
} else if (c == ',') {
tokens.emplace_back(TOKEN(Separator, Comma));
offset += 1;
} else if (c == '.') {
tokens.emplace_back(TOKEN(Separator, Dot));
offset += 1;
} else if (code.substr(offset, 2) == "::") {
tokens.emplace_back(TOKEN(Operator, ScopeResolution));
offset += 2;
} else if (c == '@') {
tokens.emplace_back(TOKEN(Operator, AtDeclaration));
offset += 1;
} else if (code.substr(offset, 2) == "==") {
tokens.emplace_back(TOKEN(Operator, BoolEquals));
offset += 2;
} else if (code.substr(offset, 2) == "!=") {
tokens.emplace_back(TOKEN(Operator, BoolNotEquals));
offset += 2;
} else if (code.substr(offset, 2) == ">=") {
tokens.emplace_back(TOKEN(Operator, BoolGreaterThanOrEquals));
offset += 2;
} else if (code.substr(offset, 2) == "<=") {
tokens.emplace_back(TOKEN(Operator, BoolLessThanOrEquals));
offset += 2;
} else if (code.substr(offset, 2) == "&&") {
tokens.emplace_back(TOKEN(Operator, BoolAnd));
offset += 2;
} else if (code.substr(offset, 2) == "||") {
tokens.emplace_back(TOKEN(Operator, BoolOr));
offset += 2;
} else if (code.substr(offset, 2) == "^^") {
tokens.emplace_back(TOKEN(Operator, BoolXor));
offset += 2;
} else if (c == '=') {
tokens.emplace_back(TOKEN(Operator, Assignment));
offset += 1;
} else if (c == ':') {
tokens.emplace_back(TOKEN(Operator, Inherit));
offset += 1;
} else if (c == '+') {
tokens.emplace_back(TOKEN(Operator, Plus));
offset += 1;
} else if (c == '-') {
tokens.emplace_back(TOKEN(Operator, Minus));
offset += 1;
} else if (c == '*') {
tokens.emplace_back(TOKEN(Operator, Star));
offset += 1;
} else if (c == '/') {
tokens.emplace_back(TOKEN(Operator, Slash));
offset += 1;
} else if (c == '%') {
tokens.emplace_back(TOKEN(Operator, Percent));
offset += 1;
} else if (code.substr(offset, 2) == "<<") {
tokens.emplace_back(TOKEN(Operator, ShiftLeft));
offset += 2;
} else if (code.substr(offset, 2) == ">>") {
tokens.emplace_back(TOKEN(Operator, ShiftRight));
offset += 2;
} else if (c == '>') {
tokens.emplace_back(TOKEN(Operator, BoolGreaterThan));
offset += 1;
} else if (c == '<') {
tokens.emplace_back(TOKEN(Operator, BoolLessThan));
offset += 1;
} else if (c == '!') {
tokens.emplace_back(TOKEN(Operator, BoolNot));
offset += 1;
} else if (c == '|') {
tokens.emplace_back(TOKEN(Operator, BitOr));
offset += 1;
} else if (c == '&') {
tokens.emplace_back(TOKEN(Operator, BitAnd));
offset += 1;
} else if (c == '^') {
tokens.emplace_back(TOKEN(Operator, BitXor));
offset += 1;
} else if (c == '~') {
tokens.emplace_back(TOKEN(Operator, BitNot));
offset += 1;
} else if (c == '?') {
tokens.emplace_back(TOKEN(Operator, TernaryConditional));
offset += 1;
} else if (c == '$') {
tokens.emplace_back(TOKEN(Operator, Dollar));
offset += 1;
// Word operators only match if they are not the beginning of a longer identifier
} else if (code.substr(offset, 9) == "addressof" && !isIdentifierCharacter(code[offset + 9])) {
tokens.emplace_back(TOKEN(Operator, AddressOf));
offset += 9;
} else if (code.substr(offset, 6) == "sizeof" && !isIdentifierCharacter(code[offset + 6])) {
tokens.emplace_back(TOKEN(Operator, SizeOf));
offset += 6;
} else if (c == '\'') {
// Character literals are emitted as Integer tokens holding the character
auto lexedCharacter = getCharacterLiteral(code.substr(offset));
if (!lexedCharacter.has_value())
throwLexerError("invalid character literal", lineNumber);
auto [character, charSize] = lexedCharacter.value();
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(character)));
offset += charSize;
} else if (c == '\"') {
auto string = getStringLiteral(code.substr(offset));
if (!string.has_value())
throwLexerError("invalid string literal", lineNumber);
auto [s, stringSize] = string.value();
tokens.emplace_back(VALUE_TOKEN(String, Token::Literal(s)));
offset += stringSize;
} else if (isIdentifierCharacter(c) && !std::isdigit(c)) {
// Keywords, built-in type names and identifiers all start with a letter or an underscore
std::string identifier = matchTillInvalid(&code[offset], isIdentifierCharacter);
// Check for reserved keywords
if (identifier == "struct")
tokens.emplace_back(TOKEN(Keyword, Struct));
else if (identifier == "union")
tokens.emplace_back(TOKEN(Keyword, Union));
else if (identifier == "using")
tokens.emplace_back(TOKEN(Keyword, Using));
else if (identifier == "enum")
tokens.emplace_back(TOKEN(Keyword, Enum));
else if (identifier == "bitfield")
tokens.emplace_back(TOKEN(Keyword, Bitfield));
else if (identifier == "be")
tokens.emplace_back(TOKEN(Keyword, BigEndian));
else if (identifier == "le")
tokens.emplace_back(TOKEN(Keyword, LittleEndian));
else if (identifier == "if")
tokens.emplace_back(TOKEN(Keyword, If));
else if (identifier == "else")
tokens.emplace_back(TOKEN(Keyword, Else));
// The boolean constants are emitted as Integer tokens holding a bool
else if (identifier == "false")
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(false)));
else if (identifier == "true")
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(true)));
else if (identifier == "parent")
tokens.emplace_back(TOKEN(Keyword, Parent));
else if (identifier == "this")
tokens.emplace_back(TOKEN(Keyword, This));
else if (identifier == "while")
tokens.emplace_back(TOKEN(Keyword, While));
else if (identifier == "for")
tokens.emplace_back(TOKEN(Keyword, For));
else if (identifier == "fn")
tokens.emplace_back(TOKEN(Keyword, Function));
else if (identifier == "return")
tokens.emplace_back(TOKEN(Keyword, Return));
else if (identifier == "namespace")
tokens.emplace_back(TOKEN(Keyword, Namespace));
else if (identifier == "in")
tokens.emplace_back(TOKEN(Keyword, In));
else if (identifier == "out")
tokens.emplace_back(TOKEN(Keyword, Out));
else if (identifier == "break")
tokens.emplace_back(TOKEN(Keyword, Break));
else if (identifier == "continue")
tokens.emplace_back(TOKEN(Keyword, Continue));
// Check for built-in types
else if (identifier == "u8")
tokens.emplace_back(TOKEN(ValueType, Unsigned8Bit));
else if (identifier == "s8")
tokens.emplace_back(TOKEN(ValueType, Signed8Bit));
else if (identifier == "u16")
tokens.emplace_back(TOKEN(ValueType, Unsigned16Bit));
else if (identifier == "s16")
tokens.emplace_back(TOKEN(ValueType, Signed16Bit));
else if (identifier == "u32")
tokens.emplace_back(TOKEN(ValueType, Unsigned32Bit));
else if (identifier == "s32")
tokens.emplace_back(TOKEN(ValueType, Signed32Bit));
else if (identifier == "u64")
tokens.emplace_back(TOKEN(ValueType, Unsigned64Bit));
else if (identifier == "s64")
tokens.emplace_back(TOKEN(ValueType, Signed64Bit));
else if (identifier == "u128")
tokens.emplace_back(TOKEN(ValueType, Unsigned128Bit));
else if (identifier == "s128")
tokens.emplace_back(TOKEN(ValueType, Signed128Bit));
else if (identifier == "float")
tokens.emplace_back(TOKEN(ValueType, Float));
else if (identifier == "double")
tokens.emplace_back(TOKEN(ValueType, Double));
else if (identifier == "char")
tokens.emplace_back(TOKEN(ValueType, Character));
else if (identifier == "char16")
tokens.emplace_back(TOKEN(ValueType, Character16));
else if (identifier == "bool")
tokens.emplace_back(TOKEN(ValueType, Boolean));
else if (identifier == "str")
tokens.emplace_back(TOKEN(ValueType, String));
else if (identifier == "padding")
tokens.emplace_back(TOKEN(ValueType, Padding));
else if (identifier == "auto")
tokens.emplace_back(TOKEN(ValueType, Auto));
// If it's not a keyword and a builtin type, it has to be an identifier
else
tokens.emplace_back(VALUE_TOKEN(Identifier, Token::Identifier(identifier)));
offset += identifier.length();
} else if (std::isdigit(c)) {
// Numeric literal: determine its extent first, then convert it
auto integerLength = getIntegerLiteralLength(&code[offset]);
auto integer = lexIntegerLiteralWithSeparator(std::string_view(&code[offset], integerLength));
if (!integer.has_value())
throwLexerError("invalid integer literal", lineNumber);
tokens.emplace_back(VALUE_TOKEN(Integer, Token::Literal(integer.value())));
offset += integerLength;
} else
throwLexerError("unknown token", lineNumber);
}
tokens.emplace_back(TOKEN(Separator, EndOfProgram));
} catch (PatternLanguageError &e) {
this->m_error = e;
return std::nullopt;
}
return tokens;
}
}

View File

@@ -1,14 +0,0 @@
#include <hex/pattern_language/log_console.hpp>
#include <hex/pattern_language/ast/ast_node.hpp>
namespace hex::pl {
/**
 * @brief Aborts the evaluation by throwing a PatternLanguageError.
 *
 * @param message Description of the problem; it gets prefixed with "Evaluator: "
 * @param node Node that caused the problem. Its line number is attached to the error;
 *             line 0 is used if no node is given.
 * @throws PatternLanguageError always
 */
[[noreturn]] void LogConsole::abortEvaluation(const std::string &message, const ASTNode *node) {
    if (node != nullptr)
        throw PatternLanguageError(node->getLineNumber(), "Evaluator: " + message);

    throw PatternLanguageError(0, "Evaluator: " + message);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,255 +0,0 @@
#include <hex/pattern_language/pattern_language.hpp>
#include <hex/helpers/file.hpp>
#include <hex/providers/provider.hpp>
#include <hex/helpers/logger.hpp>
#include <hex/pattern_language/preprocessor.hpp>
#include <hex/pattern_language/lexer.hpp>
#include <hex/pattern_language/parser.hpp>
#include <hex/pattern_language/validator.hpp>
#include <hex/pattern_language/evaluator.hpp>
namespace hex::pl {
class Pattern;
/**
 * @brief Creates all processing stages and registers the pragma handlers on the preprocessor.
 *
 * Registered pragmas: endian, eval_depth, array_limit, pattern_limit, loop_limit,
 * base_address and bitfield_order. Each handler returns whether the pragma value was accepted.
 */
PatternLanguage::PatternLanguage() {
    this->m_preprocessor = new Preprocessor();
    this->m_lexer        = new Lexer();
    this->m_parser       = new Parser();
    this->m_validator    = new Validator();
    this->m_evaluator    = new Evaluator();

    this->m_preprocessor->addDefaultPragmaHandlers();

    // Default endianness used by the evaluator
    this->m_preprocessor->addPragmaHandler("endian", [this](std::string value) {
        if (value == "big")
            this->m_evaluator->setDefaultEndian(std::endian::big);
        else if (value == "little")
            this->m_evaluator->setDefaultEndian(std::endian::little);
        else if (value == "native")
            this->m_evaluator->setDefaultEndian(std::endian::native);
        else
            return false;

        return true;
    });

    // The limit pragmas only accept values greater than zero
    this->m_preprocessor->addPragmaHandler("eval_depth", [this](std::string value) {
        const auto depth = strtol(value.c_str(), nullptr, 0);
        if (depth <= 0)
            return false;

        this->m_evaluator->setEvaluationDepth(depth);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("array_limit", [this](const std::string &value) {
        const auto arrayLimit = strtol(value.c_str(), nullptr, 0);
        if (arrayLimit <= 0)
            return false;

        this->m_evaluator->setArrayLimit(arrayLimit);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("pattern_limit", [this](const std::string &value) {
        const auto patternLimit = strtol(value.c_str(), nullptr, 0);
        if (patternLimit <= 0)
            return false;

        this->m_evaluator->setPatternLimit(patternLimit);
        return true;
    });

    this->m_preprocessor->addPragmaHandler("loop_limit", [this](const std::string &value) {
        const auto loopLimit = strtol(value.c_str(), nullptr, 0);
        if (loopLimit <= 0)
            return false;

        this->m_evaluator->setLoopLimit(loopLimit);
        return true;
    });

    // Base address of the provider returned by ImHexApi::Provider::get()
    this->m_preprocessor->addPragmaHandler("base_address", [](const std::string &value) {
        const auto address = strtoull(value.c_str(), nullptr, 0);

        ImHexApi::Provider::get()->setBaseAddress(address);
        return true;
    });

    // Order in which bitfield entries are laid out
    this->m_preprocessor->addPragmaHandler("bitfield_order", [this](const std::string &value) {
        if (value == "left_to_right")
            this->m_evaluator->setBitfieldOrder(BitfieldOrder::LeftToRight);
        else if (value == "right_to_left")
            this->m_evaluator->setBitfieldOrder(BitfieldOrder::RightToLeft);
        else
            return false;

        return true;
    });
}
// Releases the processing stages allocated in the constructor.
// NOTE(review): the constructor also allocates m_evaluator with `new`, but it is not deleted
// here. Patterns are constructed with a pointer to the evaluator, so this may be a deliberate
// workaround for patterns outliving this object rather than an oversight - confirm before changing.
PatternLanguage::~PatternLanguage() {
delete this->m_preprocessor;
delete this->m_lexer;
delete this->m_parser;
delete this->m_validator;
}
/**
 * @brief Turns source code into a validated AST.
 *
 * The code is run through the preprocessor, lexer, parser and validator in that order.
 * If one of the stages fails, its error is stored in m_currError.
 *
 * @param code Source code to parse
 * @return The AST, or std::nullopt if any of the stages failed
 */
std::optional<std::vector<std::shared_ptr<ASTNode>>> PatternLanguage::parseString(const std::string &code) {
    auto source = this->m_preprocessor->preprocess(code);
    if (!source) {
        this->m_currError = this->m_preprocessor->getError();
        return std::nullopt;
    }

    auto lexed = this->m_lexer->lex(source.value());
    if (!lexed) {
        this->m_currError = this->m_lexer->getError();
        return std::nullopt;
    }

    auto ast = this->m_parser->parse(lexed.value());
    if (!ast) {
        this->m_currError = this->m_parser->getError();
        return std::nullopt;
    }

    const bool valid = this->m_validator->validate(*ast);
    if (!valid) {
        this->m_currError = this->m_validator->getError();
        return std::nullopt;
    }

    return ast;
}
// Parses and evaluates the given source code.
//   provider    - provider handed to the evaluator
//   code        - source code to execute
//   envVars     - environment variables passed to the evaluator
//   inVariables - values for "in" variables passed to the evaluator
//   checkResult - when true, a non-zero result of the code's main function counts as failure
// Returns true on success, in which case the created patterns are stored in m_patterns.
// On failure the error is stored in m_currError and written to the evaluator's console.
bool PatternLanguage::executeString(prv::Provider *provider, const std::string &code, const std::map<std::string, Token::Literal> &envVars, const std::map<std::string, Token::Literal> &inVariables, bool checkResult) {
this->m_running = true;
ON_SCOPE_EXIT { this->m_running = false; };
// When this function is left, write the current error (if any) to the console
ON_SCOPE_EXIT {
if (this->m_currError.has_value()) {
const auto &error = this->m_currError.value();
// The line number is only included in the message if it is greater than 0
if (error.getLineNumber() > 0)
this->m_evaluator->getConsole().log(LogConsole::Level::Error, hex::format("{}: {}", error.getLineNumber(), error.what()));
else
this->m_evaluator->getConsole().log(LogConsole::Level::Error, error.what());
}
};
this->m_currError.reset();
this->m_evaluator->getConsole().clear();
// Reset the evaluator settings to their defaults before every run
this->m_evaluator->setProvider(provider);
this->m_evaluator->setDefaultEndian(std::endian::native);
this->m_evaluator->setEvaluationDepth(32);
this->m_evaluator->setArrayLimit(0x1000);
this->m_evaluator->setPatternLimit(0x2000);
this->m_evaluator->setLoopLimit(0x1000);
this->m_evaluator->setInVariables(inVariables);
for (const auto &[name, value] : envVars)
this->m_evaluator->setEnvVariable(name, value);
this->m_currAST.clear();
{
// parseString() stores the error of the failing stage in m_currError
auto ast = this->parseString(code);
if (!ast)
return false;
this->m_currAST = std::move(ast.value());
}
auto patterns = this->m_evaluator->evaluate(this->m_currAST);
if (!patterns.has_value()) {
this->m_currError = this->m_evaluator->getConsole().getLastHardError();
return false;
}
if (auto mainResult = this->m_evaluator->getMainResult(); checkResult && mainResult.has_value()) {
auto returnCode = Token::literalToSigned(*mainResult);
if (returnCode != 0) {
this->m_currError = PatternLanguageError(0, hex::format("non-success value returned from main: {}", returnCode));
return false;
}
}
this->m_patterns = std::move(patterns.value());
return true;
}
/**
 * @brief Executes the source code contained in a file.
 *
 * The result of the code's main function is checked, see executeString().
 *
 * @param provider Provider handed to the evaluator
 * @param path Path of the file to execute
 * @param envVars Environment variables passed to the evaluator
 * @param inVariables Values for "in" variables passed to the evaluator
 * @return true if the code was executed successfully
 */
bool PatternLanguage::executeFile(prv::Provider *provider, const std::fs::path &path, const std::map<std::string, Token::Literal> &envVars, const std::map<std::string, Token::Literal> &inVariables) {
    fs::File patternFile(path, fs::File::Mode::Read);

    const auto code = patternFile.readString();

    return this->executeString(provider, code, envVars, inVariables, true);
}
/**
 * @brief Executes a piece of code as the body of a main function.
 *
 * The result of the main function is not checked for success, it is returned instead.
 *
 * @param provider Provider handed to the evaluator
 * @param code Code that is placed inside the main function
 * @return Whether execution succeeded, together with the result of the main function
 */
std::pair<bool, std::optional<Token::Literal>> PatternLanguage::executeFunction(prv::Provider *provider, const std::string &code) {
    const auto wrappedCode = hex::format("fn main() {{ {0} }};", code);

    const bool success = this->executeString(provider, wrappedCode, {}, {}, false);

    return { success, this->m_evaluator->getMainResult() };
}
void PatternLanguage::abort() {
this->m_evaluator->abort();
}
const std::vector<std::shared_ptr<ASTNode>> &PatternLanguage::getCurrentAST() const {
return this->m_currAST;
}
[[nodiscard]] std::map<std::string, Token::Literal> PatternLanguage::getOutVariables() const {
return this->m_evaluator->getOutVariables();
}
// Returns the (level, message) entries collected by the evaluator's console.
const std::vector<std::pair<LogConsole::Level, std::string>> &PatternLanguage::getConsoleLog() {
    return m_evaluator->getConsole().getLog();
}
const std::optional<PatternLanguageError> &PatternLanguage::getError() {
return this->m_currError;
}
// Returns the pattern count reported by the evaluator.
u32 PatternLanguage::getCreatedPatternCount() {
    return m_evaluator->getPatternCount();
}
// Returns the pattern limit currently configured on the evaluator.
u32 PatternLanguage::getMaximumPatternCount() {
    return m_evaluator->getPatternLimit();
}
void PatternLanguage::allowDangerousFunctions(bool allow) {
this->m_evaluator->allowDangerousFunctions(allow);
}
bool PatternLanguage::hasDangerousFunctionBeenCalled() const {
return this->m_evaluator->hasDangerousFunctionBeenCalled();
}
void PatternLanguage::reset() {
this->m_patterns.clear();
this->m_currAST.clear();
}
}

View File

@@ -1,276 +0,0 @@
#include <hex/pattern_language/preprocessor.hpp>
#include <hex/helpers/fmt.hpp>
#include <hex/helpers/fs.hpp>
#include <hex/helpers/file.hpp>
#include <filesystem>
namespace hex::pl {
std::optional<std::string> Preprocessor::preprocess(std::string code, bool initialRun) {
u32 offset = 0;
u32 lineNumber = 1;
bool isInString = false;
if (initialRun) {
this->m_defines.clear();
this->m_pragmas.clear();
}
std::string output;
output.reserve(code.length());
try {
while (offset < code.length()) {
if (code.substr(offset, 2) == "//") {
while (code[offset] != '\n' && offset < code.length())
offset += 1;
} else if (code.substr(offset, 2) == "/*") {
while (code.substr(offset, 2) != "*/" && offset < code.length()) {
if (code[offset] == '\n') {
output += '\n';
lineNumber++;
}
offset += 1;
}
offset += 2;
if (offset >= code.length())
throwPreprocessorError("unterminated comment", lineNumber - 1);
} else {
output += code[offset];
offset++;
}
}
offset = 0;
code = output;
output.clear();
output.reserve(code.size());
bool startOfLine = true;
while (offset < code.length()) {
if (offset > 0 && code[offset - 1] != '\\' && code[offset] == '\"')
isInString = !isInString;
else if (isInString) {
output += code[offset];
offset += 1;
continue;
}
if (code[offset] == '#' && startOfLine) {
offset += 1;
if (code.substr(offset, 7) == "include") {
offset += 7;
while (std::isblank(code[offset]) || std::isspace(code[offset]))
offset += 1;
if (code[offset] != '<' && code[offset] != '"')
throwPreprocessorError("expected '<' or '\"' before file name", lineNumber);
char endChar = code[offset];
if (endChar == '<') endChar = '>';
offset += 1;
std::string includeFile;
while (code[offset] != endChar && code[offset] != '\n') {
includeFile += code[offset];
offset += 1;
if (offset >= code.length())
throwPreprocessorError(hex::format("missing terminating '{0}' character", endChar), lineNumber);
}
offset += 1;
std::fs::path includePath = includeFile;
if (includeFile[0] != '/') {
for (const auto &dir : fs::getDefaultPaths(fs::ImHexPath::PatternsInclude)) {
std::fs::path tempPath = dir / includePath;
if (fs::isRegularFile(tempPath)) {
includePath = tempPath;
break;
}
}
}
if (!fs::isRegularFile(includePath)) {
if (includePath.parent_path().filename().string() == "std")
throwPreprocessorError(hex::format("{0}: No such file.\n\nThis file might be part of the standard library.\nYou can install the standard library though\nthe Content Store found under Help -> Content Store.", includeFile.c_str()), lineNumber);
else
throwPreprocessorError(hex::format("{0}: No such file", includeFile.c_str()), lineNumber);
}
fs::File file(includePath, fs::File::Mode::Read);
if (!file.isValid()) {
throwPreprocessorError(hex::format("{0}: Failed to open file", includeFile.c_str()), lineNumber);
}
Preprocessor preprocessor;
preprocessor.addDefaultPragmaHandlers();
preprocessor.m_defines = this->m_defines;
preprocessor.m_onceIncludedFiles = this->m_onceIncludedFiles;
auto preprocessedInclude = preprocessor.preprocess(file.readString(), /*initialRun =*/false);
if (!preprocessedInclude.has_value()) {
auto error = *preprocessor.m_error;
throw error;
}
if (preprocessor.shouldOnlyIncludeOnce()) {
auto [iter, added] = this->m_onceIncludedFiles.insert(includePath);
if (added) {
auto content = preprocessedInclude.value();
std::replace(content.begin(), content.end(), '\n', ' ');
std::replace(content.begin(), content.end(), '\r', ' ');
output += content;
}
}
this->m_defines = preprocessor.m_defines;
this->m_onceIncludedFiles = preprocessor.m_onceIncludedFiles;
} else if (code.substr(offset, 6) == "define") {
offset += 6;
while (std::isblank(code[offset])) {
offset += 1;
}
std::string defineName;
while (!std::isblank(code[offset])) {
defineName += code[offset];
if (offset >= code.length() || code[offset] == '\n' || code[offset] == '\r')
throwPreprocessorError("no value given in #define directive", lineNumber);
offset += 1;
}
while (std::isblank(code[offset])) {
offset += 1;
if (offset >= code.length())
throwPreprocessorError("no value given in #define directive", lineNumber);
}
std::string replaceValue;
while (code[offset] != '\n' && code[offset] != '\r') {
if (offset >= code.length())
throwPreprocessorError("missing new line after #define directive", lineNumber);
replaceValue += code[offset];
offset += 1;
}
if (replaceValue.empty())
throwPreprocessorError("no value given in #define directive", lineNumber);
this->m_defines.emplace(defineName, replaceValue, lineNumber);
} else if (code.substr(offset, 6) == "pragma") {
offset += 6;
while (std::isblank(code[offset])) {
offset += 1;
if (code[offset] == '\n' || code[offset] == '\r')
throwPreprocessorError("no instruction given in #pragma directive", lineNumber);
}
std::string pragmaKey;
while (!std::isblank(code[offset]) && code[offset] != '\n' && code[offset] != '\r') {
pragmaKey += code[offset];
if (offset >= code.length())
throwPreprocessorError("no instruction given in #pragma directive", lineNumber);
offset += 1;
}
while (std::isblank(code[offset]))
offset += 1;
std::string pragmaValue;
while (code[offset] != '\n' && code[offset] != '\r') {
if (offset >= code.length())
throwPreprocessorError("missing new line after #pragma directive", lineNumber);
pragmaValue += code[offset];
offset += 1;
}
this->m_pragmas.emplace(pragmaKey, pragmaValue, lineNumber);
} else
throwPreprocessorError("unknown preprocessor directive", lineNumber);
}
if (code[offset] == '\n') {
lineNumber++;
startOfLine = true;
} else if (!std::isspace(code[offset]))
startOfLine = false;
output += code[offset];
offset += 1;
}
// Apply defines
std::vector<std::tuple<std::string, std::string, u32>> sortedDefines;
std::copy(this->m_defines.begin(), this->m_defines.end(), std::back_inserter(sortedDefines));
std::sort(sortedDefines.begin(), sortedDefines.end(), [](const auto &left, const auto &right) {
return std::get<0>(left).size() > std::get<0>(right).size();
});
for (const auto &[define, value, defineLine] : sortedDefines) {
size_t index = 0;
while ((index = output.find(define, index)) != std::string::npos) {
output.replace(index, define.length(), value);
index += value.length();
}
}
// Handle pragmas
for (const auto &[type, value, pragmaLine] : this->m_pragmas) {
if (this->m_pragmaHandlers.contains(type)) {
if (!this->m_pragmaHandlers[type](value))
throwPreprocessorError(hex::format("invalid value provided to '{0}' #pragma directive", type.c_str()), pragmaLine);
} else
throwPreprocessorError(hex::format("no #pragma handler registered for type {0}", type.c_str()), pragmaLine);
}
} catch (PatternLanguageError &e) {
this->m_error = e;
return std::nullopt;
}
return output;
}
void Preprocessor::addPragmaHandler(const std::string &pragmaType, const std::function<bool(const std::string &)> &function) {
this->m_pragmaHandlers[pragmaType] = function;
}
void Preprocessor::removePragmaHandler(const std::string &pragmaType) {
this->m_pragmaHandlers.erase(pragmaType);
}
// Registers the built-in pragma handlers:
//   MIME   - accepts any value that is not empty or whitespace-only and does not end in a line break
//   endian - accepts exactly "big", "little" or "native"
//   once   - marks this preprocessor as include-once; accepts only an empty value
void Preprocessor::addDefaultPragmaHandlers() {
    this->addPragmaHandler("MIME", [](const std::string &value) {
        // Take the character as unsigned char: std::isspace is undefined for negative values,
        // which plain char produces for non-ASCII bytes
        const bool onlyWhitespace = std::all_of(value.begin(), value.end(), [](unsigned char c) {
            return std::isspace(c) != 0;
        });

        return !onlyWhitespace && !value.ends_with('\n') && !value.ends_with('\r');
    });

    this->addPragmaHandler("endian", [](const std::string &value) {
        return value == "big" || value == "little" || value == "native";
    });

    // NOTE(review): captures `this`; the stored handler must not outlive or be copied away
    // from this Preprocessor instance
    this->addPragmaHandler("once", [this](const std::string &value) {
        // The flag is set even when the value is rejected below
        this->m_onlyIncludeOnce = true;

        return value.empty();
    });
}
}

View File

@@ -1,58 +0,0 @@
#include <hex/pattern_language/validator.hpp>
#include <hex/pattern_language/ast/ast_node.hpp>
#include <hex/pattern_language/ast/ast_node_variable_decl.hpp>
#include <hex/pattern_language/ast/ast_node_type_decl.hpp>
#include <hex/pattern_language/ast/ast_node_struct.hpp>
#include <hex/pattern_language/ast/ast_node_union.hpp>
#include <hex/pattern_language/ast/ast_node_enum.hpp>
#include <hex/helpers/fmt.hpp>
#include <unordered_set>
#include <string>
namespace hex::pl {
// Checks `ast` for duplicate definitions and returns true when none are found.
//
// Within one node list, variable names must be unique, type names must be unique, and the
// constants of every enum must be unique. Variable types, non-forward-declared type
// declarations, and struct/union members are validated recursively, each with its own
// fresh name scope.
//
// On failure the error is stored in m_error and false is returned. A failure in a nested
// list is propagated to the caller rather than being ignored.
bool Validator::validate(const std::vector<std::shared_ptr<ASTNode>> &ast) {
    std::unordered_set<std::string> identifiers;
    std::unordered_set<std::string> types;

    try {
        for (const auto &node : ast) {
            if (node == nullptr)
                throwValidatorError("nullptr in AST. This is a bug!", 1);

            if (auto variableDeclNode = dynamic_cast<ASTNodeVariableDecl *>(node.get()); variableDeclNode != nullptr) {
                if (!identifiers.insert(variableDeclNode->getName().data()).second)
                    throwValidatorError(hex::format("redefinition of identifier '{0}'", variableDeclNode->getName().data()), variableDeclNode->getLineNumber());

                // The nested call has already recorded its error in m_error
                if (!this->validate(hex::moveToVector<std::shared_ptr<ASTNode>>(variableDeclNode->getType()->clone())))
                    return false;
            } else if (auto typeDeclNode = dynamic_cast<ASTNodeTypeDecl *>(node.get()); typeDeclNode != nullptr) {
                if (!types.insert(typeDeclNode->getName().data()).second)
                    throwValidatorError(hex::format("redefinition of type '{0}'", typeDeclNode->getName().data()), typeDeclNode->getLineNumber());

                // Forward declarations are skipped here
                if (!typeDeclNode->isForwardDeclared()) {
                    if (!this->validate(hex::moveToVector<std::shared_ptr<ASTNode>>(typeDeclNode->getType()->clone())))
                        return false;
                }
            } else if (auto structNode = dynamic_cast<ASTNodeStruct *>(node.get()); structNode != nullptr) {
                if (!this->validate(structNode->getMembers()))
                    return false;
            } else if (auto unionNode = dynamic_cast<ASTNodeUnion *>(node.get()); unionNode != nullptr) {
                if (!this->validate(unionNode->getMembers()))
                    return false;
            } else if (auto enumNode = dynamic_cast<ASTNodeEnum *>(node.get()); enumNode != nullptr) {
                std::unordered_set<std::string> enumIdentifiers;
                for (auto &[name, value] : enumNode->getEntries()) {
                    if (!enumIdentifiers.insert(name).second)
                        throwValidatorError(hex::format("redefinition of enum constant '{0}'", name.c_str()), value->getLineNumber());
                }
            }
        }
    } catch (const PatternLanguageError &e) {
        this->m_error = e;
        return false;
    }

    return true;
}
}

View File

@@ -1,10 +1,8 @@
#include <hex/providers/provider.hpp>
#include <hex.hpp>
#include <hex/api/content_registry.hpp>
#include <hex/api/event.hpp>
#include <hex/pattern_language/patterns/pattern.hpp>
#include <hex/pattern_language/pattern_language.hpp>
#include <hex/ui/view.hpp>
#include <cmath>
@@ -13,11 +11,13 @@
#include <optional>
#include <string>
#include <pl/pattern_language.hpp>
namespace hex::prv {
Provider::Provider() {
this->m_patches.emplace_back();
this->m_patternLanguageRuntime = std::make_unique<pl::PatternLanguage>();
this->m_patternLanguageRuntime = ContentRegistry::PatternLanguage::createDefaultRuntime(this);
if (this->hasLoadInterface())
EventManager::post<RequestOpenPopup>(View::toWindowName("hex.builtin.view.provider_settings.load_popup"));
@@ -44,7 +44,7 @@ namespace hex::prv {
}
void Provider::resize(size_t newSize) {
hex::unused(newSize);
this->m_patternLanguageRuntime->setDataSize(newSize);
}
void Provider::insert(u64 offset, size_t size) {
@@ -129,6 +129,7 @@ namespace hex::prv {
// Stores the new base address and mirrors it into the pattern language runtime.
void Provider::setBaseAddress(u64 address) {
    m_baseAddress = address;
    m_patternLanguageRuntime->setDataBaseAddress(address);
}
u64 Provider::getBaseAddress() const {