mirror of
https://github.com/WerWolv/ImHex.git
synced 2026-03-29 00:10:02 -05:00
Pattern Language rewrite (#111)
* Initial parser rewrite effort Lexer and Token cleanup, Parser started over * Greatly improved parser syntax * Reimplemented using declarations and variable placement parsing * Added back unions and structs * Added enums as well as mathematical expressions (+, -, *, /, <<, >>, &, |, ^) * Code style improvement * Implemented arrays and fixed memory issues * Fixed more memory issues in parser, reimplemented validator, evaluator and patterns * Fixed builtin types, arrays and reimplemented strings * Improved error messages * Made character a distinct type, used for chars and strings * Implemented padding, fixed arrays * Added bitfields * Added rvalue parsing, no evaluating yet * Added .idea folder to gitignore * Fixed build on MacOS * Added custom implementation of integral concept if not available * Rebased onto master * Fixed array variable decl crash * Added rvalues and dot syntax * Lower case all pattern language error messages * Fixed typo in variable name * Fixed bug where preprocessor would not ignore commented out directives * Reimplemented pointers * Fixed rebase issues
This commit is contained in:
@@ -18,165 +18,188 @@ namespace hex::lang {
|
||||
std::string output;
|
||||
output.reserve(code.length());
|
||||
|
||||
while (offset < code.length()) {
|
||||
if (code[offset] == '#') {
|
||||
try {
|
||||
while (offset < code.length()) {
|
||||
if (code[offset] == '#') {
|
||||
offset += 1;
|
||||
|
||||
if (code.substr(offset, 7) == "include") {
|
||||
offset += 7;
|
||||
|
||||
while (std::isblank(code[offset]) || std::isspace(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
if (code[offset] != '<' && code[offset] != '"')
|
||||
throwPreprocessorError("expected '<' or '\"' before file name", lineNumber);
|
||||
|
||||
char endChar = code[offset];
|
||||
if (endChar == '<') endChar = '>';
|
||||
|
||||
offset += 1;
|
||||
|
||||
std::string includeFile;
|
||||
while (code[offset] != endChar) {
|
||||
includeFile += code[offset];
|
||||
offset += 1;
|
||||
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError(hex::format("missing terminating '%c' character", endChar), lineNumber);
|
||||
}
|
||||
offset += 1;
|
||||
|
||||
if (includeFile[0] != '/')
|
||||
includeFile = "include/" + includeFile;
|
||||
|
||||
FILE *file = fopen(includeFile.c_str(), "r");
|
||||
if (file == nullptr)
|
||||
throwPreprocessorError(hex::format("%s: No such file or directory", includeFile.c_str()), lineNumber);
|
||||
|
||||
fseek(file, 0, SEEK_END);
|
||||
size_t size = ftell(file);
|
||||
char *buffer = new char[size + 1];
|
||||
rewind(file);
|
||||
|
||||
fread(buffer, size, 1, file);
|
||||
buffer[size] = 0x00;
|
||||
|
||||
auto preprocessedInclude = this->preprocess(buffer, false);
|
||||
if (!preprocessedInclude.has_value())
|
||||
throw this->m_error;
|
||||
|
||||
output += preprocessedInclude.value();
|
||||
|
||||
delete[] buffer;
|
||||
|
||||
fclose(file);
|
||||
} else if (code.substr(offset, 6) == "define") {
|
||||
offset += 6;
|
||||
|
||||
while (std::isblank(code[offset])) {
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
std::string defineName;
|
||||
while (!std::isblank(code[offset])) {
|
||||
defineName += code[offset];
|
||||
|
||||
if (offset >= code.length() || code[offset] == '\n' || code[offset] == '\r')
|
||||
throwPreprocessorError("no value given in #define directive", lineNumber);
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
while (std::isblank(code[offset])) {
|
||||
offset += 1;
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("no value given in #define directive", lineNumber);
|
||||
}
|
||||
|
||||
std::string replaceValue;
|
||||
while (code[offset] != '\n' && code[offset] != '\r') {
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("missing new line after #define directive", lineNumber);
|
||||
|
||||
replaceValue += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
if (replaceValue.empty())
|
||||
throwPreprocessorError("no value given in #define directive", lineNumber);
|
||||
|
||||
this->m_defines.emplace(defineName, replaceValue);
|
||||
} else if (code.substr(offset, 6) == "pragma") {
|
||||
offset += 6;
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string pragmaKey;
|
||||
while (!std::isblank(code[offset])) {
|
||||
pragmaKey += code[offset];
|
||||
|
||||
if (offset >= code.length() || code[offset] == '\n' || code[offset] == '\r')
|
||||
throwPreprocessorError("no instruction given in #pragma directive", lineNumber);
|
||||
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string pragmaValue;
|
||||
while (code[offset] != '\n' && code[offset] != '\r') {
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("missing new line after #pragma directive", lineNumber);
|
||||
|
||||
pragmaValue += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
if (pragmaValue.empty())
|
||||
throwPreprocessorError("missing value in #pragma directive", lineNumber);
|
||||
|
||||
this->m_pragmas.emplace(pragmaKey, pragmaValue);
|
||||
} else
|
||||
throwPreprocessorError("unknown preprocessor directive", lineNumber);
|
||||
} else if (code.substr(offset, 2) == "//") {
|
||||
while (code[offset] != '\n' && offset < code.length())
|
||||
offset += 1;
|
||||
} else if (code.substr(offset, 2) == "/*") {
|
||||
while (code.substr(offset, 2) != "*/" && offset < code.length()) {
|
||||
if (code[offset] == '\n')
|
||||
lineNumber++;
|
||||
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
offset += 2;
|
||||
if (offset >= code.length())
|
||||
throwPreprocessorError("unterminated comment", lineNumber);
|
||||
}
|
||||
|
||||
if (code[offset] == '\n')
|
||||
lineNumber++;
|
||||
|
||||
output += code[offset];
|
||||
offset += 1;
|
||||
|
||||
if (code.substr(offset, 7) == "include") {
|
||||
offset += 7;
|
||||
|
||||
while (std::isblank(code[offset]) || std::isspace(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
if (code[offset] != '<' && code[offset] != '"')
|
||||
return { };
|
||||
|
||||
char endChar = code[offset];
|
||||
if (endChar == '<') endChar = '>';
|
||||
|
||||
offset += 1;
|
||||
|
||||
std::string includeFile;
|
||||
while (code[offset] != endChar) {
|
||||
includeFile += code[offset];
|
||||
offset += 1;
|
||||
|
||||
if (offset >= code.length())
|
||||
return { };
|
||||
}
|
||||
offset += 1;
|
||||
|
||||
if (includeFile[0] != '/')
|
||||
includeFile = "include/" + includeFile;
|
||||
|
||||
FILE *file = fopen(includeFile.c_str(), "r");
|
||||
if (file == nullptr)
|
||||
return { };
|
||||
|
||||
fseek(file, 0, SEEK_END);
|
||||
size_t size = ftell(file);
|
||||
char *buffer = new char[size + 1];
|
||||
rewind(file);
|
||||
|
||||
fread(buffer, size, 1, file);
|
||||
buffer[size] = 0x00;
|
||||
|
||||
auto preprocessedInclude = this->preprocess(buffer, false);
|
||||
if (!preprocessedInclude.has_value())
|
||||
return { };
|
||||
|
||||
output += preprocessedInclude.value();
|
||||
|
||||
delete[] buffer;
|
||||
|
||||
fclose(file);
|
||||
} else if (code.substr(offset, 6) == "define") {
|
||||
offset += 6;
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string defineName;
|
||||
while (!std::isblank(code[offset])) {
|
||||
defineName += code[offset];
|
||||
|
||||
if (offset >= code.length() || code[offset] == '\n' || code[offset] == '\r')
|
||||
return { };
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string replaceValue;
|
||||
while (code[offset] != '\n' && code[offset] != '\r') {
|
||||
if (offset >= code.length())
|
||||
return { };
|
||||
|
||||
replaceValue += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
if (replaceValue.empty())
|
||||
return { };
|
||||
|
||||
this->m_defines.emplace(defineName, replaceValue);
|
||||
} else if (code.substr(offset, 6) == "pragma") {
|
||||
offset += 6;
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string pragmaKey;
|
||||
while (!std::isblank(code[offset])) {
|
||||
pragmaKey += code[offset];
|
||||
|
||||
if (offset >= code.length() || code[offset] == '\n' || code[offset] == '\r')
|
||||
return { };
|
||||
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
while (std::isblank(code[offset]))
|
||||
offset += 1;
|
||||
|
||||
std::string pragmaValue;
|
||||
while (code[offset] != '\n' && code[offset] != '\r') {
|
||||
if (offset >= code.length())
|
||||
return { };
|
||||
|
||||
pragmaValue += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
if (pragmaValue.empty())
|
||||
return { };
|
||||
|
||||
this->m_pragmas.emplace(pragmaKey, pragmaValue);
|
||||
} else
|
||||
return { };
|
||||
}
|
||||
|
||||
if (code[offset] == '\n')
|
||||
lineNumber++;
|
||||
if (initialRun) {
|
||||
// Apply defines
|
||||
for (const auto &[define, value] : this->m_defines) {
|
||||
s32 index = 0;
|
||||
while((index = output.find(define, index)) != std::string::npos) {
|
||||
output.replace(index, define.length(), value);
|
||||
index += value.length();
|
||||
}
|
||||
}
|
||||
|
||||
output += code[offset];
|
||||
offset += 1;
|
||||
}
|
||||
|
||||
if (initialRun) {
|
||||
// Apply defines
|
||||
for (const auto &[define, value] : this->m_defines) {
|
||||
s32 index = 0;
|
||||
while((index = output.find(define, index)) != std::string::npos) {
|
||||
output.replace(index, define.length(), value);
|
||||
index += value.length();
|
||||
// Handle pragmas
|
||||
for (const auto &[type, value] : this->m_pragmas) {
|
||||
if (this->m_pragmaHandlers.contains(type)) {
|
||||
if (!this->m_pragmaHandlers[type](value))
|
||||
throwPreprocessorError(hex::format("invalid value provided to '%s' #pragma directive", type.c_str()), lineNumber);
|
||||
} else
|
||||
throwPreprocessorError(hex::format("no #pragma handler registered for type %s", type.c_str()), lineNumber);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle pragmas
|
||||
for (const auto &[type, value] : this->m_pragmas) {
|
||||
if (this->m_pragmaHandlers.contains(type)) {
|
||||
if (!this->m_pragmaHandlers[type](value))
|
||||
return { };
|
||||
} else
|
||||
return { };
|
||||
}
|
||||
} catch (PreprocessorError &e) {
|
||||
this->m_error = e;
|
||||
return { };
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
void Preprocessor::addPragmaHandler(std::string pragmaType, std::function<bool(std::string)> function) {
|
||||
void Preprocessor::addPragmaHandler(const std::string &pragmaType, const std::function<bool(const std::string&)> &function) {
|
||||
if (!this->m_pragmaHandlers.contains(pragmaType))
|
||||
this->m_pragmaHandlers.emplace(pragmaType, function);
|
||||
}
|
||||
|
||||
void Preprocessor::addDefaultPragmaHandlers() {
|
||||
this->addPragmaHandler("MIME", [](std::string value) {
|
||||
this->addPragmaHandler("MIME", [](const std::string &value) {
|
||||
return !std::all_of(value.begin(), value.end(), isspace) && !value.ends_with('\n') && !value.ends_with('\r');
|
||||
});
|
||||
this->addPragmaHandler("endian", [](std::string value) {
|
||||
this->addPragmaHandler("endian", [](const std::string &value) {
|
||||
return value == "big" || value == "little" || value == "native";
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user