Files
imhex/plugins/builtin/include/content/text_highlighting/pattern_language.hpp
2026-03-20 10:46:34 -07:00

341 lines
17 KiB
C++

#pragma once
#include <pl/core/token.hpp>
#include <pl/core/preprocessor.hpp>
#include <pl/helpers/safe_iterator.hpp>
#include <ui/text_editor.hpp>
#include <hex/helpers/types.hpp>
#include <utility>
namespace hex::plugin::builtin {
class ViewPatternEditor;
class TextHighlighter {
public:
/// Intervals are the sets finite contiguous non-negative integer that
/// are described by their endpoints. The sets must have the following
/// properties:
/// 1. Any two elements of the set can either have an empty intersection or
/// 2. their intersection is equal to one of the two sets (i.e. one is
/// a subset of the other).
/// An interval is defined to be smaller than another if:
/// 1. The interval lies entirely to the left of the other interval or
/// 2. The interval is a proper subset of the other interval.
/// Two intervals are equal if they have identical start and end values.
/// This ordering is used for things like code blocks or the token
/// ranges that are defined by the blocks.
class TokenInterval {
public:
friend class TextHighlighter;
TokenInterval() : m_start(0), m_end(0) {}
TokenInterval(i32 start, i32 end) : m_start(start), m_end(end) {
if (m_start > m_end)
std::swap(m_start,m_end);
}
bool operator<(const TokenInterval &other) const {
return other.m_end > m_end;
}
bool operator>(const TokenInterval &other) const {
return m_end > other.m_end;
}
bool operator==(const TokenInterval &other) const {
return m_start == other.m_start && m_end == other.m_end;
}
bool operator!=(const TokenInterval &other) const {
return m_start != other.m_start || m_end != other.m_end;
}
bool operator<=(const TokenInterval &other) const {
return other.m_end >= m_end;
}
bool operator>=(const TokenInterval &other) const {
return m_end >= other.m_end;
}
[[nodiscard]] bool contains(const TokenInterval &other) const {
return other.m_start >= m_start && other.m_end <= m_end;
}
[[nodiscard]] bool contains(i32 value) const {
return value >= m_start && value <= m_end;
}
[[nodiscard]] bool contiguous(const TokenInterval &other) const {
auto highEndDiff = m_start - other.m_end;
auto lowEndDiff = other.m_start - m_end;
return highEndDiff == 0 || highEndDiff == 1 || lowEndDiff == 0 || lowEndDiff == 1;
}
private:
i32 m_start;
i32 m_end;
};
using Coordinates = ui::TextEditor::Coordinates;
using TokenInterval = TextHighlighter::TokenInterval;
using Token = pl::core::Token;
using ASTNode = pl::core::ast::ASTNode;
using CompileError = pl::core::err::CompileError;
using Identifier = Token::Identifier;
using IdentifierType = Identifier::IdentifierType;
using UnorderedBlocks = std::map<std::string,TokenInterval>;
using OrderedBlocks = std::map<TokenInterval,std::string>;
using Scopes = std::set<TokenInterval>;
using Location = pl::core::Location;
using StringVector = std::vector<std::string>;
using StringSet = std::set<std::string>;
using SafeTokenIterator = pl::hlp::SafeIterator<std::vector<Token>::const_iterator>;
using VariableScopes = std::map<std::string,Scopes>;
using Inheritances = std::map<std::string,StringSet>;
using IdentifierTypeColor = std::map<Identifier::IdentifierType,ui::TextEditor::PaletteIndex>;
using TokenTypeColor = std::map<Token::Type,ui::TextEditor::PaletteIndex>;
using TokenColor = std::map<i32, ui::TextEditor::PaletteIndex>;
using Types = std::map<std::string, pl::hlp::safe_shared_ptr<pl::core::ast::ASTNodeTypeDecl>>;
using ParsedImports = std::map<std::string,std::vector<Token>>;
using Str2StrMap = std::map<std::string,std::string>;
using CompileErrors = std::vector<CompileError>;
using TokenSequence = std::vector<Token>;
using TokenIdVector = std::vector<i32>;
using TokenIdSet = std::set<i32>;
using Instances = std::map<std::string,std::vector<i32>>;
using CodeFoldBlocks = ui::TextEditor::CodeFoldBlocks;
struct ParentDefinition;
struct Definition {
Definition()= default;
Definition(IdentifierType identifierType, std::string typeStr,i32 tokenId, Location location) : idType(identifierType), typeStr(std::move(typeStr)), tokenIndex(tokenId),location(location) {}
IdentifierType idType{};
std::string typeStr;
i32 tokenIndex{};
Location location{};
};
struct ParentDefinition {
ParentDefinition() = default;
ParentDefinition(IdentifierType identifierType, i32 tokenId, Location location) : idType(identifierType), tokenIndex(tokenId), location(location) {}
IdentifierType idType;
i32 tokenIndex;
Location location;
};
struct RequiredInputs {
friend class TextHighlighter;
private:
TextHighlighter *m_textHighlighter;
Types definedTypes;
StringVector usedNamespaces;
ParsedImports parsedImports;
Str2StrMap importedHeaders;
TokenSequence fullTokens;
std::string editedText;
CompileErrors compileErrors;
StringVector linesOfColors;
public:
RequiredInputs() : m_textHighlighter(nullptr) {};
explicit RequiredInputs(TextHighlighter *textHighlighter) : m_textHighlighter(textHighlighter) {}
void setRequiredInputs();
void setTypes();
void setNamespaces();
void setImports();
void setText();
void setCompileErrors();
};
/// to define functions and types
using Definitions = std::map<std::string,ParentDefinition>;
/// to define global variables
using Variables = std::map<std::string,std::vector<Definition>>;
/// to define UDT and function variables
using VariableMap = std::map<std::string,Variables>;
inline static const Coordinates Invalid = Coordinates(0x80000000, 0x80000000);
private:
StringVector m_lines;
TokenIdVector m_firstTokenIdOfLine;
ViewPatternEditor *m_viewPatternEditor{};
TokenColor m_tokenColors;
Instances m_instances;
Definitions m_UDTDefinitions;
Definitions m_functionDefinitions;
OrderedBlocks m_namespaceTokenRange;
UnorderedBlocks m_UDTTokenRange;
UnorderedBlocks m_functionTokenRange;
Scopes m_globalTokenRange;
VariableMap m_UDTVariables;
VariableMap m_ImportedUDTVariables;
VariableMap m_functionVariables;
Variables m_globalVariables;
Str2StrMap m_attributeFunctionArgumentType;
Str2StrMap m_typeDefMap;
Str2StrMap m_typeDefInvMap;
StringVector m_UDTs;
TokenIdSet m_taggedIdentifiers;
TokenIdSet m_memberChains;
TokenIdSet m_scopeChains;
TokenIdSet m_identifierTokenIds;
RequiredInputs m_requiredInputs;
SafeTokenIterator m_curr;
SafeTokenIterator m_startToken, m_originalPosition, m_partOriginalPosition;
VariableScopes m_UDTBlocks;
VariableScopes m_functionBlocks;
Scopes m_globalBlocks;
CodeFoldBlocks m_foldEndPoints;
Inheritances m_inheritances;
const static IdentifierTypeColor m_identifierTypeColor;
const static TokenTypeColor m_tokenTypeColor;
public:
constexpr static u32 Normal = 0;
constexpr static u32 Not = 1;
pl::PatternLanguage *getPatternLanguage();
void updateRequiredInputs();
RequiredInputs& getRequiredInputs();
ViewPatternEditor* getViewPatternEditor();
void setViewPatternEditor(ViewPatternEditor *viewPatternEditor);
void setTokenIds();
TextHighlighter();
~TextHighlighter();
explicit TextHighlighter(ViewPatternEditor *viewPatternEditor) : m_viewPatternEditor(viewPatternEditor) {}
/**
* @brief Entry point to syntax highlighting
*/
void highlightSourceCode();
void processSource();
void clearVariables();
/**
* @brief Syntax highlighting from parser
*/
void setInitialColors();
/**
* @brief Create data to pass to text editor
*/
void setRequestedIdentifierColors();
/**
* @brief Set the color of a token
*/
void setColor(i32 tokenId=-1, const IdentifierType &type = IdentifierType::Unknown);
void setIdentifierColor(i32 tokenId=-1, const IdentifierType &type = IdentifierType::Unknown);
/**
* @brief Only identifiers not in chains should remain
*/
void colorRemainingIdentifierTokens();
/**
* @brief Renders compile errors in real time
*/
void renderErrors();
/// A token range is the set of token indices of a definition. The namespace token
/// ranges are obtained first because they are needed to obtain unique identifiers.
void getTokenRanges(IdentifierType identifierTypeToSearch);
std::vector<TokenInterval> searchRangeForBlocks(TokenInterval interval);
/// The global scope is the complement of the union of all the function and UDT token ranges
void getGlobalTokenRanges();
/// If the current token is a function or UDT, creates a map entry from the name to the token range. These are ordered alphabetically by name.
/// If the current token is a namespace, creates a map entry from the token range to the name. Namespace entries are stored in the order they occur in the source code.
bool getTokenRange(std::vector<Token> keywords,UnorderedBlocks &tokenRange, OrderedBlocks &tokenRangeInv, bool fullName, VariableScopes *blocks);
/// Global variables are the variables that are not inside a function or UDT
void fixGlobalVariables();
/// Creates the definition maps for UDTs, functions, their variables and global variables
void getDefinitions();
void loadGlobalDefinitions(Scopes tokenRangeSet, std::vector<IdentifierType> identifierTypes, Variables &variables);
void loadVariableDefinitions(UnorderedBlocks tokenRangeMap, Token delimiter1, Token delimiter2, std::vector<IdentifierType> identifierTypes, bool isArgument, VariableMap &variableMap);
void loadTypeDefinitions(UnorderedBlocks tokenRangeMap, std::vector<IdentifierType> identifierTypes, Definitions &types);
std::string getArgumentTypeName(i32 rangeStart, Token delimiter2);
std::string getVariableTypeName();
/// Append the variable definitions of the parent to the child
void appendInheritances();
void recurseInheritances(std::string name);
///Loads a map of identifiers to their token id instances
void loadInstances();
/// Replace auto with the actual type for template arguments and function parameters
void fixAutos();
void resolveAutos(VariableMap &variableMap, UnorderedBlocks &tokenRange);
/// Chains are sequences of identifiers separated by scope resolution or dot operators.
void fixChains();
bool colorSeparatorScopeChain();
bool colorRealSeparatorScopeChain();
bool colorImplicitSeparatorScopeChain();
bool colorOperatorDotChain();
/// Returns the next/previous valid source code line
u32 nextLine(u32 line);
u32 previousLine(u32 line);
/// Loads the source code and calculates the first token index of each line
void loadText();
/// The complement of a set is also known as its inverse
void invertGlobalTokenRange();
/// Starting at the identifier, it tracks all the scope resolution and dot operators and returns the full chain without arrays, templates, pointers,...
bool getFullName(std::string &identifierName, std::vector<Identifier *> &identifiers, bool preserveCurr = true);
/// Returns the identifier value.
bool getIdentifierName(std::string &identifierName, Identifier *identifier);
/// Adds namespaces to the full name if they exist
bool getQualifiedName(std::string &identifierName, std::vector<Identifier *> &identifiers, bool useDefinitions = false, bool preserveCurr = true);
/// As it moves forward it loads the result to the argument. Used by getFullName
bool forwardIdentifierName(std::string &identifierName, std::vector<Identifier *> &identifiers, bool preserveCurr = true);
/// Takes as input the full name and returns the type of the last element.
bool resolveIdentifierType(Definition &result, std::string identifierName);
/// like previous functions but returns the type of the variable that is a member of a UDT
std::string findIdentifierTypeStr(const std::string &identifierName, std::string context="");
IdentifierType findIdentifierType(const std::string &identifierName, std::string context);
/// If context is empty search for the variable, if it isnt use the variable map.
bool findOrContains(std::string &context, UnorderedBlocks tokenRange, VariableMap variableMap);
/// Search for instances inside some block
void setBlockInstancesColor(const std::string &name, const Definition &definition, const TokenInterval &block);
/// Convenience functions.
void skipAttribute();
void skipArray(i32 maxSkipCount, bool forward = true);
void skipTemplate(i32 maxSkipCount, bool forward = true);
void skipDelimiters(i32 maxSkipCount, Token delimiter[2], i8 increment);
void skipToken(Token token, i8 step=1);
/// from given or current names find the corresponding definition
bool findIdentifierDefinition(Definition &result, const std::string &optionalIdentifierName = "", std::string optionalName = "", bool optional = false);
/// To deal with the Parent keyword
std::optional<Definition> setChildrenTypes();
bool findParentTypes(StringVector &parentTypes, const std::string &optionalName="");
bool findAllParentTypes(StringVector &parentTypes, std::vector<Identifier *> &identifiers, std::string &optionalFullName);
bool tryParentType(const std::string &parentType, std::string &variableName, std::optional<Definition> &result, std::vector<Identifier *> &identifiers);
/// Convenience function
bool isTokenIdValid(i32 tokenId);
bool isLocationValid(Location location);
/// Returns the name of the context where the current or given token is located
bool findScope(std::string &name, const UnorderedBlocks &map, i32 optionalTokenId=-1);
/// Returns the name of the namespace where the current or given token is located
bool findNamespace(std::string &nameSpace, i32 optionalTokenId=-1);
/// Calculate the source code, line and column numbers of a token index
pl::core::Location getLocation(i32 tokenId);
/// Calculate the token index of a source code, line and column numbers
i32 getTokenId(pl::core::Location location);
i32 getTokenId(SafeTokenIterator tokenIterator);
i32 getTokenId();
/// Calculate the function or template argument position from token indices
i32 getArgumentNumber(i32 start,i32 arg);
/// Calculate the token index of a function or template argument position
void getTokenIdForArgument(i32 start, i32 argNumber, Token delimiter);
///Creates a map from function name to argument type
void linkAttribute();
/// The following functions were copied from the parser and some were modified
template<typename T> T *getValue(i32 index);
void next(i32 count = 1);
bool begin();
void partBegin();
void reset();
void partReset();
bool resetIfFailed(bool value) ;
template<auto S = Normal> bool sequenceImpl();
template<auto S = Normal> bool matchOne(const Token &token);
template<auto S = Normal> bool sequenceImpl(const auto &... args);
template<auto S = Normal> bool sequence(const Token &token, const auto &... args);
bool isValid();
bool peek(const Token &token, i32 index = 0);
};
}