diff --git a/lib/external/pattern_language b/lib/external/pattern_language index cb4b5a14a..572a48180 160000 --- a/lib/external/pattern_language +++ b/lib/external/pattern_language @@ -1 +1 @@ -Subproject commit cb4b5a14ae98027b9cf14e3235de0e91a34786cd +Subproject commit 572a481803aa19c5bd0c3ccce08d63666d01499c diff --git a/lib/libimhex/include/hex/helpers/utils.hpp b/lib/libimhex/include/hex/helpers/utils.hpp index db00c7d63..d9e62ade9 100644 --- a/lib/libimhex/include/hex/helpers/utils.hpp +++ b/lib/libimhex/include/hex/helpers/utils.hpp @@ -27,6 +27,20 @@ struct ImVec2; namespace hex { + template + std::vector sampleData(const std::vector &data, size_t count) { + size_t stride = std::max(1.0, double(data.size()) / count); + + std::vector result; + result.reserve(count); + + for (size_t i = 0; i < data.size(); i += stride) { + result.push_back(data[i]); + } + + return result; + } + float operator""_scaled(long double value); float operator""_scaled(unsigned long long value); ImVec2 scaled(const ImVec2 &vector); diff --git a/plugins/builtin/include/content/helpers/diagrams.hpp b/plugins/builtin/include/content/helpers/diagrams.hpp index b609efd1c..0d5050c10 100644 --- a/plugins/builtin/include/content/helpers/diagrams.hpp +++ b/plugins/builtin/include/content/helpers/diagrams.hpp @@ -3,9 +3,15 @@ #include #include +#include + +#include +#include + #define IMGUI_DEFINE_MATH_OPERATORS #include +#include #include namespace hex { @@ -130,6 +136,28 @@ namespace hex { this->m_processing = false; } + void reset(u64 size) { + this->m_processing = true; + this->m_buffer.clear(); + this->m_buffer.resize(size); + this->m_byteCount = 0; + this->m_fileSize = size; + } + + void update(u8 byte) { + // Check if there is some space left + if (this->m_byteCount < this->m_fileSize) { + this->m_buffer[this->m_byteCount] = byte; + ++this->m_byteCount; + if (this->m_byteCount == this->m_fileSize) { + this->m_buffer = getSampleSelection(this->m_buffer, this->m_sampleSize); + 
processImpl(); + this->m_processing = false; + } + } + } + + private: void processImpl() { this->m_glowBuffer.resize(this->m_buffer.size()); @@ -142,7 +170,7 @@ namespace hex { } for (size_t i = 0; i < (this->m_buffer.empty() ? 0 : this->m_buffer.size() - 1); i++) { - this->m_glowBuffer[i] = std::min(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F); + this->m_glowBuffer[i] = std::min(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F); } this->m_opacity = (log10(float(this->m_sampleSize)) / log10(float(m_highestCount))) / 10.0F; @@ -151,6 +179,10 @@ namespace hex { private: size_t m_sampleSize; + // The number of byte processed and the size of + // the file to analyze (useful for iterative analysis) + u64 m_byteCount; + u64 m_fileSize; std::vector m_buffer; std::vector m_glowBuffer; float m_opacity = 0.0F; @@ -158,7 +190,6 @@ namespace hex { std::atomic m_processing = false; }; - class DiagramLayeredDistribution { public: DiagramLayeredDistribution(size_t sampleSize = 0x9000) : m_sampleSize(sampleSize) { } @@ -200,6 +231,27 @@ namespace hex { this->m_processing = false; } + void reset(u64 size) { + this->m_processing = true; + this->m_buffer.clear(); + this->m_buffer.resize(size); + this->m_byteCount = 0; + this->m_fileSize = size; + } + + void update(u8 byte) { + // Check if there is some space left + if (this->m_byteCount < this->m_fileSize) { + this->m_buffer[this->m_byteCount] = byte; + ++this->m_byteCount; + if (this->m_byteCount == this->m_fileSize) { + this->m_buffer = getSampleSelection(this->m_buffer, this->m_sampleSize); + processImpl(); + this->m_processing = false; + } + } + } + private: void processImpl() { this->m_glowBuffer.resize(this->m_buffer.size()); @@ -212,13 +264,18 @@ namespace hex { } for (size_t i = 0; i < (this->m_buffer.empty() ? 
0 : this->m_buffer.size() - 1); i++) { - this->m_glowBuffer[i] = std::min(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F); + this->m_glowBuffer[i] = std::min(0.2F + (float(heatMap[this->m_buffer[i] << 8 | this->m_buffer[i + 1]]) / float(this->m_highestCount / 1000)), 1.0F); } this->m_opacity = (log10(float(this->m_sampleSize)) / log10(float(m_highestCount))) / 10.0F; } private: size_t m_sampleSize; + + // The number of byte processed and the size of + // the file to analyze (useful for iterative analysis) + u64 m_byteCount; + u64 m_fileSize; std::vector m_buffer; std::vector m_glowBuffer; @@ -227,4 +284,619 @@ namespace hex { std::atomic m_processing = false; }; -} \ No newline at end of file + class DiagramChunkBasedEntropyAnalysis { + public: + DiagramChunkBasedEntropyAnalysis(u64 blockSize = 256, size_t sampleSize = 0x1000) : m_blockSize(blockSize), m_sampleSize(sampleSize) { } + + void draw(ImVec2 size, ImPlotFlags flags, bool updateHandle = false) { + + if (!this->m_processing && ImPlot::BeginPlot("##ChunkBasedAnalysis", size, flags)) { + ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.view.information.entropy"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock); + + // Set the axis limit to [first block : last block] + ImPlot::SetupAxesLimits( + this->m_startAddress / this->m_blockSize, + this->m_endAddress / this->m_blockSize, + -0.1F, 1.1F, ImGuiCond_Always + ); + + // Draw the plot + ImPlot::PlotLine("##ChunkBasedAnalysisLine", this->m_xBlockEntropy.data(), this->m_yBlockEntropy.data(), this->m_blockCount); + + // The parameter updateHandle is used when using the pattern language since we don't have a provider + // but just a set of bytes we won't be able to use the drag bar correctly. 
+ if (updateHandle) { + // Set a draggable line on the plot + if (ImPlot::DragLineX(1, &this->m_handlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) { + // The line was dragged, update the position in the hex editor + + // Clamp the value between the start/end of the region to analyze + this->m_handlePosition = std::clamp( + this->m_handlePosition, + std::ceil(this->m_startAddress / double(this->m_blockSize)), + std::floor(this->m_endAddress / double(this->m_blockSize))); + + // Compute the position inside hex editor + u64 address = u64(std::max(this->m_handlePosition * this->m_blockSize, 0)) + this->m_baseAddress; + address = std::min(address, this->m_baseAddress + this->m_fileSize - 1); + ImHexApi::HexEditor::setSelection(address, 1); + } + } + ImPlot::EndPlot(); + } + } + + void process(prv::Provider *provider, u64 chunkSize, u64 startAddress, u64 endAddress) { + this->m_processing = true; + + // Update attributes + this->m_chunkSize = chunkSize; + this->m_startAddress = startAddress; + this->m_endAddress = endAddress; + + this->m_baseAddress = provider->getBaseAddress(); + this->m_fileSize = provider->getSize(); + + // Get a file reader + auto reader = prv::BufferedReader(provider); + std::vector bytes = reader.read(this->m_startAddress, this->m_endAddress - this->m_startAddress); + + this->processImpl(bytes); + + // Set the diagram handle position to the start of the plot + this->m_handlePosition = this->m_startAddress / double(this->m_blockSize); + + this->m_processing = false; + } + + void process(std::vector buffer, u64 chunkSize) { + this->m_processing = true; + + // Update attributes (use buffer size as end address) + this->m_chunkSize = chunkSize; + this->m_startAddress = 0; + this->m_endAddress = buffer.size(); + + this->m_baseAddress = 0; + this->m_fileSize = buffer.size(); + + this->processImpl(buffer); + + // Set the diagram handle position to the start of the plot + this->m_handlePosition = this->m_startAddress / double(this->m_blockSize); + + 
this->m_processing = false; + } + + // Reset the entropy analysis + void reset(u64 chunkSize, u64 startAddress, u64 endAddress, u64 baseAddress, u64 size) { + this->m_processing = true; + + // Update attributes + this->m_chunkSize = chunkSize; + this->m_startAddress = startAddress; + this->m_endAddress = endAddress; + this->m_baseAddress = baseAddress; + this->m_fileSize = size; + + this->m_blockValueCounts = { 0 }; + + // Reset and resize the array + this->m_yBlockEntropy.clear(); + this->m_yBlockEntropy.resize(((this->m_endAddress - this->m_startAddress) / this->m_chunkSize) + 1); + + this->m_byteCount = 0; + this->m_blockCount = 0; + + // Set the diagram handle position to the start of the plot + this->m_handlePosition = this->m_startAddress / double(this->m_blockSize); + } + + // Process one byte at the time + void update(u8 byte) { + u64 totalBlock = std::ceil((this->m_endAddress - this->m_startAddress) / this->m_chunkSize); + + // Check if there is still some + if (this->m_blockCount < totalBlock) { + // Increment the occurrence of the current byte + this->m_blockValueCounts[byte]++; + + this->m_byteCount++; + // Check if we processed one complete chunk, if so compute the entropy and start analysing the next chunk + if (((this->m_byteCount % this->m_chunkSize) == 0) || this->m_byteCount == (this->m_endAddress - this->m_startAddress)) [[unlikely]] { + this->m_yBlockEntropy[this->m_blockCount] = calculateEntropy(this->m_blockValueCounts, this->m_chunkSize); + + this->m_blockCount += 1; + this->m_blockValueCounts = { 0 }; + } + + // Check if we processed the last block, if so setup the X axis part of the data + if (this->m_blockCount == totalBlock) { + processFinalize(); + this->m_processing = false; + } + } + } + + // Method used to compute the entropy of a block of size `blockSize` + // using the bytes occurrences from `valueCounts` array. 
+ double calculateEntropy(std::array &valueCounts, size_t blockSize) { + double entropy = 0; + + for (auto count : valueCounts) { + if (count == 0) [[unlikely]] + continue; + + double probability = static_cast(count) / blockSize; + + entropy += probability * std::log2(probability); + } + + return std::min(1.0, (-entropy) / 8); // log2(256) = 8 + } + + // Return the highest entropy value among all of the blocks + double getHighestBlockEntropy() { + double highestBlockEntropy = 0.0f; + if (!this->m_yBlockEntropy.empty()) + highestBlockEntropy = *std::max_element(this->m_yBlockEntropy.begin(), this->m_yBlockEntropy.end()); + return highestBlockEntropy; + } + + // Return the number of blocks that have been processed + u64 getSize() { + return this->m_yBlockEntropy.size(); + } + + // Return the size of the chunk used for this analysis + u64 getChunkSize() { + return this->m_chunkSize; + } + + void setHandlePosition(u64 filePosition) { + this->m_handlePosition = filePosition / double(this->m_blockSize); + } + + private: + // Private method used to factorize the process public method + void processImpl(std::vector bytes) { + this->m_blockValueCounts = { 0 }; + + // Reset and resize the array + this->m_yBlockEntropy.clear(); + this->m_yBlockEntropy.resize(std::ceil((this->m_endAddress - this->m_startAddress) / this->m_chunkSize)); + + this->m_byteCount = 0; + this->m_blockCount = 0; + + // Loop over each byte of the file (or a part of it) + for (u8 byte: bytes) { + // Increment the occurrence of the current byte + this->m_blockValueCounts[byte]++; + + this->m_byteCount++; + // Check if we processed one complete chunk, if so compute the entropy and start analysing the next chunk + if (((this->m_byteCount % this->m_chunkSize) == 0) || this->m_byteCount == bytes.size() * 8) [[unlikely]] { + this->m_yBlockEntropy[this->m_blockCount] = calculateEntropy(this->m_blockValueCounts, this->m_chunkSize); + + this->m_blockCount += 1; + this->m_blockValueCounts = { 0 }; + } + } + 
processFinalize(); + } + + void processFinalize() { + // Only save at most m_sampleSize elements of the result + this->m_yBlockEntropy = sampleData(this->m_yBlockEntropy, std::min(this->m_blockCount, this->m_sampleSize)); + + size_t stride = std::max(1.0, double( + std::ceil((this->m_endAddress - this->m_startAddress) / this->m_blockSize) / this->m_yBlockEntropy.size())) + 1; + + this->m_blockCount = this->m_yBlockEntropy.size(); + + // The m_xBlockEntropy attribute is used to specify the position of entropy values + // in the plot when the Y axis doesn't start at 0 + this->m_xBlockEntropy.clear(); + this->m_xBlockEntropy.resize(this->m_blockCount); + for (u64 i = 0; i < this->m_blockCount; ++i) + this->m_xBlockEntropy[i] = (this->m_startAddress / this->m_blockSize) + stride*i; + --this->m_blockCount; + } + + private: + // Variables used to store the parameters to process + + // Chunk's size for entropy analysis + u64 m_chunkSize; + u64 m_startAddress; + u64 m_endAddress; + // Start / size of the file + u64 m_baseAddress; + u64 m_fileSize; + // The size of the blocks (for diagram drawing) + u64 m_blockSize; + + // Position of the handle inside the plot + double m_handlePosition = 0.0; + + // Hold the number of block that have been processed + // during the chunk based entropy analysis + u64 m_blockCount; + + // Hold the number of bytes that have been processed + // during the analysis (useful for the iterative analysis) + u64 m_byteCount; + + // Array used to hold the occurrences of each byte + // (useful for the iterative analysis) + std::array m_blockValueCounts; + + // Variable to hold the result of the chunk based + // entropy analysis + std::vector m_xBlockEntropy; + std::vector m_yBlockEntropy; + + // Sampling size, number of elements displayed in the plot, + // avoid showing to many data because it decreased the frame rate + size_t m_sampleSize; + + std::atomic m_processing = false; + }; + + class DiagramByteDistribution { + public: + + void draw(ImVec2 
size, ImPlotFlags flags) { + + if (!this->m_processing && ImPlot::BeginPlot("##distribution", size, flags)) { + ImPlot::SetupAxes("hex.builtin.common.value"_lang, "hex.builtin.common.count"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock | ImPlotAxisFlags_LogScale); + ImPlot::SetupAxesLimits(0, 256, 1, double(*std::max_element(this->m_valueCounts.begin(), this->m_valueCounts.end())) * 1.1F, ImGuiCond_Always); + + constexpr static auto x = [] { + std::array result { 0 }; + std::iota(result.begin(), result.end(), 0); + return result; + }(); + + ImPlot::PlotBars("##bytes", x.data(), this->m_valueCounts.data(), x.size(), 1.0); + ImPlot::EndPlot(); + } + } + + void process(prv::Provider *provider, u64 startAddress, u64 endAddress) { + this->m_processing = true; + + // Update attributes + this->m_startAddress = startAddress; + this->m_endAddress = endAddress; + + // Get a file reader + auto reader = prv::BufferedReader(provider); + std::vector bytes = reader.read(this->m_startAddress, this->m_endAddress - this->m_startAddress); + + this->processImpl(bytes); + + this->m_processing = false; + } + + void process(std::vector buffer) { + this->m_processing = true; + + // Update attributes + this->m_startAddress = 0; + this->m_endAddress = buffer.size(); + + this->processImpl(buffer); + + this->m_processing = false; + } + + // Reset the byte distribution array + void reset() { + this->m_processing = true; + this->m_valueCounts.fill(0); + this->m_processing = false; + } + + // Process one byte at the time + void update(u8 byte) { + this->m_processing = true; + this->m_valueCounts[byte]++; + this->m_processing = false; + } + + // Return byte distribution array in it's current state + std::array & get() { + return this->m_valueCounts; + } + + private: + // Private method used to factorize the process public method + void processImpl(std::vector bytes) { + // Reset the array + this->m_valueCounts.fill(0); + // Loop over each byte of the file (or a part of it) + // Increment the 
occurrence of the current byte + for (u8 byte : bytes) + this->m_valueCounts[byte]++; + } + + private: + // Variables used to store the parameters to process + u64 m_startAddress; + u64 m_endAddress; + + // Hold the result of the byte distribution analysis + std::array m_valueCounts; + std::atomic m_processing = false; + }; + + class DiagramByteTypesDistribution { + public: + DiagramByteTypesDistribution(u64 blockSize = 256, size_t sampleSize = 0x1000) : m_blockSize(blockSize), m_sampleSize(sampleSize){ } + + void draw(ImVec2 size, ImPlotFlags flags, bool updateHandle = false) { + // Draw the result of the analysis + if (!this->m_processing && ImPlot::BeginPlot("##byte_types", size, flags)) { + ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.common.percentage"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock); + ImPlot::SetupAxesLimits(this->m_startAddress / this->m_blockSize, this->m_endAddress / this->m_blockSize, -0.1F, 100.1F, ImGuiCond_Always); + ImPlot::SetupLegend(ImPlotLocation_South, ImPlotLegendFlags_Horizontal | ImPlotLegendFlags_Outside); + + constexpr static std::array Names = { "iscntrl", "isprint", "isspace", "isblank", + "isgraph", "ispunct", "isalnum", "isalpha", + "isupper", "islower", "isdigit", "isxdigit" + }; + + for (u32 i = 0; i < Names.size(); i++) { + ImPlot::PlotLine(Names[i], this->m_xBlockTypeDistributions.data(), this->m_yBlockTypeDistributions[i].data(), this->m_blockCount); + } + + // The parameter updateHandle is used when using the pattern language since we don't have a provider + // but just a set of bytes we won't be able to use the drag bar correctly. 
+ if (updateHandle) { + // Set a draggable line on the plot + if (ImPlot::DragLineX(1, &this->m_handlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) { + // The line was dragged, update the position in the hex editor + + // Clamp the value between the start/end of the region to analyze + this->m_handlePosition = std::clamp( + this->m_handlePosition, + std::ceil(this->m_startAddress / double(this->m_blockSize)), + std::floor(this->m_endAddress / double(this->m_blockSize))); + + // Compute the position inside hex editor + u64 address = u64(std::max(this->m_handlePosition * this->m_blockSize, 0)) + this->m_baseAddress; + address = std::min(address, this->m_baseAddress + this->m_fileSize - 1); + ImHexApi::HexEditor::setSelection(address, 1); + } + } + ImPlot::EndPlot(); + } + } + + void process(prv::Provider *provider, u64 startAddress, u64 endAddress) { + this->m_processing = true; + + // Update attributes + this->m_startAddress = startAddress; + this->m_endAddress = endAddress; + this->m_baseAddress = provider->getBaseAddress(); + this->m_fileSize = provider->getSize(); + + // Get a file reader + auto reader = prv::BufferedReader(provider); + std::vector bytes = reader.read(this->m_startAddress, this->m_endAddress - this->m_startAddress); + + this->processImpl(bytes); + + // Set the diagram handle position to the start of the plot + this->m_handlePosition = this->m_startAddress / double(this->m_blockSize); + + this->m_processing = false; + } + + void process(std::vector buffer, u64 baseAddress, u64 fileSize) { + this->m_processing = true; + + // Update attributes + this->m_startAddress = 0; + this->m_endAddress = buffer.size(); + this->m_baseAddress = baseAddress; + this->m_fileSize = fileSize; + + this->processImpl(buffer); + + // Set the diagram handle position to the start of the plot + this->m_handlePosition = this->m_startAddress / double(this->m_blockSize); + + this->m_processing = false; + } + + // Reset the byte type distribution analysis + void reset(u64 
startAddress, u64 endAddress, u64 baseAddress, u64 size) { + this->m_processing = true; + + // Update attributes + this->m_startAddress = startAddress; + this->m_endAddress = endAddress; + this->m_baseAddress = baseAddress; + this->m_fileSize = size; + + this->m_byteCount = 0; + this->m_blockCount = 0; + this->m_blockValueCounts = { 0 }; + + // Reset and resize the array + this->m_yBlockTypeDistributions.fill({}); + for (auto &blockDistribution : this->m_yBlockTypeDistributions) + blockDistribution.resize(((this->m_endAddress - this->m_startAddress) / this->m_blockSize) + 1); + + // Set the diagram handle position to the start of the plot + this->m_handlePosition = this->m_startAddress / double(this->m_blockSize); + } + + // Process one byte at the time + void update(u8 byte) { + u64 totalBlock = std::ceil((this->m_endAddress - this->m_startAddress) / this->m_blockSize); + // Check if there is still some block to process + if (this->m_blockCount < totalBlock) { + + this->m_blockValueCounts[byte]++; + + this->m_byteCount++; + if (((this->m_byteCount % this->m_blockSize) == 0) || this->m_byteCount == (this->m_endAddress - this->m_startAddress)) [[unlikely]] { + auto typeDist = calculateTypeDistribution(this->m_blockValueCounts, this->m_blockSize); + for (u8 i = 0; i < typeDist.size(); i++) + this->m_yBlockTypeDistributions[i][this->m_blockCount] = typeDist[i] * 100; + + this->m_blockCount += 1; + this->m_blockValueCounts = { 0 }; + } + + // Check if we processed the last block, if so setup the X axis part of the data + if (this->m_blockCount == totalBlock) { + + processFinalize(); + this->m_processing = false; + } + } + } + + // Return the percentage of plain text character inside the analyzed region + double getPlainTextCharacterPercentage() { + double plainTextPercentage = std::reduce(this->m_yBlockTypeDistributions[2].begin(), this->m_yBlockTypeDistributions[2].end()) / this->m_yBlockTypeDistributions[2].size(); + return plainTextPercentage + 
std::reduce(this->m_yBlockTypeDistributions[4].begin(), this->m_yBlockTypeDistributions[4].end()) / this->m_yBlockTypeDistributions[4].size(); + } + + void setHandlePosition(u64 filePosition) { + this->m_handlePosition = filePosition / double(this->m_blockSize); + } + + private: + std::array calculateTypeDistribution(std::array &valueCounts, size_t blockSize) { + std::array counts = {}; + + for (u16 value = 0x00; value < u16(valueCounts.size()); value++) { + const auto &count = valueCounts[value]; + + if (count == 0) [[unlikely]] + continue; + + if (std::iscntrl(value)) + counts[0] += count; + if (std::isprint(value)) + counts[1] += count; + if (std::isspace(value)) + counts[2] += count; + if (std::isblank(value)) + counts[3] += count; + if (std::isgraph(value)) + counts[4] += count; + if (std::ispunct(value)) + counts[5] += count; + if (std::isalnum(value)) + counts[6] += count; + if (std::isalpha(value)) + counts[7] += count; + if (std::isupper(value)) + counts[8] += count; + if (std::islower(value)) + counts[9] += count; + if (std::isdigit(value)) + counts[10] += count; + if (std::isxdigit(value)) + counts[11] += count; + } + + std::array distribution = {}; + for (u32 i = 0; i < distribution.size(); i++) + distribution[i] = static_cast(counts[i]) / blockSize; + + return distribution; + } + + // Private method used to factorize the process public method + void processImpl(std::vector bytes) { + this->m_blockValueCounts = { 0 }; + + this->m_yBlockTypeDistributions.fill({}); + for (auto &blockDistribution : this->m_yBlockTypeDistributions) + blockDistribution.resize(((this->m_endAddress - this->m_startAddress) / this->m_blockSize) + 1); + + this->m_byteCount = 0; + this->m_blockCount = 0; + + // Loop over each byte of the file (or a part of it) + for (u64 i = 0; i < bytes.size(); ++i) { + this->m_blockValueCounts[bytes[i]]++; + + this->m_byteCount++; + if (((this->m_byteCount % this->m_blockSize) == 0) || this->m_byteCount == (this->m_endAddress - 
this->m_startAddress)) [[unlikely]] { + auto typeDist = calculateTypeDistribution(this->m_blockValueCounts, this->m_blockSize); + for (u8 i = 0; i < typeDist.size(); i++) + this->m_yBlockTypeDistributions[i][this->m_blockCount] = typeDist[i] * 100; + + this->m_blockCount += 1; + this->m_blockValueCounts = { 0 }; + } + } + + processFinalize(); + } + + void processFinalize() { + // Only save at most m_sampleSize elements of the result + for (u8 i = 0; i < this->m_yBlockTypeDistributions.size(); ++i) + this->m_yBlockTypeDistributions[i] = sampleData(this->m_yBlockTypeDistributions[i], std::min(this->m_blockCount, this->m_sampleSize)); + + size_t stride = std::max(1.0, double(this->m_blockCount / this->m_yBlockTypeDistributions[0].size())) + 1; + this->m_blockCount = this->m_yBlockTypeDistributions[0].size(); + + // The m_xBlockTypeDistributions attribute is used to specify the position of entropy + // values in the plot when the Y axis doesn't start at 0 + this->m_xBlockTypeDistributions.clear(); + this->m_xBlockTypeDistributions.resize(this->m_blockCount); + for (u64 i = 0; i < this->m_blockCount; ++i) + this->m_xBlockTypeDistributions[i] = (this->m_startAddress / this->m_blockSize) + stride*i; + --this->m_blockCount; + } + + private: + // Variables used to store the parameters to process + + // The size of the block we are considering for the analysis + u64 m_blockSize; + u64 m_startAddress; + u64 m_endAddress; + // Start / size of the file + u64 m_baseAddress; + u64 m_fileSize; + + // Position of the handle inside the plot + double m_handlePosition = 0.0; + + // Hold the number of block that have been processed + // during the chunk based entropy analysis + u64 m_blockCount; + + // Hold the number of bytes that have been processed + // during the analysis (useful for the iterative analysis) + u64 m_byteCount; + + // Sampling size, number of elements displayed in the plot, + // avoid showing to many data because it decreased the frame rate + size_t m_sampleSize; + + 
// Array used to hold the occurrences of each byte + // (useful for the iterative analysis) + std::array m_blockValueCounts; + + // The m_xBlockTypeDistributions attributes is used to specify the position of + // the values in the plot when the Y axis doesn't start at 0 + std::vector m_xBlockTypeDistributions; + // Hold the result of the byte distribution analysis + std::array, 12> m_yBlockTypeDistributions; + std::atomic m_processing = false; + }; +} diff --git a/plugins/builtin/include/content/views/view_information.hpp b/plugins/builtin/include/content/views/view_information.hpp index 2835a808a..603930c13 100644 --- a/plugins/builtin/include/content/views/view_information.hpp +++ b/plugins/builtin/include/content/views/view_information.hpp @@ -26,13 +26,7 @@ namespace hex::plugin::builtin { double m_averageEntropy = -1.0; double m_highestBlockEntropy = -1.0; double m_plainTextCharacterPercentage = -1.0; - std::vector m_blockEntropy; - std::array, 12> m_blockTypeDistributions; - std::atomic m_processedBlockCount = 0; - double m_diagramHandlePosition = 0.0; - - std::array m_valueCounts = { 0 }; TaskHolder m_analyzerTask; Region m_analyzedRegion = { 0, 0 }; @@ -42,8 +36,16 @@ namespace hex::plugin::builtin { DiagramDigram m_digram; DiagramLayeredDistribution m_layeredDistribution; + DiagramByteDistribution m_byteDistribution; + DiagramByteTypesDistribution m_byteTypesDistribution; + DiagramChunkBasedEntropyAnalysis m_chunkBasedEntropy; void analyze(); + + // User controlled input (referenced by ImgGui) + int m_inputChunkSize = 0; + int m_inputStartAddress = 0; + int m_inputEndAddress = 0; }; -} \ No newline at end of file +} diff --git a/plugins/builtin/source/content/pl_visualizers.cpp b/plugins/builtin/source/content/pl_visualizers.cpp index 764d5334e..26466a253 100644 --- a/plugins/builtin/source/content/pl_visualizers.cpp +++ b/plugins/builtin/source/content/pl_visualizers.cpp @@ -20,6 +20,8 @@ #include +#include + namespace hex::plugin::builtin { namespace { 
@@ -36,20 +38,6 @@ namespace hex::plugin::builtin { return result; } - template - std::vector sampleData(const std::vector &data, size_t count) { - size_t stride = std::max(1.0, double(data.size()) / count); - - std::vector result; - result.reserve(count); - - for (size_t i = 0; i < data.size(); i += stride) { - result.push_back(data[i]); - } - - return result; - } - } namespace { @@ -455,6 +443,21 @@ namespace hex::plugin::builtin { (waveData.size() / sampleRate) / 60, (waveData.size() / sampleRate) % 60); } + void drawChunkBasedEntropyVisualizer(pl::ptrn::Pattern &, pl::ptrn::Iteratable &, bool shouldReset, std::span arguments) { + // variable used to store the result to avoid having to recalculate the result at each frame + static DiagramChunkBasedEntropyAnalysis analyzer; + + // compute data + if (shouldReset) { + auto pattern = arguments[0].toPattern(); + auto chunkSize = arguments[1].toUnsigned(); + analyzer.process(pattern->getBytes(), chunkSize); + } + + // show results + analyzer.draw(ImVec2(400, 250), ImPlotFlags_NoChild | ImPlotFlags_CanvasOnly); + } + } void registerPatternLanguageVisualizers() { @@ -465,6 +468,7 @@ namespace hex::plugin::builtin { ContentRegistry::PatternLanguage::addVisualizer("disassembler", drawDisassemblyVisualizer, 4); ContentRegistry::PatternLanguage::addVisualizer("3d", draw3DVisualizer, 2); ContentRegistry::PatternLanguage::addVisualizer("sound", drawSoundVisualizer, 3); + ContentRegistry::PatternLanguage::addVisualizer("chunk_entropy", drawChunkBasedEntropyVisualizer, 2); } -} \ No newline at end of file +} diff --git a/plugins/builtin/source/content/views/view_information.cpp b/plugins/builtin/source/content/views/view_information.cpp index 70b9bb208..b62f4b280 100644 --- a/plugins/builtin/source/content/views/view_information.cpp +++ b/plugins/builtin/source/content/views/view_information.cpp @@ -26,17 +26,19 @@ namespace hex::plugin::builtin { this->m_plainTextCharacterPercentage = -1.0; this->m_averageEntropy = -1.0; 
this->m_highestBlockEntropy = -1.0; - this->m_blockEntropy.clear(); this->m_blockSize = 0; - this->m_valueCounts.fill(0x00); this->m_dataMimeType.clear(); this->m_dataDescription.clear(); - this->m_analyzedRegion = { 0, 0 }; + this->m_analyzedRegion = { 0, 0 }; }); EventManager::subscribe(this, [this](Region region) { - if (this->m_blockSize != 0) - this->m_diagramHandlePosition = region.getStartAddress() / double(this->m_blockSize); + // Set the position of the diagram relative to the place where + // the user clicked inside the hex editor view + if (this->m_blockSize != 0) { + this->m_byteTypesDistribution.setHandlePosition(region.getStartAddress()); + this->m_chunkBasedEntropy.setHandlePosition(region.getStartAddress()); + } }); EventManager::subscribe(this, [this](const auto*) { @@ -61,70 +63,27 @@ namespace hex::plugin::builtin { EventManager::unsubscribe(this); } - static double calculateEntropy(std::array &valueCounts, size_t blockSize) { - double entropy = 0; - - for (auto count : valueCounts) { - if (count == 0) [[unlikely]] - continue; - - double probability = static_cast(count) / blockSize; - - entropy += probability * std::log2(probability); - } - - return std::min(1.0, (-entropy) / 8); // log2(256) = 8 - } - - static std::array calculateTypeDistribution(std::array &valueCounts, size_t blockSize) { - std::array counts = {}; - - for (u16 value = 0x00; value < u16(valueCounts.size()); value++) { - const auto &count = valueCounts[value]; - - if (count == 0) [[unlikely]] - continue; - - if (std::iscntrl(value)) - counts[0] += count; - if (std::isprint(value)) - counts[1] += count; - if (std::isspace(value)) - counts[2] += count; - if (std::isblank(value)) - counts[3] += count; - if (std::isgraph(value)) - counts[4] += count; - if (std::ispunct(value)) - counts[5] += count; - if (std::isalnum(value)) - counts[6] += count; - if (std::isalpha(value)) - counts[7] += count; - if (std::isupper(value)) - counts[8] += count; - if (std::islower(value)) - counts[9] 
+= count; - if (std::isdigit(value)) - counts[10] += count; - if (std::isxdigit(value)) - counts[11] += count; - } - - std::array distribution = {}; - for (u32 i = 0; i < distribution.size(); i++) - distribution[i] = static_cast(counts[i]) / blockSize; - - return distribution; - } - void ViewInformation::analyze() { this->m_analyzerTask = TaskManager::createTask("hex.builtin.view.information.analyzing", 0, [this](auto &task) { auto provider = ImHexApi::Provider::get(); - task.setMaxValue(provider->getSize()); + if ((this->m_inputChunkSize <= 0) + || (this->m_inputStartAddress < 0) + || (this->m_inputStartAddress >= this->m_inputEndAddress) + || ((size_t) this->m_inputEndAddress > provider->getSize())) { + // Invalid parameters, set default one + this->m_inputChunkSize = 256; + this->m_inputStartAddress = 0; + this->m_inputEndAddress = provider->getSize(); + } - this->m_analyzedRegion = { provider->getBaseAddress(), provider->getBaseAddress() + provider->getSize() }; + task.setMaxValue(this->m_inputEndAddress - this->m_inputStartAddress); + + // Modify the analyzed region + this->m_analyzedRegion = { + provider->getBaseAddress() + this->m_inputStartAddress, + size_t(this->m_inputEndAddress - this->m_inputStartAddress) + }; { magic::compile(); @@ -133,68 +92,50 @@ namespace hex::plugin::builtin { this->m_dataMimeType = magic::getMIMEType(provider); } - this->m_dataValid = true; - { this->m_blockSize = std::max(std::ceil(provider->getSize() / 2048.0F), 256); - std::array blockValueCounts = { 0 }; - - const auto blockCount = (provider->getSize() / this->m_blockSize) + 1; - - this->m_blockTypeDistributions.fill({}); - this->m_blockEntropy.clear(); - this->m_blockEntropy.resize(blockCount); - for (auto &blockDistribution : this->m_blockTypeDistributions) - blockDistribution.resize(blockCount); - - this->m_valueCounts.fill(0); - this->m_processedBlockCount = 0; this->m_averageEntropy = -1.0; this->m_highestBlockEntropy = -1.0; this->m_plainTextCharacterPercentage = -1.0; 
- this->m_digram.process(provider, this->m_analyzedRegion.getStartAddress(), this->m_analyzedRegion.getSize()); - this->m_layeredDistribution.process(provider, this->m_analyzedRegion.getStartAddress(), this->m_analyzedRegion.getSize()); + // Setup / start each analysis + this->m_byteDistribution.reset(); + this->m_digram.reset(this->m_inputEndAddress - this->m_inputStartAddress); + this->m_layeredDistribution.reset(this->m_inputEndAddress - this->m_inputStartAddress); + this->m_byteTypesDistribution.reset(this->m_inputStartAddress, this->m_inputEndAddress, + provider->getBaseAddress(), provider->getSize()); + this->m_chunkBasedEntropy.reset(this->m_inputChunkSize, this->m_inputStartAddress, this->m_inputEndAddress, + provider->getBaseAddress(), provider->getSize()); + + // Create a handle to the file auto reader = prv::BufferedReader(provider); - reader.setEndAddress(provider->getBaseAddress() + provider->getSize()); + reader.seek(provider->getBaseAddress() + this->m_inputStartAddress); + reader.setEndAddress(provider->getBaseAddress() + this->m_inputEndAddress); u64 count = 0; + // Loop over each byte of the [part of the] file and update each analysis + // one byte at a time in order to process the file only once for (u8 byte : reader) { - this->m_valueCounts[byte]++; - blockValueCounts[byte]++; - - count++; - if (((count % this->m_blockSize) == 0) || count == provider->getSize()) [[unlikely]] { - this->m_blockEntropy[this->m_processedBlockCount] = calculateEntropy(blockValueCounts, this->m_blockSize); - - { - auto typeDist = calculateTypeDistribution(blockValueCounts, this->m_blockSize); - for (u8 i = 0; i < typeDist.size(); i++) - this->m_blockTypeDistributions[i][this->m_processedBlockCount] = typeDist[i] * 100; - - - } - - this->m_processedBlockCount += 1; - blockValueCounts = { 0 }; - task.update(count); - } + this->m_byteDistribution.update(byte); + this->m_byteTypesDistribution.update(byte); + this->m_chunkBasedEntropy.update(byte); +
this->m_layeredDistribution.update(byte); + this->m_digram.update(byte); + ++count; + task.update(count); } - this->m_averageEntropy = calculateEntropy(this->m_valueCounts, provider->getSize()); - if (!this->m_blockEntropy.empty()) - this->m_highestBlockEntropy = *std::max_element(this->m_blockEntropy.begin(), this->m_blockEntropy.end()); - else - this->m_highestBlockEntropy = 0; - - this->m_plainTextCharacterPercentage = std::reduce(this->m_blockTypeDistributions[2].begin(), this->m_blockTypeDistributions[2].end()) / this->m_blockTypeDistributions[2].size(); - this->m_plainTextCharacterPercentage += std::reduce(this->m_blockTypeDistributions[4].begin(), this->m_blockTypeDistributions[4].end()) / this->m_blockTypeDistributions[4].size(); + this->m_averageEntropy = this->m_chunkBasedEntropy.calculateEntropy(this->m_byteDistribution.get(), this->m_inputEndAddress - this->m_inputStartAddress); + this->m_highestBlockEntropy = this->m_chunkBasedEntropy.getHighestBlockEntropy(); + this->m_plainTextCharacterPercentage = this->m_byteTypesDistribution.getPlainTextCharacterPercentage(); } + + this->m_dataValid = true; }); - } + } void ViewInformation::drawContent() { if (ImGui::Begin(View::toWindowName("hex.builtin.view.information.name").c_str(), &this->getWindowOpenState(), ImGuiWindowFlags_NoCollapse)) { @@ -204,6 +145,16 @@ namespace hex::plugin::builtin { if (ImHexApi::Provider::isValid() && provider->isReadable()) { ImGui::BeginDisabled(this->m_analyzerTask.isRunning()); { + ImGui::Header("hex.builtin.view.disassembler.settings.header"_lang); + + ImGui::InputInt("hex.builtin.view.information.block_size"_lang, &this->m_inputChunkSize, ImGuiInputTextFlags_CharsDecimal); + + // Clamp the values since the user can Ctrl+Click to transform the slider into an input + ImGui::SliderInt("hex.builtin.common.begin"_lang, &this->m_inputStartAddress, 0, provider->getSize(), "%d", ImGuiSliderFlags_AlwaysClamp); + + // Clamp the values since the user can Ctrl+Click to transform the
slider into an input + ImGui::SliderInt("hex.builtin.common.end"_lang, &this->m_inputEndAddress, 0, provider->getSize(), "%d", ImGuiSliderFlags_AlwaysClamp); + if (ImGui::Button("hex.builtin.view.information.analyze"_lang, ImVec2(ImGui::GetContentRegionAvail().x, 0))) this->analyze(); } @@ -215,7 +166,7 @@ namespace hex::plugin::builtin { ImGui::NewLine(); } - if (this->m_dataValid) { + if (!this->m_analyzerTask.isRunning() && this->m_dataValid) { // Analyzed region ImGui::Header("hex.builtin.view.information.region"_lang, true); @@ -279,70 +230,33 @@ namespace hex::plugin::builtin { ImGui::PushStyleColor(ImGuiCol_FrameBg, ImGui::GetColorU32(ImGuiCol_WindowBg)); ImPlot::PushStyleColor(ImPlotCol_FrameBg, ImGui::GetColorU32(ImGuiCol_WindowBg)); + // Display byte distribution analysis ImGui::TextUnformatted("hex.builtin.view.information.distribution"_lang); - if (ImPlot::BeginPlot("##distribution", ImVec2(-1, 0), ImPlotFlags_NoChild | ImPlotFlags_NoLegend | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect)) { - ImPlot::SetupAxes("hex.builtin.common.value"_lang, "hex.builtin.common.count"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock | ImPlotAxisFlags_LogScale); - ImPlot::SetupAxesLimits(0, 256, 1, double(*std::max_element(this->m_valueCounts.begin(), this->m_valueCounts.end())) * 1.1F, ImGuiCond_Always); - - static auto x = [] { - std::array result { 0 }; - std::iota(result.begin(), result.end(), 0); - return result; - }(); - - ImPlot::PlotBars("##bytes", x.data(), this->m_valueCounts.data(), x.size(), 1.0); - - ImPlot::EndPlot(); - } + this->m_byteDistribution.draw( + ImVec2(-1, 0), + ImPlotFlags_NoChild | ImPlotFlags_NoLegend | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect + ); + // Display byte types distribution analysis ImGui::TextUnformatted("hex.builtin.view.information.byte_types"_lang); - if (ImPlot::BeginPlot("##byte_types", ImVec2(-1, 0), ImPlotFlags_NoChild | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect | ImPlotFlags_AntiAliased)) { -
ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.common.percentage"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock); - ImPlot::SetupAxesLimits(0, this->m_blockTypeDistributions[0].size(), -0.1F, 100.1F, ImGuiCond_Always); - ImPlot::SetupLegend(ImPlotLocation_South, ImPlotLegendFlags_Horizontal | ImPlotLegendFlags_Outside); - - constexpr static std::array Names = { "iscntrl", "isprint", "isspace", "isblank", "isgraph", "ispunct", "isalnum", "isalpha", "isupper", "islower", "isdigit", "isxdigit" }; - - for (u32 i = 0; i < 12; i++) { - ImPlot::PlotLine(Names[i], this->m_blockTypeDistributions[i].data(), this->m_processedBlockCount); - } - - if (ImPlot::DragLineX(1, &this->m_diagramHandlePosition, ImGui::GetStyleColorVec4(ImGuiCol_Text))) { - u64 address = u64(std::max(this->m_diagramHandlePosition, 0) * this->m_blockSize) + provider->getBaseAddress(); - address = std::min(address, provider->getBaseAddress() + provider->getSize() - 1); - ImHexApi::HexEditor::setSelection(address, 1); - } - - ImPlot::EndPlot(); - } - - ImGui::NewLine(); + this->m_byteTypesDistribution.draw( + ImVec2(-1, 0), + ImPlotFlags_NoChild | ImPlotFlags_NoMenus | ImPlotFlags_NoBoxSelect | ImPlotFlags_AntiAliased, + true + ); + // Display chunk based entropy analysis ImGui::TextUnformatted("hex.builtin.view.information.entropy"_lang); + this->m_chunkBasedEntropy.draw( + ImVec2(-1, 0), + ImPlotFlags_NoChild | ImPlotFlags_CanvasOnly | ImPlotFlags_AntiAliased, + true + ); - if (ImPlot::BeginPlot("##entropy", ImVec2(-1, 0), ImPlotFlags_NoChild | ImPlotFlags_CanvasOnly | ImPlotFlags_AntiAliased)) { - ImPlot::SetupAxes("hex.builtin.common.address"_lang, "hex.builtin.view.information.entropy"_lang, ImPlotAxisFlags_Lock, ImPlotAxisFlags_Lock); - ImPlot::SetupAxesLimits(0, this->m_blockEntropy.size(), -0.1F, 1.1F, ImGuiCond_Always); - - ImPlot::PlotLine("##entropy_line", this->m_blockEntropy.data(), this->m_processedBlockCount); - - if (ImPlot::DragLineX(1, &this->m_diagramHandlePosition, 
ImGui::GetStyleColorVec4(ImGuiCol_Text))) { - u64 address = u64(std::max(this->m_diagramHandlePosition, 0) * this->m_blockSize) + provider->getBaseAddress(); - address = std::min(address, provider->getBaseAddress() + provider->getSize() - 1); - ImHexApi::HexEditor::setSelection(address, 1); - } - - ImPlot::EndPlot(); - } ImPlot::PopStyleColor(); ImGui::PopStyleColor(); ImGui::NewLine(); - - this->m_diagramHandlePosition = std::clamp( - this->m_diagramHandlePosition, - this->m_analyzedRegion.getStartAddress() / double(this->m_blockSize), - this->m_analyzedRegion.getEndAddress() / double(this->m_blockSize)); } // Entropy information @@ -355,7 +269,7 @@ namespace hex::plugin::builtin { ImGui::TableNextColumn(); ImGui::TextFormatted("{}", "hex.builtin.view.information.block_size"_lang); ImGui::TableNextColumn(); - ImGui::TextFormatted("hex.builtin.view.information.block_size.desc"_lang, this->m_blockEntropy.size(), this->m_blockSize); + ImGui::TextFormatted("hex.builtin.view.information.block_size.desc"_lang, this->m_chunkBasedEntropy.getSize(), this->m_chunkBasedEntropy.getChunkSize()); ImGui::TableNextColumn(); ImGui::TextFormatted("{}", "hex.builtin.view.information.file_entropy"_lang); @@ -423,4 +337,4 @@ namespace hex::plugin::builtin { ImGui::End(); } -} \ No newline at end of file +}