From 43f5cc622e608dbcd4f1558d47bb12806c05abc9 Mon Sep 17 00:00:00 2001 From: WerWolv Date: Sun, 22 Nov 2020 19:43:35 +0100 Subject: [PATCH] Allow loading of huge files efficiently --- include/providers/file_provider.hpp | 2 +- include/providers/provider.hpp | 20 +++++- include/views/view_information.hpp | 5 +- libs/ImGui/include/imgui_memory_editor.h | 12 ---- source/provider/file_provider.cpp | 9 ++- source/views/view_hexeditor.cpp | 42 ++++++++--- source/views/view_information.cpp | 90 +++++++++++++++--------- source/views/view_strings.cpp | 4 +- 8 files changed, 121 insertions(+), 63 deletions(-) diff --git a/include/providers/file_provider.hpp b/include/providers/file_provider.hpp index d6fcfd0b9..08b700d65 100644 --- a/include/providers/file_provider.hpp +++ b/include/providers/file_provider.hpp @@ -19,7 +19,7 @@ namespace hex::prv { void read(u64 offset, void *buffer, size_t size) override; void write(u64 offset, void *buffer, size_t size) override; - size_t getSize() override; + size_t getActualSize() override; std::vector> getDataInformation() override; diff --git a/include/providers/provider.hpp b/include/providers/provider.hpp index 6f01e02b4..ac69dab70 100644 --- a/include/providers/provider.hpp +++ b/include/providers/provider.hpp @@ -2,6 +2,7 @@ #include +#include #include #include @@ -9,6 +10,8 @@ namespace hex::prv { class Provider { public: + constexpr static size_t PageSize = 0x1000'0000; + Provider() = default; virtual ~Provider() = default; @@ -18,9 +21,24 @@ namespace hex::prv { virtual void read(u64 offset, void *buffer, size_t size) = 0; virtual void write(u64 offset, void *buffer, size_t size) = 0; - virtual size_t getSize() = 0; + virtual size_t getActualSize() = 0; + + u32 getPageCount() { return std::ceil(this->getActualSize() / double(PageSize)); } + u32 getCurrentPage() const { return this->m_currPage; } + void setCurrentPage(u32 page) { if (page < getPageCount()) this->m_currPage = page; } + + virtual size_t getBaseAddress() { + return PageSize * this->m_currPage; + } + + virtual size_t getSize() { + return std::min(this->getActualSize() - PageSize * this->m_currPage, PageSize); + } virtual std::vector> getDataInformation() = 0; + + protected: + u32 m_currPage = 0; }; } \ No newline at end of file diff --git a/include/views/view_information.hpp b/include/views/view_information.hpp index c8c704987..a4a7d166b 100644 --- a/include/views/view_information.hpp +++ b/include/views/view_information.hpp @@ -23,13 +23,16 @@ namespace hex { prv::Provider* &m_dataProvider; bool m_windowOpen = true; + bool m_dataValid = false; u32 m_blockSize = 0; float m_averageEntropy = 0; float m_highestBlockEntropy = 0; std::vector m_blockEntropy; std::array m_valueCounts = { 0 }; - bool m_shouldInvalidate = true; + bool m_shouldInvalidate = false; + + std::pair m_analyzedRegion = { 0, 0 }; std::string m_fileDescription; std::string m_mimeType; diff --git a/libs/ImGui/include/imgui_memory_editor.h b/libs/ImGui/include/imgui_memory_editor.h index 665ab6d2a..c6a3c2307 100644 --- a/libs/ImGui/include/imgui_memory_editor.h +++ b/libs/ImGui/include/imgui_memory_editor.h @@ -547,18 +547,6 @@ struct MemoryEditor ImGui::SameLine(); ImGui::Text(format_range, s.AddrDigitsCount, base_display_addr, s.AddrDigitsCount, base_display_addr + mem_size - 1); - ImGui::SameLine(); - ImGui::PushItemWidth((s.AddrDigitsCount + 1) * s.GlyphWidth + style.FramePadding.x * 2.0f); - if (ImGui::InputText("##addr", AddrInputBuf, 32, ImGuiInputTextFlags_CharsHexadecimal | ImGuiInputTextFlags_EnterReturnsTrue)) - { - size_t goto_addr; - if (sscanf(AddrInputBuf, "%" _PRISizeT "X", &goto_addr) == 1) - { - GotoAddr = goto_addr - base_display_addr; - HighlightMin = HighlightMax = (size_t)-1; - } - } - ImGui::PopItemWidth(); if (GotoAddr != (size_t)-1) { diff --git a/source/provider/file_provider.cpp b/source/provider/file_provider.cpp index 58b2a384d..3020e923c 100644 --- a/source/provider/file_provider.cpp +++ b/source/provider/file_provider.cpp @@ -22,7 +22,6 @@ namespace hex::prv { this->m_file = fopen(path.data(), "rb"); this->m_writable = false; } - } FileProvider::~FileProvider() { @@ -48,7 +47,7 @@ namespace hex::prv { if ((offset + size) > this->getSize() || buffer == nullptr || size == 0) return; - fseeko64(this->m_file, offset, SEEK_SET); + fseeko64(this->m_file, this->getCurrentPage() * PageSize + offset, SEEK_SET); fread(buffer, 1, size, this->m_file); } @@ -56,11 +55,11 @@ namespace hex::prv { if (buffer == nullptr || size == 0) return; - fseeko64(this->m_file, offset, SEEK_SET); + fseeko64(this->m_file, this->getCurrentPage() * PageSize + offset, SEEK_SET); fwrite(buffer, 1, size, this->m_file); } - size_t FileProvider::getSize() { + size_t FileProvider::getActualSize() { fseeko64(this->m_file, 0, SEEK_END); return ftello64(this->m_file); } @@ -69,7 +68,7 @@ namespace hex::prv { std::vector> result; result.emplace_back("File path", this->m_path); - result.emplace_back("Size", hex::toByteString(this->getSize())); + result.emplace_back("Size", hex::toByteString(this->getActualSize())); if (this->m_fileStatsValid) { result.emplace_back("Creation time", ctime(&this->m_fileStats.st_ctime)); diff --git a/source/views/view_hexeditor.cpp b/source/views/view_hexeditor.cpp index f62452e9e..7367c53a0 100644 --- a/source/views/view_hexeditor.cpp +++ b/source/views/view_hexeditor.cpp @@ -70,9 +70,32 @@ namespace hex { size_t dataSize = (this->m_dataProvider == nullptr || !this->m_dataProvider->isReadable()) ? 0x00 : this->m_dataProvider->getSize(); - this->m_memoryEditor.DrawWindow("Hex Editor", this, dataSize); + this->m_memoryEditor.DrawWindow("Hex Editor", this, dataSize, dataSize == 0 ? 0x00 : this->m_dataProvider->getBaseAddress()); if (dataSize != 0x00) { + ImGui::Begin("Hex Editor"); + ImGui::SameLine(); + ImGui::Text("Page %d / %d", this->m_dataProvider->getCurrentPage() + 1, this->m_dataProvider->getPageCount()); + ImGui::SameLine(); + + if (ImGui::ArrowButton("prevPage", ImGuiDir_Left)) { + this->m_dataProvider->setCurrentPage(this->m_dataProvider->getCurrentPage() - 1); + + size_t dataPreviewStart = std::min(this->m_memoryEditor.DataPreviewAddr, this->m_memoryEditor.DataPreviewAddrEnd); + View::postEvent(Events::ByteSelected, &dataPreviewStart); + } + + ImGui::SameLine(); + + if (ImGui::ArrowButton("nextPage", ImGuiDir_Right)) { + this->m_dataProvider->setCurrentPage(this->m_dataProvider->getCurrentPage() + 1); + + size_t dataPreviewStart = std::min(this->m_memoryEditor.DataPreviewAddr, this->m_memoryEditor.DataPreviewAddrEnd); + View::postEvent(Events::ByteSelected, &dataPreviewStart); + } + + ImGui::End(); + this->drawSearchPopup(); this->drawGotoPopup(); } @@ -578,8 +601,8 @@ R"( if (ImGui::BeginTabItem("Begin")) { ImGui::InputScalar("##nolabel", ImGuiDataType_U64, &this->m_gotoAddress, nullptr, nullptr, "%llx", ImGuiInputTextFlags_CharsHexadecimal); - if (this->m_gotoAddress >= this->m_dataProvider->getSize()) - this->m_gotoAddress = this->m_dataProvider->getSize() - 1; + if (this->m_gotoAddress >= this->m_dataProvider->getActualSize()) + this->m_gotoAddress = this->m_dataProvider->getActualSize() - 1; newOffset = this->m_gotoAddress; @@ -596,9 +619,9 @@ R"( s64 currHighlightStart = std::min(this->m_memoryEditor.DataPreviewAddr, this->m_memoryEditor.DataPreviewAddrEnd); newOffset = this->m_gotoAddress + currHighlightStart; - if (newOffset >= this->m_dataProvider->getSize()) { - newOffset = this->m_dataProvider->getSize() - 1; - this->m_gotoAddress = (this->m_dataProvider->getSize() - 1) - currHighlightStart; + if (newOffset >= this->m_dataProvider->getActualSize()) { + newOffset = this->m_dataProvider->getActualSize() - 1; + this->m_gotoAddress = (this->m_dataProvider->getActualSize() - 1) - currHighlightStart; } else if (newOffset < 0) { newOffset = 0; this->m_gotoAddress = -currHighlightStart; @@ -609,15 +632,16 @@ R"( if (ImGui::BeginTabItem("End")) { ImGui::InputScalar("##nolabel", ImGuiDataType_U64, &this->m_gotoAddress, nullptr, nullptr, "%llx", ImGuiInputTextFlags_CharsHexadecimal); - if (this->m_gotoAddress >= this->m_dataProvider->getSize()) - this->m_gotoAddress = this->m_dataProvider->getSize() - 1; + if (this->m_gotoAddress >= this->m_dataProvider->getActualSize()) + this->m_gotoAddress = this->m_dataProvider->getActualSize() - 1; - newOffset = (this->m_dataProvider->getSize() - 1) - this->m_gotoAddress; + newOffset = (this->m_dataProvider->getActualSize() - 1) - this->m_gotoAddress; ImGui::EndTabItem(); } if (ImGui::Button("Goto")) { + this->m_dataProvider->setCurrentPage(std::floor(newOffset / double(prv::Provider::PageSize))); this->m_memoryEditor.GotoAddr = newOffset; this->m_memoryEditor.DataPreviewAddr = newOffset; this->m_memoryEditor.DataPreviewAddrEnd = newOffset; diff --git a/source/views/view_information.cpp b/source/views/view_information.cpp index bb474f142..3eadfbfb2 100644 --- a/source/views/view_information.cpp +++ b/source/views/view_information.cpp @@ -17,7 +17,15 @@ namespace hex { ViewInformation::ViewInformation(prv::Provider* &dataProvider) : View(), m_dataProvider(dataProvider) { View::subscribeEvent(Events::DataChanged, [this](const void*) { - this->m_shouldInvalidate = true; + this->m_dataValid = false; + this->m_highestBlockEntropy = 0; + this->m_blockEntropy.clear(); + this->m_averageEntropy = 0; + this->m_blockSize = 0; + this->m_valueCounts.fill(0x00); + this->m_mimeType = ""; + this->m_fileDescription = ""; + this->m_analyzedRegion = { 0, 0 }; }); } @@ -48,6 +56,8 @@ namespace hex { if (this->m_dataProvider != nullptr && this->m_dataProvider->isReadable()) { if (this->m_shouldInvalidate) { + this->m_analyzedRegion = { this->m_dataProvider->getBaseAddress(), this->m_dataProvider->getBaseAddress() + this->m_dataProvider->getSize() }; + { this->m_blockSize = std::ceil(this->m_dataProvider->getSize() / 2048.0F); std::vector buffer(this->m_blockSize, 0x00); @@ -70,7 +80,7 @@ namespace hex { } { - std::vector buffer(std::min(this->m_dataProvider->getSize(), size_t(0xFF'FFFF)), 0x00); + std::vector buffer(this->m_dataProvider->getSize(), 0x00); this->m_dataProvider->read(0x00, buffer.data(), buffer.size()); this->m_fileDescription.clear(); @@ -112,53 +122,67 @@ namespace hex { this->m_shouldInvalidate = false; + this->m_dataValid = true; } } ImGui::NewLine(); - for (auto &[name, value] : this->m_dataProvider->getDataInformation()) { - ImGui::LabelText(name.c_str(), "%s", value.c_str()); - } + if (ImGui::Button("Analyze current page")) + this->m_shouldInvalidate = true; ImGui::NewLine(); ImGui::Separator(); ImGui::NewLine(); - if (!this->m_fileDescription.empty()) { - ImGui::TextUnformatted("Description:"); - ImGui::TextWrapped("%s", this->m_fileDescription.c_str()); + if (this->m_dataValid) { + + for (auto &[name, value] : this->m_dataProvider->getDataInformation()) { + ImGui::LabelText(name.c_str(), "%s", value.c_str()); + } + + ImGui::LabelText("Analyzed region", "0x%llx - 0x%llx", this->m_analyzedRegion.first, this->m_analyzedRegion.second); + ImGui::NewLine(); - } - - if (!this->m_mimeType.empty()) { - ImGui::TextUnformatted("MIME Type:"); - ImGui::TextWrapped("%s", this->m_mimeType.c_str()); + ImGui::Separator(); ImGui::NewLine(); - } - ImGui::Separator(); - ImGui::NewLine(); + if (!this->m_fileDescription.empty()) { + ImGui::TextUnformatted("Description:"); + ImGui::TextWrapped("%s", this->m_fileDescription.c_str()); + ImGui::NewLine(); + } - ImGui::Text("Byte Distribution"); - ImGui::PlotHistogram("##nolabel", this->m_valueCounts.data(), 256, 0, nullptr, FLT_MAX, FLT_MAX, ImVec2(0, 100)); + if (!this->m_mimeType.empty()) { + ImGui::TextUnformatted("MIME Type:"); + ImGui::TextWrapped("%s", this->m_mimeType.c_str()); + ImGui::NewLine(); + } - ImGui::NewLine(); - ImGui::Separator(); - ImGui::NewLine(); - - ImGui::Text("Entropy"); - ImGui::PlotLines("##nolabel", this->m_blockEntropy.data(), this->m_blockEntropy.size(), 0, nullptr, FLT_MAX, FLT_MAX, ImVec2(0, 100)); - - ImGui::NewLine(); - - ImGui::LabelText("Block size", "2048 blocks à %lu bytes", this->m_blockSize); - ImGui::LabelText("Average entropy", "%.8f", this->m_averageEntropy); - ImGui::LabelText("Highest entropy block", "%.8f", this->m_highestBlockEntropy); - - if (this->m_averageEntropy > 0.83 && this->m_highestBlockEntropy > 0.9) { + ImGui::Separator(); ImGui::NewLine(); - ImGui::TextColored(ImVec4(0.92F, 0.25F, 0.2F, 1.0F), "This data is most likely encrypted or compressed!"); + + ImGui::Text("Byte Distribution"); + ImGui::PlotHistogram("##nolabel", this->m_valueCounts.data(), 256, 0, nullptr, FLT_MAX, FLT_MAX,ImVec2(0, 100)); + + ImGui::NewLine(); + ImGui::Separator(); + ImGui::NewLine(); + + ImGui::Text("Entropy"); + ImGui::PlotLines("##nolabel", this->m_blockEntropy.data(), this->m_blockEntropy.size(), 0, nullptr, FLT_MAX, FLT_MAX, ImVec2(0, 100)); + + ImGui::NewLine(); + + ImGui::LabelText("Block size", "2048 blocks à %lu bytes", this->m_blockSize); + ImGui::LabelText("Average entropy", "%.8f", this->m_averageEntropy); + ImGui::LabelText("Highest entropy block", "%.8f", this->m_highestBlockEntropy); + + if (this->m_averageEntropy > 0.83 && this->m_highestBlockEntropy > 0.9) { + ImGui::NewLine(); + ImGui::TextColored(ImVec4(0.92F, 0.25F, 0.2F, 1.0F),"This data is most likely encrypted or compressed!"); + } + } } diff --git a/source/views/view_strings.cpp b/source/views/view_strings.cpp index b51a38d2a..1e6ff2450 100644 --- a/source/views/view_strings.cpp +++ b/source/views/view_strings.cpp @@ -8,7 +8,7 @@ namespace hex { ViewStrings::ViewStrings(prv::Provider* &dataProvider) : View(), m_dataProvider(dataProvider) { View::subscribeEvent(Events::DataChanged, [this](const void*){ - this->m_shouldInvalidate = true; + this->m_foundStrings.clear(); }); this->m_filter = new char[0xFFFF]; @@ -64,6 +64,8 @@ namespace hex { this->m_shouldInvalidate = true; ImGui::InputText("Filter", this->m_filter, 0xFFFF); + if (ImGui::Button("Extract")) + this->m_shouldInvalidate = true; ImGui::Separator(); ImGui::NewLine();