From 5a6e5d22557715d1207599b727b1555fea2f161d Mon Sep 17 00:00:00 2001 From: WerWolv Date: Sun, 7 May 2023 23:27:43 +0200 Subject: [PATCH] build: Switch to better interval tree implementation --- cmake/build_helpers.cmake | 2 +- lib/external/intervaltree/LICENSE | 36 +- lib/external/intervaltree/README.md | 148 ++++++-- lib/external/intervaltree/include/IITree.h | 88 +++++ .../intervaltree/include/IntervalTree.h | 325 ------------------ lib/external/pattern_language | 2 +- .../content/providers/intel_hex_provider.hpp | 4 +- .../include/content/views/view_diff.hpp | 2 +- .../include/content/views/view_find.hpp | 10 +- .../content/providers/intel_hex_provider.cpp | 52 ++- .../providers/motorola_srec_provider.cpp | 6 +- .../source/content/views/view_find.cpp | 61 ++-- .../source/content/views/view_hashes.cpp | 2 +- 13 files changed, 308 insertions(+), 430 deletions(-) create mode 100644 lib/external/intervaltree/include/IITree.h delete mode 100644 lib/external/intervaltree/include/IntervalTree.h diff --git a/cmake/build_helpers.cmake b/cmake/build_helpers.cmake index 8b5bbe419..4d3c679f6 100644 --- a/cmake/build_helpers.cmake +++ b/cmake/build_helpers.cmake @@ -410,7 +410,7 @@ endfunction() macro(setupCompilerWarnings target) set(IMHEX_COMMON_FLAGS "-Wall -Wextra -Wpedantic -Werror") - set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds") + set(IMHEX_C_FLAGS "${IMHEX_COMMON_FLAGS} -Wno-restrict -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds -Wno-dangling-reference") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${IMHEX_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${IMHEX_C_FLAGS}") diff --git a/lib/external/intervaltree/LICENSE b/lib/external/intervaltree/LICENSE index be6b5b5ac..672c2534a 100644 --- a/lib/external/intervaltree/LICENSE +++ b/lib/external/intervaltree/LICENSE @@ -1,19 +1,23 @@ -Copyright (c) 2011 Erik Garrison +The MIT License -Permission is hereby granted, free of charge, 
to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: +Copyright (c) 2019 Dana-Farber Cancer Institute -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/lib/external/intervaltree/README.md b/lib/external/intervaltree/README.md index ed000d024..02cc06b21 100644 --- a/lib/external/intervaltree/README.md +++ b/lib/external/intervaltree/README.md @@ -1,37 +1,133 @@ -# intervaltree +## Introduction -## Overview - -An interval tree can be used to efficiently find a set of numeric intervals overlapping or containing another interval. - -This library provides a basic implementation of an interval tree using C++ templates, allowing the insertion of arbitrary types into the tree. +cgranges is a small C library for genomic interval overlap queries: given a +genomic region *r* and a set of regions *R*, finding all regions in *R* that +overlap *r*. Although this library is based on [interval tree][itree], a well +known data structure, the core algorithm of cgranges is distinct from all +existing implementations to the best of our knowledge. Specifically, the +interval tree in cgranges is implicitly encoded as a plain sorted array +(similar to [binary heap][bheap] but packed differently). Tree +traversal is achieved by jumping between array indices. This treatment makes +cgranges very efficient and compact in memory. The core algorithm can be +implemented in ~50 lines of C++ code, much shorter than others as well. Please +see the code comments in [cpp/IITree.h](cpp/IITree.h) for details. ## Usage -Add `#include "IntervalTree.h"` to the source files in which you will use the interval tree. +### Test with BED coverage -To make an IntervalTree to contain objects of class T, use: +For testing purposes, this repo implements the [bedtools coverage][bedcov] tool +with cgranges. The source code is located in the [test/](test) directory.
You +can compile and run the test with: +```sh +cd test && make +./bedcov-cr test1.bed test2.bed +``` +The first BED file is loaded into RAM and indexed. The depth and the breadth of +coverage of each region in the second file is computed by query against the +index of the first file. -```c++ -vector > intervals; -T a, b, c; -intervals.push_back(Interval(2, 10, a)); -intervals.push_back(Interval(3, 4, b)); -intervals.push_back(Interval(20, 100, c)); -IntervalTree tree; -tree = IntervalTree(intervals); +The [test/](test) directory also contains a few other implementations based on +[IntervalTree.h][ekg-itree] in C++, [quicksect][quicksect] in Cython and +[ncls][ncls] in Cython. The table below shows timing and peak memory on two +test BEDs available in the release page. The first BED contains GenCode +annotations with ~1.2 million lines, mixing all types of features. The second +contains ~10 million direct-RNA mappings. Time1a/Mem1a indexes the GenCode BED +into memory. Time1b adds whole chromosome intervals to the GenCode BED when +indexing. Time2/Mem2 indexes the RNA-mapping BED into memory. Numbers are +averaged over 5 runs. + +|Algo. |Lang. 
|Cov|Program |Time1a|Time1b|Mem1a |Time2 |Mem2 | +|:-------|:-----|:-:|:---------------|-----:|-----:|-------:|-----:|-------:| +|IAITree |C |Y |cgranges |9.0s |13.9s |19.1MB |4.6s |138.4MB | +|IAITree |C++ |Y |cpp/iitree.h |11.1s |24.5s |22.4MB |5.8s |160.4MB | +|CITree |C++ |Y |IntervalTree.h |17.4s |17.4s |27.2MB |10.5s |179.5MB | +|IAITree |C |N |cgranges |7.6s |13.0s |19.1MB |4.1s |138.4MB | +|AIList |C |N |3rd-party/AIList|7.9s |8.1s |14.4MB |6.5s |104.8MB | +|NCList |C |N |3rd-party/NCList|13.0s |13.4s |21.4MB |10.6s |183.0MB | +|AITree |C |N |3rd-party/AITree|16.8s |18.4s |73.4MB |27.3s |546.4MB | +|IAITree |Cython|N |cgranges |56.6s |63.9s |23.4MB |43.9s |143.1MB | +|binning |C++ |Y |bedtools |201.9s|280.4s|478.5MB |149.1s|3438.1MB| + +Here, IAITree = implicit augmented interval tree, used by cgranges; +CITree = centered interval tree, used by [Erik Garrison's +IntervalTree][ekg-itree]; AIList = augmented interval list, by [Feng et +al][ailist]; NCList = nested containment list, taken from [ncls][ncls] by Feng +et al; AITree = augmented interval tree, from [kerneltree][kerneltree]. +"Cov" indicates whether the program calculates breadth of coverage. +Comments: + +* AIList keeps start and end only. IAITree and CITree additionally store a + 4-byte "ID" field per interval to reference the source of interval. This is + partly why AIList uses the least memory. + +* IAITree is more sensitive to the worst case: the presence of an interval + spanning the whole chromosome. + +* IAITree uses an efficient radix sort. CITree uses std::sort from STL, which + is ok. AIList and NCList use qsort from libc, which is slow. Faster sorting + leads to faster indexing. + +* IAITree in C++ uses identical core algorithm to the C version, but limited by + its APIs, it wastes time on memory locality and management. CITree has a + similar issue. + +* Computing coverage is better done when the returned list of intervals are + start sorted. IAITree returns sorted list. CITree doesn't.
Not sure about + others. Computing coverage takes a couple of seconds. Sorting will be slower. + +* Printing intervals also takes a noticeable fraction of time. Custom printf + equivalent would be faster. + +* IAITree+Cython is a wrapper around the C version of cgranges. Cython adds + significant overhead. + +* Bedtools is designed for a variety of applications in addition to computing + coverage. It may keep other information in its internal data structure. This + micro-benchmark may be unfair to bedtools. + +* In general, the performance is affected a lot by subtle implementation + details. CITree, IAITree, NCList and AIList are all broadly comparable in + performance. AITree is not recommended when indexed intervals are immutable. + +### Use cgranges as a C library + +```c +cgranges_t *cr = cr_init(); // initialize a cgranges_t object +cr_add(cr, "chr1", 20, 30, 0); // add a genomic interval +cr_add(cr, "chr2", 10, 30, 1); +cr_add(cr, "chr1", 10, 25, 2); +cr_index(cr); // index + +int64_t i, n, *b = 0, max_b = 0; +n = cr_overlap(cr, "chr1", 15, 22, &b, &max_b); // overlap query; output array b[] can be reused +for (i = 0; i < n; ++i) // traverse overlapping intervals + printf("%d\t%d\t%d\n", cr_start(cr, b[i]), cr_end(cr, b[i]), cr_label(cr, b[i])); +free(b); // b[] is allocated by malloc() inside cr_overlap(), so needs to be freed with free() + +cr_destroy(cr); ``` -Now, it's possible to query the tree and obtain a set of intervals which are contained within the start and stop coordinates. 
+### Use IITree as a C++ library -```c++ -vector > results; -tree.findContained(start, stop, results); -cout << "found " << results.size() << " overlapping intervals" << endl; +```cpp +IITree tree; +tree.add(12, 34, 0); // add an interval +tree.add(0, 23, 1); +tree.add(34, 56, 2); +tree.index(); // index +std::vector a; +tree.overlap(22, 25, a); // retrieve overlaps +for (size_t i = 0; i < a.size(); ++i) + printf("%d\t%d\t%d\n", tree.start(a[i]), tree.end(a[i]), tree.data(a[i])); ``` -The function IntervalTree::findOverlapping provides a method to find all those intervals which are contained or partially overlap the interval (start, stop). - -### Author: Erik Garrison - -### License: MIT +[bedcov]: https://bedtools.readthedocs.io/en/latest/content/tools/coverage.html +[ekg-itree]: https://github.com/ekg/intervaltree +[quicksect]: https://github.com/brentp/quicksect +[ncls]: https://github.com/hunt-genes/ncls +[citree]: https://en.wikipedia.org/wiki/Interval_tree#Centered_interval_tree +[itree]: https://en.wikipedia.org/wiki/Interval_tree +[bheap]: https://en.wikipedia.org/wiki/Binary_heap +[ailist]: https://www.biorxiv.org/content/10.1101/593657v1 +[kerneltree]: https://github.com/biocore-ntnu/kerneltree \ No newline at end of file diff --git a/lib/external/intervaltree/include/IITree.h b/lib/external/intervaltree/include/IITree.h new file mode 100644 index 000000000..1c5883d9f --- /dev/null +++ b/lib/external/intervaltree/include/IITree.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include + +template // "S" is a scalar type; "T" is the type of data associated with each interval +class IITree { + struct StackCell { + size_t x; // node + int w; // w: 0 if left child hasn't been processed + StackCell() {}; + StackCell(size_t x_, int w_) : x(x_), w(w_) {}; + }; + struct Interval { + S st, en, max; + T data; + Interval() = default; + Interval(const S &s, const S &e, const T &d) : st(s), en(e), max(e), data(d) { } + }; + struct IntervalLess { + 
bool operator()(const Interval &intervalA, const Interval &intervalB) const { return intervalA.st < intervalB.st; } + }; + std::vector a; + size_t layout_recur(Interval *b, size_t i = 0, size_t k = 0) { // see https://algorithmica.org/en/eytzinger + if (k < a.size()) { + i = layout_recur(b, i, (k<<1) + 1); + b[k] = a[i++]; + i = layout_recur(b, i, (k<<1) + 2); + } + return i; + } + void index_BFS(Interval *interval, size_t n) { // set Interval::max + int t = 0; + StackCell stack[64]; + stack[t++] = StackCell(0, 0); + while (t) { + StackCell z = stack[--t]; + size_t k = z.x, l = k<<1|1, r = l + 1; + if (z.w == 2) { // Interval::max for both children are computed + interval[k].max = interval[k].en; + if (l < n && interval[k].max < interval[l].max) interval[k].max = interval[l].max; + if (r < n && interval[k].max < interval[r].max) interval[k].max = interval[r].max; + } else { // go down into the two children + stack[t++] = StackCell(k, z.w + 1); + if (l + z.w < n) + stack[t++] = StackCell(l + z.w, 0); + } + } + } +public: + void add(const S &s, const S &e, const T &d) { a.push_back(Interval(s, e, d)); } + void index() { + std::sort(a.begin(), a.end(), IntervalLess()); + std::vector b(a.size()); + layout_recur(b.data()); + a.clear(); + std::copy(b.begin(), b.end(), std::back_inserter(a)); + index_BFS(a.data(), a.size()); + } + bool overlap(const S &st, const S &en, std::vector &out) const { + int t = 0; + std::array stack; + out.clear(); + if (a.empty()) return false; + stack[t++] = StackCell(0, 0); // push the root; this is a top down traversal + while (t) { // the following guarantees that numbers in out[] are always sorted + StackCell z = stack[--t]; + size_t l = (z.x<<1) + 1, r = l + 1; + if (l >= a.size()) { // a leaf node + if (st < a[z.x].en && a[z.x].st <= en) out.push_back(z.x); + } else if (z.w == 0) { // if left child not processed + stack[t++] = StackCell(z.x, 1); // re-add node z.x, but mark the left child having been processed + if (l < a.size() && 
a[l].max > st) + stack[t++] = StackCell(l, 0); + } else if (a[z.x].st <= en) { // need to push the right child + if (st < a[z.x].en) out.push_back(z.x); // test if z.x overlaps the query; if yes, append to out[] + if (r < a.size()) stack[t++] = StackCell(r, 0); + } + } + return out.size() > 0? true : false; + } + size_t size(void) const { return a.size(); } + const S &start(size_t i) const { return a[i].st; } + const S &end(size_t i) const { return a[i].en; } + const T &data(size_t i) const { return a[i].data; } +}; \ No newline at end of file diff --git a/lib/external/intervaltree/include/IntervalTree.h b/lib/external/intervaltree/include/IntervalTree.h deleted file mode 100644 index 701e45933..000000000 --- a/lib/external/intervaltree/include/IntervalTree.h +++ /dev/null @@ -1,325 +0,0 @@ -#ifndef __INTERVAL_TREE_H -#define __INTERVAL_TREE_H - -#include -#include -#include -#include -#include -#include - -#ifdef USE_INTERVAL_TREE_NAMESPACE -namespace interval_tree { -#endif -template -class Interval { -public: - Scalar start; - Scalar stop; - Value value; - Interval(const Scalar& s, const Scalar& e, const Value& v) - : start(std::min(s, e)) - , stop(std::max(s, e)) - , value(v) - {} -}; - -template -Value intervalStart(const Interval& i) { - return i.start; -} - -template -Value intervalStop(const Interval& i) { - return i.stop; -} - -template -std::ostream& operator<<(std::ostream& out, const Interval& i) { - out << "Interval(" << i.start << ", " << i.stop << "): " << i.value; - return out; -} - -template -class IntervalTree { -public: - typedef Interval interval; - typedef std::vector interval_vector; - - - struct IntervalStartCmp { - bool operator()(const interval& a, const interval& b) { - return a.start < b.start; - } - }; - - struct IntervalStopCmp { - bool operator()(const interval& a, const interval& b) { - return a.stop < b.stop; - } - }; - - IntervalTree() - : left(nullptr) - , right(nullptr) - , center(0) - {} - - ~IntervalTree() = default; - - 
std::unique_ptr clone() const { - return std::unique_ptr(new IntervalTree(*this)); - } - - IntervalTree(const IntervalTree& other) - : intervals(other.intervals), - left(other.left ? other.left->clone() : nullptr), - right(other.right ? other.right->clone() : nullptr), - center(other.center) - {} - - IntervalTree& operator=(IntervalTree&&) = default; - IntervalTree(IntervalTree&&) = default; - - IntervalTree& operator=(const IntervalTree& other) { - center = other.center; - intervals = other.intervals; - left = other.left ? other.left->clone() : nullptr; - right = other.right ? other.right->clone() : nullptr; - return *this; - } - - IntervalTree( - interval_vector&& ivals, - std::size_t depth = 16, - std::size_t minbucket = 64, - std::size_t maxbucket = 512, - Scalar leftextent = 0, - Scalar rightextent = 0) - : left(nullptr) - , right(nullptr) - { - --depth; - const auto minmaxStop = std::minmax_element(ivals.begin(), ivals.end(), - IntervalStopCmp()); - const auto minmaxStart = std::minmax_element(ivals.begin(), ivals.end(), - IntervalStartCmp()); - if (!ivals.empty()) { - center = (minmaxStart.first->start + minmaxStop.second->stop) / 2; - } - if (leftextent == 0 && rightextent == 0) { - // sort intervals by start - std::sort(ivals.begin(), ivals.end(), IntervalStartCmp()); - } else { - assert(std::is_sorted(ivals.begin(), ivals.end(), IntervalStartCmp())); - } - if (depth == 0 || (ivals.size() < minbucket && ivals.size() < maxbucket)) { - std::sort(ivals.begin(), ivals.end(), IntervalStartCmp()); - intervals = std::move(ivals); - assert(is_valid().first); - return; - } else { - Scalar leftp = 0; - Scalar rightp = 0; - - if (leftextent || rightextent) { - leftp = leftextent; - rightp = rightextent; - } else { - leftp = ivals.front().start; - rightp = std::max_element(ivals.begin(), ivals.end(), - IntervalStopCmp())->stop; - } - - interval_vector lefts; - interval_vector rights; - - for (typename interval_vector::const_iterator i = ivals.begin(); - i != 
ivals.end(); ++i) { - const interval& interval = *i; - if (interval.stop < center) { - lefts.push_back(interval); - } else if (interval.start > center) { - rights.push_back(interval); - } else { - assert(interval.start <= center); - assert(center <= interval.stop); - intervals.push_back(interval); - } - } - - if (!lefts.empty()) { - left.reset(new IntervalTree(std::move(lefts), - depth, minbucket, maxbucket, - leftp, center)); - } - if (!rights.empty()) { - right.reset(new IntervalTree(std::move(rights), - depth, minbucket, maxbucket, - center, rightp)); - } - } - assert(is_valid().first); - } - - // Call f on all intervals near the range [start, stop]: - template - void visit_near(const Scalar& start, const Scalar& stop, UnaryFunction f) const { - if (!intervals.empty() && ! (stop < intervals.front().start)) { - for (auto & i : intervals) { - f(i); - } - } - if (left && start <= center) { - left->visit_near(start, stop, f); - } - if (right && stop >= center) { - right->visit_near(start, stop, f); - } - } - - // Call f on all intervals crossing pos - template - void visit_overlapping(const Scalar& pos, UnaryFunction f) const { - visit_overlapping(pos, pos, f); - } - - // Call f on all intervals overlapping [start, stop] - template - void visit_overlapping(const Scalar& start, const Scalar& stop, UnaryFunction f) const { - auto filterF = [&](const interval& interval) { - if (interval.stop >= start && interval.start <= stop) { - // Only apply f if overlapping - f(interval); - } - }; - visit_near(start, stop, filterF); - } - - // Call f on all intervals contained within [start, stop] - template - void visit_contained(const Scalar& start, const Scalar& stop, UnaryFunction f) const { - auto filterF = [&](const interval& interval) { - if (start <= interval.start && interval.stop <= stop) { - f(interval); - } - }; - visit_near(start, stop, filterF); - } - - interval_vector findOverlapping(const Scalar& start, const Scalar& stop) const { - interval_vector result; - 
visit_overlapping(start, stop, - [&](const interval& interval) { - result.emplace_back(interval); - }); - return result; - } - - interval_vector findContained(const Scalar& start, const Scalar& stop) const { - interval_vector result; - visit_contained(start, stop, - [&](const interval& interval) { - result.push_back(interval); - }); - return result; - } - bool empty() const { - if (left && !left->empty()) { - return false; - } - if (!intervals.empty()) { - return false; - } - if (right && !right->empty()) { - return false; - } - return true; - } - - template - void visit_all(UnaryFunction f) const { - if (left) { - left->visit_all(f); - } - std::for_each(intervals.begin(), intervals.end(), f); - if (right) { - right->visit_all(f); - } - } - - std::pair extentBruitForce() const { - struct Extent { - std::pair x = {std::numeric_limits::max(), - std::numeric_limits::min() }; - void operator()(const interval & interval) { - x.first = std::min(x.first, interval.start); - x.second = std::max(x.second, interval.stop); - } - }; - Extent extent; - - visit_all([&](const interval & interval) { extent(interval); }); - return extent.x; - } - - // Check all constraints. - // If first is false, second is invalid. 
- std::pair> is_valid() const { - const auto minmaxStop = std::minmax_element(intervals.begin(), intervals.end(), - IntervalStopCmp()); - const auto minmaxStart = std::minmax_element(intervals.begin(), intervals.end(), - IntervalStartCmp()); - - std::pair> result = {true, { std::numeric_limits::max(), - std::numeric_limits::min() }}; - if (!intervals.empty()) { - result.second.first = std::min(result.second.first, minmaxStart.first->start); - result.second.second = std::min(result.second.second, minmaxStop.second->stop); - } - if (left) { - auto valid = left->is_valid(); - result.first &= valid.first; - result.second.first = std::min(result.second.first, valid.second.first); - result.second.second = std::min(result.second.second, valid.second.second); - if (!result.first) { return result; } - if (valid.second.second >= center) { - result.first = false; - return result; - } - } - if (right) { - auto valid = right->is_valid(); - result.first &= valid.first; - result.second.first = std::min(result.second.first, valid.second.first); - result.second.second = std::min(result.second.second, valid.second.second); - if (!result.first) { return result; } - if (valid.second.first <= center) { - result.first = false; - return result; - } - } - if (!std::is_sorted(intervals.begin(), intervals.end(), IntervalStartCmp())) { - result.first = false; - } - return result; - } - - void clear() { - left.reset(); - right.reset(); - intervals.clear(); - center = 0; - } - -private: - interval_vector intervals; - std::unique_ptr left; - std::unique_ptr right; - Scalar center; -}; -#ifdef USE_INTERVAL_TREE_NAMESPACE -} -#endif - -#endif diff --git a/lib/external/pattern_language b/lib/external/pattern_language index 9a687a536..20a21a7de 160000 --- a/lib/external/pattern_language +++ b/lib/external/pattern_language @@ -1 +1 @@ -Subproject commit 9a687a5364ea27aa838f499afedb8e231f238a40 +Subproject commit 20a21a7de0db4be0b63553ebac90950dbf2a58fe diff --git 
a/plugins/builtin/include/content/providers/intel_hex_provider.hpp b/plugins/builtin/include/content/providers/intel_hex_provider.hpp index bd8916f69..720e9f843 100644 --- a/plugins/builtin/include/content/providers/intel_hex_provider.hpp +++ b/plugins/builtin/include/content/providers/intel_hex_provider.hpp @@ -2,7 +2,7 @@ #include -#include +#include namespace hex::plugin::builtin { @@ -44,7 +44,7 @@ namespace hex::plugin::builtin { protected: bool m_dataValid = false; size_t m_dataSize = 0x00; - interval_tree::IntervalTree> m_data; + IITree> m_data; std::fs::path m_sourceFilePath; }; diff --git a/plugins/builtin/include/content/views/view_diff.hpp b/plugins/builtin/include/content/views/view_diff.hpp index 960b2eed5..65a3c9dbb 100644 --- a/plugins/builtin/include/content/views/view_diff.hpp +++ b/plugins/builtin/include/content/views/view_diff.hpp @@ -12,7 +12,7 @@ #include "ui/hex_editor.hpp" -#include +#include namespace hex::plugin::builtin { diff --git a/plugins/builtin/include/content/views/view_find.hpp b/plugins/builtin/include/content/views/view_find.hpp index d2fc95e16..5cc841c36 100644 --- a/plugins/builtin/include/content/views/view_find.hpp +++ b/plugins/builtin/include/content/views/view_find.hpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace hex::plugin::builtin { @@ -94,11 +94,11 @@ namespace hex::plugin::builtin { } m_searchSettings, m_decodeSettings; - using OccurrenceTree = interval_tree::IntervalTree; + using OccurrenceTree = IITree; - std::map> m_foundOccurrences, m_sortedOccurrences; - std::map m_occurrenceTree; - std::map m_currFilter; + PerProvider> m_foundOccurrences, m_sortedOccurrences; + PerProvider m_occurrenceTree; + PerProvider m_currFilter; TaskHolder m_searchTask, m_filterTask; bool m_settingsValid = false; diff --git a/plugins/builtin/source/content/providers/intel_hex_provider.cpp b/plugins/builtin/source/content/providers/intel_hex_provider.cpp index 9075428ec..2643e0513 100644 --- 
a/plugins/builtin/source/content/providers/intel_hex_provider.cpp +++ b/plugins/builtin/source/content/providers/intel_hex_provider.cpp @@ -161,26 +161,37 @@ namespace hex::plugin::builtin { void IntelHexProvider::setBaseAddress(u64 address) { auto oldBase = this->getBaseAddress(); - auto intervals = this->m_data.findOverlapping(oldBase, oldBase + this->getActualSize()); + std::vector indices; + this->m_data.overlap(oldBase, oldBase + this->getActualSize(), indices); - for (auto &interval : intervals) { - interval.start = (interval.start - oldBase) + address; - interval.stop = (interval.stop - oldBase) + address; + IITree> intervals; + for (auto &index : indices) { + intervals.add( + (this->m_data.start(index) - oldBase) + address, + (this->m_data.end(index) - oldBase) + address, + this->m_data.data(index) + ); } this->m_data = std::move(intervals); + this->m_data.index(); Provider::setBaseAddress(address); } void IntelHexProvider::readRaw(u64 offset, void *buffer, size_t size) { - auto intervals = this->m_data.findOverlapping(offset, (offset + size) - 1); + std::vector indices; + this->m_data.overlap(offset, (offset + size) - 1, indices); std::memset(buffer, 0x00, size); auto bytes = reinterpret_cast(buffer); - for (const auto &interval : intervals) { - for (u32 i = std::max(interval.start, offset); i <= interval.stop && (i - offset) < size; i++) { - bytes[i - offset] = interval.value[i - interval.start]; + for (const auto &index : indices) { + auto start = this->m_data.start(index); + auto end = this->m_data.end(index); + auto data = this->m_data.data(index); + + for (u32 i = std::max(start, offset); i <= end && (i - offset) < size; i++) { + bytes[i - offset] = data[i - start]; } } } @@ -203,15 +214,15 @@ namespace hex::plugin::builtin { return false; u64 maxAddress = 0x00; - decltype(this->m_data)::interval_vector intervals; for (auto &[address, bytes] : data) { auto endAddress = (address + bytes.size()) - 1; - intervals.emplace_back(address, endAddress, 
std::move(bytes)); + this->m_data.add(address, endAddress, std::move(bytes)); if (endAddress > maxAddress) maxAddress = endAddress; } - this->m_data = std::move(intervals); + this->m_data.index(); + this->m_dataSize = maxAddress + 1; this->m_dataValid = true; @@ -254,17 +265,22 @@ namespace hex::plugin::builtin { } std::pair IntelHexProvider::getRegionValidity(u64 address) const { - auto intervals = this->m_data.findOverlapping(address, address); - if (intervals.empty()) { + std::vector indices; + this->m_data.overlap(address, address, indices); + if (indices.empty()) { return Provider::getRegionValidity(address); } - auto closestInterval = intervals.front(); - for (const auto &interval : intervals) { - if (interval.start < closestInterval.start) - closestInterval = interval; + auto closestIndex = indices.front(); + for (const auto &index : indices) { + if (this->m_data.start(index) < this->m_data.start(closestIndex)) + closestIndex = index; } - return { Region { closestInterval.start, (closestInterval.stop - closestInterval.start) + 1}, true }; + + auto start = this->m_data.start(closestIndex); + auto end = this->m_data.end(closestIndex); + + return { Region { start, (end - start) + 1 }, true }; } void IntelHexProvider::loadSettings(const nlohmann::json &settings) { diff --git a/plugins/builtin/source/content/providers/motorola_srec_provider.cpp b/plugins/builtin/source/content/providers/motorola_srec_provider.cpp index 437c69f0d..1f3b0eae4 100644 --- a/plugins/builtin/source/content/providers/motorola_srec_provider.cpp +++ b/plugins/builtin/source/content/providers/motorola_srec_provider.cpp @@ -180,15 +180,15 @@ namespace hex::plugin::builtin { return false; u64 maxAddress = 0x00; - decltype(this->m_data)::interval_vector intervals; for (auto &[address, bytes] : data) { auto endAddress = (address + bytes.size()) - 1; - intervals.emplace_back(address, endAddress, std::move(bytes)); + this->m_data.add(address, endAddress, std::move(bytes)); if (endAddress > 
maxAddress) maxAddress = endAddress; } - this->m_data = std::move(intervals); + this->m_data.index(); + this->m_dataSize = maxAddress + 1; this->m_dataValid = true; diff --git a/plugins/builtin/source/content/views/view_find.cpp b/plugins/builtin/source/content/views/view_find.cpp index d4866851c..3b5c9a45d 100644 --- a/plugins/builtin/source/content/views/view_find.cpp +++ b/plugins/builtin/source/content/views/view_find.cpp @@ -22,9 +22,8 @@ namespace hex::plugin::builtin { if (this->m_searchTask.isRunning()) return { }; - auto provider = ImHexApi::Provider::get(); - - if (!this->m_occurrenceTree[provider].findOverlapping(address, address).empty()) + std::vector occurrences; + if (this->m_occurrenceTree->overlap(address, address, occurrences)) return HighlightColor(); else return std::nullopt; @@ -36,10 +35,8 @@ namespace hex::plugin::builtin { if (this->m_searchTask.isRunning()) return; - auto provider = ImHexApi::Provider::get(); - - auto occurrences = this->m_occurrenceTree[provider].findOverlapping(address, address); - if (occurrences.empty()) + std::vector occurrences; + if (!this->m_occurrenceTree->overlap(address, address, occurrences)) return; ImGui::BeginTooltip(); @@ -51,7 +48,10 @@ namespace hex::plugin::builtin { ImGui::TableNextColumn(); { - const auto value = this->decodeValue(ImHexApi::Provider::get(), occurrence.value, 256); + auto start = this->m_occurrenceTree->start(occurrence); + auto end = this->m_occurrenceTree->end(occurrence) - 1; + const auto &bytes = this->m_occurrenceTree->data(occurrence); + const auto value = this->decodeValue(ImHexApi::Provider::get(), bytes, 256); ImGui::ColorButton("##color", ImColor(HighlightColor())); ImGui::SameLine(0, 10); @@ -65,7 +65,7 @@ namespace hex::plugin::builtin { ImGui::TableNextColumn(); ImGui::TextFormatted("{}: ", "hex.builtin.common.region"_lang); ImGui::TableNextColumn(); - ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", occurrence.value.region.getStartAddress(), 
occurrence.value.region.getEndAddress()); + ImGui::TextFormatted("[ 0x{:08X} - 0x{:08X} ]", start, end); auto demangledValue = llvm::demangle(value); @@ -494,28 +494,27 @@ namespace hex::plugin::builtin { switch (settings.mode) { using enum SearchSettings::Mode; case Strings: - this->m_foundOccurrences[provider] = searchStrings(task, provider, searchRegion, settings.strings); + this->m_foundOccurrences.get(provider) = searchStrings(task, provider, searchRegion, settings.strings); break; case Sequence: - this->m_foundOccurrences[provider] = searchSequence(task, provider, searchRegion, settings.bytes); + this->m_foundOccurrences.get(provider) = searchSequence(task, provider, searchRegion, settings.bytes); break; case Regex: - this->m_foundOccurrences[provider] = searchRegex(task, provider, searchRegion, settings.regex); + this->m_foundOccurrences.get(provider) = searchRegex(task, provider, searchRegion, settings.regex); break; case BinaryPattern: - this->m_foundOccurrences[provider] = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern); + this->m_foundOccurrences.get(provider) = searchBinaryPattern(task, provider, searchRegion, settings.binaryPattern); break; case Value: - this->m_foundOccurrences[provider] = searchValue(task, provider, searchRegion, settings.value); + this->m_foundOccurrences.get(provider) = searchValue(task, provider, searchRegion, settings.value); break; } - this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider]; + this->m_sortedOccurrences.get(provider) = this->m_foundOccurrences.get(provider); - OccurrenceTree::interval_vector intervals; - for (const auto &occurrence : this->m_foundOccurrences[provider]) - intervals.emplace_back(occurrence.region.getStartAddress(), occurrence.region.getEndAddress(), occurrence); - this->m_occurrenceTree[provider] = std::move(intervals); + for (const auto &occurrence : this->m_foundOccurrences.get(provider)) + this->m_occurrenceTree->add(occurrence.region.getStartAddress(), 
occurrence.region.getEndAddress() + 1, occurrence); + this->m_occurrenceTree->index(); }); } @@ -800,14 +799,14 @@ namespace hex::plugin::builtin { ImGui::EndDisabled(); ImGui::SameLine(); - ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences[provider].size()); + ImGui::TextFormatted("hex.builtin.view.find.search.entries"_lang, this->m_foundOccurrences->size()); - ImGui::BeginDisabled(this->m_foundOccurrences[provider].empty()); + ImGui::BeginDisabled(this->m_foundOccurrences->empty()); { if (ImGui::Button("hex.builtin.view.find.search.reset"_lang)) { - this->m_foundOccurrences[provider].clear(); - this->m_sortedOccurrences[provider].clear(); - this->m_occurrenceTree[provider].clear(); + this->m_foundOccurrences->clear(); + this->m_sortedOccurrences->clear(); + *this->m_occurrenceTree = {}; } } ImGui::EndDisabled(); @@ -818,25 +817,25 @@ namespace hex::plugin::builtin { ImGui::Separator(); ImGui::NewLine(); - auto &currOccurrences = this->m_sortedOccurrences[provider]; + auto &currOccurrences = *this->m_sortedOccurrences; ImGui::PushItemWidth(ImGui::GetContentRegionAvail().x); - auto prevFilterLength = this->m_currFilter[provider].length(); - if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, this->m_currFilter[provider])) { - if (prevFilterLength > this->m_currFilter[provider].length()) - this->m_sortedOccurrences[provider] = this->m_foundOccurrences[provider]; + auto prevFilterLength = this->m_currFilter->length(); + if (ImGui::InputTextWithHint("##filter", "hex.builtin.common.filter"_lang, *this->m_currFilter)) { + if (prevFilterLength > this->m_currFilter->length()) + *this->m_sortedOccurrences = *this->m_foundOccurrences; if (this->m_filterTask.isRunning()) this->m_filterTask.interrupt(); - if (!this->m_currFilter[provider].empty()) { + if (!this->m_currFilter->empty()) { this->m_filterTask = TaskManager::createTask("Filtering", currOccurrences.size(), [this, provider, &currOccurrences](Task &task) 
{ u64 progress = 0; currOccurrences.erase(std::remove_if(currOccurrences.begin(), currOccurrences.end(), [this, provider, &task, &progress](const auto &region) { task.update(progress); progress += 1; - return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter[provider]); + return !hex::containsIgnoreCase(this->decodeValue(provider, region), this->m_currFilter.get(provider)); }), currOccurrences.end()); }); } diff --git a/plugins/builtin/source/content/views/view_hashes.cpp b/plugins/builtin/source/content/views/view_hashes.cpp index 683333e58..7f1741ad0 100644 --- a/plugins/builtin/source/content/views/view_hashes.cpp +++ b/plugins/builtin/source/content/views/view_hashes.cpp @@ -18,7 +18,7 @@ namespace hex::plugin::builtin { auto selection = ImHexApi::HexEditor::getSelection(); - if (ImGui::GetIO().KeyShift) { + if (selection.has_value() && ImGui::GetIO().KeyShift) { auto &hashFunctions = this->m_hashFunctions.get(selection->getProvider()); if (!hashFunctions.empty() && selection.has_value() && selection->overlaps(Region { address, size })) { ImGui::BeginTooltip();