From 2f60f61c1598f4c3076adc1ac9bf60c8906417d3 Mon Sep 17 00:00:00 2001 From: rockisch Date: Sun, 15 Sep 2024 10:16:57 -0300 Subject: [PATCH] impr: Add streaming decompression to zstd_decompress (#1898) ### Problem description https://github.com/WerWolv/ImHex/issues/1895 ### Implementation description Added code that handles streamed zstd data. It is based around the [official documentation](http://facebook.github.io/zstd/zstd_manual.html) and the [example](https://github.com/facebook/zstd/blob/dev/examples/simple_decompression.c) provided at the main zstd repo. The loop around the non-streamed version was also removed because I don't think it was doing anything (no `continue`s, `sourceSize` was always being set to 0). ### Additional things To test, I generated streamed zstd data with this python script: ```py import io; import pyzstd; with open("data.zstd", "wb") as f: pyzstd.compress_stream(io.BytesIO(b'ab' * 100), f) ``` And then I ran this pattern script: ``` import std.mem; import hex.dec; u8 data[while(!std::mem::eof())] @ 0x00; std::mem::Section data_sec = std::mem::create_section("data_sec"); hex::dec::zstd_decompress(data, data_sec); ``` Inspecting the section shows the correct data: ![image](https://github.com/user-attachments/assets/83fc9d4c-e6fa-49ee-9923-29dc0c280739) Co-authored-by: Nik --- .../source/content/pl_functions.cpp | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/plugins/decompress/source/content/pl_functions.cpp b/plugins/decompress/source/content/pl_functions.cpp index d5f4803b8..5589a1b95 100644 --- a/plugins/decompress/source/content/pl_functions.cpp +++ b/plugins/decompress/source/content/pl_functions.cpp @@ -211,25 +211,48 @@ namespace hex::plugin::decompress { const u8* source = compressedData.data(); size_t sourceSize = compressedData.size(); - do { - size_t blockSize = ZSTD_getFrameContentSize(source, sourceSize); + size_t blockSize = ZSTD_getFrameContentSize(source, sourceSize); - if 
(blockSize == ZSTD_CONTENTSIZE_ERROR) { - return false; + if (blockSize == ZSTD_CONTENTSIZE_ERROR) { + return false; + } + + if (blockSize == ZSTD_CONTENTSIZE_UNKNOWN) { + // Data uses stream compression + ZSTD_inBuffer dataIn = { (void*)source, sourceSize, 0 }; + + size_t outSize = ZSTD_DStreamOutSize(); + std::vector<u8> outVec(outSize); + const u8* out = outVec.data(); + + size_t lastRet = 0; + while (dataIn.pos < dataIn.size) { + ZSTD_outBuffer dataOut = { (void*)out, outSize, 0 }; + + size_t ret = ZSTD_decompressStream(dctx, &dataOut, &dataIn); + if (ZSTD_isError(ret)) { + return false; + } + lastRet = ret; + + size_t sectionSize = section.size(); + section.resize(sectionSize + dataOut.pos); + std::memcpy(section.data() + sectionSize, out, dataOut.pos); } + // Incomplete frame + if (lastRet != 0) { + return false; + } + } else { section.resize(section.size() + blockSize); - size_t decodedSize = ZSTD_decompressDCtx(dctx, section.data() + section.size() - blockSize, blockSize, source, sourceSize); + size_t ret = ZSTD_decompressDCtx(dctx, section.data() + section.size() - blockSize, blockSize, source, sourceSize); - if (ZSTD_isError(decodedSize)) { + if (ZSTD_isError(ret)) { return false; } - - source = source + sourceSize; - sourceSize = 0; - - } while (sourceSize > 0); + } return true; #else