https://github.com/shader-slang/slang
Tip revision: f44da6cc5c0f211c13bd1eb0743d79c7861ea64e authored by Yong He on 09 February 2024, 02:29:32 UTC
Support pointers in SPIRV. (#3561)
Support pointers in SPIRV. (#3561)
Tip revision: f44da6c
slang-byte-encode-util.cpp
#include "slang-byte-encode-util.h"
namespace Slang {
// Descriptions of algorithms here...
// https://github.com/stoklund/varint
#if SLANG_LITTLE_ENDIAN && SLANG_UNALIGNED_ACCESS
// Testing on i7, unaligned access is around 40% faster
# define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 1
#endif
#ifndef SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS
# define SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS 0
#endif
#define SLANG_REPEAT_2(n) n, n
#define SLANG_REPEAT_4(n) SLANG_REPEAT_2(n), SLANG_REPEAT_2(n)
#define SLANG_REPEAT_8(n) SLANG_REPEAT_4(n), SLANG_REPEAT_4(n)
#define SLANG_REPEAT_16(n) SLANG_REPEAT_8(n), SLANG_REPEAT_8(n)
#define SLANG_REPEAT_32(n) SLANG_REPEAT_16(n), SLANG_REPEAT_16(n)
#define SLANG_REPEAT_64(n) SLANG_REPEAT_32(n), SLANG_REPEAT_32(n)
#define SLANG_REPEAT_128(n) SLANG_REPEAT_64(n), SLANG_REPEAT_64(n)
/* static */const int8_t ByteEncodeUtil::s_msb8[256] =
{
- 1,
0,
SLANG_REPEAT_2(1),
SLANG_REPEAT_4(2),
SLANG_REPEAT_8(3),
SLANG_REPEAT_16(4),
SLANG_REPEAT_32(5),
SLANG_REPEAT_64(6),
SLANG_REPEAT_128(7),
};
/* static */size_t ByteEncodeUtil::calcEncodeLiteSizeUInt32(const uint32_t* in, size_t num)
{
size_t totalNumEncodeBytes = 0;
for (size_t i = 0; i < num; i++)
{
const uint32_t v = in[i];
if (v < kLiteCut1)
{
totalNumEncodeBytes += 1;
}
else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
{
totalNumEncodeBytes += 2;
}
else
{
totalNumEncodeBytes += calcNonZeroMsByte32(v) + 2;
}
}
return totalNumEncodeBytes;
}
/* static */size_t ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, uint8_t* encodeOut)
{
uint8_t* encodeStart = encodeOut;
for (size_t i = 0; i < num; ++i)
{
uint32_t v = in[i];
if(v < kLiteCut1)
{
*encodeOut++ = uint8_t(v);
}
else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
{
v -= kLiteCut1;
encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8));
encodeOut[1] = uint8_t(v);
encodeOut += 2;
}
else
{
uint8_t* encodeOutStart = encodeOut++;
while (v)
{
*encodeOut++ = uint8_t(v);
v >>= 8;
}
// Finally write the size to the start
const int numBytes = int(encodeOut - encodeOutStart);
encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2));
}
}
return size_t(encodeOut - encodeStart);
}
/* static */void ByteEncodeUtil::encodeLiteUInt32(const uint32_t* in, size_t num, List<uint8_t>& encodeArrayOut)
{
// Make sure there is at least enough space for all bytes
encodeArrayOut.setCount(num);
uint8_t* encodeOut = encodeArrayOut.begin();
uint8_t* encodeOutEnd = encodeArrayOut.end();
for (size_t i = 0; i < num; ++i)
{
// Check if we need some more space
if (encodeOut + kMaxLiteEncodeUInt32 > encodeOutEnd)
{
const size_t offset = size_t(encodeOut - encodeArrayOut.begin());
const UInt oldCapacity = encodeArrayOut.getCapacity();
// Make some more space
encodeArrayOut.reserve(oldCapacity + (oldCapacity >> 1) + kMaxLiteEncodeUInt32);
// Make the size the capacity
const UInt capacity = encodeArrayOut.getCapacity();
encodeArrayOut.setCount(capacity);
encodeOut = encodeArrayOut.begin() + offset;
encodeOutEnd = encodeArrayOut.end();
}
uint32_t v = in[i];
if (v < kLiteCut1)
{
*encodeOut++ = uint8_t(v);
}
else if (v <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
{
v -= kLiteCut1;
encodeOut[0] = uint8_t(kLiteCut1 + (v >> 8));
encodeOut[1] = uint8_t(v);
encodeOut += 2;
}
else
{
uint8_t* encodeOutStart = encodeOut++;
while (v)
{
*encodeOut++ = uint8_t(v);
v >>= 8;
}
// Finally write the size to the start
const int numBytes = int(encodeOut - encodeOutStart);
encodeOutStart[0] = uint8_t(kLiteCut2 + (numBytes - 2));
}
}
encodeArrayOut.setCount(UInt(encodeOut - encodeArrayOut.begin()));
encodeArrayOut.compress();
}
/* static */int ByteEncodeUtil::encodeLiteUInt32(uint32_t in, uint8_t out[kMaxLiteEncodeUInt32])
{
// 0-184 1 byte value = B0
// 185 - 248 2 bytes value = 185 + 256 * (B0 - 185) + B1
// 249 - 255 3 - 9 bytes value = (B0 - 249 + 2) little - endian bytes following B0.
if (in < kLiteCut1)
{
out[0] = uint8_t(in);
return 1;
}
else if (in <= kLiteCut1 + 255 * (kLiteCut2 - 1 - kLiteCut1))
{
in -= kLiteCut1;
out[0] = uint8_t(kLiteCut1 + (in >> 8));
out[1] = uint8_t(in);
return 2;
}
else
{
int numBytes = 1;
while (in)
{
out[numBytes++] = uint8_t(in);
in >>= 8;
}
// Finally write the size
out[0] = uint8_t(kLiteCut2 + (numBytes - 2));
return numBytes;
}
}
static const uint32_t s_unalignedUInt32Mask[5] =
{
0x00000000,
0x000000ff,
0x0000ffff,
0x00ffffff,
0xffffffff,
};
// Decode the >= kLiteCut2.
// in is pointing past the first byte.
// Only valid numBytesRemaining is 2, 3, or 4
SLANG_FORCE_INLINE static uint32_t _decodeLiteCut2UInt32(const uint8_t* in, int numBytesRemaining)
{
uint32_t value = 0;
#if SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS
switch (numBytesRemaining)
{
case 2: value = *(const uint16_t*)in; break;
case 3: value = (uint32_t(in[2]) << 16) | (uint32_t(in[1]) << 8) | uint32_t(in[0]); break;
case 4: value = *(const uint32_t*)in; break;
default: break;
}
#else
// This works on all cpus although slower
value = in[0];
switch (numBytesRemaining)
{
case 4: value |= uint32_t(in[3]) << 24; /* fall thru */
case 3: value |= uint32_t(in[2]) << 16; /* fall thru */
case 2: value |= uint32_t(in[1]) << 8; /* fall thru */
case 1: break;
}
#endif
return value;
}
/* static */int ByteEncodeUtil::decodeLiteUInt32(const uint8_t* in, uint32_t* out)
{
uint8_t b0 = *in++;
if (b0 < kLiteCut1)
{
*out = uint32_t(b0);
return 1;
}
else if (b0 < kLiteCut2)
{
uint8_t b1 = *in++;
*out = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8);
return 2;
}
else
{
const int numBytesRemaining = b0 - kLiteCut2 + 2 - 1;
*out = _decodeLiteCut2UInt32(in, numBytesRemaining);
return numBytesRemaining + 1;
}
}
/* static */size_t ByteEncodeUtil::decodeLiteUInt32(const uint8_t* encodeIn, size_t numValues, uint32_t* valuesOut)
{
const uint8_t* encodeStart = encodeIn;
for (size_t i = 0; i < numValues; ++i)
{
uint8_t b0 = *encodeIn++;
if (b0 < kLiteCut1)
{
valuesOut[i] = uint32_t(b0);
}
else if (b0 < kLiteCut2)
{
uint8_t b1 = *encodeIn++;
valuesOut[i] = kLiteCut1 + b1 + (uint32_t(b0 - kLiteCut1) << 8);
}
else
{
const int numBytesRemaining = b0 - kLiteCut2 + 2 - 1;
// For unaligned access, do not use unaligned access for the last two values,
// (3rd last is safe because this value will have at least 2 bytes, followed by at worst two 1-byte values)
// otherwise we can access outside the bounds of the encoded array
// This prevents memory validation tools from causing an exception here
if (SLANG_BYTE_ENCODE_USE_UNALIGNED_ACCESS && i < numValues - 2)
{
const uint32_t mask = s_unalignedUInt32Mask[numBytesRemaining];
//const uint32_t mask = ~(uint32_t(0xffffff00) << ((numBytesRemaining - 1) * 8));
valuesOut[i] = (*(const uint32_t*)encodeIn) & mask;
}
else
{
valuesOut[i] = _decodeLiteCut2UInt32(encodeIn, numBytesRemaining);
}
encodeIn += numBytesRemaining;
}
}
return size_t(encodeIn - encodeStart);
}
} // namespace Slang