// Copyright (C) 2021-2022 Intel Corporation
// SPDX-License-Identifier: MIT
#pragma once
#include <span>
#include <cassert>
#include <algorithm>
#include <limits>
#include <cstring>
#include <cstdlib>
#include <functional>
#include <utility>
// Defining likely/unlikely macros to help branch prediction.
// To be replaced by C++20's [[likely]]/[[unlikely]] attributes, once they are properly and consistently implemented in both GCC and Visual Studio
#ifdef _WIN32
#define likely(x) (x)
#define unlikely(x) (x)
#else
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#endif
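// Usage sketch (illustrative only): wrap conditions that are almost never true,
// e.g., an allocation-failure check:
//   if (unlikely(p == nullptr)) { /* rare error path */ }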
using namespace std;
namespace Topor
{
template<class T> class CDynArray
{
public:
CDynArray(size_t initCap = 0) : m_Cap(initCap), m_B(initCap > MaxCapacity || initCap == 0 ? nullptr : (T*)malloc(initCap * TSize)) {}
CDynArray(size_t initCap, unsigned char initVal) : m_Cap(initCap), m_B(InitAlloc(initCap, initVal)) {}
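// Construction sketch (illustrative caller code; names are hypothetical):
//   CDynArray<int> arr(16, 0); // 16 zero-initialized entries
//   if (arr.uninitialized_or_erroneous()) { /* allocation failed */ }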
// Copy constructor
CDynArray(const CDynArray& da) : CDynArray(da.m_Cap)
{
if (unlikely(uninitialized_or_erroneous()))
{
m_Cap = 0;
}
else
{
std::memcpy(m_B, da.m_B, da.m_Cap * sizeof(*m_B));
}
}
// Assignment operator
CDynArray& operator =(const CDynArray& da)
{
// Guard against self-assignment: memcpy over identical regions is undefined behavior
if (this == &da) return *this;
reserve_exactly(da.m_Cap);
if (unlikely(uninitialized_or_erroneous()))
{
m_Cap = 0;
}
else
{
std::memcpy(m_B, da.m_B, da.m_Cap * sizeof(*m_B));
m_Cap = da.m_Cap;
m_Multiplier = da.m_Multiplier;
}
return *this;
}
// Move constructor
CDynArray(CDynArray&& da) { *this = move(da); }
// Move assignment operator
CDynArray& operator=(CDynArray&& da)
{
// Guard against self-move
if (this == &da) return *this;
free(m_B);
m_B = da.m_B;
m_Cap = da.m_Cap;
m_Multiplier = da.m_Multiplier;
da.m_B = nullptr;
da.m_Cap = 0;
da.m_Multiplier = 0;
return *this;
}
~CDynArray() { free(m_B); }
#ifdef _WIN32
void reserve_exactly(size_t newCap)
#else
// Preventing inlining is required to work around an apparent GCC bug, manifesting itself in the following error:
// error: argument 2 range [9223372036854775808, 18446744073709551608] exceeds maximum object size 9223372036854775807 [-Werror=alloc-size-larger-than=] auto tmp = (T*)realloc(m_B, s);
void __attribute__((noinline)) reserve_exactly(size_t newCap)
#endif
{
if (newCap > MaxCapacity || newCap == 0)
{
ClearB();
}
else
{
auto tmp = (T*)realloc(m_B, (m_Cap = newCap) * TSize);
if (unlikely(tmp == nullptr))
{
ClearB();
}
else
{
m_B = tmp;
}
}
}
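// Usage sketch (illustrative): the buffer may be dropped on reallocation failure,
// so callers are expected to re-check before touching it:
//   arr.reserve_exactly(1024);
//   if (arr.uninitialized_or_erroneous()) { /* out-of-memory path */ }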
void reserve_exactly(size_t newCap, unsigned char initVal)
{
if (newCap > MaxCapacity || newCap == 0)
{
ClearB();
}
else if (m_B == nullptr)
{
assert(m_Cap == 0);
m_B = InitAlloc(newCap, initVal);
m_Cap = newCap;
}
else
{
auto tmp = (T*)realloc((void*)m_B, newCap * TSize);
if (unlikely(tmp == nullptr))
{
ClearB();
}
else
{
m_B = tmp;
if (newCap > m_Cap)
{
std::memset((void*)(m_B + m_Cap), initVal, (newCap - m_Cap) * TSize);
}
m_Cap = newCap;
}
}
}
void reserve_atleast(size_t newCap)
{
reserve_exactly(GetNewCap(newCap));
}
void reserve_atleast_with_max(size_t newCap, size_t maxCap)
{
const auto tentativeNewCap = GetNewCap(newCap);
reserve_exactly(tentativeNewCap > maxCap ? maxCap : tentativeNewCap);
}
void reserve_atleast_with_max(size_t newCap, size_t maxCap, unsigned char initVal)
{
const auto tentativeNewCap = GetNewCap(newCap);
reserve_exactly(tentativeNewCap > maxCap ? maxCap : tentativeNewCap, initVal);
}
void reserve_atleast(size_t newCap, unsigned char initVal)
{
reserve_exactly(GetNewCap(newCap), initVal);
}
void reserve_beyond_if_required(size_t indToInclude, bool isResizeAtLeast)
{
if (indToInclude >= cap())
{
if (isResizeAtLeast)
{
reserve_atleast(indToInclude + 1);
}
else
{
reserve_exactly(indToInclude + 1);
}
}
}
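// Usage sketch (illustrative): grow on demand before writing to an index:
//   arr.reserve_beyond_if_required(i, true); // amortized growth via reserve_atleast
//   if (!arr.uninitialized_or_erroneous()) arr[i] = v;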
void memset(unsigned char newVal, size_t startIndIncl = 0)
{
memset(newVal, startIndIncl, m_Cap);
}
void memset(unsigned char newVal, size_t startIndIncl, size_t endIndExcl)
{
assert(startIndIncl < endIndExcl && endIndExcl <= cap());
std::memset((void*)(m_B + startIndIncl), newVal, (endIndExcl - startIndIncl) * TSize);
}
// Non-overlapping memcpy
void memcpy(size_t outStartInd, size_t inpStartInd, size_t entriesToCopy)
{
assert(inpStartInd < m_Cap && outStartInd < m_Cap);
assert(inpStartInd + entriesToCopy > inpStartInd && inpStartInd + entriesToCopy <= m_Cap);
assert(outStartInd + entriesToCopy > outStartInd && outStartInd + entriesToCopy <= m_Cap);
// No overlap
assert(outStartInd >= inpStartInd + entriesToCopy || inpStartInd >= outStartInd + entriesToCopy);
std::memcpy(m_B + outStartInd, m_B + inpStartInd, entriesToCopy * sizeof(*m_B));
}
// Potentially-overlapping memmove
void memmove(size_t outStartInd, size_t inpStartInd, size_t entriesToCopy)
{
if (outStartInd >= inpStartInd + entriesToCopy || inpStartInd >= outStartInd + entriesToCopy)
{
memcpy(outStartInd, inpStartInd, entriesToCopy);
}
else
{
assert(inpStartInd < m_Cap && outStartInd < m_Cap);
assert(inpStartInd + entriesToCopy > inpStartInd && inpStartInd + entriesToCopy <= m_Cap);
assert(outStartInd + entriesToCopy > outStartInd && outStartInd + entriesToCopy <= m_Cap);
std::memmove(m_B + outStartInd, m_B + inpStartInd, entriesToCopy * sizeof(*m_B));
}
}
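// Sketch (illustrative): shifting a region left by one entry overlaps with itself,
// so the memmove wrapper must be used rather than memcpy:
//   arr.memmove(/*outStartInd*/ 0, /*inpStartInd*/ 1, /*entriesToCopy*/ n - 1);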
inline auto get_span_cap(size_t startIndIncl = 0)
{
assert(startIndIncl < m_Cap);
return span(m_B + startIndIncl, m_Cap - startIndIncl);
}
inline auto get_span_cap(size_t startIndIncl, size_t sz)
{
assert(startIndIncl + sz >= startIndIncl);
assert(startIndIncl + sz <= m_Cap);
return span(m_B + startIndIncl, sz);
}
inline auto get_const_span_cap(size_t startIndIncl = 0) const
{
assert(startIndIncl < m_Cap);
return span(m_B + startIndIncl, m_Cap - startIndIncl);
}
inline auto get_const_span_cap(size_t startIndIncl, size_t sz) const
{
assert(startIndIncl + sz >= startIndIncl);
assert(startIndIncl + sz <= m_Cap);
return span(m_B + startIndIncl, sz);
}
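// Usage sketch (illustrative; use(x) is a placeholder): iterate over the buffer through a span:
//   for (const auto& x : arr.get_span_cap()) { use(x); }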
inline T* get_ptr(size_t i = 0)
{
assert(i < m_Cap);
return m_B + i;
}
inline const T* get_const_ptr(size_t i = 0) const
{
assert(i < m_Cap);
return m_B + i;
}
inline T& operator [](size_t i) const
{
assert(i < cap());
return m_B[i];
}
inline size_t cap() const
{
return m_Cap;
}
inline size_t memMb() const
{
return (m_Cap * TSize) / 1000000;
}
inline bool empty() const
{
return cap() == 0;
}
inline bool uninitialized_or_erroneous() const { return m_B == nullptr; }
inline void remove_if_equal_and_cut_capacity(T eqVal)
{
size_t currIndWrite(0);
for (size_t currIndRead = 0; currIndRead != m_Cap; ++currIndRead)
{
if (m_B[currIndRead] != eqVal)
{
m_B[currIndWrite++] = m_B[currIndRead];
}
}
reserve_exactly(currIndWrite);
}
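// Example (illustrative): with contents {3, 7, 3, 9} and eqVal == 3,
// the array becomes {7, 9} and the capacity is cut to 2.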
template <typename TUInd>
void RemoveGarbage(TUInd startInd, TUInd& endInd, function<bool(TUInd clsInd)> IsChunkDeleted, function<TUInd(TUInd clsInd)> ChunkEnd,
function<void(TUInd oldWlInd, TUInd newWlInd)> NotifyAboutRemainingChunkMove = nullptr)
{
auto FindNextDeleted = [&](TUInd firstInd, TUInd lastInd, function<bool(TUInd clsInd)> IsChunkDeleted, function<TUInd(TUInd clsInd)> ChunkEnd, TUInd toInd = numeric_limits<TUInd>::max(), function<void(TUInd oldWlInd, TUInd newWlInd)> NotifyAboutRemainingChunkMove = nullptr)
{
const auto initFirstInd = firstInd;
while (firstInd < lastInd && !IsChunkDeleted(firstInd))
{
const auto nextFirstInd = ChunkEnd(firstInd);
if (NotifyAboutRemainingChunkMove != nullptr)
{
NotifyAboutRemainingChunkMove(firstInd, toInd + firstInd - initFirstInd);
}
firstInd = nextFirstInd;
}
return firstInd;
};
auto FindNextNotDeleted = [&](TUInd firstInd, TUInd lastInd, function<bool(TUInd clsInd)> IsChunkDeleted, function<TUInd(TUInd clsInd)> ChunkEnd)
{
while (firstInd < lastInd && IsChunkDeleted(firstInd))
{
firstInd = ChunkEnd(firstInd);
}
return firstInd;
};
// Find the first deleted chunk and put its index in toInd
TUInd toInd(FindNextDeleted(startInd, endInd, IsChunkDeleted, ChunkEnd, startInd, NotifyAboutRemainingChunkMove));
// Find the first non-deleted chunk after the first deleted chunk and put its index in fromInd
TUInd fromInd(FindNextNotDeleted(toInd, endInd, IsChunkDeleted, ChunkEnd));
// Copy chunks from fromInd to toInd
while (fromInd < endInd)
{
TUInd fromIndEnd = FindNextDeleted(fromInd, endInd, IsChunkDeleted, ChunkEnd, toInd, NotifyAboutRemainingChunkMove);
const auto copiedInds = fromIndEnd - fromInd;
// memmove will still use memcpy, if there is no overlap
memmove(toInd, fromInd, copiedInds);
toInd += copiedInds;
fromInd = FindNextNotDeleted(fromIndEnd, endInd, IsChunkDeleted, ChunkEnd);
}
endInd = toInd;
}
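// Usage sketch (illustrative; assumes T is an unsigned integral type, and isDeleted/remap
// are hypothetical caller-side helpers). Chunks here are clause-like records whose first
// entry stores the number of entries that follow:
//   uint32_t end = usedEntries; // one past the last used entry (hypothetical)
//   arr.RemoveGarbage<uint32_t>(0, end,
//       [&](uint32_t c) { return isDeleted(c); },   // IsChunkDeleted
//       [&](uint32_t c) { return c + 1 + arr[c]; }, // ChunkEnd: header + payload
//       [&](uint32_t oldInd, uint32_t newInd) { remap(oldInd, newInd); });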
static constexpr double MultiplierDef = 1.625;
inline void SetMultiplier(double multiplier = MultiplierDef) { assert(multiplier >= 1.); m_Multiplier = multiplier; }
inline double GetMultiplier() const { return m_Multiplier; }
protected:
static constexpr size_t TSize = sizeof(T);
static constexpr size_t MaxCapacity = (std::numeric_limits<size_t>::max)() / TSize;
T* InitAlloc(size_t initCap, unsigned char initVal)
{
// #topor: are there more page-fault-friendly ways to allocate: (1) 0-initialized memory; (2) non-initialized memory?
// For non-initialized memory, see: https://stackoverflow.com/questions/56411164/can-i-ask-the-kernel-to-populate-fault-in-a-range-of-anonymous-pages
// For 0-initialized memory, first note that gcc now rewrites malloc & memset(0) into calloc.
// However, after this change was made in gcc, there was a substantial deterioration in the performance of a random-access hash table in Facebook's Folly (see https://github.com/facebook/folly/issues/1508)
// One way to block this optimization in gcc is as follows:
// auto p = malloc(n);
// asm volatile ("":"+r"(p));
// // another way to block GCC's optimization is: asm volatile ("":::"memory");
// memset(p, '\0', n);
// We're using calloc directly for now; may want to explore other options later
if (initCap == 0 || initCap > MaxCapacity)
{
return nullptr;
}
if (initVal == 0)
{
return (T*)calloc(initCap, TSize);
}
auto mallocRes = (T*)malloc(initCap * TSize);
if (unlikely(mallocRes == nullptr))
{
return nullptr;
}
std::memset((void*)mallocRes, initVal, initCap * TSize);
return mallocRes;
}
inline size_t GetNewCap(size_t cap) const
{
const double newCapDouble = cap * m_Multiplier + 2;
return newCapDouble > (double)(std::numeric_limits<size_t>::max)() ? (std::numeric_limits<size_t>::max)() : (size_t)newCapDouble;
}
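// Example (illustrative): with the default multiplier of 1.625, a current capacity
// of 100 grows to (size_t)(100 * 1.625 + 2) == 164 entries.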
inline void ClearB()
{
free(m_B);
m_B = nullptr;
m_Cap = 0;
}
size_t m_Cap = 0;
double m_Multiplier = MultiplierDef;
T* m_B = nullptr;
};
} // namespace Topor