https://github.com/lorenzhs/BuRR
Raw File
Tip revision: 1c62832ad7d6eab5b337f386955868c3ce9a54ea authored by Lorenz Hübschle-Schneider on 11 September 2021, 12:56:53 UTC
README: paper link, bibtex
Tip revision: 1c62832
test_helpers.hpp
//  Copyright (c) Lorenz Hübschle-Schneider
//  Copyright (c) Facebook, Inc. and its affiliates.
//  All Rights Reserved.  This source code is licensed under the Apache 2.0
//  License (found in the LICENSE file in the root directory).

#pragma once

#include "config.hpp"
#include "rocksdb/coding.h"
#include "rocksdb/slice.h"

#include <xxhash.h>

#include <cmath>
#include <cstdint>
#include <string>
#include <utility>

namespace ribbon::test {

// Baseline configuration for tests: ribbon::DefaultConfig instantiated with
// uint32_t, uint8_t and rocksdb::Slice (coefficient row, result row and key
// type, going by the parameter names used for DefaultConfig elsewhere in this
// file), with the threshold mode pinned to ThreshMode::normal and the `log`
// flag switched off.
struct BasicConfig
    : public ribbon::DefaultConfig<uint32_t, uint8_t, rocksdb::Slice> {
    static constexpr bool log = false;
    static constexpr ThreshMode kThreshMode = ThreshMode::normal;
};

// BasicConfig with kIsFilter turned off.
// NOTE(review): presumably this selects retrieval rather than filter
// behaviour -- confirm against config.hpp.
struct RetrievalConfig : BasicConfig {
    static constexpr bool kIsFilter = false;
};

// DefaultConfig for caller-chosen row and key types, with kIsFilter off,
// the threshold mode pinned to ThreshMode::normal, and the `log` flag off.
template <class CoeffRow, class ResultRow, class Key>
struct DefaultRetrievalConfig : DefaultConfig<CoeffRow, ResultRow, Key> {
    static constexpr bool log = false;
    static constexpr ThreshMode kThreshMode = ThreshMode::normal;
    static constexpr bool kIsFilter = false;
};


// RConfig selected by bit widths (coeff_bits, result_bits) rather than by
// row types, with the `log` flag switched off.
template <size_t coeff_bits, size_t result_bits>
struct QuietRConfig : RConfig<coeff_bits, result_bits> {
    static constexpr bool log = false;
};


// Generate semi-sequential keys.
//
// Each key is a fixed prefix followed by an 8-byte fixed-width field derived
// from a running 64-bit id. The struct offers a minimal iterator-like
// interface (++, +, +=, *, ==, !=, -) so that two generators sharing a prefix
// can describe a range of keys.
struct StandardKeyGen {
    // Start at `id`. The key buffer is `prefix` with an 8-byte placeholder
    // appended; operator*() overwrites those trailing bytes on every
    // dereference.
    StandardKeyGen(const std::string& prefix, uint64_t id)
        : id_(id), str_(prefix) {
        rocksdb::PutFixed64(&str_, /*placeholder*/ 0);
    }

    // Prefix increment (only one of prefix/postfix is required)
    StandardKeyGen& operator++() {
        ++id_;
        return *this;
    }

    // Returns a copy of this generator advanced by `i` ids. Const because it
    // leaves *this untouched, so it is usable on const generators too.
    StandardKeyGen operator+(uint64_t i) const {
        StandardKeyGen copy = *this;
        copy += i;
        return copy;
    }

    // Advances this generator by `i` ids in place.
    StandardKeyGen& operator+=(uint64_t i) {
        id_ += i;
        return *this;
    }

    // Materialises the key for the current id into the internal buffer and
    // returns a reference to it. The reference is only meaningful until the
    // next dereference of this generator, which rewrites the same buffer.
    const std::string& operator*() {
        // Use multiplication to mix things up a little in the key
        rocksdb::EncodeFixed64(&str_[str_.size() - 8],
                               id_ * uint64_t{0x1500000001});
        return str_;
    }

    bool operator==(const StandardKeyGen& other) const {
        // Same prefix is assumed
        return id_ == other.id_;
    }
    bool operator!=(const StandardKeyGen& other) const {
        // Same prefix is assumed
        return id_ != other.id_;
    }
    // Distance in ids between two generators (same prefix is assumed).
    ssize_t operator-(const StandardKeyGen& other) const {
        return id_ - other.id_;
    }

    uint64_t id_;      // current position
    std::string str_;  // prefix + 8 trailing bytes rewritten by operator*()
};

// Generate small sequential keys, that can misbehave with sequential seeds
// as in https://github.com/Cyan4973/xxHash/issues/469.
// These keys are only heuristically unique, but that's OK with 64 bits,
// for testing purposes.
//
// The key is just the 8-byte fixed-width encoding of the id; the prefix only
// contributes a hashed offset to the starting id.
struct SmallKeyGen {
    // Start at `id` plus a hash of `prefix`, and size the key buffer by
    // writing the initial 8-byte encoding into it.
    SmallKeyGen(const std::string& prefix, uint64_t id) : id_(id) {
        // Hash the prefix for a heuristically unique offset
        id_ += XXH3_64bits(prefix.c_str(), prefix.size());
        rocksdb::PutFixed64(&str_, id_);
    }

    // Prefix increment (only one of prefix/postfix is required)
    SmallKeyGen& operator++() {
        ++id_;
        return *this;
    }

    // Returns a copy of this generator advanced by `i` ids. Const because it
    // leaves *this untouched, so it is usable on const generators too.
    SmallKeyGen operator+(uint64_t i) const {
        SmallKeyGen copy = *this;
        copy += i;
        return copy;
    }

    // Advances this generator by `i` ids in place.
    SmallKeyGen& operator+=(uint64_t i) {
        id_ += i;
        return *this;
    }

    // Encodes the current id into the internal buffer and returns a
    // reference to it. The reference is only meaningful until the next
    // dereference of this generator, which rewrites the same buffer.
    const std::string& operator*() {
        rocksdb::EncodeFixed64(&str_[str_.size() - 8], id_);
        return str_;
    }

    bool operator==(const SmallKeyGen& other) const {
        return id_ == other.id_;
    }
    bool operator!=(const SmallKeyGen& other) const {
        return id_ != other.id_;
    }
    // Distance in ids between two generators built from the same prefix.
    // Provided for parity with StandardKeyGen and RetrievalInputGen so the
    // three generators are interchangeable in distance-computing code.
    ssize_t operator-(const SmallKeyGen& other) const {
        return id_ - other.id_;
    }

    uint64_t id_;      // current position (already offset by the prefix hash)
    std::string str_;  // 8-byte buffer rewritten by operator*()
};


// Generate (key, value) pairs for retrieval tests.
//
// Keys are built like StandardKeyGen's: a fixed prefix followed by an 8-byte
// fixed-width field derived from a running 64-bit id. The value is a uint8_t
// derived from the same id.
struct RetrievalInputGen {
    // Start at `id`. The key buffer is `prefix` with an 8-byte placeholder
    // appended; operator*() overwrites those trailing bytes on every
    // dereference.
    RetrievalInputGen(const std::string& prefix, uint64_t id) : id_(id) {
        val_.first = prefix;
        rocksdb::PutFixed64(&val_.first, /*placeholder*/ 0);
    }

    // Prefix increment (only one of prefix/postfix is required)
    RetrievalInputGen& operator++() {
        ++id_;
        return *this;
    }

    // Returns a copy of this generator advanced by `i` ids. Const because it
    // leaves *this untouched, so it is usable on const generators too.
    RetrievalInputGen operator+(uint64_t i) const {
        RetrievalInputGen copy = *this;
        copy += i;
        return copy;
    }

    // Advances this generator by `i` ids in place.
    RetrievalInputGen& operator+=(uint64_t i) {
        id_ += i;
        return *this;
    }

    // Materialises the (key, value) pair for the current id into the internal
    // buffer and returns a reference to it. The reference is only meaningful
    // until the next dereference, which rewrites the same buffer.
    const std::pair<std::string, uint8_t>& operator*() {
        // Use multiplication to mix things up a little in the key
        rocksdb::EncodeFixed64(&val_.first[val_.first.size() - 8],
                               id_ * uint64_t{0x1500000001});
        // Occasionally repeat values etc.
        val_.second = static_cast<uint8_t>(id_ * 7 / 8);
        return val_;
    }

    // Member access on the current pair; goes through operator*() so the
    // buffer is refreshed first.
    const std::pair<std::string, uint8_t>* operator->() {
        return &**this;
    }

    // Distance in ids between two generators (same prefix is assumed).
    ssize_t operator-(const RetrievalInputGen& other) const {
        return id_ - other.id_;
    }

    bool operator==(const RetrievalInputGen& other) const {
        // Same prefix is assumed
        return id_ == other.id_;
    }
    bool operator!=(const RetrievalInputGen& other) const {
        // Same prefix is assumed
        return id_ != other.id_;
    }

    uint64_t id_;                          // current position
    std::pair<std::string, uint8_t> val_;  // buffer rewritten by operator*()
};

// Copied from rocksdb util/ribbon_test.cpp:
// For testing Poisson-distributed (or similar) statistics, get value for
// `stddevs_allowed` standard deviations above expected mean
// `expected_count`.
// (Poisson approximates Binomial only if probability of a trial being
// in the count is low.)
//
// Returns expected_count + stddevs_allowed * sqrt(expected_count) + 1,
// truncated to an integer.
//
// Declared `inline` because this definition lives in a header: without it,
// including the header from more than one translation unit yields multiple
// definitions of the same function at link time.
inline uint64_t PoissonUpperBound(double expected_count,
                                  double stddevs_allowed) {
    return static_cast<uint64_t>(
        expected_count + stddevs_allowed * std::sqrt(expected_count) + 1.0);
}

} // namespace ribbon::test
back to top