Revision faba3fb53defe4fa32f32202d459726ad49f4033 authored by Andrew Kryczka on 06 April 2018, 03:58:35 UTC, committed by Facebook Github Bot on 06 April 2018, 04:13:21 UTC
Summary:
When `max_valid_backups_to_open` is set, the `BackupEngine` doesn't know about the files referenced by existing backups. This PR prevents us from deleting valid files when that option is set, in cases where we are unable to accurately determine refcount. There are warnings logged when we may miss deleting unreferenced files, and a recommendation in the header for users to periodically unset this option and run a full `GarbageCollect`.
Closes https://github.com/facebook/rocksdb/pull/3518

Differential Revision: D7008331

Pulled By: ajkr

fbshipit-source-id: 87907f964dc9716e229d08636a895d2fc7b72305
1 parent 6571770
Raw File
column_aware_encoding_test.cc
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
#ifndef ROCKSDB_LITE

#include <vector>
#include "util/testharness.h"
#include "util/testutil.h"
#include "utilities/col_buf_decoder.h"
#include "utilities/col_buf_encoder.h"

namespace rocksdb {

class ColumnAwareEncodingTest : public testing::Test {
 public:
  ColumnAwareEncodingTest() {}

  ~ColumnAwareEncodingTest() {}
};

// Value-parameterized fixture. The parameter is the fixed column width, in
// bytes, that a test uses for its encoder and decoder.
class ColumnAwareEncodingTestWithSize
    : public ColumnAwareEncodingTest,
      public testing::WithParamInterface<size_t> {
 public:
  ColumnAwareEncodingTestWithSize() = default;

  ~ColumnAwareEncodingTestWithSize() = default;

  // Returns the column widths (in bytes) the parameterized tests run with.
  static std::vector<size_t> GetValues() {
    std::vector<size_t> col_sizes;
    col_sizes.push_back(4);
    col_sizes.push_back(8);
    return col_sizes;
  }
};

// Instantiates every TEST_P of ColumnAwareEncodingTestWithSize once for each
// column size returned by ColumnAwareEncodingTestWithSize::GetValues().
INSTANTIATE_TEST_CASE_P(
    ColumnAwareEncodingTestWithSize, ColumnAwareEncodingTestWithSize,
    ::testing::ValuesIn(ColumnAwareEncodingTestWithSize::GetValues()));

// Round-trips four identical fixed-length values through the
// kColNoCompression encoder/decoder pair and checks the encoded length, the
// encoded bytes and the decoded values.
TEST_P(ColumnAwareEncodingTestWithSize, NoCompressionEncodeDecode) {
  size_t col_size = GetParam();
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new FixedLengthColBufEncoder(col_size, kColNoCompression, false, true));
  std::string str_buf;
  uint64_t base_val = 0x0102030405060708;
  uint64_t val = 0;
  // Only the first col_size bytes of base_val (in memory order) form the
  // value under test; the remaining bytes of val stay zero.
  memcpy(&val, &base_val, col_size);
  const int row_count = 4;
  for (int i = 0; i < row_count; ++i) {
    str_buf.append(reinterpret_cast<char*>(&val), col_size);
  }
  const char* str_buf_ptr = str_buf.c_str();
  for (int i = 0; i < row_count; ++i) {
    // Step to the next row after each append, as the RLE test does. All rows
    // hold the same value, so the encoded output is unaffected.
    str_buf_ptr += col_buf_encoder->Append(str_buf_ptr);
  }
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  // Check correctness of encoded string length
  ASSERT_EQ(row_count * col_size, encoded_data.size());

  const char* encoded_data_ptr = encoded_data.c_str();
  // On little-endian hosts the expected encoded value is val with its
  // col_size bytes reversed; on big-endian hosts it equals val.
  // Zero-initialized so the variable is never read indeterminate, even if the
  // test were instantiated with a column size other than 4 or 8.
  uint64_t expected_encoded_val = 0;
  if (col_size == 8) {
    expected_encoded_val =
        port::kLittleEndian ? 0x0807060504030201 : 0x0102030405060708;
  } else if (col_size == 4) {
    expected_encoded_val =
        port::kLittleEndian ? 0x08070605 : 0x0102030400000000;
  }
  uint64_t encoded_val = 0;
  for (int i = 0; i < row_count; ++i) {
    memcpy(&encoded_val, encoded_data_ptr, col_size);
    // Check correctness of encoded value
    ASSERT_EQ(expected_encoded_val, encoded_val);
    encoded_data_ptr += col_size;
  }

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new FixedLengthColBufDecoder(col_size, kColNoCompression, false, true));
  encoded_data_ptr = encoded_data.c_str();
  encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
  // The output buffer is owned by a smart pointer so it is released even when
  // a fatal ASSERT_* below returns from the test body early.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  // Decode() advances this cursor past the bytes it writes.
  char* decoded_data = decoded_data_base;
  for (int i = 0; i < row_count; ++i) {
    encoded_data_ptr +=
        col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
  }

  // Check correctness of decoded string length
  ASSERT_EQ(row_count * col_size,
            static_cast<size_t>(decoded_data - decoded_data_base));
  decoded_data = decoded_data_base;
  for (int i = 0; i < row_count; ++i) {
    uint64_t decoded_val = 0;
    memcpy(&decoded_val, decoded_data, col_size);
    // Check correctness of decoded value
    ASSERT_EQ(val, decoded_val);
    decoded_data += col_size;
  }
}

// Round-trips four identical fixed-length values through the kColRle
// encoder/decoder pair and checks the encoded length, the encoded value and
// the decoded values.
TEST_P(ColumnAwareEncodingTestWithSize, RleEncodeDecode) {
  size_t col_size = GetParam();
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new FixedLengthColBufEncoder(col_size, kColRle, false, true));
  std::string str_buf;
  uint64_t base_val = 0x0102030405060708;
  uint64_t val = 0;
  // Only the first col_size bytes of base_val (in memory order) form the
  // value under test; the remaining bytes of val stay zero.
  memcpy(&val, &base_val, col_size);
  const int row_count = 4;
  for (int i = 0; i < row_count; ++i) {
    str_buf.append(reinterpret_cast<char*>(&val), col_size);
  }
  const char* str_buf_ptr = str_buf.c_str();
  for (int i = 0; i < row_count; ++i) {
    str_buf_ptr += col_buf_encoder->Append(str_buf_ptr);
  }
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  // Check correctness of encoded string length: the four equal rows are
  // expected to collapse to one value plus one extra byte (presumably the
  // run length).
  ASSERT_EQ(col_size + 1, encoded_data.size());

  const char* encoded_data_ptr = encoded_data.c_str();
  uint64_t encoded_val = 0;
  memcpy(&encoded_val, encoded_data_ptr, col_size);
  // On little-endian hosts the expected encoded value is val with its
  // col_size bytes reversed; on big-endian hosts it equals val.
  // Zero-initialized so the variable is never read indeterminate, even if the
  // test were instantiated with a column size other than 4 or 8.
  uint64_t expected_encoded_val = 0;
  if (col_size == 8) {
    expected_encoded_val =
        port::kLittleEndian ? 0x0807060504030201 : 0x0102030405060708;
  } else if (col_size == 4) {
    expected_encoded_val =
        port::kLittleEndian ? 0x08070605 : 0x0102030400000000;
  }
  // Check correctness of encoded value
  ASSERT_EQ(expected_encoded_val, encoded_val);

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new FixedLengthColBufDecoder(col_size, kColRle, false, true));
  // The output buffer is owned by a smart pointer so it is released even when
  // a fatal ASSERT_* below returns from the test body early.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  // Decode() advances this cursor past the bytes it writes.
  char* decoded_data = decoded_data_base;
  encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
  for (int i = 0; i < row_count; ++i) {
    encoded_data_ptr +=
        col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
  }
  // Check correctness of decoded string length (expected value first, to
  // match the other tests and produce an accurate failure message).
  ASSERT_EQ(row_count * col_size,
            static_cast<size_t>(decoded_data - decoded_data_base));
  decoded_data = decoded_data_base;
  for (int i = 0; i < row_count; ++i) {
    uint64_t decoded_val = 0;
    memcpy(&decoded_val, decoded_data, col_size);
    // Check correctness of decoded value
    ASSERT_EQ(val, decoded_val);
    decoded_data += col_size;
  }
}

// Round-trips the alternating sequence val1, val2, val1, val2 through the
// kColDeltaVarint encoder/decoder pair and checks the encoded length and the
// decoded values.
TEST_P(ColumnAwareEncodingTestWithSize, DeltaEncodeDecode) {
  size_t col_size = GetParam();
  const int row_count = 4;
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new FixedLengthColBufEncoder(col_size, kColDeltaVarint, false, true));
  std::string str_buf;
  // The two base values differ only in the byte that is first in memory on
  // either endianness.
  uint64_t base_val1 = port::kLittleEndian ? 0x0102030405060708 : 0x0807060504030201;
  uint64_t base_val2 = port::kLittleEndian ? 0x0202030405060708 : 0x0807060504030202;
  uint64_t val1 = 0, val2 = 0;
  memcpy(&val1, &base_val1, col_size);
  memcpy(&val2, &base_val2, col_size);
  const char* str_buf_ptr;
  for (int i = 0; i < row_count / 2; ++i) {
    str_buf = std::string(reinterpret_cast<char*>(&val1), col_size);
    str_buf_ptr = str_buf.c_str();
    col_buf_encoder->Append(str_buf_ptr);

    str_buf = std::string(reinterpret_cast<char*>(&val2), col_size);
    str_buf_ptr = str_buf.c_str();
    col_buf_encoder->Append(str_buf_ptr);
  }
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  // Expected varint length of the first (undiffed) value. Kept as size_t so
  // the comparison with encoded_data.size() does not mix signedness.
  size_t varint_len = 0;
  if (col_size == 8) {
    varint_len = 9;
  } else if (col_size == 4) {
    varint_len = port::kLittleEndian ? 5 : 9;
  }
  // Check encoded string length: first value is original one (val - 0), the
  // coming three are encoded as 1, -1, 1, so they should take 1 byte in varint.
  ASSERT_EQ(varint_len + 3 * 1, encoded_data.size());

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new FixedLengthColBufDecoder(col_size, kColDeltaVarint, false, true));
  // The output buffer is owned by a smart pointer so it is released even when
  // a fatal ASSERT_* below returns from the test body early.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  // Decode() advances this cursor past the bytes it writes.
  char* decoded_data = decoded_data_base;
  const char* encoded_data_ptr = encoded_data.c_str();
  encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
  for (int i = 0; i < row_count; ++i) {
    encoded_data_ptr +=
        col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
  }

  // Check correctness of decoded string length
  ASSERT_EQ(row_count * col_size,
            static_cast<size_t>(decoded_data - decoded_data_base));
  decoded_data = decoded_data_base;

  // Check correctness of decoded data
  for (int i = 0; i < row_count / 2; ++i) {
    uint64_t decoded_val = 0;
    memcpy(&decoded_val, decoded_data, col_size);
    ASSERT_EQ(val1, decoded_val);
    decoded_data += col_size;
    memcpy(&decoded_val, decoded_data, col_size);
    ASSERT_EQ(val2, decoded_val);
    decoded_data += col_size;
  }
}

// Encodes one 18-byte record with the kColDict variable-chunk encoder, decodes
// it again and checks that the decoded bytes match the input byte for byte.
TEST_F(ColumnAwareEncodingTest, ChunkBufEncodeDecode) {
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new VariableChunkColBufEncoder(kColDict));
  // The explicit length is required because the literal has embedded NULs.
  std::string buf("12345678\377\1\0\0\0\0\0\0\0\376", 18);
  col_buf_encoder->Append(buf.c_str());
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  const char* str_ptr = encoded_data.c_str();

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new VariableChunkColBufDecoder(kColDict));
  str_ptr += col_buf_decoder->Init(str_ptr);
  // The output buffer is owned by a smart pointer so it is released even when
  // a fatal ASSERT_EQ below returns from the test body early.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  // Decode() receives the cursor by address; the comparison below reads from
  // the buffer start instead.
  char* decoded_data = decoded_data_base;
  col_buf_decoder->Decode(str_ptr, &decoded_data);
  for (size_t i = 0; i < buf.size(); ++i) {
    ASSERT_EQ(buf[i], decoded_data_base[i]);
  }
}

}  // namespace rocksdb

// Test entry point: hands the command line to Google Test, then runs all
// registered tests and returns their result as the process exit status.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
}

#else

#include <cstdio>

// ROCKSDB_LITE build: there is nothing to test, so report the skip on stderr
// and exit successfully.
int main() {
  static const char kSkipMessage[] =
      "SKIPPED as column aware encoding experiment is not enabled in "
      "ROCKSDB_LITE\n";
  fprintf(stderr, "%s", kSkipMessage);
  return 0;
}
#endif  // ROCKSDB_LITE
back to top