https://github.com/facebook/rocksdb
Raw File
Tip revision: 814ba4bfc1bdc06c9ea843522e187846fa26d610 authored by Andrew Kryczka on 09 April 2024, 19:39:22 UTC
12404+12371 history entry
Tip revision: 814ba4b
trace_replay.h
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <atomic>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>

#include "rocksdb/options.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/status.h"
#include "rocksdb/trace_record.h"
#include "rocksdb/utilities/replayer.h"

namespace ROCKSDB_NAMESPACE {

// This file contains Tracer and Replayer classes that enable capturing and
// replaying RocksDB traces.

class ColumnFamilyHandle;
class ColumnFamilyData;
class DB;
class DBImpl;
class Env;
class Slice;
class SystemClock;
class TraceReader;
class TraceWriter;
class WriteBatch;

struct ReadOptions;
struct TraceOptions;
struct WriteOptions;

extern const std::string kTraceMagic;
const unsigned int kTraceTimestampSize = 8;
const unsigned int kTraceTypeSize = 1;
const unsigned int kTracePayloadLengthSize = 4;
const unsigned int kTraceMetadataSize =
    kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;

static const int kTraceFileMajorVersion = 0;
static const int kTraceFileMinorVersion = 2;

// The data structure that defines a single trace.
struct Trace {
  uint64_t ts;  // timestamp
  TraceType type;
  // Each bit in payload_map stores which corresponding struct member added in
  // the payload. Each TraceType has its corresponding payload struct. For
  // example, if bit at position 0 is set in write payload, then the write batch
  // will be addedd.
  uint64_t payload_map = 0;
  // Each trace type has its own payload_struct, which will be serialized in the
  // payload.
  std::string payload;

  void reset() {
    ts = 0;
    type = kTraceMax;
    payload_map = 0;
    payload.clear();
  }
};

enum TracePayloadType : char {
  // Each member of all query payload structs should have a corresponding flag
  // here. Make sure to add them sequentially in the order of it is added.
  kEmptyPayload = 0,
  kWriteBatchData = 1,
  kGetCFID = 2,
  kGetKey = 3,
  kIterCFID = 4,
  kIterKey = 5,
  kIterLowerBound = 6,
  kIterUpperBound = 7,
  kMultiGetSize = 8,
  kMultiGetCFIDs = 9,
  kMultiGetKeys = 10,
};

class TracerHelper {
 public:
  // Parse the string with major and minor version only
  static Status ParseVersionStr(std::string& v_string, int* v_num);

  // Parse the trace file version and db version in trace header
  static Status ParseTraceHeader(const Trace& header, int* trace_version,
                                 int* db_version);

  // Encode a version 0.1 trace object into the given string.
  static void EncodeTrace(const Trace& trace, std::string* encoded_trace);

  // Decode a string into the given trace object.
  static Status DecodeTrace(const std::string& encoded_trace, Trace* trace);

  // Decode a string into the given trace header.
  static Status DecodeHeader(const std::string& encoded_trace, Trace* header);

  // Set the payload map based on the payload type
  static bool SetPayloadMap(uint64_t& payload_map,
                            const TracePayloadType payload_type);

  // Decode a Trace object into the corresponding TraceRecord.
  // Return Status::OK() if nothing is wrong, record will be set accordingly.
  // Return Status::NotSupported() if the trace type is not support, or the
  // corresponding error status, record will be set to nullptr.
  static Status DecodeTraceRecord(Trace* trace, int trace_file_version,
                                  std::unique_ptr<TraceRecord>* record);
};

// Tracer captures all RocksDB operations using a user-provided TraceWriter.
// Every RocksDB operation is written as a single trace. Each trace will have a
// timestamp and type, followed by the trace payload.
class Tracer {
 public:
  Tracer(SystemClock* clock, const TraceOptions& trace_options,
         std::unique_ptr<TraceWriter>&& trace_writer);
  ~Tracer();

  // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write
  Status Write(WriteBatch* write_batch);

  // Trace Get operations.
  Status Get(ColumnFamilyHandle* cfname, const Slice& key);

  // Trace Iterators.
  Status IteratorSeek(const uint32_t& cf_id, const Slice& key,
                      const Slice& lower_bound, const Slice upper_bound);
  Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key,
                             const Slice& lower_bound, const Slice upper_bound);

  // Trace MultiGet

  Status MultiGet(const size_t num_keys, ColumnFamilyHandle** column_families,
                  const Slice* keys);

  Status MultiGet(const size_t num_keys, ColumnFamilyHandle* column_family,
                  const Slice* keys);

  Status MultiGet(const std::vector<ColumnFamilyHandle*>& column_family,
                  const std::vector<Slice>& keys);

  // Returns true if the trace is over the configured max trace file limit.
  // False otherwise.
  bool IsTraceFileOverMax();

  // Returns true if the order of write trace records must match the order of
  // the corresponding records logged to WAL and applied to the DB.
  bool IsWriteOrderPreserved() { return trace_options_.preserve_write_order; }

  // Writes a trace footer at the end of the tracing
  Status Close();

 private:
  // Write a trace header at the beginning, typically on initiating a trace,
  // with some metadata like a magic number, trace version, RocksDB version, and
  // trace format.
  Status WriteHeader();

  // Write a trace footer, typically on ending a trace, with some metadata.
  Status WriteFooter();

  // Write a single trace using the provided TraceWriter to the underlying
  // system, say, a filesystem or a streaming service.
  Status WriteTrace(const Trace& trace);

  // Helps in filtering and sampling of traces.
  // Returns true if a trace should be skipped, false otherwise.
  bool ShouldSkipTrace(const TraceType& type);

  SystemClock* clock_;
  TraceOptions trace_options_;
  std::unique_ptr<TraceWriter> trace_writer_;
  uint64_t trace_request_count_;
  Status trace_write_status_;
};

}  // namespace ROCKSDB_NAMESPACE
back to top