Revision aed7abbcca7574ed438c3b22e27db9bf249e8de3 authored by Xin Tong on 18 May 2018, 00:49:06 UTC, committed by Facebook Github Bot on 18 May 2018, 00:57:48 UTC
Summary:
Running. TEST_TMPDIR=/dev/shm ./buck-out/gen/rocks/tools/rocks_db_bench --benchmarks=readwhilewriting --num=5000000 -benchmark_write_rate_limit=2000000 --threads=32

Collected esan data and reorder field. Accesses to 4th and 6th fields take majority of the access.  Group them. Overall, this struct takes 10%+ of the total accesses in the program. (637773011/6107964986)

==2433831==  class rocksdb::InlineSkipList
==2433831==   size = 48, count = 637773011, ratio = 112412, array access = 0
==2433831==   # 0: offset = 0,   size = 2,       count = 455137, type = i16
==2433831==   # 1: offset = 2,   size = 2,       count = 6,      type = i16
==2433831==   # 2: offset = 4,   size = 4,       count = 182303, type = i32
==2433831==   # 3: offset = 8,   size = 8,       count = 263953900, type = %"class.rocksdb::MemTableRep::KeyComparator"*
==2433831==   # 4: offset = 16,  size = 8,       count = 136409, type = %"class.rocksdb::Allocator"*
==2433831==   # 5: offset = 24,  size = 8,       count = 366628820, type = %"struct.rocksdb::InlineSkipList<const rocksdb::MemTableRep::KeyComparator &>::Node"*
==2433831==   # 6: offset = 32,  size = 4,       count = 6280031, type = %"struct.std::atomic" = type { %"struct.std::__atomic_base" }
==2433831==   # 7: offset = 40,  size = 8,       count = 136405, type = %"struct.rocksdb::InlineSkipList<const rocksdb::MemTableRep::KeyComparator &>::Splice"*
==2433831==EfficiencySanitizer: total struct field access count = 6107964986

Before re-ordering
[trentxintong@devbig460.frc2 ~/fbsource/fbcode]$ fgrep readwhilewriting
without-ro.log
readwhilewriting :       0.036 micros/op 27545605 ops/sec;   26.8 MB/s
(45954 of 5000000 found)
readwhilewriting :       0.036 micros/op 28024240 ops/sec;   27.2 MB/s
(43158 of 5000000 found)
readwhilewriting :       0.037 micros/op 27345145 ops/sec;   27.1 MB/s
(46725 of 5000000 found)
readwhilewriting :       0.037 micros/op 27072588 ops/sec;   27.3 MB/s
(42605 of 5000000 found)
readwhilewriting :       0.034 micros/op 29578781 ops/sec;   28.3 MB/s
(44294 of 5000000 found)
readwhilewriting :       0.035 micros/op 28528304 ops/sec;   27.7 MB/s
(44176 of 5000000 found)
readwhilewriting :       0.037 micros/op 27075497 ops/sec;   26.5 MB/s
(43763 of 5000000 found)
readwhilewriting :       0.036 micros/op 28024117 ops/sec;   27.1 MB/s
(40622 of 5000000 found)
readwhilewriting :       0.037 micros/op 27078709 ops/sec;   27.6 MB/s
(47774 of 5000000 found)
readwhilewriting :       0.034 micros/op 29020689 ops/sec;   28.1 MB/s
(45066 of 5000000 found)
AVERAGE()=27.37 MB/s

After re-ordering
[trentxintong@devbig460.frc2 ~/fbsource/fbcode]$ fgrep readwhilewriting
ro.log
readwhilewriting :       0.036 micros/op 27542409 ops/sec;   27.7 MB/s
(46163 of 5000000 found)
readwhilewriting :       0.036 micros/op 28021148 ops/sec;   28.2 MB/s
(46155 of 5000000 found)
readwhilewriting :       0.036 micros/op 28021035 ops/sec;   27.3 MB/s
(44039 of 5000000 found)
readwhilewriting :       0.036 micros/op 27538659 ops/sec;   27.5 MB/s
(46781 of 5000000 found)
readwhilewriting :       0.036 micros/op 28028604 ops/sec;   27.6 MB/s
(44689 of 5000000 found)
readwhilewriting :       0.036 micros/op 27541452 ops/sec;   27.3 MB/s
(43156 of 5000000 found)
readwhilewriting :       0.034 micros/op 29041338 ops/sec;   28.8 MB/s
(44895 of 5000000 found)
readwhilewriting :       0.036 micros/op 27784974 ops/sec;   26.3 MB/s
(39963 of 5000000 found)
readwhilewriting :       0.036 micros/op 27538892 ops/sec;   28.1 MB/s
(46570 of 5000000 found)
readwhilewriting :       0.038 micros/op 26622473 ops/sec;   27.0 MB/s
(43236 of 5000000 found)
AVERAGE()=27.58 MB/s
Closes https://github.com/facebook/rocksdb/pull/3855

Reviewed By: siying

Differential Revision: D8048781

Pulled By: trentxintong

fbshipit-source-id: bc9807a9845e2a92cb171ce1ecb5a2c8a51f1481
1 parent fa43948
Raw File
optimistic_transaction_example.cc
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#ifndef ROCKSDB_LITE

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"

using namespace rocksdb;

std::string kDBPath = "/tmp/rocksdb_transaction_example";

int main() {
  // open DB
  Options options;
  options.create_if_missing = true;
  DB* db;
  OptimisticTransactionDB* txn_db;

  Status s = OptimisticTransactionDB::Open(options, kDBPath, &txn_db);
  assert(s.ok());
  db = txn_db->GetBaseDB();

  WriteOptions write_options;
  ReadOptions read_options;
  OptimisticTransactionOptions txn_options;
  std::string value;

  ////////////////////////////////////////////////////////
  //
  // Simple OptimisticTransaction Example ("Read Committed")
  //
  ////////////////////////////////////////////////////////

  // Start a transaction
  Transaction* txn = txn_db->BeginTransaction(write_options);
  assert(txn);

  // Read a key in this transaction
  s = txn->Get(read_options, "abc", &value);
  assert(s.IsNotFound());

  // Write a key in this transaction
  txn->Put("abc", "def");

  // Read a key OUTSIDE this transaction. Does not affect txn.
  s = db->Get(read_options, "abc", &value);

  // Write a key OUTSIDE of this transaction.
  // Does not affect txn since this is an unrelated key.  If we wrote key 'abc'
  // here, the transaction would fail to commit.
  s = db->Put(write_options, "xyz", "zzz");

  // Commit transaction
  s = txn->Commit();
  assert(s.ok());
  delete txn;

  ////////////////////////////////////////////////////////
  //
  // "Repeatable Read" (Snapshot Isolation) Example
  //   -- Using a single Snapshot
  //
  ////////////////////////////////////////////////////////

  // Set a snapshot at start of transaction by setting set_snapshot=true
  txn_options.set_snapshot = true;
  txn = txn_db->BeginTransaction(write_options, txn_options);

  const Snapshot* snapshot = txn->GetSnapshot();

  // Write a key OUTSIDE of transaction
  db->Put(write_options, "abc", "xyz");

  // Read a key using the snapshot
  read_options.snapshot = snapshot;
  s = txn->GetForUpdate(read_options, "abc", &value);
  assert(value == "def");

  // Attempt to commit transaction
  s = txn->Commit();

  // Transaction could not commit since the write outside of the txn conflicted
  // with the read!
  assert(s.IsBusy());

  delete txn;
  // Clear snapshot from read options since it is no longer valid
  read_options.snapshot = nullptr;
  snapshot = nullptr;

  ////////////////////////////////////////////////////////
  //
  // "Read Committed" (Monotonic Atomic Views) Example
  //   --Using multiple Snapshots
  //
  ////////////////////////////////////////////////////////

  // In this example, we set the snapshot multiple times.  This is probably
  // only necessary if you have very strict isolation requirements to
  // implement.

  // Set a snapshot at start of transaction
  txn_options.set_snapshot = true;
  txn = txn_db->BeginTransaction(write_options, txn_options);

  // Do some reads and writes to key "x"
  read_options.snapshot = db->GetSnapshot();
  s = txn->Get(read_options, "x", &value);
  txn->Put("x", "x");

  // Do a write outside of the transaction to key "y"
  s = db->Put(write_options, "y", "y");

  // Set a new snapshot in the transaction
  txn->SetSnapshot();
  read_options.snapshot = db->GetSnapshot();

  // Do some reads and writes to key "y"
  s = txn->GetForUpdate(read_options, "y", &value);
  txn->Put("y", "y");

  // Commit.  Since the snapshot was advanced, the write done outside of the
  // transaction does not prevent this transaction from Committing.
  s = txn->Commit();
  assert(s.ok());
  delete txn;
  // Clear snapshot from read options since it is no longer valid
  read_options.snapshot = nullptr;

  // Cleanup
  delete txn_db;
  DestroyDB(kDBPath, options);
  return 0;
}

#endif  // ROCKSDB_LITE
back to top