Revision aed7abbcca7574ed438c3b22e27db9bf249e8de3 authored by Xin Tong on 18 May 2018, 00:49:06 UTC, committed by Facebook Github Bot on 18 May 2018, 00:57:48 UTC
Summary:
Benchmark command: TEST_TMPDIR=/dev/shm ./buck-out/gen/rocks/tools/rocks_db_bench --benchmarks=readwhilewriting --num=5000000 -benchmark_write_rate_limit=2000000 --threads=32

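Collected EfficiencySanitizer (esan) data and reordered the fields. Accesses to the 4th and 6th fields (#3 and #5 in the listing below) account for the majority of the accesses to this struct, so they are now grouped together. Overall, this struct accounts for 10%+ of the total struct field accesses in the program (637773011/6107964986).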

==2433831==  class rocksdb::InlineSkipList
==2433831==   size = 48, count = 637773011, ratio = 112412, array access = 0
==2433831==   # 0: offset = 0,   size = 2,       count = 455137, type = i16
==2433831==   # 1: offset = 2,   size = 2,       count = 6,      type = i16
==2433831==   # 2: offset = 4,   size = 4,       count = 182303, type = i32
==2433831==   # 3: offset = 8,   size = 8,       count = 263953900, type = %"class.rocksdb::MemTableRep::KeyComparator"*
==2433831==   # 4: offset = 16,  size = 8,       count = 136409, type = %"class.rocksdb::Allocator"*
==2433831==   # 5: offset = 24,  size = 8,       count = 366628820, type = %"struct.rocksdb::InlineSkipList<const rocksdb::MemTableRep::KeyComparator &>::Node"*
==2433831==   # 6: offset = 32,  size = 4,       count = 6280031, type = %"struct.std::atomic" = type { %"struct.std::__atomic_base" }
==2433831==   # 7: offset = 40,  size = 8,       count = 136405, type = %"struct.rocksdb::InlineSkipList<const rocksdb::MemTableRep::KeyComparator &>::Splice"*
==2433831==EfficiencySanitizer: total struct field access count = 6107964986

Before re-ordering
[trentxintong@devbig460.frc2 ~/fbsource/fbcode]$ fgrep readwhilewriting without-ro.log
readwhilewriting :       0.036 micros/op 27545605 ops/sec;   26.8 MB/s
(45954 of 5000000 found)
readwhilewriting :       0.036 micros/op 28024240 ops/sec;   27.2 MB/s
(43158 of 5000000 found)
readwhilewriting :       0.037 micros/op 27345145 ops/sec;   27.1 MB/s
(46725 of 5000000 found)
readwhilewriting :       0.037 micros/op 27072588 ops/sec;   27.3 MB/s
(42605 of 5000000 found)
readwhilewriting :       0.034 micros/op 29578781 ops/sec;   28.3 MB/s
(44294 of 5000000 found)
readwhilewriting :       0.035 micros/op 28528304 ops/sec;   27.7 MB/s
(44176 of 5000000 found)
readwhilewriting :       0.037 micros/op 27075497 ops/sec;   26.5 MB/s
(43763 of 5000000 found)
readwhilewriting :       0.036 micros/op 28024117 ops/sec;   27.1 MB/s
(40622 of 5000000 found)
readwhilewriting :       0.037 micros/op 27078709 ops/sec;   27.6 MB/s
(47774 of 5000000 found)
readwhilewriting :       0.034 micros/op 29020689 ops/sec;   28.1 MB/s
(45066 of 5000000 found)
AVERAGE()=27.37 MB/s

After re-ordering
[trentxintong@devbig460.frc2 ~/fbsource/fbcode]$ fgrep readwhilewriting ro.log
readwhilewriting :       0.036 micros/op 27542409 ops/sec;   27.7 MB/s
(46163 of 5000000 found)
readwhilewriting :       0.036 micros/op 28021148 ops/sec;   28.2 MB/s
(46155 of 5000000 found)
readwhilewriting :       0.036 micros/op 28021035 ops/sec;   27.3 MB/s
(44039 of 5000000 found)
readwhilewriting :       0.036 micros/op 27538659 ops/sec;   27.5 MB/s
(46781 of 5000000 found)
readwhilewriting :       0.036 micros/op 28028604 ops/sec;   27.6 MB/s
(44689 of 5000000 found)
readwhilewriting :       0.036 micros/op 27541452 ops/sec;   27.3 MB/s
(43156 of 5000000 found)
readwhilewriting :       0.034 micros/op 29041338 ops/sec;   28.8 MB/s
(44895 of 5000000 found)
readwhilewriting :       0.036 micros/op 27784974 ops/sec;   26.3 MB/s
(39963 of 5000000 found)
readwhilewriting :       0.036 micros/op 27538892 ops/sec;   28.1 MB/s
(46570 of 5000000 found)
readwhilewriting :       0.038 micros/op 26622473 ops/sec;   27.0 MB/s
(43236 of 5000000 found)
AVERAGE()=27.58 MB/s
Closes https://github.com/facebook/rocksdb/pull/3855

Reviewed By: siying

Differential Revision: D8048781

Pulled By: trentxintong

fbshipit-source-id: bc9807a9845e2a92cb171ce1ecb5a2c8a51f1481
1 parent fa43948
Raw File
internal_iterator.h
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//

#pragma once

#include <string>
#include "rocksdb/comparator.h"
#include "rocksdb/iterator.h"
#include "rocksdb/status.h"

namespace rocksdb {

class PinnedIteratorsManager;

class InternalIterator : public Cleanable {
 public:
  InternalIterator() {}
  virtual ~InternalIterator() = default;

  // Reports whether the iterator currently sits on a key/value pair.
  // An iterator is always in exactly one of two states: positioned on an
  // entry (valid) or not valid. Never returns true while !status().ok().
  virtual bool Valid() const = 0;

  // Moves to the first key in the source. Afterwards Valid() holds iff the
  // source has at least one entry.
  virtual void SeekToFirst() = 0;

  // Moves to the last key in the source. Afterwards Valid() holds iff the
  // source has at least one entry.
  virtual void SeekToLast() = 0;

  // Moves to the first key in the source that is at or past target.
  // Afterwards Valid() holds iff the source contains such an entry.
  //
  // Every Seek*() method discards whatever error status() the iterator
  // carried before the call; once the seek returns, status() reflects only
  // an error (if any) raised by that seek, never an earlier one.
  virtual void Seek(const Slice& target) = 0;

  // Moves to the last key in the source that is at or before target.
  // Afterwards Valid() holds iff the source contains such an entry.
  virtual void SeekForPrev(const Slice& target) = 0;

  // Advances to the following entry in the source. Afterwards Valid() holds
  // iff the iterator had not been sitting on the last entry of the source.
  // REQUIRES: Valid()
  virtual void Next() = 0;

  // Steps back to the preceding entry in the source. Afterwards Valid() holds
  // iff the iterator had not been sitting on the first entry of the source.
  // REQUIRES: Valid()
  virtual void Prev() = 0;

  // Key of the current entry. The storage behind the returned slice stays
  // valid only until the iterator is next modified.
  // REQUIRES: Valid()
  virtual Slice key() const = 0;

  // Value of the current entry. The storage behind the returned slice stays
  // valid only until the iterator is next modified.
  // REQUIRES: Valid()
  virtual Slice value() const = 0;

  // Returns the error that has occurred, if any; otherwise an ok status.
  // When non-blocking IO was requested and the operation could not be served
  // without performing IO, the result is Status::Incomplete().
  virtual Status status() const = 0;

  // True when the iterator became invalid because it moved beyond the
  // iterator upper bound. The default implementation reports false.
  virtual bool IsOutOfBound() { return false; }

  // Hands the PinnedIteratorsManager to this iterator. Most iterators don't
  // talk to a PinnedIteratorsManager, hence the default is a no-op; iterators
  // that do need it override this method and keep the supplied pointer for
  // that communication.
  virtual void SetPinnedItersMgr(PinnedIteratorsManager* /*pinned_iters_mgr*/) {
  }

  // When true, the Slice handed out by key() remains valid for as long as
  // PinnedIteratorsManager::ReleasePinnedData has not been called and the
  // iterator has not been deleted.
  //
  // IsKeyPinned() is guaranteed to always return true when both hold:
  //  - the iterator was created with ReadOptions::pin_data = true
  //  - the DB tables were created with
  //    BlockBasedTableOptions::use_delta_encoding set to false.
  virtual bool IsKeyPinned() const { return false; }

  // When true, the Slice handed out by value() remains valid for as long as
  // PinnedIteratorsManager::ReleasePinnedData has not been called and the
  // iterator has not been deleted.
  virtual bool IsValuePinned() const { return false; }

  // Property lookup hook. The default implementation ignores both arguments
  // and reports NotSupported.
  virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) {
    return Status::NotSupported("");
  }

 protected:
  // Helper built from Seek()/SeekToLast()/Prev(): seeks to target, falls
  // back to the last entry if that seek left the iterator invalid, then
  // walks backwards for as long as cmp->Compare(target, key()) is negative.
  // On return the iterator is either invalid or on an entry for which that
  // comparison is non-negative.
  void SeekForPrevImpl(const Slice& target, const Comparator* cmp) {
    Seek(target);
    if (!Valid()) {
      // The forward seek found nothing; restart from the last entry.
      SeekToLast();
    }
    // Back up past every entry that still compares greater than target.
    for (; Valid() && cmp->Compare(target, key()) < 0; Prev()) {
    }
  }

 private:
  // Copying is disallowed.
  InternalIterator(const InternalIterator&) = delete;
  InternalIterator& operator=(const InternalIterator&) = delete;
};

// Return an empty iterator (yields nothing).
// NOTE(review): the result is a raw pointer and the definition is not visible
// from this header; presumably the caller takes ownership -- confirm.
extern InternalIterator* NewEmptyInternalIterator();

// Return an empty iterator with the specified status.
// NOTE(review): presumably status() on the returned iterator reports
// `status`, and the caller takes ownership of the raw pointer -- the
// definition is not visible from this header, so confirm both.
extern InternalIterator* NewErrorInternalIterator(const Status& status);

}  // namespace rocksdb
back to top