Revision 7103559f49b46b3287973045f741c0679e3e9e44 authored by Sagar Vemuri on 21 June 2018, 18:02:49 UTC, committed by Facebook Github Bot on 21 June 2018, 18:13:08 UTC
Summary:
This PR extends the improvements in #3282 to also work when using Direct IO.
We see **4.5X performance improvement** in seekrandom benchmark doing long range scans, when using direct reads, on flash.

**Description:**
This change improves the performance of iterators doing long range scans (e.g. big/full index or table scans in MyRocks) by using readahead and prefetching additional data on each disk IO, and storing in a local buffer. This prefetching is automatically enabled on noticing more than 2 IOs for the same table file during iteration. The readahead size starts with 8KB and is exponentially increased on each additional sequential IO, up to a max of 256 KB. This helps in cutting down the number of IOs needed to complete the range scan.

**Implementation Details:**
- Used `FilePrefetchBuffer` as the underlying buffer to store the readahead data. `FilePrefetchBuffer` can now take file_reader, readahead_size and max_readahead_size as input to the constructor, and automatically do readahead.
- `FilePrefetchBuffer::TryReadFromCache` can now call `FilePrefetchBuffer::Prefetch` if readahead is enabled.
- `AlignedBuffer` (which is the underlying store for `FilePrefetchBuffer`) now takes a few additional args in `AlignedBuffer::AllocateNewBuffer` to allow copying data from the old buffer.
- Made sure that partial chunks of data already available in the buffer are not re-read from the device.
- Fixed a couple of cases where `AlignedBuffer::cursize_` was not being properly kept up-to-date.

**Constraints:**
- Similar to #3282, this is currently enabled only when ReadOptions.readahead_size = 0 (which is the default value).
- Since the prefetched data is stored in a temporary buffer allocated on the heap, this could increase memory usage if you have many iterators doing long range scans simultaneously.
- Enabled only for user reads, and disabled for compactions. Compaction reads are controlled by the options `use_direct_io_for_flush_and_compaction` and `compaction_readahead_size`, and the current feature takes precautions not to mess with them.

**Benchmarks:**
I used the same benchmark as used in #3282.
Data fill:
```
TEST_TMPDIR=/data/users/$USER/benchmarks/iter ./db_bench -benchmarks=fillrandom -num=1000000000 -compression_type="none" -level_compaction_dynamic_level_bytes
```

Do a long range scan: seekrandom with a large number of nexts
```
TEST_TMPDIR=/data/users/$USER/benchmarks/iter ./db_bench -benchmarks=seekrandom -use_direct_reads -duration=60 -num=1000000000 -use_existing_db -seek_nexts=10000 -statistics -histogram
```

```
Before:
seekrandom   :   37939.906 micros/op 26 ops/sec;   29.2 MB/s (1636 of 1999 found)
With this change:
seekrandom   :   8527.720 micros/op 117 ops/sec;  129.7 MB/s (6530 of 7999 found)
```
~4.5X perf improvement, averaged over 3 runs.
Closes https://github.com/facebook/rocksdb/pull/3884

Differential Revision: D8082143

Pulled By: sagar0

fbshipit-source-id: 4d7a8561cbac03478663713df4d31ad2620253bb
1 parent 524c6e6
Raw File
make_package.sh
#!/usr/bin/env bash
#
# make_package.sh - build the RocksDB static library and package the installed
# files as a .deb or .rpm with fpm.
#
# Usage: make_package.sh <rocksdb_version>
#
# The interpreter line must be the very first line of the file and must start
# with "#!"; the script relies on bash-only syntax such as [[ ]], pushd and
# popd.

# Abort on the first command that fails outside of a condition.
set -e

# Print an informational message on stdout, tagged with a "[+]" prefix.
# Arguments: $1 - message text
log() {
  printf '%s\n' "[+] $1"
}

# Report an unrecoverable error and terminate the script.
# Arguments: $1 - message text
# Outputs:   "[!] <message>" on stderr, so the diagnostic cannot be mistaken
#            for (or swallowed with) regular output
# Exits with status 1; never returns to the caller.
function fatal() {
  printf '%s\n' "[!] $1" >&2
  exit 1
}

# Detect the host's package-management family and store it in a variable
# chosen by the caller.
# Arguments: $1 - name of the variable that receives the result
# Result:    "centos" when /etc/yum.conf exists, otherwise "ubuntu" when
#            /etc/dpkg/dpkg.cfg exists
# Calls fatal when neither marker file is present.
function platform() {
  local __resultvar=$1
  local __detected
  if [[ -f "/etc/yum.conf" ]]; then
    __detected="centos"
  elif [[ -f "/etc/dpkg/dpkg.cfg" ]]; then
    __detected="ubuntu"
  else
    fatal "Unknown operating system"
  fi
  # printf -v assigns by name without re-parsing the name as shell code, so a
  # malformed variable name is rejected instead of being executed (as it would
  # be with eval).
  printf -v "$__resultvar" '%s' "$__detected"
}
# Detect the distribution once, while the script is being read; the result is
# stored in the global OS variable that the functions in this file test.
platform OS

# Install a distribution package unless it is already installed.
# Globals:   OS (read) - "ubuntu" or "centos"; any other value does nothing
# Arguments: $1   - exact package name, used for the "already installed" check
#            $2.. - extra options; all arguments are forwarded to the installer
# Outputs:   a log line when the package is already present
#
# The installed check asks the package database about exactly $1. Grepping the
# full package list for $1 treats the name as a regex and matches substrings,
# so e.g. "ruby" would be considered installed as soon as "rubygems" is.
function package() {
  case "$OS" in
    ubuntu)
      if dpkg-query -W -f='${Status}' "$1" 2>/dev/null \
        | grep --quiet -F 'install ok installed'; then
        log "$1 is already installed. skipping."
      else
        apt-get install "$@" -y
      fi
      ;;
    centos)
      if rpm -q "$1" >/dev/null 2>&1; then
        log "$1 is already installed. skipping."
      else
        yum install "$@" -y
      fi
      ;;
  esac
}

# Export FPM_OUTPUT with the package type that matches the detected OS:
# "deb" for ubuntu, "rpm" for centos. Any other OS value leaves FPM_OUTPUT
# untouched.
# Globals: OS (read), FPM_OUTPUT (exported)
detect_fpm_output() {
  case "$OS" in
    ubuntu) export FPM_OUTPUT=deb ;;
    centos) export FPM_OUTPUT=rpm ;;
  esac
}
# Pick the fpm output type (FPM_OUTPUT) for the current OS value before main
# runs.
detect_fpm_output

# Install a Ruby gem unless a gem with exactly that name is already installed.
# Arguments: $1   - gem name, used for the "already installed" check
#            $2.. - extra options; all arguments are forwarded to `gem install`
# Outputs:   a log line when the gem is already present
function gem_install() {
  # Compare the first column of `gem list` for equality. A plain grep for $1
  # also matches substrings (e.g. "fpm" inside "fpm-extra") and would skip the
  # install. awk reads the whole list, so `gem list` never sees a closed pipe.
  if gem list | awk -v wanted="$1" '$1 == wanted { found = 1 } END { exit !found }'; then
    log "$1 is already installed. skipping."
  else
    gem install "$@"
  fi
}

# Build the RocksDB static library, install it into ./package and wrap the
# result in a native package with fpm.
# Globals:
#   OS              (read)    - distribution family ("ubuntu" or "centos")
#   FPM_OUTPUT      (read)    - fpm target type passed to `fpm -t`
#   ARCH            (read/written) - word size; set from `getconf LONG_BIT`
#                               when empty or unset
#   CC, CPP, CXX, PATH (exported) - toolchain overrides, set only when the
#                               /vagrant directory exists
# Arguments: $1 - RocksDB version string to stamp on the package
# Calls fatal with a usage message unless exactly one argument is given.
function main() {
  if [[ $# -ne 1 ]]; then
    fatal "Usage: $0 <rocksdb_version>"
  fi
  log "using rocksdb version: $1"

  # Build prerequisites are installed only when /vagrant exists.
  if [[ -d /vagrant ]]; then
    if [[ $OS = "ubuntu" ]]; then
      package g++-4.8
      export CXX=g++-4.8

      # the deb would depend on libgflags2, but the static lib is the only thing
      # installed by make install
      package libgflags-dev

      package ruby-all-dev
    elif [[ $OS = "centos" ]]; then
      # wget saves into the current directory, hence the pushd/popd pair.
      pushd /etc/yum.repos.d
      if [[ ! -f /etc/yum.repos.d/devtools-1.1.repo ]]; then
        wget http://people.centos.org/tru/devtools-1.1/devtools-1.1.repo
      fi
      package devtoolset-1.1-gcc --enablerepo=testing-1.1-devtools-6
      package devtoolset-1.1-gcc-c++ --enablerepo=testing-1.1-devtools-6
      export CC=/opt/centos/devtoolset-1.1/root/usr/bin/gcc
      export CPP=/opt/centos/devtoolset-1.1/root/usr/bin/cpp
      export CXX=/opt/centos/devtoolset-1.1/root/usr/bin/c++
      export PATH=$PATH:/opt/centos/devtoolset-1.1/root/usr/bin
      popd
      if ! rpm -qa | grep --quiet gflags; then
        rpm -i https://github.com/schuhschuh/gflags/releases/download/v2.1.0/gflags-devel-2.1.0-1.amd64.rpm
      fi

      package ruby
      package ruby-devel
      package rubygems
      package rpm-build
    fi
  fi
  gem_install fpm

  make static_lib
  make install INSTALL_PATH=package

  cd package

  local lib_dir=lib
  if [[ -z "$ARCH" ]]; then
    ARCH=$(getconf LONG_BIT)
  fi
  # For 64-bit rpm packages the libraries are shipped from lib64 instead of
  # lib.
  if [[ "$FPM_OUTPUT" = "rpm" && "$ARCH" -eq 64 ]]; then
    mv lib lib64
    lib_dir=lib64
  fi

  # Expansions are quoted so the version and output type always reach fpm as
  # exactly one argument each.
  fpm \
    -s dir \
    -t "$FPM_OUTPUT" \
    -n rocksdb \
    -v "$1" \
    --prefix /usr \
    --url http://rocksdb.org/ \
    -m rocksdb@fb.com \
    --license BSD \
    --vendor Facebook \
    --description "RocksDB is an embeddable persistent key-value store for fast storage." \
    include "$lib_dir"
}

# Hand the command-line arguments to main exactly as received; quoting "$@"
# keeps each argument intact instead of re-splitting and glob-expanding it.
main "$@"
back to top