Revision cd7c4143d795ab9a53e6eaeb4ee572b4e258313b authored by Siying Dong on 23 November 2016, 17:19:11 UTC, committed by Facebook Github Bot on 23 November 2016, 17:24:15 UTC
Summary:
The current write stalling system lacks positive feedback when the restricted rate is already too low, so users sometimes get stuck at a very low slowdown value. With this diff, we add positive feedback (increasing the slowdown value) when we recover from the slowdown state back to normal. To keep the positive feedback from pushing the slowdown value too high, we issue negative feedback every time we get close to the stop condition. Experiments show it is easier to reach a relative balance than before.

Also increase the level0_stop_writes_trigger default from 24 to 32. Since the level0_slowdown_writes_trigger default is 20, a stop trigger of 24 gives only four files as buffer time to slow down writes. To avoid stopping within four files when 20 files have already accumulated, the slowdown value must be very low, which is almost the same as stopping. It also doesn't give the slowdown value enough time to converge. Increasing it to 32 will smooth out the system.
Closes https://github.com/facebook/rocksdb/pull/1562

Differential Revision: D4218519

Pulled By: siying

fbshipit-source-id: 95e4088
1 parent dfb6fe6
Raw File
write_stress_runner.py
#! /usr/bin/env python
import subprocess
import argparse
import random
import time
import sys


def generate_runtimes(total_runtime):
    """Split ``total_runtime`` seconds into a random list of run lengths.

    Each entry is drawn at random from a pool of candidate durations and
    capped at the time still remaining, so the entries always sum to
    ``total_runtime``.

    Args:
        total_runtime: total number of seconds to distribute (int).

    Returns:
        A list of positive ints that sum to ``total_runtime``; the list is
        empty when ``total_runtime`` is zero or negative.
    """
    # combination of short runtimes and long runtimes, with heavier
    # weight on short runtimes (1-9 appear twice, 10-19 once).
    # list(...) keeps the concatenation valid on Python 3, where range()
    # no longer returns a list.
    possible_runtimes_sec = (
        list(range(1, 10)) + list(range(1, 20)) + [100, 1000])
    runtimes = []
    while total_runtime > 0:
        # never schedule more than the time that is left
        chosen = min(random.choice(possible_runtimes_sec), total_runtime)
        runtimes.append(chosen)
        total_runtime -= chosen
    return runtimes


def main(args):
    """Run ``./write_stress`` repeatedly, randomly killing it or letting it exit.

    The total budget ``args.runtime_sec`` is split into random slices by
    ``generate_runtimes``. For every slice a ``./write_stress`` process is
    started with randomly chosen options:

    * kill-mode: started with ``--runtime_sec=-1`` (presumably "run until
      killed" -- confirm against write_stress) and killed once the slice
      has elapsed;
    * clean-shutdown-mode: started with the slice as ``--runtime_sec`` and
      waited for until it exits on its own.

    Every run after the first passes ``--destroy_db=false`` so it continues
    on the database left behind by the previous run.

    Args:
        args: parsed command line. ``args.runtime_sec`` must be accepted by
            ``int()``; ``args.db`` is forwarded as ``--db=...`` unless empty.

    Exits the script with status 1 as soon as a write_stress process
    terminates on its own with a non-zero exit code.
    """
    runtimes = generate_runtimes(int(args.runtime_sec))
    # print() with a single argument behaves the same on Python 2 and 3
    print("Going to execute write stress for " + str(runtimes))
    first_time = True

    for runtime in runtimes:
        kill = random.choice([False, True])

        # Build an argument list instead of a shell string: the process is
        # then started without an intermediate shell, so child.kill() below
        # hits write_stress itself, and a db path containing spaces or
        # shell metacharacters is passed through untouched.
        cmd = ['./write_stress',
               '--runtime_sec=' + ("-1" if kill else str(runtime))]

        if args.db:
            cmd.append('--db=' + args.db)

        if first_time:
            first_time = False
        else:
            # use current db
            cmd.append('--destroy_db=false')
        if random.choice([False, True]):
            cmd.append('--delete_obsolete_files_with_fullscan=true')
        if random.choice([False, True]):
            cmd.append('--low_open_files_mode=true')

        print("Running write_stress for %d seconds (%s): %s" %
              (runtime, ("kill-mode" if kill else "clean-shutdown-mode"),
               ' '.join(cmd)))

        child = subprocess.Popen(cmd)
        killtime = time.time() + runtime
        # clean-shutdown-mode: loop until the child exits;
        # kill-mode: loop only until the slice is over.
        while not kill or time.time() < killtime:
            time.sleep(1)
            if child.poll() is not None:
                if child.returncode == 0:
                    break
                print("ERROR: write_stress died with exitcode=%d\n"
                      % child.returncode)
                sys.exit(1)
        # Only signal a process that is still running, and reap it so it
        # does not linger as a zombie while the next run starts.
        if kill and child.poll() is None:
            child.kill()
            child.wait()
        # breathe
        time.sleep(3)

if __name__ == '__main__':
    # Script entry point: parse the command line and hand over to main().
    random.seed(time.time())
    # The description is a single-line literal; a backslash continuation
    # inside the string would embed the source indentation in the text.
    parser = argparse.ArgumentParser(
        description="This script runs and kills write_stress multiple times")
    # type=int rejects a non-numeric value with a usage error at parse time;
    # main() still accepts the resulting int via int(args.runtime_sec).
    parser.add_argument("--runtime_sec", type=int, default=1000,
                        help="total number of seconds to spread across the "
                             "write_stress runs (default: 1000)")
    parser.add_argument("--db", default='',
                        help="database path forwarded to write_stress as "
                             "--db; not passed when empty")
    args = parser.parse_args()
    main(args)
back to top