Revision b3ecba096729f521312d1863ad22530695527aed authored by Jeff Layton on 13 November 2014, 12:30:46 UTC, committed by Trond Myklebust on 13 November 2014, 18:15:49 UTC
Bruce reported that he was seeing the following BUG pop:

    BUG: sleeping function called from invalid context at mm/slab.c:2846
    in_atomic(): 0, irqs_disabled(): 0, pid: 4539, name: mount.nfs
    2 locks held by mount.nfs/4539:
    #0:  (nfs_clid_init_mutex){+.+.+.}, at: [<ffffffffa01c0a9a>] nfs4_discover_server_trunking+0x4a/0x2f0 [nfsv4]
    #1:  (rcu_read_lock){......}, at: [<ffffffffa00e3185>] gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss]
    Preemption disabled at:[<ffffffff81a4f082>] printk+0x4d/0x4f

    CPU: 3 PID: 4539 Comm: mount.nfs Not tainted 3.18.0-rc1-00013-g5b095e9 #3393
    Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
    ffff880021499390 ffff8800381476a8 ffffffff81a534cf 0000000000000001
    0000000000000000 ffff8800381476c8 ffffffff81097854 00000000000000d0
    0000000000000018 ffff880038147718 ffffffff8118e4f3 0000000020479f00
    Call Trace:
    [<ffffffff81a534cf>] dump_stack+0x4f/0x7c
    [<ffffffff81097854>] __might_sleep+0x114/0x180
    [<ffffffff8118e4f3>] __kmalloc+0x1a3/0x280
    [<ffffffffa00e31d8>] gss_stringify_acceptor+0x58/0xb0 [auth_rpcgss]
    [<ffffffffa00e3185>] ? gss_stringify_acceptor+0x5/0xb0 [auth_rpcgss]
    [<ffffffffa006b438>] rpcauth_stringify_acceptor+0x18/0x30 [sunrpc]
    [<ffffffffa01b0469>] nfs4_proc_setclientid+0x199/0x380 [nfsv4]
    [<ffffffffa01b04d0>] ? nfs4_proc_setclientid+0x200/0x380 [nfsv4]
    [<ffffffffa01bdf1a>] nfs40_discover_server_trunking+0xda/0x150 [nfsv4]
    [<ffffffffa01bde45>] ? nfs40_discover_server_trunking+0x5/0x150 [nfsv4]
    [<ffffffffa01c0acf>] nfs4_discover_server_trunking+0x7f/0x2f0 [nfsv4]
    [<ffffffffa01c8e24>] nfs4_init_client+0x104/0x2f0 [nfsv4]
    [<ffffffffa01539b4>] nfs_get_client+0x314/0x3f0 [nfs]
    [<ffffffffa0153780>] ? nfs_get_client+0xe0/0x3f0 [nfs]
    [<ffffffffa01c83aa>] nfs4_set_client+0x8a/0x110 [nfsv4]
    [<ffffffffa0069708>] ? __rpc_init_priority_wait_queue+0xa8/0xf0 [sunrpc]
    [<ffffffffa01c9b2f>] nfs4_create_server+0x12f/0x390 [nfsv4]
    [<ffffffffa01c1472>] nfs4_remote_mount+0x32/0x60 [nfsv4]
    [<ffffffff81196489>] mount_fs+0x39/0x1b0
    [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20
    [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150
    [<ffffffffa01c1396>] nfs_do_root_mount+0x86/0xc0 [nfsv4]
    [<ffffffffa01c1784>] nfs4_try_mount+0x44/0xc0 [nfsv4]
    [<ffffffffa01549b7>] ? get_nfs_version+0x27/0x90 [nfs]
    [<ffffffffa0161a2d>] nfs_fs_mount+0x47d/0xd60 [nfs]
    [<ffffffff81a59c5e>] ? mutex_unlock+0xe/0x10
    [<ffffffffa01606a0>] ? nfs_remount+0x430/0x430 [nfs]
    [<ffffffffa01609c0>] ? nfs_clone_super+0x140/0x140 [nfs]
    [<ffffffff81196489>] mount_fs+0x39/0x1b0
    [<ffffffff81166145>] ? __alloc_percpu+0x15/0x20
    [<ffffffff811b276b>] vfs_kern_mount+0x6b/0x150
    [<ffffffff811b5830>] do_mount+0x210/0xbe0
    [<ffffffff811b54ca>] ? copy_mount_options+0x3a/0x160
    [<ffffffff811b651f>] SyS_mount+0x6f/0xb0
    [<ffffffff81a5c852>] system_call_fastpath+0x12/0x17

Sleeping under the rcu_read_lock is bad. This patch fixes it by dropping
the rcu_read_lock before doing the allocation and then reacquiring it
and redoing the dereference before doing the copy. If we find that the
string has somehow grown in the meantime, we'll reallocate and try again.

Cc: <stable@vger.kernel.org> # v3.17+
Reported-by: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
1 parent f8ebf7a
Raw File
fault-inject.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/stacktrace.h>
#include <linux/fault-inject.h>

/*
 * setup_fault_attr() is a helper function for various __setup handlers, so it
 * returns 0 on error, because that is what __setup handlers do.
 */
int setup_fault_attr(struct fault_attr *attr, char *str)
{
	unsigned long probability;
	unsigned long interval;
	int times;
	int space;

	/* "<interval>,<probability>,<space>,<times>" */
	if (sscanf(str, "%lu,%lu,%d,%d",
			&interval, &probability, &space, &times) < 4) {
		printk(KERN_WARNING
			"FAULT_INJECTION: failed to parse arguments\n");
		return 0;
	}

	attr->probability = probability;
	attr->interval = interval;
	atomic_set(&attr->times, times);
	atomic_set(&attr->space, space);

	return 1;
}
EXPORT_SYMBOL_GPL(setup_fault_attr);

static void fail_dump(struct fault_attr *attr)
{
	if (attr->verbose > 0)
		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure\n");
	if (attr->verbose > 1)
		dump_stack();
}

#define atomic_dec_not_zero(v)		atomic_add_unless((v), -1, 0)

static bool fail_task(struct fault_attr *attr, struct task_struct *task)
{
	return !in_interrupt() && task->make_it_fail;
}

#define MAX_STACK_TRACE_DEPTH 32

#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER

static bool fail_stacktrace(struct fault_attr *attr)
{
	struct stack_trace trace;
	int depth = attr->stacktrace_depth;
	unsigned long entries[MAX_STACK_TRACE_DEPTH];
	int n;
	bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX);

	if (depth == 0)
		return found;

	trace.nr_entries = 0;
	trace.entries = entries;
	trace.max_entries = depth;
	trace.skip = 1;

	save_stack_trace(&trace);
	for (n = 0; n < trace.nr_entries; n++) {
		if (attr->reject_start <= entries[n] &&
			       entries[n] < attr->reject_end)
			return false;
		if (attr->require_start <= entries[n] &&
			       entries[n] < attr->require_end)
			found = true;
	}
	return found;
}

#else

static inline bool fail_stacktrace(struct fault_attr *attr)
{
	return true;
}

#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */

/*
 * This code is stolen from failmalloc-1.0
 * http://www.nongnu.org/failmalloc/
 */

bool should_fail(struct fault_attr *attr, ssize_t size)
{
	/* No need to check any other properties if the probability is 0 */
	if (attr->probability == 0)
		return false;

	if (attr->task_filter && !fail_task(attr, current))
		return false;

	if (atomic_read(&attr->times) == 0)
		return false;

	if (atomic_read(&attr->space) > size) {
		atomic_sub(size, &attr->space);
		return false;
	}

	if (attr->interval > 1) {
		attr->count++;
		if (attr->count % attr->interval)
			return false;
	}

	if (attr->probability <= prandom_u32() % 100)
		return false;

	if (!fail_stacktrace(attr))
		return false;

	fail_dump(attr);

	if (atomic_read(&attr->times) != -1)
		atomic_dec_not_zero(&attr->times);

	return true;
}
EXPORT_SYMBOL_GPL(should_fail);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int debugfs_ul_set(void *data, u64 val)
{
	*(unsigned long *)data = val;
	return 0;
}

static int debugfs_ul_get(void *data, u64 *val)
{
	*val = *(unsigned long *)data;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");

static struct dentry *debugfs_create_ul(const char *name, umode_t mode,
				struct dentry *parent, unsigned long *value)
{
	return debugfs_create_file(name, mode, parent, value, &fops_ul);
}

#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER

static int debugfs_stacktrace_depth_set(void *data, u64 val)
{
	*(unsigned long *)data =
		min_t(unsigned long, val, MAX_STACK_TRACE_DEPTH);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_stacktrace_depth, debugfs_ul_get,
			debugfs_stacktrace_depth_set, "%llu\n");

static struct dentry *debugfs_create_stacktrace_depth(
	const char *name, umode_t mode,
	struct dentry *parent, unsigned long *value)
{
	return debugfs_create_file(name, mode, parent, value,
				   &fops_stacktrace_depth);
}

#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */

struct dentry *fault_create_debugfs_attr(const char *name,
			struct dentry *parent, struct fault_attr *attr)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = debugfs_create_dir(name, parent);
	if (!dir)
		return ERR_PTR(-ENOMEM);

	if (!debugfs_create_ul("probability", mode, dir, &attr->probability))
		goto fail;
	if (!debugfs_create_ul("interval", mode, dir, &attr->interval))
		goto fail;
	if (!debugfs_create_atomic_t("times", mode, dir, &attr->times))
		goto fail;
	if (!debugfs_create_atomic_t("space", mode, dir, &attr->space))
		goto fail;
	if (!debugfs_create_ul("verbose", mode, dir, &attr->verbose))
		goto fail;
	if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter))
		goto fail;

#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER

	if (!debugfs_create_stacktrace_depth("stacktrace-depth", mode, dir,
				&attr->stacktrace_depth))
		goto fail;
	if (!debugfs_create_ul("require-start", mode, dir,
				&attr->require_start))
		goto fail;
	if (!debugfs_create_ul("require-end", mode, dir, &attr->require_end))
		goto fail;
	if (!debugfs_create_ul("reject-start", mode, dir, &attr->reject_start))
		goto fail;
	if (!debugfs_create_ul("reject-end", mode, dir, &attr->reject_end))
		goto fail;

#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */

	return dir;
fail:
	debugfs_remove_recursive(dir);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(fault_create_debugfs_attr);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
back to top