Revision 93e6f119c0ce8a1bba6e81dc8dd97d67be360844 authored by Doug Ledford on 31 May 2012, 23:26:28 UTC, committed by Linus Torvalds on 01 June 2012, 00:49:30 UTC
Since commit b231cca4381e ("message queues: increase range limits") on
Oct 18, 2008, calls to mq_open() that did not pass in an attribute
struct and expected to get default values for the size of the queue and
the max message size now get the system wide maximums instead of
hardwired defaults like they used to get.

This was uncovered when one of the earlier patches in this patch set
increased the default system wide maximums at the same time it increased
the hard ceiling on the system wide maximums (a customer specifically
needed the hard ceiling brought back up, the new ceiling that commit
b231cca4381e introduced was too low for their production systems).  By
increasing the default maximums and not realising they were tied to any
attempt to create a message queue without an attribute struct, I had
inadvertently made it such that all message queue creation attempts
without an attribute struct were failing because the new default
maximums would create a queue that exceeded the default rlimit for
message queue bytes.

As a result, the system wide defaults were brought back down to their
previous levels, and the system wide ceilings on the maximums were
raised to meet the customer's needs.  However, the fact that the no
attribute struct behavior of mq_open() could be broken by changing the
system wide maximums for message queues was seen as fundamentally broken
itself.  So we hardwired the no attribute case back like it used to be.
But, then we realized that on the very off chance that some piece of
software in the wild depended on that behavior, we could work around
that issue by adding two new knobs to /proc that allowed setting the
defaults for message queues created without an attr struct separately
from the system wide maximums.

What is not an option IMO is to leave the current behavior in place.  No
piece of software should ever rely on setting the system wide maximums
in order to get a desired message queue.  Such a reliance would be so
fundamentally multitasking OS unfriendly as to not really be tolerable.
Fortunately, we don't know of any software in the wild that uses this
except for a regression test program that caught the issue in the first
place.  If there is though, we have made accommodations with the two new
/proc knobs (and that's all the accommodations such fundamentally broken
software can be allowed)..

This patch:

The various defines for minimums and maximums of the sysctl controllable
mqueue values are scattered amongst different files and named
inconsistently.  Move them all into ipc_namespace.h and make them have
consistent names.  Additionally, make the number of queues per namespace
also have a minimum and maximum and use the same sysctl function as the
other two settable variables.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 29a5c67
Raw File
fault-inject.c
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/stacktrace.h>
#include <linux/fault-inject.h>

/*
 * setup_fault_attr() is a helper function for various __setup handlers, so it
 * returns 0 on error, because that is what __setup handlers do.
 */
int setup_fault_attr(struct fault_attr *attr, char *str)
{
	unsigned long probability;
	unsigned long interval;
	int times;
	int space;

	/* "<interval>,<probability>,<space>,<times>" */
	if (sscanf(str, "%lu,%lu,%d,%d",
			&interval, &probability, &space, &times) < 4) {
		printk(KERN_WARNING
			"FAULT_INJECTION: failed to parse arguments\n");
		return 0;
	}

	attr->probability = probability;
	attr->interval = interval;
	atomic_set(&attr->times, times);
	atomic_set(&attr->space, space);

	return 1;
}
EXPORT_SYMBOL_GPL(setup_fault_attr);

static void fail_dump(struct fault_attr *attr)
{
	if (attr->verbose > 0)
		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure\n");
	if (attr->verbose > 1)
		dump_stack();
}

#define atomic_dec_not_zero(v)		atomic_add_unless((v), -1, 0)

static bool fail_task(struct fault_attr *attr, struct task_struct *task)
{
	return !in_interrupt() && task->make_it_fail;
}

#define MAX_STACK_TRACE_DEPTH 32

#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER

static bool fail_stacktrace(struct fault_attr *attr)
{
	struct stack_trace trace;
	int depth = attr->stacktrace_depth;
	unsigned long entries[MAX_STACK_TRACE_DEPTH];
	int n;
	bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX);

	if (depth == 0)
		return found;

	trace.nr_entries = 0;
	trace.entries = entries;
	trace.max_entries = depth;
	trace.skip = 1;

	save_stack_trace(&trace);
	for (n = 0; n < trace.nr_entries; n++) {
		if (attr->reject_start <= entries[n] &&
			       entries[n] < attr->reject_end)
			return false;
		if (attr->require_start <= entries[n] &&
			       entries[n] < attr->require_end)
			found = true;
	}
	return found;
}

#else

static inline bool fail_stacktrace(struct fault_attr *attr)
{
	return true;
}

#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */

/*
 * This code is stolen from failmalloc-1.0
 * http://www.nongnu.org/failmalloc/
 */

bool should_fail(struct fault_attr *attr, ssize_t size)
{
	if (attr->task_filter && !fail_task(attr, current))
		return false;

	if (atomic_read(&attr->times) == 0)
		return false;

	if (atomic_read(&attr->space) > size) {
		atomic_sub(size, &attr->space);
		return false;
	}

	if (attr->interval > 1) {
		attr->count++;
		if (attr->count % attr->interval)
			return false;
	}

	if (attr->probability <= random32() % 100)
		return false;

	if (!fail_stacktrace(attr))
		return false;

	fail_dump(attr);

	if (atomic_read(&attr->times) != -1)
		atomic_dec_not_zero(&attr->times);

	return true;
}
EXPORT_SYMBOL_GPL(should_fail);

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

static int debugfs_ul_set(void *data, u64 val)
{
	*(unsigned long *)data = val;
	return 0;
}

static int debugfs_ul_get(void *data, u64 *val)
{
	*val = *(unsigned long *)data;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");

static struct dentry *debugfs_create_ul(const char *name, umode_t mode,
				struct dentry *parent, unsigned long *value)
{
	return debugfs_create_file(name, mode, parent, value, &fops_ul);
}

#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER

static int debugfs_stacktrace_depth_set(void *data, u64 val)
{
	*(unsigned long *)data =
		min_t(unsigned long, val, MAX_STACK_TRACE_DEPTH);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_stacktrace_depth, debugfs_ul_get,
			debugfs_stacktrace_depth_set, "%llu\n");

static struct dentry *debugfs_create_stacktrace_depth(
	const char *name, umode_t mode,
	struct dentry *parent, unsigned long *value)
{
	return debugfs_create_file(name, mode, parent, value,
				   &fops_stacktrace_depth);
}

#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */

static int debugfs_atomic_t_set(void *data, u64 val)
{
	atomic_set((atomic_t *)data, val);
	return 0;
}

static int debugfs_atomic_t_get(void *data, u64 *val)
{
	*val = atomic_read((atomic_t *)data);
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
			debugfs_atomic_t_set, "%lld\n");

static struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode,
				struct dentry *parent, atomic_t *value)
{
	return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
}

struct dentry *fault_create_debugfs_attr(const char *name,
			struct dentry *parent, struct fault_attr *attr)
{
	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
	struct dentry *dir;

	dir = debugfs_create_dir(name, parent);
	if (!dir)
		return ERR_PTR(-ENOMEM);

	if (!debugfs_create_ul("probability", mode, dir, &attr->probability))
		goto fail;
	if (!debugfs_create_ul("interval", mode, dir, &attr->interval))
		goto fail;
	if (!debugfs_create_atomic_t("times", mode, dir, &attr->times))
		goto fail;
	if (!debugfs_create_atomic_t("space", mode, dir, &attr->space))
		goto fail;
	if (!debugfs_create_ul("verbose", mode, dir, &attr->verbose))
		goto fail;
	if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter))
		goto fail;

#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER

	if (!debugfs_create_stacktrace_depth("stacktrace-depth", mode, dir,
				&attr->stacktrace_depth))
		goto fail;
	if (!debugfs_create_ul("require-start", mode, dir,
				&attr->require_start))
		goto fail;
	if (!debugfs_create_ul("require-end", mode, dir, &attr->require_end))
		goto fail;
	if (!debugfs_create_ul("reject-start", mode, dir, &attr->reject_start))
		goto fail;
	if (!debugfs_create_ul("reject-end", mode, dir, &attr->reject_end))
		goto fail;

#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */

	return dir;
fail:
	debugfs_remove_recursive(dir);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(fault_create_debugfs_attr);

#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
back to top