Revision d1908362ae0b97374eb8328fbb471576332f9fb1 authored by Minchan Kim on 22 September 2010, 20:05:01 UTC, committed by Linus Torvalds on 23 September 2010, 00:22:39 UTC
M.  Vefa Bicakci reported 2.6.35 kernel hang up when hibernation on his
32bit 3GB mem machine.
(https://bugzilla.kernel.org/show_bug.cgi?id=16771). Also he bisected
the regression to

  commit bb21c7ce18eff8e6e7877ca1d06c6db719376e3c
  Author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
  Date:   Fri Jun 4 14:15:05 2010 -0700

     vmscan: fix do_try_to_free_pages() return value when priority==0 reclaim failure

At first impression, this seemed very strange because the above commit
only chenged function return value and hibernate_preallocate_memory()
ignore return value of shrink_all_memory().  But it's related.

Now, page allocation from hibernation code may enter infinite loop if the
system has highmem.  The reasons are that vmscan don't care enough OOM
case when oom_killer_disabled.

The problem sequence is following as.

1. hibernation
2. oom_disable
3. alloc_pages
4. do_try_to_free_pages
       if (scanning_global_lru(sc) && !all_unreclaimable)
               return 1;

If kswapd is not freozen, it would set zone->all_unreclaimable to 1 and
then shrink_zones maybe return true(ie, all_unreclaimable is true).  So at
last, alloc_pages could go to _nopage_.  If it is, it should have no
problem.

This patch adds all_unreclaimable check to protect in direct reclaim path,
too.  It can care of hibernation OOM case and help bailout
all_unreclaimable case slightly.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Reported-by: M. Vefa Bicakci <bicave@superonline.com>
Reported-by: <caiqian@redhat.com>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: <caiqian@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent eba93fc
Raw File
sysctl_check.c
#include <linux/stat.h>
#include <linux/sysctl.h>
#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>


static int sysctl_depth(struct ctl_table *table)
{
	struct ctl_table *tmp;
	int depth;

	depth = 0;
	for (tmp = table; tmp->parent; tmp = tmp->parent)
		depth++;

	return depth;
}

static struct ctl_table *sysctl_parent(struct ctl_table *table, int n)
{
	int i;

	for (i = 0; table && i < n; i++)
		table = table->parent;

	return table;
}


static void sysctl_print_path(struct ctl_table *table)
{
	struct ctl_table *tmp;
	int depth, i;
	depth = sysctl_depth(table);
	if (table->procname) {
		for (i = depth; i >= 0; i--) {
			tmp = sysctl_parent(table, i);
			printk("/%s", tmp->procname?tmp->procname:"");
		}
	}
	printk(" ");
}

static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces,
						struct ctl_table *table)
{
	struct ctl_table_header *head;
	struct ctl_table *ref, *test;
	int depth, cur_depth;

	depth = sysctl_depth(table);

	for (head = __sysctl_head_next(namespaces, NULL); head;
	     head = __sysctl_head_next(namespaces, head)) {
		cur_depth = depth;
		ref = head->ctl_table;
repeat:
		test = sysctl_parent(table, cur_depth);
		for (; ref->procname; ref++) {
			int match = 0;
			if (cur_depth && !ref->child)
				continue;

			if (test->procname && ref->procname &&
			    (strcmp(test->procname, ref->procname) == 0))
					match++;

			if (match) {
				if (cur_depth != 0) {
					cur_depth--;
					ref = ref->child;
					goto repeat;
				}
				goto out;
			}
		}
	}
	ref = NULL;
out:
	sysctl_head_finish(head);
	return ref;
}

static void set_fail(const char **fail, struct ctl_table *table, const char *str)
{
	if (*fail) {
		printk(KERN_ERR "sysctl table check failed: ");
		sysctl_print_path(table);
		printk(" %s\n", *fail);
		dump_stack();
	}
	*fail = str;
}

static void sysctl_check_leaf(struct nsproxy *namespaces,
				struct ctl_table *table, const char **fail)
{
	struct ctl_table *ref;

	ref = sysctl_check_lookup(namespaces, table);
	if (ref && (ref != table))
		set_fail(fail, table, "Sysctl already exists");
}

int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
{
	int error = 0;
	for (; table->procname; table++) {
		const char *fail = NULL;

		if (table->parent) {
			if (table->procname && !table->parent->procname)
				set_fail(&fail, table, "Parent without procname");
		}
		if (!table->procname)
			set_fail(&fail, table, "No procname");
		if (table->child) {
			if (table->data)
				set_fail(&fail, table, "Directory with data?");
			if (table->maxlen)
				set_fail(&fail, table, "Directory with maxlen?");
			if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode)
				set_fail(&fail, table, "Writable sysctl directory");
			if (table->proc_handler)
				set_fail(&fail, table, "Directory with proc_handler");
			if (table->extra1)
				set_fail(&fail, table, "Directory with extra1");
			if (table->extra2)
				set_fail(&fail, table, "Directory with extra2");
		} else {
			if ((table->proc_handler == proc_dostring) ||
			    (table->proc_handler == proc_dointvec) ||
			    (table->proc_handler == proc_dointvec_minmax) ||
			    (table->proc_handler == proc_dointvec_jiffies) ||
			    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
			    (table->proc_handler == proc_dointvec_ms_jiffies) ||
			    (table->proc_handler == proc_doulongvec_minmax) ||
			    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
				if (!table->data)
					set_fail(&fail, table, "No data");
				if (!table->maxlen)
					set_fail(&fail, table, "No maxlen");
			}
			if ((table->proc_handler == proc_doulongvec_minmax) ||
			    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
				if (table->maxlen > sizeof (unsigned long)) {
					if (!table->extra1)
						set_fail(&fail, table, "No min");
					if (!table->extra2)
						set_fail(&fail, table, "No max");
				}
			}
#ifdef CONFIG_PROC_SYSCTL
			if (table->procname && !table->proc_handler)
				set_fail(&fail, table, "No proc_handler");
#endif
#if 0
			if (!table->procname && table->proc_handler)
				set_fail(&fail, table, "proc_handler without procname");
#endif
			sysctl_check_leaf(namespaces, table, &fail);
		}
		if (table->mode > 0777)
			set_fail(&fail, table, "bogus .mode");
		if (fail) {
			set_fail(&fail, table, NULL);
			error = -EINVAL;
		}
		if (table->child)
			error |= sysctl_check_table(namespaces, table->child);
	}
	return error;
}
back to top