Revision fb59e9f1e9786635ea12e12bf6adbb132e10f979 authored by Hugh Dickins on 04 March 2008, 22:29:16 UTC, committed by Linus Torvalds on 05 March 2008, 00:35:15 UTC
While testing force_empty, during an exit_mmap, __mem_cgroup_remove_list
called from mem_cgroup_uncharge_page oopsed on a NULL pointer in the lru list.
 I couldn't see what racing tasks on other cpus were doing, but surmise that
another must have been in mem_cgroup_charge_common on the same page, between
its unlock_page_cgroup and spin_lock_irqsave near done (thanks to that kzalloc
which I'd almost changed to a kmalloc).

Normally such a race cannot happen, the ref_cnt prevents it, the final
uncharge cannot race with the initial charge.  But force_empty buggers the
ref_cnt, that's what it's all about; and thereafter forced pages are
vulnerable to races such as this (just think of a shared page also mapped into
an mm of another mem_cgroup than that just emptied).  And remain vulnerable
until they're freed indefinitely later.

This patch just fixes the oops by moving the unlock_page_cgroups down below
adding to and removing from the list (only possible given the previous patch);
and while we're at it, we might as well make it an invariant that
page->page_cgroup is always set while pc is on lru.

But this behaviour of force_empty seems highly unsatisfactory to me: why have
a ref_cnt if we always have to cope with it being violated (as in the earlier
page migration patch).  We may prefer force_empty to move pages to an orphan
mem_cgroup (could be the root, but better not), from which other cgroups could
recover them; we might need to reverse the locking again; but no time now for
such concerns.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 9b3c0a0
Raw File
user_namespace.c
/*
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation, version 2 of the
 *  License.
 */

#include <linux/module.h>
#include <linux/version.h>
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>

/*
 * Clone a new ns copying an original user ns, setting refcount to 1
 * @old_ns: namespace to clone
 * Return NULL on error (failure to kmalloc), new ns otherwise
 */
static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
{
	struct user_namespace *ns;
	struct user_struct *new_user;
	int n;

	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
	if (!ns)
		return ERR_PTR(-ENOMEM);

	kref_init(&ns->kref);

	for (n = 0; n < UIDHASH_SZ; ++n)
		INIT_HLIST_HEAD(ns->uidhash_table + n);

	/* Insert new root user.  */
	ns->root_user = alloc_uid(ns, 0);
	if (!ns->root_user) {
		kfree(ns);
		return ERR_PTR(-ENOMEM);
	}

	/* Reset current->user with a new one */
	new_user = alloc_uid(ns, current->uid);
	if (!new_user) {
		free_uid(ns->root_user);
		kfree(ns);
		return ERR_PTR(-ENOMEM);
	}

	switch_uid(new_user);
	return ns;
}

struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
{
	struct user_namespace *new_ns;

	BUG_ON(!old_ns);
	get_user_ns(old_ns);

	if (!(flags & CLONE_NEWUSER))
		return old_ns;

	new_ns = clone_user_ns(old_ns);

	put_user_ns(old_ns);
	return new_ns;
}

void free_user_ns(struct kref *kref)
{
	struct user_namespace *ns;

	ns = container_of(kref, struct user_namespace, kref);
	release_uids(ns);
	kfree(ns);
}
back to top