Revision b1dd693e5b9348bd68a80e679e03cf9c0973b01b authored by Daisuke Nishimura on 24 November 2010, 20:57:06 UTC, committed by Linus Torvalds on 24 November 2010, 21:50:44 UTC
__mem_cgroup_try_charge() can be called under down_write(&mmap_sem)(e.g.
mlock does it). This means it can cause deadlock if it races with move charge:

Ex.1)
                move charge             |        try charge
  --------------------------------------+------------------------------
    mem_cgroup_can_attach()             |  down_write(&mmap_sem)
      mc.moving_task = current          |    ..
      mem_cgroup_precharge_mc()         |  __mem_cgroup_try_charge()
        mem_cgroup_count_precharge()    |    prepare_to_wait()
          down_read(&mmap_sem)          |    if (mc.moving_task)
          -> cannot aquire the lock     |    -> true
                                        |      schedule()

Ex.2)
                move charge             |        try charge
  --------------------------------------+------------------------------
    mem_cgroup_can_attach()             |
      mc.moving_task = current          |
      mem_cgroup_precharge_mc()         |
        mem_cgroup_count_precharge()    |
          down_read(&mmap_sem)          |
          ..                            |
          up_read(&mmap_sem)            |
                                        |  down_write(&mmap_sem)
    mem_cgroup_move_task()              |    ..
      mem_cgroup_move_charge()          |  __mem_cgroup_try_charge()
        down_read(&mmap_sem)            |    prepare_to_wait()
        -> cannot aquire the lock       |    if (mc.moving_task)
                                        |    -> true
                                        |      schedule()

To avoid this deadlock, we do all the move charge works (both can_attach() and
attach()) under one mmap_sem section.
And after this patch, we set/clear mc.moving_task outside mc.lock, because we
use the lock only to check mc.from/to.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 11e7946
Raw File
em_nbyte.c
/*
 * net/sched/em_nbyte.c	N-Byte ematch
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 */

#include <linux/gfp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/tc_ematch/tc_em_nbyte.h>
#include <net/pkt_cls.h>

struct nbyte_data
{
	struct tcf_em_nbyte	hdr;
	char			pattern[0];
};

static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
			   struct tcf_ematch *em)
{
	struct tcf_em_nbyte *nbyte = data;

	if (data_len < sizeof(*nbyte) ||
	    data_len < (sizeof(*nbyte) + nbyte->len))
		return -EINVAL;

	em->datalen = sizeof(*nbyte) + nbyte->len;
	em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
	if (em->data == 0UL)
		return -ENOBUFS;

	return 0;
}

static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
			  struct tcf_pkt_info *info)
{
	struct nbyte_data *nbyte = (struct nbyte_data *) em->data;
	unsigned char *ptr = tcf_get_base_ptr(skb, nbyte->hdr.layer);

	ptr += nbyte->hdr.off;

	if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
		return 0;

	return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
}

static struct tcf_ematch_ops em_nbyte_ops = {
	.kind	  = TCF_EM_NBYTE,
	.change	  = em_nbyte_change,
	.match	  = em_nbyte_match,
	.owner	  = THIS_MODULE,
	.link	  = LIST_HEAD_INIT(em_nbyte_ops.link)
};

static int __init init_em_nbyte(void)
{
	return tcf_em_register(&em_nbyte_ops);
}

static void __exit exit_em_nbyte(void)
{
	tcf_em_unregister(&em_nbyte_ops);
}

MODULE_LICENSE("GPL");

module_init(init_em_nbyte);
module_exit(exit_em_nbyte);

MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE);
back to top