Revision 6f6acb00514c10be35529402f36ad7a288f08c2e authored by Michal Hocko on 22 May 2014, 18:54:19 UTC, committed by Linus Torvalds on 23 May 2014, 16:37:29 UTC
Commit 284f39afeaa4 ("mm: memcg: push !mm handling out to page cache
charge function") explicitly checks for page cache charges without any
mm context (from kernel thread context[1]).

This seemed to be the only possible case where memory could be charged
without mm context so commit 03583f1a631c ("memcg: remove unnecessary
!mm check from try_get_mem_cgroup_from_mm()") removed the mm check from
get_mem_cgroup_from_mm().  This however caused another NULL ptr
dereference during early boot when loopback kernel thread splices to
tmpfs as reported by Stephan Kulow:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000360
  IP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60
  Oops: 0000 [#1] SMP
  Modules linked in: btrfs dm_multipath dm_mod scsi_dh multipath raid10 raid456 async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod parport_pc parport nls_utf8 isofs usb_storage iscsi_ibft iscsi_boot_sysfs arc4 ecb fan thermal nfs lockd fscache nls_iso8859_1 nls_cp437 sg st hid_generic usbhid af_packet sunrpc sr_mod cdrom ata_generic uhci_hcd virtio_net virtio_blk ehci_hcd usbcore ata_piix floppy processor button usb_common virtio_pci virtio_ring virtio edd squashfs loop ppa]
  CPU: 0 PID: 97 Comm: loop1 Not tainted 3.15.0-rc5-5-default #1
  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  Call Trace:
    __mem_cgroup_try_charge_swapin+0x40/0xe0
    mem_cgroup_charge_file+0x8b/0xd0
    shmem_getpage_gfp+0x66b/0x7b0
    shmem_file_splice_read+0x18f/0x430
    splice_direct_to_actor+0xa2/0x1c0
    do_lo_receive+0x5a/0x60 [loop]
    loop_thread+0x298/0x720 [loop]
    kthread+0xc6/0xe0
    ret_from_fork+0x7c/0xb0

Also Branimir Maksimovic reported the following oops which is tiggered
for the swapcache charge path from the accounting code for kernel threads:

  CPU: 1 PID: 160 Comm: kworker/u8:5 Tainted: P           OE 3.15.0-rc5-core2-custom #159
  Hardware name: System manufacturer System Product Name/MAXIMUSV GENE, BIOS 1903 08/19/2013
  task: ffff880404e349b0 ti: ffff88040486a000 task.ti: ffff88040486a000
  RIP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60
  Call Trace:
    __mem_cgroup_try_charge_swapin+0x45/0xf0
    mem_cgroup_charge_file+0x9c/0xe0
    shmem_getpage_gfp+0x62c/0x770
    shmem_write_begin+0x38/0x40
    generic_perform_write+0xc5/0x1c0
    __generic_file_aio_write+0x1d1/0x3f0
    generic_file_aio_write+0x4f/0xc0
    do_sync_write+0x5a/0x90
    do_acct_process+0x4b1/0x550
    acct_process+0x6d/0xa0
    do_exit+0x827/0xa70
    kthread+0xc3/0xf0

This patch fixes the issue by reintroducing mm check into
get_mem_cgroup_from_mm.  We could do the same trick in
__mem_cgroup_try_charge_swapin as we do for the regular page cache path
but it is not worth troubles.  The check is not that expensive and it is
better to have get_mem_cgroup_from_mm more robust.

[1] - http://marc.info/?l=linux-mm&m=139463617808941&w=2

Fixes: 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()")
Reported-and-tested-by: Stephan Kulow <coolo@suse.com>
Reported-by: Branimir Maksimovic <branimir.maksimovic@gmail.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 55231e5
Raw File
hwpoison-inject.c
/* Inject a hwpoison memory failure on a arbitrary pfn */
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include "internal.h"

static struct dentry *hwpoison_dir;

static int hwpoison_inject(void *data, u64 val)
{
	unsigned long pfn = val;
	struct page *p;
	struct page *hpage;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	hpage = compound_head(p);
	/*
	 * This implies unable to support free buddy pages.
	 */
	if (!get_page_unless_zero(hpage))
		return 0;

	if (!hwpoison_filter_enable)
		goto inject;

	if (!PageLRU(p) && !PageHuge(p))
		shake_page(p, 0);
	/*
	 * This implies unable to support non-LRU pages.
	 */
	if (!PageLRU(p) && !PageHuge(p))
		return 0;

	/*
	 * do a racy check with elevated page count, to make sure PG_hwpoison
	 * will only be set for the targeted owner (or on a free page).
	 * We temporarily take page lock for try_get_mem_cgroup_from_page().
	 * memory_failure() will redo the check reliably inside page lock.
	 */
	lock_page(hpage);
	err = hwpoison_filter(hpage);
	unlock_page(hpage);
	if (err)
		return 0;

inject:
	pr_info("Injecting memory failure at pfn %#lx\n", pfn);
	return memory_failure(pfn, 18, MF_COUNT_INCREASED);
}

static int hwpoison_unpoison(void *data, u64 val)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return unpoison_memory(val);
}

DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");

static void pfn_inject_exit(void)
{
	if (hwpoison_dir)
		debugfs_remove_recursive(hwpoison_dir);
}

static int pfn_inject_init(void)
{
	struct dentry *dentry;

	hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
	if (hwpoison_dir == NULL)
		return -ENOMEM;

	/*
	 * Note that the below poison/unpoison interfaces do not involve
	 * hardware status change, hence do not require hardware support.
	 * They are mainly for testing hwpoison in software level.
	 */
	dentry = debugfs_create_file("corrupt-pfn", 0200, hwpoison_dir,
					  NULL, &hwpoison_fops);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_file("unpoison-pfn", 0200, hwpoison_dir,
				     NULL, &unpoison_fops);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u32("corrupt-filter-enable", 0600,
				    hwpoison_dir, &hwpoison_filter_enable);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u32("corrupt-filter-dev-major", 0600,
				    hwpoison_dir, &hwpoison_filter_dev_major);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u32("corrupt-filter-dev-minor", 0600,
				    hwpoison_dir, &hwpoison_filter_dev_minor);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u64("corrupt-filter-flags-mask", 0600,
				    hwpoison_dir, &hwpoison_filter_flags_mask);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u64("corrupt-filter-flags-value", 0600,
				    hwpoison_dir, &hwpoison_filter_flags_value);
	if (!dentry)
		goto fail;

#ifdef CONFIG_MEMCG_SWAP
	dentry = debugfs_create_u64("corrupt-filter-memcg", 0600,
				    hwpoison_dir, &hwpoison_filter_memcg);
	if (!dentry)
		goto fail;
#endif

	return 0;
fail:
	pfn_inject_exit();
	return -ENOMEM;
}

module_init(pfn_inject_init);
module_exit(pfn_inject_exit);
MODULE_LICENSE("GPL");
back to top