Revision 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 authored by Michal Hocko on 02 June 2017, 21:46:49 UTC, committed by Linus Torvalds on 02 June 2017, 22:07:38 UTC
We have seen an early OOM killer invocation on ppc64 systems with
crashkernel=4096M:

	kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0
	kthreadd cpuset=/ mems_allowed=7
	CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1
	Call Trace:
	  dump_stack+0xb0/0xf0 (unreliable)
	  dump_header+0xb0/0x258
	  out_of_memory+0x5f0/0x640
	  __alloc_pages_nodemask+0xa8c/0xc80
	  kmem_getpages+0x84/0x1a0
	  fallback_alloc+0x2a4/0x320
	  kmem_cache_alloc_node+0xc0/0x2e0
	  copy_process.isra.25+0x260/0x1b30
	  _do_fork+0x94/0x470
	  kernel_thread+0x48/0x60
	  kthreadd+0x264/0x330
	  ret_from_kernel_thread+0x5c/0xa4

	Mem-Info:
	active_anon:0 inactive_anon:0 isolated_anon:0
	 active_file:0 inactive_file:0 isolated_file:0
	 unevictable:0 dirty:0 writeback:0 unstable:0
	 slab_reclaimable:5 slab_unreclaimable:73
	 mapped:0 shmem:0 pagetables:0 bounce:0
	 free:0 free_pcp:0 free_cma:0
	Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
	lowmem_reserve[]: 0 0 0 0
	Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
	0 total pagecache pages
	0 pages in swap cache
	Swap cache stats: add 0, delete 0, find 0/0
	Free swap  = 0kB
	Total swap = 0kB
	819200 pages RAM
	0 pages HighMem/MovableOnly
	817481 pages reserved
	0 pages cma reserved
	0 pages hwpoisoned

the reason is that the managed memory is too low (only 110MB) while the
rest of the the 50GB is still waiting for the deferred intialization to
be done.  update_defer_init estimates the initial memoty to initialize
to 2GB at least but it doesn't consider any memory allocated in that
range.  In this particular case we've had

	Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB)

so the low 2GB is mostly depleted.

Fix this by considering memblock allocations in the initial static
initialization estimation.  Move the max_initialise to
reset_deferred_meminit and implement a simple memblock_reserved_memory
helper which iterates all reserved blocks and sums the size of all that
start below the given address.  The cumulative size is than added on top
of the initial estimation.  This is still not ideal because
reset_deferred_meminit doesn't consider holes and so reservation might
be above the initial estimation whihch we ignore but let's make the
logic simpler until we really need to handle more complicated cases.

Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>	[4.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 9a291a7
Raw File
test_user_copy.c
/*
 * Kernel module for testing copy_to/from_user infrastructure.
 *
 * Copyright 2013 Google Inc. All Rights Reserved
 *
 * Authors:
 *      Kees Cook       <keescook@chromium.org>
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mman.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>

/*
 * Several 32-bit architectures support 64-bit {get,put}_user() calls.
 * As there doesn't appear to be anything that can safely determine
 * their capability at compile-time, we just have to opt-out certain archs.
 */
#if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \
			    !defined(CONFIG_BLACKFIN) &&	\
			    !defined(CONFIG_M32R) &&		\
			    !defined(CONFIG_M68K) &&		\
			    !defined(CONFIG_MICROBLAZE) &&	\
			    !defined(CONFIG_MN10300) &&		\
			    !defined(CONFIG_NIOS2) &&		\
			    !defined(CONFIG_PPC32) &&		\
			    !defined(CONFIG_SUPERH))
# define TEST_U64
#endif

#define test(condition, msg)		\
({					\
	int cond = (condition);		\
	if (cond)			\
		pr_warn("%s\n", msg);	\
	cond;				\
})

static int __init test_user_copy_init(void)
{
	int ret = 0;
	char *kmem;
	char __user *usermem;
	char *bad_usermem;
	unsigned long user_addr;
	u8 val_u8;
	u16 val_u16;
	u32 val_u32;
#ifdef TEST_U64
	u64 val_u64;
#endif

	kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
	if (!kmem)
		return -ENOMEM;

	user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2,
			    PROT_READ | PROT_WRITE | PROT_EXEC,
			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
	if (user_addr >= (unsigned long)(TASK_SIZE)) {
		pr_warn("Failed to allocate user memory\n");
		kfree(kmem);
		return -ENOMEM;
	}

	usermem = (char __user *)user_addr;
	bad_usermem = (char *)user_addr;

	/*
	 * Legitimate usage: none of these copies should fail.
	 */
	memset(kmem, 0x3a, PAGE_SIZE * 2);
	ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE),
		    "legitimate copy_to_user failed");
	memset(kmem, 0x0, PAGE_SIZE);
	ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE),
		    "legitimate copy_from_user failed");
	ret |= test(memcmp(kmem, kmem + PAGE_SIZE, PAGE_SIZE),
		    "legitimate usercopy failed to copy data");

#define test_legit(size, check)						  \
	do {								  \
		val_##size = check;					  \
		ret |= test(put_user(val_##size, (size __user *)usermem), \
		    "legitimate put_user (" #size ") failed");		  \
		val_##size = 0;						  \
		ret |= test(get_user(val_##size, (size __user *)usermem), \
		    "legitimate get_user (" #size ") failed");		  \
		ret |= test(val_##size != check,			  \
		    "legitimate get_user (" #size ") failed to do copy"); \
		if (val_##size != check) {				  \
			pr_info("0x%llx != 0x%llx\n",			  \
				(unsigned long long)val_##size,		  \
				(unsigned long long)check);		  \
		}							  \
	} while (0)

	test_legit(u8,  0x5a);
	test_legit(u16, 0x5a5b);
	test_legit(u32, 0x5a5b5c5d);
#ifdef TEST_U64
	test_legit(u64, 0x5a5b5c5d6a6b6c6d);
#endif
#undef test_legit

	/*
	 * Invalid usage: none of these copies should succeed.
	 */

	/* Prepare kernel memory with check values. */
	memset(kmem, 0x5a, PAGE_SIZE);
	memset(kmem + PAGE_SIZE, 0, PAGE_SIZE);

	/* Reject kernel-to-kernel copies through copy_from_user(). */
	ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE),
				    PAGE_SIZE),
		    "illegal all-kernel copy_from_user passed");

	/* Destination half of buffer should have been zeroed. */
	ret |= test(memcmp(kmem + PAGE_SIZE, kmem, PAGE_SIZE),
		    "zeroing failure for illegal all-kernel copy_from_user");

#if 0
	/*
	 * When running with SMAP/PAN/etc, this will Oops the kernel
	 * due to the zeroing of userspace memory on failure. This needs
	 * to be tested in LKDTM instead, since this test module does not
	 * expect to explode.
	 */
	ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem,
				    PAGE_SIZE),
		    "illegal reversed copy_from_user passed");
#endif
	ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE,
				  PAGE_SIZE),
		    "illegal all-kernel copy_to_user passed");
	ret |= test(!copy_to_user((char __user *)kmem, bad_usermem,
				  PAGE_SIZE),
		    "illegal reversed copy_to_user passed");

#define test_illegal(size, check)					    \
	do {								    \
		val_##size = (check);					    \
		ret |= test(!get_user(val_##size, (size __user *)kmem),	    \
		    "illegal get_user (" #size ") passed");		    \
		ret |= test(val_##size != (size)0,			    \
		    "zeroing failure for illegal get_user (" #size ")");    \
		if (val_##size != (size)0) {				    \
			pr_info("0x%llx != 0\n",			    \
				(unsigned long long)val_##size);	    \
		}							    \
		ret |= test(!put_user(val_##size, (size __user *)kmem),	    \
		    "illegal put_user (" #size ") passed");		    \
	} while (0)

	test_illegal(u8,  0x5a);
	test_illegal(u16, 0x5a5b);
	test_illegal(u32, 0x5a5b5c5d);
#ifdef TEST_U64
	test_illegal(u64, 0x5a5b5c5d6a6b6c6d);
#endif
#undef test_illegal

	vm_munmap(user_addr, PAGE_SIZE * 2);
	kfree(kmem);

	if (ret == 0) {
		pr_info("tests passed.\n");
		return 0;
	}

	return -EINVAL;
}

module_init(test_user_copy_init);

static void __exit test_user_copy_exit(void)
{
	pr_info("unloaded.\n");
}

module_exit(test_user_copy_exit);

MODULE_AUTHOR("Kees Cook <keescook@chromium.org>");
MODULE_LICENSE("GPL");
back to top