Revision 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 authored by Michal Hocko on 02 June 2017, 21:46:49 UTC, committed by Linus Torvalds on 02 June 2017, 22:07:38 UTC
We have seen an early OOM killer invocation on ppc64 systems with
crashkernel=4096M:

	kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0
	kthreadd cpuset=/ mems_allowed=7
	CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1
	Call Trace:
	  dump_stack+0xb0/0xf0 (unreliable)
	  dump_header+0xb0/0x258
	  out_of_memory+0x5f0/0x640
	  __alloc_pages_nodemask+0xa8c/0xc80
	  kmem_getpages+0x84/0x1a0
	  fallback_alloc+0x2a4/0x320
	  kmem_cache_alloc_node+0xc0/0x2e0
	  copy_process.isra.25+0x260/0x1b30
	  _do_fork+0x94/0x470
	  kernel_thread+0x48/0x60
	  kthreadd+0x264/0x330
	  ret_from_kernel_thread+0x5c/0xa4

	Mem-Info:
	active_anon:0 inactive_anon:0 isolated_anon:0
	 active_file:0 inactive_file:0 isolated_file:0
	 unevictable:0 dirty:0 writeback:0 unstable:0
	 slab_reclaimable:5 slab_unreclaimable:73
	 mapped:0 shmem:0 pagetables:0 bounce:0
	 free:0 free_pcp:0 free_cma:0
	Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
	lowmem_reserve[]: 0 0 0 0
	Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB
	0 total pagecache pages
	0 pages in swap cache
	Swap cache stats: add 0, delete 0, find 0/0
	Free swap  = 0kB
	Total swap = 0kB
	819200 pages RAM
	0 pages HighMem/MovableOnly
	817481 pages reserved
	0 pages cma reserved
	0 pages hwpoisoned

the reason is that the managed memory is too low (only 110MB) while the
rest of the the 50GB is still waiting for the deferred intialization to
be done.  update_defer_init estimates the initial memoty to initialize
to 2GB at least but it doesn't consider any memory allocated in that
range.  In this particular case we've had

	Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB)

so the low 2GB is mostly depleted.

Fix this by considering memblock allocations in the initial static
initialization estimation.  Move the max_initialise to
reset_deferred_meminit and implement a simple memblock_reserved_memory
helper which iterates all reserved blocks and sums the size of all that
start below the given address.  The cumulative size is than added on top
of the initial estimation.  This is still not ideal because
reset_deferred_meminit doesn't consider holes and so reservation might
be above the initial estimation whihch we ignore but let's make the
logic simpler until we really need to handle more complicated cases.

Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set")
Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>	[4.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 9a291a7
Raw File
Kconfig
menuconfig SAMPLES
	bool "Sample kernel code"
	help
	  You can build and test sample kernel code here.

if SAMPLES

config SAMPLE_TRACE_EVENTS
	tristate "Build trace_events examples -- loadable modules only"
	depends on EVENT_TRACING && m
	help
	  This build trace event example modules.

config SAMPLE_TRACE_PRINTK
        tristate "Build trace_printk module - tests various trace_printk formats"
	depends on EVENT_TRACING && m
	help
	 This builds a module that calls trace_printk() and can be used to
	 test various trace_printk() calls from a module.

config SAMPLE_KOBJECT
	tristate "Build kobject examples -- loadable modules only"
	depends on m
	help
	  This config option will allow you to build a number of
	  different kobject sample modules showing how to use kobjects,
	  ksets, and ktypes properly.

	  If in doubt, say "N" here.

config SAMPLE_KPROBES
	tristate "Build kprobes examples -- loadable modules only"
	depends on KPROBES && m
	help
	  This build several kprobes example modules.

config SAMPLE_KRETPROBES
	tristate "Build kretprobes example -- loadable modules only"
	default m
	depends on SAMPLE_KPROBES && KRETPROBES

config SAMPLE_HW_BREAKPOINT
	tristate "Build kernel hardware breakpoint examples -- loadable module only"
	depends on HAVE_HW_BREAKPOINT && m
	help
	  This builds kernel hardware breakpoint example modules.

config SAMPLE_KFIFO
	tristate "Build kfifo examples -- loadable modules only"
	depends on m
	help
	  This config option will allow you to build a number of
	  different kfifo sample modules showing how to use the
	  generic kfifo API.

	  If in doubt, say "N" here.

config SAMPLE_KDB
	tristate "Build kdb command example -- loadable modules only"
	depends on KGDB_KDB && m
	help
	  Build an example of how to dynamically add the hello
	  command to the kdb shell.

config SAMPLE_RPMSG_CLIENT
	tristate "Build rpmsg client sample -- loadable modules only"
	depends on RPMSG && m
	help
	  Build an rpmsg client sample driver, which demonstrates how
	  to communicate with an AMP-configured remote processor over
	  the rpmsg bus.

config SAMPLE_LIVEPATCH
	tristate "Build live patching sample -- loadable modules only"
	depends on LIVEPATCH && m
	help
	  Builds a sample live patch that replaces the procfs handler
	  for /proc/cmdline to print "this has been live patched".

config SAMPLE_CONFIGFS
	tristate "Build configfs patching sample -- loadable modules only"
	depends on CONFIGFS_FS && m
	help
	  Builds a sample configfs interface.

config SAMPLE_CONNECTOR
	tristate "Build connector sample -- loadable modules only"
	depends on CONNECTOR && m
	help
	  When enabled, this builds both a sample kernel module for
	  the connector interface and a user space tool to communicate
	  with it.
	  See also Documentation/connector/connector.txt

config SAMPLE_SECCOMP
	tristate "Build seccomp sample code -- loadable modules only"
	depends on SECCOMP_FILTER && m
	help
	  Build samples of seccomp filters using various methods of
	  BPF filter construction.

config SAMPLE_BLACKFIN_GPTIMERS
	tristate "Build blackfin gptimers sample code -- loadable modules only"
	depends on BLACKFIN && BFIN_GPTIMERS && m
	help
	  Build samples of blackfin gptimers sample module.

config SAMPLE_VFIO_MDEV_MTTY
	tristate "Build VFIO mtty example mediated device sample code -- loadable modules only"
	depends on VFIO_MDEV_DEVICE && m
	help
	  Build a virtual tty sample driver for use as a VFIO
	  mediated device

config SAMPLE_STATX
	bool "Build example extended-stat using code"
	depends on BROKEN
	help
	  Build example userspace program to use the new extended-stat syscall.

endif # SAMPLES
back to top