Revision f6ba488073fe8159851fe398cc3c5ee383bb4c7a authored by Vladimir Davydov on 18 August 2017, 22:16:08 UTC, committed by Linus Torvalds on 18 August 2017, 22:32:01 UTC
To avoid a possible deadlock, sysfs_slab_remove() schedules an
asynchronous work to delete sysfs entries corresponding to the kmem
cache.  To ensure the cache isn't freed before the work function is
called, it takes a reference to the cache kobject.  The reference is
supposed to be released by the work function.

However, the work function (sysfs_slab_remove_workfn()) does nothing in
case the cache sysfs entry has already been deleted, leaking the kobject
and the corresponding cache.

This may happen on a per memcg cache destruction, because sysfs entries
of a per memcg cache are deleted on memcg offline if the cache is empty
(see __kmemcg_cache_deactivate()).

The kmemleak report looks like this:

  unreferenced object 0xffff9f798a79f540 (size 32):
    comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.554s)
    hex dump (first 32 bytes):
      6b 6d 61 6c 6c 6f 63 2d 31 36 28 31 35 39 39 3a  kmalloc-16(1599:
      6e 65 77 72 6f 6f 74 29 00 23 6b c0 ff ff ff ff  newroot).#k.....
    backtrace:
       kmemleak_alloc+0x4a/0xa0
       __kmalloc_track_caller+0x148/0x2c0
       kvasprintf+0x66/0xd0
       kasprintf+0x49/0x70
       memcg_create_kmem_cache+0xe6/0x160
       memcg_kmem_cache_create_func+0x20/0x110
       process_one_work+0x205/0x5d0
       worker_thread+0x4e/0x3a0
       kthread+0x109/0x140
       ret_from_fork+0x2a/0x40
  unreferenced object 0xffff9f79b6136840 (size 416):
    comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.573s)
    hex dump (first 32 bytes):
      40 fb 80 c2 3e 33 00 00 00 00 00 40 00 00 00 00  @...>3.....@....
      00 00 00 00 00 00 00 00 10 00 00 00 10 00 00 00  ................
    backtrace:
       kmemleak_alloc+0x4a/0xa0
       kmem_cache_alloc+0x128/0x280
       create_cache+0x3b/0x1e0
       memcg_create_kmem_cache+0x118/0x160
       memcg_kmem_cache_create_func+0x20/0x110
       process_one_work+0x205/0x5d0
       worker_thread+0x4e/0x3a0
       kthread+0x109/0x140
       ret_from_fork+0x2a/0x40

Fix the leak by adding the missing call to kobject_put() to
sysfs_slab_remove_workfn().

Link: http://lkml.kernel.org/r/20170812181134.25027-1-vdavydov.dev@gmail.com
Fixes: 3b7b314053d02 ("slub: make sysfs file removal asynchronous")
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>	[4.12.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 3010f87
Raw File
Kconfig.kmemcheck
config HAVE_ARCH_KMEMCHECK
	bool

if HAVE_ARCH_KMEMCHECK

menuconfig KMEMCHECK
	bool "kmemcheck: trap use of uninitialized memory"
	depends on DEBUG_KERNEL
	depends on !X86_USE_3DNOW
	depends on SLUB || SLAB
	depends on !CC_OPTIMIZE_FOR_SIZE
	depends on !FUNCTION_TRACER
	select FRAME_POINTER
	select STACKTRACE
	default n
	help
	  This option enables tracing of dynamically allocated kernel memory
	  to see if memory is used before it has been given an initial value.
	  Be aware that this requires half of your memory for bookkeeping and
	  will insert extra code at *every* read and write to tracked memory,
	  thus slowing down the kernel code (but user code is unaffected).

	  The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable
	  or enable kmemcheck at boot-time. If the kernel is started with
	  kmemcheck=0, the large memory and CPU overhead is not incurred.

choice
	prompt "kmemcheck: default mode at boot"
	depends on KMEMCHECK
	default KMEMCHECK_ONESHOT_BY_DEFAULT
	help
	  This option controls the default behaviour of kmemcheck when the
	  kernel boots and no kmemcheck= parameter is given.

config KMEMCHECK_DISABLED_BY_DEFAULT
	bool "disabled"
	depends on KMEMCHECK

config KMEMCHECK_ENABLED_BY_DEFAULT
	bool "enabled"
	depends on KMEMCHECK

config KMEMCHECK_ONESHOT_BY_DEFAULT
	bool "one-shot"
	depends on KMEMCHECK
	help
	  In one-shot mode, only the first error detected is reported before
	  kmemcheck is disabled.

endchoice

config KMEMCHECK_QUEUE_SIZE
	int "kmemcheck: error queue size"
	depends on KMEMCHECK
	default 64
	help
	  Select the maximum number of errors to store in the queue. Since
	  errors can occur virtually anywhere and in any context, we need a
	  temporary storage area which is guaranteed not to generate any
	  other faults. The queue will be emptied as soon as a tasklet may
	  be scheduled. If the queue is full, new error reports will be
	  lost.

config KMEMCHECK_SHADOW_COPY_SHIFT
	int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)"
	depends on KMEMCHECK
	range 2 8
	default 5
	help
	  Select the number of shadow bytes to save along with each entry of
	  the queue. These bytes indicate what parts of an allocation are
	  initialized, uninitialized, etc. and will be displayed when an
	  error is detected to help the debugging of a particular problem.

config KMEMCHECK_PARTIAL_OK
	bool "kmemcheck: allow partially uninitialized memory"
	depends on KMEMCHECK
	default y
	help
	  This option works around certain GCC optimizations that produce
	  32-bit reads from 16-bit variables where the upper 16 bits are
	  thrown away afterwards. This may of course also hide some real
	  bugs.

config KMEMCHECK_BITOPS_OK
	bool "kmemcheck: allow bit-field manipulation"
	depends on KMEMCHECK
	default n
	help
	  This option silences warnings that would be generated for bit-field
	  accesses where not all the bits are initialized at the same time.
	  This may also hide some real bugs.

endif
back to top