Revision f6ba488073fe8159851fe398cc3c5ee383bb4c7a authored by Vladimir Davydov on 18 August 2017, 22:16:08 UTC, committed by Linus Torvalds on 18 August 2017, 22:32:01 UTC
To avoid a possible deadlock, sysfs_slab_remove() schedules an
asynchronous work to delete sysfs entries corresponding to the kmem
cache.  To ensure the cache isn't freed before the work function is
called, it takes a reference to the cache kobject.  The reference is
supposed to be released by the work function.

However, the work function (sysfs_slab_remove_workfn()) does nothing in
case the cache sysfs entry has already been deleted, leaking the kobject
and the corresponding cache.

This may happen on a per memcg cache destruction, because sysfs entries
of a per memcg cache are deleted on memcg offline if the cache is empty
(see __kmemcg_cache_deactivate()).

The kmemleak report looks like this:

  unreferenced object 0xffff9f798a79f540 (size 32):
    comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.554s)
    hex dump (first 32 bytes):
      6b 6d 61 6c 6c 6f 63 2d 31 36 28 31 35 39 39 3a  kmalloc-16(1599:
      6e 65 77 72 6f 6f 74 29 00 23 6b c0 ff ff ff ff  newroot).#k.....
    backtrace:
       kmemleak_alloc+0x4a/0xa0
       __kmalloc_track_caller+0x148/0x2c0
       kvasprintf+0x66/0xd0
       kasprintf+0x49/0x70
       memcg_create_kmem_cache+0xe6/0x160
       memcg_kmem_cache_create_func+0x20/0x110
       process_one_work+0x205/0x5d0
       worker_thread+0x4e/0x3a0
       kthread+0x109/0x140
       ret_from_fork+0x2a/0x40
  unreferenced object 0xffff9f79b6136840 (size 416):
    comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.573s)
    hex dump (first 32 bytes):
      40 fb 80 c2 3e 33 00 00 00 00 00 40 00 00 00 00  @...>3.....@....
      00 00 00 00 00 00 00 00 10 00 00 00 10 00 00 00  ................
    backtrace:
       kmemleak_alloc+0x4a/0xa0
       kmem_cache_alloc+0x128/0x280
       create_cache+0x3b/0x1e0
       memcg_create_kmem_cache+0x118/0x160
       memcg_kmem_cache_create_func+0x20/0x110
       process_one_work+0x205/0x5d0
       worker_thread+0x4e/0x3a0
       kthread+0x109/0x140
       ret_from_fork+0x2a/0x40

Fix the leak by adding the missing call to kobject_put() to
sysfs_slab_remove_workfn().

Link: http://lkml.kernel.org/r/20170812181134.25027-1-vdavydov.dev@gmail.com
Fixes: 3b7b314053d02 ("slub: make sysfs file removal asynchronous")
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>	[4.12.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 3010f87
Raw File
dma-noop.c
/*
 *	lib/dma-noop.c
 *
 * DMA operations that map to physical addresses without flushing memory.
 */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/pfn.h>

static void *dma_noop_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t gfp,
			    unsigned long attrs)
{
	void *ret;

	ret = (void *)__get_free_pages(gfp, get_order(size));
	if (ret)
		*dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);

	return ret;
}

static void dma_noop_free(struct device *dev, size_t size,
			  void *cpu_addr, dma_addr_t dma_addr,
			  unsigned long attrs)
{
	free_pages((unsigned long)cpu_addr, get_order(size));
}

static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
				      unsigned long offset, size_t size,
				      enum dma_data_direction dir,
				      unsigned long attrs)
{
	return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset);
}

static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
			     enum dma_data_direction dir,
			     unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
		void *va;

		BUG_ON(!sg_page(sg));
		va = sg_virt(sg);
		sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset;
		sg_dma_len(sg) = sg->length;
	}

	return nents;
}

const struct dma_map_ops dma_noop_ops = {
	.alloc			= dma_noop_alloc,
	.free			= dma_noop_free,
	.map_page		= dma_noop_map_page,
	.map_sg			= dma_noop_map_sg,
};

EXPORT_SYMBOL(dma_noop_ops);
back to top