Revision f6ba488073fe8159851fe398cc3c5ee383bb4c7a authored by Vladimir Davydov on 18 August 2017, 22:16:08 UTC, committed by Linus Torvalds on 18 August 2017, 22:32:01 UTC
To avoid a possible deadlock, sysfs_slab_remove() schedules an
asynchronous work to delete sysfs entries corresponding to the kmem
cache.  To ensure the cache isn't freed before the work function is
called, it takes a reference to the cache kobject.  The reference is
supposed to be released by the work function.

However, the work function (sysfs_slab_remove_workfn()) does nothing in
case the cache sysfs entry has already been deleted, leaking the kobject
and the corresponding cache.

This may happen on a per memcg cache destruction, because sysfs entries
of a per memcg cache are deleted on memcg offline if the cache is empty
(see __kmemcg_cache_deactivate()).

The kmemleak report looks like this:

  unreferenced object 0xffff9f798a79f540 (size 32):
    comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.554s)
    hex dump (first 32 bytes):
      6b 6d 61 6c 6c 6f 63 2d 31 36 28 31 35 39 39 3a  kmalloc-16(1599:
      6e 65 77 72 6f 6f 74 29 00 23 6b c0 ff ff ff ff  newroot).#k.....
    backtrace:
       kmemleak_alloc+0x4a/0xa0
       __kmalloc_track_caller+0x148/0x2c0
       kvasprintf+0x66/0xd0
       kasprintf+0x49/0x70
       memcg_create_kmem_cache+0xe6/0x160
       memcg_kmem_cache_create_func+0x20/0x110
       process_one_work+0x205/0x5d0
       worker_thread+0x4e/0x3a0
       kthread+0x109/0x140
       ret_from_fork+0x2a/0x40
  unreferenced object 0xffff9f79b6136840 (size 416):
    comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.573s)
    hex dump (first 32 bytes):
      40 fb 80 c2 3e 33 00 00 00 00 00 40 00 00 00 00  @...>3.....@....
      00 00 00 00 00 00 00 00 10 00 00 00 10 00 00 00  ................
    backtrace:
       kmemleak_alloc+0x4a/0xa0
       kmem_cache_alloc+0x128/0x280
       create_cache+0x3b/0x1e0
       memcg_create_kmem_cache+0x118/0x160
       memcg_kmem_cache_create_func+0x20/0x110
       process_one_work+0x205/0x5d0
       worker_thread+0x4e/0x3a0
       kthread+0x109/0x140
       ret_from_fork+0x2a/0x40

Fix the leak by adding the missing call to kobject_put() to
sysfs_slab_remove_workfn().

Link: http://lkml.kernel.org/r/20170812181134.25027-1-vdavydov.dev@gmail.com
Fixes: 3b7b314053d02 ("slub: make sysfs file removal asynchronous")
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>	[4.12.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 3010f87
Raw File
sg_split.c
/*
 * Copyright (C) 2015 Robert Jarzmik <robert.jarzmik@free.fr>
 *
 * Scatterlist splitting helpers.
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#include <linux/scatterlist.h>
#include <linux/slab.h>

struct sg_splitter {
	struct scatterlist *in_sg0;
	int nents;
	off_t skip_sg0;
	unsigned int length_last_sg;

	struct scatterlist *out_sg;
};

static int sg_calculate_split(struct scatterlist *in, int nents, int nb_splits,
			      off_t skip, const size_t *sizes,
			      struct sg_splitter *splitters, bool mapped)
{
	int i;
	unsigned int sglen;
	size_t size = sizes[0], len;
	struct sg_splitter *curr = splitters;
	struct scatterlist *sg;

	for (i = 0; i < nb_splits; i++) {
		splitters[i].in_sg0 = NULL;
		splitters[i].nents = 0;
	}

	for_each_sg(in, sg, nents, i) {
		sglen = mapped ? sg_dma_len(sg) : sg->length;
		if (skip > sglen) {
			skip -= sglen;
			continue;
		}

		len = min_t(size_t, size, sglen - skip);
		if (!curr->in_sg0) {
			curr->in_sg0 = sg;
			curr->skip_sg0 = skip;
		}
		size -= len;
		curr->nents++;
		curr->length_last_sg = len;

		while (!size && (skip + len < sglen) && (--nb_splits > 0)) {
			curr++;
			size = *(++sizes);
			skip += len;
			len = min_t(size_t, size, sglen - skip);

			curr->in_sg0 = sg;
			curr->skip_sg0 = skip;
			curr->nents = 1;
			curr->length_last_sg = len;
			size -= len;
		}
		skip = 0;

		if (!size && --nb_splits > 0) {
			curr++;
			size = *(++sizes);
		}

		if (!nb_splits)
			break;
	}

	return (size || !splitters[0].in_sg0) ? -EINVAL : 0;
}

static void sg_split_phys(struct sg_splitter *splitters, const int nb_splits)
{
	int i, j;
	struct scatterlist *in_sg, *out_sg;
	struct sg_splitter *split;

	for (i = 0, split = splitters; i < nb_splits; i++, split++) {
		in_sg = split->in_sg0;
		out_sg = split->out_sg;
		for (j = 0; j < split->nents; j++, out_sg++) {
			*out_sg = *in_sg;
			if (!j) {
				out_sg->offset += split->skip_sg0;
				out_sg->length -= split->skip_sg0;
			} else {
				out_sg->offset = 0;
			}
			sg_dma_address(out_sg) = 0;
			sg_dma_len(out_sg) = 0;
			in_sg = sg_next(in_sg);
		}
		out_sg[-1].length = split->length_last_sg;
		sg_mark_end(out_sg - 1);
	}
}

static void sg_split_mapped(struct sg_splitter *splitters, const int nb_splits)
{
	int i, j;
	struct scatterlist *in_sg, *out_sg;
	struct sg_splitter *split;

	for (i = 0, split = splitters; i < nb_splits; i++, split++) {
		in_sg = split->in_sg0;
		out_sg = split->out_sg;
		for (j = 0; j < split->nents; j++, out_sg++) {
			sg_dma_address(out_sg) = sg_dma_address(in_sg);
			sg_dma_len(out_sg) = sg_dma_len(in_sg);
			if (!j) {
				sg_dma_address(out_sg) += split->skip_sg0;
				sg_dma_len(out_sg) -= split->skip_sg0;
			}
			in_sg = sg_next(in_sg);
		}
		sg_dma_len(--out_sg) = split->length_last_sg;
	}
}

/**
 * sg_split - split a scatterlist into several scatterlists
 * @in: the input sg list
 * @in_mapped_nents: the result of a dma_map_sg(in, ...), or 0 if not mapped.
 * @skip: the number of bytes to skip in the input sg list
 * @nb_splits: the number of desired sg outputs
 * @split_sizes: the respective size of each output sg list in bytes
 * @out: an array where to store the allocated output sg lists
 * @out_mapped_nents: the resulting sg lists mapped number of sg entries. Might
 *                    be NULL if sglist not already mapped (in_mapped_nents = 0)
 * @gfp_mask: the allocation flag
 *
 * This function splits the input sg list into nb_splits sg lists, which are
 * allocated and stored into out.
 * The @in is split into :
 *  - @out[0], which covers bytes [@skip .. @skip + @split_sizes[0] - 1] of @in
 *  - @out[1], which covers bytes [@skip + split_sizes[0] ..
 *                                 @skip + @split_sizes[0] + @split_sizes[1] -1]
 * etc ...
 * It will be the caller's duty to kfree() out array members.
 *
 * Returns 0 upon success, or error code
 */
int sg_split(struct scatterlist *in, const int in_mapped_nents,
	     const off_t skip, const int nb_splits,
	     const size_t *split_sizes,
	     struct scatterlist **out, int *out_mapped_nents,
	     gfp_t gfp_mask)
{
	int i, ret;
	struct sg_splitter *splitters;

	splitters = kcalloc(nb_splits, sizeof(*splitters), gfp_mask);
	if (!splitters)
		return -ENOMEM;

	ret = sg_calculate_split(in, sg_nents(in), nb_splits, skip, split_sizes,
			   splitters, false);
	if (ret < 0)
		goto err;

	ret = -ENOMEM;
	for (i = 0; i < nb_splits; i++) {
		splitters[i].out_sg = kmalloc_array(splitters[i].nents,
						    sizeof(struct scatterlist),
						    gfp_mask);
		if (!splitters[i].out_sg)
			goto err;
	}

	/*
	 * The order of these 3 calls is important and should be kept.
	 */
	sg_split_phys(splitters, nb_splits);
	ret = sg_calculate_split(in, in_mapped_nents, nb_splits, skip,
				 split_sizes, splitters, true);
	if (ret < 0)
		goto err;
	sg_split_mapped(splitters, nb_splits);

	for (i = 0; i < nb_splits; i++) {
		out[i] = splitters[i].out_sg;
		if (out_mapped_nents)
			out_mapped_nents[i] = splitters[i].nents;
	}

	kfree(splitters);
	return 0;

err:
	for (i = 0; i < nb_splits; i++)
		kfree(splitters[i].out_sg);
	kfree(splitters);
	return ret;
}
EXPORT_SYMBOL(sg_split);
back to top