Revision f491bd71118beba608d39ac2d5f1530e1160cd2e authored by Michael Kerrisk (man-pages) on 11 October 2016, 20:53:22 UTC, committed by Linus Torvalds on 11 October 2016, 22:06:31 UTC
Patch series "pipe: fix limit handling", v2.

When changing a pipe's capacity with fcntl(F_SETPIPE_SZ), various limits
defined by /proc/sys/fs/pipe-* files are checked to see if unprivileged
users are exceeding limits on memory consumption.

While documenting and testing the operation of these limits I noticed
that, as currently implemented, these checks have a number of problems:

(1) When increasing the pipe capacity, the checks against the limits
    in /proc/sys/fs/pipe-user-pages-{soft,hard} are made against
    existing consumption, and exclude the memory required for the
    increased pipe capacity. The new increase in pipe capacity can then
    push the total memory used by the user for pipes (possibly far) over
    a limit. This can also trigger the problem described next.

(2) The limit checks are performed even when the new pipe capacity
    is less than the existing pipe capacity. This can lead to problems
    if a user sets a large pipe capacity, and then the limits are
    lowered, with the result that the user will no longer be able to
    decrease the pipe capacity.

(3) As currently implemented, accounting and checking against the
    limits is done as follows:

    (a) Test whether the user has exceeded the limit.
    (b) Make new pipe buffer allocation.
    (c) Account new allocation against the limits.

    This is racey. Multiple processes may pass point (a) simultaneously,
    and then allocate pipe buffers that are accounted for only in step
    (c).  The race means that the user's pipe buffer allocation could be
    pushed over the limit (by an arbitrary amount, depending on how
    unlucky we were in the race). [Thanks to Vegard Nossum for spotting
    this point, which I had missed.]

This patch series addresses these three problems.

This patch (of 8):

This is a minor preparatory patch.  After subsequent patches,
round_pipe_size() will be called from pipe_set_size(), so place
round_pipe_size() above pipe_set_size().

Link: http://lkml.kernel.org/r/91a91fdb-a959-ba7f-b551-b62477cc98a1@gmail.com
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: <socketpair@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Jens Axboe <axboe@fb.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent fcc2453
Raw File
kmemleak-test.c
/*
 * mm/kmemleak-test.c
 *
 * Copyright (C) 2008 ARM Limited
 * Written by Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#define pr_fmt(fmt) "kmemleak: " fmt

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/fdtable.h>

#include <linux/kmemleak.h>

struct test_node {
	long header[25];
	struct list_head list;
	long footer[25];
};

static LIST_HEAD(test_list);
static DEFINE_PER_CPU(void *, kmemleak_test_pointer);

/*
 * Some very simple testing. This function needs to be extended for
 * proper testing.
 */
static int __init kmemleak_test_init(void)
{
	struct test_node *elem;
	int i;

	pr_info("Kmemleak testing\n");

	/* make some orphan objects */
	pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
	pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL));
	pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
	pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL));
	pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
	pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL));
	pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
	pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL));
#ifndef CONFIG_MODULES
	pr_info("kmem_cache_alloc(files_cachep) = %p\n",
		kmem_cache_alloc(files_cachep, GFP_KERNEL));
	pr_info("kmem_cache_alloc(files_cachep) = %p\n",
		kmem_cache_alloc(files_cachep, GFP_KERNEL));
#endif
	pr_info("vmalloc(64) = %p\n", vmalloc(64));
	pr_info("vmalloc(64) = %p\n", vmalloc(64));
	pr_info("vmalloc(64) = %p\n", vmalloc(64));
	pr_info("vmalloc(64) = %p\n", vmalloc(64));
	pr_info("vmalloc(64) = %p\n", vmalloc(64));

	/*
	 * Add elements to a list. They should only appear as orphan
	 * after the module is removed.
	 */
	for (i = 0; i < 10; i++) {
		elem = kzalloc(sizeof(*elem), GFP_KERNEL);
		pr_info("kzalloc(sizeof(*elem)) = %p\n", elem);
		if (!elem)
			return -ENOMEM;
		INIT_LIST_HEAD(&elem->list);
		list_add_tail(&elem->list, &test_list);
	}

	for_each_possible_cpu(i) {
		per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL);
		pr_info("kmalloc(129) = %p\n",
			per_cpu(kmemleak_test_pointer, i));
	}

	return 0;
}
module_init(kmemleak_test_init);

static void __exit kmemleak_test_exit(void)
{
	struct test_node *elem, *tmp;

	/*
	 * Remove the list elements without actually freeing the
	 * memory.
	 */
	list_for_each_entry_safe(elem, tmp, &test_list, list)
		list_del(&elem->list);
}
module_exit(kmemleak_test_exit);

MODULE_LICENSE("GPL");
back to top