Revision 0ee931c4e31a5efb134c76440405e9219f896e33 authored by Michal Hocko on 13 September 2017, 23:28:29 UTC, committed by Linus Torvalds on 14 September 2017, 01:53:16 UTC
GFP_TEMPORARY was introduced by commit e12ba74d8ff3 ("Group short-lived
and reclaimable kernel allocations") along with __GFP_RECLAIMABLE.  It's
primary motivation was to allow users to tell that an allocation is
short lived and so the allocator can try to place such allocations close
together and prevent long term fragmentation.  As much as this sounds
like a reasonable semantic it becomes much less clear when to use the
highlevel GFP_TEMPORARY allocation flag.  How long is temporary? Can the
context holding that memory sleep? Can it take locks? It seems there is
no good answer for those questions.

The current implementation of GFP_TEMPORARY is basically GFP_KERNEL |
__GFP_RECLAIMABLE which in itself is tricky because basically none of
the existing caller provide a way to reclaim the allocated memory.  So
this is rather misleading and hard to evaluate for any benefits.

I have checked some random users and none of them has added the flag
with a specific justification.  I suspect most of them just copied from
other existing users and others just thought it might be a good idea to
use without any measuring.  This suggests that GFP_TEMPORARY just
motivates for cargo cult usage without any reasoning.

I believe that our gfp flags are quite complex already and especially
those with highlevel semantic should be clearly defined to prevent from
confusion and abuse.  Therefore I propose dropping GFP_TEMPORARY and
replace all existing users to simply use GFP_KERNEL.  Please note that
SLAB users with shrinkers will still get __GFP_RECLAIMABLE heuristic and
so they will be placed properly for memory fragmentation prevention.

I can see reasons we might want some gfp flag to reflect shorterm
allocations but I propose starting from a clear semantic definition and
only then add users with proper justification.

This was been brought up before LSF this year by Matthew [1] and it
turned out that GFP_TEMPORARY really doesn't have a clear semantic.  It
seems to be a heuristic without any measured advantage for most (if not
all) its current users.  The follow up discussion has revealed that
opinions on what might be temporary allocation differ a lot between
developers.  So rather than trying to tweak existing users into a
semantic which they haven't expected I propose to simply remove the flag
and start from scratch if we really need a semantic for short term
allocations.

[1] http://lkml.kernel.org/r/20170118054945.GD18349@bombadil.infradead.org

[akpm@linux-foundation.org: fix typo]
[akpm@linux-foundation.org: coding-style fixes]
[sfr@canb.auug.org.au: drm/i915: fix up]
  Link: http://lkml.kernel.org/r/20170816144703.378d4f4d@canb.auug.org.au
Link: http://lkml.kernel.org/r/20170728091904.14627-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Neil Brown <neilb@suse.de>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent d0dbf77
Raw File
extable.c
/* Rewritten by Rusty Russell, on the backs of many others...
   Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include <linux/ftrace.h>
#include <linux/memory.h>
#include <linux/extable.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/filter.h>

#include <asm/sections.h>
#include <linux/uaccess.h>

/*
 * mutex protecting text section modification (dynamic code patching).
 * some users need to sleep (allocating memory...) while they hold this lock.
 *
 * NOT exported to modules - patching kernel text is a really delicate matter.
 */
DEFINE_MUTEX(text_mutex);

extern struct exception_table_entry __start___ex_table[];
extern struct exception_table_entry __stop___ex_table[];

/* Cleared by build time tools if the table is already sorted. */
u32 __initdata __visible main_extable_sort_needed = 1;

/* Sort the kernel's built-in exception table */
void __init sort_main_extable(void)
{
	if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) {
		pr_notice("Sorting __ex_table...\n");
		sort_extable(__start___ex_table, __stop___ex_table);
	}
}

/* Given an address, look for it in the exception tables. */
const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
	const struct exception_table_entry *e;

	e = search_extable(__start___ex_table,
			   __stop___ex_table - __start___ex_table, addr);
	if (!e)
		e = search_module_extables(addr);
	return e;
}

static inline int init_kernel_text(unsigned long addr)
{
	if (addr >= (unsigned long)_sinittext &&
	    addr < (unsigned long)_einittext)
		return 1;
	return 0;
}

int notrace core_kernel_text(unsigned long addr)
{
	if (addr >= (unsigned long)_stext &&
	    addr < (unsigned long)_etext)
		return 1;

	if (system_state < SYSTEM_RUNNING &&
	    init_kernel_text(addr))
		return 1;
	return 0;
}

/**
 * core_kernel_data - tell if addr points to kernel data
 * @addr: address to test
 *
 * Returns true if @addr passed in is from the core kernel data
 * section.
 *
 * Note: On some archs it may return true for core RODATA, and false
 *  for others. But will always be true for core RW data.
 */
int core_kernel_data(unsigned long addr)
{
	if (addr >= (unsigned long)_sdata &&
	    addr < (unsigned long)_edata)
		return 1;
	return 0;
}

int __kernel_text_address(unsigned long addr)
{
	if (core_kernel_text(addr))
		return 1;
	if (is_module_text_address(addr))
		return 1;
	if (is_ftrace_trampoline(addr))
		return 1;
	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
		return 1;
	if (is_bpf_text_address(addr))
		return 1;
	/*
	 * There might be init symbols in saved stacktraces.
	 * Give those symbols a chance to be printed in
	 * backtraces (such as lockdep traces).
	 *
	 * Since we are after the module-symbols check, there's
	 * no danger of address overlap:
	 */
	if (init_kernel_text(addr))
		return 1;
	return 0;
}

int kernel_text_address(unsigned long addr)
{
	if (core_kernel_text(addr))
		return 1;
	if (is_module_text_address(addr))
		return 1;
	if (is_ftrace_trampoline(addr))
		return 1;
	if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
		return 1;
	if (is_bpf_text_address(addr))
		return 1;
	return 0;
}

/*
 * On some architectures (PPC64, IA64) function pointers
 * are actually only tokens to some data that then holds the
 * real function address. As a result, to find if a function
 * pointer is part of the kernel text, we need to do some
 * special dereferencing first.
 */
int func_ptr_is_kernel_text(void *ptr)
{
	unsigned long addr;
	addr = (unsigned long) dereference_function_descriptor(ptr);
	if (core_kernel_text(addr))
		return 1;
	return is_module_text_address(addr);
}
back to top