https://github.com/torvalds/linux
Revision 5f32b265400de723ab0db23101a75ac073bdd980 authored by Jérôme Glisse on 31 August 2017, 21:17:38 UTC, committed by Linus Torvalds on 31 August 2017, 23:13:00 UTC
The invalidate_page callback suffered from two pitfalls.  First it used
to happen after the page table lock was release and thus a new page
might have setup before the call to invalidate_page() happened.

This is in a weird way fixed by commit c7ab0d2fdc84 ("mm: convert
try_to_unmap_one() to use page_vma_mapped_walk()") that moved the
callback under the page table lock but this also broke several existing
users of the mmu_notifier API that assumed they could sleep inside this
callback.

The second pitfall was invalidate_page() being the only callback not
taking a range of address in respect to invalidation but was giving an
address and a page.  Lots of the callback implementers assumed this
could never be THP and thus failed to invalidate the appropriate range
for THP.

By killing this callback we unify the mmu_notifier callback API to
always take a virtual address range as input.

Finally this also simplifies the end user life as there is now two clear
choices:
  - invalidate_range_start()/end() callback (which allow you to sleep)
  - invalidate_range() where you can not sleep but happen right after
    page table update under page table lock

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Bernhard Held <berny156@gmx.de>
Cc: Adam Borowski <kilobyte@angband.pl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: axie <axie@amd.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent fb1522e
Raw File
Tip revision: 5f32b265400de723ab0db23101a75ac073bdd980 authored by Jérôme Glisse on 31 August 2017, 21:17:38 UTC
mm/mmu_notifier: kill invalidate_page
Tip revision: 5f32b26
hwpoison-inject.c
/* Inject a hwpoison memory failure on a arbitrary pfn */
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include "internal.h"

static struct dentry *hwpoison_dir;

static int hwpoison_inject(void *data, u64 val)
{
	unsigned long pfn = val;
	struct page *p;
	struct page *hpage;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	hpage = compound_head(p);
	/*
	 * This implies unable to support free buddy pages.
	 */
	if (!get_hwpoison_page(p))
		return 0;

	if (!hwpoison_filter_enable)
		goto inject;

	shake_page(hpage, 0);
	/*
	 * This implies unable to support non-LRU pages.
	 */
	if (!PageLRU(hpage) && !PageHuge(p))
		goto put_out;

	/*
	 * do a racy check with elevated page count, to make sure PG_hwpoison
	 * will only be set for the targeted owner (or on a free page).
	 * memory_failure() will redo the check reliably inside page lock.
	 */
	err = hwpoison_filter(hpage);
	if (err)
		goto put_out;

inject:
	pr_info("Injecting memory failure at pfn %#lx\n", pfn);
	return memory_failure(pfn, 18, MF_COUNT_INCREASED);
put_out:
	put_hwpoison_page(p);
	return 0;
}

static int hwpoison_unpoison(void *data, u64 val)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return unpoison_memory(val);
}

DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
DEFINE_SIMPLE_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");

static void pfn_inject_exit(void)
{
	debugfs_remove_recursive(hwpoison_dir);
}

static int pfn_inject_init(void)
{
	struct dentry *dentry;

	hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
	if (hwpoison_dir == NULL)
		return -ENOMEM;

	/*
	 * Note that the below poison/unpoison interfaces do not involve
	 * hardware status change, hence do not require hardware support.
	 * They are mainly for testing hwpoison in software level.
	 */
	dentry = debugfs_create_file("corrupt-pfn", 0200, hwpoison_dir,
					  NULL, &hwpoison_fops);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_file("unpoison-pfn", 0200, hwpoison_dir,
				     NULL, &unpoison_fops);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u32("corrupt-filter-enable", 0600,
				    hwpoison_dir, &hwpoison_filter_enable);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u32("corrupt-filter-dev-major", 0600,
				    hwpoison_dir, &hwpoison_filter_dev_major);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u32("corrupt-filter-dev-minor", 0600,
				    hwpoison_dir, &hwpoison_filter_dev_minor);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u64("corrupt-filter-flags-mask", 0600,
				    hwpoison_dir, &hwpoison_filter_flags_mask);
	if (!dentry)
		goto fail;

	dentry = debugfs_create_u64("corrupt-filter-flags-value", 0600,
				    hwpoison_dir, &hwpoison_filter_flags_value);
	if (!dentry)
		goto fail;

#ifdef CONFIG_MEMCG
	dentry = debugfs_create_u64("corrupt-filter-memcg", 0600,
				    hwpoison_dir, &hwpoison_filter_memcg);
	if (!dentry)
		goto fail;
#endif

	return 0;
fail:
	pfn_inject_exit();
	return -ENOMEM;
}

module_init(pfn_inject_init);
module_exit(pfn_inject_exit);
MODULE_LICENSE("GPL");
back to top