Revision 9a291a7c9428155e8e623e4a3989f8be47134df5 authored by James Morse on 02 June 2017, 21:46:46 UTC, committed by Linus Torvalds on 02 June 2017, 22:07:38 UTC
KVM uses get_user_pages() to resolve its stage2 faults.  KVM sets the
FOLL_HWPOISON flag causing faultin_page() to return -EHWPOISON when it
finds a VM_FAULT_HWPOISON.  KVM handles these hwpoison pages as a
special case.  (check_user_page_hwpoison())

When huge pages are involved, this doesn't work so well.
get_user_pages() calls follow_hugetlb_page(), which stops early if it
receives VM_FAULT_HWPOISON from hugetlb_fault(), eventually returning
-EFAULT to the caller.  The step to map this to -EHWPOISON based on the
FOLL_ flags is missing.  The hwpoison special case is skipped, and
-EFAULT is returned to user-space, causing Qemu or kvmtool to exit.

Instead, move this VM_FAULT_ to errno mapping code into a header file
and use it from faultin_page() and follow_hugetlb_page().

With this, KVM works as expected.

This isn't a problem for arm64 today as we haven't enabled
MEMORY_FAILURE, but I can't see any reason this doesn't happen on x86
too, so I think this should be a fix.  This doesn't apply earlier than
stable's v4.11.1 due to all sorts of cleanup.

[james.morse@arm.com: add vm_fault_to_errno() call to faultin_page()]
suggested.
  Link: http://lkml.kernel.org/r/20170525171035.16359-1-james.morse@arm.com
[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20170524160900.28786-1-james.morse@arm.com
Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org>	[4.11.1+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 70feee0
Raw File
symlink.c
/*
 *  linux/fs/nfs/symlink.c
 *
 *  Copyright (C) 1992  Rick Sladkey
 *
 *  Optimization changes Copyright (C) 1994 Florian La Roche
 *
 *  Jun 7 1999, cache symlink lookups in the page cache.  -DaveM
 *
 *  nfs symlink handling code
 */

#include <linux/time.h>
#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include <linux/pagemap.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/string.h>

/* Symlink caching in the page cache is even more simplistic
 * and straightforward than readdir caching.
 */

/*
 * Page-cache filler for symlink contents.
 *
 * @data: the symlink's inode, passed through read_cache_page() as an
 *        opaque pointer
 * @page: the page to fill; it is unlocked here on both the success and
 *        the failure path
 *
 * The prototype takes a void * first argument so that the function can be
 * handed to read_cache_page() without a function-pointer cast; calling a
 * function through a pointer of an incompatible type is undefined
 * behaviour and defeats indirect-call checking.
 *
 * Asks the protocol-specific readlink operation to read up to PAGE_SIZE
 * bytes of link target into @page starting at offset 0.
 *
 * Returns 0 with the page marked up to date on success.  Any negative
 * result from readlink marks the page in error and is reported as -EIO.
 */
static int nfs_symlink_filler(void *data, struct page *page)
{
	struct inode *inode = data;
	int error;

	error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
	if (error < 0) {
		SetPageError(page);
		unlock_page(page);
		return -EIO;
	}

	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

/*
 * ->get_link() implementation for NFS symlinks.
 *
 * @dentry: dentry of the symlink, or NULL; the NULL case uses the _rcu
 *          revalidation helper and only looks at an already cached page
 *          (presumably the RCU-walk lookup mode - confirm against the VFS
 *          caller)
 * @inode:  inode of the symlink
 * @done:   delayed call that is armed with page_put_link() for the
 *          returned page
 *
 * Returns the address of the page-cache page at index 0 of the inode's
 * mapping, or an ERR_PTR() value on failure.  With a NULL @dentry,
 * -ECHILD is returned when that page is not cached or not up to date.
 */
static const char *nfs_get_link(struct dentry *dentry,
				struct inode *inode,
				struct delayed_call *done)
{
	struct page *page;
	void *err;

	if (!dentry) {
		/* A zero result must convert to NULL for this check to pass. */
		err = ERR_PTR(nfs_revalidate_mapping_rcu(inode));
		if (err)
			return err;
		page = find_get_page(inode->i_mapping, 0);
		if (!page)
			return ERR_PTR(-ECHILD);
		if (!PageUptodate(page)) {
			/* Drop the reference taken by find_get_page(). */
			put_page(page);
			return ERR_PTR(-ECHILD);
		}
	} else {
		err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
		if (err)
			return err;
		/* The inode is handed to the filler as its void * argument. */
		page = read_cache_page(&inode->i_data, 0,
				       nfs_symlink_filler, inode);
		if (IS_ERR(page))
			return ERR_CAST(page);
	}
	/* page_put_link() is registered to run later with this page. */
	set_delayed_call(done, page_put_link, page);
	return page_address(page);
}

/*
 * Inode operations for NFS symlinks: resolving the link target plus
 * attribute get/set.
 */
const struct inode_operations nfs_symlink_inode_operations = {
	.setattr	= nfs_setattr,
	.getattr	= nfs_getattr,
	.get_link	= nfs_get_link,
};
back to top