Revision 9e368259ad988356c4c95150fafd1a06af095d98 authored by Andrea Arcangeli on 30 November 2018, 22:09:25 UTC, committed by Linus Torvalds on 30 November 2018, 22:56:14 UTC
Patch series "userfaultfd shmem updates".

Jann found two bugs in the userfaultfd shmem MAP_SHARED backend: the
lack of the VM_MAYWRITE check and the lack of i_size checks.

Then, while looking into the above, we also fixed the MAP_PRIVATE case.

By source review, Hugh also found a source of data loss when UFFDIO_COPY
is used on shmem MAP_SHARED PROT_READ mappings (the production usages
incidentally run with PROT_READ|PROT_WRITE, so the data loss couldn't
happen in those production usages, such as with QEMU).

The whole patchset is marked for stable.

We verified that QEMU postcopy live migration with the guest running on
shmem MAP_PRIVATE runs as well as it did before the shmem MAP_PRIVATE
fix. Regardless of whether it's shmem or hugetlbfs, MAP_PRIVATE or
MAP_SHARED, QEMU unconditionally punches a hole if the guest mapping is
file-backed, and issues a MADV_DONTNEED too (needed to get rid of the
MAP_PRIVATE COWs and for the anon backend).
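
For illustration, that discard boils down to two calls per range; a
minimal sketch, assuming a descriptor fd backing guest RAM mapped at
host_addr (hypothetical names, not QEMU's actual code):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sys/mman.h>
	#include <linux/falloc.h>

	/* Sketch: discard one range of file-backed guest RAM. */
	static int discard_range(int fd, void *host_addr, off_t offset,
				 size_t len)
	{
		/* Punch a hole in the backing file (shmem/hugetlbfs). */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      offset, len))
			return -1;
		/* Drop MAP_PRIVATE COW copies and anon pages as well. */
		return madvise(host_addr, len, MADV_DONTNEED);
	}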

This patch (of 5):

We internally used EFAULT to communicate with the caller; switch to
ENOENT so that EFAULT can be used as a non-internal retval.
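
A minimal userspace sketch of the convention this establishes
(hypothetical names, not the actual mm/userfaultfd.c code): the inner
helper reserves -ENOENT to mean "retry after faulting the source in",
so any -EFAULT that reaches the caller is a genuine error.

	#include <errno.h>
	#include <stdbool.h>

	struct copy_state { bool src_ready; bool dst_valid; };

	/* Stand-in for faulting the source page in outside the lock. */
	static bool fault_in_source(struct copy_state *cs) { return true; }

	static int try_copy(struct copy_state *cs)
	{
		if (!cs->src_ready)
			return -ENOENT;	/* internal: caller retries */
		if (!cs->dst_valid)
			return -EFAULT;	/* genuine error for the caller */
		return 0;
	}

	static int do_copy(struct copy_state *cs)
	{
		int err;

		while ((err = try_copy(cs)) == -ENOENT)
			cs->src_ready = fault_in_source(cs);
		return err;	/* 0 or a real error such as -EFAULT */
	}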

Link: http://lkml.kernel.org/r/20181126173452.26955-2-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
dcookies.c
/*
 * dcookies.c
 *
 * Copyright 2002 John Levon <levon@movementarian.org>
 *
 * Persistent cookie-path mappings. These are used by
 * profilers to convert a per-task EIP value into something
 * non-transitory that can be processed at a later date.
 * This is done by locking the dentry/vfsmnt pair in the
 * kernel until released by the tasks needing the persistent
 * objects. The tag is simply an unsigned long that refers
 * to the pair and can be looked up from userspace.
 */
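
/*
 * Userspace usage, roughly (a hedged sketch, not part of this file):
 * a profiler that received a cookie from the kernel resolves it with
 * the lookup_dcookie syscall, which requires CAP_SYS_ADMIN:
 *
 *	char buf[PATH_MAX];
 *	long n = syscall(__NR_lookup_dcookie, cookie, buf, sizeof(buf));
 *
 * On success n is the number of bytes copied (the NUL-terminated
 * path); on failure it is -1 with errno set.
 */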

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/mount.h>
#include <linux/capability.h>
#include <linux/dcache.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/dcookies.h>
#include <linux/mutex.h>
#include <linux/path.h>
#include <linux/compat.h>
#include <linux/uaccess.h>

/* The dcookies are allocated from a kmem_cache and
 * hashed onto a small number of lists. None of the
 * code here is particularly performance critical
 */
struct dcookie_struct {
	struct path path;
	struct list_head hash_list;
};

static LIST_HEAD(dcookie_users);
static DEFINE_MUTEX(dcookie_mutex);
static struct kmem_cache *dcookie_cache __read_mostly;
static struct list_head *dcookie_hashtable __read_mostly;
static size_t hash_size __read_mostly;

static inline int is_live(void)
{
	return !(list_empty(&dcookie_users));
}


/* The dentry is locked, its address will do for the cookie */
static inline unsigned long dcookie_value(struct dcookie_struct * dcs)
{
	return (unsigned long)dcs->path.dentry;
}


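/* Dentry addresses come from a slab, so their low bits carry little
 * entropy; shift past the cache-line bits before masking.
 */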
static size_t dcookie_hash(unsigned long dcookie)
{
	return (dcookie >> L1_CACHE_SHIFT) & (hash_size - 1);
}


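/* Walk one hash chain looking for a matching cookie value; returns
 * NULL if absent. Called with dcookie_mutex held.
 */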
static struct dcookie_struct * find_dcookie(unsigned long dcookie)
{
	struct dcookie_struct *found = NULL;
	struct dcookie_struct * dcs;
	struct list_head * pos;
	struct list_head * list;

	list = dcookie_hashtable + dcookie_hash(dcookie);

	list_for_each(pos, list) {
		dcs = list_entry(pos, struct dcookie_struct, hash_list);
		if (dcookie_value(dcs) == dcookie) {
			found = dcs;
			break;
		}
	}

	return found;
}


static void hash_dcookie(struct dcookie_struct * dcs)
{
	struct list_head * list = dcookie_hashtable + dcookie_hash(dcookie_value(dcs));
	list_add(&dcs->hash_list, list);
}


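/* Create a cookie for @path: flag the dentry with DCACHE_COOKIE so a
 * later get_dcookie() reuses it, pin the path, and add it to the hash.
 * Called with dcookie_mutex held.
 */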
static struct dcookie_struct *alloc_dcookie(const struct path *path)
{
	struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
							GFP_KERNEL);
	struct dentry *d;
	if (!dcs)
		return NULL;

	d = path->dentry;
	spin_lock(&d->d_lock);
	d->d_flags |= DCACHE_COOKIE;
	spin_unlock(&d->d_lock);

	dcs->path = *path;
	path_get(path);
	hash_dcookie(dcs);
	return dcs;
}


/* This is the main kernel-side routine that retrieves the cookie
 * value for a dentry/vfsmnt pair.
 */
int get_dcookie(const struct path *path, unsigned long *cookie)
{
	int err = 0;
	struct dcookie_struct * dcs;

	mutex_lock(&dcookie_mutex);

	if (!is_live()) {
		err = -EINVAL;
		goto out;
	}

	if (path->dentry->d_flags & DCACHE_COOKIE) {
		dcs = find_dcookie((unsigned long)path->dentry);
	} else {
		dcs = alloc_dcookie(path);
		if (!dcs) {
			err = -ENOMEM;
			goto out;
		}
	}

	*cookie = dcookie_value(dcs);

out:
	mutex_unlock(&dcookie_mutex);
	return err;
}


/* And here is where the userspace process can look up the cookie value
 * to retrieve the path.
 */
static int do_lookup_dcookie(u64 cookie64, char __user *buf, size_t len)
{
	unsigned long cookie = (unsigned long)cookie64;
	int err = -EINVAL;
	char * kbuf;
	char * path;
	size_t pathlen;
	struct dcookie_struct * dcs;

	/* we could leak path information to users
	 * without dir read permission without this
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&dcookie_mutex);

	if (!is_live()) {
		err = -EINVAL;
		goto out;
	}

	if (!(dcs = find_dcookie(cookie)))
		goto out;

	err = -ENOMEM;
	kbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!kbuf)
		goto out;

	/* FIXME: (deleted) ? */
	path = d_path(&dcs->path, kbuf, PAGE_SIZE);

	mutex_unlock(&dcookie_mutex);

	if (IS_ERR(path)) {
		err = PTR_ERR(path);
		goto out_free;
	}

	err = -ERANGE;
 
	pathlen = kbuf + PAGE_SIZE - path;
	if (pathlen <= len) {
		err = pathlen;
		if (copy_to_user(buf, path, pathlen))
			err = -EFAULT;
	}

out_free:
	kfree(kbuf);
	return err;
out:
	mutex_unlock(&dcookie_mutex);
	return err;
}

SYSCALL_DEFINE3(lookup_dcookie, u64, cookie64, char __user *, buf, size_t, len)
{
	return do_lookup_dcookie(cookie64, buf, len);
}

#ifdef CONFIG_COMPAT
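/* 32-bit callers pass the 64-bit cookie as two 32-bit halves whose
 * order depends on the architecture's endianness.
 */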
COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len)
{
#ifdef __BIG_ENDIAN
	return do_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
#else
	return do_lookup_dcookie(((u64)w1 << 32) | w0, buf, len);
#endif
}
#endif

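/* Lazy first-use setup: create the slab cache and one page of hash
 * buckets; the bucket count is a power of two so dcookie_hash() can
 * mask rather than divide.
 */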
static int dcookie_init(void)
{
	struct list_head * d;
	unsigned int i, hash_bits;
	int err = -ENOMEM;

	dcookie_cache = kmem_cache_create("dcookie_cache",
		sizeof(struct dcookie_struct),
		0, 0, NULL);

	if (!dcookie_cache)
		goto out;

	dcookie_hashtable = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!dcookie_hashtable)
		goto out_kmem;

	err = 0;

	/*
	 * Find the power-of-two list-heads that can fit into the allocation..
	 * We don't guarantee that "sizeof(struct list_head)" is necessarily
	 * a power-of-two.
	 */
	hash_size = PAGE_SIZE / sizeof(struct list_head);
	hash_bits = 0;
	do {
		hash_bits++;
	} while ((hash_size >> hash_bits) != 0);
	hash_bits--;

	/*
	 * Re-calculate the actual number of entries and the mask
	 * from the number of bits we can fit.
	 */
	hash_size = 1UL << hash_bits;

	/* And initialize the newly allocated array */
	d = dcookie_hashtable;
	i = hash_size;
	do {
		INIT_LIST_HEAD(d);
		d++;
		i--;
	} while (i);

out:
	return err;
out_kmem:
	kmem_cache_destroy(dcookie_cache);
	goto out;
}


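/* Undo alloc_dcookie(): clear DCACHE_COOKIE and drop the path pin. */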
static void free_dcookie(struct dcookie_struct * dcs)
{
	struct dentry *d = dcs->path.dentry;

	spin_lock(&d->d_lock);
	d->d_flags &= ~DCACHE_COOKIE;
	spin_unlock(&d->d_lock);

	path_put(&dcs->path);
	kmem_cache_free(dcookie_cache, dcs);
}


static void dcookie_exit(void)
{
	struct list_head * list;
	struct list_head * pos;
	struct list_head * pos2;
	struct dcookie_struct * dcs;
	size_t i;

	for (i = 0; i < hash_size; ++i) {
		list = dcookie_hashtable + i;
		list_for_each_safe(pos, pos2, list) {
			dcs = list_entry(pos, struct dcookie_struct, hash_list);
			list_del(&dcs->hash_list);
			free_dcookie(dcs);
		}
	}

	kfree(dcookie_hashtable);
	kmem_cache_destroy(dcookie_cache);
}


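/* One registration per consumer (historically oprofile); everything
 * is torn down when the last user unregisters.
 */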
struct dcookie_user {
	struct list_head next;
};
 
struct dcookie_user * dcookie_register(void)
{
	struct dcookie_user * user;

	mutex_lock(&dcookie_mutex);

	user = kmalloc(sizeof(struct dcookie_user), GFP_KERNEL);
	if (!user)
		goto out;

	if (!is_live() && dcookie_init())
		goto out_free;

	list_add(&user->next, &dcookie_users);

out:
	mutex_unlock(&dcookie_mutex);
	return user;
out_free:
	kfree(user);
	user = NULL;
	goto out;
}


void dcookie_unregister(struct dcookie_user * user)
{
	mutex_lock(&dcookie_mutex);

	list_del(&user->next);
	kfree(user);

	if (!is_live())
		dcookie_exit();

	mutex_unlock(&dcookie_mutex);
}

EXPORT_SYMBOL_GPL(dcookie_register);
EXPORT_SYMBOL_GPL(dcookie_unregister);
EXPORT_SYMBOL_GPL(get_dcookie);