Revision 9e368259ad988356c4c95150fafd1a06af095d98 authored by Andrea Arcangeli on 30 November 2018, 22:09:25 UTC, committed by Linus Torvalds on 30 November 2018, 22:56:14 UTC
Patch series "userfaultfd shmem updates".

Jann found two bugs in the userfaultfd shmem MAP_SHARED backend: the
lack of the VM_MAYWRITE check and the lack of i_size checks.

Then looking into the above we also fixed the MAP_PRIVATE case.

Hugh by source review also found a data loss source if UFFDIO_COPY is
used on shmem MAP_SHARED PROT_READ mappings (the production usages
incidentally run with PROT_READ|PROT_WRITE, so the data loss couldn't
happen in those production usages like with QEMU).

The whole patchset is marked for stable.

We verified QEMU postcopy live migration with guest running on shmem
MAP_PRIVATE run as well as before after the fix of shmem MAP_PRIVATE.
Regardless if it's shmem or hugetlbfs or MAP_PRIVATE or MAP_SHARED, QEMU
unconditionally invokes a punch hole if the guest mapping is filebacked
and a MADV_DONTNEED too (needed to get rid of the MAP_PRIVATE COWs and
for the anon backend).

This patch (of 5):

We internally used EFAULT to communicate with the caller, switch to
ENOENT, so EFAULT can be used as a non internal retval.

Link: http://lkml.kernel.org/r/20181126173452.26955-2-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: <stable@vger.kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 5618cf0
Raw File
cache.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Request reply cache. This was heavily inspired by the
 * implementation in 4.3BSD/4.4BSD.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */

#ifndef NFSCACHE_H
#define NFSCACHE_H

#include <linux/sunrpc/svc.h>

/*
 * Representation of a reply cache entry.
 *
 * Note that we use a sockaddr_in6 to hold the address instead of the more
 * typical sockaddr_storage. This is for space reasons, since sockaddr_storage
 * is much larger than a sockaddr_in6.
 */
struct svc_cacherep {
	struct {
		/* Keep often-read xid, csum in the same cache line: */
		__be32			k_xid;
		__wsum			k_csum;
		u32			k_proc;
		u32			k_prot;
		u32			k_vers;
		unsigned int		k_len;
		struct sockaddr_in6	k_addr;
	} c_key;

	struct rb_node		c_node;
	struct list_head	c_lru;
	unsigned char		c_state,	/* unused, inprog, done */
				c_type,		/* status, buffer */
				c_secure : 1;	/* req came from port < 1024 */
	unsigned long		c_timestamp;
	union {
		struct kvec	u_vec;
		__be32		u_status;
	}			c_u;
};

#define c_replvec		c_u.u_vec
#define c_replstat		c_u.u_status

/* cache entry states */
enum {
	RC_UNUSED,
	RC_INPROG,
	RC_DONE
};

/* return values */
enum {
	RC_DROPIT,
	RC_REPLY,
	RC_DOIT
};

/*
 * Cache types.
 * We may want to add more types one day, e.g. for diropres and
 * attrstat replies. Using cache entries with fixed length instead
 * of buffer pointers may be more efficient.
 */
enum {
	RC_NOCACHE,
	RC_REPLSTAT,
	RC_REPLBUFF,
};

/* Cache entries expire after this time period */
#define RC_EXPIRE		(120 * HZ)

/* Checksum this amount of the request */
#define RC_CSUMLEN		(256U)

int	nfsd_reply_cache_init(void);
void	nfsd_reply_cache_shutdown(void);
int	nfsd_cache_lookup(struct svc_rqst *);
void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
int	nfsd_reply_cache_stats_open(struct inode *, struct file *);

#endif /* NFSCACHE_H */
back to top