Revision 7b6efc2bc4f19952b25ebf9b236e5ac43cd386c2 authored by Andrea Arcangeli on 01 November 2011, 00:08:26 UTC, committed by Linus Torvalds on 01 November 2011, 00:30:48 UTC
This replaces ptep_clear_flush() with ptep_get_and_clear() and a single
flush_tlb_range() at the end of the loop, to avoid sending one IPI for
each page.

The mmu_notifier_invalidate_range_start/end section is enlarged
accordingly but this is not going to fundamentally change things.  It was
more by accident that the region under mremap was for the most part still
available for secondary MMUs: the primary MMU was never allowed to
reliably access that region for the duration of the mremap (modulo
trapping SIGSEGV on the old address range which sounds unpractical and
flakey).  If users wants secondary MMUs not to lose access to a large
region under mremap they should reduce the mremap size accordingly in
userland and run multiple calls.  Overall this will run faster so it's
actually going to reduce the time the region is under mremap for the
primary MMU which should provide a net benefit to apps.

For KVM this is a noop because the guest physical memory is never
mremapped, there's just no point it ever moving it while guest runs.  One
target of this optimization is JVM GC (so unrelated to the mmu notifier
logic).

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent ebed484
Raw File
syscall.c
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <asm/syscall.h>

static int collect_syscall(struct task_struct *target, long *callno,
			   unsigned long args[6], unsigned int maxargs,
			   unsigned long *sp, unsigned long *pc)
{
	struct pt_regs *regs = task_pt_regs(target);
	if (unlikely(!regs))
		return -EAGAIN;

	*sp = user_stack_pointer(regs);
	*pc = instruction_pointer(regs);

	*callno = syscall_get_nr(target, regs);
	if (*callno != -1L && maxargs > 0)
		syscall_get_arguments(target, regs, 0, maxargs, args);

	return 0;
}

/**
 * task_current_syscall - Discover what a blocked task is doing.
 * @target:		thread to examine
 * @callno:		filled with system call number or -1
 * @args:		filled with @maxargs system call arguments
 * @maxargs:		number of elements in @args to fill
 * @sp:			filled with user stack pointer
 * @pc:			filled with user PC
 *
 * If @target is blocked in a system call, returns zero with *@callno
 * set to the the call's number and @args filled in with its arguments.
 * Registers not used for system call arguments may not be available and
 * it is not kosher to use &struct user_regset calls while the system
 * call is still in progress.  Note we may get this result if @target
 * has finished its system call but not yet returned to user mode, such
 * as when it's stopped for signal handling or syscall exit tracing.
 *
 * If @target is blocked in the kernel during a fault or exception,
 * returns zero with *@callno set to -1 and does not fill in @args.
 * If so, it's now safe to examine @target using &struct user_regset
 * get() calls as long as we're sure @target won't return to user mode.
 *
 * Returns -%EAGAIN if @target does not remain blocked.
 *
 * Returns -%EINVAL if @maxargs is too large (maximum is six).
 */
int task_current_syscall(struct task_struct *target, long *callno,
			 unsigned long args[6], unsigned int maxargs,
			 unsigned long *sp, unsigned long *pc)
{
	long state;
	unsigned long ncsw;

	if (unlikely(maxargs > 6))
		return -EINVAL;

	if (target == current)
		return collect_syscall(target, callno, args, maxargs, sp, pc);

	state = target->state;
	if (unlikely(!state))
		return -EAGAIN;

	ncsw = wait_task_inactive(target, state);
	if (unlikely(!ncsw) ||
	    unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
	    unlikely(wait_task_inactive(target, state) != ncsw))
		return -EAGAIN;

	return 0;
}
EXPORT_SYMBOL_GPL(task_current_syscall);
back to top