Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via
mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas.  For
anon-regions, we just fail to migrate any pages beyond the 1st vma in the
range.

This occurs because do_mbind() collects a list of pages to migrate by
calling check_range().  check_range() walks the task's mm, spanning vmas as
necessary, to collect the migratable pages into a list.  Then, do_mbind()
calls migrate_pages() passing the list of pages, a function to allocate new
pages based on vma policy [new_vma_page()], and a pointer to the first vma
of the range.

For each page in the list, new_vma_page() calls page_address_in_vma()
passing the page and the vma [first in range] to obtain the address to pass
to alloc_page_vma().  The page address is needed to get interleaving
policy correct.  If the pages in the list come from multiple vmas,
eventually, new_vma_page() will pass that page to page_address_in_vma()
with the incorrect vma.  For !PageAnon pages, this will result in a bug
check in rmap.c:vma_address().  For anon pages, vma_address() will just
return EFAULT and fail the migration.

This patch modifies new_vma_page() to check the return value from
page_address_in_vma().  If the return value is EFAULT, new_vma_page()
searches forward via vm_next for the vma that maps the page--i.e., that does
not return EFAULT.  This assumes that the pages in the list handed to
migrate_pages() are in address order.  This is currently the case.  The patch
documents this assumption in a new comment block for new_vma_page().

If new_vma_page() cannot locate the vma mapping the page in a forward
search in the mm, it will pass a NULL vma to alloc_page_vma().  This will
result in the allocation using the task policy, if any, else system default
policy.  This situation is unlikely, but the patch documents this behavior
with a comment.

Note, this patch results in restarting from the first vma in a multi-vma
range each time new_vma_page() is called.  If this is not acceptable, we
can make the vma argument a pointer, both in new_vma_page() and its caller
unmap_and_move() so that the value held by the loop in migrate_pages()
always passes down the last vma in which a page was found.  This will
require changes to all new_page_t functions passed to migrate_pages().  Is
this necessary?

For this patch to work, we can't bug check in vma_address() for pages
outside the argument vma.  This patch removes the BUG_ON().  All other
callers [besides new_vma_page()] already check the return status.

Tested on x86_64, 4 node NUMA platform.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
Raw File
acdebug.h
/******************************************************************************
 *
 * Name: acdebug.h - ACPI/AML debugger
 *
 *****************************************************************************/

/*
 * Copyright (C) 2000 - 2007, R. Byron Moore
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 * 3. Neither the names of the above-listed copyright holders nor the names
 *    of any contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 */

#ifndef __ACDEBUG_H__
#define __ACDEBUG_H__

/*
 * Size of the debugger buffer (units are not stated in this header;
 * presumably bytes -- TODO confirm against the buffer's definition).
 * NOTE(review): 4196 is not 4096 (4 KiB) -- confirm the value is intended.
 */
#define ACPI_DEBUG_BUFFER_SIZE  4196

/*
 * Describes one debugger command: its name and the minimum number of
 * arguments it requires.
 */
struct command_info {
	char *name;		/* Command Name */
	u8 min_args;		/* Minimum arguments required */
};

/*
 * Describes one named argument.  acpi_db_match_argument() takes a pointer
 * to this type together with the user-supplied argument string
 * (presumably a table of candidates to match against -- TODO confirm).
 */
struct argument_info {
	char *name;		/* Argument Name */
};

/*
 * Debugger output helper macros.
 *
 * PARAM_LIST(pl)           - Expands to its argument unchanged; used to
 *                            pass a parenthesized argument list through
 *                            to a function call.
 * DBTEST_OUTPUT_LEVEL(lvl) - Statement prefix that expands to
 *                            "if (acpi_gbl_db_opt_verbose)".  The lvl
 *                            argument is ignored.
 * VERBOSE_PRINT(fp)        - Calls acpi_os_printf with the parenthesized
 *                            argument list fp, but only when
 *                            acpi_gbl_db_opt_verbose is non-zero.
 *                            Usage: VERBOSE_PRINT(("fmt %d\n", value));
 *
 * VERBOSE_PRINT is wrapped in do { } while (0) so that it behaves as a
 * single statement: it can be used as the body of an unbraced if/else
 * without the trailing ';' terminating the outer if or an else binding
 * to the macro's internal if.
 */
#define PARAM_LIST(pl)                  pl
#define DBTEST_OUTPUT_LEVEL(lvl)        if (acpi_gbl_db_opt_verbose)
#define VERBOSE_PRINT(fp)               do { \
			  DBTEST_OUTPUT_LEVEL(lvl) { \
				  acpi_os_printf PARAM_LIST(fp); \
			  } \
		  } while (0)

/*
 * Single-step selection values.
 * NOTE(review): presumably passed as the flags argument of
 * acpi_db_execute() -- confirm against the callers.
 */
#define EX_NO_SINGLE_STEP               1
#define EX_SINGLE_STEP                  2

/*
 * dbxface - external debugger interfaces
 */
acpi_status acpi_db_initialize(void);

void acpi_db_terminate(void);

acpi_status
acpi_db_single_step(struct acpi_walk_state *walk_state,
		    union acpi_parse_object *op, u32 op_type);

/*
 * dbcmds - debug commands and output routines
 */
acpi_status acpi_db_disassemble_method(char *name);

void acpi_db_display_table_info(char *table_arg);

void acpi_db_unload_acpi_table(char *table_arg, char *instance_arg);

void
acpi_db_set_method_breakpoint(char *location,
			      struct acpi_walk_state *walk_state,
			      union acpi_parse_object *op);

void acpi_db_set_method_call_breakpoint(union acpi_parse_object *op);

void acpi_db_get_bus_info(void);

void acpi_db_disassemble_aml(char *statements, union acpi_parse_object *op);

void acpi_db_dump_namespace(char *start_arg, char *depth_arg);

void acpi_db_dump_namespace_by_owner(char *owner_arg, char *depth_arg);

void acpi_db_send_notify(char *name, u32 value);

void acpi_db_set_method_data(char *type_arg, char *index_arg, char *value_arg);

acpi_status
acpi_db_display_objects(char *obj_type_arg, char *display_count_arg);

acpi_status acpi_db_find_name_in_namespace(char *name_arg);

void acpi_db_set_scope(char *name);

acpi_status acpi_db_sleep(char *object_arg);

void acpi_db_find_references(char *object_arg);

void acpi_db_display_locks(void);

void acpi_db_display_resources(char *object_arg);

void acpi_db_display_gpes(void);

void acpi_db_check_integrity(void);

void acpi_db_generate_gpe(char *gpe_arg, char *block_arg);

/*
 * dbdisply - debug display commands
 */
void acpi_db_display_method_info(union acpi_parse_object *op);

void acpi_db_decode_and_display_object(char *target, char *output_type);

void
acpi_db_display_result_object(union acpi_operand_object *obj_desc,
			      struct acpi_walk_state *walk_state);

acpi_status acpi_db_display_all_methods(char *display_count_arg);

void acpi_db_display_arguments(void);

void acpi_db_display_locals(void);

void acpi_db_display_results(void);

void acpi_db_display_calling_tree(void);

void acpi_db_display_object_type(char *object_arg);

void
acpi_db_display_argument_object(union acpi_operand_object *obj_desc,
				struct acpi_walk_state *walk_state);

/*
 * dbexec - debugger control method execution
 */
void acpi_db_execute(char *name, char **args, u32 flags);

/* The thread and loop counts are passed as strings, not integers */
void
acpi_db_create_execution_threads(char *num_threads_arg,
				 char *num_loops_arg, char *method_name_arg);

/* Declared only when ACPI_DBG_TRACK_ALLOCATIONS is defined at build time */
#ifdef ACPI_DBG_TRACK_ALLOCATIONS
u32 acpi_db_get_cache_info(struct acpi_memory_list *cache);
#endif

/*
 * dbfileio - Debugger file I/O commands
 */
acpi_object_type
acpi_db_match_argument(char *user_argument, struct argument_info *arguments);

void acpi_db_close_debug_file(void);

void acpi_db_open_debug_file(char *name);

acpi_status acpi_db_load_acpi_table(char *filename);

acpi_status
acpi_db_get_table_from_file(char *filename, struct acpi_table_header **table);

acpi_status
acpi_db_read_table_from_file(char *filename, struct acpi_table_header **table);

/*
 * dbhistry - debugger HISTORY command
 */
void acpi_db_add_to_history(char *command_line);

void acpi_db_display_history(void);

char *acpi_db_get_from_history(char *command_num_arg);

/*
 * dbinput - user front-end to the AML debugger
 */
acpi_status
acpi_db_command_dispatch(char *input_buffer,
			 struct acpi_walk_state *walk_state,
			 union acpi_parse_object *op);

void ACPI_SYSTEM_XFACE acpi_db_execute_thread(void *context);

/*
 * dbstats - Generation and display of ACPI table statistics
 */
void acpi_db_generate_statistics(union acpi_parse_object *root, u8 is_method);

acpi_status acpi_db_display_statistics(char *type_arg);

/*
 * dbutils - AML debugger utilities
 */
void acpi_db_set_output_destination(u32 where);

void acpi_db_dump_external_object(union acpi_object *obj_desc, u32 level);

void acpi_db_prep_namestring(char *name);

struct acpi_namespace_node *acpi_db_local_ns_lookup(char *name);

void acpi_db_uint32_to_hex_string(u32 value, char *buffer);

#endif				/* __ACDEBUG_H__ */
back to top