Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to get for alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_page_address() will pass that page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searchs forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() is in address order. This is currently case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called. If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and it's caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
ppcboot.h
/*
* (C) Copyright 2000, 2001
* Wolfgang Denk, DENX Software Engineering, wd@denx.de.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
#ifndef __ASM_PPCBOOT_H__
#define __ASM_PPCBOOT_H__
/*
* Board information passed to kernel from PPCBoot
*
* include/asm-ppc/ppcboot.h
*/
#ifndef __ASSEMBLY__
#include <linux/types.h>
typedef struct bd_info {
unsigned long bi_memstart; /* start of DRAM memory */
unsigned long bi_memsize; /* size of DRAM memory in bytes */
unsigned long bi_flashstart; /* start of FLASH memory */
unsigned long bi_flashsize; /* size of FLASH memory */
unsigned long bi_flashoffset; /* reserved area for startup monitor */
unsigned long bi_sramstart; /* start of SRAM memory */
unsigned long bi_sramsize; /* size of SRAM memory */
#if defined(CONFIG_8xx) || defined(CONFIG_CPM2) || defined(CONFIG_85xx) ||\
defined(CONFIG_83xx)
unsigned long bi_immr_base; /* base of IMMR register */
#endif
#if defined(CONFIG_PPC_MPC52xx)
unsigned long bi_mbar_base; /* base of internal registers */
#endif
unsigned long bi_bootflags; /* boot / reboot flag (for LynxOS) */
unsigned long bi_ip_addr; /* IP Address */
unsigned char bi_enetaddr[6]; /* Ethernet address */
unsigned short bi_ethspeed; /* Ethernet speed in Mbps */
unsigned long bi_intfreq; /* Internal Freq, in MHz */
unsigned long bi_busfreq; /* Bus Freq, in MHz */
#if defined(CONFIG_CPM2)
unsigned long bi_cpmfreq; /* CPM_CLK Freq, in MHz */
unsigned long bi_brgfreq; /* BRG_CLK Freq, in MHz */
unsigned long bi_sccfreq; /* SCC_CLK Freq, in MHz */
unsigned long bi_vco; /* VCO Out from PLL, in MHz */
#endif
#if defined(CONFIG_PPC_MPC52xx)
unsigned long bi_ipbfreq; /* IPB Bus Freq, in MHz */
unsigned long bi_pcifreq; /* PCI Bus Freq, in MHz */
#endif
unsigned long bi_baudrate; /* Console Baudrate */
#if defined(CONFIG_4xx)
unsigned char bi_s_version[4]; /* Version of this structure */
unsigned char bi_r_version[32]; /* Version of the ROM (IBM) */
unsigned int bi_procfreq; /* CPU (Internal) Freq, in Hz */
unsigned int bi_plb_busfreq; /* PLB Bus speed, in Hz */
unsigned int bi_pci_busfreq; /* PCI Bus speed, in Hz */
unsigned char bi_pci_enetaddr[6]; /* PCI Ethernet MAC address */
#endif
#if defined(CONFIG_HYMOD)
hymod_conf_t bi_hymod_conf; /* hymod configuration information */
#endif
#if defined(CONFIG_EVB64260) || defined(CONFIG_405EP) || defined(CONFIG_44x) || \
defined(CONFIG_85xx) || defined(CONFIG_83xx)
/* second onboard ethernet port */
unsigned char bi_enet1addr[6];
#endif
#if defined(CONFIG_EVB64260) || defined(CONFIG_440GX) || defined(CONFIG_85xx)
/* third onboard ethernet ports */
unsigned char bi_enet2addr[6];
#endif
#if defined(CONFIG_440GX)
/* fourth onboard ethernet ports */
unsigned char bi_enet3addr[6];
#endif
#if defined(CONFIG_4xx)
unsigned int bi_opbfreq; /* OB clock in Hz */
int bi_iic_fast[2]; /* Use fast i2c mode */
#endif
#if defined(CONFIG_440GX)
int bi_phynum[4]; /* phy mapping */
int bi_phymode[4]; /* phy mode */
#endif
} bd_t;
#define bi_tbfreq bi_intfreq
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PPCBOOT_H__ */
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...