Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to get for alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_page_address() will pass that page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searchs forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() is in address order. This is currently case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called. If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and it's caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
highmem.h
/*
* highmem.h: virtual kernel memory mappings for high memory
*
* PowerPC version, stolen from the i386 version.
*
* Used in CONFIG_HIGHMEM systems for memory pages which
* are not addressable by direct kernel virtual addresses.
*
* Copyright (C) 1999 Gerhard Wichert, Siemens AG
* Gerhard.Wichert@pdb.siemens.de
*
*
* Redesigned the x86 32-bit VM architecture to deal with
* up to 16 Terrabyte physical memory. With current x86 CPUs
* we now support up to 64 Gigabytes physical RAM.
*
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
*/
#ifndef _ASM_HIGHMEM_H
#define _ASM_HIGHMEM_H
#ifdef __KERNEL__
#include <linux/init.h>
#include <linux/interrupt.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
/* undef for production */
#define HIGHMEM_DEBUG 1
extern pte_t *kmap_pte;
extern pgprot_t kmap_prot;
extern pte_t *pkmap_page_table;
/*
* Right now we initialize only a single pte table. It can be extended
* easily, subsequent pte tables have to be allocated in one physical
* chunk of RAM.
*/
#define PKMAP_BASE CONFIG_HIGHMEM_START
#define LAST_PKMAP (1 << PTE_SHIFT)
#define LAST_PKMAP_MASK (LAST_PKMAP-1)
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
#define KMAP_FIX_BEGIN (PKMAP_BASE + 0x00400000UL)
extern void *kmap_high(struct page *page);
extern void kunmap_high(struct page *page);
static inline void *kmap(struct page *page)
{
might_sleep();
if (!PageHighMem(page))
return page_address(page);
return kmap_high(page);
}
static inline void kunmap(struct page *page)
{
BUG_ON(in_interrupt());
if (!PageHighMem(page))
return;
kunmap_high(page);
}
/*
* The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
* gives a more generic (and caching) interface. But kmap_atomic can
* be used in IRQ contexts, so in some (very limited) cases we need
* it.
*/
static inline void *kmap_atomic(struct page *page, enum km_type type)
{
unsigned int idx;
unsigned long vaddr;
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = KMAP_FIX_BEGIN + idx * PAGE_SIZE;
#ifdef HIGHMEM_DEBUG
BUG_ON(!pte_none(*(kmap_pte+idx)));
#endif
set_pte_at(&init_mm, vaddr, kmap_pte+idx, mk_pte(page, kmap_prot));
flush_tlb_page(NULL, vaddr);
return (void*) vaddr;
}
static inline void kunmap_atomic(void *kvaddr, enum km_type type)
{
#ifdef HIGHMEM_DEBUG
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
unsigned int idx = type + KM_TYPE_NR*smp_processor_id();
if (vaddr < KMAP_FIX_BEGIN) { // FIXME
pagefault_enable();
return;
}
BUG_ON(vaddr != KMAP_FIX_BEGIN + idx * PAGE_SIZE);
/*
* force other mappings to Oops if they'll try to access
* this pte without first remap it
*/
pte_clear(&init_mm, vaddr, kmap_pte+idx);
flush_tlb_page(NULL, vaddr);
#endif
pagefault_enable();
}
static inline struct page *kmap_atomic_to_page(void *ptr)
{
unsigned long idx, vaddr = (unsigned long) ptr;
if (vaddr < KMAP_FIX_BEGIN)
return virt_to_page(ptr);
idx = (vaddr - KMAP_FIX_BEGIN) >> PAGE_SHIFT;
return pte_page(kmap_pte[idx]);
}
#define flush_cache_kmaps() flush_cache_all()
#endif /* __KERNEL__ */
#endif /* _ASM_HIGHMEM_H */
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...