https://github.com/torvalds/linux
Revision 7f284d3cc96e02468a42e045f77af11e5ff8b095 authored by Frank Arnold on 22 April 2010, 14:06:59 UTC, committed by H. Peter Anvin on 14 May 2010, 18:53:01 UTC
When running a quest kernel on xen we get:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
IP: [<ffffffff8142f2fb>] cpuid4_cache_lookup_regs+0x2ca/0x3df
PGD 0
Oops: 0000 [#1] SMP
last sysfs file:
CPU 0
Modules linked in:

Pid: 0, comm: swapper Tainted: G        W  2.6.34-rc3 #1 /HVM domU
RIP: 0010:[<ffffffff8142f2fb>]  [<ffffffff8142f2fb>] cpuid4_cache_lookup_regs+0x
2ca/0x3df
RSP: 0018:ffff880002203e08  EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000060
RDX: 0000000000000000 RSI: 0000000000000040 RDI: 0000000000000000
RBP: ffff880002203ed8 R08: 00000000000017c0 R09: ffff880002203e38
R10: ffff8800023d5d40 R11: ffffffff81a01e28 R12: ffff880187e6f5c0
R13: ffff880002203e34 R14: ffff880002203e58 R15: ffff880002203e68
FS:  0000000000000000(0000) GS:ffff880002200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000038 CR3: 0000000001a3c000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffffffff81a00000, task ffffffff81a44020)
Stack:
 ffffffff810d7ecb ffff880002203e20 ffffffff81059140 ffff880002203e30
<0> ffffffff810d7ec9 0000000002203e40 000000000050d140 ffff880002203e70
<0> 0000000002008140 0000000000000086 ffff880040020140 ffffffff81068b8b
Call Trace:
 <IRQ>
 [<ffffffff810d7ecb>] ? sync_supers_timer_fn+0x0/0x1c
 [<ffffffff81059140>] ? mod_timer+0x23/0x25
 [<ffffffff810d7ec9>] ? arm_supers_timer+0x34/0x36
 [<ffffffff81068b8b>] ? hrtimer_get_next_event+0xa7/0xc3
 [<ffffffff81058e85>] ? get_next_timer_interrupt+0x19a/0x20d
 [<ffffffff8142fa23>] get_cpu_leaves+0x5c/0x232
 [<ffffffff8106a7b1>] ? sched_clock_local+0x1c/0x82
 [<ffffffff8106a9a0>] ? sched_clock_tick+0x75/0x7a
 [<ffffffff8107748c>] generic_smp_call_function_single_interrupt+0xae/0xd0
 [<ffffffff8101f6ef>] smp_call_function_single_interrupt+0x18/0x27
 [<ffffffff8100a773>] call_function_single_interrupt+0x13/0x20
 <EOI>
 [<ffffffff8143c468>] ? notifier_call_chain+0x14/0x63
 [<ffffffff810295c6>] ? native_safe_halt+0xc/0xd
 [<ffffffff810114eb>] ? default_idle+0x36/0x53
 [<ffffffff81008c22>] cpu_idle+0xaa/0xe4
 [<ffffffff81423a9a>] rest_init+0x7e/0x80
 [<ffffffff81b10dd2>] start_kernel+0x40e/0x419
 [<ffffffff81b102c8>] x86_64_start_reservations+0xb3/0xb7
 [<ffffffff81b103c4>] x86_64_start_kernel+0xf8/0x107
Code: 14 d5 40 ff ae 81 8b 14 02 31 c0 3b 15 47 1c 8b 00 7d 0e 48 8b 05 36 1c 8b
 00 48 63 d2 48 8b 04 d0 c7 85 5c ff ff ff 00 00 00 00 <8b> 70 38 48 8d 8d 5c ff
 ff ff 48 8b 78 10 ba c4 01 00 00 e8 eb
RIP  [<ffffffff8142f2fb>] cpuid4_cache_lookup_regs+0x2ca/0x3df
 RSP <ffff880002203e08>
CR2: 0000000000000038
---[ end trace a7919e7f17c0a726 ]---

The L3 cache index disable feature of AMD CPUs has to be disabled if the
kernel is running as guest on top of a hypervisor because northbridge
devices are not available to the guest. Currently, this fixes a boot
crash on top of Xen. In the future this will become an issue on KVM as
well.

Check if northbridge devices are present and do not enable the feature
if there are none.

[ hpa: backported to 2.6.34 ]

Signed-off-by: Frank Arnold <frank.arnold@amd.com>
LKML-Reference: <1271945222-5283-3-git-send-email-bp@amd64.org>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: <stable@kernel.org>
1 parent ade029e
Raw File
Tip revision: 7f284d3cc96e02468a42e045f77af11e5ff8b095 authored by Frank Arnold on 22 April 2010, 14:06:59 UTC
x86, cacheinfo: Turn off L3 cache index disable feature in virtualized environments
Tip revision: 7f284d3
backchannel_rqst.c
/******************************************************************************

(c) 2007 Network Appliance, Inc.  All Rights Reserved.
(c) 2009 NetApp.  All Rights Reserved.

NetApp provides this source code under the GPL v2 License.
The GPL v2 license is available at
http://opensource.org/licenses/gpl-license.php.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/

#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/sunrpc/xprt.h>

#ifdef RPC_DEBUG
#define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

#if defined(CONFIG_NFS_V4_1)

/*
 * Helper routines that track the number of preallocation elements
 * on the transport.
 */
static inline int xprt_need_to_requeue(struct rpc_xprt *xprt)
{
	return xprt->bc_alloc_count > 0;
}

static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
	xprt->bc_alloc_count += n;
}

static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n)
{
	return xprt->bc_alloc_count -= n;
}

/*
 * Free the preallocated rpc_rqst structure and the memory
 * buffers hanging off of it.
 */
static void xprt_free_allocation(struct rpc_rqst *req)
{
	struct xdr_buf *xbufp;

	dprintk("RPC:        free allocations for req= %p\n", req);
	BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
	xbufp = &req->rq_private_buf;
	free_page((unsigned long)xbufp->head[0].iov_base);
	xbufp = &req->rq_snd_buf;
	free_page((unsigned long)xbufp->head[0].iov_base);
	list_del(&req->rq_bc_pa_list);
	kfree(req);
}

/*
 * Preallocate up to min_reqs structures and related buffers for use
 * by the backchannel.  This function can be called multiple times
 * when creating new sessions that use the same rpc_xprt.  The
 * preallocated buffers are added to the pool of resources used by
 * the rpc_xprt.  Anyone of these resources may be used used by an
 * incoming callback request.  It's up to the higher levels in the
 * stack to enforce that the maximum number of session slots is not
 * being exceeded.
 *
 * Some callback arguments can be large.  For example, a pNFS server
 * using multiple deviceids.  The list can be unbound, but the client
 * has the ability to tell the server the maximum size of the callback
 * requests.  Each deviceID is 16 bytes, so allocate one page
 * for the arguments to have enough room to receive a number of these
 * deviceIDs.  The NFS client indicates to the pNFS server that its
 * callback requests can be up to 4096 bytes in size.
 */
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
	struct page *page_rcv = NULL, *page_snd = NULL;
	struct xdr_buf *xbufp = NULL;
	struct rpc_rqst *req, *tmp;
	struct list_head tmp_list;
	int i;

	dprintk("RPC:       setup backchannel transport\n");

	/*
	 * We use a temporary list to keep track of the preallocated
	 * buffers.  Once we're done building the list we splice it
	 * into the backchannel preallocation list off of the rpc_xprt
	 * struct.  This helps minimize the amount of time the list
	 * lock is held on the rpc_xprt struct.  It also makes cleanup
	 * easier in case of memory allocation errors.
	 */
	INIT_LIST_HEAD(&tmp_list);
	for (i = 0; i < min_reqs; i++) {
		/* Pre-allocate one backchannel rpc_rqst */
		req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
		if (req == NULL) {
			printk(KERN_ERR "Failed to create bc rpc_rqst\n");
			goto out_free;
		}

		/* Add the allocated buffer to the tmp list */
		dprintk("RPC:       adding req= %p\n", req);
		list_add(&req->rq_bc_pa_list, &tmp_list);

		req->rq_xprt = xprt;
		INIT_LIST_HEAD(&req->rq_list);
		INIT_LIST_HEAD(&req->rq_bc_list);

		/* Preallocate one XDR receive buffer */
		page_rcv = alloc_page(GFP_KERNEL);
		if (page_rcv == NULL) {
			printk(KERN_ERR "Failed to create bc receive xbuf\n");
			goto out_free;
		}
		xbufp = &req->rq_rcv_buf;
		xbufp->head[0].iov_base = page_address(page_rcv);
		xbufp->head[0].iov_len = PAGE_SIZE;
		xbufp->tail[0].iov_base = NULL;
		xbufp->tail[0].iov_len = 0;
		xbufp->page_len = 0;
		xbufp->len = PAGE_SIZE;
		xbufp->buflen = PAGE_SIZE;

		/* Preallocate one XDR send buffer */
		page_snd = alloc_page(GFP_KERNEL);
		if (page_snd == NULL) {
			printk(KERN_ERR "Failed to create bc snd xbuf\n");
			goto out_free;
		}

		xbufp = &req->rq_snd_buf;
		xbufp->head[0].iov_base = page_address(page_snd);
		xbufp->head[0].iov_len = 0;
		xbufp->tail[0].iov_base = NULL;
		xbufp->tail[0].iov_len = 0;
		xbufp->page_len = 0;
		xbufp->len = 0;
		xbufp->buflen = PAGE_SIZE;
	}

	/*
	 * Add the temporary list to the backchannel preallocation list
	 */
	spin_lock_bh(&xprt->bc_pa_lock);
	list_splice(&tmp_list, &xprt->bc_pa_list);
	xprt_inc_alloc_count(xprt, min_reqs);
	spin_unlock_bh(&xprt->bc_pa_lock);

	dprintk("RPC:       setup backchannel transport done\n");
	return 0;

out_free:
	/*
	 * Memory allocation failed, free the temporary list
	 */
	list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list)
		xprt_free_allocation(req);

	dprintk("RPC:       setup backchannel transport failed\n");
	return -1;
}
EXPORT_SYMBOL(xprt_setup_backchannel);

/*
 * Destroys the backchannel preallocated structures.
 * Since these structures may have been allocated by multiple calls
 * to xprt_setup_backchannel, we only destroy up to the maximum number
 * of reqs specified by the caller.
 * @xprt:	the transport holding the preallocated strucures
 * @max_reqs	the maximum number of preallocated structures to destroy
 */
void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
{
	struct rpc_rqst *req = NULL, *tmp = NULL;

	dprintk("RPC:        destroy backchannel transport\n");

	BUG_ON(max_reqs == 0);
	spin_lock_bh(&xprt->bc_pa_lock);
	xprt_dec_alloc_count(xprt, max_reqs);
	list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
		dprintk("RPC:        req=%p\n", req);
		xprt_free_allocation(req);
		if (--max_reqs == 0)
			break;
	}
	spin_unlock_bh(&xprt->bc_pa_lock);

	dprintk("RPC:        backchannel list empty= %s\n",
		list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
EXPORT_SYMBOL(xprt_destroy_backchannel);

/*
 * One or more rpc_rqst structure have been preallocated during the
 * backchannel setup.  Buffer space for the send and private XDR buffers
 * has been preallocated as well.  Use xprt_alloc_bc_request to allocate
 * to this request.  Use xprt_free_bc_request to return it.
 *
 * We know that we're called in soft interrupt context, grab the spin_lock
 * since there is no need to grab the bottom half spin_lock.
 *
 * Return an available rpc_rqst, otherwise NULL if non are available.
 */
struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt)
{
	struct rpc_rqst *req;

	dprintk("RPC:       allocate a backchannel request\n");
	spin_lock(&xprt->bc_pa_lock);
	if (!list_empty(&xprt->bc_pa_list)) {
		req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst,
				rq_bc_pa_list);
		list_del(&req->rq_bc_pa_list);
	} else {
		req = NULL;
	}
	spin_unlock(&xprt->bc_pa_lock);

	if (req != NULL) {
		set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
		req->rq_reply_bytes_recvd = 0;
		req->rq_bytes_sent = 0;
		memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
			sizeof(req->rq_private_buf));
	}
	dprintk("RPC:       backchannel req=%p\n", req);
	return req;
}

/*
 * Return the preallocated rpc_rqst structure and XDR buffers
 * associated with this rpc_task.
 */
void xprt_free_bc_request(struct rpc_rqst *req)
{
	struct rpc_xprt *xprt = req->rq_xprt;

	dprintk("RPC:       free backchannel req=%p\n", req);

	smp_mb__before_clear_bit();
	BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
	clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
	smp_mb__after_clear_bit();

	if (!xprt_need_to_requeue(xprt)) {
		/*
		 * The last remaining session was destroyed while this
		 * entry was in use.  Free the entry and don't attempt
		 * to add back to the list because there is no need to
		 * have anymore preallocated entries.
		 */
		dprintk("RPC:       Last session removed req=%p\n", req);
		xprt_free_allocation(req);
		return;
	}

	/*
	 * Return it to the list of preallocations so that it
	 * may be reused by a new callback request.
	 */
	spin_lock_bh(&xprt->bc_pa_lock);
	list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list);
	spin_unlock_bh(&xprt->bc_pa_lock);
}

#endif /* CONFIG_NFS_V4_1 */
back to top