Revision 002e062e13db10973adb8302f231e48b477c7ccf authored by Yonatan Cohen on 16 November 2016, 08:39:15 UTC, committed by Doug Ledford on 17 November 2016, 01:03:44 UTC
To correctly handle a erroneous WR this fix does the following
1. Make sure the bad WQE causes a user completion event.
2. Call rxe_completer to handle the erred WQE.

Before the fix, when rxe_requester found a bad WQE, it changed its
status to IB_WC_LOC_PROT_ERR and exit with 0 for non RC QPs.

If this was the 1st WQE then there would be no ACK to invoke the
completer and this bad WQE would be stuck in the QP's send-q.

On top of that the requester exiting with 0 caused rxe_do_task to
endlessly invoke rxe_requester, resulting in a soft-lockup attached
below.

In case the WQE was not the 1st and rxe_completer did get a chance to
handle the bad WQE, it did not cause a complete event since the WQE's
IB_SEND_SIGNALED flag was not set.

Setting WQE status to IB_SEND_SIGNALED is subject to IBA spec
version 1.2.1, section 10.7.3.1 Signaled Completions.

NMI watchdog: BUG: soft lockup - CPU#7 stuck for 22s!
[<ffffffffa0590145>] ? rxe_pool_get_index+0x35/0xb0 [rdma_rxe]
[<ffffffffa05952ec>] lookup_mem+0x3c/0xc0 [rdma_rxe]
[<ffffffffa0595534>] copy_data+0x1c4/0x230 [rdma_rxe]
[<ffffffffa058c180>] rxe_requester+0x9d0/0x1100 [rdma_rxe]
[<ffffffff8158e98a>] ? kfree_skbmem+0x5a/0x60
[<ffffffffa05962c9>] rxe_do_task+0x89/0xf0 [rdma_rxe]
[<ffffffffa05963e2>] rxe_run_task+0x12/0x30 [rdma_rxe]
[<ffffffffa059110a>] rxe_post_send+0x41a/0x550 [rdma_rxe]
[<ffffffff811ef922>] ? __kmalloc+0x182/0x200
[<ffffffff816ba512>] ? down_read+0x12/0x40
[<ffffffffa054bd32>] ib_uverbs_post_send+0x532/0x540 [ib_uverbs]
[<ffffffff815f8722>] ? tcp_sendmsg+0x402/0xb80
[<ffffffffa05453dc>] ib_uverbs_write+0x18c/0x3f0 [ib_uverbs]
[<ffffffff81623c2e>] ? inet_recvmsg+0x7e/0xb0
[<ffffffff8158764d>] ? sock_recvmsg+0x3d/0x50
[<ffffffff81215b87>] __vfs_write+0x37/0x140
[<ffffffff81216892>] vfs_write+0xb2/0x1b0
[<ffffffff81217ce5>] SyS_write+0x55/0xc0
[<ffffffff816bc672>] entry_SYSCALL_64_fastpath+0x1a/0xa

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
1 parent 1454ca3
Raw File
extable.c
/*
 * Derived from arch/ppc/mm/extable.c and arch/i386/mm/extable.c.
 *
 * Copyright (C) 2004 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <asm/uaccess.h>

#ifndef ARCH_HAS_RELATIVE_EXTABLE
#define ex_to_insn(x)	((x)->insn)
#else
static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
{
	return (unsigned long)&x->insn + x->insn;
}
#endif

#ifndef ARCH_HAS_SORT_EXTABLE
#ifndef ARCH_HAS_RELATIVE_EXTABLE
#define swap_ex		NULL
#else
static void swap_ex(void *a, void *b, int size)
{
	struct exception_table_entry *x = a, *y = b, tmp;
	int delta = b - a;

	tmp = *x;
	x->insn = y->insn + delta;
	y->insn = tmp.insn - delta;

#ifdef swap_ex_entry_fixup
	swap_ex_entry_fixup(x, y, tmp, delta);
#else
	x->fixup = y->fixup + delta;
	y->fixup = tmp.fixup - delta;
#endif
}
#endif /* ARCH_HAS_RELATIVE_EXTABLE */

/*
 * The exception table needs to be sorted so that the binary
 * search that we use to find entries in it works properly.
 * This is used both for the kernel exception table and for
 * the exception tables of modules that get loaded.
 */
static int cmp_ex(const void *a, const void *b)
{
	const struct exception_table_entry *x = a, *y = b;

	/* avoid overflow */
	if (ex_to_insn(x) > ex_to_insn(y))
		return 1;
	if (ex_to_insn(x) < ex_to_insn(y))
		return -1;
	return 0;
}

void sort_extable(struct exception_table_entry *start,
		  struct exception_table_entry *finish)
{
	sort(start, finish - start, sizeof(struct exception_table_entry),
	     cmp_ex, swap_ex);
}

#ifdef CONFIG_MODULES
/*
 * If the exception table is sorted, any referring to the module init
 * will be at the beginning or the end.
 */
void trim_init_extable(struct module *m)
{
	/*trim the beginning*/
	while (m->num_exentries &&
	       within_module_init(ex_to_insn(&m->extable[0]), m)) {
		m->extable++;
		m->num_exentries--;
	}
	/*trim the end*/
	while (m->num_exentries &&
	       within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]),
				  m))
		m->num_exentries--;
}
#endif /* CONFIG_MODULES */
#endif /* !ARCH_HAS_SORT_EXTABLE */

#ifndef ARCH_HAS_SEARCH_EXTABLE
/*
 * Search one exception table for an entry corresponding to the
 * given instruction address, and return the address of the entry,
 * or NULL if none is found.
 * We use a binary search, and thus we assume that the table is
 * already sorted.
 */
const struct exception_table_entry *
search_extable(const struct exception_table_entry *first,
	       const struct exception_table_entry *last,
	       unsigned long value)
{
	while (first <= last) {
		const struct exception_table_entry *mid;

		mid = ((last - first) >> 1) + first;
		/*
		 * careful, the distance between value and insn
		 * can be larger than MAX_LONG:
		 */
		if (ex_to_insn(mid) < value)
			first = mid + 1;
		else if (ex_to_insn(mid) > value)
			last = mid - 1;
		else
			return mid;
	}
	return NULL;
}
#endif
back to top