https://github.com/torvalds/linux
Revision 69ad4ef868c1fc7609daa235dfa46d28ba7a3ba3 authored by Matt Lupfer on 08 March 2022, 15:27:02 UTC, committed by Martin K. Petersen on 15 March 2022, 03:45:19 UTC
A page fault was encountered in mpt3sas on a LUN reset error path:

[  145.763216] mpt3sas_cm1: Task abort tm failed: handle(0x0002),timeout(30) tr_method(0x0) smid(3) msix_index(0)
[  145.778932] scsi 1:0:0:0: task abort: FAILED scmd(0x0000000024ba29a2)
[  145.817307] scsi 1:0:0:0: attempting device reset! scmd(0x0000000024ba29a2)
[  145.827253] scsi 1:0:0:0: [sg1] tag#2 CDB: Receive Diagnostic 1c 01 01 ff fc 00
[  145.837617] scsi target1:0:0: handle(0x0002), sas_address(0x500605b0000272b9), phy(0)
[  145.848598] scsi target1:0:0: enclosure logical id(0x500605b0000272b8), slot(0)
[  149.858378] mpt3sas_cm1: Poll ReplyDescriptor queues for completion of smid(0), task_type(0x05), handle(0x0002)
[  149.875202] BUG: unable to handle page fault for address: 00000007fffc445d
[  149.885617] #PF: supervisor read access in kernel mode
[  149.894346] #PF: error_code(0x0000) - not-present page
[  149.903123] PGD 0 P4D 0
[  149.909387] Oops: 0000 [#1] PREEMPT SMP NOPTI
[  149.917417] CPU: 24 PID: 3512 Comm: scsi_eh_1 Kdump: loaded Tainted: G S         O      5.10.89-altav-1 #1
[  149.934327] Hardware name: DDN           200NVX2             /200NVX2-MB          , BIOS ATHG2.2.02.01 09/10/2021
[  149.951871] RIP: 0010:_base_process_reply_queue+0x4b/0x900 [mpt3sas]
[  149.961889] Code: 0f 84 22 02 00 00 8d 48 01 49 89 fd 48 8d 57 38 f0 0f b1 4f 38 0f 85 d8 01 00 00 49 8b 45 10 45 31 e4 41 8b 55 0c 48 8d 1c d0 <0f> b6 03 83 e0 0f 3c 0f 0f 85 a2 00 00 00 e9 e6 01 00 00 0f b7 ee
[  149.991952] RSP: 0018:ffffc9000f1ebcb8 EFLAGS: 00010246
[  150.000937] RAX: 0000000000000055 RBX: 00000007fffc445d RCX: 000000002548f071
[  150.011841] RDX: 00000000ffff8881 RSI: 0000000000000001 RDI: ffff888125ed50d8
[  150.022670] RBP: 0000000000000000 R08: 0000000000000000 R09: c0000000ffff7fff
[  150.033445] R10: ffffc9000f1ebb68 R11: ffffc9000f1ebb60 R12: 0000000000000000
[  150.044204] R13: ffff888125ed50d8 R14: 0000000000000080 R15: 34cdc00034cdea80
[  150.054963] FS:  0000000000000000(0000) GS:ffff88dfaf200000(0000) knlGS:0000000000000000
[  150.066715] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  150.076078] CR2: 00000007fffc445d CR3: 000000012448a006 CR4: 0000000000770ee0
[  150.086887] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  150.097670] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  150.108323] PKRU: 55555554
[  150.114690] Call Trace:
[  150.120497]  ? printk+0x48/0x4a
[  150.127049]  mpt3sas_scsih_issue_tm.cold.114+0x2e/0x2b3 [mpt3sas]
[  150.136453]  mpt3sas_scsih_issue_locked_tm+0x86/0xb0 [mpt3sas]
[  150.145759]  scsih_dev_reset+0xea/0x300 [mpt3sas]
[  150.153891]  scsi_eh_ready_devs+0x541/0x9e0 [scsi_mod]
[  150.162206]  ? __scsi_host_match+0x20/0x20 [scsi_mod]
[  150.170406]  ? scsi_try_target_reset+0x90/0x90 [scsi_mod]
[  150.178925]  ? blk_mq_tagset_busy_iter+0x45/0x60
[  150.186638]  ? scsi_try_target_reset+0x90/0x90 [scsi_mod]
[  150.195087]  scsi_error_handler+0x3a5/0x4a0 [scsi_mod]
[  150.203206]  ? __schedule+0x1e9/0x610
[  150.209783]  ? scsi_eh_get_sense+0x210/0x210 [scsi_mod]
[  150.217924]  kthread+0x12e/0x150
[  150.224041]  ? kthread_worker_fn+0x130/0x130
[  150.231206]  ret_from_fork+0x1f/0x30

This is caused by mpt3sas_base_sync_reply_irqs() dereferencing the
reply_q iterator after the list_for_each_entry() loop has completed:
once the loop has traversed the whole list, reply_q no longer points
at a valid entry.

Move the _base_process_reply_queue() call inside of the loop.
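
The stale iterator falls directly out of how list_for_each_entry()
works: if the loop runs to completion instead of breaking early, the
cursor ends up as container_of() of the list head itself, not a real
entry. A minimal sketch of the pattern (the type and helper names are
hypothetical stand-ins, not the driver's actual code):

#include <linux/list.h>

/* Hypothetical stand-ins for the driver's reply queue handling. */
struct reply_queue {
	struct list_head list;
};

static void process_one(struct reply_queue *q);

/* Buggy: uses the iterator after the loop has finished. */
static void sync_all_buggy(struct list_head *head)
{
	struct reply_queue *q;

	list_for_each_entry(q, head, list)
		;	/* q only points at a real entry in here */

	/*
	 * After a full traversal q == container_of(head, struct
	 * reply_queue, list), i.e. garbage, so this can fault:
	 */
	process_one(q);
}

/* Fixed: do the per-entry work inside the loop. */
static void sync_all_fixed(struct list_head *head)
{
	struct reply_queue *q;

	list_for_each_entry(q, head, list)
		process_one(q);
}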

Link: https://lore.kernel.org/r/d625deae-a958-0ace-2ba3-0888dd0a415b@ddn.com
Fixes: 711a923c14d9 ("scsi: mpt3sas: Postprocessing of target and LUN reset")
Cc: stable@vger.kernel.org
Acked-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Signed-off-by: Matt Lupfer <mlupfer@ddn.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
scsi: mpt3sas: Page fault in reply q processing
cfi.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Clang Control Flow Integrity (CFI) error and slowpath handling.
 *
 * Copyright (C) 2021 Google LLC
 */

#include <linux/hardirq.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/printk.h>
#include <linux/ratelimit.h>
#include <linux/rcupdate.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>

/* Compiler-defined handler names */
#ifdef CONFIG_CFI_PERMISSIVE
#define cfi_failure_handler	__ubsan_handle_cfi_check_fail
#else
#define cfi_failure_handler	__ubsan_handle_cfi_check_fail_abort
#endif

static inline void handle_cfi_failure(void *ptr)
{
	if (IS_ENABLED(CONFIG_CFI_PERMISSIVE))
		WARN_RATELIMIT(1, "CFI failure (target: %pS):\n", ptr);
	else
		panic("CFI failure (target: %pS)\n", ptr);
}

#ifdef CONFIG_MODULES
#ifdef CONFIG_CFI_CLANG_SHADOW
/*
 * Index type. A 16-bit index can address at most (2^16)-2 pages (taking
 * into account SHADOW_INVALID), i.e. ~256M with 4k pages.
 */
typedef u16 shadow_t;
#define SHADOW_INVALID		((shadow_t)~0UL)

struct cfi_shadow {
	/* Page index for the beginning of the shadow */
	unsigned long base;
	/* An array of __cfi_check locations (as indices to the shadow) */
	shadow_t shadow[1];
} __packed;

/*
 * The shadow covers ~128M from the beginning of the module region. If
 * the region is larger, we fall back to __module_address for the rest.
 */
#define __SHADOW_RANGE		(_UL(SZ_128M) >> PAGE_SHIFT)

/* The in-memory size of struct cfi_shadow, always at least one page */
#define __SHADOW_PAGES		((__SHADOW_RANGE * sizeof(shadow_t)) >> PAGE_SHIFT)
#define SHADOW_PAGES		max(1UL, __SHADOW_PAGES)
#define SHADOW_SIZE		(SHADOW_PAGES << PAGE_SHIFT)

/* The actual size of the shadow array, minus metadata */
#define SHADOW_ARR_SIZE		(SHADOW_SIZE - offsetof(struct cfi_shadow, shadow))
#define SHADOW_ARR_SLOTS	(SHADOW_ARR_SIZE / sizeof(shadow_t))
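/*
 * For example, with 4 KiB pages: __SHADOW_RANGE is 32768 pages,
 * __SHADOW_PAGES is (32768 * sizeof(shadow_t)) >> PAGE_SHIFT = 16,
 * so the shadow occupies 64 KiB and provides just under 32768 slots
 * (a few are lost to the base field), covering ~128M of module space.
 */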

static DEFINE_MUTEX(shadow_update_lock);
static struct cfi_shadow __rcu *cfi_shadow __read_mostly;

/* Returns the index in the shadow for the given address */
static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr)
{
	unsigned long index;
	unsigned long page = ptr >> PAGE_SHIFT;

	if (unlikely(page < s->base))
		return -1; /* Outside of module area */

	index = page - s->base;

	if (index >= SHADOW_ARR_SLOTS)
		return -1; /* Cannot be addressed with shadow */

	return (int)index;
}

/* Returns the page address for an index in the shadow */
static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s,
	int index)
{
	if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS))
		return 0;

	return (s->base + index) << PAGE_SHIFT;
}

/* Returns the __cfi_check function address for the given shadow location */
static inline unsigned long shadow_to_check_fn(const struct cfi_shadow *s,
	int index)
{
	if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS))
		return 0;

	if (unlikely(s->shadow[index] == SHADOW_INVALID))
		return 0;

	/* __cfi_check is always page aligned */
	return (s->base + s->shadow[index]) << PAGE_SHIFT;
}

static void prepare_next_shadow(const struct cfi_shadow __rcu *prev,
		struct cfi_shadow *next)
{
	int i, index, check;

	/* Mark everything invalid */
	memset(next->shadow, 0xFF, SHADOW_ARR_SIZE);

	if (!prev)
		return; /* No previous shadow */

	/* If the base address didn't change, an update is not needed */
	if (prev->base == next->base) {
		memcpy(next->shadow, prev->shadow, SHADOW_ARR_SIZE);
		return;
	}

	/* Convert the previous shadow to the new address range */
	for (i = 0; i < SHADOW_ARR_SLOTS; ++i) {
		if (prev->shadow[i] == SHADOW_INVALID)
			continue;

		index = ptr_to_shadow(next, shadow_to_ptr(prev, i));
		if (index < 0)
			continue;

		check = ptr_to_shadow(next,
				shadow_to_check_fn(prev, prev->shadow[i]));
		if (check < 0)
			continue;

		next->shadow[index] = (shadow_t)check;
	}
}

static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod,
			unsigned long min_addr, unsigned long max_addr)
{
	int check_index;
	unsigned long check = (unsigned long)mod->cfi_check;
	unsigned long ptr;

	if (unlikely(!PAGE_ALIGNED(check))) {
		pr_warn("cfi: not using shadow for module %s\n", mod->name);
		return;
	}

	check_index = ptr_to_shadow(s, check);
	if (check_index < 0)
		return; /* Module not addressable with shadow */

	/* For each page, store the check function index in the shadow */
	for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) {
		int index = ptr_to_shadow(s, ptr);

		if (index >= 0) {
			/* Each page must only contain one module */
			WARN_ON_ONCE(s->shadow[index] != SHADOW_INVALID);
			s->shadow[index] = (shadow_t)check_index;
		}
	}
}

static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod,
		unsigned long min_addr, unsigned long max_addr)
{
	unsigned long ptr;

	for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) {
		int index = ptr_to_shadow(s, ptr);

		if (index >= 0)
			s->shadow[index] = SHADOW_INVALID;
	}
}

typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *,
			unsigned long min_addr, unsigned long max_addr);

static void update_shadow(struct module *mod, unsigned long base_addr,
		update_shadow_fn fn)
{
	struct cfi_shadow *prev;
	struct cfi_shadow *next;
	unsigned long min_addr, max_addr;

	next = vmalloc(SHADOW_SIZE);

	mutex_lock(&shadow_update_lock);
	prev = rcu_dereference_protected(cfi_shadow,
					 mutex_is_locked(&shadow_update_lock));

	if (next) {
		next->base = base_addr >> PAGE_SHIFT;
		prepare_next_shadow(prev, next);

		min_addr = (unsigned long)mod->core_layout.base;
		max_addr = min_addr + mod->core_layout.text_size;
		fn(next, mod, min_addr & PAGE_MASK, max_addr & PAGE_MASK);

		set_memory_ro((unsigned long)next, SHADOW_PAGES);
	}

	rcu_assign_pointer(cfi_shadow, next);
	mutex_unlock(&shadow_update_lock);
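	/*
	 * Make sure readers that may still hold the old shadow pointer
	 * (see find_shadow_check_fn()) have finished before it is
	 * freed below.
	 */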
	synchronize_rcu();

	if (prev) {
		set_memory_rw((unsigned long)prev, SHADOW_PAGES);
		vfree(prev);
	}
}

void cfi_module_add(struct module *mod, unsigned long base_addr)
{
	update_shadow(mod, base_addr, add_module_to_shadow);
}

void cfi_module_remove(struct module *mod, unsigned long base_addr)
{
	update_shadow(mod, base_addr, remove_module_from_shadow);
}

static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s,
	unsigned long ptr)
{
	int index;

	if (unlikely(!s))
		return NULL; /* No shadow available */

	index = ptr_to_shadow(s, ptr);
	if (index < 0)
		return NULL; /* Cannot be addressed with shadow */

	return (cfi_check_fn)shadow_to_check_fn(s, index);
}

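/*
 * Fast path for module targets: look up the __cfi_check function in
 * the RCU-protected shadow rather than walking module data with
 * __module_address().
 */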
static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr)
{
	cfi_check_fn fn;

	rcu_read_lock_sched_notrace();
	fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr);
	rcu_read_unlock_sched_notrace();

	return fn;
}

#else /* !CONFIG_CFI_CLANG_SHADOW */

static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr)
{
	return NULL;
}

#endif /* CONFIG_CFI_CLANG_SHADOW */

static inline cfi_check_fn find_module_check_fn(unsigned long ptr)
{
	cfi_check_fn fn = NULL;
	struct module *mod;

	rcu_read_lock_sched_notrace();
	mod = __module_address(ptr);
	if (mod)
		fn = mod->cfi_check;
	rcu_read_unlock_sched_notrace();

	return fn;
}

static inline cfi_check_fn find_check_fn(unsigned long ptr)
{
	cfi_check_fn fn = NULL;

	if (is_kernel_text(ptr))
		return __cfi_check;

	/*
	 * Indirect call checks can happen when RCU is not watching. Both
	 * the shadow and __module_address use RCU, so we need to wake it
	 * up if necessary.
	 */
	RCU_NONIDLE({
		if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
			fn = find_shadow_check_fn(ptr);

		if (!fn)
			fn = find_module_check_fn(ptr);
	});

	return fn;
}

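/*
 * Slowpath called from compiler-generated code for indirect calls
 * whose target may be outside the caller's own check range (e.g. in
 * a module): find the __cfi_check function covering the target and
 * let it validate the call; targets with no check function are
 * treated as CFI failures.
 */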
void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag)
{
	cfi_check_fn fn = find_check_fn((unsigned long)ptr);

	if (likely(fn))
		fn(id, ptr, diag);
	else /* Don't allow unchecked modules */
		handle_cfi_failure(ptr);
}
EXPORT_SYMBOL(__cfi_slowpath_diag);

#else /* !CONFIG_MODULES */

void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag)
{
	handle_cfi_failure(ptr); /* No modules */
}
EXPORT_SYMBOL(__cfi_slowpath_diag);

#endif /* CONFIG_MODULES */

void cfi_failure_handler(void *data, void *ptr, void *vtable)
{
	handle_cfi_failure(ptr);
}
EXPORT_SYMBOL(cfi_failure_handler);