https://github.com/torvalds/linux
Revision a73914c35b05d80f8ce78288e10056c91090b666 authored by Mark Salyzyn on 22 September 2011, 15:32:23 UTC, committed by James Bottomley on 02 October 2011, 18:28:55 UTC
When a wide port is being utilized to a target, if one disables only one of the phys, we get an OS crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000238 IP: [<ffffffff814ca9b1>] mutex_lock+0x21/0x50 PGD 4103f5067 PUD 41dba9067 PMD 0 Oops: 0002 [#1] SMP last sysfs file: /sys/bus/pci/slots/5/address CPU 0 Modules linked in: pm8001(U) ses enclosure fuse nfsd exportfs autofs4 ipmi_devintf ipmi_si ipmi_msghandler nfs lockd fscache nfs_acl auth_rpcgss 8021q fcoe libfcoe garp libfc scsi_transport_fc stp scsi_tgt llc sunrpc cpufreq_ondemand acpi_cpufreq freq_table ipv6 sr_mod cdrom dm_mirror dm_region_hash dm_log uinput sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support e1000e mlx4_ib ib_mad ib_core mlx4_en mlx4_core ext3 jbd mbcache sd_mod crc_t10dif usb_storage ata_generic pata_acpi ata_piix libsas(U) scsi_transport_sas dm_mod [last unloaded: pm8001] Modules linked in: pm8001(U) ses enclosure fuse nfsd exportfs autofs4 ipmi_devintf ipmi_si ipmi_msghandler nfs lockd fscache nfs_acl auth_rpcgss 8021q fcoe libfcoe garp libfc scsi_transport_fc stp scsi_tgt llc sunrpc cpufreq_ondemand acpi_cpufreq freq_table ipv6 sr_mod cdrom dm_mirror dm_region_hash dm_log uinput sg i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support e1000e mlx4_ib ib_mad ib_core mlx4_en mlx4_core ext3 jbd mbcache sd_mod crc_t10dif usb_storage ata_generic pata_acpi ata_piix libsas(U) scsi_transport_sas dm_mod [last unloaded: pm8001] Pid: 5146, comm: scsi_wq_5 Not tainted 2.6.32-71.29.1.el6.lustre.7.x86_64 #1 Storage Server RIP: 0010:[<ffffffff814ca9b1>] [<ffffffff814ca9b1>] mutex_lock+0x21/0x50 RSP: 0018:ffff8803e4e33d30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000238 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8803e664c800 RDI: 0000000000000238 RBP: ffff8803e4e33d40 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000238 R14: ffff88041acb7200 R15: ffff88041c51ada0 FS: 0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000238 CR3: 0000000410143000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process scsi_wq_5 (pid: 5146, threadinfo ffff8803e4e32000, task ffff8803e4e294a0) Stack: ffff8803e664c800 0000000000000000 ffff8803e4e33d70 ffffffffa001f06e <0> ffff8803e4e33d60 ffff88041c51ada0 ffff88041acb7200 ffff88041bc0aa00 <0> ffff8803e4e33d90 ffffffffa0032b6c 0000000000000014 ffff88041acb7200 Call Trace: [<ffffffffa001f06e>] sas_port_delete_phy+0x2e/0xa0 [scsi_transport_sas] [<ffffffffa0032b6c>] sas_unregister_devs_sas_addr+0xac/0xe0 [libsas] [<ffffffffa0034914>] sas_ex_revalidate_domain+0x204/0x330 [libsas] [<ffffffffa00307f0>] ? sas_revalidate_domain+0x0/0x90 [libsas] [<ffffffffa0030855>] sas_revalidate_domain+0x65/0x90 [libsas] [<ffffffff8108c7d0>] worker_thread+0x170/0x2a0 [<ffffffff81091ea0>] ? autoremove_wake_function+0x0/0x40 [<ffffffff8108c660>] ? worker_thread+0x0/0x2a0 [<ffffffff81091b36>] kthread+0x96/0xa0 [<ffffffff810141ca>] child_rip+0xa/0x20 [<ffffffff81091aa0>] ? kthread+0x0/0xa0 [<ffffffff810141c0>] ? child_rip+0x0/0x20 Code: ff ff 85 c0 75 ed eb d6 66 90 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 0f 1f 44 00 00 48 89 fb e8 92 f4 ff ff 48 89 df <f0> ff 0f 79 05 e8 25 00 00 00 65 48 8b 04 25 08 cc 00 00 48 2d RIP [<ffffffff814ca9b1>] mutex_lock+0x21/0x50 RSP <ffff8803e4e33d30> CR2: 0000000000000238 The following patch is admittedly a band-aid, and does not solve the root cause, but it still is a good candidate for hardening as a pointer check before reference. Signed-off-by: Mark Salyzyn <mark_salyzyn@us.xyratex.com> Tested-by: Jack Wang <jack_wang@usish.com> Cc: stable@kernel.org Signed-off-by: James Bottomley <JBottomley@Parallels.com>
1 parent 9bfacd0
Tip revision: a73914c35b05d80f8ce78288e10056c91090b666 authored by Mark Salyzyn on 22 September 2011, 15:32:23 UTC
[SCSI] libsas: fix panic when single phy is disabled on a wide port
[SCSI] libsas: fix panic when single phy is disabled on a wide port
Tip revision: a73914c
blk-timeout.c
/*
* Functions related to generic timeout handling of requests.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fault-inject.h>
#include "blk.h"
#ifdef CONFIG_FAIL_IO_TIMEOUT
static DECLARE_FAULT_ATTR(fail_io_timeout);
static int __init setup_fail_io_timeout(char *str)
{
return setup_fault_attr(&fail_io_timeout, str);
}
__setup("fail_io_timeout=", setup_fail_io_timeout);
int blk_should_fake_timeout(struct request_queue *q)
{
if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
return 0;
return should_fail(&fail_io_timeout, 1);
}
static int __init fail_io_timeout_debugfs(void)
{
struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
NULL, &fail_io_timeout);
return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_io_timeout_debugfs);
ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
return sprintf(buf, "%d\n", set != 0);
}
ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct gendisk *disk = dev_to_disk(dev);
int val;
if (count) {
struct request_queue *q = disk->queue;
char *p = (char *) buf;
val = simple_strtoul(p, &p, 10);
spin_lock_irq(q->queue_lock);
if (val)
queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
else
queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
spin_unlock_irq(q->queue_lock);
}
return count;
}
#endif /* CONFIG_FAIL_IO_TIMEOUT */
/*
* blk_delete_timer - Delete/cancel timer for a given function.
* @req: request that we are canceling timer for
*
*/
void blk_delete_timer(struct request *req)
{
list_del_init(&req->timeout_list);
}
static void blk_rq_timed_out(struct request *req)
{
struct request_queue *q = req->q;
enum blk_eh_timer_return ret;
ret = q->rq_timed_out_fn(req);
switch (ret) {
case BLK_EH_HANDLED:
__blk_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
blk_clear_rq_complete(req);
blk_add_timer(req);
break;
case BLK_EH_NOT_HANDLED:
/*
* LLD handles this for now but in the future
* we can send a request msg to abort the command
* and we can move more of the generic scsi eh code to
* the blk layer.
*/
break;
default:
printk(KERN_ERR "block: bad eh return: %d\n", ret);
break;
}
}
void blk_rq_timed_out_timer(unsigned long data)
{
struct request_queue *q = (struct request_queue *) data;
unsigned long flags, next = 0;
struct request *rq, *tmp;
int next_set = 0;
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
if (time_after_eq(jiffies, rq->deadline)) {
list_del_init(&rq->timeout_list);
/*
* Check if we raced with end io completion
*/
if (blk_mark_rq_complete(rq))
continue;
blk_rq_timed_out(rq);
} else if (!next_set || time_after(next, rq->deadline)) {
next = rq->deadline;
next_set = 1;
}
}
if (next_set)
mod_timer(&q->timeout, round_jiffies_up(next));
spin_unlock_irqrestore(q->queue_lock, flags);
}
/**
* blk_abort_request -- Request request recovery for the specified command
* @req: pointer to the request of interest
*
* This function requests that the block layer start recovery for the
* request by deleting the timer and calling the q's timeout function.
* LLDDs who implement their own error recovery MAY ignore the timeout
* event if they generated blk_abort_req. Must hold queue lock.
*/
void blk_abort_request(struct request *req)
{
if (blk_mark_rq_complete(req))
return;
blk_delete_timer(req);
blk_rq_timed_out(req);
}
EXPORT_SYMBOL_GPL(blk_abort_request);
/**
* blk_add_timer - Start timeout timer for a single request
* @req: request that is about to start running.
*
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
*/
void blk_add_timer(struct request *req)
{
struct request_queue *q = req->q;
unsigned long expiry;
if (!q->rq_timed_out_fn)
return;
BUG_ON(!list_empty(&req->timeout_list));
BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
/*
* Some LLDs, like scsi, peek at the timeout to prevent a
* command from being retried forever.
*/
if (!req->timeout)
req->timeout = q->rq_timeout;
req->deadline = jiffies + req->timeout;
list_add_tail(&req->timeout_list, &q->timeout_list);
/*
* If the timer isn't already pending or this timeout is earlier
* than an existing one, modify the timer. Round up to next nearest
* second.
*/
expiry = round_jiffies_up(req->deadline);
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires))
mod_timer(&q->timeout, expiry);
}
/**
* blk_abort_queue -- Abort all request on given queue
* @queue: pointer to queue
*
*/
void blk_abort_queue(struct request_queue *q)
{
unsigned long flags;
struct request *rq, *tmp;
LIST_HEAD(list);
/*
* Not a request based block device, nothing to abort
*/
if (!q->request_fn)
return;
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
/*
* Splice entries to local list, to avoid deadlocking if entries
* get readded to the timeout list by error handling
*/
list_splice_init(&q->timeout_list, &list);
list_for_each_entry_safe(rq, tmp, &list, timeout_list)
blk_abort_request(rq);
/*
* Occasionally, blk_abort_request() will return without
* deleting the element from the list. Make sure we add those back
* instead of leaving them on the local stack list.
*/
list_splice(&list, &q->timeout_list);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_abort_queue);
Computing file changes ...