Revision 8700aff089843399f95bc7701ae87b642b35a716 authored by Edward Cree on 20 December 2019, 16:26:40 UTC, committed by David S. Miller on 21 December 2019, 05:56:48 UTC
It was possible for channel allocation logic to get confused between what
 it had and what it wanted, and end up trying to use the same channel for
 both PTP and regular TX.  This led to a kernel panic:
    BUG: unable to handle page fault for address: 0000000000047635
    #PF: supervisor write access in kernel mode
    #PF: error_code(0x0002) - not-present page
    PGD 0 P4D 0
    Oops: 0002 [#1] SMP PTI
    CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W         5.4.0-rc3-ehc14+ #900
    Hardware name: Dell Inc. PowerEdge R710/0M233H, BIOS 6.4.0 07/23/2013
    RIP: 0010:native_queued_spin_lock_slowpath+0x188/0x1e0
    Code: f3 90 48 8b 32 48 85 f6 74 f6 eb e8 c1 ee 12 83 e0 03 83 ee 01 48 c1 e0 05 48 63 f6 48 05 c0 98 02 00 48 03 04 f5 a0 c6 ed 81 <48> 89 10 8b 42 08 85 c0 75 09 f3 90 8b 42 08 85 c0 74 f7 48 8b 32
    RSP: 0018:ffffc90000003d28 EFLAGS: 00010006
    RAX: 0000000000047635 RBX: 0000000000000246 RCX: 0000000000040000
    RDX: ffff888627a298c0 RSI: 0000000000003ffe RDI: ffff88861f6b8dd4
    RBP: ffff8886225c6e00 R08: 0000000000040000 R09: 0000000000000000
    R10: 0000000616f080c6 R11: 00000000000000c0 R12: ffff88861f6b8dd4
    R13: ffffc90000003dc8 R14: ffff88861942bf00 R15: ffff8886150f2000
    FS:  0000000000000000(0000) GS:ffff888627a00000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 0000000000047635 CR3: 000000000200a000 CR4: 00000000000006f0
    Call Trace:
     <IRQ>
     _raw_spin_lock_irqsave+0x22/0x30
     skb_queue_tail+0x1b/0x50
     sock_queue_err_skb+0x9d/0xf0
     __skb_complete_tx_timestamp+0x9d/0xc0
     efx_dequeue_buffer+0x126/0x180 [sfc]
     efx_xmit_done+0x73/0x1c0 [sfc]
     efx_ef10_ev_process+0x56a/0xfe0 [sfc]
     ? tick_sched_do_timer+0x60/0x60
     ? timerqueue_add+0x5d/0x70
     ? enqueue_hrtimer+0x39/0x90
     efx_poll+0x111/0x380 [sfc]
     ? rcu_accelerate_cbs+0x50/0x160
     net_rx_action+0x14a/0x400
     __do_softirq+0xdd/0x2d0
     irq_exit+0xa0/0xb0
     do_IRQ+0x53/0xe0
     common_interrupt+0xf/0xf
     </IRQ>

In the long run we intend to rewrite the channel allocation code, but for
 'net' fix this by allocating extra_channels, and giving them TX queues,
 even if we do not in fact need them (e.g. on NICs without MAC TX
 timestamping), and thereby using simpler logic to assign the channels
 once they're allocated.

Fixes: 3990a8fffbda ("sfc: allocate channels for XDP tx queues")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 258a980
Raw File
blk-mq-cpumap.c
// SPDX-License-Identifier: GPL-2.0
/*
 * CPU <-> hardware queue mapping helpers
 *
 * Copyright (C) 2013-2014 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"

static int queue_index(struct blk_mq_queue_map *qmap,
		       unsigned int nr_queues, const int q)
{
	return qmap->queue_offset + (q % nr_queues);
}

static int get_first_sibling(unsigned int cpu)
{
	unsigned int ret;

	ret = cpumask_first(topology_sibling_cpumask(cpu));
	if (ret < nr_cpu_ids)
		return ret;

	return cpu;
}

int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
{
	unsigned int *map = qmap->mq_map;
	unsigned int nr_queues = qmap->nr_queues;
	unsigned int cpu, first_sibling, q = 0;

	for_each_possible_cpu(cpu)
		map[cpu] = -1;

	/*
	 * Spread queues among present CPUs first for minimizing
	 * count of dead queues which are mapped by all un-present CPUs
	 */
	for_each_present_cpu(cpu) {
		if (q >= nr_queues)
			break;
		map[cpu] = queue_index(qmap, nr_queues, q++);
	}

	for_each_possible_cpu(cpu) {
		if (map[cpu] != -1)
			continue;
		/*
		 * First do sequential mapping between CPUs and queues.
		 * In case we still have CPUs to map, and we have some number of
		 * threads per cores then map sibling threads to the same queue
		 * for performance optimizations.
		 */
		if (q < nr_queues) {
			map[cpu] = queue_index(qmap, nr_queues, q++);
		} else {
			first_sibling = get_first_sibling(cpu);
			if (first_sibling == cpu)
				map[cpu] = queue_index(qmap, nr_queues, q++);
			else
				map[cpu] = map[first_sibling];
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_map_queues);

/**
 * blk_mq_hw_queue_to_node - Look up the memory node for a hardware queue index
 * @qmap: CPU to hardware queue map.
 * @index: hardware queue index.
 *
 * We have no quick way of doing reverse lookups. This is only used at
 * queue init time, so runtime isn't important.
 */
int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
{
	int i;

	for_each_possible_cpu(i) {
		if (index == qmap->mq_map[i])
			return local_memory_node(cpu_to_node(i));
	}

	return NUMA_NO_NODE;
}
back to top