Revision - aa76042 - MIPS: Fix 64-bit HTW configuration

Revision aa76042a016474775ccd187c068669148c30c3bb authored by James Hogan on 27 May 2016, 21:25:23 UTC, committed by Ralf Baechle on 28 May 2016, 10:35:11 UTC

MIPS: Fix 64-bit HTW configuration

The Hardware page Table Walker (HTW) is being misconfigured on 64-bit
kernels. The PWSize.PS (pointer size) bit determines whether pointers
within directories are loaded as 32-bit or 64-bit addresses, but was
never being set to 1 for 64-bit kernels where the unsigned long in pgd_t
is 64-bits wide.

This actually reduces rather than improves performance when the HTW is
enabled on P6600 since the HTW is initiated lots, but walks are all
aborted due I think to bad intermediate pointers.

Since we were already taking the width of the PTEs into account by
setting PWSize.PTEW, which is the left shift applied to the page table
index *in addition to* the native pointer size, we also need to reduce
PTEW by 1 when PS=1. This is done by calculating PTEW based on the
relative size of pte_t compared to pgd_t.

Finally in order for the HTW to be used when PS=1, the appropriate
XK/XS/XU bits corresponding to the different 64-bit segments need to be
set in PWCtl. We enable only XU for now to enable walking for XUSeg.

Supporting walking for XKSeg would be a bit more involved so is left for
a future patch. It would either require the use of a per-CPU top level
base directory if supported by the HTW (a bit like pgd_current but with
a second entry pointing at swapper_pg_dir), or the HTW would prepend bit
63 of the address to the global directory index which doesn't really
match how we split user and kernel page directories.

Fixes: cab25bc7537b ("MIPS: Extend hardware table walking support to MIPS64")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/13364/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

1 parent 6446e6c

Files
Changes

Permalinks

blk-mq.h

#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

struct blk_mq_tag_set;

struct blk_mq_ctx {
	struct {
		spinlock_t		lock;
		struct list_head	rq_list;
	}  ____cacheline_aligned_in_smp;

	unsigned int		cpu;
	unsigned int		index_hw;

	unsigned int		last_tag ____cacheline_aligned_in_smp;

	/* incremented at dispatch time */
	unsigned long		rq_dispatched[2];
	unsigned long		rq_merged;

	/* incremented at completion time */
	unsigned long		____cacheline_aligned_in_smp rq_completed[2];

	struct request_queue	*queue;
	struct kobject		kobj;
} ____cacheline_aligned_in_smp;

void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);

/*
 * CPU hotplug helpers
 */
struct blk_mq_cpu_notifier;
void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
			      int (*fn)(void *, unsigned long, unsigned int),
			      void *data);
void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
void blk_mq_cpu_init(void);
void blk_mq_enable_hotplug(void);
void blk_mq_disable_hotplug(void);

/*
 * CPU -> queue mappings
 */
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
				   const struct cpumask *online_mask);
extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);

/*
 * sysfs helpers
 */
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);

extern void blk_mq_rq_timed_out(struct request *req, bool reserved);

void blk_mq_release(struct request_queue *q);

/*
 * Basic implementation of sparser bitmap, allowing the user to spread
 * the bits over more cachelines.
 */
struct blk_align_bitmap {
	unsigned long word;
	unsigned long depth;
} ____cacheline_aligned_in_smp;

static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
					   unsigned int cpu)
{
	return per_cpu_ptr(q->queue_ctx, cpu);
}

/*
 * This assumes per-cpu software queueing queues. They could be per-node
 * as well, for instance. For now this is hardcoded as-is. Note that we don't
 * care about preemption, since we know the ctx's are persistent. This does
 * mean that we can't rely on ctx always matching the currently running CPU.
 */
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	return __blk_mq_get_ctx(q, get_cpu());
}

static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
	put_cpu();
}

struct blk_mq_alloc_data {
	/* input parameter */
	struct request_queue *q;
	unsigned int flags;

	/* input & output parameter */
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
};

static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
		struct request_queue *q, unsigned int flags,
		struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
{
	data->q = q;
	data->flags = flags;
	data->ctx = ctx;
	data->hctx = hctx;
}

static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
{
	return hctx->nr_ctx && hctx->tags;
}

#endif

Showing with 0 additions and 0 deletions (0 / 0 diffs computed)

Computing file changes ...