Revision - 2548d54 - nohz/full, sched/rt: Fix missed tick-reenabling bug in [...]

Revision 2548d546d40c0014efdde88a53bf7896e917dcce authored by Peter Zijlstra on 21 April 2016, 16:03:15 UTC, committed by Ingo Molnar on 28 April 2016, 08:28:55 UTC

nohz/full, sched/rt: Fix missed tick-reenabling bug in sched_can_stop_tick()

Chris Metcalf reported a that sched_can_stop_tick() sometimes fails to
re-enable the tick.

His observed problem is that rq->cfs.nr_running can be 1 even though
there are multiple runnable CFS tasks. This happens in the cgroup
case, in which case cfs.nr_running is the number of runnable entities
for that level.

If there is a single runnable cgroup (which can have an arbitrary
number of runnable child entries itself) rq->cfs.nr_running will be 1.

However, looking at that function I think there's more problems with it.

It seems to assume that if there's FIFO tasks, those will run. This is
incorrect. The FIFO task can have a lower prio than an RR task, in which
case the RR task will run.

So the whole fifo_nr_running test seems misplaced, it should go after
the rr_nr_running tests. That is, only if !rr_nr_running, can we use
fifo_nr_running like this.

Reported-by: Chris Metcalf <cmetcalf@mellanox.com>
Tested-by: Chris Metcalf <cmetcalf@mellanox.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Wanpeng Li <kernellwp@gmail.com>
Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
Link: http://lkml.kernel.org/r/20160421160315.GK24771@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

1 parent 02da2d7

Files
Changes

Permalinks

blk-mq.h

#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

struct blk_mq_tag_set;

struct blk_mq_ctx {
	struct {
		spinlock_t		lock;
		struct list_head	rq_list;
	}  ____cacheline_aligned_in_smp;

	unsigned int		cpu;
	unsigned int		index_hw;

	unsigned int		last_tag ____cacheline_aligned_in_smp;

	/* incremented at dispatch time */
	unsigned long		rq_dispatched[2];
	unsigned long		rq_merged;

	/* incremented at completion time */
	unsigned long		____cacheline_aligned_in_smp rq_completed[2];

	struct request_queue	*queue;
	struct kobject		kobj;
} ____cacheline_aligned_in_smp;

void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);

/*
 * CPU hotplug helpers
 */
struct blk_mq_cpu_notifier;
void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
			      int (*fn)(void *, unsigned long, unsigned int),
			      void *data);
void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
void blk_mq_cpu_init(void);
void blk_mq_enable_hotplug(void);
void blk_mq_disable_hotplug(void);

/*
 * CPU -> queue mappings
 */
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
				   const struct cpumask *online_mask);
extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);

/*
 * sysfs helpers
 */
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);

extern void blk_mq_rq_timed_out(struct request *req, bool reserved);

void blk_mq_release(struct request_queue *q);

/*
 * Basic implementation of sparser bitmap, allowing the user to spread
 * the bits over more cachelines.
 */
struct blk_align_bitmap {
	unsigned long word;
	unsigned long depth;
} ____cacheline_aligned_in_smp;

static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
					   unsigned int cpu)
{
	return per_cpu_ptr(q->queue_ctx, cpu);
}

/*
 * This assumes per-cpu software queueing queues. They could be per-node
 * as well, for instance. For now this is hardcoded as-is. Note that we don't
 * care about preemption, since we know the ctx's are persistent. This does
 * mean that we can't rely on ctx always matching the currently running CPU.
 */
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	return __blk_mq_get_ctx(q, get_cpu());
}

static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
	put_cpu();
}

struct blk_mq_alloc_data {
	/* input parameter */
	struct request_queue *q;
	unsigned int flags;

	/* input & output parameter */
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
};

static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
		struct request_queue *q, unsigned int flags,
		struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
{
	data->q = q;
	data->flags = flags;
	data->ctx = ctx;
	data->hctx = hctx;
}

static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
{
	return hctx->nr_ctx && hctx->tags;
}

#endif

Showing with 0 additions and 0 deletions (0 / 0 diffs computed)

Computing file changes ...