Revision d9724772e69cb8076231202292665ca74eec13e1 authored by Or Gerlitz on 22 December 2016, 12:28:15 UTC, committed by David S. Miller on 23 December 2016, 16:59:56 UTC
When matching on flags, we should require the user to provide the
mask and avoid using an all-ones mask. Not doing so causes matching
on flags provided without a mask to hit on the value being unset for all
flags, which may not be what the user wanted to happen.

Fixes: faa3ffce7829 ('net/sched: cls_flower: Add support for matching on flags')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent dc594ec
Raw File
offwaketime_kern.c
/* Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include <uapi/linux/ptrace.h>
#include <uapi/linux/perf_event.h>
#include <linux/version.h>
#include <linux/sched.h>

/*
 * Copy the value of P into a temporary of the same type using
 * bpf_probe_read() and yield that temporary (statement expression).
 * The helper's return value is not checked.
 */
#define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})

/*
 * Threshold used by oncpu(): a delta (already divided by 1000) below this
 * value is dropped instead of being passed to update_counts().
 */
#define MINBLOCK_US	1

/*
 * Key of the "counts" map, filled in by update_counts().
 * Field order and sizes define the key layout (see counts.key_size).
 */
struct key_t {
	char waker[TASK_COMM_LEN];	/* name copied from the wokeby entry, or all zeroes if none */
	char target[TASK_COMM_LEN];	/* result of bpf_get_current_comm() in update_counts() */
	u32 wret;			/* stack id saved by waker(), or 0 if no wokeby entry */
	u32 tret;			/* stack id from bpf_get_stackid() in update_counts() */
};

/*
 * Hash map: struct key_t -> u64.
 * update_counts() adds each delta to the value stored under the key.
 */
struct bpf_map_def SEC("maps") counts = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(struct key_t),
	.value_size = sizeof(u64),
	.max_entries = 10000,
};

/*
 * Hash map: u32 pid -> u64 timestamp.
 * oncpu() stores bpf_ktime_get_ns() here and later reads and deletes the
 * entry to compute a delta.
 */
struct bpf_map_def SEC("maps") start = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 10000,
};

/*
 * Value of the "wokeby" map, written by waker() and read by update_counts().
 */
struct wokeby_t {
	char name[TASK_COMM_LEN];	/* result of bpf_get_current_comm() in waker() */
	u32 ret;			/* return value of bpf_get_stackid() in waker() */
};

/*
 * Hash map: u32 pid -> struct wokeby_t.
 * waker() stores an entry under the pid it reads from its first argument;
 * update_counts() looks the entry up by pid and deletes it once used.
 */
struct bpf_map_def SEC("maps") wokeby = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct wokeby_t),
	.max_entries = 10000,
};

/*
 * Stack trace map handed to bpf_get_stackid() by waker() and update_counts().
 * Each value has room for PERF_MAX_STACK_DEPTH u64 entries.
 */
struct bpf_map_def SEC("maps") stackmap = {
	.type = BPF_MAP_TYPE_STACK_TRACE,
	.key_size = sizeof(u32),
	.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
	.max_entries = 10000,
};

/* Flags argument used for every bpf_get_stackid() call in this file. */
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)

/*
 * kprobe on try_to_wake_up(): record who is performing the wakeup.
 *
 * Reads the pid of the task_struct passed as the probed function's first
 * argument, then stores the current comm and the current stack id in the
 * "wokeby" map under that pid (BPF_ANY). Always returns 0.
 */
SEC("kprobe/try_to_wake_up")
int waker(struct pt_regs *ctx)
{
	struct task_struct *wakee = (void *) PT_REGS_PARM1(ctx);
	struct wokeby_t info;
	u32 wakee_pid = _(wakee->pid);

	/* Describe the waking context: its comm and its stack id. */
	bpf_get_current_comm(&info.name, sizeof(info.name));
	info.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	bpf_map_update_elem(&wokeby, &wakee_pid, &info, BPF_ANY);
	return 0;
}

/*
 * Add @delta to the "counts" entry describing the current context and
 * whatever waker() recorded for @pid.
 *
 * @ctx:   program context, forwarded to bpf_get_stackid()
 * @pid:   key used to look up (and then delete) the "wokeby" entry
 * @delta: amount added to the counter
 *
 * The key is made of the current comm and stack id plus the recorded
 * name and stack id from "wokeby"; when no entry exists for @pid those
 * two fields are zero. A missing counter is first created with value 0.
 * Always returns 0.
 */
static inline int update_counts(void *ctx, u32 pid, u64 delta)
{
	struct wokeby_t *waker_info;
	struct key_t k;
	u64 initial = 0;
	u64 *total;

	/* Target half of the key: the current context. */
	bpf_get_current_comm(&k.target, sizeof(k.target));
	k.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS);

	/* Waker half of the key: the recorded entry if present, zeroes otherwise. */
	waker_info = bpf_map_lookup_elem(&wokeby, &pid);
	if (waker_info) {
		k.wret = waker_info->ret;
		__builtin_memcpy(&k.waker, waker_info->name, sizeof(k.waker));
		bpf_map_delete_elem(&wokeby, &pid);
	} else {
		__builtin_memset(&k.waker, 0, sizeof(k.waker));
		k.wret = 0;
	}

	total = bpf_map_lookup_elem(&counts, &k);
	if (!total) {
		/* First time this key is seen: insert a zero and fetch it back. */
		bpf_map_update_elem(&counts, &k, &initial, BPF_NOEXIST);
		total = bpf_map_lookup_elem(&counts, &k);
	}
	if (total)
		*total += delta;

	return 0;
}

/*
 * oncpu(): context-switch handler.
 *
 * Two attach variants share a single function body. "#if 1" selects the
 * sched_switch tracepoint variant, which takes the previous pid from the
 * tracepoint arguments; the "#else" variant is a kprobe on
 * finish_task_switch that reads the pid from the task_struct passed as the
 * probed function's first argument.
 *
 * The shared body:
 *   1. stores the current bpf_ktime_get_ns() value in "start" under the
 *      previous pid;
 *   2. looks up the "start" entry for the current pid, returning 0 if
 *      there is none;
 *   3. computes the elapsed time, deletes the entry, divides by 1000 and
 *      returns 0 if the result is below MINBLOCK_US;
 *   4. otherwise returns update_counts() for the current pid and delta.
 */
#if 1
/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
	unsigned long long pad;
	char prev_comm[16];
	int prev_pid;
	int prev_prio;
	long long prev_state;
	char next_comm[16];
	int next_pid;
	int next_prio;
};
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
	/* record previous thread sleep time */
	u32 pid = ctx->prev_pid;
#else
SEC("kprobe/finish_task_switch")
int oncpu(struct pt_regs *ctx)
{
	struct task_struct *p = (void *) PT_REGS_PARM1(ctx);
	/* record previous thread sleep time */
	u32 pid = _(p->pid);
#endif
	u64 delta, ts, *tsp;

	/* timestamp stored under the previous pid, overwriting any old entry */
	ts = bpf_ktime_get_ns();
	bpf_map_update_elem(&start, &pid, &ts, BPF_ANY);

	/* calculate current thread's delta time */
	/* pid is u32, so only the low 32 bits of the helper's result are kept */
	pid = bpf_get_current_pid_tgid();
	tsp = bpf_map_lookup_elem(&start, &pid);
	if (!tsp)
		/* missed start or filtered */
		return 0;

	delta = bpf_ktime_get_ns() - *tsp;
	/* entry is consumed: remove it so it is not counted again */
	bpf_map_delete_elem(&start, &pid);
	/* scale by 1000 before comparing against MINBLOCK_US */
	delta = delta / 1000;
	if (delta < MINBLOCK_US)
		return 0;

	return update_counts(ctx, pid, delta);
}
/*
 * License string and kernel version code, tagged with the "license" and
 * "version" SEC() names (presumably read by the program loader; confirm
 * against the loader used with this sample).
 */
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
back to top