Revision c403f6a3a792a6601185497c12b0bdf4be880439 authored by Qian Cai on 15 August 2020, 00:31:53 UTC, committed by Linus Torvalds on 15 August 2020, 02:56:57 UTC
 BUG: KCSAN: data-race in page_cpupid_xchg_last / put_page

 write (marked) to 0xfffffc0d48ec1a00 of 8 bytes by task 91442 on cpu 3:
  page_cpupid_xchg_last+0x51/0x80
  page_cpupid_xchg_last at mm/mmzone.c:109 (discriminator 11)
  wp_page_reuse+0x3e/0xc0
  wp_page_reuse at mm/memory.c:2453
  do_wp_page+0x472/0x7b0
  do_wp_page at mm/memory.c:2798
  __handle_mm_fault+0xcb0/0xd00
  handle_pte_fault at mm/memory.c:4049
  (inlined by) __handle_mm_fault at mm/memory.c:4163
  handle_mm_fault+0xfc/0x2f0
  handle_mm_fault at mm/memory.c:4200
  do_page_fault+0x263/0x6f9
  do_user_addr_fault at arch/x86/mm/fault.c:1465
  (inlined by) do_page_fault at arch/x86/mm/fault.c:1539
  page_fault+0x34/0x40

 read to 0xfffffc0d48ec1a00 of 8 bytes by task 94817 on cpu 69:
  put_page+0x15a/0x1f0
  page_zonenum at include/linux/mm.h:923
  (inlined by) is_zone_device_page at include/linux/mm.h:929
  (inlined by) page_is_devmap_managed at include/linux/mm.h:948
  (inlined by) put_page at include/linux/mm.h:1023
  wp_page_copy+0x571/0x930
  wp_page_copy at mm/memory.c:2615
  do_wp_page+0x107/0x7b0
  __handle_mm_fault+0xcb0/0xd00
  handle_mm_fault+0xfc/0x2f0
  do_page_fault+0x263/0x6f9
  page_fault+0x34/0x40

 Reported by Kernel Concurrency Sanitizer on:
 CPU: 69 PID: 94817 Comm: systemd-udevd Tainted: G        W  O L 5.5.0-next-20200204+ #6
 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019

A page never changes its zone number. The zone number happens to be
stored in the same word as other bits which are modified, but the zone
number bits will never be modified by any other write, so a reader can
tolerate a reload of the zone bits after an intervening write and doesn't
need to use READ_ONCE(). Thus, annotate this data race using
ASSERT_EXCLUSIVE_BITS() to also assert that there are no concurrent
writes to those bits.

Suggested-by: Marco Elver <elver@google.com>
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Link: http://lkml.kernel.org/r/1581619089-14472-1-git-send-email-cai@lca.pw
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 7e0cc01
Raw File
pie.h
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __NET_SCHED_PIE_H
#define __NET_SCHED_PIE_H

#include <linux/ktime.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/inet_ecn.h>
#include <net/pkt_sched.h>

/*
 * U64_MAX shifted right by BITS_PER_BYTE, i.e. the top BITS_PER_BYTE bits
 * are clear.
 * NOTE(review): presumably the largest value pie_vars.prob may take - confirm
 * against the users of this macro.
 */
#define MAX_PROB	(U64_MAX >> BITS_PER_BYTE)
/* Value stored in pie_vars.dq_tstamp by pie_vars_init(). */
#define DTIME_INVALID	U64_MAX
/*
 * NOTE(review): not referenced in this header; presumably a backlog
 * threshold - confirm units and meaning against the users.
 */
#define QUEUE_THRESHOLD	16384
/*
 * Value stored in pie_vars.dq_count by pie_vars_init(). dq_count is a u64,
 * so the -1 is converted to the all-ones value on assignment.
 */
#define DQCOUNT_INVALID	-1
/*
 * NOTE(review): not referenced in this header; presumably a scaling
 * shift - confirm against the users.
 */
#define PIE_SCALE	8

/**
 * struct pie_params - contains pie parameters
 * @target:		target delay in pschedtime
 * @tupdate:		interval, in jiffies, at which drop probability is
 *			calculated
 * @limit:		total number of packets that can be in the queue
 * @alpha:		parameter to control drop probability
 * @beta:		parameter to control drop probability
 * @ecn:		is ECN marking of packets enabled
 * @bytemode:		is drop probability scaled based on pkt size
 * @dq_rate_estimator:	is Little's law used for qdelay calculation
 *
 * The three u8 members are used as boolean flags. Default values for every
 * member are assigned by pie_params_init().
 */
struct pie_params {
	psched_time_t target;
	u32 tupdate;
	u32 limit;
	u32 alpha;
	u32 beta;
	u8 ecn;
	u8 bytemode;
	u8 dq_rate_estimator;
};

/**
 * struct pie_vars - contains pie variables
 * @qdelay:		current queue delay
 * @qdelay_old:		queue delay in previous qdelay calculation
 * @burst_time:		burst time allowance
 * @dq_tstamp:		timestamp at which dq rate was last calculated
 * @prob:		drop probability
 * @accu_prob:		accumulated drop probability
 * @dq_count:		number of bytes dequeued in a measurement cycle
 * @avg_dq_rate:	calculated average dq rate
 * @backlog_old:	queue backlog during previous qdelay calculation
 *
 * pie_vars_init() writes only @burst_time, @dq_tstamp, @accu_prob,
 * @dq_count and @avg_dq_rate. It sets @dq_tstamp to DTIME_INVALID and
 * @dq_count to DQCOUNT_INVALID, and leaves the remaining members untouched.
 */
struct pie_vars {
	psched_time_t qdelay;
	psched_time_t qdelay_old;
	psched_time_t burst_time;
	psched_time_t dq_tstamp;
	u64 prob;
	u64 accu_prob;
	u64 dq_count;
	u32 avg_dq_rate;
	u32 backlog_old;
};

/**
 * struct pie_stats - contains pie stats
 * @packets_in:	total number of packets enqueued
 * @dropped:	packets dropped due to pie action
 * @overlimit:	packets dropped due to lack of space in queue
 * @ecn_mark:	packets marked with ECN
 * @maxq:	maximum queue size
 *
 * All members are unsigned 32-bit values. Nothing in this header reads or
 * updates them.
 */
struct pie_stats {
	u32 packets_in;
	u32 dropped;
	u32 overlimit;
	u32 ecn_mark;
	u32 maxq;
};

/**
 * struct pie_skb_cb - contains private skb vars
 * @enqueue_time:	timestamp when the packet is enqueued
 * @mem_usage:		size of the skb during enqueue
 *
 * Obtained through get_pie_cb(), which casts the data member of the
 * structure returned by qdisc_skb_cb() to this type after calling
 * qdisc_cb_private_validate() with sizeof(struct pie_skb_cb).
 * @enqueue_time is written by pie_set_enqueue_time() and read by
 * pie_get_enqueue_time(); @mem_usage is not accessed in this header.
 */
struct pie_skb_cb {
	psched_time_t enqueue_time;
	u32 mem_usage;
};

/**
 * pie_params_init - load the default PIE parameters
 * @params:	parameter block to fill in
 *
 * Every member of @params is overwritten. The delay target and the update
 * interval both default to 15 ms; the former is stored in psched ticks and
 * the latter in jiffies.
 */
static inline void pie_params_init(struct pie_params *params)
{
	/* All optional features start out disabled. */
	params->dq_rate_estimator = false;
	params->bytemode = false;
	params->ecn = false;

	/* Drop-probability parameters and the queue limit in packets. */
	params->beta = 20;
	params->alpha = 2;
	params->limit = 1000;

	/* Timing: 15 ms update interval and 15 ms target delay. */
	params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC);
	params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC);
}

/**
 * pie_vars_init - (re)initialise part of the PIE state
 * @vars:	state block to update
 *
 * Only vars->burst_time, vars->dq_tstamp, vars->dq_count, vars->accu_prob
 * and vars->avg_dq_rate are written; every other member of @vars keeps the
 * value it had on entry.
 */
static inline void pie_vars_init(struct pie_vars *vars)
{
	/* Clear the accumulated probability and the average dequeue rate. */
	vars->avg_dq_rate = 0;
	vars->accu_prob = 0;

	/* Mark the dequeue byte counter and its timestamp as invalid. */
	vars->dq_count = DQCOUNT_INVALID;
	vars->dq_tstamp = DTIME_INVALID;

	/* Burst allowance of 150 ms, expressed in psched ticks. */
	vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
}

/**
 * get_pie_cb - locate the PIE private area of an skb
 * @skb:	packet whose control block is wanted
 *
 * Calls qdisc_cb_private_validate() with the size of struct pie_skb_cb and
 * then reinterprets the data member of the structure returned by
 * qdisc_skb_cb() as a struct pie_skb_cb.
 *
 * Return: pointer to the PIE control block of @skb.
 */
static inline struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
{
	struct pie_skb_cb *cb;

	qdisc_cb_private_validate(skb, sizeof(*cb));
	cb = (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;

	return cb;
}

static inline psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
{
	return get_pie_cb(skb)->enqueue_time;
}

static inline void pie_set_enqueue_time(struct sk_buff *skb)
{
	get_pie_cb(skb)->enqueue_time = psched_get_time();
}

/*
 * The three functions below are only declared in this header; their
 * definitions are not visible here.
 * NOTE(review): the summaries are inferred from names and signatures only -
 * confirm against the implementation before relying on them.
 */

/*
 * Presumably decides whether a packet of @packet_size bytes should be
 * dropped before it is queued, given the current @backlog; returns bool.
 */
bool pie_drop_early(struct Qdisc *sch, struct pie_params *params,
		    struct pie_vars *vars, u32 backlog, u32 packet_size);

/*
 * Presumably updates @vars when @skb leaves the queue, given the current
 * @backlog.
 */
void pie_process_dequeue(struct sk_buff *skb, struct pie_params *params,
			 struct pie_vars *vars, u32 backlog);

/*
 * Presumably recomputes the drop probability held in @vars from @params and
 * the current @backlog.
 */
void pie_calculate_probability(struct pie_params *params, struct pie_vars *vars,
			       u32 backlog);

#endif
back to top