Revision 171936ddaf97e6f4e1264f4128bb5cf15691339c authored by Tony Luck on 25 June 2021, 01:39:55 UTC, committed by Linus Torvalds on 25 June 2021, 02:40:54 UTC
Patch series "mm,hwpoison: fix sending SIGBUS for Action Required MCE", v5.

I wrote this patchset to implement what I think is the currently
acceptable solution mentioned in the previous discussion [1].  I simply
borrowed Tony's mutex patch and Aili's return code patch, then queued
another one to find the error virtual address on a best-effort basis.  I
know that this is not a perfect solution, but it should work for some
typical cases.

[1]: https://lore.kernel.org/linux-mm/20210331192540.2141052f@alex-virtual-machine/

This patch (of 2):

There can be races when multiple CPUs consume poison from the same page.
The first into memory_failure() atomically sets the HWPoison page flag
and begins hunting for tasks that map this page.  Eventually it
invalidates those mappings and may send a SIGBUS to the affected tasks.

But while all that work is going on, other CPUs see a "success" return
code from memory_failure() and so they believe the error has been
handled and continue executing.

Fix by wrapping most of the internal parts of memory_failure() in a
mutex.

[akpm@linux-foundation.org: make mf_mutex local to memory_failure()]

Link: https://lkml.kernel.org/r/20210521030156.2612074-1-nao.horiguchi@gmail.com
Link: https://lkml.kernel.org/r/20210521030156.2612074-2-nao.horiguchi@gmail.com
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Aili Yao <yaoaili@kingsoft.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jue Wang <juew@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent fe19bd3
Raw File
lec_arpc.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Lec arp cache
 *
 * Marko Kiiskila <mkiiskila@yahoo.com>
 */
#ifndef _LEC_ARP_H_
#define _LEC_ARP_H_
#include <linux/atm.h>
#include <linux/atmdev.h>
#include <linux/if_ether.h>
#include <linux/atmlec.h>

/*
 * One entry of the LEC ARP cache.
 *
 * An entry pairs a 48-bit MAC address (or a route descriptor, see
 * is_rdesc) with an ATM address and the VCCs attached to it.  The
 * status member holds one of the ESI_* values defined in this header.
 */
struct lec_arp_table {
	struct hlist_node next;		/* Linked entry list */
	unsigned char atm_addr[ATM_ESA_LEN];	/* Atm address */
	unsigned char mac_addr[ETH_ALEN];	/* Mac address */
	int is_rdesc;			/* Mac address is a route descriptor */
	struct atm_vcc *vcc;		/* Vcc this entry is attached */
	struct atm_vcc *recv_vcc;	/* Vcc we receive data from */

	void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb);
					/*
					 * Push that leads to daemon.
					 * NOTE(review): presumably the push
					 * handler saved from vcc - confirm.
					 */

	void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb);
					/*
					 * Push that leads to daemon.
					 * NOTE(review): presumably the push
					 * handler saved from recv_vcc - confirm.
					 */

	unsigned long last_used;	/* For expiry */
	unsigned long timestamp;	/* Used for various timestamping things:
					 * 1. FLUSH started
					 *    (status=ESI_FLUSH_PENDING)
					 * 2. Counting to
					 *    max_unknown_frame_time
					 *    (status=ESI_ARP_PENDING||
					 *     status=ESI_VC_PENDING)
					 */
	unsigned char no_tries;		/* Number of ARP retries attempted */
	unsigned char status;		/* Status of this entry (ESI_* value) */
	unsigned short flags;		/*
					 * Flags for this entry.
					 * NOTE(review): presumably LEC_*_FLAG
					 * bits - confirm against users.
					 */
	unsigned short packets_flooded;	/* Data packets flooded */
	unsigned long flush_tran_id;	/* Transaction id in flush protocol */
	struct timer_list timer;	/* Arping timer */
	struct lec_priv *priv;		/* Pointer back */
	u8 *tlvs;			/* TLVs for this entry, see sizeoftlvs */
	u32 sizeoftlvs;			/*
					 * LANE2: Each MAC address can have TLVs
					 * associated with it.  sizeoftlvs tells
					 * the length of the tlvs array
					 */
	struct sk_buff_head tx_wait;	/* wait queue for outgoing packets */
	refcount_t usage;		/* usage count */
};

/*
 * LANE2: Template tlv struct for accessing
 * the tlvs in the lec_arp_table->tlvs array.
 *
 * NOTE(review): value[] is sized at 255 bytes, which matches the largest
 * count an 8-bit length can express - presumably intentional; confirm.
 */
struct tlv {
	u32 type;		/* TLV type identifier */
	u8 length;		/* presumably the number of valid bytes in value[] */
	u8 value[255];		/* TLV payload storage */
};

/*
 * Status fields: values for lec_arp_table.status.
 * Note that the value 3 is not defined in this header.
 */
#define ESI_UNKNOWN 0		/*
				 * Next packet sent to this mac address
				 * causes ARP-request to be sent
				 */
#define ESI_ARP_PENDING 1	/*
				 * There is no ATM address associated with this
				 * 48-bit address.  The LE-ARP protocol is in
				 * progress.
				 */
#define ESI_VC_PENDING 2	/*
				 * There is a valid ATM address associated with
				 * this 48-bit address but there is no VC set
				 * up to that ATM address.  The signaling
				 * protocol is in process.
				 */
#define ESI_FLUSH_PENDING 4	/*
				 * The LEC has been notified of the FLUSH_START
				 * status and it is assumed that the flush
				 * protocol is in process.
				 */
#define ESI_FORWARD_DIRECT 5	/*
				 * Either the Path Switching Delay (C22) has
				 * elapsed or the LEC has notified the Mapping
				 * that the flush protocol has completed.  In
				 * either case, it is safe to forward packets
				 * to this address via the data direct VC.
				 */

/*
 * Flag values.
 * NOTE(review): presumably bit masks for lec_arp_table.flags; the users
 * are not visible in this header - confirm.
 */
#define LEC_REMOTE_FLAG      0x0001	/* bit 0 */
#define LEC_PERMANENT_FLAG   0x0002	/* bit 1 */

#endif /* _LEC_ARP_H_ */
back to top