Revision 171936ddaf97e6f4e1264f4128bb5cf15691339c authored by Tony Luck on 25 June 2021, 01:39:55 UTC, committed by Linus Torvalds on 25 June 2021, 02:40:54 UTC
Patch series "mm,hwpoison: fix sending SIGBUS for Action Required MCE", v5. I wrote this patchset to materialize what I think is the current allowable solution mentioned by the previous discussion [1]. I simply borrowed Tony's mutex patch and Aili's return code patch, then I queued another one to find error virtual address in the best effort manner. I know that this is not a perfect solution, but should work for some typical case. [1]: https://lore.kernel.org/linux-mm/20210331192540.2141052f@alex-virtual-machine/ This patch (of 2): There can be races when multiple CPUs consume poison from the same page. The first into memory_failure() atomically sets the HWPoison page flag and begins hunting for tasks that map this page. Eventually it invalidates those mappings and may send a SIGBUS to the affected tasks. But while all that work is going on, other CPUs see a "success" return code from memory_failure() and so they believe the error has been handled and continue executing. Fix by wrapping most of the internal parts of memory_failure() in a mutex. [akpm@linux-foundation.org: make mf_mutex local to memory_failure()] Link: https://lkml.kernel.org/r/20210521030156.2612074-1-nao.horiguchi@gmail.com Link: https://lkml.kernel.org/r/20210521030156.2612074-2-nao.horiguchi@gmail.com Signed-off-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> Reviewed-by: Borislav Petkov <bp@suse.de> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Aili Yao <yaoaili@kingsoft.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: David Hildenbrand <david@redhat.com> Cc: Jue Wang <juew@google.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent fe19bd3
lec_arpc.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Lec arp cache
*
* Marko Kiiskila <mkiiskila@yahoo.com>
*/
#ifndef _LEC_ARP_H_
#define _LEC_ARP_H_
#include <linux/atm.h>
#include <linux/atmdev.h>
#include <linux/if_ether.h>
#include <linux/atmlec.h>
struct lec_arp_table {
struct hlist_node next; /* Linked entry list */
unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */
unsigned char mac_addr[ETH_ALEN]; /* Mac address */
int is_rdesc; /* Mac address is a route descriptor */
struct atm_vcc *vcc; /* Vcc this entry is attached */
struct atm_vcc *recv_vcc; /* Vcc we receive data from */
void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb);
/* Push that leads to daemon */
void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb);
/* Push that leads to daemon */
unsigned long last_used; /* For expiry */
unsigned long timestamp; /* Used for various timestamping things:
* 1. FLUSH started
* (status=ESI_FLUSH_PENDING)
* 2. Counting to
* max_unknown_frame_time
* (status=ESI_ARP_PENDING||
* status=ESI_VC_PENDING)
*/
unsigned char no_tries; /* No of times arp retry has been tried */
unsigned char status; /* Status of this entry */
unsigned short flags; /* Flags for this entry */
unsigned short packets_flooded; /* Data packets flooded */
unsigned long flush_tran_id; /* Transaction id in flush protocol */
struct timer_list timer; /* Arping timer */
struct lec_priv *priv; /* Pointer back */
u8 *tlvs;
u32 sizeoftlvs; /*
* LANE2: Each MAC address can have TLVs
* associated with it. sizeoftlvs tells
* the length of the tlvs array
*/
struct sk_buff_head tx_wait; /* wait queue for outgoing packets */
refcount_t usage; /* usage count */
};
/*
* LANE2: Template tlv struct for accessing
* the tlvs in the lec_arp_table->tlvs array
*/
struct tlv {
u32 type;
u8 length;
u8 value[255];
};
/* Status fields */
#define ESI_UNKNOWN 0 /*
* Next packet sent to this mac address
* causes ARP-request to be sent
*/
#define ESI_ARP_PENDING 1 /*
* There is no ATM address associated with this
* 48-bit address. The LE-ARP protocol is in
* progress.
*/
#define ESI_VC_PENDING 2 /*
* There is a valid ATM address associated with
* this 48-bit address but there is no VC set
* up to that ATM address. The signaling
* protocol is in process.
*/
#define ESI_FLUSH_PENDING 4 /*
* The LEC has been notified of the FLUSH_START
* status and it is assumed that the flush
* protocol is in process.
*/
#define ESI_FORWARD_DIRECT 5 /*
* Either the Path Switching Delay (C22) has
* elapsed or the LEC has notified the Mapping
* that the flush protocol has completed. In
* either case, it is safe to forward packets
* to this address via the data direct VC.
*/
/* Flag values */
#define LEC_REMOTE_FLAG 0x0001
#define LEC_PERMANENT_FLAG 0x0002
#endif /* _LEC_ARP_H_ */
Computing file changes ...