Revision 88e8ac11d2ea3acc003cf01bb5a38c8aa76c3cfd authored by Charan Teja Reddy on 21 August 2020, 00:42:27 UTC, committed by Linus Torvalds on 21 August 2020, 16:52:53 UTC
The following race is observed with the repeated online, offline and a
delay between two successive online of memory blocks of movable zone.

P1						P2

Online the first memory block in
the movable zone. The pcp struct
values are initialized to default
values,i.e., pcp->high = 0 &
pcp->batch = 1.

					Allocate the pages from the
					movable zone.

Try to Online the second memory
block in the movable zone thus it
entered the online_pages() but yet
to call zone_pcp_update().
					This process is entered into
					the exit path thus it tries
					to release the order-0 pages
					to pcp lists through
					free_unref_page_commit().
					As pcp->high = 0, pcp->count = 1
					proceed to call the function
					free_pcppages_bulk().
Update the pcp values thus the
new pcp values are like, say,
pcp->high = 378, pcp->batch = 63.
					Read the pcp's batch value using
					READ_ONCE() and pass the same to
					free_pcppages_bulk(), pcp values
					passed here are, batch = 63,
					count = 1.

					Since num of pages in the pcp
					lists are less than ->batch,
					then it will stuck in
					while(list_empty(list)) loop
					with interrupts disabled thus
					a core hung.

Avoid this by ensuring free_pcppages_bulk() is called with proper count of
pcp list pages.

The mentioned race is some what easily reproducible without [1] because
pcp's are not updated for the first memory block online and thus there is
a enough race window for P2 between alloc+free and pcp struct values
update through onlining of second memory block.

With [1], the race still exists but it is very narrow as we update the pcp
struct values for the first memory block online itself.

This is not limited to the movable zone, it could also happen in cases
with the normal zone (e.g., hotplug to a node that only has DMA memory, or
no other memory yet).

[1]: https://patchwork.kernel.org/patch/11696389/

Fixes: 5f8dcc21211a ("page-allocator: split per-cpu list into one-list-per-migrate-type")
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: <stable@vger.kernel.org> [2.6+]
Link: http://lkml.kernel.org/r/1597150703-19003-1-git-send-email-charante@codeaurora.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e08d3fd
Raw File
x25_in.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	X.25 Packet Layer release 002
 *
 *	This is ALPHA test software. This code may break your machine,
 *	randomly fail to work with new releases, misbehave and/or generally
 *	screw up. It might even work.
 *
 *	This code REQUIRES 2.1.15 or higher
 *
 *	History
 *	X.25 001	Jonathan Naylor	  Started coding.
 *	X.25 002	Jonathan Naylor	  Centralised disconnection code.
 *					  New timer architecture.
 *	2000-03-20	Daniela Squassoni Disabling/enabling of facilities
 *					  negotiation.
 *	2000-11-10	Henner Eisen	  Check and reset for out-of-sequence
 *					  i-frames.
 */

#define pr_fmt(fmt) "X25: " fmt

#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/x25.h>

static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
{
	struct sk_buff *skbo, *skbn = skb;
	struct x25_sock *x25 = x25_sk(sk);

	if (more) {
		x25->fraglen += skb->len;
		skb_queue_tail(&x25->fragment_queue, skb);
		skb_set_owner_r(skb, sk);
		return 0;
	}

	if (!more && x25->fraglen > 0) {	/* End of fragment */
		int len = x25->fraglen + skb->len;

		if ((skbn = alloc_skb(len, GFP_ATOMIC)) == NULL){
			kfree_skb(skb);
			return 1;
		}

		skb_queue_tail(&x25->fragment_queue, skb);

		skb_reset_transport_header(skbn);

		skbo = skb_dequeue(&x25->fragment_queue);
		skb_copy_from_linear_data(skbo, skb_put(skbn, skbo->len),
					  skbo->len);
		kfree_skb(skbo);

		while ((skbo =
			skb_dequeue(&x25->fragment_queue)) != NULL) {
			skb_pull(skbo, (x25->neighbour->extended) ?
					X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
			skb_copy_from_linear_data(skbo,
						  skb_put(skbn, skbo->len),
						  skbo->len);
			kfree_skb(skbo);
		}

		x25->fraglen = 0;
	}

	skb_set_owner_r(skbn, sk);
	skb_queue_tail(&sk->sk_receive_queue, skbn);
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);

	return 0;
}

/*
 * State machine for state 1, Awaiting Call Accepted State.
 * The handling of the timer(s) is in file x25_timer.c.
 * Handling of state 0 and connection release is in af_x25.c.
 */
static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype)
{
	struct x25_address source_addr, dest_addr;
	int len;
	struct x25_sock *x25 = x25_sk(sk);

	switch (frametype) {
	case X25_CALL_ACCEPTED: {

		x25_stop_timer(sk);
		x25->condition = 0x00;
		x25->vs        = 0;
		x25->va        = 0;
		x25->vr        = 0;
		x25->vl        = 0;
		x25->state     = X25_STATE_3;
		sk->sk_state   = TCP_ESTABLISHED;
		/*
		 *	Parse the data in the frame.
		 */
		if (!pskb_may_pull(skb, X25_STD_MIN_LEN))
			goto out_clear;
		skb_pull(skb, X25_STD_MIN_LEN);

		len = x25_parse_address_block(skb, &source_addr,
					      &dest_addr);
		if (len > 0)
			skb_pull(skb, len);
		else if (len < 0)
			goto out_clear;

		len = x25_parse_facilities(skb, &x25->facilities,
					   &x25->dte_facilities,
					   &x25->vc_facil_mask);
		if (len > 0)
			skb_pull(skb, len);
		else if (len < 0)
			goto out_clear;
		/*
		 *	Copy any Call User Data.
		 */
		if (skb->len > 0) {
			if (skb->len > X25_MAX_CUD_LEN)
				goto out_clear;

			skb_copy_bits(skb, 0, x25->calluserdata.cuddata,
				skb->len);
			x25->calluserdata.cudlength = skb->len;
		}
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_state_change(sk);
		break;
	}
	case X25_CALL_REQUEST:
		/* call collision */
		x25->causediag.cause      = 0x01;
		x25->causediag.diagnostic = 0x48;

		x25_write_internal(sk, X25_CLEAR_REQUEST);
		x25_disconnect(sk, EISCONN, 0x01, 0x48);
		break;

	case X25_CLEAR_REQUEST:
		if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
			goto out_clear;

		x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
		x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]);
		break;

	default:
		break;
	}

	return 0;

out_clear:
	x25_write_internal(sk, X25_CLEAR_REQUEST);
	x25->state = X25_STATE_2;
	x25_start_t23timer(sk);
	return 0;
}

/*
 * State machine for state 2, Awaiting Clear Confirmation State.
 * The handling of the timer(s) is in file x25_timer.c
 * Handling of state 0 and connection release is in af_x25.c.
 */
static int x25_state2_machine(struct sock *sk, struct sk_buff *skb, int frametype)
{
	switch (frametype) {

		case X25_CLEAR_REQUEST:
			if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
				goto out_clear;

			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
			break;

		case X25_CLEAR_CONFIRMATION:
			x25_disconnect(sk, 0, 0, 0);
			break;

		default:
			break;
	}

	return 0;

out_clear:
	x25_write_internal(sk, X25_CLEAR_REQUEST);
	x25_start_t23timer(sk);
	return 0;
}

/*
 * State machine for state 3, Connected State.
 * The handling of the timer(s) is in file x25_timer.c
 * Handling of state 0 and connection release is in af_x25.c.
 */
static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m)
{
	int queued = 0;
	int modulus;
	struct x25_sock *x25 = x25_sk(sk);

	modulus = (x25->neighbour->extended) ? X25_EMODULUS : X25_SMODULUS;

	switch (frametype) {

		case X25_RESET_REQUEST:
			x25_write_internal(sk, X25_RESET_CONFIRMATION);
			x25_stop_timer(sk);
			x25->condition = 0x00;
			x25->vs        = 0;
			x25->vr        = 0;
			x25->va        = 0;
			x25->vl        = 0;
			x25_requeue_frames(sk);
			break;

		case X25_CLEAR_REQUEST:
			if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
				goto out_clear;

			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
			break;

		case X25_RR:
		case X25_RNR:
			if (!x25_validate_nr(sk, nr)) {
				x25_clear_queues(sk);
				x25_write_internal(sk, X25_RESET_REQUEST);
				x25_start_t22timer(sk);
				x25->condition = 0x00;
				x25->vs        = 0;
				x25->vr        = 0;
				x25->va        = 0;
				x25->vl        = 0;
				x25->state     = X25_STATE_4;
			} else {
				x25_frames_acked(sk, nr);
				if (frametype == X25_RNR) {
					x25->condition |= X25_COND_PEER_RX_BUSY;
				} else {
					x25->condition &= ~X25_COND_PEER_RX_BUSY;
				}
			}
			break;

		case X25_DATA:	/* XXX */
			x25->condition &= ~X25_COND_PEER_RX_BUSY;
			if ((ns != x25->vr) || !x25_validate_nr(sk, nr)) {
				x25_clear_queues(sk);
				x25_write_internal(sk, X25_RESET_REQUEST);
				x25_start_t22timer(sk);
				x25->condition = 0x00;
				x25->vs        = 0;
				x25->vr        = 0;
				x25->va        = 0;
				x25->vl        = 0;
				x25->state     = X25_STATE_4;
				break;
			}
			x25_frames_acked(sk, nr);
			if (ns == x25->vr) {
				if (x25_queue_rx_frame(sk, skb, m) == 0) {
					x25->vr = (x25->vr + 1) % modulus;
					queued = 1;
				} else {
					/* Should never happen */
					x25_clear_queues(sk);
					x25_write_internal(sk, X25_RESET_REQUEST);
					x25_start_t22timer(sk);
					x25->condition = 0x00;
					x25->vs        = 0;
					x25->vr        = 0;
					x25->va        = 0;
					x25->vl        = 0;
					x25->state     = X25_STATE_4;
					break;
				}
				if (atomic_read(&sk->sk_rmem_alloc) >
				    (sk->sk_rcvbuf >> 1))
					x25->condition |= X25_COND_OWN_RX_BUSY;
			}
			/*
			 *	If the window is full Ack it immediately, else
			 *	start the holdback timer.
			 */
			if (((x25->vl + x25->facilities.winsize_in) % modulus) == x25->vr) {
				x25->condition &= ~X25_COND_ACK_PENDING;
				x25_stop_timer(sk);
				x25_enquiry_response(sk);
			} else {
				x25->condition |= X25_COND_ACK_PENDING;
				x25_start_t2timer(sk);
			}
			break;

		case X25_INTERRUPT_CONFIRMATION:
			clear_bit(X25_INTERRUPT_FLAG, &x25->flags);
			break;

		case X25_INTERRUPT:
			if (sock_flag(sk, SOCK_URGINLINE))
				queued = !sock_queue_rcv_skb(sk, skb);
			else {
				skb_set_owner_r(skb, sk);
				skb_queue_tail(&x25->interrupt_in_queue, skb);
				queued = 1;
			}
			sk_send_sigurg(sk);
			x25_write_internal(sk, X25_INTERRUPT_CONFIRMATION);
			break;

		default:
			pr_warn("unknown %02X in state 3\n", frametype);
			break;
	}

	return queued;

out_clear:
	x25_write_internal(sk, X25_CLEAR_REQUEST);
	x25->state = X25_STATE_2;
	x25_start_t23timer(sk);
	return 0;
}

/*
 * State machine for state 4, Awaiting Reset Confirmation State.
 * The handling of the timer(s) is in file x25_timer.c
 * Handling of state 0 and connection release is in af_x25.c.
 */
static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametype)
{
	struct x25_sock *x25 = x25_sk(sk);

	switch (frametype) {

		case X25_RESET_REQUEST:
			x25_write_internal(sk, X25_RESET_CONFIRMATION);
			/* fall through */
		case X25_RESET_CONFIRMATION: {
			x25_stop_timer(sk);
			x25->condition = 0x00;
			x25->va        = 0;
			x25->vr        = 0;
			x25->vs        = 0;
			x25->vl        = 0;
			x25->state     = X25_STATE_3;
			x25_requeue_frames(sk);
			break;
		}
		case X25_CLEAR_REQUEST:
			if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
				goto out_clear;

			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
			break;

		default:
			break;
	}

	return 0;

out_clear:
	x25_write_internal(sk, X25_CLEAR_REQUEST);
	x25->state = X25_STATE_2;
	x25_start_t23timer(sk);
	return 0;
}

/*
 * State machine for state 5, Call Accepted / Call Connected pending (X25_ACCPT_APPRV_FLAG).
 * The handling of the timer(s) is in file x25_timer.c
 * Handling of state 0 and connection release is in af_x25.c.
 */
static int x25_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype)
{
	struct x25_sock *x25 = x25_sk(sk);

	switch (frametype) {
		case X25_CLEAR_REQUEST:
			if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) {
				x25_write_internal(sk, X25_CLEAR_REQUEST);
				x25->state = X25_STATE_2;
				x25_start_t23timer(sk);
				return 0;
			}

			x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
			x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
			break;

		default:
			break;
	}

	return 0;
}

/* Higher level upcall for a LAPB frame */
int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb)
{
	struct x25_sock *x25 = x25_sk(sk);
	int queued = 0, frametype, ns, nr, q, d, m;

	if (x25->state == X25_STATE_0)
		return 0;

	frametype = x25_decode(sk, skb, &ns, &nr, &q, &d, &m);

	switch (x25->state) {
	case X25_STATE_1:
		queued = x25_state1_machine(sk, skb, frametype);
		break;
	case X25_STATE_2:
		queued = x25_state2_machine(sk, skb, frametype);
		break;
	case X25_STATE_3:
		queued = x25_state3_machine(sk, skb, frametype, ns, nr, q, d, m);
		break;
	case X25_STATE_4:
		queued = x25_state4_machine(sk, skb, frametype);
		break;
	case X25_STATE_5:
		queued = x25_state5_machine(sk, skb, frametype);
		break;
	}

	x25_kick(sk);

	return queued;
}

int x25_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int queued = x25_process_rx_frame(sk, skb);

	if (!queued)
		kfree_skb(skb);

	return 0;
}
back to top