Revision 4e683f499a15cd777d3cb51aaebe48d72334c852 authored by David S. Miller on 23 September 2017, 03:33:18 UTC, committed by David S. Miller on 23 September 2017, 03:33:18 UTC
Josef Bacik says:

====================
net: fix reuseaddr regression

I introduced a regression when reworking the fastreuse port handling that
allows bind conflicts to occur once a reuseaddr socket successfully binds on
an existing tb. The root cause is that I reversed an if statement, which
caused us to set up the tb as if there were no owners on the socket when
there were, which obviously is not correct.

Dave, could you please queue these changes up for -stable? I've run them
through the net tests and added another test to check for this problem
specifically.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 581fe0e + fbed24b
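
For illustration, here is a minimal user-space sketch of the reversed-condition
pattern the commit message describes. struct bind_bucket, update_fastreuse(),
and their fields are hypothetical stand-ins for the kernel's inet_bind_bucket
bookkeeping, not the actual patched code; the point is only that the cached
reuse flag may be (re)initialized solely when the bucket has no owners yet.

/*
 * Hedged sketch of the reversed-condition regression; all names here
 * are illustrative, not the kernel's real definitions.
 */
#include <stdbool.h>
#include <stdio.h>

struct bind_bucket {
	int  owners;     /* sockets already bound to this port */
	bool fastreuse;  /* cached "every owner set SO_REUSEADDR" flag */
};

static void update_fastreuse(struct bind_bucket *tb, bool reuse)
{
	if (tb->owners == 0) {
		/* empty bucket: safe to initialize the cached flag */
		tb->fastreuse = reuse;
	} else if (tb->fastreuse && !reuse) {
		/* a non-reuse owner joins: disable the fast path */
		tb->fastreuse = false;
	}
	/* the regression effectively reversed the first test, taking the
	 * "initialize" branch while owners existed, so a later reuseaddr
	 * bind could skip the conflict check against those owners
	 */
}

int main(void)
{
	struct bind_bucket tb = { .owners = 1, .fastreuse = false };

	/* with the correct test the flag stays false, because the
	 * existing owner did not set SO_REUSEADDR
	 */
	update_fastreuse(&tb, true);
	printf("fastreuse=%d (expected 0)\n", tb.fastreuse);
	return 0;
}
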
smc_close.c
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Socket Closing - normal and abnormal
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME		(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait - with timeout - until pending CDC transmissions are done */
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

/* signal the peer that we are done writing */
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* send the closing CDC message; announce an abort if unread data remains */
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* announce an abnormal termination to the peer */
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

/* perform the active close processing according to the socket state */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* a peer_conn_abort has been received; adjust the socket state */
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

/* delayed work: unhash the socket and release the final reference */
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

/* shut down the write side of the socket */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}