Revision 78c906e430b13d30a8cfbdef4ccbbe1686841a9e authored by Vlad Buslov on 31 August 2020, 13:17:29 UTC, committed by Saeed Mahameed on 05 November 2020, 20:17:05 UTC
In functions mlx5e_route_lookup_ipv{4|6}() route_dev can be arbitrary net
device and not necessary mlx5 eswitch port representor. As such, in order
to ensure that route_dev is not destroyed concurrent the code needs either
explicitly take reference to the device before releasing reference to
rtable instance or ensure that caller holds rtnl lock. First approach is
chosen as a fix since rtnl lock dependency was intentionally removed from
mlx5 TC layer.

To prevent unprotected usage of route_dev in encap code take a reference to
the device before releasing rt. Don't save direct pointer to the device in
mlx5_encap_entry structure and use ifindex instead. Modify users of
route_dev pointer to properly obtain the net device instance from its
ifindex.

Fixes: 61086f391044 ("net/mlx5e: Protect encap hash table with mutex")
Fixes: 6707f74be862 ("net/mlx5e: Update hw flows when encap source mac changed")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
1 parent e68e28b
Raw File
raw_diag.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

#include <net/inet_sock.h>
#include <net/raw.h>
#include <net/rawv6.h>

#ifdef pr_fmt
# undef pr_fmt
#endif

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

static struct raw_hashinfo *
raw_get_hashinfo(const struct inet_diag_req_v2 *r)
{
	if (r->sdiag_family == AF_INET) {
		return &raw_v4_hashinfo;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (r->sdiag_family == AF_INET6) {
		return &raw_v6_hashinfo;
#endif
	} else {
		return ERR_PTR(-EINVAL);
	}
}

/*
 * Due to requirement of not breaking user API we can't simply
 * rename @pad field in inet_diag_req_v2 structure, instead
 * use helper to figure it out.
 */

static struct sock *raw_lookup(struct net *net, struct sock *from,
			       const struct inet_diag_req_v2 *req)
{
	struct inet_diag_req_raw *r = (void *)req;
	struct sock *sk = NULL;

	if (r->sdiag_family == AF_INET)
		sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
				     r->id.idiag_dst[0],
				     r->id.idiag_src[0],
				     r->id.idiag_if, 0);
#if IS_ENABLED(CONFIG_IPV6)
	else
		sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
				     (const struct in6_addr *)r->id.idiag_src,
				     (const struct in6_addr *)r->id.idiag_dst,
				     r->id.idiag_if, 0);
#endif
	return sk;
}

static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
{
	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
	struct sock *sk = NULL, *s;
	int slot;

	if (IS_ERR(hashinfo))
		return ERR_CAST(hashinfo);

	read_lock(&hashinfo->lock);
	for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
		sk_for_each(s, &hashinfo->ht[slot]) {
			sk = raw_lookup(net, s, r);
			if (sk) {
				/*
				 * Grab it and keep until we fill
				 * diag meaage to be reported, so
				 * caller should call sock_put then.
				 * We can do that because we're keeping
				 * hashinfo->lock here.
				 */
				sock_hold(sk);
				goto out_unlock;
			}
		}
	}
out_unlock:
	read_unlock(&hashinfo->lock);

	return sk ? sk : ERR_PTR(-ENOENT);
}

static int raw_diag_dump_one(struct netlink_callback *cb,
			     const struct inet_diag_req_v2 *r)
{
	struct sk_buff *in_skb = cb->skb;
	struct sk_buff *rep;
	struct sock *sk;
	struct net *net;
	int err;

	net = sock_net(in_skb->sk);
	sk = raw_sock_get(net, r);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
			inet_diag_msg_attrs_size() +
			nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
			GFP_KERNEL);
	if (!rep) {
		sock_put(sk);
		return -ENOMEM;
	}

	err = inet_sk_diag_fill(sk, NULL, rep, cb, r, 0,
				netlink_net_capable(in_skb, CAP_NET_ADMIN));
	sock_put(sk);

	if (err < 0) {
		kfree_skb(rep);
		return err;
	}

	err = netlink_unicast(net->diag_nlsk, rep,
			      NETLINK_CB(in_skb).portid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;
	return err;
}

static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			struct nlattr *bc, bool net_admin)
{
	if (!inet_diag_bc_sk(bc, sk))
		return 0;

	return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin);
}

static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
	struct net *net = sock_net(skb->sk);
	struct inet_diag_dump_data *cb_data;
	int num, s_num, slot, s_slot;
	struct sock *sk = NULL;
	struct nlattr *bc;

	if (IS_ERR(hashinfo))
		return;

	cb_data = cb->data;
	bc = cb_data->inet_diag_nla_bc;
	s_slot = cb->args[0];
	num = s_num = cb->args[1];

	read_lock(&hashinfo->lock);
	for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
		num = 0;

		sk_for_each(sk, &hashinfo->ht[slot]) {
			struct inet_sock *inet = inet_sk(sk);

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next;
			if (sk->sk_family != r->sdiag_family)
				goto next;
			if (r->id.idiag_sport != inet->inet_sport &&
			    r->id.idiag_sport)
				goto next;
			if (r->id.idiag_dport != inet->inet_dport &&
			    r->id.idiag_dport)
				goto next;
			if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
				goto out_unlock;
next:
			num++;
		}
	}

out_unlock:
	read_unlock(&hashinfo->lock);

	cb->args[0] = slot;
	cb->args[1] = num;
}

static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			      void *info)
{
	r->idiag_rqueue = sk_rmem_alloc_get(sk);
	r->idiag_wqueue = sk_wmem_alloc_get(sk);
}

#ifdef CONFIG_INET_DIAG_DESTROY
static int raw_diag_destroy(struct sk_buff *in_skb,
			    const struct inet_diag_req_v2 *r)
{
	struct net *net = sock_net(in_skb->sk);
	struct sock *sk;
	int err;

	sk = raw_sock_get(net, r);
	if (IS_ERR(sk))
		return PTR_ERR(sk);
	err = sock_diag_destroy(sk, ECONNABORTED);
	sock_put(sk);
	return err;
}
#endif

static const struct inet_diag_handler raw_diag_handler = {
	.dump			= raw_diag_dump,
	.dump_one		= raw_diag_dump_one,
	.idiag_get_info		= raw_diag_get_info,
	.idiag_type		= IPPROTO_RAW,
	.idiag_info_size	= 0,
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy		= raw_diag_destroy,
#endif
};

static void __always_unused __check_inet_diag_req_raw(void)
{
	/*
	 * Make sure the two structures are identical,
	 * except the @pad field.
	 */
#define __offset_mismatch(m1, m2)			\
	(offsetof(struct inet_diag_req_v2, m1) !=	\
	 offsetof(struct inet_diag_req_raw, m2))

	BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) !=
		     sizeof(struct inet_diag_req_raw));
	BUILD_BUG_ON(__offset_mismatch(sdiag_family, sdiag_family));
	BUILD_BUG_ON(__offset_mismatch(sdiag_protocol, sdiag_protocol));
	BUILD_BUG_ON(__offset_mismatch(idiag_ext, idiag_ext));
	BUILD_BUG_ON(__offset_mismatch(pad, sdiag_raw_protocol));
	BUILD_BUG_ON(__offset_mismatch(idiag_states, idiag_states));
	BUILD_BUG_ON(__offset_mismatch(id, id));
#undef __offset_mismatch
}

static int __init raw_diag_init(void)
{
	return inet_diag_register(&raw_diag_handler);
}

static void __exit raw_diag_exit(void)
{
	inet_diag_unregister(&raw_diag_handler);
}

module_init(raw_diag_init);
module_exit(raw_diag_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
back to top