/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */

#ifndef __LIB_PROXY_H_
#define __LIB_PROXY_H_

#include "conntrack.h"

#if !(__ctx_is == __ctx_skb)
#error "Proxy redirection is only supported from skb context"
#endif

#ifdef ENABLE_TPROXY
static __always_inline int
assign_socket_tcp(struct __ctx_buff *ctx,
		  struct bpf_sock_tuple *tuple, __u32 len, bool established)
{
	int result = DROP_PROXY_LOOKUP_FAILED;
	struct bpf_sock *sk;
	__u32 dbg_ctx;

	sk = skc_lookup_tcp(ctx, tuple, len, BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		goto out;

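	/* For this established-connection pass, skip sockets that are not
	 * fully established: TIME_WAIT and LISTEN sockets are left to the
	 * subsequent tproxy-socket lookup instead.
	 */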
	if (established && sk->state == BPF_TCP_TIME_WAIT)
		goto release;
	if (established && sk->state == BPF_TCP_LISTEN)
		goto release;

	dbg_ctx = sk->family << 16 | ctx->protocol;
	result = sk_assign(ctx, sk, 0);
	cilium_dbg(ctx, DBG_SK_ASSIGN, -result, dbg_ctx);
	if (result == 0)
		result = CTX_ACT_OK;
	else
		result = DROP_PROXY_SET_FAILED;
release:
	sk_release(sk);
out:
	return result;
}

static __always_inline int
assign_socket_udp(struct __ctx_buff *ctx,
		  struct bpf_sock_tuple *tuple, __u32 len,
		  bool established __maybe_unused)
{
	int result = DROP_PROXY_LOOKUP_FAILED;
	struct bpf_sock *sk;
	__u32 dbg_ctx;

	sk = sk_lookup_udp(ctx, tuple, len, BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		goto out;

	dbg_ctx = sk->family << 16 | ctx->protocol;
	result = sk_assign(ctx, sk, 0);
	cilium_dbg(ctx, DBG_SK_ASSIGN, -result, dbg_ctx);
	if (result == 0)
		result = CTX_ACT_OK;
	else
		result = DROP_PROXY_SET_FAILED;
	sk_release(sk);
out:
	return result;
}

static __always_inline int
assign_socket(struct __ctx_buff *ctx,
	      struct bpf_sock_tuple *tuple, __u32 len,
	      __u8 nexthdr, bool established)
{
	/* Workaround: While assign_socket_tcp() and assign_socket_udp() are
	 * nearly identical in C implementation, the 'struct bpf_sock *'
	 * returned by their respective lookup helpers has a different
	 * verifier pointer type, which means we can't fold these
	 * implementations together.
	 */
	switch (nexthdr) {
	case IPPROTO_TCP:
		return assign_socket_tcp(ctx, tuple, len, established);
	case IPPROTO_UDP:
		return assign_socket_udp(ctx, tuple, len, established);
	}
	return DROP_PROXY_UNKNOWN_PROTO;
}

/**
 * combine_ports joins the specified ports in a manner consistent with
 * pkg/monitor/datapath_debug.go to report the ports in monitor messages.
 */
static __always_inline __u32
combine_ports(__u16 dport, __u16 sport)
{
	return (bpf_ntohs(dport) << 16) | bpf_ntohs(sport);
}
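
/* Worked example (illustrative, not from the original source): for
 * network-order dport = bpf_htons(80) and sport = bpf_htons(12345),
 * combine_ports() returns (80 << 16) | 12345 = 0x00503039, i.e. the
 * destination port in the upper 16 bits and the source port in the
 * lower 16 bits.
 */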

#define CTX_REDIRECT_FN(NAME, CT_TUPLE_TYPE, SK_FIELD,				\
			DBG_LOOKUP_CODE, DADDR_DBG, SADDR_DBG)			\
/**										\
 * ctx_redirect_to_proxy_ingress4 / ctx_redirect_to_proxy_ingress6		\
 * @ctx			pointer to program context				\
 * @ct_tuple		pointer to *scratch buffer* with packet tuple	\
 * @proxy_port		port to redirect traffic towards			\
 *										\
 * Prefetch the proxy socket and associate with the ctx. Must be run on tc	\
 * ingress. Will modify 'ct_tuple'!					\
 */										\
static __always_inline int							\
NAME(struct __ctx_buff *ctx, CT_TUPLE_TYPE * ct_tuple, __be16 proxy_port)	\
{										\
	struct bpf_sock_tuple *tuple = (struct bpf_sock_tuple *)ct_tuple;	\
	__u8 nexthdr = ct_tuple->nexthdr;					\
	__u32 len = sizeof(tuple->SK_FIELD);					\
	__u16 port;								\
	int result;								\
										\
	/* The provided 'ct_tuple' is in the internal Cilium format, which	\
	 * reverses the source/destination ports as compared with the actual	\
	 * packet contents. 'bpf_sock_tuple' in the eBPF API needs these to	\
	 * match normal packet ordering to successfully look up the		\
	 * corresponding socket. So, swap them here.				\
	 */									\
	port = tuple->SK_FIELD.sport;						\
	tuple->SK_FIELD.sport = tuple->SK_FIELD.dport;				\
	tuple->SK_FIELD.dport = port;						\
										\
	/* Look for established socket locally first */				\
	cilium_dbg3(ctx, DBG_LOOKUP_CODE,					\
		    tuple->SK_FIELD.SADDR_DBG, tuple->SK_FIELD.DADDR_DBG,	\
		    combine_ports(tuple->SK_FIELD.dport, tuple->SK_FIELD.sport));	\
	result = assign_socket(ctx, tuple, len, nexthdr, true);			\
	if (result == CTX_ACT_OK)						\
		goto out;							\
										\
	/* if there's no established connection, locate the tproxy socket */	\
	tuple->SK_FIELD.dport = proxy_port;					\
	tuple->SK_FIELD.sport = 0;						\
	memset(&tuple->SK_FIELD.daddr, 0, sizeof(tuple->SK_FIELD.daddr));	\
	memset(&tuple->SK_FIELD.saddr, 0, sizeof(tuple->SK_FIELD.saddr));	\
	cilium_dbg3(ctx, DBG_LOOKUP_CODE,					\
		    tuple->SK_FIELD.SADDR_DBG, tuple->SK_FIELD.DADDR_DBG,	\
		    combine_ports(tuple->SK_FIELD.dport, tuple->SK_FIELD.sport));	\
	result = assign_socket(ctx, tuple, len, nexthdr, false);		\
										\
out:										\
	return result;								\
}
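
/* Illustrative walk-through of the two lookups above (all addresses and
 * ports hypothetical): for an IPv4 flow client:12345 -> backend:80 being
 * redirected to a proxy listening on port 15001, the generated function
 * tries:
 *
 *   1. established: saddr=client, daddr=backend, sport=12345, dport=80
 *   2. tproxy:      saddr=0,      daddr=0,       sport=0,     dport=15001
 *
 * The second, wildcarded tuple matches the proxy's listening socket, which
 * is typically bound with the IP_TRANSPARENT option so that it can accept
 * traffic destined to foreign addresses.
 */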

#ifdef ENABLE_IPV4
CTX_REDIRECT_FN(ctx_redirect_to_proxy_ingress4, struct ipv4_ct_tuple, ipv4,
		DBG_SK_LOOKUP4, daddr, saddr)
#endif
#ifdef ENABLE_IPV6
CTX_REDIRECT_FN(ctx_redirect_to_proxy_ingress6, struct ipv6_ct_tuple, ipv6,
		DBG_SK_LOOKUP6, daddr[3], saddr[3])
#endif
#undef CTX_REDIRECT_FN
#endif /* ENABLE_TPROXY */

/**
 * __ctx_redirect_to_proxy configures the ctx with the proxy mark and proxy
 * port number to ensure that the stack redirects the packet into the proxy.
 *
 * It is called from both ingress and egress side of endpoint devices.
 *
 * In regular veth mode:
 * * To apply egress policy, the egressing endpoint configures the mark,
 *   which returns CTX_ACT_OK to pass the packet to the stack in the context
 *   of the source device (stack ingress).
 * * To apply ingress policy, the egressing endpoint or netdev program tail
 *   calls into the policy program which configures the mark here, which
 *   returns CTX_ACT_OK to pass the packet to the stack in the context of the
 *   source device (netdev or egress endpoint device, stack ingress).
 *
 * In chaining mode with bridged endpoint devices:
 * * To apply egress policy, the egressing endpoint configures the mark,
 *   which is propagated via ctx_store_meta() in the caller. The redirect() call
 *   here redirects the packet to the ingress TC filter configured on the bridge
 *   master device.
 * * To apply ingress policy, the stack transmits the packet into the bridge
 *   master device which tail calls into the policy program for the ingress
 *   endpoint, which configures mark and cb[] as described for the egress path.
 *   The redirect() call here redirects the packet to the ingress TC filter
 *   configured on the bridge master device.
 * * In both cases for bridged endpoint devices, the bridge master device has
 *   a BPF program configured upon ingress to transfer the cb[] to the mark
 *   before passing the traffic up to the stack towards the proxy.
 */
static __always_inline int
__ctx_redirect_to_proxy(struct __ctx_buff *ctx, void *tuple __maybe_unused,
			__be16 proxy_port, bool from_host __maybe_unused,
			bool ipv4 __maybe_unused)
{
	int result __maybe_unused = CTX_ACT_OK;

#ifdef ENABLE_TPROXY
	if (!from_host)
		ctx->mark |= MARK_MAGIC_TO_PROXY;
	else
#endif
		ctx->mark = MARK_MAGIC_TO_PROXY | (proxy_port << 16);

	cilium_dbg(ctx, DBG_CAPTURE_PROXY_PRE, proxy_port, 0);

#ifdef ENABLE_TPROXY
	if (proxy_port && !from_host) {
#ifdef ENABLE_IPV4
		if (ipv4)
			result = ctx_redirect_to_proxy_ingress4(ctx, tuple, proxy_port);
#endif /* ENABLE_IPV4 */
#ifdef ENABLE_IPV6
		if (!ipv4)
			result = ctx_redirect_to_proxy_ingress6(ctx, tuple, proxy_port);
#endif /* ENABLE_IPV6 */
	}
#endif /* ENABLE_TPROXY */
	ctx_change_type(ctx, PACKET_HOST); /* Required for ingress packets from overlay */
	return result;
}
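
/* Illustrative sketch of the mark encoding used above and in
 * ctx_redirect_to_proxy_first() (derived from the expressions in this file;
 * anything beyond that is an assumption):
 *
 *	ctx->mark = MARK_MAGIC_TO_PROXY | (proxy_port << 16);
 *
 * The upper 16 bits carry the __be16 proxy port, and the lower bits carry
 * the magic cookie that the host stack matches in order to deliver the
 * packet to the local proxy.
 */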

#ifdef ENABLE_IPV4
static __always_inline int
ctx_redirect_to_proxy4(struct __ctx_buff *ctx, void *tuple __maybe_unused,
		       __be16 proxy_port, bool from_host __maybe_unused)
{
	return __ctx_redirect_to_proxy(ctx, tuple, proxy_port, from_host, true);
}
#endif /* ENABLE_IPV4 */

#ifdef ENABLE_IPV6
static __always_inline int
ctx_redirect_to_proxy6(struct __ctx_buff *ctx, void *tuple __maybe_unused,
		       __be16 proxy_port, bool from_host __maybe_unused)
{
	return __ctx_redirect_to_proxy(ctx, tuple, proxy_port, from_host, false);
}
#endif /* ENABLE_IPV6 */

#ifdef ENABLE_TPROXY
#define IP_TUPLE_EXTRACT_FN(NAME, PREFIX)				\
/**									\
 * extract_tuple4 / extract_tuple6					\
 *									\
 * Extracts the packet 5-tuple into 'tuple'.				\
 *									\
 * Note that it doesn't fully initialize 'tuple' as the directionality	\
 * bit is unused in the proxy paths.					\
 */									\
static __always_inline int						\
NAME(struct __ctx_buff *ctx, struct PREFIX ## _ct_tuple *tuple)		\
{									\
	int err, l4_off;						\
									\
	err = PREFIX ## _extract_tuple(ctx, tuple, &l4_off);		\
	if (err != CTX_ACT_OK)						\
		return err;						\
									\
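	/* Both L4 ports sit in the first 4 bytes of the TCP/UDP	\
	 * header, and 'dport'/'sport' are adjacent __be16 fields in	\
	 * the ct_tuple, so a single 4-byte load fills both at once.	\
	 */								\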
	if (ctx_load_bytes(ctx, l4_off, &tuple->dport, 4) < 0)		\
		return DROP_CT_INVALID_HDR;				\
									\
	__ ## PREFIX ## _ct_tuple_reverse(tuple);			\
									\
	return CTX_ACT_OK;						\
}

#ifdef ENABLE_IPV4
IP_TUPLE_EXTRACT_FN(extract_tuple4, ipv4)
#endif /* ENABLE_IPV4 */
#ifdef ENABLE_IPV6
IP_TUPLE_EXTRACT_FN(extract_tuple6, ipv6)
#endif /* ENABLE_IPV6 */
#endif /* ENABLE_TPROXY */

/**
 * ctx_redirect_to_proxy_first() applies changes to the context to forward
 * the packet towards the proxy. It is designed to run as the first function
 * that accesses the context from the current BPF program.
 */
static __always_inline int
ctx_redirect_to_proxy_first(struct __ctx_buff *ctx, __be16 proxy_port)
{
	int ret = CTX_ACT_OK;
#if defined(ENABLE_TPROXY)
	__u16 proto;

	/**
	 * For reply traffic to the egress proxy for a local endpoint, we skip
	 * the policy & proxy_port lookup and just hairpin, relying on local
	 * stack routing via ctx->mark to ensure that the return traffic
	 * reaches the proxy. This is only relevant for endpoint-routes mode,
	 * but we don't have a macro for that, so the logic applies
	 * unconditionally here.
	 * See ct_state.proxy_redirect usage in bpf_lxc.c for more info.
	 */
	if (!proxy_port)
		goto mark;

	if (!validate_ethertype(ctx, &proto))
		return DROP_UNSUPPORTED_L2;

	ret = DROP_UNKNOWN_L3;
	switch (proto) {
#ifdef ENABLE_IPV6
	case bpf_htons(ETH_P_IPV6): {
		struct ipv6_ct_tuple tuple;

		ret = extract_tuple6(ctx, &tuple);
		if (ret < 0)
			return ret;
		ret = ctx_redirect_to_proxy_ingress6(ctx, &tuple, proxy_port);
		break;
	}
#endif /* ENABLE_IPV6 */
#ifdef ENABLE_IPV4
	case bpf_htons(ETH_P_IP): {
		struct ipv4_ct_tuple tuple;

		ret = extract_tuple4(ctx, &tuple);
		if (ret < 0)
			return ret;
		ret = ctx_redirect_to_proxy_ingress4(ctx, &tuple, proxy_port);
		break;
	}
#endif /* ENABLE_IPV4 */
	default:
		goto out;
	}
#endif /* ENABLE_TPROXY */

mark: __maybe_unused
	cilium_dbg(ctx, DBG_CAPTURE_PROXY_POST, proxy_port, 0);
	ctx->mark = MARK_MAGIC_TO_PROXY | (proxy_port << 16);
	ctx_change_type(ctx, PACKET_HOST);

out: __maybe_unused
	return ret;
}
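
/* Hypothetical caller sketch (actual call sites live elsewhere in the
 * datapath, e.g. bpf_lxc.c): once the policy verdict yields an L7 proxy
 * port, hand the packet off before anything else touches the context:
 *
 *	int ret = ctx_redirect_to_proxy_first(ctx, proxy_port);
 *	if (ret < 0)
 *		return ret;	// DROP_* code, e.g. DROP_PROXY_LOOKUP_FAILED
 */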

/**
 * tc_index_skip_ingress_proxy - returns true if the packet originates from the ingress proxy
 */
static __always_inline bool tc_index_skip_ingress_proxy(struct __ctx_buff *ctx)
{
	volatile __u32 tc_index = ctx->tc_index;
#ifdef DEBUG
	if (tc_index & TC_INDEX_F_SKIP_INGRESS_PROXY)
		cilium_dbg(ctx, DBG_SKIP_PROXY, tc_index, 0);
#endif

	return tc_index & TC_INDEX_F_SKIP_INGRESS_PROXY;
}

/**
 * tc_index_skip_egress_proxy - returns true if the packet originates from the egress proxy
 */
static __always_inline bool tc_index_skip_egress_proxy(struct __ctx_buff *ctx)
{
	volatile __u32 tc_index = ctx->tc_index;
#ifdef DEBUG
	if (tc_index & TC_INDEX_F_SKIP_EGRESS_PROXY)
		cilium_dbg(ctx, DBG_SKIP_PROXY, tc_index, 0);
#endif

	return tc_index & TC_INDEX_F_SKIP_EGRESS_PROXY;
}
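
/* Hypothetical usage of the two helpers above: policy programs consult
 * them to avoid re-applying policy to traffic the proxy has already
 * handled:
 *
 *	if (tc_index_skip_ingress_proxy(ctx))
 *		return CTX_ACT_OK;	// already verified by the proxy
 */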
#endif /* __LIB_PROXY_H_ */