https://github.com/cilium/cilium
Raw File
Tip revision: 40814d5b75bbf2d36eb8f98162a40700001fd143 authored by André Martins on 03 February 2020, 18:05:30 UTC
Prepare for release v1.6.6
Tip revision: 40814d5
bpf_overlay.c
/*
 *  Copyright (C) 2016-2019 Authors of Cilium
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include <node_config.h>
#include <netdev_config.h>

#include <bpf/api.h>

#include <stdint.h>
#include <stdio.h>

#include <linux/if_packet.h>

#include "lib/tailcall.h"
#include "lib/utils.h"
#include "lib/common.h"
#include "lib/maps.h"
#include "lib/ipv6.h"
#include "lib/eth.h"
#include "lib/dbg.h"
#include "lib/trace.h"
#include "lib/l3.h"
#include "lib/drop.h"
#include "lib/policy.h"
#include "lib/nodeport.h"

#ifdef ENABLE_IPV6
static inline int handle_ipv6(struct __sk_buff *skb, __u32 *identity)
{
	int ret, l4_off, l3_off = ETH_HLEN, hdrlen;
	void *data_end, *data;
	struct ipv6hdr *ip6;
	struct bpf_tunnel_key key = {};
	struct endpoint_info *ep;
	bool decrypted;

	/* verifier workaround (dereference of modified ctx ptr) */
	if (!revalidate_data_first(skb, &data, &data_end, &ip6))
		return DROP_INVALID;
#ifdef ENABLE_NODEPORT
	if (!bpf_skip_nodeport(skb)) {
		int ret = nodeport_lb6(skb, *identity);
		if (ret < 0)
			return ret;
	}
#endif
	ret = encap_remap_v6_host_address(skb, false);
	if (unlikely(ret < 0))
		return ret;

	if (!revalidate_data(skb, &data, &data_end, &ip6))
		return DROP_INVALID;

	decrypted = ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT);
	if (decrypted) {
		*identity = get_identity(skb);
	} else {
		if (unlikely(skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0))
			return DROP_NO_TUNNEL_KEY;
		*identity = key.tunnel_id;
	}

	cilium_dbg(skb, DBG_DECAP, key.tunnel_id, key.tunnel_label);

#ifdef ENABLE_IPSEC
	if (!decrypted) {
		/* IPSec is not currently enforce (feature coming soon)
		 * so for now just handle normally
		 */
		if (ip6->nexthdr != IPPROTO_ESP) {
			update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT);
			goto not_esp;
		}

		/* Decrypt "key" is determined by SPI */
		skb->mark = MARK_MAGIC_DECRYPT;
		set_identity(skb, key.tunnel_id);
		/* To IPSec stack on cilium_vxlan we are going to pass
		 * this up the stack but eth_type_trans has already labeled
		 * this as an OTHERHOST type packet. To avoid being dropped
		 * by IP stack before IPSec can be processed mark as a HOST
		 * packet.
		 */
		skb_change_type(skb, PACKET_HOST);
		return TC_ACT_OK;
	} else {
		key.tunnel_id = get_identity(skb);
		skb->mark = 0;
	}
not_esp:
#endif

	/* Lookup IPv6 address in list of local endpoints */
	if ((ep = lookup_ip6_endpoint(ip6)) != NULL) {
		/* Let through packets to the node-ip so they are
		 * processed by the local ip stack */
		if (ep->flags & ENDPOINT_F_HOST)
			goto to_host;

		__u8 nexthdr = ip6->nexthdr;
		hdrlen = ipv6_hdrlen(skb, l3_off, &nexthdr);
		if (hdrlen < 0)
			return hdrlen;

		l4_off = l3_off + hdrlen;
		return ipv6_local_delivery(skb, l3_off, l4_off, key.tunnel_id, ip6, nexthdr, ep, METRIC_INGRESS);
	}

to_host:
#ifdef HOST_IFINDEX
	if (1) {
		union macaddr host_mac = HOST_IFINDEX_MAC;
		union macaddr router_mac = NODE_MAC;
		int ret;

		ret = ipv6_l3(skb, ETH_HLEN, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, METRIC_INGRESS);
		if (ret != TC_ACT_OK)
			return ret;

		cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX);
		return redirect(HOST_IFINDEX, 0);
	}
#else
	return TC_ACT_OK;
#endif
}

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_LXC) int tail_handle_ipv6(struct __sk_buff *skb)
{
	__u32 src_identity = 0;
	int ret = handle_ipv6(skb, &src_identity);

	if (IS_ERR(ret))
		return send_drop_notify_error(skb, src_identity, ret, TC_ACT_SHOT, METRIC_INGRESS);

	return ret;
}
#endif /* ENABLE_IPV6 */

#ifdef ENABLE_IPV4
static inline int handle_ipv4(struct __sk_buff *skb, __u32 *identity)
{
	void *data_end, *data;
	struct iphdr *ip4;
	struct endpoint_info *ep;
	struct bpf_tunnel_key key = {};
	bool decrypted;
	int l4_off;

	/* verifier workaround (dereference of modified ctx ptr) */
	if (!revalidate_data_first(skb, &data, &data_end, &ip4))
		return DROP_INVALID;
#ifdef ENABLE_NODEPORT
	if (!bpf_skip_nodeport(skb)) {
		int ret = nodeport_lb4(skb, *identity);
		if (ret < 0)
			return ret;
	}
#endif
	if (!revalidate_data(skb, &data, &data_end, &ip4))
		return DROP_INVALID;

	decrypted = ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT);
	/* If packets are decrypted the key has already been pushed into metadata. */
	if (decrypted) {
		*identity = get_identity(skb);
	} else {
		if (unlikely(skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0))
			return DROP_NO_TUNNEL_KEY;
		*identity = key.tunnel_id;
	}

	l4_off = ETH_HLEN + ipv4_hdrlen(ip4);
#ifdef ENABLE_IPSEC
	if (!decrypted) {
		/* IPSec is not currently enforce (feature coming soon)
		 * so for now just handle normally
		 */
		if (ip4->protocol != IPPROTO_ESP) {
			update_metrics(skb->len, METRIC_INGRESS, REASON_PLAINTEXT);
			goto not_esp;
		}

		skb->mark = MARK_MAGIC_DECRYPT;
		set_identity(skb, key.tunnel_id);
		/* To IPSec stack on cilium_vxlan we are going to pass
		 * this up the stack but eth_type_trans has already labeled
		 * this as an OTHERHOST type packet. To avoid being dropped
		 * by IP stack before IPSec can be processed mark as a HOST
		 * packet.
		 */
		skb_change_type(skb, PACKET_HOST);
		return TC_ACT_OK;
	} else {
		key.tunnel_id = get_identity(skb);
		skb->mark = 0;
	}
not_esp:
#endif

	/* Lookup IPv4 address in list of local endpoints */
	if ((ep = lookup_ip4_endpoint(ip4)) != NULL) {
		/* Let through packets to the node-ip so they are
		 * processed by the local ip stack */
		if (ep->flags & ENDPOINT_F_HOST)
			goto to_host;

		return ipv4_local_delivery(skb, ETH_HLEN, l4_off, key.tunnel_id, ip4, ep, METRIC_INGRESS);
	}

to_host:
#ifdef HOST_IFINDEX
	if (1) {
		union macaddr host_mac = HOST_IFINDEX_MAC;
		union macaddr router_mac = NODE_MAC;
		int ret;

		ret = ipv4_l3(skb, ETH_HLEN, (__u8 *) &router_mac.addr, (__u8 *) &host_mac.addr, ip4);
		if (ret != TC_ACT_OK)
			return ret;

		cilium_dbg_capture(skb, DBG_CAPTURE_DELIVERY, HOST_IFINDEX);
		return redirect(HOST_IFINDEX, 0);
	}
#else
	return TC_ACT_OK;
#endif
}

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_LXC) int tail_handle_ipv4(struct __sk_buff *skb)
{
	__u32 src_identity = 0;
	int ret = handle_ipv4(skb, &src_identity);

	if (IS_ERR(ret))
		return send_drop_notify_error(skb, src_identity, ret, TC_ACT_SHOT, METRIC_INGRESS);

	return ret;
}
#endif /* ENABLE_IPV4 */

__section("from-overlay")
int from_overlay(struct __sk_buff *skb)
{
	__u16 proto;
	int ret;

	bpf_clear_cb(skb);
	bpf_clear_nodeport(skb);

	if (!validate_ethertype(skb, &proto)) {
		/* Pass unknown traffic to the stack */
		ret = TC_ACT_OK;
		goto out;
	}

#ifdef ENABLE_IPSEC
	if ((skb->mark & MARK_MAGIC_HOST_MASK) == MARK_MAGIC_DECRYPT) {
		send_trace_notify(skb, TRACE_FROM_OVERLAY, get_identity(skb), 0, 0,
				  skb->ingress_ifindex,
				  TRACE_REASON_ENCRYPTED, TRACE_PAYLOAD_LEN);
	} else
#endif
	{
		send_trace_notify(skb, TRACE_FROM_OVERLAY, 0, 0, 0,
				  skb->ingress_ifindex, 0, TRACE_PAYLOAD_LEN);
	}

	switch (proto) {
	case bpf_htons(ETH_P_IPV6):
#ifdef ENABLE_IPV6
		ep_tail_call(skb, CILIUM_CALL_IPV6_FROM_LXC);
		ret = DROP_MISSED_TAIL_CALL;
#else
		ret = DROP_UNKNOWN_L3;
#endif
		break;

	case bpf_htons(ETH_P_IP):
#ifdef ENABLE_IPV4
		ep_tail_call(skb, CILIUM_CALL_IPV4_FROM_LXC);
		ret = DROP_MISSED_TAIL_CALL;
#else
		ret = DROP_UNKNOWN_L3;
#endif
		break;

	default:
		/* Pass unknown traffic to the stack */
		ret = TC_ACT_OK;
	}
out:
	if (IS_ERR(ret))
		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_INGRESS);
	return ret;
}

#ifdef ENABLE_NODEPORT
declare_tailcall_if(is_defined(ENABLE_IPV6), CILIUM_CALL_ENCAP_NODEPORT_NAT)
int tail_handle_nat_fwd(struct __sk_buff *skb)
{
	int ret;

	if ((skb->mark & MARK_MAGIC_SNAT_DONE) == MARK_MAGIC_SNAT_DONE)
		return TC_ACT_OK;
	ret = nodeport_nat_fwd(skb, true);
	if (IS_ERR(ret))
		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS);
	return ret;
}
#endif

__section("to-overlay")
int to_overlay(struct __sk_buff *skb)
{
	int ret = encap_remap_v6_host_address(skb, true);
	if (unlikely(ret < 0))
		goto out;
#ifdef ENABLE_NODEPORT
	invoke_tailcall_if(is_defined(ENABLE_IPV6),
			   CILIUM_CALL_ENCAP_NODEPORT_NAT, tail_handle_nat_fwd);
#endif
out:
	if (IS_ERR(ret))
		return send_drop_notify_error(skb, 0, ret, TC_ACT_SHOT, METRIC_EGRESS);
	return ret;
}

BPF_LICENSE("GPL");
back to top