/*
 * Source: https://github.com/cilium/cilium
 * Raw File: cilium-map-migrate.c
 * Tip revision: 48a403fb2f43df2c82fab158042cd9aa53581b7d authored by
 * Martynas Pumputis on 18 December 2019, 10:40:09 UTC
 * WIP: kubeproxy-free CI
 * Tip revision: 48a403f
 */
/*
 *  Copyright (C) 2017 Authors of Cilium
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 *  Parts from iproute2 bpf.c loader code:
 *
 *  This program is free software; you can distribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 *  Authors:
 *
 *    Daniel Borkmann <daniel@iogearbox.net>
 *    Jiri Pirko <jiri@resnulli.us>
 *    Alexei Starovoitov <ast@kernel.org>
 */

#include <stdio.h>
#include <syslog.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <limits.h>

#include <sys/syscall.h>
#include <sys/stat.h>

#include <arpa/inet.h>

#include <linux/bpf.h>

#include "elf/libelf.h"
#include "elf/gelf.h"

#include "iproute2/bpf_elf.h"

#ifndef EM_BPF
# define EM_BPF		247
#endif

#define ELF_MAX_MAPS	64

#define STATE_PENDING	"pending"

#define BPF_ENV_MNT "CILIUM_BPF_MNT"

/* Per-section scratch data filled in by bpf_fill_section_data(). */
struct bpf_elf_sec_data {
	GElf_Shdr	sec_hdr;	/* copy of the section header */
	Elf_Data	*sec_data;	/* section contents as returned by elf_getdata() */
	const char	*sec_name;	/* section name looked up with elf_strptr() */
};

/* State gathered while inspecting one eBPF ELF object file. */
struct bpf_elf_ctx {
	GElf_Ehdr	elf_hdr;	/* ELF header read with gelf_getehdr() */
	Elf		*elf_fd;	/* libelf handle obtained from elf_begin() */
	Elf_Data	*sym_tab;	/* data of the ".symtab" section */
	Elf_Data	*str_tab;	/* data of the ".strtab" section */
	Elf_Data	*map_tab;	/* data of the ELF_SECTION_MAPS section */
	int		map_len;	/* size of the maps section data; bpf_fetch_maps_end() divides it by the number of map symbols */
	int		map_num;	/* number of map symbols, set in bpf_fetch_maps_end() */
	int		map_sec;	/* section index of the maps section */
	int		sym_num;	/* symbol table size divided by its entry size */
	int		obj_fd;		/* descriptor returned by open() for the object file */
};

/* Provide the bpf syscall number ourselves for the architectures listed
 * below when the included headers do not define __NR_bpf.
 */
#ifndef __NR_bpf
# if defined(__x86_64__)
#  define __NR_bpf 321
# elif defined(__i386__)
#  define __NR_bpf 357
# elif defined(__aarch64__)
#  define __NR_bpf 280
# else
#  error __NR_bpf not defined.
# endif
#endif

/* Issue the bpf system call through syscall(), passing cmd, attr and
 * size on unchanged, and hand back whatever syscall() returned.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	long res = syscall(__NR_bpf, cmd, attr, size);

	return res;
}

/* Provide the renameat2 syscall number ourselves for the architectures
 * listed below when the included headers do not define __NR_renameat2.
 */
#ifndef __NR_renameat2
# if defined(__x86_64__)
#  define __NR_renameat2 316
# elif defined(__i386__)
#  define __NR_renameat2 353
# elif defined(__aarch64__)
#  define __NR_renameat2 276
# else
#  error __NR_renameat2 not defined.
# endif
#endif

/* Issue the renameat2 system call through syscall(). All five arguments
 * are passed on unchanged and the result of syscall() is returned.
 */
static int renameat2(int dfd1, const char *path1,
		     int dfd2, const char *path2,
		     unsigned int flags)
{
	long res = syscall(__NR_renameat2, dfd1, path1, dfd2, path2, flags);

	return res;
}

/* Convert a pointer into the __u64 representation by first going through
 * unsigned long, so the value is widened as an unsigned integer.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	const unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}

static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
{
	if (ctx->elf_hdr.e_type != ET_REL ||
	    (ctx->elf_hdr.e_machine != EM_NONE &&
	     ctx->elf_hdr.e_machine != EM_BPF) ||
	    ctx->elf_hdr.e_version != EV_CURRENT) {
		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
		return -EINVAL;
	}

	switch (ctx->elf_hdr.e_ident[EI_DATA]) {
	default:
		fprintf(stderr, "ELF format error, wrong endianness info?\n");
		return -EINVAL;
	case ELFDATA2LSB:
		if (htons(1) == 1) {
			fprintf(stderr,
				"We are big endian, eBPF object is little endian!\n");
			return -EIO;
		}
		break;
	case ELFDATA2MSB:
		if (htons(1) != 1) {
			fprintf(stderr,
				"We are little endian, eBPF object is big endian!\n");
			return -EIO;
		}
		break;
	}

	return 0;
}

/* Open the object file at pathname and prepare ctx for parsing.
 *
 * On success ctx->obj_fd, ctx->elf_fd and ctx->elf_hdr are set up and 0
 * is returned; the caller releases them with bpf_elf_close(). On failure
 * everything acquired so far is released again and a negative value is
 * returned: the raw open() result if the file cannot be opened, -EINVAL
 * if libelf cannot read the file or it is not an ELF object, -EIO if the
 * libelf version or the ELF header cannot be obtained, or the error from
 * bpf_elf_check_ehdr().
 */
static int bpf_elf_init(struct bpf_elf_ctx *ctx, const char *pathname)
{
	int ret;

	if (elf_version(EV_CURRENT) == EV_NONE)
		return -EIO;
	ctx->obj_fd = open(pathname, O_RDONLY);
	if (ctx->obj_fd < 0)
		return ctx->obj_fd;
	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
	if (!ctx->elf_fd) {
		ret = -EINVAL;
		goto out_fd;
	}
	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
		ret = -EINVAL;
		/* elf_begin() succeeded, so its handle must be released
		 * as well, not only the file descriptor.
		 */
		goto out_elf;
	}
	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
	    &ctx->elf_hdr) {
		ret = -EIO;
		goto out_elf;
	}
	ret = bpf_elf_check_ehdr(ctx);
	if (ret < 0)
		goto out_elf;
	return 0;
out_elf:
	elf_end(ctx->elf_fd);
out_fd:
	close(ctx->obj_fd);
	return ret;
}

/* Release what bpf_elf_init() acquired: first the libelf handle, then
 * the descriptor of the object file. Both results are ignored.
 */
static void bpf_elf_close(struct bpf_elf_ctx *ctx)
{
	(void)elf_end(ctx->elf_fd);
	(void)close(ctx->obj_fd);
}

static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
				    const GElf_Sym *sym)
{
	return ctx->str_tab->d_buf + sym->st_name;
}

static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
{
	GElf_Sym sym;
	int off, i;

	for (off = 0; off < end; off += ctx->map_len) {
		/* Order doesn't need to be linear here, hence we walk
		 * the table again.
		 */
		for (i = 0; i < ctx->sym_num; i++) {
			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
				continue;
			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
			    !(GELF_ST_TYPE(sym.st_info) == STT_NOTYPE ||
			      GELF_ST_TYPE(sym.st_info) == STT_OBJECT) ||
			    sym.st_shndx != ctx->map_sec)
				continue;
			if (sym.st_value == off)
				break;
			if (i == ctx->sym_num - 1)
				return -1;
		}
	}

	return off == end ? 0 : -1;
}

/* Find the name of the map stored at byte offset off of the maps section.
 *
 * The symbol table is scanned for the first global symbol of type NOTYPE
 * or OBJECT that belongs to the maps section and whose value equals off.
 * Returns its name as given by bpf_str_tab_name(), or NULL if no symbol
 * matches.
 */
static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, unsigned long off)
{
	GElf_Sym entry;
	int idx;

	for (idx = 0; idx < ctx->sym_num; idx++) {
		int type;

		/* Entries that cannot be read are skipped. */
		if (gelf_getsym(ctx->sym_tab, idx, &entry) != &entry)
			continue;

		type = GELF_ST_TYPE(entry.st_info);
		if (GELF_ST_BIND(entry.st_info) == STB_GLOBAL &&
		    (type == STT_NOTYPE || type == STT_OBJECT) &&
		    entry.st_shndx == ctx->map_sec &&
		    entry.st_value == off)
			return bpf_str_tab_name(ctx, &entry);
	}

	return NULL;
}

/* Count the symbols that describe maps: global symbols of type NOTYPE or
 * OBJECT that are located in the maps section. Symbol table entries that
 * cannot be read are not counted.
 */
static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
{
	GElf_Sym entry;
	int count = 0;
	int idx;

	for (idx = 0; idx < ctx->sym_num; idx++) {
		int type;

		if (gelf_getsym(ctx->sym_tab, idx, &entry) != &entry)
			continue;

		type = GELF_ST_TYPE(entry.st_info);
		if (GELF_ST_BIND(entry.st_info) == STB_GLOBAL &&
		    (type == STT_NOTYPE || type == STT_OBJECT) &&
		    entry.st_shndx == ctx->map_sec)
			count++;
	}

	return count;
}

/* Reconstruct the properties of an already created map from the fdinfo
 * file of its descriptor.
 *
 * *map is zeroed first; then type, key size, value size, max entries and
 * flags are taken from the matching "key:\tvalue" lines of
 * /proc/<pid>/fdinfo/<fd>. Lines with other keys are ignored and fields
 * without a matching line stay zero.
 *
 * Returns 0 on success and -EIO if the fdinfo file cannot be opened.
 */
static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map)
{
	char file[PATH_MAX], buff[256];
	unsigned int val;
	FILE *fp;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(map, 0, sizeof(*map));

	fp = fopen(file, "r");
	if (!fp) {
		fprintf(stderr, "No procfs support?!\n");
		return -EIO;
	}

	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "map_type:\t%u", &val) == 1)
			map->type = val;
		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
			map->size_key = val;
		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
			map->size_value = val;
		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
			map->max_elem = val;
		/* The flags are reported in hexadecimal; %x parses that
		 * (with or without a 0x prefix) and, unlike %i, matches
		 * the unsigned type of val.
		 */
		else if (sscanf(buff, "map_flags:\t%x", &val) == 1)
			map->flags = val;
	}

	fclose(fp);
	return 0;
}

/* Run the BPF_OBJ_GET command for the node at pathname.
 *
 * Only the pathname member of the attribute union is filled in, the rest
 * is left zero-initialized. Returns the result of bpf() unchanged.
 */
static int bpf_obj_get(const char *pathname)
{
	union bpf_attr attr = {};
	int fd;

	attr.pathname = bpf_ptr_to_u64(pathname);
	fd = bpf(BPF_OBJ_GET, &attr, sizeof(attr));

	return fd;
}

/* Per-map callback run by bpf_fetch_maps_end() for each map whose pinning
 * is PIN_GLOBAL_NS. It receives the ELF context, the map definition from
 * the object file, the map's symbol name and the exit value given on the
 * command line. A non-zero return stops the iteration and is passed back
 * to the caller.
 */
typedef int (*bpf_handle_state_t)(struct bpf_elf_ctx *ctx,
				  const struct bpf_elf_map *map,
				  const char *name, int exit);

/* Directory containing the pinned map nodes; filled by fs_base_init(). */
char fs_base[PATH_MAX + 1];

/* Initialize fs_base.
 *
 * If the environment variable named by BPF_ENV_MNT is set, fs_base
 * becomes "<value>/tc/globals" (truncated to the buffer size); otherwise
 * the default "/sys/fs/bpf/tc/globals" is used.
 */
void fs_base_init()
{
	const char *root = getenv(BPF_ENV_MNT);

	if (!root) {
		strcpy(fs_base, "/sys/fs/bpf/tc/globals");
		return;
	}

	snprintf(fs_base, sizeof(fs_base), "%s/tc/globals", root);
}

/* bpf_handle_state_t callback used for the -s option.
 *
 * Compares the map definition from the object file with the properties
 * of the node pinned at "<fs_base>/<name>". If they differ, the pinned
 * node is renamed to "<fs_base>/<name>:pending".
 *
 * ctx and exit are not used by this callback.
 *
 * Returns 0 if no node is pinned under that name or the properties
 * match, a negative errno value if the node cannot be inspected, and
 * otherwise the result of rename().
 */
static int bpf_handle_pending(struct bpf_elf_ctx *ctx,
			      const struct bpf_elf_map *map,
			      const char *name, int exit)
{
	char file[PATH_MAX + 1], dest[PATH_MAX + 1];
	struct bpf_elf_map pinned;
	struct stat sb;
	int fd, ret, err;

	snprintf(file, sizeof(file), "%s/%s", fs_base, name);
	ret = stat(file, &sb);
	if (ret < 0) {
		if (errno == ENOENT)
			return 0;
		/* Save errno, fprintf() is allowed to modify it. */
		err = errno;
		fprintf(stderr, "Cannot stat node %s!\n", file);
		return -err;
	}

	fd = bpf_obj_get(file);
	if (fd < 0) {
		err = errno;
		fprintf(stderr, "Cannot open pinned node %s!\n", file);
		return -err;
	}
	ret = bpf_derive_elf_map_from_fdinfo(fd, &pinned);
	close(fd);
	if (ret < 0) {
		fprintf(stderr, "Cannot fetch fdinfo from %s!\n", file);
		return ret;
	}

	/* id and pinning are not derived from fdinfo, so take them from
	 * the object file's definition to keep them out of the comparison.
	 */
	pinned.id = map->id;
	pinned.pinning = map->pinning;
	if (!memcmp(map, &pinned, sizeof(pinned)))
		return 0;

	snprintf(dest, sizeof(dest), "%s:%s", file, STATE_PENDING);
	syslog(LOG_WARNING, "Property mismatch in %s, migrating node to %s!\n",
	       file, dest);
	/* Best effort: with a NULL times argument the node's timestamps
	 * are set to the current time before it is moved away.
	 */
	utimensat(AT_FDCWD, file, NULL, 0);
	return rename(file, dest);
}

/* bpf_handle_state_t callback used for the -e option.
 *
 * Looks for the node "<fs_base>/<name>:pending". If exit is non-zero the
 * node is moved back to "<fs_base>/<name>"; if exit is zero the pending
 * node is unlinked.
 *
 * ctx and map are not used by this callback.
 *
 * Returns 0 if there is no pending node or after attempting to restore
 * it, a negative errno value if the node cannot be inspected, and the
 * result of unlink() when the pending node is removed.
 */
static int bpf_handle_finalize(struct bpf_elf_ctx *ctx,
			       const struct bpf_elf_map *map,
			       const char *name, int exit)
{
	char file[PATH_MAX + 1], dest[PATH_MAX + 1];
	struct stat sb;
	int ret, err;

	snprintf(file, sizeof(file), "%s/%s:%s", fs_base, name,
		 STATE_PENDING);
	ret = stat(file, &sb);
	if (ret < 0) {
		if (errno == ENOENT)
			return 0;
		/* Save errno, fprintf() is allowed to modify it. */
		err = errno;
		fprintf(stderr, "Cannot stat node %s!\n", file);
		return -err;
	}

	if (exit) {
		snprintf(dest, sizeof(dest), "%s/%s", fs_base, name);
		syslog(LOG_WARNING, "Restoring migrated node %s into %s due to bad exit.\n",
		       file, dest);
		/* Restoring is best effort: the results of both calls are
		 * ignored. Flag value 1 is RENAME_NOREPLACE in the Linux
		 * renameat2 interface.
		 */
		utimensat(AT_FDCWD, file, NULL, 0);
		renameat2(AT_FDCWD, file, AT_FDCWD, dest, 1);
		return 0;
	} else {
		syslog(LOG_WARNING, "Unlinking migrated node %s due to good exit.\n",
		       file);
		return unlink(file);
	}
}

/* Collect header, name and contents of the section with index section.
 *
 * *data is cleared first and only filled when everything could be
 * obtained. Returns 0 on success, -EINVAL if the section does not exist,
 * -ENOENT if it has no name or no size, and -EIO if the header cannot be
 * read or the section does not consist of exactly one data buffer.
 */
static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
				 struct bpf_elf_sec_data *data)
{
	GElf_Shdr shdr;
	Elf_Data *payload;
	Elf_Scn *scn;
	char *name;

	memset(data, 0, sizeof(*data));

	scn = elf_getscn(ctx->elf_fd, section);
	if (scn == NULL)
		return -EINVAL;

	if (gelf_getshdr(scn, &shdr) != &shdr)
		return -EIO;

	name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, shdr.sh_name);
	if (name == NULL || shdr.sh_size == 0)
		return -ENOENT;

	payload = elf_getdata(scn, NULL);
	if (payload == NULL)
		return -EIO;
	/* A second data buffer for the same section is treated as an error. */
	if (elf_getdata(scn, payload) != NULL)
		return -EIO;

	data->sec_hdr = shdr;
	data->sec_name = name;
	data->sec_data = payload;
	return 0;
}

/* Remember the symbol table section in ctx and compute the number of
 * entries as section size divided by entry size.
 *
 * Returns 0 on success and -EINVAL if the section header reports an
 * entry size of zero, which would otherwise lead to a division by zero.
 * The section argument is not used.
 */
static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
			    struct bpf_elf_sec_data *data)
{
	if (data->sec_hdr.sh_entsize == 0)
		return -EINVAL;

	ctx->sym_tab = data->sec_data;
	ctx->sym_num = data->sec_hdr.sh_size /
		       data->sec_hdr.sh_entsize;
	return 0;
}

static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
			    struct bpf_elf_sec_data *data)
{
	ctx->str_tab = data->sec_data;
	return 0;
}

static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
				struct bpf_elf_sec_data *data)
{
	ctx->map_tab = data->sec_data;
	ctx->map_len = data->sec_data->d_size;
	ctx->map_sec = section;
	return 0;
}

/* Validate the maps section and run cb on every globally pinned map.
 *
 * The section must contain between 1 and ELF_MAX_MAPS map symbols, its
 * size must be exactly that many struct bpf_elf_map, and every entry
 * offset must be covered by a map symbol. Afterwards ctx->map_len holds
 * the size of a single entry and ctx->map_num the number of maps.
 *
 * cb is called with exit passed through for each map whose pinning is
 * PIN_GLOBAL_NS; the first non-zero result ends the walk.
 *
 * Returns -EINVAL if validation fails, -EIO if a map's name cannot be
 * determined, otherwise the last value returned by cb (0 if cb was never
 * called).
 */
static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx, bpf_handle_state_t cb,
			      int exit)
{
	int i, ret = 0, sym_num = bpf_map_num_sym(ctx);
	struct bpf_elf_map *map;
	unsigned long off;
	const char *name;
	int sec_len;

	if (sym_num <= 0 || sym_num > ELF_MAX_MAPS) {
		fprintf(stderr, "%d maps not supported in current map section!\n",
			sym_num);
		return -EINVAL;
	}

	if (ctx->map_len != sym_num * sizeof(struct bpf_elf_map)) {
		fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
		return -EINVAL;
	}

	/* Offsets have to be checked up to the end of the section, so
	 * keep its total length before map_len is reduced to the size
	 * of one entry.
	 */
	sec_len = ctx->map_len;
	ctx->map_len /= sym_num;
	if (bpf_map_verify_all_offs(ctx, sec_len)) {
		fprintf(stderr, "Different struct bpf_elf_map in use!\n");
		return -EINVAL;
	}

	ctx->map_num = sym_num;
	for (i = 0, map = ctx->map_tab->d_buf; i < sym_num; i++, map++) {
		if (map->pinning != PIN_GLOBAL_NS)
			continue;
		/* Byte offset of this entry inside the maps section. */
		off = (const char *)map - (const char *)ctx->map_tab->d_buf;
		name = bpf_map_fetch_name(ctx, off);
		if (!name) {
			fprintf(stderr, "Count not fetch map name at off %lu!\n", off);
			return -EIO;
		}
		ret = cb(ctx, map, name, exit);
		if (ret)
			break;
	}

	return ret;
}

/* Tell whether the symbol table, the string table and the maps section
 * have all been found in the object file.
 */
static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
{
	if (!ctx->sym_tab)
		return false;
	if (!ctx->str_tab)
		return false;

	return ctx->map_tab != NULL;
}

/* Walk all sections of the object file, pick up the maps section, the
 * symbol table and the string table, and process the maps when all
 * three are present.
 *
 * Sections for which bpf_fill_section_data() fails are skipped; such a
 * failure is not reported to the caller. An object without map data is
 * not an error.
 *
 * Returns 0 if there is nothing to do, a negative value if one of the
 * sections of interest cannot be parsed, and otherwise the result of
 * bpf_fetch_maps_end().
 */
static int bpf_check_ancillary(struct bpf_elf_ctx *ctx, bpf_handle_state_t cb,
			       int exit)
{
	struct bpf_elf_sec_data data;
	int i, ret;

	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
		/* Do not keep the error of a skipped section around, it
		 * must not end up as this function's return value.
		 */
		if (bpf_fill_section_data(ctx, i, &data) < 0)
			continue;

		ret = 0;
		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
			ret = bpf_fetch_maps_begin(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
			 !strcmp(data.sec_name, ".symtab"))
			ret = bpf_fetch_symtab(ctx, i, &data);
		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
			 !strcmp(data.sec_name, ".strtab"))
			ret = bpf_fetch_strtab(ctx, i, &data);
		if (ret < 0) {
			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
				i);
			return ret;
		}
	}

	if (!bpf_has_map_data(ctx))
		return 0;

	ret = bpf_fetch_maps_end(ctx, cb, exit);
	if (ret < 0)
		fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");

	return ret;
}

/* Open the object file at pathname, run cb (with exit passed through) on
 * its pinned maps via bpf_check_ancillary() and close the file again.
 *
 * Returns the error from bpf_elf_init() if the file cannot be set up,
 * otherwise the result of bpf_check_ancillary().
 */
static int migrate_state(const char *pathname, bpf_handle_state_t cb, int exit)
{
	struct bpf_elf_ctx ctx = {};
	int err = bpf_elf_init(&ctx, pathname);

	if (err)
		return err;

	err = bpf_check_ancillary(&ctx, cb, exit);
	bpf_elf_close(&ctx);

	return err;
}

/* Entry point.
 *
 * Options:
 *   -s <object>  run bpf_handle_pending() on the maps of <object>
 *   -e <object>  run bpf_handle_finalize() on the maps of <object>
 *   -r <code>    exit value handed to the handler (default 0)
 *
 * Returns -1 on invalid usage, otherwise the result of migrate_state().
 */
int main(int argc, char **argv)
{
	const char *pathname = NULL;
	bpf_handle_state_t fn = NULL;
	int opt, exit_code = 0, ret;

	fs_base_init();

	while ((opt = getopt(argc, argv, "s:e:r:")) != -1) {
		switch (opt) {
		case 's':
		case 'e':
			pathname = optarg;
			fn = opt == 's' ?
			     bpf_handle_pending :
			     bpf_handle_finalize;
			break;
		case 'r': {
			char *end;
			long val;

			/* Reject values that are not a number instead of
			 * silently treating them as 0, which the finalize
			 * handler would take as a good exit.
			 */
			errno = 0;
			val = strtol(optarg, &end, 10);
			if (end == optarg || *end != '\0' || errno == ERANGE ||
			    val < INT_MIN || val > INT_MAX) {
				fprintf(stderr, "Invalid exit code '%s' for -r!\n",
					optarg);
				return -1;
			}
			exit_code = (int)val;
			break;
		}
		default:
			return -1;
		}
	}

	if (fn == NULL || pathname == NULL)
		return -1;

	/* Only open the log once the arguments are known to be usable, so
	 * that every openlog() is paired with a closelog().
	 */
	openlog("cilium-map-migrate", LOG_NDELAY, 0);
	ret = migrate_state(pathname, fn, exit_code);
	closelog();
	return ret;
}
/* back to top */