Revision d8e2262a5044c1a05b4da51e5d0976f10c487a9f authored by Saif Hasan on 21 September 2018, 21:30:05 UTC, committed by David S. Miller on 24 September 2018, 19:19:27 UTC
Summary:

This appears to be necessary and sufficient change to enable `MPLS` on
`ip6gre` tunnels (RFC4023).

This diff allows IP6GRE devices to be recognized by MPLS kernel module
and hence user can configure interface to accept packets with mpls
headers as well setup mpls routes on them.

Test Plan:

Test plan consists of multiple containers connected via GRE-V6 tunnel.
Then carrying out testing steps as below.

- Carry out necessary sysctl settings on all containers

```
sysctl -w net.mpls.platform_labels=65536
sysctl -w net.mpls.ip_ttl_propagate=1
sysctl -w net.mpls.conf.lo.input=1
```

- Establish IP6GRE tunnels

```
ip -6 tunnel add name if_1_2_1 mode ip6gre \
  local 2401:db00:21:6048:feed:0::1 \
  remote 2401:db00:21:6048:feed:0::2 key 1
ip link set dev if_1_2_1 up
sysctl -w net.mpls.conf.if_1_2_1.input=1
ip -4 addr add 169.254.0.2/31 dev if_1_2_1 scope link

ip -6 tunnel add name if_1_3_1 mode ip6gre \
  local 2401:db00:21:6048:feed:0::1 \
  remote 2401:db00:21:6048:feed:0::3 key 1
ip link set dev if_1_3_1 up
sysctl -w net.mpls.conf.if_1_3_1.input=1
ip -4 addr add 169.254.0.4/31 dev if_1_3_1 scope link
```

- Install MPLS encap rules on node-1 towards node-2

```
ip route add 192.168.0.11/32 nexthop encap mpls 32/64 \
  via inet 169.254.0.3 dev if_1_2_1
```

- Install MPLS forwarding rules on node-2 and node-3
```
// node2
ip -f mpls route add 32 via inet 169.254.0.7 dev if_2_4_1

// node3
ip -f mpls route add 64 via inet 169.254.0.12 dev if_4_3_1
```

- Ping 192.168.0.11 (node4) from 192.168.0.1 (node1) (where routing
  towards 192.168.0.1 is via IP route directly towards node1 from node4)
```
ping 192.168.0.11
```

- tcpdump on interface to capture ping packets wrapped within MPLS
  header which inturn wrapped within IP6GRE header

```
16:43:41.121073 IP6
  2401:db00:21:6048:feed::1 > 2401:db00:21:6048:feed::2:
  DSTOPT GREv0, key=0x1, length 100:
  MPLS (label 32, exp 0, ttl 255) (label 64, exp 0, [S], ttl 255)
  IP 192.168.0.1 > 192.168.0.11:
  ICMP echo request, id 1208, seq 45, length 64

0x0000:  6000 2cdb 006c 3c3f 2401 db00 0021 6048  `.,..l<?$....!`H
0x0010:  feed 0000 0000 0001 2401 db00 0021 6048  ........$....!`H
0x0020:  feed 0000 0000 0002 2f00 0401 0401 0100  ......../.......
0x0030:  2000 8847 0000 0001 0002 00ff 0004 01ff  ...G............
0x0040:  4500 0054 3280 4000 ff01 c7cb c0a8 0001  E..T2.@.........
0x0050:  c0a8 000b 0800 a8d7 04b8 002d 2d3c a05b  ...........--<.[
0x0060:  0000 0000 bcd8 0100 0000 0000 1011 1213  ................
0x0070:  1415 1617 1819 1a1b 1c1d 1e1f 2021 2223  .............!"#
0x0080:  2425 2627 2829 2a2b 2c2d 2e2f 3031 3233  $%&'()*+,-./0123
0x0090:  3435 3637                                4567
```

Signed-off-by: Saif Hasan <has@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent d2f85c9
Raw File
dcscb.c
/*
 * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block
 *
 * Created by:	Nicolas Pitre, May 2012
 * Copyright:	(C) 2012-2013  Linaro Limited
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/errno.h>
#include <linux/of_address.h>
#include <linux/vexpress.h>
#include <linux/arm-cci.h>

#include <asm/mcpm.h>
#include <asm/proc-fns.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/cp15.h>


#define RST_HOLD0	0x0
#define RST_HOLD1	0x4
#define SYS_SWRESET	0x8
#define RST_STAT0	0xc
#define RST_STAT1	0x10
#define EAG_CFG_R	0x20
#define EAG_CFG_W	0x24
#define KFC_CFG_R	0x28
#define KFC_CFG_W	0x2c
#define DCS_CFG_R	0x30

static void __iomem *dcscb_base;
static int dcscb_allcpus_mask[2];

static int dcscb_cpu_powerup(unsigned int cpu, unsigned int cluster)
{
	unsigned int rst_hold, cpumask = (1 << cpu);

	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
	if (cluster >= 2 || !(cpumask & dcscb_allcpus_mask[cluster]))
		return -EINVAL;

	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
	rst_hold &= ~(cpumask | (cpumask << 4));
	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
	return 0;
}

static int dcscb_cluster_powerup(unsigned int cluster)
{
	unsigned int rst_hold;

	pr_debug("%s: cluster %u\n", __func__, cluster);
	if (cluster >= 2)
		return -EINVAL;

	/* remove cluster reset and add individual CPU's reset */
	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
	rst_hold &= ~(1 << 8);
	rst_hold |= dcscb_allcpus_mask[cluster];
	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
	return 0;
}

static void dcscb_cpu_powerdown_prepare(unsigned int cpu, unsigned int cluster)
{
	unsigned int rst_hold;

	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
	BUG_ON(cluster >= 2 || !((1 << cpu) & dcscb_allcpus_mask[cluster]));

	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
	rst_hold |= (1 << cpu);
	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
}

static void dcscb_cluster_powerdown_prepare(unsigned int cluster)
{
	unsigned int rst_hold;

	pr_debug("%s: cluster %u\n", __func__, cluster);
	BUG_ON(cluster >= 2);

	rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
	rst_hold |= (1 << 8);
	writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
}

static void dcscb_cpu_cache_disable(void)
{
	/* Disable and flush the local CPU cache. */
	v7_exit_coherency_flush(louis);
}

static void dcscb_cluster_cache_disable(void)
{
	/* Flush all cache levels for this cluster. */
	v7_exit_coherency_flush(all);

	/*
	 * A full outer cache flush could be needed at this point
	 * on platforms with such a cache, depending on where the
	 * outer cache sits. In some cases the notion of a "last
	 * cluster standing" would need to be implemented if the
	 * outer cache is shared across clusters. In any case, when
	 * the outer cache needs flushing, there is no concurrent
	 * access to the cache controller to worry about and no
	 * special locking besides what is already provided by the
	 * MCPM state machinery is needed.
	 */

	/*
	 * Disable cluster-level coherency by masking
	 * incoming snoops and DVM messages:
	 */
	cci_disable_port_by_cpu(read_cpuid_mpidr());
}

static const struct mcpm_platform_ops dcscb_power_ops = {
	.cpu_powerup		= dcscb_cpu_powerup,
	.cluster_powerup	= dcscb_cluster_powerup,
	.cpu_powerdown_prepare	= dcscb_cpu_powerdown_prepare,
	.cluster_powerdown_prepare = dcscb_cluster_powerdown_prepare,
	.cpu_cache_disable	= dcscb_cpu_cache_disable,
	.cluster_cache_disable	= dcscb_cluster_cache_disable,
};

extern void dcscb_power_up_setup(unsigned int affinity_level);

static int __init dcscb_init(void)
{
	struct device_node *node;
	unsigned int cfg;
	int ret;

	if (!cci_probed())
		return -ENODEV;

	node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
	if (!node)
		return -ENODEV;
	dcscb_base = of_iomap(node, 0);
	if (!dcscb_base)
		return -EADDRNOTAVAIL;
	cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
	dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
	dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;

	ret = mcpm_platform_register(&dcscb_power_ops);
	if (!ret)
		ret = mcpm_sync_init(dcscb_power_up_setup);
	if (ret) {
		iounmap(dcscb_base);
		return ret;
	}

	pr_info("VExpress DCSCB support installed\n");

	/*
	 * Future entries into the kernel can now go
	 * through the cluster entry vectors.
	 */
	vexpress_flags_set(__pa_symbol(mcpm_entry_point));

	return 0;
}

early_initcall(dcscb_init);
back to top