summaryrefslogtreecommitdiff
path: root/samples/bpf/xdp_router_ipv4.bpf.c
blob: 0643330d1d2e5e0191173e5c5120dbd5ec1d2d4c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
/* Copyright (C) 2017 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"

#define ETH_ALEN	6
#define ETH_P_8021Q	0x8100
#define ETH_P_8021AD	0x88A8

struct trie_value {
	__u8 prefix[4];
	__be64 value;
	int ifindex;
	int metric;
	__be32 gw;
};

/* Key for lpm_trie */
union key_4 {
	u32 b32[2];
	u8 b8[8];
};

struct arp_entry {
	__be64 mac;
	__be32 dst;
};

struct direct_map {
	struct arp_entry arp;
	int ifindex;
	__be64 mac;
};

/* Map for trie implementation */
struct {
	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
	__uint(key_size, 8);
	__uint(value_size, sizeof(struct trie_value));
	__uint(max_entries, 50);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} lpm_map SEC(".maps");

/* Map for ARP table */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, __be32);
	__type(value, __be64);
	__uint(max_entries, 50);
} arp_table SEC(".maps");

/* Map to keep the exact match entries in the route table */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, __be32);
	__type(value, struct direct_map);
	__uint(max_entries, 50);
} exact_match SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__uint(max_entries, 100);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_router_ipv4_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off = sizeof(*eth);
	struct datarec *rec;
	__be16 h_proto;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (rec)
		NO_TEAR_INC(rec->processed);

	if (data + nh_off > data_end)
		goto drop;

	h_proto = eth->h_proto;
	if (h_proto == bpf_htons(ETH_P_8021Q) ||
	    h_proto == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vhdr;

		vhdr = data + nh_off;
		nh_off += sizeof(struct vlan_hdr);
		if (data + nh_off > data_end)
			goto drop;

		h_proto = vhdr->h_vlan_encapsulated_proto;
	}

	switch (bpf_ntohs(h_proto)) {
	case ETH_P_ARP:
		if (rec)
			NO_TEAR_INC(rec->xdp_pass);
		return XDP_PASS;
	case ETH_P_IP: {
		struct iphdr *iph = data + nh_off;
		struct direct_map *direct_entry;
		__be64 *dest_mac, *src_mac;
		int forward_to;

		if (iph + 1 > data_end)
			goto drop;

		direct_entry = bpf_map_lookup_elem(&exact_match, &iph->daddr);

		/* Check for exact match, this would give a faster lookup */
		if (direct_entry && direct_entry->mac &&
		    direct_entry->arp.mac) {
			src_mac = &direct_entry->mac;
			dest_mac = &direct_entry->arp.mac;
			forward_to = direct_entry->ifindex;
		} else {
			struct trie_value *prefix_value;
			union key_4 key4;

			/* Look up in the trie for lpm */
			key4.b32[0] = 32;
			key4.b8[4] = iph->daddr & 0xff;
			key4.b8[5] = (iph->daddr >> 8) & 0xff;
			key4.b8[6] = (iph->daddr >> 16) & 0xff;
			key4.b8[7] = (iph->daddr >> 24) & 0xff;

			prefix_value = bpf_map_lookup_elem(&lpm_map, &key4);
			if (!prefix_value)
				goto drop;

			forward_to = prefix_value->ifindex;
			src_mac = &prefix_value->value;
			if (!src_mac)
				goto drop;

			dest_mac = bpf_map_lookup_elem(&arp_table, &iph->daddr);
			if (!dest_mac) {
				if (!prefix_value->gw)
					goto drop;

				dest_mac = bpf_map_lookup_elem(&arp_table,
							       &prefix_value->gw);
				if (!dest_mac) {
					/* Forward the packet to the kernel in
					 * order to trigger ARP discovery for
					 * the default gw.
					 */
					if (rec)
						NO_TEAR_INC(rec->xdp_pass);
					return XDP_PASS;
				}
			}
		}

		if (src_mac && dest_mac) {
			int ret;

			__builtin_memcpy(eth->h_dest, dest_mac, ETH_ALEN);
			__builtin_memcpy(eth->h_source, src_mac, ETH_ALEN);

			ret = bpf_redirect_map(&tx_port, forward_to, 0);
			if (ret == XDP_REDIRECT) {
				if (rec)
					NO_TEAR_INC(rec->xdp_redirect);
				return ret;
			}
		}
	}
	default:
		break;
	}
drop:
	if (rec)
		NO_TEAR_INC(rec->xdp_drop);

	return XDP_DROP;
}

char _license[] SEC("license") = "GPL";