Home | History | Annotate | Download | only in bpf
      1 // SPDX-License-Identifier: GPL-2.0
      2 // Copyright (c) 2017 Facebook
      3 #include <stddef.h>
      4 #include <stdbool.h>
      5 #include <string.h>
      6 #include <linux/pkt_cls.h>
      7 #include <linux/bpf.h>
      8 #include <linux/in.h>
      9 #include <linux/if_ether.h>
     10 #include <linux/ip.h>
     11 #include <linux/ipv6.h>
     12 #include <linux/icmp.h>
     13 #include <linux/icmpv6.h>
     14 #include <linux/tcp.h>
     15 #include <linux/udp.h>
     16 #include "bpf_helpers.h"
     17 
     /* Print a message through bpf_trace_printk().
      * The format string is first copied into a local char array; that array
      * and its size (terminating NUL included) are passed on, followed by any
      * extra arguments given to the macro.
      */
     #define bpf_printk(fmt, ...)					\
     ({								\
     	char printk_fmt_[] = fmt;				\
     	bpf_trace_printk(printk_fmt_, sizeof(printk_fmt_),	\
     			 ##__VA_ARGS__);			\
     })
     24 
     25 static __u32 rol32(__u32 word, unsigned int shift)
     26 {
     27 	return (word << shift) | (word >> ((-shift) & 31));
     28 }
     29 
     /* Copy-paste of jhash from the kernel sources, to make sure llvm
      * can compile it into a valid sequence of bpf instructions.
      *
      * __jhash_mix - mix three values in place.
      * a, b and c must be modifiable lvalues; each is read and written several
      * times.  The body is wrapped in do { } while (0) so that an invocation
      * followed by ';' is exactly one statement, also inside if/else.
      */
     #define __jhash_mix(a, b, c)			\
     do {						\
     	a -= c;  a ^= rol32(c, 4);  c += b;	\
     	b -= a;  b ^= rol32(a, 6);  a += c;	\
     	c -= b;  c ^= rol32(b, 8);  b += a;	\
     	a -= c;  a ^= rol32(c, 16); c += b;	\
     	b -= a;  b ^= rol32(a, 19); a += c;	\
     	c -= b;  c ^= rol32(b, 4);  b += a;	\
     } while (0)
     42 
     /* __jhash_final - final mixing of three values in place; the callers in
      * this file use c as the hash result afterwards.
      * a, b and c must be modifiable lvalues.  The body is wrapped in
      * do { } while (0) so that an invocation followed by ';' is exactly one
      * statement, also inside if/else.
      */
     #define __jhash_final(a, b, c)			\
     do {						\
     	c ^= b; c -= rol32(b, 14);		\
     	a ^= c; a -= rol32(c, 11);		\
     	b ^= a; b -= rol32(a, 25);		\
     	c ^= b; c -= rol32(b, 16);		\
     	a ^= c; a -= rol32(c, 4);		\
     	b ^= a; b -= rol32(a, 14);		\
     	c ^= b; c -= rol32(b, 24);		\
     } while (0)
     53 
     /* Constant added into the starting state of every hash computed here. */
     #define JHASH_INITVAL		0xdeadbeef

     /* Short alias for the unsigned values the hash helpers operate on. */
     typedef unsigned int u32;
     57 
     58 static __attribute__ ((noinline))
     59 u32 jhash(const void *key, u32 length, u32 initval)
     60 {
     61 	u32 a, b, c;
     62 	const unsigned char *k = key;
     63 
     64 	a = b = c = JHASH_INITVAL + length + initval;
     65 
     66 	while (length > 12) {
     67 		a += *(u32 *)(k);
     68 		b += *(u32 *)(k + 4);
     69 		c += *(u32 *)(k + 8);
     70 		__jhash_mix(a, b, c);
     71 		length -= 12;
     72 		k += 12;
     73 	}
     74 	switch (length) {
     75 	case 12: c += (u32)k[11]<<24;
     76 	case 11: c += (u32)k[10]<<16;
     77 	case 10: c += (u32)k[9]<<8;
     78 	case 9:  c += k[8];
     79 	case 8:  b += (u32)k[7]<<24;
     80 	case 7:  b += (u32)k[6]<<16;
     81 	case 6:  b += (u32)k[5]<<8;
     82 	case 5:  b += k[4];
     83 	case 4:  a += (u32)k[3]<<24;
     84 	case 3:  a += (u32)k[2]<<16;
     85 	case 2:  a += (u32)k[1]<<8;
     86 	case 1:  a += k[0];
     87 		 __jhash_final(a, b, c);
     88 	case 0: /* Nothing left to add */
     89 		break;
     90 	}
     91 
     92 	return c;
     93 }
     94 
     95 static __attribute__ ((noinline))
     96 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
     97 {
     98 	a += initval;
     99 	b += initval;
    100 	c += initval;
    101 	__jhash_final(a, b, c);
    102 	return c;
    103 }
    104 
    105 static __attribute__ ((noinline))
    106 u32 jhash_2words(u32 a, u32 b, u32 initval)
    107 {
    108 	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
    109 }
    110 
    111 struct flow_key {
    112 	union {
    113 		__be32 src;
    114 		__be32 srcv6[4];
    115 	};
    116 	union {
    117 		__be32 dst;
    118 		__be32 dstv6[4];
    119 	};
    120 	union {
    121 		__u32 ports;
    122 		__u16 port16[2];
    123 	};
    124 	__u8 proto;
    125 };
    126 
    127 struct packet_description {
    128 	struct flow_key flow;
    129 	__u8 flags;
    130 };
    131 
    132 struct ctl_value {
    133 	union {
    134 		__u64 value;
    135 		__u32 ifindex;
    136 		__u8 mac[6];
    137 	};
    138 };
    139 
    140 struct vip_definition {
    141 	union {
    142 		__be32 vip;
    143 		__be32 vipv6[4];
    144 	};
    145 	__u16 port;
    146 	__u16 family;
    147 	__u8 proto;
    148 };
    149 
    150 struct vip_meta {
    151 	__u32 flags;
    152 	__u32 vip_num;
    153 };
    154 
    155 struct real_pos_lru {
    156 	__u32 pos;
    157 	__u64 atime;
    158 };
    159 
    160 struct real_definition {
    161 	union {
    162 		__be32 dst;
    163 		__be32 dstv6[4];
    164 	};
    165 	__u8 flags;
    166 };
    167 
    168 struct lb_stats {
    169 	__u64 v2;
    170 	__u64 v1;
    171 };
    172 
    173 struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
    174 	.type = BPF_MAP_TYPE_HASH,
    175 	.key_size = sizeof(struct vip_definition),
    176 	.value_size = sizeof(struct vip_meta),
    177 	.max_entries = 512,
    178 	.map_flags = 0,
    179 };
    180 
    181 struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
    182 	.type = BPF_MAP_TYPE_LRU_HASH,
    183 	.key_size = sizeof(struct flow_key),
    184 	.value_size = sizeof(struct real_pos_lru),
    185 	.max_entries = 300,
    186 	.map_flags = 1U << 1,
    187 };
    188 
    189 struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
    190 	.type = BPF_MAP_TYPE_ARRAY,
    191 	.key_size = sizeof(__u32),
    192 	.value_size = sizeof(__u32),
    193 	.max_entries = 12 * 655,
    194 	.map_flags = 0,
    195 };
    196 
    197 struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
    198 	.type = BPF_MAP_TYPE_ARRAY,
    199 	.key_size = sizeof(__u32),
    200 	.value_size = sizeof(struct real_definition),
    201 	.max_entries = 40,
    202 	.map_flags = 0,
    203 };
    204 
    205 struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
    206 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
    207 	.key_size = sizeof(__u32),
    208 	.value_size = sizeof(struct lb_stats),
    209 	.max_entries = 515,
    210 	.map_flags = 0,
    211 };
    212 
    213 struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
    214 	.type = BPF_MAP_TYPE_ARRAY,
    215 	.key_size = sizeof(__u32),
    216 	.value_size = sizeof(struct ctl_value),
    217 	.max_entries = 16,
    218 	.map_flags = 0,
    219 };
    220 
    /* Ethernet header as laid out at the start of the frame. */
    struct eth_hdr {
    	unsigned char eth_dest[6];	/* destination MAC */
    	unsigned char eth_source[6];	/* source MAC */
    	unsigned short eth_proto;	/* protocol field, compared raw */
    };
    226 
    227 static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
    228 {
    229 	__u64 off = sizeof(struct eth_hdr);
    230 	if (is_ipv6) {
    231 		off += sizeof(struct ipv6hdr);
    232 		if (is_icmp)
    233 			off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
    234 	} else {
    235 		off += sizeof(struct iphdr);
    236 		if (is_icmp)
    237 			off += sizeof(struct icmphdr) + sizeof(struct iphdr);
    238 	}
    239 	return off;
    240 }
    241 
    242 static __attribute__ ((noinline))
    243 bool parse_udp(void *data, void *data_end,
    244 	       bool is_ipv6, struct packet_description *pckt)
    245 {
    246 
    247 	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
    248 	__u64 off = calc_offset(is_ipv6, is_icmp);
    249 	struct udphdr *udp;
    250 	udp = data + off;
    251 
    252 	if (udp + 1 > data_end)
    253 		return 0;
    254 	if (!is_icmp) {
    255 		pckt->flow.port16[0] = udp->source;
    256 		pckt->flow.port16[1] = udp->dest;
    257 	} else {
    258 		pckt->flow.port16[0] = udp->dest;
    259 		pckt->flow.port16[1] = udp->source;
    260 	}
    261 	return 1;
    262 }
    263 
    264 static __attribute__ ((noinline))
    265 bool parse_tcp(void *data, void *data_end,
    266 	       bool is_ipv6, struct packet_description *pckt)
    267 {
    268 
    269 	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
    270 	__u64 off = calc_offset(is_ipv6, is_icmp);
    271 	struct tcphdr *tcp;
    272 
    273 	tcp = data + off;
    274 	if (tcp + 1 > data_end)
    275 		return 0;
    276 	if (tcp->syn)
    277 		pckt->flags |= (1 << 1);
    278 	if (!is_icmp) {
    279 		pckt->flow.port16[0] = tcp->source;
    280 		pckt->flow.port16[1] = tcp->dest;
    281 	} else {
    282 		pckt->flow.port16[0] = tcp->dest;
    283 		pckt->flow.port16[1] = tcp->source;
    284 	}
    285 	return 1;
    286 }
    287 
    288 static __attribute__ ((noinline))
    289 bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
    290 	      struct packet_description *pckt,
    291 	      struct real_definition *dst, __u32 pkt_bytes)
    292 {
    293 	struct eth_hdr *new_eth;
    294 	struct eth_hdr *old_eth;
    295 	struct ipv6hdr *ip6h;
    296 	__u32 ip_suffix;
    297 	void *data_end;
    298 	void *data;
    299 
    300 	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
    301 		return 0;
    302 	data = (void *)(long)xdp->data;
    303 	data_end = (void *)(long)xdp->data_end;
    304 	new_eth = data;
    305 	ip6h = data + sizeof(struct eth_hdr);
    306 	old_eth = data + sizeof(struct ipv6hdr);
    307 	if (new_eth + 1 > data_end ||
    308 	    old_eth + 1 > data_end || ip6h + 1 > data_end)
    309 		return 0;
    310 	memcpy(new_eth->eth_dest, cval->mac, 6);
    311 	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
    312 	new_eth->eth_proto = 56710;
    313 	ip6h->version = 6;
    314 	ip6h->priority = 0;
    315 	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
    316 
    317 	ip6h->nexthdr = IPPROTO_IPV6;
    318 	ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
    319 	ip6h->payload_len =
    320 	    __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
    321 	ip6h->hop_limit = 4;
    322 
    323 	ip6h->saddr.in6_u.u6_addr32[0] = 1;
    324 	ip6h->saddr.in6_u.u6_addr32[1] = 2;
    325 	ip6h->saddr.in6_u.u6_addr32[2] = 3;
    326 	ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
    327 	memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
    328 	return 1;
    329 }
    330 
    331 static __attribute__ ((noinline))
    332 bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
    333 	      struct packet_description *pckt,
    334 	      struct real_definition *dst, __u32 pkt_bytes)
    335 {
    336 
    337 	__u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
    338 	struct eth_hdr *new_eth;
    339 	struct eth_hdr *old_eth;
    340 	__u16 *next_iph_u16;
    341 	struct iphdr *iph;
    342 	__u32 csum = 0;
    343 	void *data_end;
    344 	void *data;
    345 
    346 	ip_suffix <<= 15;
    347 	ip_suffix ^= pckt->flow.src;
    348 	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
    349 		return 0;
    350 	data = (void *)(long)xdp->data;
    351 	data_end = (void *)(long)xdp->data_end;
    352 	new_eth = data;
    353 	iph = data + sizeof(struct eth_hdr);
    354 	old_eth = data + sizeof(struct iphdr);
    355 	if (new_eth + 1 > data_end ||
    356 	    old_eth + 1 > data_end || iph + 1 > data_end)
    357 		return 0;
    358 	memcpy(new_eth->eth_dest, cval->mac, 6);
    359 	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
    360 	new_eth->eth_proto = 8;
    361 	iph->version = 4;
    362 	iph->ihl = 5;
    363 	iph->frag_off = 0;
    364 	iph->protocol = IPPROTO_IPIP;
    365 	iph->check = 0;
    366 	iph->tos = 1;
    367 	iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
    368 	/* don't update iph->daddr, since it will overwrite old eth_proto
    369 	 * and multiple iterations of bpf_prog_run() will fail
    370 	 */
    371 
    372 	iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
    373 	iph->ttl = 4;
    374 
    375 	next_iph_u16 = (__u16 *) iph;
    376 #pragma clang loop unroll(full)
    377 	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
    378 		csum += *next_iph_u16++;
    379 	iph->check = ~((csum & 0xffff) + (csum >> 16));
    380 	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
    381 		return 0;
    382 	return 1;
    383 }
    384 
    385 static __attribute__ ((noinline))
    386 bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
    387 {
    388 	struct eth_hdr *new_eth;
    389 	struct eth_hdr *old_eth;
    390 
    391 	old_eth = *data;
    392 	new_eth = *data + sizeof(struct ipv6hdr);
    393 	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
    394 	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
    395 	if (inner_v4)
    396 		new_eth->eth_proto = 8;
    397 	else
    398 		new_eth->eth_proto = 56710;
    399 	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
    400 		return 0;
    401 	*data = (void *)(long)xdp->data;
    402 	*data_end = (void *)(long)xdp->data_end;
    403 	return 1;
    404 }
    405 
    406 static __attribute__ ((noinline))
    407 bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
    408 {
    409 	struct eth_hdr *new_eth;
    410 	struct eth_hdr *old_eth;
    411 
    412 	old_eth = *data;
    413 	new_eth = *data + sizeof(struct iphdr);
    414 	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
    415 	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
    416 	new_eth->eth_proto = 8;
    417 	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
    418 		return 0;
    419 	*data = (void *)(long)xdp->data;
    420 	*data_end = (void *)(long)xdp->data_end;
    421 	return 1;
    422 }
    423 
    424 static __attribute__ ((noinline))
    425 int swap_mac_and_send(void *data, void *data_end)
    426 {
    427 	unsigned char tmp_mac[6];
    428 	struct eth_hdr *eth;
    429 
    430 	eth = data;
    431 	memcpy(tmp_mac, eth->eth_source, 6);
    432 	memcpy(eth->eth_source, eth->eth_dest, 6);
    433 	memcpy(eth->eth_dest, tmp_mac, 6);
    434 	return XDP_TX;
    435 }
    436 
    437 static __attribute__ ((noinline))
    438 int send_icmp_reply(void *data, void *data_end)
    439 {
    440 	struct icmphdr *icmp_hdr;
    441 	__u16 *next_iph_u16;
    442 	__u32 tmp_addr = 0;
    443 	struct iphdr *iph;
    444 	__u32 csum1 = 0;
    445 	__u32 csum = 0;
    446 	__u64 off = 0;
    447 
    448 	if (data + sizeof(struct eth_hdr)
    449 	     + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
    450 		return XDP_DROP;
    451 	off += sizeof(struct eth_hdr);
    452 	iph = data + off;
    453 	off += sizeof(struct iphdr);
    454 	icmp_hdr = data + off;
    455 	icmp_hdr->type = 0;
    456 	icmp_hdr->checksum += 0x0007;
    457 	iph->ttl = 4;
    458 	tmp_addr = iph->daddr;
    459 	iph->daddr = iph->saddr;
    460 	iph->saddr = tmp_addr;
    461 	iph->check = 0;
    462 	next_iph_u16 = (__u16 *) iph;
    463 #pragma clang loop unroll(full)
    464 	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
    465 		csum += *next_iph_u16++;
    466 	iph->check = ~((csum & 0xffff) + (csum >> 16));
    467 	return swap_mac_and_send(data, data_end);
    468 }
    469 
    470 static __attribute__ ((noinline))
    471 int send_icmp6_reply(void *data, void *data_end)
    472 {
    473 	struct icmp6hdr *icmp_hdr;
    474 	struct ipv6hdr *ip6h;
    475 	__be32 tmp_addr[4];
    476 	__u64 off = 0;
    477 
    478 	if (data + sizeof(struct eth_hdr)
    479 	     + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
    480 		return XDP_DROP;
    481 	off += sizeof(struct eth_hdr);
    482 	ip6h = data + off;
    483 	off += sizeof(struct ipv6hdr);
    484 	icmp_hdr = data + off;
    485 	icmp_hdr->icmp6_type = 129;
    486 	icmp_hdr->icmp6_cksum -= 0x0001;
    487 	ip6h->hop_limit = 4;
    488 	memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
    489 	memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
    490 	memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
    491 	return swap_mac_and_send(data, data_end);
    492 }
    493 
    494 static __attribute__ ((noinline))
    495 int parse_icmpv6(void *data, void *data_end, __u64 off,
    496 		 struct packet_description *pckt)
    497 {
    498 	struct icmp6hdr *icmp_hdr;
    499 	struct ipv6hdr *ip6h;
    500 
    501 	icmp_hdr = data + off;
    502 	if (icmp_hdr + 1 > data_end)
    503 		return XDP_DROP;
    504 	if (icmp_hdr->icmp6_type == 128)
    505 		return send_icmp6_reply(data, data_end);
    506 	if (icmp_hdr->icmp6_type != 3)
    507 		return XDP_PASS;
    508 	off += sizeof(struct icmp6hdr);
    509 	ip6h = data + off;
    510 	if (ip6h + 1 > data_end)
    511 		return XDP_DROP;
    512 	pckt->flow.proto = ip6h->nexthdr;
    513 	pckt->flags |= (1 << 0);
    514 	memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
    515 	memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
    516 	return -1;
    517 }
    518 
    519 static __attribute__ ((noinline))
    520 int parse_icmp(void *data, void *data_end, __u64 off,
    521 	       struct packet_description *pckt)
    522 {
    523 	struct icmphdr *icmp_hdr;
    524 	struct iphdr *iph;
    525 
    526 	icmp_hdr = data + off;
    527 	if (icmp_hdr + 1 > data_end)
    528 		return XDP_DROP;
    529 	if (icmp_hdr->type == 8)
    530 		return send_icmp_reply(data, data_end);
    531 	if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
    532 		return XDP_PASS;
    533 	off += sizeof(struct icmphdr);
    534 	iph = data + off;
    535 	if (iph + 1 > data_end)
    536 		return XDP_DROP;
    537 	if (iph->ihl != 5)
    538 		return XDP_DROP;
    539 	pckt->flow.proto = iph->protocol;
    540 	pckt->flags |= (1 << 0);
    541 	pckt->flow.src = iph->daddr;
    542 	pckt->flow.dst = iph->saddr;
    543 	return -1;
    544 }
    545 
    546 static __attribute__ ((noinline))
    547 __u32 get_packet_hash(struct packet_description *pckt,
    548 		      bool hash_16bytes)
    549 {
    550 	if (hash_16bytes)
    551 		return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
    552 				    pckt->flow.ports, 24);
    553 	else
    554 		return jhash_2words(pckt->flow.src, pckt->flow.ports,
    555 				    24);
    556 }
    557 
    558 __attribute__ ((noinline))
    559 static bool get_packet_dst(struct real_definition **real,
    560 			   struct packet_description *pckt,
    561 			   struct vip_meta *vip_info,
    562 			   bool is_ipv6, void *lru_map)
    563 {
    564 	struct real_pos_lru new_dst_lru = { };
    565 	bool hash_16bytes = is_ipv6;
    566 	__u32 *real_pos, hash, key;
    567 	__u64 cur_time;
    568 
    569 	if (vip_info->flags & (1 << 2))
    570 		hash_16bytes = 1;
    571 	if (vip_info->flags & (1 << 3)) {
    572 		pckt->flow.port16[0] = pckt->flow.port16[1];
    573 		memset(pckt->flow.srcv6, 0, 16);
    574 	}
    575 	hash = get_packet_hash(pckt, hash_16bytes);
    576 	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
    577 	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
    578 		return 0;
    579 	key = 2 * vip_info->vip_num + hash % 2;
    580 	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
    581 	if (!real_pos)
    582 		return 0;
    583 	key = *real_pos;
    584 	*real = bpf_map_lookup_elem(&reals, &key);
    585 	if (!(*real))
    586 		return 0;
    587 	if (!(vip_info->flags & (1 << 1))) {
    588 		__u32 conn_rate_key = 512 + 2;
    589 		struct lb_stats *conn_rate_stats =
    590 		    bpf_map_lookup_elem(&stats, &conn_rate_key);
    591 
    592 		if (!conn_rate_stats)
    593 			return 1;
    594 		cur_time = bpf_ktime_get_ns();
    595 		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
    596 			conn_rate_stats->v1 = 1;
    597 			conn_rate_stats->v2 = cur_time;
    598 		} else {
    599 			conn_rate_stats->v1 += 1;
    600 			if (conn_rate_stats->v1 >= 1)
    601 				return 1;
    602 		}
    603 		if (pckt->flow.proto == IPPROTO_UDP)
    604 			new_dst_lru.atime = cur_time;
    605 		new_dst_lru.pos = key;
    606 		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
    607 	}
    608 	return 1;
    609 }
    610 
    611 __attribute__ ((noinline))
    612 static void connection_table_lookup(struct real_definition **real,
    613 				    struct packet_description *pckt,
    614 				    void *lru_map)
    615 {
    616 
    617 	struct real_pos_lru *dst_lru;
    618 	__u64 cur_time;
    619 	__u32 key;
    620 
    621 	dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
    622 	if (!dst_lru)
    623 		return;
    624 	if (pckt->flow.proto == IPPROTO_UDP) {
    625 		cur_time = bpf_ktime_get_ns();
    626 		if (cur_time - dst_lru->atime > 300000)
    627 			return;
    628 		dst_lru->atime = cur_time;
    629 	}
    630 	key = dst_lru->pos;
    631 	*real = bpf_map_lookup_elem(&reals, &key);
    632 }
    633 
    634 /* don't believe your eyes!
    635  * below function has 6 arguments whereas bpf and llvm allow maximum of 5
    636  * but since it's _static_ llvm can optimize one argument away
    637  */
    638 __attribute__ ((noinline))
    639 static int process_l3_headers_v6(struct packet_description *pckt,
    640 				 __u8 *protocol, __u64 off,
    641 				 __u16 *pkt_bytes, void *data,
    642 				 void *data_end)
    643 {
    644 	struct ipv6hdr *ip6h;
    645 	__u64 iph_len;
    646 	int action;
    647 
    648 	ip6h = data + off;
    649 	if (ip6h + 1 > data_end)
    650 		return XDP_DROP;
    651 	iph_len = sizeof(struct ipv6hdr);
    652 	*protocol = ip6h->nexthdr;
    653 	pckt->flow.proto = *protocol;
    654 	*pkt_bytes = __builtin_bswap16(ip6h->payload_len);
    655 	off += iph_len;
    656 	if (*protocol == 45) {
    657 		return XDP_DROP;
    658 	} else if (*protocol == 59) {
    659 		action = parse_icmpv6(data, data_end, off, pckt);
    660 		if (action >= 0)
    661 			return action;
    662 	} else {
    663 		memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
    664 		memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
    665 	}
    666 	return -1;
    667 }
    668 
    669 __attribute__ ((noinline))
    670 static int process_l3_headers_v4(struct packet_description *pckt,
    671 				 __u8 *protocol, __u64 off,
    672 				 __u16 *pkt_bytes, void *data,
    673 				 void *data_end)
    674 {
    675 	struct iphdr *iph;
    676 	__u64 iph_len;
    677 	int action;
    678 
    679 	iph = data + off;
    680 	if (iph + 1 > data_end)
    681 		return XDP_DROP;
    682 	if (iph->ihl != 5)
    683 		return XDP_DROP;
    684 	*protocol = iph->protocol;
    685 	pckt->flow.proto = *protocol;
    686 	*pkt_bytes = __builtin_bswap16(iph->tot_len);
    687 	off += 20;
    688 	if (iph->frag_off & 65343)
    689 		return XDP_DROP;
    690 	if (*protocol == IPPROTO_ICMP) {
    691 		action = parse_icmp(data, data_end, off, pckt);
    692 		if (action >= 0)
    693 			return action;
    694 	} else {
    695 		pckt->flow.src = iph->saddr;
    696 		pckt->flow.dst = iph->daddr;
    697 	}
    698 	return -1;
    699 }
    700 
    701 __attribute__ ((noinline))
    702 static int process_packet(void *data, __u64 off, void *data_end,
    703 			  bool is_ipv6, struct xdp_md *xdp)
    704 {
    705 
    706 	struct real_definition *dst = NULL;
    707 	struct packet_description pckt = { };
    708 	struct vip_definition vip = { };
    709 	struct lb_stats *data_stats;
    710 	struct eth_hdr *eth = data;
    711 	void *lru_map = &lru_cache;
    712 	struct vip_meta *vip_info;
    713 	__u32 lru_stats_key = 513;
    714 	__u32 mac_addr_pos = 0;
    715 	__u32 stats_key = 512;
    716 	struct ctl_value *cval;
    717 	__u16 pkt_bytes;
    718 	__u64 iph_len;
    719 	__u8 protocol;
    720 	__u32 vip_num;
    721 	int action;
    722 
    723 	if (is_ipv6)
    724 		action = process_l3_headers_v6(&pckt, &protocol, off,
    725 					       &pkt_bytes, data, data_end);
    726 	else
    727 		action = process_l3_headers_v4(&pckt, &protocol, off,
    728 					       &pkt_bytes, data, data_end);
    729 	if (action >= 0)
    730 		return action;
    731 	protocol = pckt.flow.proto;
    732 	if (protocol == IPPROTO_TCP) {
    733 		if (!parse_tcp(data, data_end, is_ipv6, &pckt))
    734 			return XDP_DROP;
    735 	} else if (protocol == IPPROTO_UDP) {
    736 		if (!parse_udp(data, data_end, is_ipv6, &pckt))
    737 			return XDP_DROP;
    738 	} else {
    739 		return XDP_TX;
    740 	}
    741 
    742 	if (is_ipv6)
    743 		memcpy(vip.vipv6, pckt.flow.dstv6, 16);
    744 	else
    745 		vip.vip = pckt.flow.dst;
    746 	vip.port = pckt.flow.port16[1];
    747 	vip.proto = pckt.flow.proto;
    748 	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
    749 	if (!vip_info) {
    750 		vip.port = 0;
    751 		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
    752 		if (!vip_info)
    753 			return XDP_PASS;
    754 		if (!(vip_info->flags & (1 << 4)))
    755 			pckt.flow.port16[1] = 0;
    756 	}
    757 	if (data_end - data > 1400)
    758 		return XDP_DROP;
    759 	data_stats = bpf_map_lookup_elem(&stats, &stats_key);
    760 	if (!data_stats)
    761 		return XDP_DROP;
    762 	data_stats->v1 += 1;
    763 	if (!dst) {
    764 		if (vip_info->flags & (1 << 0))
    765 			pckt.flow.port16[0] = 0;
    766 		if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
    767 			connection_table_lookup(&dst, &pckt, lru_map);
    768 		if (dst)
    769 			goto out;
    770 		if (pckt.flow.proto == IPPROTO_TCP) {
    771 			struct lb_stats *lru_stats =
    772 			    bpf_map_lookup_elem(&stats, &lru_stats_key);
    773 
    774 			if (!lru_stats)
    775 				return XDP_DROP;
    776 			if (pckt.flags & (1 << 1))
    777 				lru_stats->v1 += 1;
    778 			else
    779 				lru_stats->v2 += 1;
    780 		}
    781 		if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
    782 			return XDP_DROP;
    783 		data_stats->v2 += 1;
    784 	}
    785 out:
    786 	cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
    787 	if (!cval)
    788 		return XDP_DROP;
    789 	if (dst->flags & (1 << 0)) {
    790 		if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
    791 			return XDP_DROP;
    792 	} else {
    793 		if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
    794 			return XDP_DROP;
    795 	}
    796 	vip_num = vip_info->vip_num;
    797 	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
    798 	if (!data_stats)
    799 		return XDP_DROP;
    800 	data_stats->v1 += 1;
    801 	data_stats->v2 += pkt_bytes;
    802 
    803 	data = (void *)(long)xdp->data;
    804 	data_end = (void *)(long)xdp->data_end;
    805 	if (data + 4 > data_end)
    806 		return XDP_DROP;
    807 	*(u32 *)data = dst->dst;
    808 	return XDP_DROP;
    809 }
    810 
    811 __attribute__ ((section("xdp-test"), used))
    812 int balancer_ingress(struct xdp_md *ctx)
    813 {
    814 	void *data = (void *)(long)ctx->data;
    815 	void *data_end = (void *)(long)ctx->data_end;
    816 	struct eth_hdr *eth = data;
    817 	__u32 eth_proto;
    818 	__u32 nh_off;
    819 
    820 	nh_off = sizeof(struct eth_hdr);
    821 	if (data + nh_off > data_end)
    822 		return XDP_DROP;
    823 	eth_proto = eth->eth_proto;
    824 	if (eth_proto == 8)
    825 		return process_packet(data, nh_off, data_end, 0, ctx);
    826 	else if (eth_proto == 56710)
    827 		return process_packet(data, nh_off, data_end, 1, ctx);
    828 	else
    829 		return XDP_DROP;
    830 }
    831 
    /* License string and version word, each placed in its own named section. */
    char _license[] __attribute__ ((section("license"), used)) = "GPL";
    int _version __attribute__ ((section("version"), used)) = 1;
    834