Home | History | Annotate | Download | only in bpf
      1 /* SPDX-License-Identifier: GPL-2.0
      2  *  Copyright(c) 2018 Jesper Dangaard Brouer.
      3  *
      4  * XDP/TC VLAN manipulation example
      5  *
      6  * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
      7  * else the VLAN tags are NOT inlined in the packet payload:
      8  *
      9  *  # ethtool -K ixgbe2 rxvlan off
     10  *
     11  * Verify setting:
     12  *  # ethtool -k ixgbe2 | grep rx-vlan-offload
     13  *  rx-vlan-offload: off
     14  *
     15  */
     16 #include <stddef.h>
     17 #include <stdbool.h>
     18 #include <string.h>
     19 #include <linux/bpf.h>
     20 #include <linux/if_ether.h>
     21 #include <linux/if_vlan.h>
     22 #include <linux/in.h>
     23 #include <linux/pkt_cls.h>
     24 
     25 #include "bpf_helpers.h"
     26 #include "bpf_endian.h"
     27 
     28 /* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here
     29  *
     30  *	struct vlan_hdr - vlan header
     31  *	@h_vlan_TCI: priority and VLAN ID
     32  *	@h_vlan_encapsulated_proto: packet type ID or len
     33  */
     34 struct _vlan_hdr {
     35 	__be16 h_vlan_TCI;
     36 	__be16 h_vlan_encapsulated_proto;
     37 };
     38 #define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
     39 #define VLAN_PRIO_SHIFT		13
     40 #define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
     41 #define VLAN_TAG_PRESENT	VLAN_CFI_MASK
     42 #define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
     43 #define VLAN_N_VID		4096
     44 
     45 struct parse_pkt {
     46 	__u16 l3_proto;
     47 	__u16 l3_offset;
     48 	__u16 vlan_outer;
     49 	__u16 vlan_inner;
     50 	__u8  vlan_outer_offset;
     51 	__u8  vlan_inner_offset;
     52 };
     53 
     54 char _license[] SEC("license") = "GPL";
     55 
     56 static __always_inline
     57 bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
     58 {
     59 	__u16 eth_type;
     60 	__u8 offset;
     61 
     62 	offset = sizeof(*eth);
     63 	/* Make sure packet is large enough for parsing eth + 2 VLAN headers */
     64 	if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
     65 		return false;
     66 
     67 	eth_type = eth->h_proto;
     68 
     69 	/* Handle outer VLAN tag */
     70 	if (eth_type == bpf_htons(ETH_P_8021Q)
     71 	    || eth_type == bpf_htons(ETH_P_8021AD)) {
     72 		struct _vlan_hdr *vlan_hdr;
     73 
     74 		vlan_hdr = (void *)eth + offset;
     75 		pkt->vlan_outer_offset = offset;
     76 		pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
     77 				& VLAN_VID_MASK;
     78 		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
     79 		offset += sizeof(*vlan_hdr);
     80 	}
     81 
     82 	/* Handle inner (double) VLAN tag */
     83 	if (eth_type == bpf_htons(ETH_P_8021Q)
     84 	    || eth_type == bpf_htons(ETH_P_8021AD)) {
     85 		struct _vlan_hdr *vlan_hdr;
     86 
     87 		vlan_hdr = (void *)eth + offset;
     88 		pkt->vlan_inner_offset = offset;
     89 		pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
     90 				& VLAN_VID_MASK;
     91 		eth_type        = vlan_hdr->h_vlan_encapsulated_proto;
     92 		offset += sizeof(*vlan_hdr);
     93 	}
     94 
     95 	pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
     96 	pkt->l3_offset = offset;
     97 
     98 	return true;
     99 }
    100 
    101 /* Hint, VLANs are choosen to hit network-byte-order issues */
    102 #define TESTVLAN 4011 /* 0xFAB */
    103 // #define TO_VLAN  4000 /* 0xFA0 (hint 0xOA0 = 160) */
    104 
    105 SEC("xdp_drop_vlan_4011")
    106 int  xdp_prognum0(struct xdp_md *ctx)
    107 {
    108 	void *data_end = (void *)(long)ctx->data_end;
    109 	void *data     = (void *)(long)ctx->data;
    110 	struct parse_pkt pkt = { 0 };
    111 
    112 	if (!parse_eth_frame(data, data_end, &pkt))
    113 		return XDP_ABORTED;
    114 
    115 	/* Drop specific VLAN ID example */
    116 	if (pkt.vlan_outer == TESTVLAN)
    117 		return XDP_ABORTED;
    118 	/*
    119 	 * Using XDP_ABORTED makes it possible to record this event,
    120 	 * via tracepoint xdp:xdp_exception like:
    121 	 *  # perf record -a -e xdp:xdp_exception
    122 	 *  # perf script
    123 	 */
    124 	return XDP_PASS;
    125 }
    126 /*
Commands to set up a VLAN on Linux to test that packets get dropped:
    128 
    129  export ROOTDEV=ixgbe2
    130  export VLANID=4011
    131  ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
    132  ip link set dev  $ROOTDEV.$VLANID up
    133 
    134  ip link set dev $ROOTDEV mtu 1508
    135  ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID
    136 
    137 Load prog with ip tool:
    138 
    139  ip link set $ROOTDEV xdp off
    140  ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011
    141 
    142 */
    143 
    144 /* Changing VLAN to zero, have same practical effect as removing the VLAN. */
    145 #define TO_VLAN	0
    146 
    147 SEC("xdp_vlan_change")
    148 int  xdp_prognum1(struct xdp_md *ctx)
    149 {
    150 	void *data_end = (void *)(long)ctx->data_end;
    151 	void *data     = (void *)(long)ctx->data;
    152 	struct parse_pkt pkt = { 0 };
    153 
    154 	if (!parse_eth_frame(data, data_end, &pkt))
    155 		return XDP_ABORTED;
    156 
    157 	/* Change specific VLAN ID */
    158 	if (pkt.vlan_outer == TESTVLAN) {
    159 		struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset;
    160 
    161 		/* Modifying VLAN, preserve top 4 bits */
    162 		vlan_hdr->h_vlan_TCI =
    163 			bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
    164 				  | TO_VLAN);
    165 	}
    166 
    167 	return XDP_PASS;
    168 }
    169 
    170 /*
 * Show that XDP and TC can cooperate to create a VLAN rewriter.
 * 1. Create an XDP prog that can "pop"/remove a VLAN header.
 * 2. Create a TC-bpf prog that can add a VLAN header on egress.
    174  */
    175 
    176 #ifndef ETH_ALEN /* Ethernet MAC address length */
    177 #define ETH_ALEN	6	/* bytes */
    178 #endif
    179 #define VLAN_HDR_SZ	4	/* bytes */
    180 
    181 SEC("xdp_vlan_remove_outer")
    182 int  xdp_prognum2(struct xdp_md *ctx)
    183 {
    184 	void *data_end = (void *)(long)ctx->data_end;
    185 	void *data     = (void *)(long)ctx->data;
    186 	struct parse_pkt pkt = { 0 };
    187 	char *dest;
    188 
    189 	if (!parse_eth_frame(data, data_end, &pkt))
    190 		return XDP_ABORTED;
    191 
    192 	/* Skip packet if no outer VLAN was detected */
    193 	if (pkt.vlan_outer_offset == 0)
    194 		return XDP_PASS;
    195 
    196 	/* Moving Ethernet header, dest overlap with src, memmove handle this */
    197 	dest = data;
    198 	dest+= VLAN_HDR_SZ;
    199 	/*
    200 	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
    201 	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
    202 	 */
    203 	__builtin_memmove(dest, data, ETH_ALEN * 2);
    204 	/* Note: LLVM built-in memmove inlining require size to be constant */
    205 
    206 	/* Move start of packet header seen by Linux kernel stack */
    207 	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
    208 
    209 	return XDP_PASS;
    210 }
    211 
    212 static __always_inline
    213 void shift_mac_4bytes_16bit(void *data)
    214 {
    215 	__u16 *p = data;
    216 
    217 	p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
    218 	p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
    219 	p[5] = p[3];
    220 	p[4] = p[2];
    221 	p[3] = p[1];
    222 	p[2] = p[0];
    223 }
    224 
    225 static __always_inline
    226 void shift_mac_4bytes_32bit(void *data)
    227 {
    228 	__u32 *p = data;
    229 
    230 	/* Assuming VLAN hdr present. The 4 bytes in p[3] that gets
    231 	 * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
    232 	 * The vlan_hdr->h_vlan_encapsulated_proto take over role as
    233 	 * ethhdr->h_proto.
    234 	 */
    235 	p[3] = p[2];
    236 	p[2] = p[1];
    237 	p[1] = p[0];
    238 }
    239 
    240 SEC("xdp_vlan_remove_outer2")
    241 int  xdp_prognum3(struct xdp_md *ctx)
    242 {
    243 	void *data_end = (void *)(long)ctx->data_end;
    244 	void *data     = (void *)(long)ctx->data;
    245 	struct ethhdr *orig_eth = data;
    246 	struct parse_pkt pkt = { 0 };
    247 
    248 	if (!parse_eth_frame(orig_eth, data_end, &pkt))
    249 		return XDP_ABORTED;
    250 
    251 	/* Skip packet if no outer VLAN was detected */
    252 	if (pkt.vlan_outer_offset == 0)
    253 		return XDP_PASS;
    254 
    255 	/* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
    256 	shift_mac_4bytes_32bit(data);
    257 
    258 	/* Move start of packet header seen by Linux kernel stack */
    259 	bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ);
    260 
    261 	return XDP_PASS;
    262 }
    263 
    264 /*=====================================
    265  *  BELOW: TC-hook based ebpf programs
    266  * ====================================
 * The TC-clsact eBPF programs (currently) need to be attached via TC commands
    268  */
    269 
    270 SEC("tc_vlan_push")
    271 int _tc_progA(struct __sk_buff *ctx)
    272 {
    273 	bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
    274 
    275 	return TC_ACT_OK;
    276 }
    277 /*
    278 Commands to setup TC to use above bpf prog:
    279 
    280 export ROOTDEV=ixgbe2
    281 export FILE=xdp_vlan01_kern.o
    282 
    283 # Re-attach clsact to clear/flush existing role
    284 tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
    285 tc qdisc add dev $ROOTDEV clsact
    286 
    287 # Attach BPF prog EGRESS
    288 tc filter add dev $ROOTDEV egress \
    289   prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
    290 
    291 tc filter show dev $ROOTDEV egress
    292 */
    293