1 /* SPDX-License-Identifier: GPL-2.0 2 * Copyright(c) 2018 Jesper Dangaard Brouer. 3 * 4 * XDP/TC VLAN manipulation example 5 * 6 * GOTCHA: Remember to disable NIC hardware offloading of VLANs, 7 * else the VLAN tags are NOT inlined in the packet payload: 8 * 9 * # ethtool -K ixgbe2 rxvlan off 10 * 11 * Verify setting: 12 * # ethtool -k ixgbe2 | grep rx-vlan-offload 13 * rx-vlan-offload: off 14 * 15 */ 16 #include <stddef.h> 17 #include <stdbool.h> 18 #include <string.h> 19 #include <linux/bpf.h> 20 #include <linux/if_ether.h> 21 #include <linux/if_vlan.h> 22 #include <linux/in.h> 23 #include <linux/pkt_cls.h> 24 25 #include "bpf_helpers.h" 26 #include "bpf_endian.h" 27 28 /* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here 29 * 30 * struct vlan_hdr - vlan header 31 * @h_vlan_TCI: priority and VLAN ID 32 * @h_vlan_encapsulated_proto: packet type ID or len 33 */ 34 struct _vlan_hdr { 35 __be16 h_vlan_TCI; 36 __be16 h_vlan_encapsulated_proto; 37 }; 38 #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ 39 #define VLAN_PRIO_SHIFT 13 40 #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ 41 #define VLAN_TAG_PRESENT VLAN_CFI_MASK 42 #define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ 43 #define VLAN_N_VID 4096 44 45 struct parse_pkt { 46 __u16 l3_proto; 47 __u16 l3_offset; 48 __u16 vlan_outer; 49 __u16 vlan_inner; 50 __u8 vlan_outer_offset; 51 __u8 vlan_inner_offset; 52 }; 53 54 char _license[] SEC("license") = "GPL"; 55 56 static __always_inline 57 bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) 58 { 59 __u16 eth_type; 60 __u8 offset; 61 62 offset = sizeof(*eth); 63 /* Make sure packet is large enough for parsing eth + 2 VLAN headers */ 64 if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end) 65 return false; 66 67 eth_type = eth->h_proto; 68 69 /* Handle outer VLAN tag */ 70 if (eth_type == bpf_htons(ETH_P_8021Q) 71 || eth_type == bpf_htons(ETH_P_8021AD)) { 72 struct _vlan_hdr *vlan_hdr; 73 74 vlan_hdr = (void *)eth + offset; 75 pkt->vlan_outer_offset = offset; 76 pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI) 77 & VLAN_VID_MASK; 78 eth_type = vlan_hdr->h_vlan_encapsulated_proto; 79 offset += sizeof(*vlan_hdr); 80 } 81 82 /* Handle inner (double) VLAN tag */ 83 if (eth_type == bpf_htons(ETH_P_8021Q) 84 || eth_type == bpf_htons(ETH_P_8021AD)) { 85 struct _vlan_hdr *vlan_hdr; 86 87 vlan_hdr = (void *)eth + offset; 88 pkt->vlan_inner_offset = offset; 89 pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI) 90 & VLAN_VID_MASK; 91 eth_type = vlan_hdr->h_vlan_encapsulated_proto; 92 offset += sizeof(*vlan_hdr); 93 } 94 95 pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */ 96 pkt->l3_offset = offset; 97 98 return true; 99 } 100 101 /* Hint, VLANs are choosen to hit network-byte-order issues */ 102 #define TESTVLAN 4011 /* 0xFAB */ 103 // #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ 104 105 SEC("xdp_drop_vlan_4011") 106 int xdp_prognum0(struct xdp_md *ctx) 107 { 108 void *data_end = (void *)(long)ctx->data_end; 109 void *data = (void *)(long)ctx->data; 110 struct parse_pkt pkt = { 0 }; 111 112 if (!parse_eth_frame(data, data_end, &pkt)) 113 return XDP_ABORTED; 114 115 /* Drop specific VLAN ID example */ 116 if (pkt.vlan_outer == TESTVLAN) 117 return XDP_ABORTED; 118 /* 119 * Using XDP_ABORTED makes it possible to record this event, 120 * via tracepoint xdp:xdp_exception like: 121 * # perf record -a -e xdp:xdp_exception 122 * # perf script 123 */ 124 return XDP_PASS; 125 } 126 /* 127 Commands to setup VLAN on Linux to test packets gets dropped: 128 129 export ROOTDEV=ixgbe2 130 export VLANID=4011 131 ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID 132 ip link set dev $ROOTDEV.$VLANID up 133 134 ip link set dev $ROOTDEV mtu 1508 135 ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID 136 137 Load prog with ip tool: 138 139 ip link set $ROOTDEV xdp off 140 ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011 141 142 */ 143 144 /* Changing VLAN to zero, have same practical effect as removing the VLAN. */ 145 #define TO_VLAN 0 146 147 SEC("xdp_vlan_change") 148 int xdp_prognum1(struct xdp_md *ctx) 149 { 150 void *data_end = (void *)(long)ctx->data_end; 151 void *data = (void *)(long)ctx->data; 152 struct parse_pkt pkt = { 0 }; 153 154 if (!parse_eth_frame(data, data_end, &pkt)) 155 return XDP_ABORTED; 156 157 /* Change specific VLAN ID */ 158 if (pkt.vlan_outer == TESTVLAN) { 159 struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset; 160 161 /* Modifying VLAN, preserve top 4 bits */ 162 vlan_hdr->h_vlan_TCI = 163 bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000) 164 | TO_VLAN); 165 } 166 167 return XDP_PASS; 168 } 169 170 /* 171 * Show XDP+TC can cooperate, on creating a VLAN rewriter. 172 * 1. Create a XDP prog that can "pop"/remove a VLAN header. 173 * 2. Create a TC-bpf prog that egress can add a VLAN header. 174 */ 175 176 #ifndef ETH_ALEN /* Ethernet MAC address length */ 177 #define ETH_ALEN 6 /* bytes */ 178 #endif 179 #define VLAN_HDR_SZ 4 /* bytes */ 180 181 SEC("xdp_vlan_remove_outer") 182 int xdp_prognum2(struct xdp_md *ctx) 183 { 184 void *data_end = (void *)(long)ctx->data_end; 185 void *data = (void *)(long)ctx->data; 186 struct parse_pkt pkt = { 0 }; 187 char *dest; 188 189 if (!parse_eth_frame(data, data_end, &pkt)) 190 return XDP_ABORTED; 191 192 /* Skip packet if no outer VLAN was detected */ 193 if (pkt.vlan_outer_offset == 0) 194 return XDP_PASS; 195 196 /* Moving Ethernet header, dest overlap with src, memmove handle this */ 197 dest = data; 198 dest+= VLAN_HDR_SZ; 199 /* 200 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by 201 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes 202 */ 203 __builtin_memmove(dest, data, ETH_ALEN * 2); 204 /* Note: LLVM built-in memmove inlining require size to be constant */ 205 206 /* Move start of packet header seen by Linux kernel stack */ 207 bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); 208 209 return XDP_PASS; 210 } 211 212 static __always_inline 213 void shift_mac_4bytes_16bit(void *data) 214 { 215 __u16 *p = data; 216 217 p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */ 218 p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */ 219 p[5] = p[3]; 220 p[4] = p[2]; 221 p[3] = p[1]; 222 p[2] = p[0]; 223 } 224 225 static __always_inline 226 void shift_mac_4bytes_32bit(void *data) 227 { 228 __u32 *p = data; 229 230 /* Assuming VLAN hdr present. The 4 bytes in p[3] that gets 231 * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI. 232 * The vlan_hdr->h_vlan_encapsulated_proto take over role as 233 * ethhdr->h_proto. 234 */ 235 p[3] = p[2]; 236 p[2] = p[1]; 237 p[1] = p[0]; 238 } 239 240 SEC("xdp_vlan_remove_outer2") 241 int xdp_prognum3(struct xdp_md *ctx) 242 { 243 void *data_end = (void *)(long)ctx->data_end; 244 void *data = (void *)(long)ctx->data; 245 struct ethhdr *orig_eth = data; 246 struct parse_pkt pkt = { 0 }; 247 248 if (!parse_eth_frame(orig_eth, data_end, &pkt)) 249 return XDP_ABORTED; 250 251 /* Skip packet if no outer VLAN was detected */ 252 if (pkt.vlan_outer_offset == 0) 253 return XDP_PASS; 254 255 /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */ 256 shift_mac_4bytes_32bit(data); 257 258 /* Move start of packet header seen by Linux kernel stack */ 259 bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); 260 261 return XDP_PASS; 262 } 263 264 /*===================================== 265 * BELOW: TC-hook based ebpf programs 266 * ==================================== 267 * The TC-clsact eBPF programs (currently) need to be attach via TC commands 268 */ 269 270 SEC("tc_vlan_push") 271 int _tc_progA(struct __sk_buff *ctx) 272 { 273 bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); 274 275 return TC_ACT_OK; 276 } 277 /* 278 Commands to setup TC to use above bpf prog: 279 280 export ROOTDEV=ixgbe2 281 export FILE=xdp_vlan01_kern.o 282 283 # Re-attach clsact to clear/flush existing role 284 tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\ 285 tc qdisc add dev $ROOTDEV clsact 286 287 # Attach BPF prog EGRESS 288 tc filter add dev $ROOTDEV egress \ 289 prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push 290 291 tc filter show dev $ROOTDEV egress 292 */ 293