--[[
Copyright 2016 Marek Vavrusa <mvavrusa@cloudflare.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]
-- LuaJIT to BPF bytecode compiler.
--
-- The code generation phase is currently one-pass and produces:
-- * Compiled code in the BPF bytecode format (https://www.kernel.org/doc/Documentation/networking/filter.txt)
-- * Variables with liveness analysis and other metadata (spill information, compile-time value)
--
-- The code generator optimises as much as possible in a single pass:
-- * Compile-time expression folding and constant propagation
-- * Basic control flow analysis with dead code elimination (based on compile-time expressions)
-- * Single-pass optimistic register allocation
--
-- The first pass doesn't have variable lifetime visibility yet, so it relies on a rewriter for further
-- optimisations such as:
-- * Dead store elimination (the first pass doesn't know if/when a variable is going to be used)
-- * Common sub-expression elimination (relies on DCE and liveness analysis)
-- * Orphan JMP elimination (removing these in the first pass would break previous JMP targets)
-- * Better register allocation (needs to be recomputed after optimisations)
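--
-- For orientation, a sketch (hypothetical usage; the user-facing wrapper module
-- is not part of this file) of what the compiler consumes and produces: a
-- restricted Lua function such as
--
--   local prog = function (skb)
--       return skb.len
--   end
--
-- is read as LuaJIT bytecode (via bpf.ljbytecode) and lowered, instruction by
-- instruction, to BPF code roughly like:
--
--   MOV64 r6, r1          ; save the context argument (see end of create_emitter)
--   LDX W r0, [r6+<len>]  ; load skb->len
--   EXIT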

local ffi = require('ffi')
local bit = require('bit')
local S = require('syscall')
local bytecode = require('bpf.ljbytecode')
local cdef = require('bpf.cdef')
local proto = require('bpf.proto')
local builtins = require('bpf.builtins')

-- Constants
local ALWAYS, NEVER = -1, -2
local BPF = ffi.typeof('struct bpf')
local HELPER = ffi.typeof('struct bpf_func_id')

-- Symbolic table of constant expressions over numbers
local const_expr = {
	ADD = function (a, b) return a + b end,
	SUB = function (a, b) return a - b end,
	DIV = function (a, b) return a / b end,
	MOD = function (a, b) return a % b end,
	JEQ = function (a, b) return a == b end,
	JNE = function (a, b) return a ~= b end,
	JGE = function (a, b) return a >= b end,
	JGT = function (a, b) return a > b end,
}
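
-- Example (a sketch of how this table is used by ALU_IMM/CMP_IMM below): when
-- both operands are compile-time numbers, e.g. `local x = 2 + 3` in the traced
-- source, the operation is folded as const_expr.ADD(2, 3) == 5 and no BPF
-- instruction is emitted for it.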

local const_width = {
	[1] = BPF.B, [2] = BPF.H, [4] = BPF.W, [8] = BPF.DW,
}

-- Built-ins that are strict only (never compile-time expandable)
local builtins_strict = {
	[ffi.new] = true,
	[print]   = true,
}

-- Deep copy a table
local function table_copy(t)
	local copy = {}
	for n,v in pairs(t) do
		if type(v) == 'table' then
			v = table_copy(v)
		end
		copy[n] = v
	end
	return copy
end

-- Return true if the constant part is a proxy
local function is_proxy(x)
	return type(x) == 'table' and (x.__dissector or x.__map or x.__base)
end

-- Create compiler closure
local function create_emitter(env, stackslots, params, param_types)

local V = {}   -- Variable tracking / register allocator
local code = { -- Generated code
	pc = 0, bc_pc = 0,
	insn = ffi.new('struct bpf_insn[4096]'),
	fixup = {},
	reachable = true,
	seen_cmp = nil,
}
local Vstate = {} -- Track variable layout at basic block exits

-- Anything below this stack offset is free for the caller to use.
-- @note: There is no tracking memory allocator, so the caller may lower it
-- for persistent objects, but such memory will never be reclaimed; the caller
-- is responsible for resetting the stack top whenever the memory below it is
-- free to be reused.
local stack_top = (stackslots + 1) * ffi.sizeof('uint64_t')

local function emit(op, dst, src, off, imm)
	local ins = code.insn[code.pc]
	ins.code = op
	ins.dst_reg = dst
	ins.src_reg = src
	ins.off = off
	ins.imm = imm
	code.pc = code.pc + 1
end
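
-- Example (illustrative values): emit(BPF.ALU64 + BPF.MOV + BPF.K, 0, 0, 0, 42)
-- fills one fixed-size struct bpf_insn slot with
--   code = BPF_ALU64|BPF_MOV|BPF_K, dst_reg = 0, imm = 42
-- i.e. the instruction `r0 = 42`.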

local function reg_spill(var)
	local vinfo = V[var]
	assert(vinfo.reg, 'attempt to spill VAR that doesn\'t have an allocated register')
	vinfo.spill = (var + 1) * ffi.sizeof('uint64_t') -- Index by (variable number) * (register width)
	emit(BPF.MEM + BPF.STX + BPF.DW, 10, vinfo.reg, -vinfo.spill, 0)
	vinfo.reg = nil
end

local function reg_fill(var, reg)
	local vinfo = V[var]
	assert(reg, 'attempt to fill variable into a register, but no register is allocated')
	assert(vinfo.spill, 'attempt to fill register with a VAR that isn\'t spilled')
	emit(BPF.MEM + BPF.LDX + BPF.DW, reg, 10, -vinfo.spill, 0)
	vinfo.reg = reg
	vinfo.spill = nil
end
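
-- A sketch of the spill/fill round trip for variable slot `var`
-- (spill slots live right below the frame pointer R10):
--   STX DW [r10 - (var+1)*8], rX   ; reg_spill
--   LDX DW rY, [r10 - (var+1)*8]   ; reg_fill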

-- Allocate a register (lazy simple allocator)
local function reg_alloc(var, reg)
	-- Specific register requested, must spill/move existing variable
	if reg then
		for k,v in pairs(V) do -- Spill any variable that has this register
			if v.reg == reg and not v.shadow then
				reg_spill(k)
				break
			end
		end
		return reg
	end
	-- Find a free or least recently used slot
	local last, last_seen, used = nil, 0xffff, 0
	for k,v in pairs(V) do
		if v.reg then
			if not v.live_to or v.live_to < last_seen then
				last, last_seen = k, v.live_to or last_seen
			end
			used = bit.bor(used, bit.lshift(1, v.reg))
		end
	end
	-- Attempt to select a free register from R7-R9 (callee saved)
	local free = bit.bnot(used)
	if     bit.band(free, 0x80) ~= 0 then reg = 7
	elseif bit.band(free,0x100) ~= 0 then reg = 8
	elseif bit.band(free,0x200) ~= 0 then reg = 9
	end
	-- Select another variable to be spilled
	if not reg then
		assert(last)
		reg = V[last].reg
		reg_spill(last)
	end
	assert(reg, 'VAR '..var..' fill/spill failed')
	return reg
end

-- Set a new variable
local function vset(var, reg, const, vtype)
	-- Must materialise all variables shadowing this variable slot, as it will be overwritten
	if V[var] and V[var].reg then
		for _, vinfo in pairs(V) do
			-- A shadowing variable MUST share the same type and attributes,
			-- but the register assignment may have changed
			if vinfo.shadow == var then
				vinfo.reg = V[var].reg
				vinfo.shadow = nil
			end
		end
	end
	-- Get a precise type for CDATA or attempt to narrow a numeric constant
	if not vtype and type(const) == 'cdata' then
		vtype = ffi.typeof(const)
	end
	V[var] = {reg=reg, const=const, type=vtype}
	-- Track variable source
	if V[var].const and type(const) == 'table' then
		V[var].source = V[var].const.source
	end
end

-- Materialize (or register) a variable in a register
-- If the register is nil, a new register is assigned (if not already assigned)
local function vreg(var, reg, reserve, vtype)
	local vinfo = V[var]
	assert(vinfo, 'VAR '..var..' not registered')
	vinfo.live_to = code.pc-1
	if (vinfo.reg and not reg) and not vinfo.shadow then return vinfo.reg end
	reg = reg_alloc(var, reg)
	-- Materialize variable shadow copy
	local src = vinfo
	while src.shadow do src = V[src.shadow] end
	if reserve then -- luacheck: ignore
		-- No load to register occurs
	elseif src.reg then
		emit(BPF.ALU64 + BPF.MOV + BPF.X, reg, src.reg, 0, 0)
	elseif src.spill then
		vinfo.spill = src.spill
		reg_fill(var, reg)
	elseif src.const then
		vtype = vtype or src.type
		if type(src.const) == 'table' and src.const.__base then
			-- Load pointer type
			emit(BPF.ALU64 + BPF.MOV + BPF.X, reg, 10, 0, 0)
			emit(BPF.ALU64 + BPF.ADD + BPF.K, reg, 0, 0, -src.const.__base)
		elseif type(src.const) == 'table' and src.const.__dissector then
			-- Load dissector offset (imm32), but keep the constant part (dissector proxy)
			emit(BPF.ALU64 + BPF.MOV + BPF.K, reg, 0, 0, src.const.off or 0)
		elseif vtype and ffi.sizeof(vtype) == 8 then
			-- IMM64 must be done in two instructions with imm64 = (lo(imm32), hi(imm32))
			emit(BPF.LD + BPF.DW, reg, 0, 0, ffi.cast('uint32_t', src.const))
			emit(0, 0, 0, 0, ffi.cast('uint32_t', bit.rshift(bit.rshift(src.const, 16), 16)))
			vinfo.const = nil -- The variable is live
		else
			emit(BPF.ALU64 + BPF.MOV + BPF.K, reg, 0, 0, src.const)
			vinfo.const = nil -- The variable is live
		end
	else assert(false, 'VAR '..var..' has neither register nor constant value') end
	vinfo.reg = reg
	vinfo.shadow = nil
	vinfo.live_from = code.pc-1
	vinfo.type = vtype or vinfo.type
	return reg
end
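
-- Example: materialising a 64-bit constant, say 0x1122334455667788, takes the
-- standard two-slot BPF_LD_IMM64 form sketched above:
--   LD DW rX, #0x55667788   ; low 32 bits in the first slot's imm
--   (pseudo slot) #0x11223344   ; high 32 bits in the second slot's imm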

-- Copy variable
local function vcopy(dst, src)
	if dst == src then return end
	V[dst] = {reg=V[src].reg, const=V[src].const, shadow=src, source=V[src].source, type=V[src].type}
end

-- Dereference a variable of pointer type
local function vderef(dst_reg, src_reg, vinfo)
	-- Dereference map pointers for primitive types
	-- BPF doesn't allow pointer arithmetic, so use the entry value
	assert(type(vinfo.const) == 'table' and vinfo.const.__dissector, 'cannot dereference a non-pointer variable')
	local vtype = vinfo.const.__dissector
	local w = ffi.sizeof(vtype)
	assert(const_width[w], 'NYI: sizeof('..tostring(vtype)..') not 1/2/4/8 bytes')
	if dst_reg ~= src_reg then
		emit(BPF.ALU64 + BPF.MOV + BPF.X, dst_reg, src_reg, 0, 0)    -- dst = src
	end
	-- Emit a NULL check unless the pointer is provably non-NULL
	if not vinfo.source or vinfo.source:find('_or_null', 1, true) then
		emit(BPF.JMP + BPF.JEQ + BPF.K, src_reg, 0, 1, 0)            -- if (src != NULL)
	end
	emit(BPF.MEM + BPF.LDX + const_width[w], dst_reg, src_reg, 0, 0) --     dst = *src;
end
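
-- Sketch of the emitted sequence for `dst = *src` on a possibly-NULL map
-- value pointer (the JEQ guard is omitted for provably non-NULL sources):
--   MOV64 dst, src
--   JEQ   src, #0, +1   ; skip the load when src == NULL
--   LDX   dst, [src+0]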

-- Allocate space for a variable
local function valloc(size, blank)
	local base = stack_top
	assert(stack_top + size < 512, 'exceeded maximum stack size of 512 bytes')
	stack_top = stack_top + size
	-- Align to an 8 byte boundary
	stack_top = math.ceil(stack_top/8)*8
	-- The current kernel version doesn't support ARG_PTR_TO_RAW_STACK,
	-- so we always need to have the memory initialized; remove this when supported
	if blank then
		if type(blank) == 'string' then
			local sp = 0
			while sp < size do
				-- TODO: no BPF_ST + BPF_DW instruction yet
				local as_u32 = ffi.new('uint32_t [1]')
				local sub = blank:sub(sp+1, sp+ffi.sizeof(as_u32))
				ffi.copy(as_u32, sub, #sub)
				emit(BPF.MEM + BPF.ST + BPF.W, 10, 0, -(stack_top-sp), as_u32[0])
				sp = sp + ffi.sizeof(as_u32)
			end
		elseif type(blank) == 'boolean' then
			reg_alloc(stackslots, 0)
			emit(BPF.ALU64 + BPF.MOV + BPF.K, 0, 0, 0, 0)
			for sp = base+8,stack_top,8 do
				emit(BPF.MEM + BPF.STX + BPF.DW, 10, 0, -sp, 0)
			end
		else error('NYI: fill with unknown type '..type(blank)) end
	end
	return stack_top
end

-- Turn a variable into a scalar in a register (or a constant)
local function vscalar(a, w)
	assert(const_width[w], 'sizeof(scalar variable) must be 1/2/4/8')
	local src_reg
	-- If the source is a pointer, we must dereference it first
	if cdef.isptr(V[a].type) then
		src_reg = vreg(a)
		local tmp_reg = reg_alloc(stackslots, 1) -- Clone variable in tmp register
		emit(BPF.ALU64 + BPF.MOV + BPF.X, tmp_reg, src_reg, 0, 0)
		vderef(tmp_reg, tmp_reg, V[a])
		src_reg = tmp_reg -- Materialize and dereference it
	-- Source is a value on the stack, we must load it first
	elseif type(V[a].const) == 'table' and V[a].const.__base > 0 then
		src_reg = vreg(a)
		emit(BPF.MEM + BPF.LDX + const_width[w], src_reg, 10, -V[a].const.__base, 0)
		V[a].type = V[a].const.__dissector
		V[a].const = nil -- Value is dereferenced
	-- If the source is an imm32 number, avoid a register load
	elseif type(V[a].const) == 'number' and w < 8 then
		return nil, V[a].const
	-- Load the variable from any other source
	else
		src_reg = vreg(a)
	end

	return src_reg, nil
end

-- Emit compensation code at the end of a basic block to unify the variable set layout on all block exits
-- 1. free registers by spilling
-- 2. fill registers to match the other exits from this BB
local function bb_end(Vcomp)
	for i,v in pairs(V) do
		if Vcomp[i] and Vcomp[i].spill and not v.spill then
			-- Materialize constant or shadowing variable to be able to spill
			if not v.reg and (v.shadow or cdef.isimmconst(v)) then
				vreg(i)
			end
			reg_spill(i)
		end
	end
	for i,v in pairs(V) do
		if Vcomp[i] and Vcomp[i].reg and not v.reg then
			vreg(i, Vcomp[i].reg)
		end
		-- Compensate for variable metadata changes
		if Vcomp[i] and Vcomp[i].source then
			V[i].source = Vcomp[i].source
		end
	end
end

local function CMP_STR(a, b, op)
	assert(op == 'JEQ' or op == 'JNE', 'NYI: stack/string comparison supports only == or ~=')
	-- There is no better way to implement this than an unrolled XOR loop, as we can fix up only one JMP
	-- So: X(a,b) = a[0] ^ b[0] | a[1] ^ b[1] | ...
	--     EQ(a,b) <=> X == 0
	-- This could be optimised by the rewriter placing early exits in the second phase for long strings
	local base, size = V[a].const.__base, math.min(#b, ffi.sizeof(V[a].type))
	local acc, tmp = reg_alloc(stackslots, 0), reg_alloc(stackslots+1, 1)
	local sp = 0
	emit(BPF.ALU64 + BPF.MOV + BPF.K, acc, 0, 0, 0)
	while sp < size do
		-- Load string chunk as imm32
		local as_u32 = ffi.new('uint32_t [1]')
		local sub = b:sub(sp+1, sp+ffi.sizeof(as_u32))
		ffi.copy(as_u32, sub, #sub)
		-- TODO: make this faster by interleaved load/compare steps with DW length
		emit(BPF.MEM + BPF.LDX + BPF.W, tmp, 10, -(base-sp), 0)
		emit(BPF.ALU64 + BPF.XOR + BPF.K, tmp, 0, 0, as_u32[0])
		emit(BPF.ALU64 + BPF.OR + BPF.X, acc, tmp, 0, 0)
		sp = sp + ffi.sizeof(as_u32)
	end
	emit(BPF.JMP + BPF[op] + BPF.K, acc, 0, 0xffff, 0)
	code.seen_cmp = code.pc-1
end
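
-- Sketch: comparing a 4-byte stack string against "eth0" unrolls to
--   MOV64 acc, #0
--   LDX W tmp, [r10 - base]
--   XOR64 tmp, #imm32("eth0")
--   OR64  acc, tmp
--   JEQ/JNE acc, #0, <0xffff placeholder, patched by the JMP fixup below>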

local function CMP_REG(a, b, op)
	-- Fold compile-time expressions
	if V[a].const and V[b].const and not (is_proxy(V[a].const) or is_proxy(V[b].const)) then
		code.seen_cmp = const_expr[op](V[a].const, V[b].const) and ALWAYS or NEVER
	else
		-- Comparison against compile-time string or stack memory
		if V[b].const and type(V[b].const) == 'string' then
			return CMP_STR(a, V[b].const, op)
		end
		-- The 0xFFFF target here has no significance, it's just a placeholder for
		-- the compiler to replace its absolute offset to the LJ bytecode insn with a relative
		-- offset in BPF program code; the verifier accepts only programs with valid JMP targets
		local a_reg, b_reg = vreg(a), vreg(b)
		emit(BPF.JMP + BPF[op] + BPF.X, a_reg, b_reg, 0xffff, 0)
		code.seen_cmp = code.pc-1
	end
end

local function CMP_IMM(a, b, op)
	local c = V[a].const
	if c and not is_proxy(c) then -- Fold compile-time expressions
		code.seen_cmp = const_expr[op](c, b) and ALWAYS or NEVER
	else
		-- Convert imm32 to number
		if type(b) == 'string' then
			if     #b == 1 then b = b:byte()
			elseif cdef.isptr(V[a].type) then
				-- String comparison between stack/constant string
				return CMP_STR(a, b, op)
			elseif #b <= 4 then
				-- Convert to u32 with network byte order
				local imm = ffi.new('uint32_t[1]')
				ffi.copy(imm, b, #b)
				b = builtins.hton(imm[0])
			else error('NYI: compare register with string, where #string > sizeof(u32)') end
		end
		-- The 0xFFFF target here has no significance, it's just a placeholder for
		-- the compiler to replace its absolute offset to the LJ bytecode insn with a relative
		-- offset in BPF program code; the verifier accepts only programs with valid JMP targets
		local reg = vreg(a)
		emit(BPF.JMP + BPF[op] + BPF.K, reg, 0, 0xffff, b)
		code.seen_cmp = code.pc-1
		-- Remember NULL pointer checks, as BPF prohibits pointer comparisons
		-- and repeated checks wouldn't pass the verifier; only comparisons
		-- against constants are allowed.
		if op == 'JEQ' and tonumber(b) == 0 and V[a].source then
			local pos = V[a].source:find('_or_null', 1, true)
			if pos then
				code.seen_null_guard = a
			end
		-- Inverse NULL pointer check (if a ~= nil)
		elseif op == 'JNE' and tonumber(b) == 0 and V[a].source then
			local pos = V[a].source:find('_or_null', 1, true)
			if pos then
				code.seen_null_guard = a
				code.seen_null_guard_inverse = true
			end
		end
	end
end

local function ALU_IMM(dst, a, b, op)
	-- Fold compile-time expressions
	if V[a].const and not is_proxy(V[a].const) then
		assert(cdef.isimmconst(V[a]), 'VAR '..a..' must be numeric')
		vset(dst, nil, const_expr[op](V[a].const, b))
	-- Otherwise we need to materialize the dissected value at DST, and add to it
	else
		vcopy(dst, a)
		local dst_reg = vreg(dst)
		if cdef.isptr(V[a].type) then
			vderef(dst_reg, dst_reg, V[a])
			V[dst].type = V[a].const.__dissector
		else
			V[dst].type = V[a].type
		end
		emit(BPF.ALU64 + BPF[op] + BPF.K, dst_reg, 0, 0, b)
	end
end

local function ALU_REG(dst, a, b, op)
	-- Fold compile-time expressions
	if V[a].const and not (is_proxy(V[a].const) or is_proxy(V[b].const)) then
		assert(cdef.isimmconst(V[a]), 'VAR '..a..' must be numeric')
		assert(cdef.isimmconst(V[b]), 'VAR '..b..' must be numeric')
		if type(op) == 'string' then op = const_expr[op] end
		vcopy(dst, a)
		V[dst].const = op(V[a].const, V[b].const)
	else
		local src_reg = b and vreg(b) or 0 -- SRC is optional for unary operations
		if b and cdef.isptr(V[b].type) then
			-- We have to allocate a temporary register for dereferencing to preserve
			-- the pointer in the source variable, which MUST NOT be altered
			reg_alloc(stackslots, 2)
			vderef(2, src_reg, V[b])
			src_reg = 2
		end
		vcopy(dst, a) -- DST may alias B, so the copy must occur after we materialize B
		local dst_reg = vreg(dst)
		if cdef.isptr(V[a].type) then
			vderef(dst_reg, dst_reg, V[a])
			V[dst].type = V[a].const.__dissector
		end
		emit(BPF.ALU64 + BPF[op] + BPF.X, dst_reg, src_reg, 0, 0)
		V[stackslots].reg = nil  -- Free temporary registers
	end
end

local function ALU_IMM_NV(dst, a, b, op)
	-- Do DST = IMM(a) op VAR(b), where we can't invert the operands because
	-- the registers are u64 but immediates are u32, so complement
	-- arithmetic wouldn't work
	vset(stackslots+1, nil, a)
	ALU_REG(dst, stackslots+1, b, op)
end

local function LD_ABS(dst, w, off)
	assert(off, 'LD_ABS called without offset')
	if w < 8 then
		local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0
		emit(BPF.LD + BPF.ABS + const_width[w], dst_reg, 0, 0, off)
		if w > 1 and ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse
			emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, w * 8)
		end
	elseif w == 8 then
		-- LD_ABS|IND prohibits DW, we need to do two W loads and combine them
		local tmp_reg = vreg(stackslots, 0, true, builtins.width_type(w)) -- Reserve R0
		emit(BPF.LD + BPF.ABS + const_width[4], tmp_reg, 0, 0, off + 4)
		if ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse
			emit(BPF.ALU + BPF.END + BPF.TO_BE, tmp_reg, 0, 0, 32)
		end
		ALU_IMM(stackslots, stackslots, 32, 'LSH')
		local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0, spill tmp variable
		emit(BPF.LD + BPF.ABS + const_width[4], dst_reg, 0, 0, off)
		if ffi.abi('le') then -- LD_ABS has htonl() semantics, reverse
			emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, 32)
		end
		ALU_REG(dst, dst, stackslots, 'OR')
		V[stackslots].reg = nil -- Free temporary registers
	else
		assert(w < 8, 'NYI: only LD_ABS of 1/2/4/8 is supported')
	end
end
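
-- Sketch: an 8-byte absolute packet load at offset `off` is composed from two
-- 4-byte LD_ABS loads (DW is prohibited in that addressing mode):
--   LD ABS W r0, [off+4] ; byteswap ; tmp = r0 << 32
--   LD ABS W r0, [off]   ; byteswap ; dst = tmp | r0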

local function LD_IND(dst, src, w, off)
	local src_reg = vreg(src) -- Must materialize first in case dst == src
	local dst_reg = vreg(dst, 0, true, builtins.width_type(w)) -- Reserve R0
	emit(BPF.LD + BPF.IND + const_width[w], dst_reg, src_reg, 0, off or 0)
	if w > 1 and ffi.abi('le') then -- LD_ABS|IND has htonl() semantics, reverse
		emit(BPF.ALU + BPF.END + BPF.TO_BE, dst_reg, 0, 0, w * 8)
	end
end

local function LD_MEM(dst, src, w, off)
	local src_reg = vreg(src) -- Must materialize first in case dst == src
	local dst_reg = vreg(dst, nil, true, builtins.width_type(w)) -- Allocate a fresh register
	emit(BPF.MEM + BPF.LDX + const_width[w], dst_reg, src_reg, off or 0, 0)
end

-- @note: This is specific for now, as it expects the destination register to be reserved
local function LD_IMM_X(dst_reg, src_type, imm, w)
	if w == 8 then -- IMM64 must be done in two instructions with imm64 = (lo(imm32), hi(imm32))
		emit(BPF.LD + const_width[w], dst_reg, src_type, 0, ffi.cast('uint32_t', imm))
		-- Must shift in two steps, as bit.rshift supports only [0..31]
		emit(0, 0, 0, 0, ffi.cast('uint32_t', bit.rshift(bit.rshift(imm, 16), 16)))
	else
		emit(BPF.LD + const_width[w], dst_reg, src_type, 0, imm)
	end
end
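
-- Sketch: MAP_INIT below uses this to emit the special map-fd load, i.e. an
-- IMM64 pair with src_type = BPF_PSEUDO_MAP_FD and imm = the process-local
-- map fd; the loader/verifier rewrites it into the actual map pointer.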

local function BUILTIN(func, ...)
	local builtin_export = {
		-- Compiler primitives (work with variable slots, emit instructions)
		V=V, vreg=vreg, vset=vset, vcopy=vcopy, vderef=vderef, valloc=valloc, emit=emit,
		reg_alloc=reg_alloc, reg_spill=reg_spill, tmpvar=stackslots, const_width=const_width,
		-- Extensions and helpers (use with care)
		LD_IMM_X = LD_IMM_X,
	}
	func(builtin_export, ...)
end

local function LOAD(dst, src, off, vtype)
	local base = V[src].const
	assert(base and base.__dissector, 'NYI: load() on variable that doesn\'t have a dissector')
	assert(V[src].source, 'NYI: load() on variable with unknown source')
	-- Cast to a different type if requested
	vtype = vtype or base.__dissector
	local w = ffi.sizeof(vtype)
	assert(const_width[w], 'NYI: load() supports 1/2/4/8 bytes at a time only, wanted ' .. tostring(w))
	-- Packet access with a dissector (use BPF_LD)
	if V[src].source:find('ptr_to_pkt', 1, true) then
		if base.off then -- Absolute address to payload
			LD_ABS(dst, w, off + base.off)
		else -- Indirect address to payload
			LD_IND(dst, src, w, off)
		end
	-- Direct access to the first argument (skb fields, pt regs, ...)
	elseif V[src].source:find('ptr_to_ctx', 1, true) then
		LD_MEM(dst, src, w, off)
	-- Direct skb access with a dissector (use BPF_MEM)
	elseif V[src].source:find('ptr_to_skb', 1, true) then
		LD_MEM(dst, src, w, off)
	-- Pointer to map-backed memory (use BPF_MEM)
	elseif V[src].source:find('ptr_to_map_value', 1, true) then
		LD_MEM(dst, src, w, off)
	-- Indirect read using a probe (uprobe or kprobe, uses helper)
	elseif V[src].source:find('ptr_to_probe', 1, true) then
		BUILTIN(builtins[builtins.probe_read], nil, dst, src, vtype, off)
		V[dst].source = V[src].source -- Builtin handles everything
	else
		error('NYI: load() on variable from ' .. V[src].source)
	end
	V[dst].type = vtype
	V[dst].const = nil -- Dissected value is not constant anymore
end

local function CALL(a, b, d)
	assert(b-1 <= 1, 'NYI: CALL with >1 return values')
	-- Perform either a compile-time, helper, or builtin call
	local func = V[a].const
	-- Gather all arguments and check if they're constant
	local args, const, nargs = {}, true, d - 1
	for i = a+1, a+d-1 do
		table.insert(args, V[i].const)
		if not V[i].const or is_proxy(V[i].const) then const = false end
	end
	local builtin = builtins[func]
	if not const or nargs == 0 then
		if builtin and type(builtin) == 'function' then
			args = {a}
			for i = a+1, a+nargs do table.insert(args, i) end
			BUILTIN(builtin, unpack(args))
		elseif V[a+2] and V[a+2].const then -- var OP imm
			ALU_IMM(a, a+1, V[a+2].const, builtin)
		elseif nargs <= 2 then              -- var OP var
			ALU_REG(a, a+1, V[a+2] and a+2, builtin)
		else
			error('NYI: CALL non-builtin with 3 or more arguments')
		end
	-- Call on a dissector implies slice retrieval
	elseif type(func) == 'table' and func.__dissector then
		assert(nargs >= 2, 'NYI: <dissector>.slice(a, b) must have at least two arguments')
		assert(V[a+1].const and V[a+2].const, 'NYI: slice() arguments must be constant')
		local off = V[a+1].const
		local vtype = builtins.width_type(V[a+2].const - off)
		-- Access to packet via packet (use BPF_LD)
		if V[a].source and V[a].source:find('ptr_to_', 1, true) then
			LOAD(a, a, off, vtype)
		else
			error('NYI: <dissector>.slice(a, b) on non-pointer memory ' .. (V[a].source or 'unknown'))
		end
	-- Strict builtins cannot be expanded at compile time
	elseif builtins_strict[func] and builtin then
		args = {a}
		for i = a+1, a+nargs do table.insert(args, i) end
		BUILTIN(builtin, unpack(args))
	-- Attempt compile-time call expansion (expects all arguments to be compile-time known)
	else
		assert(const, 'NYI: CALL expansion requires compile-time constant arguments, but at least one argument is not constant')
		V[a].const = func(unpack(args))
	end
end
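
-- Example: a call whose arguments are all compile-time constants, e.g.
-- `local s = string.rep('\0', 4)`, is evaluated here during compilation and
-- only its result is tracked; calls to registered builtins (see
-- bpf/builtins.lua) emit BPF code through BUILTIN() instead.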

local function MAP_INIT(map_var, key, imm)
	local map = V[map_var].const
	vreg(map_var, 1, true, ffi.typeof('uint64_t'))
	-- Reserve R1 and load the ptr for the process-local map fd
	LD_IMM_X(1, BPF.PSEUDO_MAP_FD, map.fd, ffi.sizeof(V[map_var].type))
	V[map_var].reg = nil -- R1 will be invalidated after CALL, forget the register allocation
	-- Reserve R2 and load R2 = key pointer
	local key_size = ffi.sizeof(map.key_type)
	local w = const_width[key_size] or BPF.DW
	local pod_type = const_width[key_size]
	local sp = stack_top + key_size -- Must use stack below spill slots
	-- Store immediate value on stack
	reg_alloc(stackslots, 2) -- Spill anything in R2 (unnamed tmp variable)
	local key_base = key and V[key].const
	imm = imm or key_base
	if imm and (not key or not is_proxy(key_base)) then
		assert(pod_type, 'NYI: map[const K], K width must be 1/2/4/8')
		emit(BPF.MEM + BPF.ST + w, 10, 0, -sp, imm)
	-- Key is in a register, spill it
	elseif V[key].reg and pod_type then
		if cdef.isptr(V[key].type) then
			-- There is already a pointer in the register, dereference before spilling
			emit(BPF.MEM + BPF.LDX + w, 2, V[key].reg, 0, 0)
			emit(BPF.MEM + BPF.STX + w, 10, 2, -sp, 0)
		else -- Variable in the register is POD, spill it on the stack
			emit(BPF.MEM + BPF.STX + w, 10, V[key].reg, -sp, 0)
		end
	-- Key is spilled from a register to the stack
	elseif V[key].spill then
		sp = V[key].spill
	-- Key is already on the stack, use the base-relative address
	elseif key_base.__base then
		assert(key_size == ffi.sizeof(V[key].type), 'VAR '..key..' type incompatible with BPF map key type')
		sp = key_base.__base
	else
		error('VAR '..key..' is neither const-expr/register/stack/spilled')
	end
	-- If [FP+K] addressing, emit it
	if sp then
		emit(BPF.ALU64 + BPF.MOV + BPF.X, 2, 10, 0, 0)
		emit(BPF.ALU64 + BPF.ADD + BPF.K, 2, 0, 0, -sp)
	end
end

local function MAP_GET(dst, map_var, key, imm)
	local map = V[map_var].const
	MAP_INIT(map_var, key, imm)
	-- Flag as pointer type and associate dissector for map value type
	vreg(dst, 0, true, ffi.typeof('uint8_t *'))
	V[dst].const = {__dissector=map.val_type}
	V[dst].source = 'ptr_to_map_value_or_null'
	emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_lookup_elem)
	V[stackslots].reg = nil -- Free temporary registers
end
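
-- Sketch of the full lookup sequence this builds (for `local v = map[key]`):
--   LD DW r1, <pseudo map fd>      ; MAP_INIT
--   ST  [r10 - sp], key            ; unless the key already lives on the stack
--   MOV64 r2, r10 ; ADD64 r2, #-sp ; r2 = &key
--   CALL map_lookup_elem           ; r0 = value pointer or NULL
-- The 'ptr_to_map_value_or_null' source annotation is what later forces the
-- NULL guard in vderef()/CMP_IMM().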

local function MAP_DEL(map_var, key, key_imm)
	-- Set R0, R1 (map fd, preempt R0)
	reg_alloc(stackslots, 0) -- Spill anything in R0 (unnamed tmp variable)
	MAP_INIT(map_var, key, key_imm)
	emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_delete_elem)
	V[stackslots].reg = nil -- Free temporary registers
end

local function MAP_SET(map_var, key, key_imm, src)
	local map = V[map_var].const
	-- Delete when setting nil
	if V[src].type == ffi.typeof('void') then
		return MAP_DEL(map_var, key, key_imm)
	end
	-- Set R0, R1 (map fd, preempt R0)
	reg_alloc(stackslots, 0) -- Spill anything in R0 (unnamed tmp variable)
	MAP_INIT(map_var, key, key_imm)
	reg_alloc(stackslots, 4) -- Spill anything in R4 (unnamed tmp variable)
	emit(BPF.ALU64 + BPF.MOV + BPF.K, 4, 0, 0, 0) -- BPF_ANY, create new element or update existing
	-- Reserve R3 for the value pointer
	reg_alloc(stackslots, 3) -- Spill anything in R3 (unnamed tmp variable)
	local val_size = ffi.sizeof(map.val_type)
	local w = const_width[val_size] or BPF.DW
	local pod_type = const_width[val_size]
	-- Stack pointer must be aligned to both key/value size and have enough headroom for (key, value)
	local sp = stack_top + ffi.sizeof(map.key_type) + val_size
	sp = sp + (sp % val_size)
	local base = V[src].const
	if base and not is_proxy(base) then
		assert(pod_type, 'NYI: MAP[K] = imm V; V width must be 1/2/4/8')
		emit(BPF.MEM + BPF.ST + w, 10, 0, -sp, base)
	-- Value is in a register, spill it
	elseif V[src].reg and pod_type then
		-- Value is a pointer, dereference it and spill it
		if cdef.isptr(V[src].type) then
			vderef(3, V[src].reg, V[src])
			emit(BPF.MEM + BPF.STX + w, 10, 3, -sp, 0)
		else
			emit(BPF.MEM + BPF.STX + w, 10, V[src].reg, -sp, 0)
		end
	-- We get a pointer to the spilled register on the stack
	elseif V[src].spill then
		-- If the variable is a pointer, we can load it to R3 directly (saves a "LEA")
		if cdef.isptr(V[src].type) then
			reg_fill(src, 3)
			-- If the variable is a stack pointer, we don't have to check it
			if base.__base then
				emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_update_elem)
				return
			end
			vderef(3, V[src].reg, V[src])
			emit(BPF.MEM + BPF.STX + w, 10, 3, -sp, 0)
		else
			sp = V[src].spill
		end
	-- Value is already on the stack, write to the base-relative address
	elseif base.__base then
		if val_size ~= ffi.sizeof(V[src].type) then
			local err = string.format('VAR %d type (%s) incompatible with BPF map value type (%s): expected %d, got %d',
				src, V[src].type, map.val_type, val_size, ffi.sizeof(V[src].type))
			error(err)
		end
		sp = base.__base
	-- Value is constant, materialize it on the stack
	else
		error('VAR '.. src ..' is neither const-expr/register/stack/spilled')
	end
	emit(BPF.ALU64 + BPF.MOV + BPF.X, 3, 10, 0, 0)
	emit(BPF.ALU64 + BPF.ADD + BPF.K, 3, 0, 0, -sp)
	emit(BPF.JMP + BPF.CALL, 0, 0, 0, HELPER.map_update_elem)
	V[stackslots].reg = nil -- Free temporary registers
end
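
-- Example (hypothetical user program): an assignment like `map[key] = value`
-- reaches this function through TSETV/TSETB/TSETS below, while
-- `map[key] = nil` takes the void-type branch above and becomes a
-- map_delete_elem call instead.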

-- Finally, this table translates LuaJIT bytecode into code emitter actions.
local BC = {
	-- Constants
	KNUM = function(a, _, c, _) -- KNUM
		if c < 2147483648 then
			vset(a, nil, c, ffi.typeof('int32_t'))
		else
			vset(a, nil, c, ffi.typeof('uint64_t'))
		end
	end,
	KSHORT = function(a, _, _, d) -- KSHORT
		vset(a, nil, d, ffi.typeof('int16_t'))
	end,
	KCDATA = function(a, _, c, _) -- KCDATA
		-- Coerce numeric types if possible
		local ct = ffi.typeof(c)
		if ffi.istype(ct, ffi.typeof('uint64_t')) or ffi.istype(ct, ffi.typeof('int64_t')) then
			vset(a, nil, c, ct)
		elseif tonumber(c) ~= nil then
			-- TODO: this should not be possible
			vset(a, nil, tonumber(c), ct)
		else
			error('NYI: cannot use CDATA constant of type ' .. ct)
		end
	end,
	KPRI = function(a, _, _, d) -- KPRI
		-- KNIL is 0, must create a special type to identify it
		local vtype = (d < 1) and ffi.typeof('void') or ffi.typeof('uint8_t')
		vset(a, nil, (d < 2) and 0 or 1, vtype)
	end,
	KSTR = function(a, _, c, _) -- KSTR
		vset(a, nil, c, ffi.typeof('const char[?]'))
	end,
	MOV = function(a, _, _, d) -- MOV var, var
		vcopy(a, d)
	end,

	-- Comparison ops
	-- Note: comparisons are always followed by a JMP opcode, which
	--       will fuse the following JMP into a JMP+CMP instruction in BPF
	-- Note: we're narrowed to integers, so operand/operator inversion is legit
	ISLT = function(a, _, _, d) return CMP_REG(d, a, 'JGE') end, -- (a < d) (inverted)
	ISGE = function(a, _, _, d) return CMP_REG(a, d, 'JGE') end, -- (a >= d)
	ISGT = function(a, _, _, d) return CMP_REG(a, d, 'JGT') end, -- (a > d)
	ISEQV = function(a, _, _, d) return CMP_REG(a, d, 'JEQ') end, -- (a == d)
	ISNEV = function(a, _, _, d) return CMP_REG(a, d, 'JNE') end, -- (a ~= d)
	ISEQS = function(a, _, c, _) return CMP_IMM(a, c, 'JEQ') end, -- (a == str(c))
	ISNES = function(a, _, c, _) return CMP_IMM(a, c, 'JNE') end, -- (a ~= str(c))
	ISEQN = function(a, _, c, _) return CMP_IMM(a, c, 'JEQ') end, -- (a == c)
	ISNEN = function(a, _, c, _) return CMP_IMM(a, c, 'JNE') end, -- (a ~= c)
	IST = function(_, _, _, d) return CMP_IMM(d, 0, 'JNE') end, -- (d)
	ISF = function(_, _, _, d) return CMP_IMM(d, 0, 'JEQ') end, -- (not d)
	ISEQP = function(a, _, c, _) return CMP_IMM(a, c, 'JEQ') end, -- ISEQP (a == c)
	-- Binary operations with RHS constants
	ADDVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'ADD') end,
	SUBVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'SUB') end,
	MULVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'MUL') end,
	DIVVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'DIV') end,
	MODVN = function(a, b, c, _) return ALU_IMM(a, b, c, 'MOD') end,
	-- Binary operations with LHS constants
	-- Cheat code: we're narrowed to integer arithmetic, so MUL+ADD are commutative
	ADDNV = function(a, b, c, _) return ALU_IMM(a, b, c, 'ADD') end, -- ADDNV
	MULNV = function(a, b, c, _) return ALU_IMM(a, b, c, 'MUL') end, -- MULNV
	SUBNV = function(a, b, c, _) return ALU_IMM_NV(a, c, b, 'SUB') end, -- SUBNV
	DIVNV = function(a, b, c, _) return ALU_IMM_NV(a, c, b, 'DIV') end, -- DIVNV
	-- Binary operations between registers
	ADDVV = function(a, b, _, d) return ALU_REG(a, b, d, 'ADD') end,
	SUBVV = function(a, b, _, d) return ALU_REG(a, b, d, 'SUB') end,
	MULVV = function(a, b, _, d) return ALU_REG(a, b, d, 'MUL') end,
	DIVVV = function(a, b, _, d) return ALU_REG(a, b, d, 'DIV') end,
	MODVV = function(a, b, _, d) return ALU_REG(a, b, d, 'MOD') end,
	-- Strings
	CAT = function(a, b, _, d) -- CAT A = B ~ D
		assert(V[b].const and V[d].const, 'NYI: CAT only works on compile-time expressions')
		assert(type(V[b].const) == 'string' and type(V[d].const) == 'string',
			'NYI: CAT only works on compile-time strings')
		vset(a, nil, V[b].const .. V[d].const)
	end,
	-- Tables
	GGET = function (a, _, c, _) -- GGET (A = GLOBAL[c])
		if env[c] ~= nil then
			vset(a, nil, env[c])
		else error(string.format("undefined global '%s'", c)) end
	end,
	UGET = function (a, _, c, _) -- UGET (A = UPVALUE[c])
		if env[c] ~= nil then
			vset(a, nil, env[c])
		else error(string.format("undefined upvalue '%s'", c)) end
	end,
	TSETB = function (a, b, _, d) -- TSETB (B[D] = A)
		assert(V[b] and type(V[b].const) == 'table', 'NYI: B[D] where B is not Lua table, BPF map, or pointer')
		local vinfo = V[b].const
		if vinfo.__map then -- BPF map write (constant key)
			return MAP_SET(b, nil, d, a) -- D is literal
		elseif vinfo.__dissector then
			assert(vinfo.__dissector, 'NYI: B[D] where B does not have a known element size')
			local w = ffi.sizeof(vinfo.__dissector)
			-- TODO: support vectorized moves larger than register width
			assert(const_width[w], 'B[D] = A, sizeof(A) must be 1/2/4/8')
			local src_reg, const = vscalar(a, w)
			-- If changing a map value, write to absolute address + offset
			if V[b].source and V[b].source:find('ptr_to_map_value', 1, true) then
				local dst_reg = vreg(b)
				-- Optimization: immediate values (imm32) can be stored directly
				if type(const) == 'number' then
					emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, d, const)
				else
					emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, d, 0)
				end
			-- Table is already on the stack, write to a vinfo-relative address
			elseif vinfo.__base then
				-- Optimization: immediate values (imm32) can be stored directly
				if type(const) == 'number' then
					emit(BPF.MEM + BPF.ST + const_width[w], 10, 0, -vinfo.__base + (d * w), const)
				else
					emit(BPF.MEM + BPF.STX + const_width[w], 10, src_reg, -vinfo.__base + (d * w), 0)
				end
			else
				error('NYI: B[D] where B is not Lua table, BPF map, or pointer')
			end
		elseif vinfo and V[a].const then
			vinfo[d] = V[a].const -- D is a literal index
		else
			error('NYI: B[D] where B is not Lua table, BPF map, or pointer')
		end
	end,
	TSETV = function (a, b, _, d) -- TSETV (B[D] = A)
		assert(V[b] and type(V[b].const) == 'table', 'NYI: B[D] where B is not Lua table, BPF map, or pointer')
		local vinfo = V[b].const
		if vinfo.__map then -- BPF map write (variable key)
			return MAP_SET(b, d, nil, a) -- D is variable
		elseif vinfo.__dissector then
			assert(vinfo.__dissector, 'NYI: B[D] where B does not have a known element size')
			local w = ffi.sizeof(vinfo.__dissector)
			-- TODO: support vectorized moves larger than register width
			assert(const_width[w], 'B[D] = A, sizeof(A) must be 1/2/4/8')
			local src_reg, const = vscalar(a, w)
			-- If changing a map value, write to absolute address + offset
			if V[b].source and V[b].source:find('ptr_to_map_value', 1, true) then
				-- Calculate variable address from two registers
				local tmp_var = stackslots + 1
				vset(tmp_var, nil, d)
				ALU_REG(tmp_var, tmp_var, b, 'ADD')
				local dst_reg = vreg(tmp_var)
				V[tmp_var].reg = nil -- Only temporary allocation
				-- Optimization: immediate values (imm32) can be stored directly
				if type(const) == 'number' and w < 8 then
					emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, 0, const)
				else
					emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, 0, 0)
				end
			-- Table is already on the stack, write to a vinfo-relative address
			elseif vinfo.__base then
				-- Calculate variable address from two registers
				local tmp_var = stackslots + 1
				vcopy(tmp_var, d)                       -- Element position
				if w > 1 then
					ALU_IMM(tmp_var, tmp_var, w, 'MUL') -- multiply by element size
				end
				local dst_reg = vreg(tmp_var)           -- add R10 (stack pointer)
				emit(BPF.ALU64 + BPF.ADD + BPF.X, dst_reg, 10, 0, 0)
				V[tmp_var].reg = nil -- Only temporary allocation
				-- Optimization: immediate values (imm32) can be stored directly
				if type(const) == 'number' and w < 8 then
					emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, -vinfo.__base, const)
				else
					emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, -vinfo.__base, 0)
				end
			else
				error('NYI: B[D] where B is not Lua table, BPF map, or pointer')
			end
		elseif vinfo and V[d].const and V[a].const then
			vinfo[V[d].const] = V[a].const
		else
			error('NYI: B[D] where B is not Lua table, BPF map, or pointer')
		end
	end,
	TSETS = function (a, b, c, _) -- TSETS (B[C] = A)
		assert(V[b] and V[b].const, 'NYI: B[C] where B is not Lua table, BPF map, or pointer')
		local base = V[b].const
		if base.__dissector then
			local ofs,bpos = ffi.offsetof(base.__dissector, c)
			assert(not bpos, 'NYI: B[C] = A, where C is a bitfield')
			local w = builtins.sizeofattr(base.__dissector, c)
			-- TODO: support vectorized moves larger than register width
			assert(const_width[w], 'B[C] = A, sizeof(A) must be 1/2/4/8')
			local src_reg, const = vscalar(a, w)
			-- If changing a map value, write to absolute address + offset
			if V[b].source and V[b].source:find('ptr_to_map_value', 1, true) then
				local dst_reg = vreg(b)
				-- Optimization: immediate values (imm32) can be stored directly
				if type(const) == 'number' and w < 8 then
					emit(BPF.MEM + BPF.ST + const_width[w], dst_reg, 0, ofs, const)
				else
					emit(BPF.MEM + BPF.STX + const_width[w], dst_reg, src_reg, ofs, 0)
				end
			-- Table is already on the stack, write to a base-relative address
			elseif base.__base then
				-- Optimization: immediate values (imm32) can be stored directly
				if type(const) == 'number' and w < 8 then
					emit(BPF.MEM + BPF.ST + const_width[w], 10, 0, -base.__base + ofs, const)
				else
					emit(BPF.MEM + BPF.STX + const_width[w], 10, src_reg, -base.__base + ofs, 0)
				end
			else
				error('NYI: B[C] where B is not Lua table, BPF map, or pointer')
			end
		elseif V[a].const then
			base[c] = V[a].const
		else
			error('NYI: B[C] where B is not Lua table, BPF map, or pointer')
		end
	end,
	TGETB = function (a, b, _, d) -- TGETB (A = B[D])
		local base = V[b].const
		assert(type(base) == 'table', 'NYI: A = B[D] where B is not Lua table or BPF map')
		if a ~= b then vset(a) end
		if base.__map then -- BPF map read (constant key)
			MAP_GET(a, b, nil, d)
		-- Pointer access with a dissector (traditional uses BPF_LD, direct uses BPF_MEM)
		elseif V[b].source and V[b].source:find('ptr_to_') then
			local vtype = base.__dissector and base.__dissector or ffi.typeof('uint8_t')
			LOAD(a, b, d, vtype)
		-- Specialise PTR[0] as the dereference operator
		elseif cdef.isptr(V[b].type) and d == 0 then
			vcopy(a, b)
			local dst_reg = vreg(a)
			vderef(dst_reg, dst_reg, V[a])
			V[a].type = V[a].const.__dissector
		else
			error('NYI: A = B[D], where B is not Lua table or packet dissector or pointer dereference')
		end
	end,
	TGETV = function (a, b, _, d) -- TGETV (A = B[D])
		local base = V[b].const
		assert(type(base) == 'table', 'NYI: A = B[D] where B is not Lua table or BPF map')
		if a ~= b then vset(a) end
		if base.__map then -- BPF map read
			MAP_GET(a, b, d)
		-- Pointer access with a dissector (traditional uses BPF_LD, direct uses BPF_MEM)
		elseif V[b].source and V[b].source:find('ptr_to_') then
			local vtype = base.__dissector and base.__dissector or ffi.typeof('uint8_t')
			LOAD(a, b, d, vtype)
		-- Constant dereference
		elseif type(V[d].const) == 'number' then
			V[a].const = base[V[d].const]
		else
			error('NYI: A = B[D], where B is not Lua table or packet dissector or pointer dereference')
		end
	end,
	TGETS = function (a, b, c, _) -- TGETS (A = B[C])
		local base = V[b].const
		assert(type(base) == 'table', 'NYI: A = B[C] where C is a string and B is not Lua table or BPF map')
		if a ~= b then vset(a) end
		if base.__dissector then
			local ofs,bpos,bsize = ffi.offsetof(base.__dissector, c)
			-- Resolve table key using metatable
			if not ofs and type(base.__dissector[c]) == 'string' then
				c = base.__dissector[c]
				ofs,bpos,bsize = ffi.offsetof(base.__dissector, c)
			end
			if not ofs and proto[c] then -- Load a new dissector at the given offset
				BUILTIN(proto[c], a, b, c)
			else
				-- Loading a register from an offset is a little bit tricky, as there are
				-- several data sources and value loading modes with different restrictions,
				-- such as checking pointer values for NULL compared to using the stack.
				assert(ofs, tostring(base.__dissector)..'.'..c..' attribute does not exist')
				if a ~= b then vset(a) end
				-- Dissected value is probably not constant anymore
				local new_const = nil
				local w, atype = builtins.sizeofattr(base.__dissector, c)
				-- [SP+K] addressing using R10 (stack pointer)
				-- Doesn't need to be checked for NULL
				if base.__base and base.__base > 0 then
					if cdef.isptr(atype) then -- If the member is pointer type, update base pointer with offset
						new_const = {__base = base.__base-ofs}
					else
						local dst_reg = vreg(a, nil, true)
						emit(BPF.MEM + BPF.LDX + const_width[w], dst_reg, 10, -base.__base+ofs, 0)
					end
				-- Pointer access with a dissector (traditional uses BPF_LD, direct uses BPF_MEM)
				elseif V[b].source and V[b].source:find('ptr_to_') then
					LOAD(a, b, ofs, atype)
				else
					error('NYI: B[C] where B is not Lua table, BPF map, or pointer')
				end
				-- Bitfield, must be further narrowed with a bitmask/shift
				if bpos then
					local mask = 0
					for i=bpos+1,bpos+bsize do
						mask = bit.bor(mask, bit.lshift(1, w*8-i))
					end
					emit(BPF.ALU64 + BPF.AND + BPF.K, vreg(a), 0, 0, mask)
					-- Free optimization: single-bit values need just a boolean result
					if bsize > 1 then
						local shift = w*8-bsize-bpos
						if shift > 0 then
							emit(BPF.ALU64 + BPF.RSH + BPF.K, vreg(a), 0, 0, shift)
						end
					end
				end
				V[a].type = atype
				V[a].const = new_const
				V[a].source = V[b].source
				-- Track direct access to skb data
				-- see https://www.kernel.org/doc/Documentation/networking/filter.txt "Direct packet access"
				if ffi.istype(base.__dissector, ffi.typeof('struct sk_buff')) then
					-- Direct access to skb uses skb->data and skb->data_end
					-- which are encoded as u32, but are actually pointers
					if c == 'data' or c == 'data_end' then
						V[a].const = {__dissector = ffi.typeof('uint8_t')}
						V[a].source = 'ptr_to_skb'
					end
				end
			end
		else
			V[a].const = base[c]
		end
	end,
	-- Loops and branches
	CALLM = function (a, b, _, d) -- A = A(A+1, ..., A+D+MULTRES)
		-- NYI: Support single result only
		CALL(a, b, d+2)
	end,
	CALL = function (a, b, _, d) -- A = A(A+1, ..., A+D-1)
		CALL(a, b, d)
	end,
	JMP = function (a, _, c, _) -- JMP
		-- Discard unused slots after the jump
		for i, _ in pairs(V) do
			if i >= a and i < stackslots then
				V[i] = nil
			end
		end
		-- Cross the basic block boundary if the jump target isn't provably unreachable
		local val = code.fixup[c] or {}
		if code.seen_cmp and code.seen_cmp ~= ALWAYS then
			if code.seen_cmp ~= NEVER then -- Do not emit the jump or fixup
				-- Store the previous CMP insn for re-emitting after compensation code
				local jmpi = ffi.new('struct bpf_insn', code.insn[code.pc-1])
				code.pc = code.pc - 1
				-- First branch point, emit compensation code
				local Vcomp = Vstate[c]
				if not Vcomp then
					-- Select a scratch register (R0-R5) that isn't used as an operand
					-- in the CMP instruction, as the variable may not be live after
					-- the JMP, but it may be used in the JMP+CMP instruction itself
					local tmp_reg = 0
					for reg = 0, 5 do
						if reg ~= jmpi.dst_reg and reg ~= jmpi.src_reg then
							tmp_reg = reg
							break
						end
					end
					-- Force materialization of constants at the end of the BB
					for i, v in pairs(V) do
						if not v.reg and cdef.isimmconst(v) then
							vreg(i, tmp_reg) -- Load to TMP register (not saved)
							reg_spill(i) -- Spill caller-saved registers
						end
					end
					-- Record variable state
					Vstate[c] = V
					Vcomp = V
					V = table_copy(V)
				-- Variable state already set, emit specific compensation code
				else
					bb_end(Vcomp)
				end
				-- Record pointer NULL check from the condition
				-- If the condition checks a pointer variable against NULL,
				-- we can assume it will not be NULL in the fall-through block
				if code.seen_null_guard then
					local var = code.seen_null_guard
					-- The null guard can have two forms:
					--   if x == nil then goto
					--   if x ~= nil then goto
					-- The first form guarantees that the variable will be non-nil on the following instruction
					-- The second form guarantees that the variable will be non-nil at the jump target
					local vinfo = code.seen_null_guard_inverse and Vcomp[var] or V[var]
					if vinfo.source then
						local pos = vinfo.source:find('_or_null', 1, true)
						if pos then
							vinfo.source = vinfo.source:sub(1, pos - 1)
						end
					end
				end
				-- Re-emit the CMP insn
				emit(jmpi.code, jmpi.dst_reg, jmpi.src_reg, jmpi.off, jmpi.imm)
				-- Fuse JMP into the previous CMP opcode, mark the JMP target for fixup
				-- as we don't know the relative offset in the generated code yet
				table.insert(val, code.pc-1)
				code.fixup[c] = val
			end
			code.seen_cmp = nil
			code.seen_null_guard = nil
			code.seen_null_guard_inverse = nil
		elseif c == code.bc_pc + 1 then -- luacheck: ignore 542
			-- Eliminate jumps to the next immediate instruction
			-- e.g. 0002    JMP      1 => 0003
		else
			-- We need to synthesise a condition that's always true; however,
			-- BPF prohibits pointer arithmetic to prevent pointer leaks,
			-- so we have to clear out one register and use it for a cmp that's always true
			local dst_reg = reg_alloc(stackslots)
			V[stackslots].reg = nil -- Only temporary allocation
			-- First branch point, emit compensation code
			local Vcomp = Vstate[c]
			if not Vcomp then
				-- Force materialization of constants at the end of the BB
				for i, v in pairs(V) do
					if not v.reg and cdef.isimmconst(v) then
						vreg(i, dst_reg) -- Load to TMP register (not saved)
						reg_spill(i) -- Spill caller-saved registers
					end
				end
				-- Record variable state
				Vstate[c] = V
				V = table_copy(V)
			-- Variable state already set, emit specific compensation code
			else
				bb_end(Vcomp)
			end
			emit(BPF.ALU64 + BPF.MOV + BPF.K, dst_reg, 0, 0, 0)
			emit(BPF.JMP + BPF.JEQ + BPF.K, dst_reg, 0, 0xffff, 0)
			table.insert(val, code.pc-1) -- Fixup JMP target
			code.reachable = false -- Code following the JMP is not reachable
			code.fixup[c] = val
		end
	end,
	RET1 = function (a, _, _, _) -- RET1
		-- Free optimisation: a spilled variable will not be filled again
		for i, v in pairs(V) do
			if i ~= a then v.reg = nil end
		end
		if V[a].reg ~= 0 then vreg(a, 0) end
		-- Convenience: dereference pointer variables
		-- e.g. 'return map[k]' will return the actual map value, not a pointer
		if cdef.isptr(V[a].type) then
			vderef(0, 0, V[a])
		end
		emit(BPF.JMP + BPF.EXIT, 0, 0, 0, 0)
		code.reachable = false
	end,
	RET0 = function (_, _, _, _) -- RET0
		emit(BPF.ALU64 + BPF.MOV + BPF.K, 0, 0, 0, 0)
		emit(BPF.JMP + BPF.EXIT, 0, 0, 0, 0)
		code.reachable = false
	end,
	compile = function ()
		return code
	end
}
   1216 
   1217 -- Composite instructions
   1218 function BC.CALLT(a, _, _, d) -- Tailcall: return A(A+1, ..., A+D-1)
   1219 	CALL(a, 1, d)
   1220 	BC.RET1(a)
   1221 end
   1222 
-- Always initialise R6 with the R1 context
   1224 emit(BPF.ALU64 + BPF.MOV + BPF.X, 6, 1, 0, 0)
   1225 -- Register R6 as context variable (first argument)
   1226 if params and params > 0 then
   1227 	vset(0, 6, param_types[1] or proto.skb)
   1228 	assert(V[0].source == V[0].const.source) -- Propagate source annotation from typeinfo
   1229 end
   1230 -- Register tmpvars
   1231 vset(stackslots)
   1232 vset(stackslots+1)
   1233 return setmetatable(BC, {
   1234 	__index = function (_, k, _)
   1235 		if type(k) == 'number' then
   1236 			local op_str = string.sub(require('jit.vmdef').bcnames, 6*k+1, 6*k+6)
   1237 			error(string.format("NYI: opcode '0x%02x' (%-04s)", k, op_str))
   1238 		end
   1239 	end,
   1240 	__call = function (t, op, a, b, c, d)
   1241 		code.bc_pc = code.bc_pc + 1
		-- Exiting the BB straight through (fall-through into a jump target), emit compensation code
   1243 		if Vstate[code.bc_pc] then
   1244 			if code.reachable then
   1245 				-- Instruction is reachable from previous line
   1246 				-- so we must make the variable allocation consistent
   1247 				-- with the variable allocation at the jump source
   1248 				-- e.g. 0001 x:R0 = 5
   1249 				--      0002 if rand() then goto 0005
   1250 				--      0003 x:R0 -> x:stack
   1251 				--      0004 y:R0 = 5
				--      0005 x:? = 10 <-- x was in R0 before the jump, and on the stack after it
   1253 				bb_end(Vstate[code.bc_pc])
   1254 			else
   1255 				-- Instruction isn't reachable from previous line, restore variable layout
   1256 				-- e.g. RET or condition-less JMP on previous line
   1257 				V = table_copy(Vstate[code.bc_pc])
   1258 			end
   1259 		end
		-- Perform fixup of jump targets
		-- We need to do this because one consumed LuaJIT bytecode instruction
		-- may emit several BPF instructions, so jump offsets shift
   1263 		local fixup = code.fixup[code.bc_pc]
   1264 		if fixup ~= nil then
   1265 			-- Patch JMP source insn with relative offset
   1266 			for _,pc in ipairs(fixup) do
   1267 				code.insn[pc].off = code.pc - 1 - pc
   1268 			end
   1269 			code.fixup[code.bc_pc] = nil
   1270 			code.reachable = true
   1271 		end
   1272 		-- Execute
   1273 		if code.reachable then
   1274 			assert(t[op], string.format('NYI: instruction %s, parameters: %s,%s,%s,%s', op,a,b,c,d))
   1275 			return t[op](a, b, c, d)
   1276 		end
   1277 	end,
   1278 })
   1279 end
   1280 
   1281 -- Emitted code dump
   1282 local function dump_mem(cls, ins, _, fuse)
   1283 	-- This is a very dense MEM instruction decoder without much explanation
   1284 	-- Refer to https://www.kernel.org/doc/Documentation/networking/filter.txt for instruction format
   1285 	local mode = bit.band(ins.code, 0xe0)
	if mode == BPF.XADD then cls = 5 end -- XADD is the only mode with its own mnemonic
   1287 	local op_1 = {'LD', 'LDX', 'ST', 'STX', '', 'XADD'}
   1288 	local op_2 = {[0]='W', [8]='H', [16]='B', [24]='DW'}
   1289 	local name = op_1[cls+1] .. op_2[bit.band(ins.code, 0x18)]
   1290 	local off = tonumber(ffi.cast('int16_t', ins.off)) -- Reinterpret as signed
   1291 	local dst = cls < 2 and 'R'..ins.dst_reg or string.format('[R%d%+d]', ins.dst_reg, off)
   1292 	local src = cls % 2 == 0 and '#'..ins.imm or 'R'..ins.src_reg
   1293 	if cls == BPF.LDX then src = string.format('[R%d%+d]', ins.src_reg, off) end
   1294 	if mode == BPF.ABS then src = string.format('skb[%d]', ins.imm) end
   1295 	if mode == BPF.IND then src = string.format('skb[R%d%+d]', ins.src_reg, ins.imm) end
   1296 	return string.format('%s\t%s\t%s', fuse and '' or name, fuse and '' or dst, src)
   1297 end
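
-- For orientation, dump_mem renders instructions roughly like this
-- (hypothetical example instructions, shown for illustration only):
--   LDXW	R0	[R6+0]      -- load word from [R6+0] into R0
--   STXDW	[R10-8]	R0      -- store double word from R0 to the stack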
   1298 
   1299 local function dump_alu(cls, ins, pc)
   1300 	local alu = {'ADD', 'SUB', 'MUL', 'DIV', 'OR', 'AND', 'LSH', 'RSH', 'NEG', 'MOD', 'XOR', 'MOV', 'ARSH', 'END' }
   1301 	local jmp = {'JA', 'JEQ', 'JGT', 'JGE', 'JSET', 'JNE', 'JSGT', 'JSGE', 'CALL', 'EXIT'}
   1302 	local helper = {'unspec', 'map_lookup_elem', 'map_update_elem', 'map_delete_elem', 'probe_read', 'ktime_get_ns',
   1303 					'trace_printk', 'get_prandom_u32', 'get_smp_processor_id', 'skb_store_bytes',
   1304 					'l3_csum_replace', 'l4_csum_replace', 'tail_call', 'clone_redirect', 'get_current_pid_tgid',
   1305 					'get_current_uid_gid', 'get_current_comm', 'get_cgroup_classid', 'skb_vlan_push', 'skb_vlan_pop',
   1306 					'skb_get_tunnel_key', 'skb_set_tunnel_key', 'perf_event_read', 'redirect', 'get_route_realm',
   1307 					'perf_event_output', 'skb_load_bytes'}
   1308 	local op = 0
   1309 	-- This is a very dense ALU instruction decoder without much explanation
   1310 	-- Refer to https://www.kernel.org/doc/Documentation/networking/filter.txt for instruction format
   1311 	for i = 0,13 do if 0x10 * i == bit.band(ins.code, 0xf0) then op = i + 1 break end end
   1312 	local name = (cls == 5) and jmp[op] or alu[op]
   1313 	local src = (bit.band(ins.code, 0x08) == BPF.X) and 'R'..ins.src_reg or '#'..ins.imm
   1314 	local target = (cls == 5 and op < 9) and string.format('\t=> %04d', pc + ins.off + 1) or ''
   1315 	if cls == 5 and op == 9 then target = string.format('\t; %s', helper[ins.imm + 1] or tostring(ins.imm)) end
   1316 	return string.format('%s\t%s\t%s%s', name, 'R'..ins.dst_reg, src, target)
   1317 end
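
-- Similarly, dump_alu renders ALU/JMP instructions roughly like this
-- (hypothetical example instructions, shown for illustration only):
--   MOV	R6	R1              -- register move
--   JEQ	R2	#0	=> 0004     -- conditional jump with a resolved target
--   CALL	R0	#1	; map_lookup_elem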
   1318 
   1319 local function dump_string(code, off, hide_counter)
   1320 	if not code then return end
   1321 	local cls_map = {
   1322 		[0] = dump_mem, [1] = dump_mem, [2] = dump_mem, [3] = dump_mem,
   1323 		[4] = dump_alu, [5] = dump_alu, [7] = dump_alu,
   1324 	}
   1325 	local result = {}
   1326 	local fused = false
   1327 	for i = off or 0, code.pc - 1 do
   1328 		local ins = code.insn[i]
   1329 		local cls = bit.band(ins.code, 0x07)
   1330 		local line = cls_map[cls](cls, ins, i, fused)
   1331 		if hide_counter then
   1332 			table.insert(result, line)
   1333 		else
   1334 			table.insert(result, string.format('%04u\t%s', i, line))
   1335 		end
		fused = string.find(line, 'LDDW', 1, true) -- LDDW spans two insns; blank out the second half's name/dst
   1337 	end
   1338 	return table.concat(result, '\n')
   1339 end
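
-- Usage sketch: print a listing without the instruction counter
-- ('prog' stands for a compiled program as returned by compile() further below):
--   print(dump_string(prog, 0, true))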
   1340 
   1341 local function dump(code)
   1342 	if not code then return end
   1343 	print(string.format('-- BPF %s:0-%u', code.insn, code.pc))
   1344 	print(dump_string(code))
   1345 end
   1346 
   1347 local function compile(prog, params)
   1348 	-- Create code emitter sandbox, include caller locals
   1349 	local env = { pkt=proto.pkt, eth=proto.pkt, BPF=BPF, ffi=ffi }
	-- Include caller locals up to 4 stack frames back;
	-- the narrower scope overrides the broader scope
   1352 	for k = 5, 2, -1 do
   1353 		local i = 1
   1354 		while true do
   1355 			local ok, n, v = pcall(debug.getlocal, k, i)
   1356 			if not ok or not n then break end
   1357 			env[n] = v
   1358 			i = i + 1
   1359 		end
   1360 	end
   1361 	setmetatable(env, {
   1362 		__index = function (_, k)
   1363 			return proto[k] or builtins[k] or _G[k]
   1364 		end
   1365 	})
   1366 	-- Create code emitter and compile LuaJIT bytecode
   1367 	if type(prog) == 'string' then prog = loadstring(prog) end
   1368 	-- Create error handler to print traceback
   1369 	local funci, pc = bytecode.funcinfo(prog), 0
   1370 	local E = create_emitter(env, funci.stackslots, funci.params, params or {})
   1371 	local on_err = function (e)
   1372 			funci = bytecode.funcinfo(prog, pc)
   1373 			local from, to = 0, 0
   1374 			for _ = 1, funci.currentline do
   1375 				from = to
   1376 				to = string.find(funci.source, '\n', from+1, true) or 0
   1377 			end
   1378 			print(funci.loc..':'..string.sub(funci.source, from+1, to-1))
   1379 			print('error: '..e)
   1380 			print(debug.traceback())
   1381 	end
   1382 	for _,op,a,b,c,d in bytecode.decoder(prog) do
   1383 		local ok, _, err = xpcall(E,on_err,op,a,b,c,d)
   1384 		if not ok then
   1385 			return nil, err
   1386 		end
   1387 	end
   1388 	return E:compile()
   1389 end
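
-- Usage sketch for compile() (the filter body is hypothetical and trivial;
-- real filters would use the 'pkt'/'eth' accessors provided by the sandbox above):
--   local prog, err = compile(function (skb)
--      return 0 -- hypothetical trivial filter body
--   end)
--   if prog then dump(prog) end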
   1390 
   1391 -- BPF map interface
   1392 local bpf_map_mt = {
   1393 	__gc = function (map) S.close(map.fd) end,
   1394 	__len = function(map) return map.max_entries end,
   1395 	__index = function (map, k)
   1396 		if type(k) == 'string' then
   1397 			-- Return iterator
   1398 			if k == 'pairs' then
   1399 				return function(t, key)
   1400 					-- Get next key
   1401 					local next_key = ffi.new(ffi.typeof(t.key))
   1402 					local cur_key
   1403 					if key then
   1404 						cur_key = t.key
   1405 						t.key[0] = key
   1406 					else
   1407 						cur_key = ffi.new(ffi.typeof(t.key))
   1408 					end
   1409 					local ok, err = S.bpf_map_op(S.c.BPF_CMD.MAP_GET_NEXT_KEY, map.fd, cur_key, next_key)
   1410 					if not ok then return nil, err end
   1411 					-- Get next value
   1412 					assert(S.bpf_map_op(S.c.BPF_CMD.MAP_LOOKUP_ELEM, map.fd, next_key, map.val))
   1413 					return next_key[0], map.val[0]
   1414 				end, map, nil
			-- Reader for perf event map
   1416 			elseif k == 'reader' then
   1417 				return function (pmap, pid, cpu, event_type)
					-- Caller must specify either a PID or a CPU
   1419 					if not pid or pid < 0 then
   1420 						assert((cpu and cpu >= 0), 'NYI: creating composed reader for all CPUs')
   1421 						pid = -1
   1422 					end
   1423 					-- Create BPF output reader
   1424 					local pe = S.t.perf_event_attr1()
   1425 					pe[0].type = 'software'
   1426 					pe[0].config = 'sw_bpf_output'
   1427 					pe[0].sample_type = 'raw'
   1428 					pe[0].sample_period = 1
   1429 					pe[0].wakeup_events = 1
   1430 					local reader, err = S.t.perf_reader(S.perf_event_open(pe, pid, cpu or -1))
   1431 					if not reader then return nil, tostring(err) end
   1432 					-- Register event reader fd in BPF map
					assert(cpu < pmap.max_entries, string.format('BPF map too small for CPU %d', cpu))
   1434 					pmap[cpu] = reader.fd
   1435 					-- Open memory map and start reading
   1436 					local ok, err = reader:start()
   1437 					assert(ok, tostring(err))
   1438 					ok, err = reader:mmap()
   1439 					assert(ok, tostring(err))
   1440 					return cdef.event_reader(reader, event_type)
   1441 				end
			end
			-- Signalise this is a map type
			return k == '__map'
   1445 		end
   1446 		-- Retrieve key
   1447 		map.key[0] = k
   1448 		local ok, err = S.bpf_map_op(S.c.BPF_CMD.MAP_LOOKUP_ELEM, map.fd, map.key, map.val)
   1449 		if not ok then return nil, err end
   1450 		return ffi.new(map.val_type, map.val[0])
   1451 	end,
   1452 	__newindex = function (map, k, v)
   1453 		map.key[0] = k
   1454 		if v == nil then
			return S.bpf_map_op(S.c.BPF_CMD.MAP_DELETE_ELEM, map.fd, map.key, nil) -- Argument order matches the other map ops
   1456 		end
   1457 		map.val[0] = v
   1458 		return S.bpf_map_op(S.c.BPF_CMD.MAP_UPDATE_ELEM, map.fd, map.key, map.val)
   1459 	end,
   1460 }
   1461 
   1462 -- Linux tracing interface
   1463 local function trace_check_enabled(path)
   1464 	path = path or '/sys/kernel/debug/tracing'
   1465 	if S.statfs(path) then return true end
   1466 	return nil, 'debugfs not accessible: "mount -t debugfs nodev /sys/kernel/debug"? missing sudo?'
   1467 end
   1468 
   1469 -- Tracepoint interface
   1470 local tracepoint_mt = {
   1471 	__index = {
   1472 		bpf = function (t, prog)
   1473 			if type(prog) ~= 'table' then
   1474 				-- Create protocol parser with source probe
   1475 				prog = compile(prog, {proto.type(t.type, {source='ptr_to_probe'})})
   1476 			end
   1477 			-- Load the BPF program
   1478 			local prog_fd, err, log = S.bpf_prog_load(S.c.BPF_PROG.TRACEPOINT, prog.insn, prog.pc)
   1479 			assert(prog_fd, tostring(err)..': '..tostring(log))
   1480 			-- Open tracepoint and attach
   1481 			t.reader:setbpf(prog_fd:getfd())
   1482 			table.insert(t.progs, prog_fd)
   1483 			return prog_fd
   1484 		end,
   1485 	}
   1486 }
   1487 -- Open tracepoint
   1488 local function tracepoint_open(path, pid, cpu, group_fd)
   1489 	-- Open tracepoint and compile tracepoint type
   1490 	local tp = assert(S.perf_tracepoint('/sys/kernel/debug/tracing/events/'..path))
   1491 	local tp_type = assert(cdef.tracepoint_type(path))
   1492 	-- Open tracepoint reader and create interface
   1493 	local reader = assert(S.perf_attach_tracepoint(tp, pid, cpu, group_fd))
   1494 	return setmetatable({tp=tp,type=tp_type,reader=reader,progs={}}, tracepoint_mt)
   1495 end
   1496 
   1497 local function trace_bpf(ptype, pname, pdef, retprobe, prog, pid, cpu, group_fd)
   1498 	-- Load BPF program
   1499 	if type(prog) ~= 'table' then
   1500 		prog = compile(prog, {proto.pt_regs})
   1501 	end
   1502 	local prog_fd, err, log = S.bpf_prog_load(S.c.BPF_PROG.KPROBE, prog.insn, prog.pc)
   1503 	assert(prog_fd, tostring(err)..': '..tostring(log))
   1504 	-- Open tracepoint and attach
   1505 	local tp, err = S.perf_probe(ptype, pname, pdef, retprobe)
   1506 	if not tp then
   1507 		prog_fd:close()
   1508 		return nil, tostring(err)
   1509 	end
   1510 	local reader, err = S.perf_attach_tracepoint(tp, pid, cpu, group_fd, {sample_type='raw, callchain'})
   1511 	if not reader then
   1512 		prog_fd:close()
   1513 		S.perf_probe(ptype, pname, false)
   1514 		return nil, tostring(err)
   1515 	end
   1516 	local ok, err = reader:setbpf(prog_fd:getfd())
   1517 	if not ok then
   1518 		prog_fd:close()
   1519 		reader:close()
   1520 		S.perf_probe(ptype, pname, false)
   1521 		return nil, tostring(err)..' (kernel version should be at least 4.1)'
   1522 	end
   1523 	-- Create GC closure for reader to close BPF program
   1524 	-- and detach probe in correct order
   1525 	ffi.gc(reader, function ()
   1526 		prog_fd:close()
   1527 		reader:close()
   1528 		S.perf_probe(ptype, pname, false)
   1529 	end)
   1530 	return {reader=reader, prog=prog_fd, probe=pname, probe_type=ptype}
   1531 end
   1532 
   1533 -- Module interface
   1534 return setmetatable({
   1535 	new = create_emitter,
   1536 	dump = dump,
   1537 	dump_string = dump_string,
   1538 	maps = {},
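	-- Usage sketch for map() below (map type and size are illustrative; key/value
	-- ctypes default to uint32_t; 'bpf' assumes this module is required under that name):
	--   local m = bpf.map('hash', 256)
	--   m[1] = 42                                  -- MAP_UPDATE_ELEM
	--   print(m[1])                                -- MAP_LOOKUP_ELEM
	--   for k, v in m.pairs, m do print(k, v) end  -- MAP_GET_NEXT_KEY iteration
	--   m[1] = nil                                 -- MAP_DELETE_ELEM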
   1539 	map = function (type, max_entries, key_ctype, val_ctype)
   1540 		if not key_ctype then key_ctype = ffi.typeof('uint32_t') end
   1541 		if not val_ctype then val_ctype = ffi.typeof('uint32_t') end
   1542 		if not max_entries then max_entries = 4096 end
   1543 		-- Special case for BPF_MAP_STACK_TRACE
   1544 		if S.c.BPF_MAP[type] == S.c.BPF_MAP.STACK_TRACE then
   1545 			key_ctype = ffi.typeof('int32_t')
   1546 			val_ctype = ffi.typeof('struct bpf_stacktrace')
   1547 		end
   1548 		local fd, err = S.bpf_map_create(S.c.BPF_MAP[type], ffi.sizeof(key_ctype), ffi.sizeof(val_ctype), max_entries)
   1549 		if not fd then return nil, tostring(err) end
   1550 		local map = setmetatable({
   1551 			max_entries = max_entries,
   1552 			key = ffi.new(ffi.typeof('$ [1]', key_ctype)),
   1553 			val = ffi.new(ffi.typeof('$ [1]', val_ctype)),
   1554 			map_type = S.c.BPF_MAP[type],
   1555 			key_type = key_ctype,
   1556 			val_type = val_ctype,
   1557 			fd = fd:nogc():getfd(),
   1558 		}, bpf_map_mt)
   1559 		return map
   1560 	end,
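	-- Usage sketch for socket() below (interface name and filter are illustrative;
	-- the return value of the filter is the number of bytes to accept):
	--   bpf.socket('lo', function (skb) return 1 end)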
   1561 	socket = function (sock, prog)
		-- Expect a socket object; if sock is a string, assume it is
		-- an interface name (e.g. 'lo'); if it is a number, treat it as a socket fd
   1564 		local ok, err
   1565 		if type(sock) == 'string' then
   1566 			local iface = assert(S.nl.getlink())[sock]
			assert(iface, sock..' is not an interface name')
   1568 			sock, err = S.socket('packet', 'raw')
   1569 			assert(sock, tostring(err))
   1570 			ok, err = sock:bind(S.t.sockaddr_ll({protocol='all', ifindex=iface.index}))
   1571 			assert(ok, tostring(err))
   1572 		elseif type(sock) == 'number' then
   1573 			sock = S.t.fd(sock):nogc()
   1574 		elseif ffi.istype(S.t.fd, sock) then -- luacheck: ignore
   1575 			-- No cast required
   1576 		else
   1577 			return nil, 'socket must either be an fd number, an interface name, or an ljsyscall socket'
   1578 		end
   1579 		-- Load program and attach it to socket
   1580 		if type(prog) ~= 'table' then
   1581 			prog = compile(prog, {proto.skb})
   1582 		end
   1583 		local prog_fd, err, log = S.bpf_prog_load(S.c.BPF_PROG.SOCKET_FILTER, prog.insn, prog.pc)
   1584 		assert(prog_fd, tostring(err)..': '..tostring(log))
   1585 		assert(sock:setsockopt('socket', 'attach_bpf', prog_fd:getfd()))
   1586 		return prog_fd, err
   1587 	end,
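	-- Usage sketch for tracepoint() below (the tracepoint path is illustrative;
	-- pid/cpu/group_fd follow the perf_event_open conventions):
	--   local probe = bpf.tracepoint('syscalls/sys_enter_write', function (t) return 0 end)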
   1588 	tracepoint = function(tp, prog, pid, cpu, group_fd)
   1589 		assert(trace_check_enabled())
		-- Return a tracepoint instance if no program is specified;
		-- this allows free specialisation of arg0 to the tracepoint type
   1592 		local probe = tracepoint_open(tp, pid, cpu, group_fd)
   1593 		-- Load the BPF program
   1594 		if prog then
   1595 			probe:bpf(prog)
   1596 		end
   1597 		return probe
   1598 	end,
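	-- Usage sketch for kprobe() below; 'tp' is 'probe_name:definition' as consumed
	-- by perf_probe (probe name and target are illustrative):
	--   bpf.kprobe('myprobe:sys_write', function (ptregs) return 0 end, false, -1, 0)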
   1599 	kprobe = function(tp, prog, retprobe, pid, cpu, group_fd)
   1600 		assert(trace_check_enabled())
   1601 		-- Open tracepoint and attach
   1602 		local pname, pdef = tp:match('([^:]+):(.+)')
   1603 		return trace_bpf('kprobe', pname, pdef, retprobe, prog, pid, cpu, group_fd)
   1604 	end,
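	-- Usage sketch for uprobe() below; 'tp' is 'object_path:symbol', resolved via the
	-- ELF symbol table (binary and symbol are illustrative):
	--   bpf.uprobe('/bin/bash:readline', function (ptregs) return 0 end, false, -1, 0)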
   1605 	uprobe = function(tp, prog, retprobe, pid, cpu, group_fd)
   1606 		assert(trace_check_enabled())
   1607 		-- Translate symbol to address
   1608 		local obj, sym_want = tp:match('([^:]+):(.+)')
   1609 		if not S.statfs(obj) then return nil, S.t.error(S.c.E.NOENT) end
		-- Resolve ELF object (no support for anything else)
   1611 		local elf = require('bpf.elf').open(obj)
   1612 		local sym = elf:resolve(sym_want)
   1613 		if not sym then return nil, 'no such symbol' end
		sym = sym.st_value - elf:loadaddr()
		-- Format the 64-bit address as hex; pad the low half to 8 digits so the halves stay aligned
		local sym_addr = string.format('%x%08x', tonumber(bit.rshift(sym, 32)),
		                                         tonumber(ffi.cast('uint32_t', sym)))
   1617 		-- Convert it to expected uprobe format
   1618 		local pname = string.format('%s_%s', obj:gsub('.*/', ''), sym_addr)
   1619 		local pdef = obj..':0x'..sym_addr
   1620 		return trace_bpf('uprobe', pname, pdef, retprobe, prog, pid, cpu, group_fd)
   1621 	end,
   1622 	tracelog = function(path)
   1623 		assert(trace_check_enabled())
   1624 		path = path or '/sys/kernel/debug/tracing/trace_pipe'
   1625 		return io.open(path, 'r')
   1626 	end,
   1627 	ntoh = builtins.ntoh, hton = builtins.hton,
   1628 }, {
   1629 	__call = function (_, prog) return compile(prog) end,
   1630 })
   1631