Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sandbox/linux/seccomp-bpf/syscall.h"
      6 
      7 #include <asm/unistd.h>
      8 #include <errno.h>
      9 
     10 #include "base/basictypes.h"
     11 
     12 namespace sandbox {
     13 
     14   asm(      // We need to be able to tell the kernel exactly where we made a
     15             // system call. The C++ compiler likes to sometimes clone or
     16             // inline code, which would inadvertently end up duplicating
     17             // the entry point.
     18             // "gcc" can suppress code duplication with suitable function
     19             // attributes, but "clang" doesn't have this ability.
     20             // The "clang" developer mailing list suggested that the correct
     21             // and portable solution is a file-scope assembly block.
     22             // N.B. We do mark our code as a proper function so that backtraces
     23             // work correctly. But we make absolutely no attempt to use the
     24             // ABI's calling conventions for passing arguments. We will only
     25             // ever be called from assembly code and thus can pick more
     26             // suitable calling conventions.
                    //
                    // Register interface of SyscallAsm, as used by the code below:
                    //   i386:   number in %eax, argument array in %edi, value
                    //           handed back in %eax.
                    //   x86-64: number in %rax, argument array in %r12, value
                    //           handed back in %rax.
                    //   ARM:    number in r0, argument array in r6, value handed
                    //           back in r0.
                    // The argument array holds six pointer-sized entries. When the
                    // number is negative, no system call is made and the address of
                    // local label "2" is handed back instead.
     27 #if defined(__i386__)
     28             ".text\n"
     29             ".align 16, 0x90\n"
     30             ".type SyscallAsm, @function\n"
     31  "SyscallAsm:.cfi_startproc\n"
     32             // Check if "%eax" is negative. If so, do not attempt to make a
     33             // system call. Instead, compute the return address that is visible
     34             // to the kernel after we execute "int $0x80". This address can be
     35             // used as a marker that BPF code inspects.
     36             "test %eax, %eax\n"
     37             "jge  1f\n"
     38             // Always make sure that our code is position-independent, or
     39             // address space randomization might not work on i386. This means,
     40             // we can't use "lea", but instead have to rely on "call/pop".
     41             "call 0f;   .cfi_adjust_cfa_offset  4\n"
     42           "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
     43             "addl $2f-0b, %eax\n"
     44             "ret\n"
     45             // Save registers that we don't want to clobber. On i386, we need to
     46             // save relatively aggressively, as there are a couple of registers
     47             // that are used internally (e.g. %ebx for position-independent
     48             // code, and %ebp for the frame pointer), and as we need to keep at
     49             // least a few registers available for the register allocator.
     50           "1:push %esi; .cfi_adjust_cfa_offset 4\n"
     51             "push %edi; .cfi_adjust_cfa_offset 4\n"
     52             "push %ebx; .cfi_adjust_cfa_offset 4\n"
     53             "push %ebp; .cfi_adjust_cfa_offset 4\n"
     54             // Copy entries from the array holding the arguments into the
     55             // correct CPU registers.
     56             "movl  0(%edi), %ebx\n"
     57             "movl  4(%edi), %ecx\n"
     58             "movl  8(%edi), %edx\n"
     59             "movl 12(%edi), %esi\n"
     60             "movl 20(%edi), %ebp\n"
                    // "%edi" is the base pointer for all of the loads above, so it
                    // has to be overwritten last.
     61             "movl 16(%edi), %edi\n"
     62             // Enter the kernel.
     63             "int  $0x80\n"
     64             // This is our "magic" return address that the BPF filter sees.
     65           "2:"
     66             // Restore any clobbered registers that we didn't declare to the
     67             // compiler. The pops mirror the pushes above in reverse order.
     68             "pop  %ebp; .cfi_adjust_cfa_offset -4\n"
     69             "pop  %ebx; .cfi_adjust_cfa_offset -4\n"
     70             "pop  %edi; .cfi_adjust_cfa_offset -4\n"
     71             "pop  %esi; .cfi_adjust_cfa_offset -4\n"
     72             "ret\n"
     73             ".cfi_endproc\n"
     74           "9:.size SyscallAsm, 9b-SyscallAsm\n"
     75 #elif defined(__x86_64__)
     76             ".text\n"
     77             ".align 16, 0x90\n"
     78             ".type SyscallAsm, @function\n"
     79  "SyscallAsm:.cfi_startproc\n"
     80             // Check if "%rax" is negative. If so, do not attempt to make a
     81             // system call. Instead, compute the return address that is visible
     82             // to the kernel after we execute "syscall". This address can be
     83             // used as a marker that BPF code inspects.
     84             "test %rax, %rax\n"
     85             "jge  1f\n"
     86             // Always make sure that our code is position-independent, or the
     87             // linker will throw a hissy fit on x86-64.
     88             "call 0f;   .cfi_adjust_cfa_offset  8\n"
     89           "0:pop  %rax; .cfi_adjust_cfa_offset -8\n"
     90             "addq $2f-0b, %rax\n"
     91             "ret\n"
     92             // We declared all clobbered registers to the compiler. On x86-64,
     93             // there really isn't much of a problem with register pressure. So,
     94             // we can go ahead and directly copy the entries from the arguments
     95             // array into the appropriate CPU registers.
     96           "1:movq  0(%r12), %rdi\n"
     97             "movq  8(%r12), %rsi\n"
     98             "movq 16(%r12), %rdx\n"
     99             "movq 24(%r12), %r10\n"
    100             "movq 32(%r12), %r8\n"
    101             "movq 40(%r12), %r9\n"
    102             // Enter the kernel.
    103             "syscall\n"
    104             // This is our "magic" return address that the BPF filter sees.
    105           "2:ret\n"
    106             ".cfi_endproc\n"
    107           "9:.size SyscallAsm, 9b-SyscallAsm\n"
    108 #elif defined(__arm__)
    109             // Throughout this file, we use the same mode (ARM vs. thumb)
    110             // that the C++ compiler uses. This means, when transferring control
    111             // from C++ to assembly code, we do not need to switch modes (e.g.
    112             // by using the "bx" instruction). It also means that our assembly
    113             // code should not be invoked directly from code that lives in
    114             // other compilation units, as we don't bother implementing thumb
    115             // interworking. That's OK, as we don't make any of the assembly
    116             // symbols public. They are all local to this file.
    117             ".text\n"
    118             ".align 2\n"
    119             ".type SyscallAsm, %function\n"
    120 #if defined(__thumb__)
    121             ".thumb_func\n"
    122 #else
    123             ".arm\n"
    124 #endif
    125  "SyscallAsm:.fnstart\n"
    126             "@ args = 0, pretend = 0, frame = 8\n"
    127             "@ frame_needed = 1, uses_anonymous_args = 0\n"
                    // Prologue: save the frame pointer (r7 in thumb mode, fp in ARM
                    // mode) together with the link register.
    128 #if defined(__thumb__)
    129             ".cfi_startproc\n"
    130             "push {r7, lr}\n"
    131             ".cfi_offset 14, -4\n"
    132             ".cfi_offset  7, -8\n"
    133             "mov r7, sp\n"
    134             ".cfi_def_cfa_register 7\n"
    135             ".cfi_def_cfa_offset 8\n"
    136 #else
    137             "stmfd sp!, {fp, lr}\n"
    138             "add fp, sp, #4\n"
    139 #endif
    140             // Check if "r0" is negative. If so, do not attempt to make a
    141             // system call. Instead, compute the return address that is visible
    142             // to the kernel after we execute "swi 0". This address can be
    143             // used as a marker that BPF code inspects.
    144             "cmp r0, #0\n"
    145             "bge 1f\n"
                    // Negative number: load the address of label "2" into r0 and
                    // jump straight to the epilogue at that label.
    146             "ldr r0, =2f\n"
    147             "b   2f\n"
    148             // We declared (almost) all clobbered registers to the compiler. On
    149             // ARM there is no particular register pressure. So, we can go
    150             // ahead and directly copy the entries from the arguments array
    151             // into the appropriate CPU registers.
    152           "1:ldr r5, [r6, #20]\n"
    153             "ldr r4, [r6, #16]\n"
    154             "ldr r3, [r6, #12]\n"
    155             "ldr r2, [r6, #8]\n"
    156             "ldr r1, [r6, #4]\n"
                    // Move the system call number from r0 into r7, then reuse r0
                    // for the first argument.
    157             "mov r7, r0\n"
    158             "ldr r0, [r6, #0]\n"
    159             // Enter the kernel
    160             "swi 0\n"
    161             // Restore the frame pointer. Also restore the program counter from
    162             // the link register; this makes us return to the caller.
                    // In thumb mode this also restores r7. In ARM mode only fp is
                    // restored, so r7 (overwritten above) stays clobbered and the
                    // caller has to treat it as such.
    163 #if defined(__thumb__)
    164           "2:pop {r7, pc}\n"
    165             ".cfi_endproc\n"
    166 #else
    167           "2:ldmfd sp!, {fp, pc}\n"
    168 #endif
    169             ".fnend\n"
    170           "9:.size SyscallAsm, 9b-SyscallAsm\n"
    171 #endif
    172   );  // asm
    173 
    174 intptr_t SandboxSyscall(int nr,
    175                         intptr_t p0, intptr_t p1, intptr_t p2,
    176                         intptr_t p3, intptr_t p4, intptr_t p5) {
          // Performs system call "nr" with the six arguments p0..p5 by calling
          // the file-scope assembly routine SyscallAsm.
          //
          // The arguments are packed into a local array whose address is handed
          // to SyscallAsm in a fixed register (%edi on i386, %r12 on x86-64, r6
          // on ARM); "nr" travels in %eax / %rax / r0.
          //
          // Returns whatever SyscallAsm leaves in %eax / %rax / r0, unmodified;
          // this function does not touch errno on the supported architectures.
          // If "nr" is negative, SyscallAsm makes no system call and instead
          // hands back the address of its "magic" return label.
          // On any other architecture, sets errno to ENOSYS and returns -1.
          //
    177   // We rely on "intptr_t" to be the exact size as a "void *". This is
    178   // typically true, but just in case, we add a check. The language
    179   // specification allows platforms some leeway in cases, where
    180   // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
    181   // that this would only be an issue for IA64, which we are currently not
    182   // planning on supporting. And it is even possible that this would work
    183   // on IA64, but for lack of actual hardware, I cannot test.
    184   COMPILE_ASSERT(sizeof(void *) == sizeof(intptr_t),
    185                  pointer_types_and_intptr_must_be_exactly_the_same_size);
    186 
    187   const intptr_t args[6] = { p0, p1, p2, p3, p4, p5 };
    188 
    189   // Invoke our file-scope assembly code. The constraints have been picked
    190   // carefully to match what the rest of the assembly code expects in input,
    191   // output, and clobbered registers.
    192 #if defined(__i386__)
    193   intptr_t ret = nr;
    194   asm volatile(
    195     "call SyscallAsm\n"
    196     // N.B. These are not the calling conventions normally used by the ABI.
    197     : "=a"(ret)
    198     : "0"(ret), "D"(args)
    199     : "cc", "esp", "memory", "ecx", "edx");
    200 #elif defined(__x86_64__)
    201   intptr_t ret = nr;
    202   {
    203     register const intptr_t *data __asm__("r12") = args;
    204     asm volatile(
    205       "lea  -128(%%rsp), %%rsp\n"  // Avoid red zone.
    206       "call SyscallAsm\n"
    207       "lea  128(%%rsp), %%rsp\n"
    208       // N.B. These are not the calling conventions normally used by the ABI.
    209       : "=a"(ret)
    210       : "0"(ret), "r"(data)
    211       : "cc", "rsp", "memory",
    212         "rcx", "rdi", "rsi", "rdx", "r8", "r9", "r10", "r11");
    213   }
    214 #elif defined(__arm__)
    215   intptr_t ret;
    216   {
    217     register intptr_t inout __asm__("r0") = nr;
    218     register const intptr_t *data __asm__("r6") = args;
    219     asm volatile(
    220       "bl SyscallAsm\n"
    221       // N.B. These are not the calling conventions normally used by the ABI.
    222       : "=r"(inout)
    223       : "0"(inout), "r"(data)
    224       : "cc", "lr", "memory", "r1", "r2", "r3", "r4", "r5"
              // This guard must test for thumb mode. We are already inside the
              // "defined(__arm__)" branch, so testing "!defined(__arm__)" here
              // would always be false and "r7" would never be declared.
    225 #if !defined(__thumb__)
    226       // In thumb mode, we cannot use "r7" as a general purpose register, as
    227       // it is our frame pointer. We have to manually manage and preserve it.
    228       // In ARM mode, we have a dedicated frame pointer register and "r7" is
    229       // thus available as a general purpose register. We don't preserve it,
    230       // but instead mark it as clobbered.
    231         , "r7"
    232 #endif  // !defined(__thumb__)
    233       );
    234     ret = inout;
    235   }
    236 #else
          // Unsupported architecture: report "function not implemented".
    237   errno = ENOSYS;
    238   intptr_t ret = -1;
    239 #endif
    240   return ret;
    241 }
    242 
    243 }  // namespace sandbox
    244