Home | History | Annotate | Download | only in seccomp-bpf
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "sandbox/linux/seccomp-bpf/syscall.h"
      6 
      7 #include <errno.h>
      8 #include <stdint.h>
      9 
     10 #include "base/logging.h"
     11 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
     12 
     13 namespace sandbox {
     14 
     15 namespace {
     16 
#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
    defined(ARCH_CPU_MIPS_FAMILY)
// Number that's not currently used by any Linux kernel ABIs.
// Syscall::InvalidCall() issues this number to deliberately trigger the
// sandbox's handling of unknown system calls.
const int kInvalidSyscallNumber = 0x351d3;
#else
#error Unrecognized architecture
#endif
     24 
asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler likes to sometimes clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
#if defined(__i386__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save registers that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers.
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rdi" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rdi, %rdi\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "lea 2f(%rip), %rax\n"
    "ret\n"
    // Now we load the registers used to pass arguments to the system
    // call: system call number in %rax, and arguments in %rdi, %rsi,
    // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers
    // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no
    // need to worry here about spilling registers or CFI directives.
    "1:movq %rdi, %rax\n"
    "movq  0(%rsi), %rdi\n"
    "movq 16(%rsi), %rdx\n"
    "movq 24(%rsi), %r10\n"
    "movq 32(%rsi), %r8\n"
    "movq 40(%rsi), %r9\n"
    "movq  8(%rsi), %rsi\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means, when transferring control
    // from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:\n"
#if !defined(__native_client_nonsfi__)
    // The .fnstart and .fnend pseudo operations create an unwind table.
    // They also create a reference to the symbol __aeabi_unwind_cpp_pr0,
    // which is not provided by the PNaCl toolchain. Disable them there.
    ".fnstart\n"
#endif
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".save {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
#if !defined(__native_client_nonsfi__)
    // Do not use .fnstart and .fnend for the PNaCl toolchain. See the
    // comment above for more details.
    ".fnend\n"
#endif
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__mips__)
    ".text\n"
    ".align 4\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 40, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    "addiu  $sp, $sp, -40\n"
    "sw     $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    "la     $v0, 2f\n"
    "b      2f\n"
    " nop\n"
    // On MIPS the first four arguments go to registers a0 - a3 and any
    // argument after that goes to the stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate
    // CPU registers and on to the stack.
    "1:lw     $a3, 28($a0)\n"
    "lw     $a2, 24($a0)\n"
    "lw     $a1, 20($a0)\n"
    "lw     $t0, 16($a0)\n"
    "sw     $a3, 28($sp)\n"
    "sw     $a2, 24($sp)\n"
    "sw     $a1, 20($sp)\n"
    "sw     $t0, 16($sp)\n"
    "lw     $a3, 12($a0)\n"
    "lw     $a2, 8($a0)\n"
    "lw     $a1, 4($a0)\n"
    "lw     $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw     $ra, 36($sp)\n"
    "jr     $ra\n"
    " addiu  $sp, $sp, 40\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    // Check if "x0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "svc 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0,2f\n"
    "b 2f\n"
    // All clobbered registers were declared to the compiler (see the
    // aarch64 constraints in Syscall::Call), so we can copy the entries
    // from the arguments array (pointed to by "x6") directly into the
    // argument registers. The system call number is moved from "x0"
    // into "x8" before entering the kernel.
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel
    "svc 0\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm

#if defined(__x86_64__)
extern "C" {
// On x86-64 the assembly above takes its inputs in %rdi and %rsi, which
// happens to match the regular calling convention, so C++ code can call
// SyscallAsm directly through this declaration.
intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]);
}
#endif
    266 
    267 }  // namespace
    268 
    269 intptr_t Syscall::InvalidCall() {
    270   // Explicitly pass eight zero arguments just in case.
    271   return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
    272 }
    273 
// Makes system call |nr| directly, without going through libc. If |nr| is
// negative, no system call is performed; instead the file-scope assembly
// returns its "magic" post-syscall address (see SyscallAsm above), which the
// BPF filter uses as a marker. Returns the raw kernel result, normalized to
// the -errno convention on MIPS.
intptr_t Syscall::Call(int nr,
                       intptr_t p0,
                       intptr_t p1,
                       intptr_t p2,
                       intptr_t p3,
                       intptr_t p4,
                       intptr_t p5,
                       intptr_t p6,
                       intptr_t p7) {
  // We rely on "intptr_t" to be the exact size as a "void *". This is
  // typically true, but just in case, we add a check. The language
  // specification allows platforms some leeway in cases, where
  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
  // that this would only be an issue for IA64, which we are currently not
  // planning on supporting. And it is even possible that this would work
  // on IA64, but for lack of actual hardware, I cannot test.
  static_assert(sizeof(void*) == sizeof(intptr_t),
                "pointer types and intptr_t must be exactly the same size");

  // TODO(nedeljko): Enable use of more than six parameters on architectures
  //                 where that makes sense.
#if defined(__mips__)
  // MIPS forwards all eight parameters; SyscallAsm places the first four in
  // registers and the rest on the stack.
  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
#else
  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
#endif  // defined(__mips__)

// Invoke our file-scope assembly code. The constraints have been picked
// carefully to match what the rest of the assembly code expects in input,
// output, and clobbered registers.
#if defined(__i386__)
  intptr_t ret = nr;
  asm volatile(
      "call SyscallAsm\n"
      // N.B. These are not the calling conventions normally used by the ABI.
      : "=a"(ret)
      : "0"(ret), "D"(args)
      : "cc", "esp", "memory", "ecx", "edx");
#elif defined(__x86_64__)
  // On x86-64, SyscallAsm follows the regular ABI and can be called directly.
  intptr_t ret = SyscallAsm(nr, args);
#elif defined(__arm__)
  intptr_t ret;
  {
    register intptr_t inout __asm__("r0") = nr;
    register const intptr_t* data __asm__("r6") = args;
    asm volatile(
        "bl SyscallAsm\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=r"(inout)
        : "0"(inout), "r"(data)
        : "cc",
          "lr",
          "memory",
          "r1",
          "r2",
          "r3",
          "r4",
          "r5"
#if !defined(__thumb__)
          // In thumb mode, we cannot use "r7" as a general purpose register, as
          // it is our frame pointer. We have to manually manage and preserve
          // it.
          // In ARM mode, we have a dedicated frame pointer register and "r7" is
          // thus available as a general purpose register. We don't preserve it,
          // but instead mark it as clobbered.
          ,
          "r7"
#endif  // !defined(__thumb__)
        );
    ret = inout;
  }
#elif defined(__mips__)
  int err_status;
  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);

  if (err_status) {
    // On error, MIPS returns errno from syscall instead of -errno.
    // The purpose of this negation is for SandboxSyscall() to behave
    // more like it would on other architectures.
    ret = -ret;
  }
#elif defined(__aarch64__)
  intptr_t ret;
  {
    register intptr_t inout __asm__("x0") = nr;
    register const intptr_t* data __asm__("x6") = args;
    asm volatile("bl SyscallAsm\n"
                 : "=r"(inout)
                 : "0"(inout), "r"(data)
                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
    ret = inout;
  }

#else
#error "Unimplemented architecture"
#endif
  return ret;
}
    376 
// Writes |ret_val| into the register of |ctx| that holds a system call's
// result, so that a signal handler can fake a return value. |ret_val| uses
// the Syscall::Call() convention of -errno on failure.
void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
#if defined(__mips__)
  // The MIPS ABI states that on error the a3 CPU register has a non-zero
  // value and if there is no error, it should be zero.
  if (ret_val <= -1 && ret_val >= -4095) {
    // |ret_val| follows the Syscall::Call() convention of being -errno on
    // errors. In order to write the correct value to the return register
    // this sign needs to be changed back.
    ret_val = -ret_val;
    SECCOMP_PARM4(ctx) = 1;
  } else
    SECCOMP_PARM4(ctx) = 0;
#endif
  SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
}
    392 
#if defined(__mips__)
// Performs system call |nr| with arguments from |args| via SyscallAsm.
// On return, |*err_ret| is non-zero if the kernel reported an error (MIPS
// signals errors through the a3 register rather than a negative result).
// Returns the raw value from the kernel: the result on success, or a
// positive errno on failure.
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  register intptr_t ret __asm__("v0") = nr;
  // The a3 register becomes non-zero on error.
  register intptr_t err_stat __asm__("a3") = 0;
  {
    register const intptr_t* data __asm__("a0") = args;
    asm volatile(
        "la $t9, SyscallAsm\n"
        "jalr $t9\n"
        " nop\n"
        : "=r"(ret), "=r"(err_stat)
        : "0"(ret),
          "r"(data)
          // a2 is in the clobber list so inline assembly can not change its
          // value.
        : "memory", "ra", "t9", "a2");
  }

  // Set an error status so it can be used outside of this function.
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)
    420 
    421 }  // namespace sandbox
    422