// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/syscall.h"

#include <asm/unistd.h>
#include <errno.h>

#include "base/basictypes.h"
#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"

namespace sandbox {

namespace {

#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
    defined(ARCH_CPU_MIPS_FAMILY)
// Number that's not currently used by any Linux kernel ABIs.
const int kInvalidSyscallNumber = 0x351d3;
#else
#error Unrecognized architecture
#endif

asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler sometimes likes to clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
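    // Our private convention, matching the asm() constraints in
    // Syscall::Call() below: the system call number arrives in the
    // architecture's syscall register (%eax, %rax, r0, $v0, or x0), a
    // pointer to the argument array arrives in %edi, %r12, r6, $a0, or
    // x6, respectively, and the result is returned in that same first
    // register.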
#if defined(__i386__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
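    // ("call 0f" pushes the address of label "0" onto the stack, "pop
    // %eax" retrieves it, and adding the constant difference "2f-0b"
    // yields the absolute address of label "2", i.e. of the instruction
    // right after "int $0x80" below.)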
    // Save registers that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers.
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rax, %rax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "call 0f;   .cfi_adjust_cfa_offset  8\n"
    "0:pop  %rax; .cfi_adjust_cfa_offset -8\n"
    "addq $2f-0b, %rax\n"
    "ret\n"
    // We declared all clobbered registers to the compiler. On x86-64,
    // there really isn't much of a problem with register pressure. So,
    // we can go ahead and directly copy the entries from the arguments
    // array into the appropriate CPU registers.
    "1:movq  0(%r12), %rdi\n"
    "movq  8(%r12), %rsi\n"
    "movq 16(%r12), %rdx\n"
    "movq 24(%r12), %r10\n"
    "movq 32(%r12), %r8\n"
    "movq 40(%r12), %r9\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means that when transferring
    // control from C++ to assembly code, we do not need to switch
    // modes (e.g. by using the "bx" instruction). It also means that
    // our assembly code should not be invoked directly from code that
    // lives in other compilation units, as we don't bother implementing
    // thumb interworking. That's OK, as we don't make any of the
    // assembly symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:.fnstart\n"
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    "mov r7, sp\n"
    ".cfi_def_cfa_register 7\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel.
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
    ".fnend\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__mips__)
    ".text\n"
    ".align 4\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 40, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    "addiu  $sp, $sp, -40\n"
    "sw     $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    "la     $v0, 2f\n"
    "b      2f\n"
    " nop\n"
    // On MIPS, the first four arguments go in registers a0 - a3, and any
    // further arguments go on the stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate
    // CPU registers and onto the stack.
    "1:lw     $a3, 28($a0)\n"
    "lw     $a2, 24($a0)\n"
    "lw     $a1, 20($a0)\n"
    "lw     $t0, 16($a0)\n"
    "sw     $a3, 28($sp)\n"
    "sw     $a2, 24($sp)\n"
    "sw     $a1, 20($sp)\n"
    "sw     $t0, 16($sp)\n"
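    // (Arguments five through eight land at 16($sp) through 28($sp), the
    // slots where the o32 calling convention expects stack-passed
    // arguments to begin.)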
    "lw     $a3, 12($a0)\n"
    "lw     $a2, 8($a0)\n"
    "lw     $a1, 4($a0)\n"
    "lw     $a0, 0($a0)\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw     $ra, 36($sp)\n"
    "jr     $ra\n"
    " addiu  $sp, $sp, 40\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    // Check if "x0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "svc 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0, 2f\n"
    "b 2f\n"
    // Copy the entries from the arguments array into the appropriate
    // CPU registers, and move the system call number from x0 to x8.
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel.
    "svc 0\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm

}  // namespace

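// With no seccomp-bpf filter installed, the kernel fails this call with
// -ENOSYS, as kInvalidSyscallNumber does not name a real system call; a
// BPF filter can match on the number to recognize this probe.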
intptr_t Syscall::InvalidCall() {
  // Explicitly pass eight zero arguments just in case.
  return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
}

intptr_t Syscall::Call(int nr,
                       intptr_t p0,
                       intptr_t p1,
                       intptr_t p2,
                       intptr_t p3,
                       intptr_t p4,
                       intptr_t p5,
                       intptr_t p6,
                       intptr_t p7) {
  // We rely on "intptr_t" to be exactly the same size as a "void *". This is
  // typically true, but just in case, we add a check. The language
  // specification allows platforms some leeway in cases where
  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
  // that this would only be an issue for IA64, which we are currently not
  // planning on supporting. And it is even possible that this would work
  // on IA64, but for lack of actual hardware, I cannot test.
  COMPILE_ASSERT(sizeof(void*) == sizeof(intptr_t),
                 pointer_types_and_intptr_must_be_exactly_the_same_size);

  // TODO(nedeljko): Enable use of more than six parameters on architectures
  //                 where that makes sense.
#if defined(__mips__)
  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
#else
  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
#endif  // defined(__mips__)

// Invoke our file-scope assembly code. The constraints have been picked
// carefully to match what the rest of the assembly code expects in input,
// output, and clobbered registers.
#if defined(__i386__)
  intptr_t ret = nr;
  asm volatile(
      "call SyscallAsm\n"
      // N.B. These are not the calling conventions normally used by the ABI.
      : "=a"(ret)
      : "0"(ret), "D"(args)
      : "cc", "esp", "memory", "ecx", "edx");
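  // ("=a"/"0" pin the system call number and its result to %eax, and "D"
  // passes the address of |args| in %edi, which is where SyscallAsm
  // expects to find the argument array.)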
#elif defined(__x86_64__)
  intptr_t ret = nr;
  {
    register const intptr_t* data __asm__("r12") = args;
    asm volatile(
        "lea  -128(%%rsp), %%rsp\n"  // Avoid red zone.
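        // (The x86-64 ABI grants leaf code a 128-byte "red zone" below
        // %rsp that the compiler may be using for locals; the "call"
        // below would clobber it, so we step %rsp past it first and
        // undo the adjustment afterwards.)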
        "call SyscallAsm\n"
        "lea  128(%%rsp), %%rsp\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=a"(ret)
        : "0"(ret), "r"(data)
        : "cc",
          "rsp",
          "memory",
          "rcx",
          "rdi",
          "rsi",
          "rdx",
          "r8",
          "r9",
          "r10",
          "r11");
  }
#elif defined(__arm__)
  intptr_t ret;
  {
    register intptr_t inout __asm__("r0") = nr;
    register const intptr_t* data __asm__("r6") = args;
    asm volatile(
        "bl SyscallAsm\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=r"(inout)
        : "0"(inout), "r"(data)
        : "cc",
          "lr",
          "memory",
          "r1",
          "r2",
          "r3",
          "r4",
          "r5"
#if !defined(__thumb__)
          // In thumb mode, we cannot use "r7" as a general purpose register, as
          // it is our frame pointer. We have to manually manage and preserve
          // it.
          // In ARM mode, we have a dedicated frame pointer register and "r7" is
          // thus available as a general purpose register. We don't preserve it,
          // but instead mark it as clobbered.
          ,
          "r7"
#endif  // !defined(__thumb__)
        );
    ret = inout;
  }
#elif defined(__mips__)
  int err_status;
  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);

  if (err_status) {
    // On error, MIPS returns errno from the syscall instead of -errno.
    // The purpose of this negation is for SandboxSyscall() to behave
    // more like it would on other architectures.
    ret = -ret;
  }
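  // For example, a failing close(-1) comes back from the kernel with
  // ret == EBADF and a non-zero error status; the negation above turns
  // this into -EBADF, matching the other architectures.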
#elif defined(__aarch64__)
  intptr_t ret;
  {
    register intptr_t inout __asm__("x0") = nr;
    register const intptr_t* data __asm__("x6") = args;
    asm volatile("bl SyscallAsm\n"
                 : "=r"(inout)
                 : "0"(inout), "r"(data)
                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
    ret = inout;
  }

#else
#error "Unimplemented architecture"
#endif
  return ret;
}
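
// A usage sketch (illustrative): callers pass the system call number and
// up to six arguments (eight on MIPS), and errors come back as -errno:
//
//   intptr_t pid = Syscall::Call(__NR_getpid, 0, 0, 0, 0, 0, 0, 0, 0);
//   intptr_t rc = Syscall::Call(__NR_close, -1, 0, 0, 0, 0, 0, 0, 0);
//   // rc == -EBADF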

void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
#if defined(__mips__)
  // The MIPS ABI states that on error the a3 CPU register has a non-zero
  // value, and that it should be zero if there is no error.
  if (ret_val <= -1 && ret_val >= -4095) {
    // |ret_val| follows the Syscall::Call() convention of being -errno on
    // errors. In order to write the correct value to the return register,
    // this sign needs to be changed back.
    ret_val = -ret_val;
    SECCOMP_PARM4(ctx) = 1;
  } else {
    SECCOMP_PARM4(ctx) = 0;
  }
#endif
  SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
}
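
// A sketch of the intended use (EmulateInSigSysHandler is a hypothetical
// helper, not part of this file): a SIGSYS trap handler computes an
// emulated result and writes it into the interrupted context before
// returning, e.g.
//
//   void EmulateInSigSysHandler(ucontext_t* ctx) {
//     Syscall::PutValueInUcontext(-EPERM, ctx);  // Deny the system call.
//   }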

#if defined(__mips__)
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  register intptr_t ret __asm__("v0") = nr;
  // The a3 register becomes non-zero on error.
  register intptr_t err_stat __asm__("a3") = 0;
  {
    register const intptr_t* data __asm__("a0") = args;
    asm volatile(
        "la $t9, SyscallAsm\n"
        "jalr $t9\n"
        " nop\n"
        : "=r"(ret), "=r"(err_stat)
        : "0"(ret),
          "r"(data)
          // a2 is in the clobber list, so the compiler will not assume it
          // keeps its value across the inline assembly.
        : "memory", "ra", "t9", "a2");
  }

  // Set an error status so it can be used outside of this function.
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)

}  // namespace sandbox