; Tests basics and corner cases of x86-32 sandboxing, using -Om1 in
; the hope that the output will remain stable.  When packing bundles,
; we try to limit ourselves to a few instructions with well-known sizes
; and minimal use of registers and stack slots in the lowering sequence.

; XFAIL: filtype=asm
; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --args -Om1 \
; RUN:   -allow-externally-defined-symbols \
; RUN:   -ffunction-sections | FileCheck %s

; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --target=x8664 \
; RUN:   --args -Om1 -allow-externally-defined-symbols \
; RUN:   -ffunction-sections | FileCheck %s --check-prefix X8664

declare void @call_target()
@global_byte = internal global [1 x i8] zeroinitializer
@global_short = internal global [2 x i8] zeroinitializer
@global_int = internal global [4 x i8] zeroinitializer

; A direct call sequence is padded so that the return address lands on a
; bundle boundary.
define internal void @test_direct_call() {
entry:
  call void @call_target()
  ret void
}
; CHECK-LABEL: test_direct_call
; CHECK: nop
; CHECK: 1b: {{.*}} call 1c
; CHECK-NEXT: 20:
; X8664-LABEL: test_direct_call
; X8664: push {{.*}} R_X86_64_32S test_direct_call+{{.*}}20
; X8664: jmp {{.*}} call_target
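; Rough byte accounting for the checks above (a sketch, assuming 32-byte
; bundles): the 5-byte direct call is preceded by nops so that it occupies
; 0x1b..0x1f and the return address lands on the bundle boundary at 0x20.
; On x86-64 the call is lowered to a push of the bundle-aligned return
; address (the test_direct_call relocation in the X8664 lines) followed by a
; jmp to the target.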

; An indirect call sequence uses the right mask and register-call sequence.
define internal void @test_indirect_call(i32 %target) {
entry:
  %__1 = inttoptr i32 %target to void ()*
  call void %__1()
  ret void
}
; CHECK-LABEL: test_indirect_call
; CHECK: mov [[REG:.*]],DWORD PTR [esp
; CHECK-NEXT: nop
; CHECK: 1b: {{.*}} and [[REG]],0xffffffe0
; CHECK-NEXT: call [[REG]]
; CHECK-NEXT: 20:
; X8664-LABEL: test_indirect_call
; X8664: push {{.*}} R_X86_64_32S test_indirect_call+{{.*}}20
; X8664: {{.*}} and e[[REG:..]],0xffffffe0
; X8664: add r[[REG]],r15
; X8664: jmp r[[REG]]
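; Sketch of the sandboxing implied by the checks above: the and with
; 0xffffffe0 clears the low five bits so the target is 32-byte aligned, and
; the bundle-locked and/call pair is placed so the call ends on the bundle
; boundary at 0x20.  On x86-64 the masked 32-bit target is additionally
; rebased by adding r15 (the sandbox base register) before the jump.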

; A return sequence uses the right pop / mask / jmp sequence.
define internal void @test_ret() {
entry:
  ret void
}
; CHECK-LABEL: test_ret
; CHECK: pop ecx
; CHECK-NEXT: and ecx,0xffffffe0
; CHECK-NEXT: jmp ecx
; X8664-LABEL: test_ret
; X8664: pop rcx
; X8664: and ecx,0xffffffe0
; X8664: add rcx,r15
; X8664: jmp rcx
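; Sketch of the return sequence checked above: instead of a plain ret, the
; return address is popped into ecx, masked down to a 32-byte boundary, and
; jumped through.  On x86-64 the 32-bit and also clears the upper half of
; rcx, which is then rebased off r15 before the jump.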

; A perfectly packed bundle should not have nops at the end.
define internal void @packed_bundle() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; bundle boundary
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: packed_bundle
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov WORD PTR
; CHECK-NEXT: 30: {{.*}} mov BYTE PTR
; CHECK-NEXT: 37: {{.*}} mov WORD PTR
; CHECK-NEXT: 40: {{.*}} mov BYTE PTR
; CHECK-NEXT: 47: {{.*}} mov WORD PTR
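; Byte accounting for the addresses above: the call's return address lands
; at 0x20, and the next four stores are 7 + 9 + 7 + 9 = 32 bytes, exactly
; filling the bundle at 0x20..0x40, so the stores at 0x40 and 0x47 follow
; with no nops in between.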

; An imperfectly packed bundle should have one or more nops at the end.
define internal void @nonpacked_bundle() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ; nop padding
  ; bundle boundary
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: nonpacked_bundle
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov WORD PTR
; CHECK-NEXT: 29: {{.*}} mov WORD PTR
; CHECK-NEXT: 32: {{.*}} mov WORD PTR
; CHECK-NEXT: 3b: {{.*}} nop
; CHECK: 40: {{.*}} mov WORD PTR
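; Byte accounting: three 9-byte stores (27 bytes) end at 0x3b; the fourth
; store would straddle the boundary at 0x40, so 5 bytes of nops are emitted
; at 0x3b and the last store starts at 0x40.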

; A zero-byte instruction (e.g. local label definition) at a bundle
; boundary should not trigger nop padding.
define internal void @label_at_boundary(i32 %arg, float %farg1, float %farg2) {
entry:
  %argi8 = trunc i32 %arg to i8
  call void @call_target()
  ; bundle boundary
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  %addr_int = bitcast [4 x i8]* @global_int to i32*
  store i32 0, i32* %addr_int, align 1           ; 10-byte instruction
  %blah = select i1 true, i8 %argi8, i8 %argi8   ; 22-byte lowering sequence
  ; label is here
  store i16 0, i16* %addr_short, align 1         ; 9-byte instruction
  ret void
}
; CHECK-LABEL: label_at_boundary
; CHECK: call
; We rely on a particular 7-instruction 22-byte Om1 lowering sequence
; for select.
; CHECK-NEXT: 20: {{.*}} mov DWORD PTR
; CHECK-NEXT: 2a: {{.*}} mov {{.*}},0x1
; CHECK-NEXT: 2c: {{.*}} cmp {{.*}},0x0
; CHECK-NEXT: 2e: {{.*}} mov {{.*}},BYTE PTR
; CHECK-NEXT: 32: {{.*}} mov BYTE PTR
; CHECK-NEXT: 36: {{.*}} jne 40
; CHECK-NEXT: 38: {{.*}} mov {{.*}},BYTE PTR
; CHECK-NEXT: 3c: {{.*}} mov BYTE PTR
; CHECK-NEXT: 40: {{.*}} mov WORD PTR
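; Byte accounting: the 10-byte store ends at 0x2a and the 22-byte select
; lowering ends exactly at 0x40, which is also where the jne target label
; falls.  Since the label occupies zero bytes it needs no nop padding, and
; the final store begins right at 0x40.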

; Bundle lock without padding.
define internal void @bundle_lock_without_padding() {
entry:
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
}
; CHECK-LABEL: bundle_lock_without_padding
; CHECK: mov WORD PTR
; CHECK-NEXT: pop ecx
; CHECK-NEXT: and ecx,0xffffffe0
; CHECK-NEXT: jmp ecx

; Bundle lock with padding.
define internal void @bundle_lock_with_padding() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  ret void
  ; 3 bytes to restore stack pointer
  ; 1 byte to pop ecx
  ; bundle_lock
  ; 3 bytes to mask ecx
  ; This is now 32 bytes from the beginning of the bundle, so
  ; a 3-byte nop will need to be emitted before the bundle_lock.
  ; 2 bytes to jump to ecx
  ; bundle_unlock
}
; CHECK-LABEL: bundle_lock_with_padding
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov WORD PTR
; CHECK-NEXT: 30: {{.*}} mov WORD PTR
; CHECK-NEXT: 39: {{.*}} add esp,
; CHECK-NEXT: 3c: {{.*}} pop ecx
; CHECK-NEXT: 3d: {{.*}} nop
; CHECK-NEXT: 40: {{.*}} and ecx,0xffffffe0
; CHECK-NEXT: 43: {{.*}} jmp ecx
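; Byte accounting: the stores end at 0x39, the add esp at 0x3c, and the pop
; at 0x3d; the bundle-locked and/jmp pair is 5 bytes and would straddle 0x40,
; so a 3-byte nop is emitted first and the masked jump occupies 0x40..0x45.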

; Bundle lock align_to_end without any padding.
define internal void @bundle_lock_align_to_end_padding_0() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  call void @call_target()                 ; 5-byte instruction
  ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_0
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov WORD PTR
; CHECK-NEXT: 29: {{.*}} mov WORD PTR
; CHECK-NEXT: 32: {{.*}} mov WORD PTR
; CHECK-NEXT: 3b: {{.*}} call
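; Byte accounting: the three 9-byte stores end at 0x3b, so the 5-byte call
; already ends exactly on the boundary at 0x40 and align_to_end adds no nops.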

; Bundle lock align_to_end with a single run of nop padding.
define internal void @bundle_lock_align_to_end_padding_1() {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  call void @call_target()                 ; 5-byte instruction
  ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_1
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov BYTE PTR
; CHECK-NEXT: 2e: {{.*}} mov BYTE PTR
; CHECK-NEXT: 35: {{.*}} nop
; CHECK: 3b: {{.*}} call
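; Byte accounting: the three 7-byte stores end at 0x35; for the 5-byte call
; to end on the boundary at 0x40 it must start at 0x3b, so a single 6-byte
; run of nops is emitted at 0x35.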

; Bundle lock align_to_end with two runs of nop padding.
define internal void @bundle_lock_align_to_end_padding_2(i32 %target) {
entry:
  call void @call_target()
  ; bundle boundary
  %addr_byte = bitcast [1 x i8]* @global_byte to i8*
  %addr_short = bitcast [2 x i8]* @global_short to i16*
  %__1 = inttoptr i32 %target to void ()*
  store i8 0, i8* %addr_byte, align 1      ; 7-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  store i16 0, i16* %addr_short, align 1   ; 9-byte instruction
  call void %__1()
  ; 4 bytes to load %target into a register
  ; bundle_lock align_to_end
  ; 3 bytes to mask the register
  ; This is now 32 bytes from the beginning of the bundle, so
  ; a 3-byte nop will need to be emitted before the bundle_lock,
  ; followed by a 27-byte nop before the mask/call.
  ; 2 bytes to call through the register
  ; bundle_unlock
  ret void
}
; CHECK-LABEL: bundle_lock_align_to_end_padding_2
; CHECK: call
; CHECK-NEXT: 20: {{.*}} mov BYTE PTR
; CHECK-NEXT: 27: {{.*}} mov WORD PTR
; CHECK-NEXT: 30: {{.*}} mov WORD PTR
; CHECK-NEXT: 39: {{.*}} mov [[REG:.*]],DWORD PTR [esp
; CHECK-NEXT: 3d: {{.*}} nop
; CHECK: 40: {{.*}} nop
; CHECK: 5b: {{.*}} and [[REG]],0xffffffe0
; CHECK-NEXT: 5e: {{.*}} call [[REG]]
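; Byte accounting: the 4-byte load of %target ends at 0x3d, so the 5-byte
; bundle-locked and/call pair cannot end on the 0x40 boundary; a 3-byte run
; of nops reaches 0x40 and a second 27-byte run reaches 0x5b, leaving the
; mask at 0x5b and the 2-byte call ending exactly at 0x60.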

; Tests the pad_to_end bundle alignment with no padding bytes needed.
define internal void @bundle_lock_pad_to_end_padding_0(i32 %arg0, i32 %arg1,
                                                       i32 %arg3, i32 %arg4,
                                                       i32 %arg5, i32 %arg6) {
  call void @call_target()
  ; bundle boundary
  %x = add i32 %arg5, %arg6  ; 12 bytes
  %y = trunc i32 %x to i16   ; 10 bytes
  call void @call_target()   ; 10 bytes
  ; bundle boundary
  ret void
}
; X8664: 56: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_0+{{.*}}60
; X8664: 5b: {{.*}} jmp {{.*}} call_target
; X8664: 60: {{.*}} add
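; Sketch of the x86-64 call lowering assumed by these pad_to_end tests: each
; sandboxed call becomes a 5-byte push of the next bundle boundary (the
; return address) plus a 5-byte jmp, and the rest of the bundle is filled
; with nops so the callee returns onto that boundary.  Here the 22 bytes of
; preceding code leave exactly 10 bytes, so the jmp already ends at 0x60 and
; no nops are needed.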

; Tests the pad_to_end bundle alignment with 11 padding bytes needed, and some
; instructions before the call.
define internal void @bundle_lock_pad_to_end_padding_11(i32 %arg0, i32 %arg1,
                                                        i32 %arg3, i32 %arg4,
                                                        i32 %arg5, i32 %arg6) {
  call void @call_target()
  ; bundle boundary
  %x = add i32 %arg5, %arg6  ; 11 bytes
  call void @call_target()   ; 10 bytes
                             ; 11 bytes of nop
  ; bundle boundary
  ret void
}
; X8664: 4b: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_11+{{.*}}60
; X8664: 50: {{.*}} jmp {{.*}} call_target
; X8664: 55: {{.*}} nop
; X8664: 5d: {{.*}} nop
; X8664: 60: {{.*}} add

; Tests the pad_to_end bundle alignment with 22 padding bytes needed, and no
; instructions before the call.
define internal void @bundle_lock_pad_to_end_padding_22(i32 %arg0, i32 %arg1,
                                                        i32 %arg3, i32 %arg4,
                                                        i32 %arg5, i32 %arg6) {
  call void @call_target()
  ; bundle boundary
  call void @call_target()   ; 10 bytes
                             ; 22 bytes of nop
  ; bundle boundary
  ret void
}
; X8664: 40: {{.*}} push {{.*}} R_X86_64_32S bundle_lock_pad_to_end_padding_22+{{.*}}60
; X8664: 45: {{.*}} jmp {{.*}} call_target
; X8664: 4a: {{.*}} nop
; X8664: 52: {{.*}} nop
; X8664: 5a: {{.*}} nop
; X8664: 60: {{.*}} add
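; Byte accounting for the padded variants: in padding_11 the jmp ends at 0x55
; and 11 bytes of nops fill the bundle up to the pushed return address at
; 0x60; in padding_22 the push/jmp pair alone occupies 0x40..0x4a and 22
; bytes of nops pad up to 0x60.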

; Stack adjustment state during an argument push sequence gets
; properly checkpointed and restored during the two passes, as
; observed via the stack offsets used to access stack-allocated
; variables.
define internal void @checkpoint_restore_stack_adjustment(i32 %arg) {
entry:
  call void @call_target()
  ; bundle boundary
  call void @checkpoint_restore_stack_adjustment(i32 %arg)
  ret void
}
; CHECK-LABEL: checkpoint_restore_stack_adjustment
; CHECK: sub esp,0x1c
; CHECK: call
; The address of %arg should be [esp+0x20], not [esp+0x30].
; CHECK-NEXT: mov [[REG:.*]],DWORD PTR [esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp],[[REG]]
; CHECK: call
; CHECK: add esp,0x1c
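; Rough frame accounting for the checks above (a sketch): the 0x1c-byte frame
; plus the 4-byte return address puts %arg at [esp+0x20]; seeing [esp+0x30]
; instead would suggest that the stack-adjustment state from the argument
; push sequence had leaked across the two bundling passes.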
    333