; (code-browser navigation header commented out so this .ll file parses;
;  note the leading line-number column on every line below is also viewer
;  residue -- strip it before feeding this file to llc/FileCheck)
; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s --check-prefix=X64
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening -x86-speculative-load-hardening-lfence | FileCheck %s --check-prefix=X64-LFENCE
      4 ;
      5 ; FIXME: Add support for 32-bit and other EH ABIs.
      6 
      7 declare void @leak(i32 %v1, i32 %v2)
      8 
      9 declare void @sink(i32)
     10 
     11 define i32 @test_trivial_entry_load(i32* %ptr) {
; Simplest case: a single load in the entry block. The hardened (X64) run
; expects the predicate state to be derived directly from %rsp (movq + sarq
; $63), OR'd into the loaded value, then folded back into %rsp before ret.
; The X64-LFENCE run expects just the plain load. CHECK lines are
; autogenerated by update_llc_test_checks.py -- regenerate, don't hand-edit.
     12 ; X64-LABEL: test_trivial_entry_load:
     13 ; X64:       # %bb.0: # %entry
     14 ; X64-NEXT:    movq %rsp, %rcx
     15 ; X64-NEXT:    movq $-1, %rax
     16 ; X64-NEXT:    sarq $63, %rcx
     17 ; X64-NEXT:    movl (%rdi), %eax
     18 ; X64-NEXT:    orl %ecx, %eax
     19 ; X64-NEXT:    shlq $47, %rcx
     20 ; X64-NEXT:    orq %rcx, %rsp
     21 ; X64-NEXT:    retq
     22 ;
     23 ; X64-LFENCE-LABEL: test_trivial_entry_load:
     24 ; X64-LFENCE:       # %bb.0: # %entry
     25 ; X64-LFENCE-NEXT:    movl (%rdi), %eax
     26 ; X64-LFENCE-NEXT:    retq
     27 entry:
; The loaded value itself is hardened (state OR'd into %eax above).
     28   %v = load i32, i32* %ptr
     29   ret i32 %v
     30 }
     31 
     32 define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2, i32** %ptr3) {
; Branchy CFG (then1/then2/then3/else3/merge/exit). In the hardened (X64)
; run every conditional edge is expected to update the predicate state with
; a cmov (cmoveq/cmovneq %rbx, %rax), loaded pointers/values are OR'd with
; the state, and the state is flattened into %rsp (shlq $47 + orq) around
; the call to @leak. The X64-LFENCE run instead expects an lfence at the
; top of each conditional successor block. CHECK lines are autogenerated --
; regenerate with update_llc_test_checks.py rather than editing by hand.
     33 ; X64-LABEL: test_basic_conditions:
     34 ; X64:       # %bb.0: # %entry
     35 ; X64-NEXT:    pushq %r15
     36 ; X64-NEXT:    .cfi_def_cfa_offset 16
     37 ; X64-NEXT:    pushq %r14
     38 ; X64-NEXT:    .cfi_def_cfa_offset 24
     39 ; X64-NEXT:    pushq %rbx
     40 ; X64-NEXT:    .cfi_def_cfa_offset 32
     41 ; X64-NEXT:    .cfi_offset %rbx, -32
     42 ; X64-NEXT:    .cfi_offset %r14, -24
     43 ; X64-NEXT:    .cfi_offset %r15, -16
     44 ; X64-NEXT:    movq %rsp, %rax
     45 ; X64-NEXT:    movq $-1, %rbx
     46 ; X64-NEXT:    sarq $63, %rax
     47 ; X64-NEXT:    testl %edi, %edi
     48 ; X64-NEXT:    jne .LBB1_1
     49 ; X64-NEXT:  # %bb.2: # %then1
     50 ; X64-NEXT:    cmovneq %rbx, %rax
     51 ; X64-NEXT:    testl %esi, %esi
     52 ; X64-NEXT:    je .LBB1_4
     53 ; X64-NEXT:  .LBB1_1:
     54 ; X64-NEXT:    cmoveq %rbx, %rax
     55 ; X64-NEXT:  .LBB1_8: # %exit
     56 ; X64-NEXT:    shlq $47, %rax
     57 ; X64-NEXT:    orq %rax, %rsp
     58 ; X64-NEXT:    popq %rbx
     59 ; X64-NEXT:    .cfi_def_cfa_offset 24
     60 ; X64-NEXT:    popq %r14
     61 ; X64-NEXT:    .cfi_def_cfa_offset 16
     62 ; X64-NEXT:    popq %r15
     63 ; X64-NEXT:    .cfi_def_cfa_offset 8
     64 ; X64-NEXT:    retq
     65 ; X64-NEXT:  .LBB1_4: # %then2
     66 ; X64-NEXT:    .cfi_def_cfa_offset 32
     67 ; X64-NEXT:    movq %r8, %r15
     68 ; X64-NEXT:    cmovneq %rbx, %rax
     69 ; X64-NEXT:    testl %edx, %edx
     70 ; X64-NEXT:    je .LBB1_6
     71 ; X64-NEXT:  # %bb.5: # %else3
     72 ; X64-NEXT:    cmoveq %rbx, %rax
     73 ; X64-NEXT:    movslq (%r9), %rcx
     74 ; X64-NEXT:    orq %rax, %rcx
     75 ; X64-NEXT:    leaq (%r15,%rcx,4), %r14
     76 ; X64-NEXT:    movl %ecx, (%r15,%rcx,4)
     77 ; X64-NEXT:    jmp .LBB1_7
     78 ; X64-NEXT:  .LBB1_6: # %then3
     79 ; X64-NEXT:    cmovneq %rbx, %rax
     80 ; X64-NEXT:    movl (%rcx), %ecx
     81 ; X64-NEXT:    addl (%r15), %ecx
     82 ; X64-NEXT:    movslq %ecx, %rdi
     83 ; X64-NEXT:    orq %rax, %rdi
     84 ; X64-NEXT:    movl (%r15,%rdi,4), %esi
     85 ; X64-NEXT:    orl %eax, %esi
     86 ; X64-NEXT:    movq (%r9), %r14
     87 ; X64-NEXT:    orq %rax, %r14
     88 ; X64-NEXT:    addl (%r14), %esi
     89 ; X64-NEXT:    shlq $47, %rax
     90 ; X64-NEXT:    # kill: def $edi killed $edi killed $rdi
     91 ; X64-NEXT:    orq %rax, %rsp
     92 ; X64-NEXT:    callq leak
     93 ; X64-NEXT:    movq %rsp, %rax
     94 ; X64-NEXT:    sarq $63, %rax
     95 ; X64-NEXT:  .LBB1_7: # %merge
     96 ; X64-NEXT:    movslq (%r14), %rcx
     97 ; X64-NEXT:    orq %rax, %rcx
     98 ; X64-NEXT:    movl $0, (%r15,%rcx,4)
     99 ; X64-NEXT:    jmp .LBB1_8
    100 ;
    101 ; X64-LFENCE-LABEL: test_basic_conditions:
    102 ; X64-LFENCE:       # %bb.0: # %entry
    103 ; X64-LFENCE-NEXT:    pushq %r14
    104 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
    105 ; X64-LFENCE-NEXT:    pushq %rbx
    106 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
    107 ; X64-LFENCE-NEXT:    pushq %rax
    108 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
    109 ; X64-LFENCE-NEXT:    .cfi_offset %rbx, -24
    110 ; X64-LFENCE-NEXT:    .cfi_offset %r14, -16
    111 ; X64-LFENCE-NEXT:    testl %edi, %edi
    112 ; X64-LFENCE-NEXT:    jne .LBB1_6
    113 ; X64-LFENCE-NEXT:  # %bb.1: # %then1
    114 ; X64-LFENCE-NEXT:    lfence
    115 ; X64-LFENCE-NEXT:    testl %esi, %esi
    116 ; X64-LFENCE-NEXT:    jne .LBB1_6
    117 ; X64-LFENCE-NEXT:  # %bb.2: # %then2
    118 ; X64-LFENCE-NEXT:    movq %r8, %rbx
    119 ; X64-LFENCE-NEXT:    lfence
    120 ; X64-LFENCE-NEXT:    testl %edx, %edx
    121 ; X64-LFENCE-NEXT:    je .LBB1_3
    122 ; X64-LFENCE-NEXT:  # %bb.4: # %else3
    123 ; X64-LFENCE-NEXT:    lfence
    124 ; X64-LFENCE-NEXT:    movslq (%r9), %rax
    125 ; X64-LFENCE-NEXT:    leaq (%rbx,%rax,4), %r14
    126 ; X64-LFENCE-NEXT:    movl %eax, (%rbx,%rax,4)
    127 ; X64-LFENCE-NEXT:    jmp .LBB1_5
    128 ; X64-LFENCE-NEXT:  .LBB1_3: # %then3
    129 ; X64-LFENCE-NEXT:    lfence
    130 ; X64-LFENCE-NEXT:    movl (%rcx), %eax
    131 ; X64-LFENCE-NEXT:    addl (%rbx), %eax
    132 ; X64-LFENCE-NEXT:    movslq %eax, %rdi
    133 ; X64-LFENCE-NEXT:    movl (%rbx,%rdi,4), %esi
    134 ; X64-LFENCE-NEXT:    movq (%r9), %r14
    135 ; X64-LFENCE-NEXT:    addl (%r14), %esi
    136 ; X64-LFENCE-NEXT:    # kill: def $edi killed $edi killed $rdi
    137 ; X64-LFENCE-NEXT:    callq leak
    138 ; X64-LFENCE-NEXT:  .LBB1_5: # %merge
    139 ; X64-LFENCE-NEXT:    movslq (%r14), %rax
    140 ; X64-LFENCE-NEXT:    movl $0, (%rbx,%rax,4)
    141 ; X64-LFENCE-NEXT:  .LBB1_6: # %exit
    142 ; X64-LFENCE-NEXT:    lfence
    143 ; X64-LFENCE-NEXT:    addq $8, %rsp
    144 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
    145 ; X64-LFENCE-NEXT:    popq %rbx
    146 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
    147 ; X64-LFENCE-NEXT:    popq %r14
    148 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 8
    149 ; X64-LFENCE-NEXT:    retq
    150 entry:
    151   %a.cmp = icmp eq i32 %a, 0
    152   br i1 %a.cmp, label %then1, label %exit
    153
    154 then1:
    155   %b.cmp = icmp eq i32 %b, 0
    156   br i1 %b.cmp, label %then2, label %exit
    157
    158 then2:
    159   %c.cmp = icmp eq i32 %c, 0
    160   br i1 %c.cmp, label %then3, label %else3
    161
; then3: classic Spectre-v1 shape -- loaded values feed a further indexed
; load (%ptr2.idx) and escape through a call.
    162 then3:
    163   %secret1 = load i32, i32* %ptr1
    164   %secret2 = load i32, i32* %ptr2
    165   %secret.sum1 = add i32 %secret1, %secret2
    166   %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret.sum1
    167   %secret3 = load i32, i32* %ptr2.idx
    168   %secret4 = load i32*, i32** %ptr3
    169   %secret5 = load i32, i32* %secret4
    170   %secret.sum2 = add i32 %secret3, %secret5
    171   call void @leak(i32 %secret.sum1, i32 %secret.sum2)
    172   br label %merge
    173
; else3: a loaded pointer is also used as data (ptrtoint) and as a store
; address, so the address computation itself must be hardened.
    174 else3:
    175   %secret6 = load i32*, i32** %ptr3
    176   %cast = ptrtoint i32* %secret6 to i32
    177   %ptr2.idx2 = getelementptr i32, i32* %ptr2, i32 %cast
    178   store i32 %cast, i32* %ptr2.idx2
    179   br label %merge
    180
    181 merge:
    182   %phi = phi i32* [ %secret4, %then3 ], [ %ptr2.idx2, %else3 ]
    183   %secret7 = load i32, i32* %phi
    184   %ptr2.idx3 = getelementptr i32, i32* %ptr2, i32 %secret7
    185   store i32 0, i32* %ptr2.idx3
    186   br label %exit
    187
    188 exit:
    189   ret void
    190 }
    191 
    192 define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind {
; Single counted loop containing a call. The hardened (X64) run expects the
; predicate state to be updated on the loop backedge/exit edges via cmov,
; flattened into %rsp (shlq $47 + orq) before each callq sink, and
; re-extracted (movq %rsp / sarq $63) after the call returns. The
; X64-LFENCE run expects an lfence at the loop header and at exit. CHECK
; lines are autogenerated by update_llc_test_checks.py -- regenerate, don't
; hand-edit.
    193 ; X64-LABEL: test_basic_loop:
    194 ; X64:       # %bb.0: # %entry
    195 ; X64-NEXT:    pushq %rbp
    196 ; X64-NEXT:    pushq %r15
    197 ; X64-NEXT:    pushq %r14
    198 ; X64-NEXT:    pushq %r12
    199 ; X64-NEXT:    pushq %rbx
    200 ; X64-NEXT:    movq %rsp, %rax
    201 ; X64-NEXT:    movq $-1, %r15
    202 ; X64-NEXT:    sarq $63, %rax
    203 ; X64-NEXT:    testl %edi, %edi
    204 ; X64-NEXT:    je .LBB2_2
    205 ; X64-NEXT:  # %bb.1:
    206 ; X64-NEXT:    cmoveq %r15, %rax
    207 ; X64-NEXT:    jmp .LBB2_5
    208 ; X64-NEXT:  .LBB2_2: # %l.header.preheader
    209 ; X64-NEXT:    movq %rcx, %r14
    210 ; X64-NEXT:    movq %rdx, %r12
    211 ; X64-NEXT:    movl %esi, %ebp
    212 ; X64-NEXT:    cmovneq %r15, %rax
    213 ; X64-NEXT:    xorl %ebx, %ebx
    214 ; X64-NEXT:    jmp .LBB2_3
    215 ; X64-NEXT:    .p2align 4, 0x90
    216 ; X64-NEXT:  .LBB2_6: # in Loop: Header=BB2_3 Depth=1
    217 ; X64-NEXT:    cmovgeq %r15, %rax
    218 ; X64-NEXT:  .LBB2_3: # %l.header
    219 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
    220 ; X64-NEXT:    movslq (%r12), %rcx
    221 ; X64-NEXT:    orq %rax, %rcx
    222 ; X64-NEXT:    movq %rax, %rdx
    223 ; X64-NEXT:    orq %r14, %rdx
    224 ; X64-NEXT:    movl (%rdx,%rcx,4), %edi
    225 ; X64-NEXT:    shlq $47, %rax
    226 ; X64-NEXT:    orq %rax, %rsp
    227 ; X64-NEXT:    callq sink
    228 ; X64-NEXT:    movq %rsp, %rax
    229 ; X64-NEXT:    sarq $63, %rax
    230 ; X64-NEXT:    incl %ebx
    231 ; X64-NEXT:    cmpl %ebp, %ebx
    232 ; X64-NEXT:    jl .LBB2_6
    233 ; X64-NEXT:  # %bb.4:
    234 ; X64-NEXT:    cmovlq %r15, %rax
    235 ; X64-NEXT:  .LBB2_5: # %exit
    236 ; X64-NEXT:    shlq $47, %rax
    237 ; X64-NEXT:    orq %rax, %rsp
    238 ; X64-NEXT:    popq %rbx
    239 ; X64-NEXT:    popq %r12
    240 ; X64-NEXT:    popq %r14
    241 ; X64-NEXT:    popq %r15
    242 ; X64-NEXT:    popq %rbp
    243 ; X64-NEXT:    retq
    244 ;
    245 ; X64-LFENCE-LABEL: test_basic_loop:
    246 ; X64-LFENCE:       # %bb.0: # %entry
    247 ; X64-LFENCE-NEXT:    pushq %rbp
    248 ; X64-LFENCE-NEXT:    pushq %r15
    249 ; X64-LFENCE-NEXT:    pushq %r14
    250 ; X64-LFENCE-NEXT:    pushq %rbx
    251 ; X64-LFENCE-NEXT:    pushq %rax
    252 ; X64-LFENCE-NEXT:    testl %edi, %edi
    253 ; X64-LFENCE-NEXT:    jne .LBB2_3
    254 ; X64-LFENCE-NEXT:  # %bb.1: # %l.header.preheader
    255 ; X64-LFENCE-NEXT:    movq %rcx, %r14
    256 ; X64-LFENCE-NEXT:    movq %rdx, %r15
    257 ; X64-LFENCE-NEXT:    movl %esi, %ebp
    258 ; X64-LFENCE-NEXT:    lfence
    259 ; X64-LFENCE-NEXT:    xorl %ebx, %ebx
    260 ; X64-LFENCE-NEXT:    .p2align 4, 0x90
    261 ; X64-LFENCE-NEXT:  .LBB2_2: # %l.header
    262 ; X64-LFENCE-NEXT:    # =>This Inner Loop Header: Depth=1
    263 ; X64-LFENCE-NEXT:    lfence
    264 ; X64-LFENCE-NEXT:    movslq (%r15), %rax
    265 ; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
    266 ; X64-LFENCE-NEXT:    callq sink
    267 ; X64-LFENCE-NEXT:    incl %ebx
    268 ; X64-LFENCE-NEXT:    cmpl %ebp, %ebx
    269 ; X64-LFENCE-NEXT:    jl .LBB2_2
    270 ; X64-LFENCE-NEXT:  .LBB2_3: # %exit
    271 ; X64-LFENCE-NEXT:    lfence
    272 ; X64-LFENCE-NEXT:    addq $8, %rsp
    273 ; X64-LFENCE-NEXT:    popq %rbx
    274 ; X64-LFENCE-NEXT:    popq %r14
    275 ; X64-LFENCE-NEXT:    popq %r15
    276 ; X64-LFENCE-NEXT:    popq %rbp
    277 ; X64-LFENCE-NEXT:    retq
    278 entry:
    279   %a.cmp = icmp eq i32 %a, 0
    280   br i1 %a.cmp, label %l.header, label %exit
    281
; Loop body reloads a secret index each iteration and leaks it via @sink.
    282 l.header:
    283   %i = phi i32 [ 0, %entry ], [ %i.next, %l.header ]
    284   %secret = load i32, i32* %ptr1
    285   %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret
    286   %leak = load i32, i32* %ptr2.idx
    287   call void @sink(i32 %leak)
    288   %i.next = add i32 %i, 1
    289   %i.cmp = icmp slt i32 %i.next, %b
    290   br i1 %i.cmp, label %l.header, label %exit
    291
    292 exit:
    293   ret void
    294 }
    295 
    296 define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %ptr2) nounwind {
; Two-deep loop nest with a call in the inner body and in the outer latch.
; The hardened (X64) run expects cmov-based state updates on every loop
; entry/exit/backedge edge, state flattened into %rsp around each callq
; sink, and (because registers run out) a 4-byte spill/reload of %esi
; around the inner loop. The X64-LFENCE run expects an lfence at each loop
; header, preheader, latch, and exit block. CHECK lines are autogenerated
; by update_llc_test_checks.py -- regenerate, don't hand-edit.
    297 ; X64-LABEL: test_basic_nested_loop:
    298 ; X64:       # %bb.0: # %entry
    299 ; X64-NEXT:    pushq %rbp
    300 ; X64-NEXT:    pushq %r15
    301 ; X64-NEXT:    pushq %r14
    302 ; X64-NEXT:    pushq %r13
    303 ; X64-NEXT:    pushq %r12
    304 ; X64-NEXT:    pushq %rbx
    305 ; X64-NEXT:    pushq %rax
    306 ; X64-NEXT:    movq %rsp, %rax
    307 ; X64-NEXT:    movq $-1, %r12
    308 ; X64-NEXT:    sarq $63, %rax
    309 ; X64-NEXT:    testl %edi, %edi
    310 ; X64-NEXT:    je .LBB3_2
    311 ; X64-NEXT:  # %bb.1:
    312 ; X64-NEXT:    cmoveq %r12, %rax
    313 ; X64-NEXT:    jmp .LBB3_10
    314 ; X64-NEXT:  .LBB3_2: # %l1.header.preheader
    315 ; X64-NEXT:    movq %r8, %r14
    316 ; X64-NEXT:    movq %rcx, %rbx
    317 ; X64-NEXT:    movl %edx, %ebp
    318 ; X64-NEXT:    movl %esi, %r15d
    319 ; X64-NEXT:    cmovneq %r12, %rax
    320 ; X64-NEXT:    xorl %r13d, %r13d
    321 ; X64-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
    322 ; X64-NEXT:    testl %r15d, %r15d
    323 ; X64-NEXT:    jg .LBB3_5
    324 ; X64-NEXT:    jmp .LBB3_4
    325 ; X64-NEXT:    .p2align 4, 0x90
    326 ; X64-NEXT:  .LBB3_12:
    327 ; X64-NEXT:    cmovgeq %r12, %rax
    328 ; X64-NEXT:    testl %r15d, %r15d
    329 ; X64-NEXT:    jle .LBB3_4
    330 ; X64-NEXT:  .LBB3_5: # %l2.header.preheader
    331 ; X64-NEXT:    cmovleq %r12, %rax
    332 ; X64-NEXT:    xorl %r15d, %r15d
    333 ; X64-NEXT:    jmp .LBB3_6
    334 ; X64-NEXT:    .p2align 4, 0x90
    335 ; X64-NEXT:  .LBB3_11: # in Loop: Header=BB3_6 Depth=1
    336 ; X64-NEXT:    cmovgeq %r12, %rax
    337 ; X64-NEXT:  .LBB3_6: # %l2.header
    338 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
    339 ; X64-NEXT:    movslq (%rbx), %rcx
    340 ; X64-NEXT:    orq %rax, %rcx
    341 ; X64-NEXT:    movq %rax, %rdx
    342 ; X64-NEXT:    orq %r14, %rdx
    343 ; X64-NEXT:    movl (%rdx,%rcx,4), %edi
    344 ; X64-NEXT:    shlq $47, %rax
    345 ; X64-NEXT:    orq %rax, %rsp
    346 ; X64-NEXT:    callq sink
    347 ; X64-NEXT:    movq %rsp, %rax
    348 ; X64-NEXT:    sarq $63, %rax
    349 ; X64-NEXT:    incl %r15d
    350 ; X64-NEXT:    cmpl %ebp, %r15d
    351 ; X64-NEXT:    jl .LBB3_11
    352 ; X64-NEXT:  # %bb.7:
    353 ; X64-NEXT:    cmovlq %r12, %rax
    354 ; X64-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
    355 ; X64-NEXT:    jmp .LBB3_8
    356 ; X64-NEXT:    .p2align 4, 0x90
    357 ; X64-NEXT:  .LBB3_4:
    358 ; X64-NEXT:    cmovgq %r12, %rax
    359 ; X64-NEXT:  .LBB3_8: # %l1.latch
    360 ; X64-NEXT:    movslq (%rbx), %rcx
    361 ; X64-NEXT:    orq %rax, %rcx
    362 ; X64-NEXT:    movq %rax, %rdx
    363 ; X64-NEXT:    orq %r14, %rdx
    364 ; X64-NEXT:    movl (%rdx,%rcx,4), %edi
    365 ; X64-NEXT:    shlq $47, %rax
    366 ; X64-NEXT:    orq %rax, %rsp
    367 ; X64-NEXT:    callq sink
    368 ; X64-NEXT:    movq %rsp, %rax
    369 ; X64-NEXT:    sarq $63, %rax
    370 ; X64-NEXT:    incl %r13d
    371 ; X64-NEXT:    cmpl %r15d, %r13d
    372 ; X64-NEXT:    jl .LBB3_12
    373 ; X64-NEXT:  # %bb.9:
    374 ; X64-NEXT:    cmovlq %r12, %rax
    375 ; X64-NEXT:  .LBB3_10: # %exit
    376 ; X64-NEXT:    shlq $47, %rax
    377 ; X64-NEXT:    orq %rax, %rsp
    378 ; X64-NEXT:    addq $8, %rsp
    379 ; X64-NEXT:    popq %rbx
    380 ; X64-NEXT:    popq %r12
    381 ; X64-NEXT:    popq %r13
    382 ; X64-NEXT:    popq %r14
    383 ; X64-NEXT:    popq %r15
    384 ; X64-NEXT:    popq %rbp
    385 ; X64-NEXT:    retq
    386 ;
    387 ; X64-LFENCE-LABEL: test_basic_nested_loop:
    388 ; X64-LFENCE:       # %bb.0: # %entry
    389 ; X64-LFENCE-NEXT:    pushq %rbp
    390 ; X64-LFENCE-NEXT:    pushq %r15
    391 ; X64-LFENCE-NEXT:    pushq %r14
    392 ; X64-LFENCE-NEXT:    pushq %r13
    393 ; X64-LFENCE-NEXT:    pushq %r12
    394 ; X64-LFENCE-NEXT:    pushq %rbx
    395 ; X64-LFENCE-NEXT:    pushq %rax
    396 ; X64-LFENCE-NEXT:    testl %edi, %edi
    397 ; X64-LFENCE-NEXT:    jne .LBB3_6
    398 ; X64-LFENCE-NEXT:  # %bb.1: # %l1.header.preheader
    399 ; X64-LFENCE-NEXT:    movq %r8, %r14
    400 ; X64-LFENCE-NEXT:    movq %rcx, %rbx
    401 ; X64-LFENCE-NEXT:    movl %edx, %r13d
    402 ; X64-LFENCE-NEXT:    movl %esi, %r15d
    403 ; X64-LFENCE-NEXT:    lfence
    404 ; X64-LFENCE-NEXT:    xorl %r12d, %r12d
    405 ; X64-LFENCE-NEXT:    .p2align 4, 0x90
    406 ; X64-LFENCE-NEXT:  .LBB3_2: # %l1.header
    407 ; X64-LFENCE-NEXT:    # =>This Loop Header: Depth=1
    408 ; X64-LFENCE-NEXT:    # Child Loop BB3_4 Depth 2
    409 ; X64-LFENCE-NEXT:    lfence
    410 ; X64-LFENCE-NEXT:    testl %r15d, %r15d
    411 ; X64-LFENCE-NEXT:    jle .LBB3_5
    412 ; X64-LFENCE-NEXT:  # %bb.3: # %l2.header.preheader
    413 ; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
    414 ; X64-LFENCE-NEXT:    lfence
    415 ; X64-LFENCE-NEXT:    xorl %ebp, %ebp
    416 ; X64-LFENCE-NEXT:    .p2align 4, 0x90
    417 ; X64-LFENCE-NEXT:  .LBB3_4: # %l2.header
    418 ; X64-LFENCE-NEXT:    # Parent Loop BB3_2 Depth=1
    419 ; X64-LFENCE-NEXT:    # => This Inner Loop Header: Depth=2
    420 ; X64-LFENCE-NEXT:    lfence
    421 ; X64-LFENCE-NEXT:    movslq (%rbx), %rax
    422 ; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
    423 ; X64-LFENCE-NEXT:    callq sink
    424 ; X64-LFENCE-NEXT:    incl %ebp
    425 ; X64-LFENCE-NEXT:    cmpl %r13d, %ebp
    426 ; X64-LFENCE-NEXT:    jl .LBB3_4
    427 ; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
    428 ; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
    429 ; X64-LFENCE-NEXT:    lfence
    430 ; X64-LFENCE-NEXT:    movslq (%rbx), %rax
    431 ; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
    432 ; X64-LFENCE-NEXT:    callq sink
    433 ; X64-LFENCE-NEXT:    incl %r12d
    434 ; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
    435 ; X64-LFENCE-NEXT:    jl .LBB3_2
    436 ; X64-LFENCE-NEXT:  .LBB3_6: # %exit
    437 ; X64-LFENCE-NEXT:    lfence
    438 ; X64-LFENCE-NEXT:    addq $8, %rsp
    439 ; X64-LFENCE-NEXT:    popq %rbx
    440 ; X64-LFENCE-NEXT:    popq %r12
    441 ; X64-LFENCE-NEXT:    popq %r13
    442 ; X64-LFENCE-NEXT:    popq %r14
    443 ; X64-LFENCE-NEXT:    popq %r15
    444 ; X64-LFENCE-NEXT:    popq %rbp
    445 ; X64-LFENCE-NEXT:    retq
    446 entry:
    447   %a.cmp = icmp eq i32 %a, 0
    448   br i1 %a.cmp, label %l1.header, label %exit
    449
    450 l1.header:
    451   %i = phi i32 [ 0, %entry ], [ %i.next, %l1.latch ]
    452   %b.cmp = icmp sgt i32 %b, 0
    453   br i1 %b.cmp, label %l2.header, label %l1.latch
    454
; Inner loop: secret-indexed load leaked through @sink every iteration.
    455 l2.header:
    456   %j = phi i32 [ 0, %l1.header ], [ %j.next, %l2.header ]
    457   %secret = load i32, i32* %ptr1
    458   %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret
    459   %leak = load i32, i32* %ptr2.idx
    460   call void @sink(i32 %leak)
    461   %j.next = add i32 %j, 1
    462   %j.cmp = icmp slt i32 %j.next, %c
    463   br i1 %j.cmp, label %l2.header, label %l1.latch
    464
; Outer latch repeats the same gadget so both loop levels are exercised.
    465 l1.latch:
    466   %secret2 = load i32, i32* %ptr1
    467   %ptr2.idx2 = getelementptr i32, i32* %ptr2, i32 %secret2
    468   %leak2 = load i32, i32* %ptr2.idx2
    469   call void @sink(i32 %leak2)
    470   %i.next = add i32 %i, 1
    471   %i.cmp = icmp slt i32 %i.next, %b
    472   br i1 %i.cmp, label %l1.header, label %exit
    473
    474 exit:
    475   ret void
    476 }
    477 
    478 declare i32 @__gxx_personality_v0(...)
    479 
    480 declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr
    481 
    482 declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
    483 
    484 define void @test_basic_eh(i32 %a, i32* %ptr1, i32* %ptr2) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; Exception-handling path: an invoke of @__cxa_throw with a landingpad. The
; hardened (X64) run expects the predicate state to be flattened into %rsp
; before the invoke and re-derived from %rsp both on the normal return path
; and at the top of the landing pad (%lpad), so speculation across the EH
; edge is also constrained. Per the file-level FIXME, only the 64-bit EH
; ABI is covered. CHECK lines are autogenerated by
; update_llc_test_checks.py -- regenerate, don't hand-edit.
    485 ; X64-LABEL: test_basic_eh:
    486 ; X64:       # %bb.0: # %entry
    487 ; X64-NEXT:    pushq %rbp
    488 ; X64-NEXT:    .cfi_def_cfa_offset 16
    489 ; X64-NEXT:    pushq %r14
    490 ; X64-NEXT:    .cfi_def_cfa_offset 24
    491 ; X64-NEXT:    pushq %rbx
    492 ; X64-NEXT:    .cfi_def_cfa_offset 32
    493 ; X64-NEXT:    .cfi_offset %rbx, -32
    494 ; X64-NEXT:    .cfi_offset %r14, -24
    495 ; X64-NEXT:    .cfi_offset %rbp, -16
    496 ; X64-NEXT:    movq %rsp, %rax
    497 ; X64-NEXT:    movq $-1, %rcx
    498 ; X64-NEXT:    sarq $63, %rax
    499 ; X64-NEXT:    cmpl $41, %edi
    500 ; X64-NEXT:    jg .LBB4_1
    501 ; X64-NEXT:  # %bb.2: # %thrower
    502 ; X64-NEXT:    movq %rdx, %r14
    503 ; X64-NEXT:    movq %rsi, %rbx
    504 ; X64-NEXT:    cmovgq %rcx, %rax
    505 ; X64-NEXT:    movslq %edi, %rcx
    506 ; X64-NEXT:    movl (%rsi,%rcx,4), %ebp
    507 ; X64-NEXT:    orl %eax, %ebp
    508 ; X64-NEXT:    movl $4, %edi
    509 ; X64-NEXT:    shlq $47, %rax
    510 ; X64-NEXT:    orq %rax, %rsp
    511 ; X64-NEXT:    callq __cxa_allocate_exception
    512 ; X64-NEXT:    movq %rsp, %rcx
    513 ; X64-NEXT:    sarq $63, %rcx
    514 ; X64-NEXT:    movl %ebp, (%rax)
    515 ; X64-NEXT:  .Ltmp0:
    516 ; X64-NEXT:    xorl %esi, %esi
    517 ; X64-NEXT:    xorl %edx, %edx
    518 ; X64-NEXT:    shlq $47, %rcx
    519 ; X64-NEXT:    movq %rax, %rdi
    520 ; X64-NEXT:    orq %rcx, %rsp
    521 ; X64-NEXT:    callq __cxa_throw
    522 ; X64-NEXT:    movq %rsp, %rax
    523 ; X64-NEXT:    sarq $63, %rax
    524 ; X64-NEXT:  .Ltmp1:
    525 ; X64-NEXT:    jmp .LBB4_3
    526 ; X64-NEXT:  .LBB4_1:
    527 ; X64-NEXT:    cmovleq %rcx, %rax
    528 ; X64-NEXT:  .LBB4_3: # %exit
    529 ; X64-NEXT:    shlq $47, %rax
    530 ; X64-NEXT:    orq %rax, %rsp
    531 ; X64-NEXT:    popq %rbx
    532 ; X64-NEXT:    .cfi_def_cfa_offset 24
    533 ; X64-NEXT:    popq %r14
    534 ; X64-NEXT:    .cfi_def_cfa_offset 16
    535 ; X64-NEXT:    popq %rbp
    536 ; X64-NEXT:    .cfi_def_cfa_offset 8
    537 ; X64-NEXT:    retq
    538 ; X64-NEXT:  .LBB4_4: # %lpad
    539 ; X64-NEXT:    .cfi_def_cfa_offset 32
    540 ; X64-NEXT:  .Ltmp2:
    541 ; X64-NEXT:    movq %rsp, %rcx
    542 ; X64-NEXT:    sarq $63, %rcx
    543 ; X64-NEXT:    movl (%rax), %eax
    544 ; X64-NEXT:    addl (%rbx), %eax
    545 ; X64-NEXT:    cltq
    546 ; X64-NEXT:    orq %rcx, %rax
    547 ; X64-NEXT:    movl (%r14,%rax,4), %edi
    548 ; X64-NEXT:    orl %ecx, %edi
    549 ; X64-NEXT:    shlq $47, %rcx
    550 ; X64-NEXT:    orq %rcx, %rsp
    551 ; X64-NEXT:    callq sink
    552 ; X64-NEXT:    movq %rsp, %rax
    553 ; X64-NEXT:    sarq $63, %rax
    554 ;
    555 ; X64-LFENCE-LABEL: test_basic_eh:
    556 ; X64-LFENCE:       # %bb.0: # %entry
    557 ; X64-LFENCE-NEXT:    pushq %rbp
    558 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
    559 ; X64-LFENCE-NEXT:    pushq %r14
    560 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
    561 ; X64-LFENCE-NEXT:    pushq %rbx
    562 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
    563 ; X64-LFENCE-NEXT:    .cfi_offset %rbx, -32
    564 ; X64-LFENCE-NEXT:    .cfi_offset %r14, -24
    565 ; X64-LFENCE-NEXT:    .cfi_offset %rbp, -16
    566 ; X64-LFENCE-NEXT:    cmpl $41, %edi
    567 ; X64-LFENCE-NEXT:    jg .LBB4_2
    568 ; X64-LFENCE-NEXT:  # %bb.1: # %thrower
    569 ; X64-LFENCE-NEXT:    movq %rdx, %r14
    570 ; X64-LFENCE-NEXT:    movq %rsi, %rbx
    571 ; X64-LFENCE-NEXT:    lfence
    572 ; X64-LFENCE-NEXT:    movslq %edi, %rax
    573 ; X64-LFENCE-NEXT:    movl (%rsi,%rax,4), %ebp
    574 ; X64-LFENCE-NEXT:    movl $4, %edi
    575 ; X64-LFENCE-NEXT:    callq __cxa_allocate_exception
    576 ; X64-LFENCE-NEXT:    movl %ebp, (%rax)
    577 ; X64-LFENCE-NEXT:  .Ltmp0:
    578 ; X64-LFENCE-NEXT:    xorl %esi, %esi
    579 ; X64-LFENCE-NEXT:    xorl %edx, %edx
    580 ; X64-LFENCE-NEXT:    movq %rax, %rdi
    581 ; X64-LFENCE-NEXT:    callq __cxa_throw
    582 ; X64-LFENCE-NEXT:  .Ltmp1:
    583 ; X64-LFENCE-NEXT:  .LBB4_2: # %exit
    584 ; X64-LFENCE-NEXT:    lfence
    585 ; X64-LFENCE-NEXT:    popq %rbx
    586 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 24
    587 ; X64-LFENCE-NEXT:    popq %r14
    588 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 16
    589 ; X64-LFENCE-NEXT:    popq %rbp
    590 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 8
    591 ; X64-LFENCE-NEXT:    retq
    592 ; X64-LFENCE-NEXT:  .LBB4_3: # %lpad
    593 ; X64-LFENCE-NEXT:    .cfi_def_cfa_offset 32
    594 ; X64-LFENCE-NEXT:  .Ltmp2:
    595 ; X64-LFENCE-NEXT:    movl (%rax), %eax
    596 ; X64-LFENCE-NEXT:    addl (%rbx), %eax
    597 ; X64-LFENCE-NEXT:    cltq
    598 ; X64-LFENCE-NEXT:    movl (%r14,%rax,4), %edi
    599 ; X64-LFENCE-NEXT:    callq sink
    600 entry:
    601   %a.cmp = icmp slt i32 %a, 42
    602   br i1 %a.cmp, label %thrower, label %exit
    603
; thrower: loads a secret via an attacker-controlled index, stores it into
; a fresh exception object, then throws (invoke -> lpad on unwind).
    604 thrower:
    605   %badidx = getelementptr i32, i32* %ptr1, i32 %a
    606   %secret1 = load i32, i32* %badidx
    607   %e.ptr = call i8* @__cxa_allocate_exception(i64 4)
    608   %e.ptr.cast = bitcast i8* %e.ptr to i32*
    609   store i32 %secret1, i32* %e.ptr.cast
    610   invoke void @__cxa_throw(i8* %e.ptr, i8* null, i8* null)
    611           to label %exit unwind label %lpad
    612
    613 exit:
    614   ret void
    615
; lpad: the caught exception payload feeds another dependent load that is
; leaked through @sink, so hardening must cover the landing-pad path too.
    616 lpad:
    617   %e = landingpad { i8*, i32 }
    618           catch i8* null
    619   %e.catch.ptr = extractvalue { i8*, i32 } %e, 0
    620   %e.catch.ptr.cast = bitcast i8* %e.catch.ptr to i32*
    621   %secret1.catch = load i32, i32* %e.catch.ptr.cast
    622   %secret2 = load i32, i32* %ptr1
    623   %secret.sum = add i32 %secret1.catch, %secret2
    624   %ptr2.idx = getelementptr i32, i32* %ptr2, i32 %secret.sum
    625   %leak = load i32, i32* %ptr2.idx
    626   call void @sink(i32 %leak)
    627   unreachable
    628 }
    629 
    630 declare void @sink_float(float)
    631 declare void @sink_double(double)
    632 
    633 ; Test direct and converting loads of floating point values.
    634 define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind {
; Direct FP loads (movss/movsd), FP-to-FP converting loads (cvtsd2ss /
; cvtss2sd), and int-to-FP converting loads (cvtsi2ss*/cvtsi2sd* with a
; memory operand). FP values cannot be OR-masked, so the hardened (X64) run
; expects the pointer register itself to be OR'd with the predicate state
; before each load, and state flattened into %rsp around every call. The
; X64-LFENCE run expects plain loads/converts with no fences between calls
; (straight-line code). CHECK lines are autogenerated by
; update_llc_test_checks.py -- regenerate, don't hand-edit.
    635 ; X64-LABEL: test_fp_loads:
    636 ; X64:       # %bb.0: # %entry
    637 ; X64-NEXT:    pushq %r15
    638 ; X64-NEXT:    pushq %r14
    639 ; X64-NEXT:    pushq %r12
    640 ; X64-NEXT:    pushq %rbx
    641 ; X64-NEXT:    pushq %rax
    642 ; X64-NEXT:    movq %rsp, %rax
    643 ; X64-NEXT:    movq %rcx, %r15
    644 ; X64-NEXT:    movq %rdx, %r14
    645 ; X64-NEXT:    movq %rsi, %rbx
    646 ; X64-NEXT:    movq %rdi, %r12
    647 ; X64-NEXT:    movq $-1, %rcx
    648 ; X64-NEXT:    sarq $63, %rax
    649 ; X64-NEXT:    orq %rax, %r12
    650 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    651 ; X64-NEXT:    shlq $47, %rax
    652 ; X64-NEXT:    orq %rax, %rsp
    653 ; X64-NEXT:    callq sink_float
    654 ; X64-NEXT:    movq %rsp, %rax
    655 ; X64-NEXT:    sarq $63, %rax
    656 ; X64-NEXT:    orq %rax, %rbx
    657 ; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    658 ; X64-NEXT:    shlq $47, %rax
    659 ; X64-NEXT:    orq %rax, %rsp
    660 ; X64-NEXT:    callq sink_double
    661 ; X64-NEXT:    movq %rsp, %rax
    662 ; X64-NEXT:    sarq $63, %rax
    663 ; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    664 ; X64-NEXT:    cvtsd2ss %xmm0, %xmm0
    665 ; X64-NEXT:    shlq $47, %rax
    666 ; X64-NEXT:    orq %rax, %rsp
    667 ; X64-NEXT:    callq sink_float
    668 ; X64-NEXT:    movq %rsp, %rax
    669 ; X64-NEXT:    sarq $63, %rax
    670 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    671 ; X64-NEXT:    cvtss2sd %xmm0, %xmm0
    672 ; X64-NEXT:    shlq $47, %rax
    673 ; X64-NEXT:    orq %rax, %rsp
    674 ; X64-NEXT:    callq sink_double
    675 ; X64-NEXT:    movq %rsp, %rax
    676 ; X64-NEXT:    sarq $63, %rax
    677 ; X64-NEXT:    orq %rax, %r14
    678 ; X64-NEXT:    xorps %xmm0, %xmm0
    679 ; X64-NEXT:    cvtsi2ssl (%r14), %xmm0
    680 ; X64-NEXT:    shlq $47, %rax
    681 ; X64-NEXT:    orq %rax, %rsp
    682 ; X64-NEXT:    callq sink_float
    683 ; X64-NEXT:    movq %rsp, %rax
    684 ; X64-NEXT:    sarq $63, %rax
    685 ; X64-NEXT:    orq %rax, %r15
    686 ; X64-NEXT:    xorps %xmm0, %xmm0
    687 ; X64-NEXT:    cvtsi2sdq (%r15), %xmm0
    688 ; X64-NEXT:    shlq $47, %rax
    689 ; X64-NEXT:    orq %rax, %rsp
    690 ; X64-NEXT:    callq sink_double
    691 ; X64-NEXT:    movq %rsp, %rax
    692 ; X64-NEXT:    sarq $63, %rax
    693 ; X64-NEXT:    xorps %xmm0, %xmm0
    694 ; X64-NEXT:    cvtsi2ssq (%r15), %xmm0
    695 ; X64-NEXT:    shlq $47, %rax
    696 ; X64-NEXT:    orq %rax, %rsp
    697 ; X64-NEXT:    callq sink_float
    698 ; X64-NEXT:    movq %rsp, %rax
    699 ; X64-NEXT:    sarq $63, %rax
    700 ; X64-NEXT:    xorps %xmm0, %xmm0
    701 ; X64-NEXT:    cvtsi2sdl (%r14), %xmm0
    702 ; X64-NEXT:    shlq $47, %rax
    703 ; X64-NEXT:    orq %rax, %rsp
    704 ; X64-NEXT:    callq sink_double
    705 ; X64-NEXT:    movq %rsp, %rax
    706 ; X64-NEXT:    sarq $63, %rax
    707 ; X64-NEXT:    shlq $47, %rax
    708 ; X64-NEXT:    orq %rax, %rsp
    709 ; X64-NEXT:    addq $8, %rsp
    710 ; X64-NEXT:    popq %rbx
    711 ; X64-NEXT:    popq %r12
    712 ; X64-NEXT:    popq %r14
    713 ; X64-NEXT:    popq %r15
    714 ; X64-NEXT:    retq
    715 ;
    716 ; X64-LFENCE-LABEL: test_fp_loads:
    717 ; X64-LFENCE:       # %bb.0: # %entry
    718 ; X64-LFENCE-NEXT:    pushq %r15
    719 ; X64-LFENCE-NEXT:    pushq %r14
    720 ; X64-LFENCE-NEXT:    pushq %r12
    721 ; X64-LFENCE-NEXT:    pushq %rbx
    722 ; X64-LFENCE-NEXT:    pushq %rax
    723 ; X64-LFENCE-NEXT:    movq %rcx, %r15
    724 ; X64-LFENCE-NEXT:    movq %rdx, %r14
    725 ; X64-LFENCE-NEXT:    movq %rsi, %rbx
    726 ; X64-LFENCE-NEXT:    movq %rdi, %r12
    727 ; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    728 ; X64-LFENCE-NEXT:    callq sink_float
    729 ; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    730 ; X64-LFENCE-NEXT:    callq sink_double
    731 ; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
    732 ; X64-LFENCE-NEXT:    cvtsd2ss %xmm0, %xmm0
    733 ; X64-LFENCE-NEXT:    callq sink_float
    734 ; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    735 ; X64-LFENCE-NEXT:    cvtss2sd %xmm0, %xmm0
    736 ; X64-LFENCE-NEXT:    callq sink_double
    737 ; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
    738 ; X64-LFENCE-NEXT:    cvtsi2ssl (%r14), %xmm0
    739 ; X64-LFENCE-NEXT:    callq sink_float
    740 ; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
    741 ; X64-LFENCE-NEXT:    cvtsi2sdq (%r15), %xmm0
    742 ; X64-LFENCE-NEXT:    callq sink_double
    743 ; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
    744 ; X64-LFENCE-NEXT:    cvtsi2ssq (%r15), %xmm0
    745 ; X64-LFENCE-NEXT:    callq sink_float
    746 ; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
    747 ; X64-LFENCE-NEXT:    cvtsi2sdl (%r14), %xmm0
    748 ; X64-LFENCE-NEXT:    callq sink_double
    749 ; X64-LFENCE-NEXT:    addq $8, %rsp
    750 ; X64-LFENCE-NEXT:    popq %rbx
    751 ; X64-LFENCE-NEXT:    popq %r12
    752 ; X64-LFENCE-NEXT:    popq %r14
    753 ; X64-LFENCE-NEXT:    popq %r15
    754 ; X64-LFENCE-NEXT:    retq
    755 entry:
    756   %f1 = load float, float* %fptr
    757   call void @sink_float(float %f1)
    758   %d1 = load double, double* %dptr
    759   call void @sink_double(double %d1)
    760   %f2.d = load double, double* %dptr
    761   %f2 = fptrunc double %f2.d to float
    762   call void @sink_float(float %f2)
    763   %d2.f = load float, float* %fptr
    764   %d2 = fpext float %d2.f to double
    765   call void @sink_double(double %d2)
    766   %f3.i = load i32, i32* %i32ptr
    767   %f3 = sitofp i32 %f3.i to float
    768   call void @sink_float(float %f3)
    769   %d3.i = load i64, i64* %i64ptr
    770   %d3 = sitofp i64 %d3.i to double
    771   call void @sink_double(double %d3)
    772   %f4.i = load i64, i64* %i64ptr
    773   %f4 = sitofp i64 %f4.i to float
    774   call void @sink_float(float %f4)
    775   %d4.i = load i32, i32* %i32ptr
    776   %d4 = sitofp i32 %d4.i to double
    777   call void @sink_double(double %d4)
    778   ret void
    779 }
    780 
    781 declare void @sink_v4f32(<4 x float>)
    782 declare void @sink_v2f64(<2 x double>)
    783 declare void @sink_v16i8(<16 x i8>)
    784 declare void @sink_v8i16(<8 x i16>)
    785 declare void @sink_v4i32(<4 x i32>)
    786 declare void @sink_v2i64(<2 x i64>)
    787 
    788 ; Test loads of vectors.
; Hardening of 128-bit vector loads, one per element type. In the default
; run the pass re-derives its predicate state from the stack pointer after
; every call (movq %rsp / sarq $63 broadcasts the sign bit), ORs that state
; into the pointer before each aligned `movaps` load, and folds the state
; back into the high bits of %rsp (shlq $47 / orq ... %rsp) across the call.
; The -lfence run emits these straight-line loads unmodified.
define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind {
; X64-LABEL: test_vec_loads:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %r15
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %r13
; X64-NEXT:    pushq %r12
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq %r9, %r14
; X64-NEXT:    movq %r8, %r15
; X64-NEXT:    movq %rcx, %r12
; X64-NEXT:    movq %rdx, %r13
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    movq $-1, %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %rdi
; X64-NEXT:    movaps (%rdi), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v4f32
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %rbx
; X64-NEXT:    movaps (%rbx), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v2f64
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %r13
; X64-NEXT:    movaps (%r13), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v16i8
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %r12
; X64-NEXT:    movaps (%r12), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v8i16
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %r15
; X64-NEXT:    movaps (%r15), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v4i32
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    orq %rax, %r14
; X64-NEXT:    movaps (%r14), %xmm0
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink_v2i64
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r12
; X64-NEXT:    popq %r13
; X64-NEXT:    popq %r14
; X64-NEXT:    popq %r15
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_vec_loads:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %r15
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %r13
; X64-LFENCE-NEXT:    pushq %r12
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    movq %r9, %r14
; X64-LFENCE-NEXT:    movq %r8, %r15
; X64-LFENCE-NEXT:    movq %rcx, %r12
; X64-LFENCE-NEXT:    movq %rdx, %r13
; X64-LFENCE-NEXT:    movq %rsi, %rbx
; X64-LFENCE-NEXT:    movaps (%rdi), %xmm0
; X64-LFENCE-NEXT:    callq sink_v4f32
; X64-LFENCE-NEXT:    movaps (%rbx), %xmm0
; X64-LFENCE-NEXT:    callq sink_v2f64
; X64-LFENCE-NEXT:    movaps (%r13), %xmm0
; X64-LFENCE-NEXT:    callq sink_v16i8
; X64-LFENCE-NEXT:    movaps (%r12), %xmm0
; X64-LFENCE-NEXT:    callq sink_v8i16
; X64-LFENCE-NEXT:    movaps (%r15), %xmm0
; X64-LFENCE-NEXT:    callq sink_v4i32
; X64-LFENCE-NEXT:    movaps (%r14), %xmm0
; X64-LFENCE-NEXT:    callq sink_v2i64
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r12
; X64-LFENCE-NEXT:    popq %r13
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    popq %r15
; X64-LFENCE-NEXT:    retq
entry:
  ; One load + call per 128-bit vector type; the intervening calls force
  ; the pass to re-derive the predicate state from %rsp before each load.
  %x1 = load <4 x float>, <4 x float>* %v4f32ptr
  call void @sink_v4f32(<4 x float> %x1)
  %x2 = load <2 x double>, <2 x double>* %v2f64ptr
  call void @sink_v2f64(<2 x double> %x2)
  %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
  call void @sink_v16i8(<16 x i8> %x3)
  %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
  call void @sink_v8i16(<8 x i16> %x4)
  %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
  call void @sink_v4i32(<4 x i32> %x5)
  %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
  call void @sink_v2i64(<2 x i64> %x6)
  ret void
}
    901 
; Deferred hardening: instead of poisoning each loaded value (or its
; pointer) at the load itself, the default run lets the dependent integer
; computation happen first and ORs the predicate state into the final
; value just before it is passed to @sink (e.g. movl/incl/imull then
; `orl %eax, %edi`). As above, the state is rebuilt from %rsp after each
; call and merged back into %rsp's high bits before the next call. The
; -lfence run emits the computations with no per-value hardening.
define void @test_deferred_hardening(i32* %ptr1, i32* %ptr2, i32 %x) nounwind {
; X64-LABEL: test_deferred_hardening:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %r14
; X64-NEXT:    pushq %rbx
; X64-NEXT:    pushq %rax
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    movq %rsi, %r14
; X64-NEXT:    movq %rdi, %rbx
; X64-NEXT:    movq $-1, %rcx
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    movl (%rdi), %edi
; X64-NEXT:    incl %edi
; X64-NEXT:    imull %edx, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    movl (%rbx), %ecx
; X64-NEXT:    movl (%r14), %edx
; X64-NEXT:    leal 1(%rcx,%rdx), %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    movl (%rbx), %edi
; X64-NEXT:    shll $7, %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    movzwl (%rbx), %ecx
; X64-NEXT:    sarw $7, %cx
; X64-NEXT:    movzwl %cx, %edi
; X64-NEXT:    notl %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    movzwl (%rbx), %ecx
; X64-NEXT:    rolw $9, %cx
; X64-NEXT:    movswl %cx, %edi
; X64-NEXT:    negl %edi
; X64-NEXT:    orl %eax, %edi
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    callq sink
; X64-NEXT:    movq %rsp, %rax
; X64-NEXT:    sarq $63, %rax
; X64-NEXT:    shlq $47, %rax
; X64-NEXT:    orq %rax, %rsp
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    popq %r14
; X64-NEXT:    retq
;
; X64-LFENCE-LABEL: test_deferred_hardening:
; X64-LFENCE:       # %bb.0: # %entry
; X64-LFENCE-NEXT:    pushq %r14
; X64-LFENCE-NEXT:    pushq %rbx
; X64-LFENCE-NEXT:    pushq %rax
; X64-LFENCE-NEXT:    movq %rsi, %r14
; X64-LFENCE-NEXT:    movq %rdi, %rbx
; X64-LFENCE-NEXT:    movl (%rdi), %edi
; X64-LFENCE-NEXT:    incl %edi
; X64-LFENCE-NEXT:    imull %edx, %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movl (%rbx), %eax
; X64-LFENCE-NEXT:    movl (%r14), %ecx
; X64-LFENCE-NEXT:    leal 1(%rax,%rcx), %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movl (%rbx), %edi
; X64-LFENCE-NEXT:    shll $7, %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
; X64-LFENCE-NEXT:    sarw $7, %ax
; X64-LFENCE-NEXT:    movzwl %ax, %edi
; X64-LFENCE-NEXT:    notl %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
; X64-LFENCE-NEXT:    rolw $9, %ax
; X64-LFENCE-NEXT:    movswl %ax, %edi
; X64-LFENCE-NEXT:    negl %edi
; X64-LFENCE-NEXT:    callq sink
; X64-LFENCE-NEXT:    addq $8, %rsp
; X64-LFENCE-NEXT:    popq %rbx
; X64-LFENCE-NEXT:    popq %r14
; X64-LFENCE-NEXT:    retq
entry:
  ; Simple ALU chain on a loaded value: the state is ORed into %a3, not %a1.
  %a1 = load i32, i32* %ptr1
  %a2 = add i32 %a1, 1
  %a3 = mul i32 %a2, %x
  call void @sink(i32 %a3)
  ; Two loads merged into one hardened result (lea folds the add + 1).
  %b1 = load i32, i32* %ptr1
  %b2 = add i32 %b1, 1
  %b3 = load i32, i32* %ptr2
  %b4 = add i32 %b2, %b3
  call void @sink(i32 %b4)
  ; Single shift on a loaded value.
  %c1 = load i32, i32* %ptr1
  %c2 = shl i32 %c1, 7
  call void @sink(i32 %c2)
  %d1 = load i32, i32* %ptr1
  ; Check trunc and integer ops narrower than i32.
  %d2 = trunc i32 %d1 to i16
  %d3 = ashr i16 %d2, 7
  %d4 = zext i16 %d3 to i32
  %d5 = xor i32 %d4, -1
  call void @sink(i32 %d5)
  ; Rotate pattern (lshr+shl+or on i16) plus sext and negate; hardening is
  ; still applied only to the final i32 handed to the call.
  %e1 = load i32, i32* %ptr1
  %e2 = trunc i32 %e1 to i16
  %e3 = lshr i16 %e2, 7
  %e4 = shl i16 %e2, 9
  %e5 = or i16 %e3, %e4
  %e6 = sext i16 %e5 to i32
  %e7 = sub i32 0, %e6
  call void @sink(i32 %e7)
  ret void
}
   1028