Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
      3 ; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
      4 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
      5 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
      6 
      7 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      8 target triple = "x86_64-unknown-linux-gnu"
      9 
     10 %struct.S = type { i32, i32, i32, i32 }
     11 
     12 ; Function Attrs: nounwind uwtable
        ; Baseline case: a store that happens on only one path, followed by a
        ; 16-byte copy that reads the stored location.
        ;   - if.then (taken when %x > 17) writes %x to field 1 of %s1 (offset 4).
        ;   - if.end copies 16 bytes %s4 -> %s3, then 16 bytes %s1 -> %s2.
        ; Expected codegen: the default, core-avx2 and skx runs want the
        ; %s1 -> %s2 copy emitted as 4-, 4- and 8-byte integer moves at offsets
        ; 0, 4 and 8, so the piece at offset 4 has the same width as the
        ; conditional store. The --x86-disable-avoid-SFB run wants a single
        ; 16-byte movups load/store pair instead. The %s4 -> %s3 copy stays a
        ; 16-byte vector move in every run.
        ; NOTE(review): "SFB" presumably means store-forwarding block - confirm
        ; against the X86 pass that implements this transformation.
     13 define void @test_conditional_block(%struct.S* nocapture noalias %s1 , %struct.S* nocapture noalias %s2, i32 %x, %struct.S* nocapture noalias  %s3, %struct.S* nocapture noalias readonly %s4) local_unnamed_addr #0 {
     14 ; CHECK-LABEL: test_conditional_block:
     15 ; CHECK:       # %bb.0: # %entry
     16 ; CHECK-NEXT:    cmpl $18, %edx
     17 ; CHECK-NEXT:    jl .LBB0_2
     18 ; CHECK-NEXT:  # %bb.1: # %if.then
     19 ; CHECK-NEXT:    movl %edx, 4(%rdi)
     20 ; CHECK-NEXT:  .LBB0_2: # %if.end
     21 ; CHECK-NEXT:    movups (%r8), %xmm0
     22 ; CHECK-NEXT:    movups %xmm0, (%rcx)
     23 ; CHECK-NEXT:    movl (%rdi), %eax
     24 ; CHECK-NEXT:    movl %eax, (%rsi)
     25 ; CHECK-NEXT:    movl 4(%rdi), %eax
     26 ; CHECK-NEXT:    movl %eax, 4(%rsi)
     27 ; CHECK-NEXT:    movq 8(%rdi), %rax
     28 ; CHECK-NEXT:    movq %rax, 8(%rsi)
     29 ; CHECK-NEXT:    retq
     30 ;
     31 ; DISABLED-LABEL: test_conditional_block:
     32 ; DISABLED:       # %bb.0: # %entry
     33 ; DISABLED-NEXT:    cmpl $18, %edx
     34 ; DISABLED-NEXT:    jl .LBB0_2
     35 ; DISABLED-NEXT:  # %bb.1: # %if.then
     36 ; DISABLED-NEXT:    movl %edx, 4(%rdi)
     37 ; DISABLED-NEXT:  .LBB0_2: # %if.end
     38 ; DISABLED-NEXT:    movups (%r8), %xmm0
     39 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
     40 ; DISABLED-NEXT:    movups (%rdi), %xmm0
     41 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
     42 ; DISABLED-NEXT:    retq
     43 ;
     44 ; CHECK-AVX2-LABEL: test_conditional_block:
     45 ; CHECK-AVX2:       # %bb.0: # %entry
     46 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
     47 ; CHECK-AVX2-NEXT:    jl .LBB0_2
     48 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
     49 ; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
     50 ; CHECK-AVX2-NEXT:  .LBB0_2: # %if.end
     51 ; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
     52 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
     53 ; CHECK-AVX2-NEXT:    movl (%rdi), %eax
     54 ; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
     55 ; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
     56 ; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
     57 ; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
     58 ; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
     59 ; CHECK-AVX2-NEXT:    retq
     60 ;
     61 ; CHECK-AVX512-LABEL: test_conditional_block:
     62 ; CHECK-AVX512:       # %bb.0: # %entry
     63 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
     64 ; CHECK-AVX512-NEXT:    jl .LBB0_2
     65 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
     66 ; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
     67 ; CHECK-AVX512-NEXT:  .LBB0_2: # %if.end
     68 ; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
     69 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
     70 ; CHECK-AVX512-NEXT:    movl (%rdi), %eax
     71 ; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
     72 ; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
     73 ; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
     74 ; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
     75 ; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
     76 ; CHECK-AVX512-NEXT:    retq
     77 entry:
     78   %cmp = icmp sgt i32 %x, 17
     79   br i1 %cmp, label %if.then, label %if.end
     80 
     81 if.then:                                          ; preds = %entry
     82   %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
     83   store i32 %x, i32* %b, align 4
     84   br label %if.end
     85 
     86 if.end:                                           ; preds = %if.then, %entry
     87   %0 = bitcast %struct.S* %s3 to i8*
     88   %1 = bitcast %struct.S* %s4 to i8*
     89   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
     90   %2 = bitcast %struct.S* %s2 to i8*
     91   %3 = bitcast %struct.S* %s1 to i8*
        ; This copy reads %s1, including the field written in if.then.
     92   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
     93   ret void
     94 }
     95 
     96 ; Function Attrs: nounwind uwtable
        ; Immediate-store case, all in one basic block.
        ;   - Writes the constant 0 to field 0 of %s1 and the constant 1 to
        ;     field 0 of %s3, then copies 16 bytes %s1 -> %s2.
        ;   - %x is not used by the body.
        ; Expected codegen: the default, core-avx2 and skx runs want the copy
        ; emitted as a 4-byte move at offset 0 (matching the store to %s1), an
        ; 8-byte move at offset 4 and a 4-byte move at offset 12. The
        ; --x86-disable-avoid-SFB run wants one 16-byte movups load/store pair.
     97 define void @test_imm_store(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 {
     98 ; CHECK-LABEL: test_imm_store:
     99 ; CHECK:       # %bb.0: # %entry
    100 ; CHECK-NEXT:    movl $0, (%rdi)
    101 ; CHECK-NEXT:    movl $1, (%rcx)
    102 ; CHECK-NEXT:    movl (%rdi), %eax
    103 ; CHECK-NEXT:    movl %eax, (%rsi)
    104 ; CHECK-NEXT:    movq 4(%rdi), %rax
    105 ; CHECK-NEXT:    movq %rax, 4(%rsi)
    106 ; CHECK-NEXT:    movl 12(%rdi), %eax
    107 ; CHECK-NEXT:    movl %eax, 12(%rsi)
    108 ; CHECK-NEXT:    retq
    109 ;
    110 ; DISABLED-LABEL: test_imm_store:
    111 ; DISABLED:       # %bb.0: # %entry
    112 ; DISABLED-NEXT:    movl $0, (%rdi)
    113 ; DISABLED-NEXT:    movl $1, (%rcx)
    114 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    115 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    116 ; DISABLED-NEXT:    retq
    117 ;
    118 ; CHECK-AVX2-LABEL: test_imm_store:
    119 ; CHECK-AVX2:       # %bb.0: # %entry
    120 ; CHECK-AVX2-NEXT:    movl $0, (%rdi)
    121 ; CHECK-AVX2-NEXT:    movl $1, (%rcx)
    122 ; CHECK-AVX2-NEXT:    movl (%rdi), %eax
    123 ; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
    124 ; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
    125 ; CHECK-AVX2-NEXT:    movq %rax, 4(%rsi)
    126 ; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
    127 ; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
    128 ; CHECK-AVX2-NEXT:    retq
    129 ;
    130 ; CHECK-AVX512-LABEL: test_imm_store:
    131 ; CHECK-AVX512:       # %bb.0: # %entry
    132 ; CHECK-AVX512-NEXT:    movl $0, (%rdi)
    133 ; CHECK-AVX512-NEXT:    movl $1, (%rcx)
    134 ; CHECK-AVX512-NEXT:    movl (%rdi), %eax
    135 ; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
    136 ; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
    137 ; CHECK-AVX512-NEXT:    movq %rax, 4(%rsi)
    138 ; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
    139 ; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
    140 ; CHECK-AVX512-NEXT:    retq
    141 entry:
    142   %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0
    143   store i32 0, i32* %a, align 4
    144   %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0
    145   store i32 1, i32* %a1, align 4
    146   %0 = bitcast %struct.S* %s2 to i8*
    147   %1 = bitcast %struct.S* %s1 to i8*
        ; Source of the copy is %s1, whose first field was just written above.
    148   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
    149   ret void
    150 }
    151 
    152 ; Function Attrs: nounwind uwtable
        ; Two conditional stores in different blocks ahead of the copy.
        ;   - if.then  (taken when %x  > 17) writes %x  to field 1 of %s1 (offset 4).
        ;   - if.then2 (taken when %x2 > 13) writes %x2 to field 3 of %s1 (offset 12).
        ;   - if.end3 copies 16 bytes %s4 -> %s3, then 16 bytes %s1 -> %s2.
        ; if.then2 branches straight to if.end3, whereas if.then reaches it only
        ; through if.end.
        ; Expected codegen: the default, core-avx2 and skx runs want the
        ; %s1 -> %s2 copy emitted as an 8-byte move at offset 0 followed by
        ; 4-byte moves at offsets 8 and 12. Only the store at offset 12 gets a
        ; same-width piece of its own; offset 4 falls inside the 8-byte move.
        ; The --x86-disable-avoid-SFB run wants one 16-byte movups pair.
    153 define void @test_nondirect_br(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
    154 ; CHECK-LABEL: test_nondirect_br:
    155 ; CHECK:       # %bb.0: # %entry
    156 ; CHECK-NEXT:    cmpl $18, %edx
    157 ; CHECK-NEXT:    jl .LBB2_2
    158 ; CHECK-NEXT:  # %bb.1: # %if.then
    159 ; CHECK-NEXT:    movl %edx, 4(%rdi)
    160 ; CHECK-NEXT:  .LBB2_2: # %if.end
    161 ; CHECK-NEXT:    cmpl $14, %r9d
    162 ; CHECK-NEXT:    jl .LBB2_4
    163 ; CHECK-NEXT:  # %bb.3: # %if.then2
    164 ; CHECK-NEXT:    movl %r9d, 12(%rdi)
    165 ; CHECK-NEXT:  .LBB2_4: # %if.end3
    166 ; CHECK-NEXT:    movups (%r8), %xmm0
    167 ; CHECK-NEXT:    movups %xmm0, (%rcx)
    168 ; CHECK-NEXT:    movq (%rdi), %rax
    169 ; CHECK-NEXT:    movq %rax, (%rsi)
    170 ; CHECK-NEXT:    movl 8(%rdi), %eax
    171 ; CHECK-NEXT:    movl %eax, 8(%rsi)
    172 ; CHECK-NEXT:    movl 12(%rdi), %eax
    173 ; CHECK-NEXT:    movl %eax, 12(%rsi)
    174 ; CHECK-NEXT:    retq
    175 ;
    176 ; DISABLED-LABEL: test_nondirect_br:
    177 ; DISABLED:       # %bb.0: # %entry
    178 ; DISABLED-NEXT:    cmpl $18, %edx
    179 ; DISABLED-NEXT:    jl .LBB2_2
    180 ; DISABLED-NEXT:  # %bb.1: # %if.then
    181 ; DISABLED-NEXT:    movl %edx, 4(%rdi)
    182 ; DISABLED-NEXT:  .LBB2_2: # %if.end
    183 ; DISABLED-NEXT:    cmpl $14, %r9d
    184 ; DISABLED-NEXT:    jl .LBB2_4
    185 ; DISABLED-NEXT:  # %bb.3: # %if.then2
    186 ; DISABLED-NEXT:    movl %r9d, 12(%rdi)
    187 ; DISABLED-NEXT:  .LBB2_4: # %if.end3
    188 ; DISABLED-NEXT:    movups (%r8), %xmm0
    189 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
    190 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    191 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    192 ; DISABLED-NEXT:    retq
    193 ;
    194 ; CHECK-AVX2-LABEL: test_nondirect_br:
    195 ; CHECK-AVX2:       # %bb.0: # %entry
    196 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
    197 ; CHECK-AVX2-NEXT:    jl .LBB2_2
    198 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
    199 ; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
    200 ; CHECK-AVX2-NEXT:  .LBB2_2: # %if.end
    201 ; CHECK-AVX2-NEXT:    cmpl $14, %r9d
    202 ; CHECK-AVX2-NEXT:    jl .LBB2_4
    203 ; CHECK-AVX2-NEXT:  # %bb.3: # %if.then2
    204 ; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
    205 ; CHECK-AVX2-NEXT:  .LBB2_4: # %if.end3
    206 ; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
    207 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
    208 ; CHECK-AVX2-NEXT:    movq (%rdi), %rax
    209 ; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
    210 ; CHECK-AVX2-NEXT:    movl 8(%rdi), %eax
    211 ; CHECK-AVX2-NEXT:    movl %eax, 8(%rsi)
    212 ; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
    213 ; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
    214 ; CHECK-AVX2-NEXT:    retq
    215 ;
    216 ; CHECK-AVX512-LABEL: test_nondirect_br:
    217 ; CHECK-AVX512:       # %bb.0: # %entry
    218 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
    219 ; CHECK-AVX512-NEXT:    jl .LBB2_2
    220 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
    221 ; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
    222 ; CHECK-AVX512-NEXT:  .LBB2_2: # %if.end
    223 ; CHECK-AVX512-NEXT:    cmpl $14, %r9d
    224 ; CHECK-AVX512-NEXT:    jl .LBB2_4
    225 ; CHECK-AVX512-NEXT:  # %bb.3: # %if.then2
    226 ; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
    227 ; CHECK-AVX512-NEXT:  .LBB2_4: # %if.end3
    228 ; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
    229 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
    230 ; CHECK-AVX512-NEXT:    movq (%rdi), %rax
    231 ; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
    232 ; CHECK-AVX512-NEXT:    movl 8(%rdi), %eax
    233 ; CHECK-AVX512-NEXT:    movl %eax, 8(%rsi)
    234 ; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
    235 ; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
    236 ; CHECK-AVX512-NEXT:    retq
    237 entry:
    238   %cmp = icmp sgt i32 %x, 17
    239   br i1 %cmp, label %if.then, label %if.end
    240 
    241 if.then:                                          ; preds = %entry
    242   %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
    243   store i32 %x, i32* %b, align 4
    244   br label %if.end
    245 
    246 if.end:                                           ; preds = %if.then, %entry
    247   %cmp1 = icmp sgt i32 %x2, 13
    248   br i1 %cmp1, label %if.then2, label %if.end3
    249 
    250 if.then2:                                         ; preds = %if.end
    251   %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
    252   store i32 %x2, i32* %d, align 4
    253   br label %if.end3
    254 
    255 if.end3:                                          ; preds = %if.then2, %if.end
    256   %0 = bitcast %struct.S* %s3 to i8*
    257   %1 = bitcast %struct.S* %s4 to i8*
    258   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
    259   %2 = bitcast %struct.S* %s2 to i8*
    260   %3 = bitcast %struct.S* %s1 to i8*
        ; This copy reads %s1, which both conditional stores above may have written.
    261   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
    262   ret void
    263 }
    264 
    265 ; Function Attrs: nounwind uwtable
        ; One unconditional and one conditional store ahead of the copy.
        ;   - entry always writes %x2 to field 3 of %s1 (offset 12).
        ;   - if.then (taken when %x > 17) writes %x to field 1 of %s1 (offset 4).
        ;   - if.end, reached from both entry and if.then, copies 16 bytes
        ;     %s4 -> %s3, then 16 bytes %s1 -> %s2.
        ; Expected codegen: the default, core-avx2 and skx runs want the
        ; %s1 -> %s2 copy emitted as four 4-byte moves at offsets 0, 4, 8 and 12,
        ; so both stored fields get a same-width piece. The
        ; --x86-disable-avoid-SFB run wants one 16-byte movups pair.
    266 define void @test_2preds_block(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
    267 ; CHECK-LABEL: test_2preds_block:
    268 ; CHECK:       # %bb.0: # %entry
    269 ; CHECK-NEXT:    movl %r9d, 12(%rdi)
    270 ; CHECK-NEXT:    cmpl $18, %edx
    271 ; CHECK-NEXT:    jl .LBB3_2
    272 ; CHECK-NEXT:  # %bb.1: # %if.then
    273 ; CHECK-NEXT:    movl %edx, 4(%rdi)
    274 ; CHECK-NEXT:  .LBB3_2: # %if.end
    275 ; CHECK-NEXT:    movups (%r8), %xmm0
    276 ; CHECK-NEXT:    movups %xmm0, (%rcx)
    277 ; CHECK-NEXT:    movl (%rdi), %eax
    278 ; CHECK-NEXT:    movl %eax, (%rsi)
    279 ; CHECK-NEXT:    movl 4(%rdi), %eax
    280 ; CHECK-NEXT:    movl %eax, 4(%rsi)
    281 ; CHECK-NEXT:    movl 8(%rdi), %eax
    282 ; CHECK-NEXT:    movl %eax, 8(%rsi)
    283 ; CHECK-NEXT:    movl 12(%rdi), %eax
    284 ; CHECK-NEXT:    movl %eax, 12(%rsi)
    285 ; CHECK-NEXT:    retq
    286 ;
    287 ; DISABLED-LABEL: test_2preds_block:
    288 ; DISABLED:       # %bb.0: # %entry
    289 ; DISABLED-NEXT:    movl %r9d, 12(%rdi)
    290 ; DISABLED-NEXT:    cmpl $18, %edx
    291 ; DISABLED-NEXT:    jl .LBB3_2
    292 ; DISABLED-NEXT:  # %bb.1: # %if.then
    293 ; DISABLED-NEXT:    movl %edx, 4(%rdi)
    294 ; DISABLED-NEXT:  .LBB3_2: # %if.end
    295 ; DISABLED-NEXT:    movups (%r8), %xmm0
    296 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
    297 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    298 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    299 ; DISABLED-NEXT:    retq
    300 ;
    301 ; CHECK-AVX2-LABEL: test_2preds_block:
    302 ; CHECK-AVX2:       # %bb.0: # %entry
    303 ; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
    304 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
    305 ; CHECK-AVX2-NEXT:    jl .LBB3_2
    306 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
    307 ; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
    308 ; CHECK-AVX2-NEXT:  .LBB3_2: # %if.end
    309 ; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
    310 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
    311 ; CHECK-AVX2-NEXT:    movl (%rdi), %eax
    312 ; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
    313 ; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
    314 ; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
    315 ; CHECK-AVX2-NEXT:    movl 8(%rdi), %eax
    316 ; CHECK-AVX2-NEXT:    movl %eax, 8(%rsi)
    317 ; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
    318 ; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
    319 ; CHECK-AVX2-NEXT:    retq
    320 ;
    321 ; CHECK-AVX512-LABEL: test_2preds_block:
    322 ; CHECK-AVX512:       # %bb.0: # %entry
    323 ; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
    324 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
    325 ; CHECK-AVX512-NEXT:    jl .LBB3_2
    326 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
    327 ; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
    328 ; CHECK-AVX512-NEXT:  .LBB3_2: # %if.end
    329 ; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
    330 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
    331 ; CHECK-AVX512-NEXT:    movl (%rdi), %eax
    332 ; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
    333 ; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
    334 ; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
    335 ; CHECK-AVX512-NEXT:    movl 8(%rdi), %eax
    336 ; CHECK-AVX512-NEXT:    movl %eax, 8(%rsi)
    337 ; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
    338 ; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
    339 ; CHECK-AVX512-NEXT:    retq
    340 entry:
    341   %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
    342   store i32 %x2, i32* %d, align 4
    343   %cmp = icmp sgt i32 %x, 17
    344   br i1 %cmp, label %if.then, label %if.end
    345 
    346 if.then:                                          ; preds = %entry
    347   %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
    348   store i32 %x, i32* %b, align 4
    349   br label %if.end
    350 
    351 if.end:                                           ; preds = %if.then, %entry
    352   %0 = bitcast %struct.S* %s3 to i8*
    353   %1 = bitcast %struct.S* %s4 to i8*
    354   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
    355   %2 = bitcast %struct.S* %s2 to i8*
    356   %3 = bitcast %struct.S* %s1 to i8*
        ; This copy reads %s1, written in entry and possibly again in if.then.
    357   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
    358   ret void
    359 }
    360 %struct.S2 = type { i64, i64 }
    361 
    362 ; Function Attrs: nounwind uwtable
        ; 64-bit field variant, using %struct.S2 = { i64, i64 }.
        ;   - if.then (taken when %x > 17) sign-extends %x to i64 and writes it
        ;     to field 1 of %s1 (offset 8).
        ;   - if.end copies 16 bytes %s4 -> %s3, then 16 bytes %s1 -> %s2, both
        ;     with alignment 8.
        ; Expected codegen: the default, core-avx2 and skx runs want the
        ; %s1 -> %s2 copy emitted as two 8-byte moves at offsets 0 and 8. The
        ; --x86-disable-avoid-SFB run wants one 16-byte movups pair.
    363 define void @test_type64(%struct.S2* nocapture noalias %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 {
    364 ; CHECK-LABEL: test_type64:
    365 ; CHECK:       # %bb.0: # %entry
    366 ; CHECK-NEXT:    cmpl $18, %edx
    367 ; CHECK-NEXT:    jl .LBB4_2
    368 ; CHECK-NEXT:  # %bb.1: # %if.then
    369 ; CHECK-NEXT:    movslq %edx, %rax
    370 ; CHECK-NEXT:    movq %rax, 8(%rdi)
    371 ; CHECK-NEXT:  .LBB4_2: # %if.end
    372 ; CHECK-NEXT:    movups (%r8), %xmm0
    373 ; CHECK-NEXT:    movups %xmm0, (%rcx)
    374 ; CHECK-NEXT:    movq (%rdi), %rax
    375 ; CHECK-NEXT:    movq %rax, (%rsi)
    376 ; CHECK-NEXT:    movq 8(%rdi), %rax
    377 ; CHECK-NEXT:    movq %rax, 8(%rsi)
    378 ; CHECK-NEXT:    retq
    379 ;
    380 ; DISABLED-LABEL: test_type64:
    381 ; DISABLED:       # %bb.0: # %entry
    382 ; DISABLED-NEXT:    cmpl $18, %edx
    383 ; DISABLED-NEXT:    jl .LBB4_2
    384 ; DISABLED-NEXT:  # %bb.1: # %if.then
    385 ; DISABLED-NEXT:    movslq %edx, %rax
    386 ; DISABLED-NEXT:    movq %rax, 8(%rdi)
    387 ; DISABLED-NEXT:  .LBB4_2: # %if.end
    388 ; DISABLED-NEXT:    movups (%r8), %xmm0
    389 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
    390 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    391 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    392 ; DISABLED-NEXT:    retq
    393 ;
    394 ; CHECK-AVX2-LABEL: test_type64:
    395 ; CHECK-AVX2:       # %bb.0: # %entry
    396 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
    397 ; CHECK-AVX2-NEXT:    jl .LBB4_2
    398 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
    399 ; CHECK-AVX2-NEXT:    movslq %edx, %rax
    400 ; CHECK-AVX2-NEXT:    movq %rax, 8(%rdi)
    401 ; CHECK-AVX2-NEXT:  .LBB4_2: # %if.end
    402 ; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
    403 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
    404 ; CHECK-AVX2-NEXT:    movq (%rdi), %rax
    405 ; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
    406 ; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
    407 ; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
    408 ; CHECK-AVX2-NEXT:    retq
    409 ;
    410 ; CHECK-AVX512-LABEL: test_type64:
    411 ; CHECK-AVX512:       # %bb.0: # %entry
    412 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
    413 ; CHECK-AVX512-NEXT:    jl .LBB4_2
    414 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
    415 ; CHECK-AVX512-NEXT:    movslq %edx, %rax
    416 ; CHECK-AVX512-NEXT:    movq %rax, 8(%rdi)
    417 ; CHECK-AVX512-NEXT:  .LBB4_2: # %if.end
    418 ; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
    419 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
    420 ; CHECK-AVX512-NEXT:    movq (%rdi), %rax
    421 ; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
    422 ; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
    423 ; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
    424 ; CHECK-AVX512-NEXT:    retq
    425 entry:
    426   %cmp = icmp sgt i32 %x, 17
    427   br i1 %cmp, label %if.then, label %if.end
    428 
    429 if.then:                                          ; preds = %entry
    430   %conv = sext i32 %x to i64
    431   %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1
    432   store i64 %conv, i64* %b, align 8
    433   br label %if.end
    434 
    435 if.end:                                           ; preds = %if.then, %entry
    436   %0 = bitcast %struct.S2* %s3 to i8*
    437   %1 = bitcast %struct.S2* %s4 to i8*
    438   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
    439   %2 = bitcast %struct.S2* %s2 to i8*
    440   %3 = bitcast %struct.S2* %s1 to i8*
        ; This copy reads %s1, including the i64 field written in if.then.
    441   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false)
    442   ret void
    443 }
    444 %struct.S3 = type { i64, i8, i8, i16, i32 }
    445 
    446 ; Function Attrs: noinline nounwind uwtable
        ; Mixed-width variant, using %struct.S3 = { i64, i8, i8, i16, i32 }.
        ;   - if.then (taken when %x > 17) writes sext(%x) as i64 to field 0 of
        ;     %s1 (offset 0) and trunc(%x) as i8 to field 1 of %s1 (offset 8).
        ;   - if.end copies 16 bytes %s1 -> %s2.
        ;   - %s3 and %s4 are not used by the body.
        ; Expected codegen: the default, core-avx2 and skx runs want the copy
        ; emitted as an 8-byte move at offset 0 and a 1-byte move at offset 8
        ; (matching the two stores), then the remaining 7 bytes as 4-, 2- and
        ; 1-byte moves at offsets 9, 13 and 15. The --x86-disable-avoid-SFB run
        ; wants one 16-byte movups pair.
    447 define void @test_mixed_type(%struct.S3* nocapture noalias %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 {
    448 ; CHECK-LABEL: test_mixed_type:
    449 ; CHECK:       # %bb.0: # %entry
    450 ; CHECK-NEXT:    cmpl $18, %edx
    451 ; CHECK-NEXT:    jl .LBB5_2
    452 ; CHECK-NEXT:  # %bb.1: # %if.then
    453 ; CHECK-NEXT:    movslq %edx, %rax
    454 ; CHECK-NEXT:    movq %rax, (%rdi)
    455 ; CHECK-NEXT:    movb %dl, 8(%rdi)
    456 ; CHECK-NEXT:  .LBB5_2: # %if.end
    457 ; CHECK-NEXT:    movq (%rdi), %rax
    458 ; CHECK-NEXT:    movq %rax, (%rsi)
    459 ; CHECK-NEXT:    movb 8(%rdi), %al
    460 ; CHECK-NEXT:    movb %al, 8(%rsi)
    461 ; CHECK-NEXT:    movl 9(%rdi), %eax
    462 ; CHECK-NEXT:    movl %eax, 9(%rsi)
    463 ; CHECK-NEXT:    movzwl 13(%rdi), %eax
    464 ; CHECK-NEXT:    movw %ax, 13(%rsi)
    465 ; CHECK-NEXT:    movb 15(%rdi), %al
    466 ; CHECK-NEXT:    movb %al, 15(%rsi)
    467 ; CHECK-NEXT:    retq
    468 ;
    469 ; DISABLED-LABEL: test_mixed_type:
    470 ; DISABLED:       # %bb.0: # %entry
    471 ; DISABLED-NEXT:    cmpl $18, %edx
    472 ; DISABLED-NEXT:    jl .LBB5_2
    473 ; DISABLED-NEXT:  # %bb.1: # %if.then
    474 ; DISABLED-NEXT:    movslq %edx, %rax
    475 ; DISABLED-NEXT:    movq %rax, (%rdi)
    476 ; DISABLED-NEXT:    movb %dl, 8(%rdi)
    477 ; DISABLED-NEXT:  .LBB5_2: # %if.end
    478 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    479 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    480 ; DISABLED-NEXT:    retq
    481 ;
    482 ; CHECK-AVX2-LABEL: test_mixed_type:
    483 ; CHECK-AVX2:       # %bb.0: # %entry
    484 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
    485 ; CHECK-AVX2-NEXT:    jl .LBB5_2
    486 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
    487 ; CHECK-AVX2-NEXT:    movslq %edx, %rax
    488 ; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
    489 ; CHECK-AVX2-NEXT:    movb %dl, 8(%rdi)
    490 ; CHECK-AVX2-NEXT:  .LBB5_2: # %if.end
    491 ; CHECK-AVX2-NEXT:    movq (%rdi), %rax
    492 ; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
    493 ; CHECK-AVX2-NEXT:    movb 8(%rdi), %al
    494 ; CHECK-AVX2-NEXT:    movb %al, 8(%rsi)
    495 ; CHECK-AVX2-NEXT:    movl 9(%rdi), %eax
    496 ; CHECK-AVX2-NEXT:    movl %eax, 9(%rsi)
    497 ; CHECK-AVX2-NEXT:    movzwl 13(%rdi), %eax
    498 ; CHECK-AVX2-NEXT:    movw %ax, 13(%rsi)
    499 ; CHECK-AVX2-NEXT:    movb 15(%rdi), %al
    500 ; CHECK-AVX2-NEXT:    movb %al, 15(%rsi)
    501 ; CHECK-AVX2-NEXT:    retq
    502 ;
    503 ; CHECK-AVX512-LABEL: test_mixed_type:
    504 ; CHECK-AVX512:       # %bb.0: # %entry
    505 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
    506 ; CHECK-AVX512-NEXT:    jl .LBB5_2
    507 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
    508 ; CHECK-AVX512-NEXT:    movslq %edx, %rax
    509 ; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
    510 ; CHECK-AVX512-NEXT:    movb %dl, 8(%rdi)
    511 ; CHECK-AVX512-NEXT:  .LBB5_2: # %if.end
    512 ; CHECK-AVX512-NEXT:    movq (%rdi), %rax
    513 ; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
    514 ; CHECK-AVX512-NEXT:    movb 8(%rdi), %al
    515 ; CHECK-AVX512-NEXT:    movb %al, 8(%rsi)
    516 ; CHECK-AVX512-NEXT:    movl 9(%rdi), %eax
    517 ; CHECK-AVX512-NEXT:    movl %eax, 9(%rsi)
    518 ; CHECK-AVX512-NEXT:    movzwl 13(%rdi), %eax
    519 ; CHECK-AVX512-NEXT:    movw %ax, 13(%rsi)
    520 ; CHECK-AVX512-NEXT:    movb 15(%rdi), %al
    521 ; CHECK-AVX512-NEXT:    movb %al, 15(%rsi)
    522 ; CHECK-AVX512-NEXT:    retq
    523 entry:
    524   %cmp = icmp sgt i32 %x, 17
    525   br i1 %cmp, label %if.then, label %if.end
    526 
    527 if.then:                                          ; preds = %entry
    528   %conv = sext i32 %x to i64
    529   %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0
    530   store i64 %conv, i64* %a, align 8
    531   %conv1 = trunc i32 %x to i8
    532   %b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1
    533   store i8 %conv1, i8* %b, align 8
    534   br label %if.end
    535 
    536 if.end:                                           ; preds = %if.then, %entry
    537   %0 = bitcast %struct.S3* %s2 to i8*
    538   %1 = bitcast %struct.S3* %s1 to i8*
        ; This copy reads %s1, including the i64 and i8 fields written in if.then.
    539   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
    540   ret void
    541 }
    542 %struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
    543 
    544 ; Function Attrs: nounwind uwtable
        ; Larger copy variant, using %struct.S4 (twelve i32 fields, 48 bytes).
        ;   - Writes the constant 0 to field 1 (offset 4) and field 9 (offset 36)
        ;     of %s1, then copies 48 bytes %s1 -> %s2. Everything is in entry.
        ; Expected codegen:
        ;   - Default run: bytes 16..31 stay one 16-byte movups pair; bytes
        ;     32..47 become 4-, 4- and 8-byte moves at offsets 32, 36 and 40;
        ;     bytes 0..15 become 4-, 4- and 8-byte moves at offsets 0, 4 and 8.
        ;   - core-avx2 and skx runs: same through the 4-byte move at offset 4,
        ;     then a 16-byte vmovups pair at offset 8 and an 8-byte move at
        ;     offset 24.
        ;   - --x86-disable-avoid-SFB run: three 16-byte movups pairs at
        ;     offsets 16, 32 and 0.
    545 define void @test_multiple_blocks(%struct.S4* nocapture noalias %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 {
    546 ; CHECK-LABEL: test_multiple_blocks:
    547 ; CHECK:       # %bb.0: # %entry
    548 ; CHECK-NEXT:    movl $0, 4(%rdi)
    549 ; CHECK-NEXT:    movl $0, 36(%rdi)
    550 ; CHECK-NEXT:    movups 16(%rdi), %xmm0
    551 ; CHECK-NEXT:    movups %xmm0, 16(%rsi)
    552 ; CHECK-NEXT:    movl 32(%rdi), %eax
    553 ; CHECK-NEXT:    movl %eax, 32(%rsi)
    554 ; CHECK-NEXT:    movl 36(%rdi), %eax
    555 ; CHECK-NEXT:    movl %eax, 36(%rsi)
    556 ; CHECK-NEXT:    movq 40(%rdi), %rax
    557 ; CHECK-NEXT:    movq %rax, 40(%rsi)
    558 ; CHECK-NEXT:    movl (%rdi), %eax
    559 ; CHECK-NEXT:    movl %eax, (%rsi)
    560 ; CHECK-NEXT:    movl 4(%rdi), %eax
    561 ; CHECK-NEXT:    movl %eax, 4(%rsi)
    562 ; CHECK-NEXT:    movq 8(%rdi), %rax
    563 ; CHECK-NEXT:    movq %rax, 8(%rsi)
    564 ; CHECK-NEXT:    retq
    565 ;
    566 ; DISABLED-LABEL: test_multiple_blocks:
    567 ; DISABLED:       # %bb.0: # %entry
    568 ; DISABLED-NEXT:    movl $0, 4(%rdi)
    569 ; DISABLED-NEXT:    movl $0, 36(%rdi)
    570 ; DISABLED-NEXT:    movups 16(%rdi), %xmm0
    571 ; DISABLED-NEXT:    movups %xmm0, 16(%rsi)
    572 ; DISABLED-NEXT:    movups 32(%rdi), %xmm0
    573 ; DISABLED-NEXT:    movups %xmm0, 32(%rsi)
    574 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    575 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    576 ; DISABLED-NEXT:    retq
    577 ;
    578 ; CHECK-AVX2-LABEL: test_multiple_blocks:
    579 ; CHECK-AVX2:       # %bb.0: # %entry
    580 ; CHECK-AVX2-NEXT:    movl $0, 4(%rdi)
    581 ; CHECK-AVX2-NEXT:    movl $0, 36(%rdi)
    582 ; CHECK-AVX2-NEXT:    vmovups 16(%rdi), %xmm0
    583 ; CHECK-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
    584 ; CHECK-AVX2-NEXT:    movl 32(%rdi), %eax
    585 ; CHECK-AVX2-NEXT:    movl %eax, 32(%rsi)
    586 ; CHECK-AVX2-NEXT:    movl 36(%rdi), %eax
    587 ; CHECK-AVX2-NEXT:    movl %eax, 36(%rsi)
    588 ; CHECK-AVX2-NEXT:    movq 40(%rdi), %rax
    589 ; CHECK-AVX2-NEXT:    movq %rax, 40(%rsi)
    590 ; CHECK-AVX2-NEXT:    movl (%rdi), %eax
    591 ; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
    592 ; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
    593 ; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
    594 ; CHECK-AVX2-NEXT:    vmovups 8(%rdi), %xmm0
    595 ; CHECK-AVX2-NEXT:    vmovups %xmm0, 8(%rsi)
    596 ; CHECK-AVX2-NEXT:    movq 24(%rdi), %rax
    597 ; CHECK-AVX2-NEXT:    movq %rax, 24(%rsi)
    598 ; CHECK-AVX2-NEXT:    retq
    599 ;
    600 ; CHECK-AVX512-LABEL: test_multiple_blocks:
    601 ; CHECK-AVX512:       # %bb.0: # %entry
    602 ; CHECK-AVX512-NEXT:    movl $0, 4(%rdi)
    603 ; CHECK-AVX512-NEXT:    movl $0, 36(%rdi)
    604 ; CHECK-AVX512-NEXT:    vmovups 16(%rdi), %xmm0
    605 ; CHECK-AVX512-NEXT:    vmovups %xmm0, 16(%rsi)
    606 ; CHECK-AVX512-NEXT:    movl 32(%rdi), %eax
    607 ; CHECK-AVX512-NEXT:    movl %eax, 32(%rsi)
    608 ; CHECK-AVX512-NEXT:    movl 36(%rdi), %eax
    609 ; CHECK-AVX512-NEXT:    movl %eax, 36(%rsi)
    610 ; CHECK-AVX512-NEXT:    movq 40(%rdi), %rax
    611 ; CHECK-AVX512-NEXT:    movq %rax, 40(%rsi)
    612 ; CHECK-AVX512-NEXT:    movl (%rdi), %eax
    613 ; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
    614 ; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
    615 ; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
    616 ; CHECK-AVX512-NEXT:    vmovups 8(%rdi), %xmm0
    617 ; CHECK-AVX512-NEXT:    vmovups %xmm0, 8(%rsi)
    618 ; CHECK-AVX512-NEXT:    movq 24(%rdi), %rax
    619 ; CHECK-AVX512-NEXT:    movq %rax, 24(%rsi)
    620 ; CHECK-AVX512-NEXT:    retq
    621 entry:
    622   %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1
    623   store i32 0, i32* %b, align 4
    624   %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9
    625   store i32 0, i32* %b3, align 4
    626   %0 = bitcast %struct.S4* %s2 to i8*
    627   %1 = bitcast %struct.S4* %s1 to i8*
        ; 48-byte copy out of %s1; both stores above land inside the copied range.
    628   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false)
    629   ret void
    630 }
    631 %struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
    632 
    633 ; Function Attrs: nounwind uwtable
        ; 16-bit field variant, using %struct.S5 (eight i16 fields, 16 bytes).
        ;   - if.then (taken when %x > 17) truncates %x to i16 and writes it to
        ;     field 1 of %s1 (offset 2).
        ;   - if.end copies 16 bytes %s4 -> %s3, then 16 bytes %s1 -> %s2, both
        ;     with alignment 2.
        ; Expected codegen: the default, core-avx2 and skx runs want the
        ; %s1 -> %s2 copy emitted as 2-byte moves at offsets 0 and 2, an 8-byte
        ; move at offset 4 and a 4-byte move at offset 12. The
        ; --x86-disable-avoid-SFB run wants one 16-byte movups pair.
    634 define void @test_type16(%struct.S5* nocapture noalias %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 {
    635 ; CHECK-LABEL: test_type16:
    636 ; CHECK:       # %bb.0: # %entry
    637 ; CHECK-NEXT:    cmpl $18, %edx
    638 ; CHECK-NEXT:    jl .LBB7_2
    639 ; CHECK-NEXT:  # %bb.1: # %if.then
    640 ; CHECK-NEXT:    movw %dx, 2(%rdi)
    641 ; CHECK-NEXT:  .LBB7_2: # %if.end
    642 ; CHECK-NEXT:    movups (%r8), %xmm0
    643 ; CHECK-NEXT:    movups %xmm0, (%rcx)
    644 ; CHECK-NEXT:    movzwl (%rdi), %eax
    645 ; CHECK-NEXT:    movw %ax, (%rsi)
    646 ; CHECK-NEXT:    movzwl 2(%rdi), %eax
    647 ; CHECK-NEXT:    movw %ax, 2(%rsi)
    648 ; CHECK-NEXT:    movq 4(%rdi), %rax
    649 ; CHECK-NEXT:    movq %rax, 4(%rsi)
    650 ; CHECK-NEXT:    movl 12(%rdi), %eax
    651 ; CHECK-NEXT:    movl %eax, 12(%rsi)
    652 ; CHECK-NEXT:    retq
    653 ;
    654 ; DISABLED-LABEL: test_type16:
    655 ; DISABLED:       # %bb.0: # %entry
    656 ; DISABLED-NEXT:    cmpl $18, %edx
    657 ; DISABLED-NEXT:    jl .LBB7_2
    658 ; DISABLED-NEXT:  # %bb.1: # %if.then
    659 ; DISABLED-NEXT:    movw %dx, 2(%rdi)
    660 ; DISABLED-NEXT:  .LBB7_2: # %if.end
    661 ; DISABLED-NEXT:    movups (%r8), %xmm0
    662 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
    663 ; DISABLED-NEXT:    movups (%rdi), %xmm0
    664 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
    665 ; DISABLED-NEXT:    retq
    666 ;
    667 ; CHECK-AVX2-LABEL: test_type16:
    668 ; CHECK-AVX2:       # %bb.0: # %entry
    669 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
    670 ; CHECK-AVX2-NEXT:    jl .LBB7_2
    671 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
    672 ; CHECK-AVX2-NEXT:    movw %dx, 2(%rdi)
    673 ; CHECK-AVX2-NEXT:  .LBB7_2: # %if.end
    674 ; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
    675 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
    676 ; CHECK-AVX2-NEXT:    movzwl (%rdi), %eax
    677 ; CHECK-AVX2-NEXT:    movw %ax, (%rsi)
    678 ; CHECK-AVX2-NEXT:    movzwl 2(%rdi), %eax
    679 ; CHECK-AVX2-NEXT:    movw %ax, 2(%rsi)
    680 ; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
    681 ; CHECK-AVX2-NEXT:    movq %rax, 4(%rsi)
    682 ; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
    683 ; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
    684 ; CHECK-AVX2-NEXT:    retq
    685 ;
    686 ; CHECK-AVX512-LABEL: test_type16:
    687 ; CHECK-AVX512:       # %bb.0: # %entry
    688 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
    689 ; CHECK-AVX512-NEXT:    jl .LBB7_2
    690 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
    691 ; CHECK-AVX512-NEXT:    movw %dx, 2(%rdi)
    692 ; CHECK-AVX512-NEXT:  .LBB7_2: # %if.end
    693 ; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
    694 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
    695 ; CHECK-AVX512-NEXT:    movzwl (%rdi), %eax
    696 ; CHECK-AVX512-NEXT:    movw %ax, (%rsi)
    697 ; CHECK-AVX512-NEXT:    movzwl 2(%rdi), %eax
    698 ; CHECK-AVX512-NEXT:    movw %ax, 2(%rsi)
    699 ; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
    700 ; CHECK-AVX512-NEXT:    movq %rax, 4(%rsi)
    701 ; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
    702 ; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
    703 ; CHECK-AVX512-NEXT:    retq
    704 entry:
    705   %cmp = icmp sgt i32 %x, 17
    706   br i1 %cmp, label %if.then, label %if.end
    707 
    708 if.then:                                          ; preds = %entry
    709   %conv = trunc i32 %x to i16
    710   %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1
    711   store i16 %conv, i16* %b, align 2
    712   br label %if.end
    713 
    714 if.end:                                           ; preds = %if.then, %entry
    715   %0 = bitcast %struct.S5* %s3 to i8*
    716   %1 = bitcast %struct.S5* %s4 to i8*
    717   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false)
    718   %2 = bitcast %struct.S5* %s2 to i8*
    719   %3 = bitcast %struct.S5* %s1 to i8*
        ; This copy reads %s1, including the i16 field written in if.then.
    720   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false)
    721   ret void
    722 }
    723 
    724 %struct.S6 = type { [4 x i32], i32, i32, i32, i32 } ; 32 bytes: a 16-byte i32 array, then four i32 fields at offsets 16, 20, 24 and 28
    725 
    726 ; Function Attrs: nounwind uwtable
        ; test_stack: writes %x into field 3 of the byval argument %s2 (byte offset 24
        ; of %struct.S6) and then copies all 32 bytes of %s2 twice: first into the
        ; sret result, then into the byval argument %s1.
        ; Expected code: in the default, AVX2 and AVX512 runs each 32-byte copy is
        ; emitted as 16 + 8 + 4 + 4 bytes, so the freshly stored dword at offset 24 is
        ; read back by a load of the same width. In the run that passes
        ; --x86-disable-avoid-SFB each copy is emitted as two 16-byte moves.
    727 define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
    728 ; CHECK-LABEL: test_stack:
    729 ; CHECK:       # %bb.0: # %entry
    730 ; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
    731 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
    732 ; CHECK-NEXT:    movups %xmm0, (%rdi)
    733 ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
    734 ; CHECK-NEXT:    movq %rax, 16(%rdi)
    735 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    736 ; CHECK-NEXT:    movl %eax, 24(%rdi)
    737 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    738 ; CHECK-NEXT:    movl %eax, 28(%rdi)
    739 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
    740 ; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
    741 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
    742 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edx
    743 ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
    744 ; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
    745 ; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
    746 ; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%rsp)
    747 ; CHECK-NEXT:    movq %rdi, %rax
    748 ; CHECK-NEXT:    retq
    749 ;
    750 ; DISABLED-LABEL: test_stack:
    751 ; DISABLED:       # %bb.0: # %entry
    752 ; DISABLED-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
    753 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
    754 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
    755 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
    756 ; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
    757 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
    758 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
    759 ; DISABLED-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
    760 ; DISABLED-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
    761 ; DISABLED-NEXT:    movq %rdi, %rax
    762 ; DISABLED-NEXT:    retq
    763 ;
    764 ; CHECK-AVX2-LABEL: test_stack:
    765 ; CHECK-AVX2:       # %bb.0: # %entry
    766 ; CHECK-AVX2-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
    767 ; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
    768 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
    769 ; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
    770 ; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
    771 ; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    772 ; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
    773 ; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    774 ; CHECK-AVX2-NEXT:    movl %eax, 28(%rdi)
    775 ; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
    776 ; CHECK-AVX2-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
    777 ; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
    778 ; CHECK-AVX2-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
    779 ; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    780 ; CHECK-AVX2-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
    781 ; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    782 ; CHECK-AVX2-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
    783 ; CHECK-AVX2-NEXT:    movq %rdi, %rax
    784 ; CHECK-AVX2-NEXT:    retq
    785 ;
    786 ; CHECK-AVX512-LABEL: test_stack:
    787 ; CHECK-AVX512:       # %bb.0: # %entry
    788 ; CHECK-AVX512-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
    789 ; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
    790 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
    791 ; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
    792 ; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
    793 ; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    794 ; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
    795 ; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    796 ; CHECK-AVX512-NEXT:    movl %eax, 28(%rdi)
    797 ; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
    798 ; CHECK-AVX512-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
    799 ; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
    800 ; CHECK-AVX512-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
    801 ; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    802 ; CHECK-AVX512-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
    803 ; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    804 ; CHECK-AVX512-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
    805 ; CHECK-AVX512-NEXT:    movq %rdi, %rax
    806 ; CHECK-AVX512-NEXT:    retq
    807 entry:
    808   %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*
    809   %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3
          ; 4-byte store into the middle of %s2, right before %s2 is read back by
          ; the two 32-byte copies below.
    810   store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8
    811   %0 = bitcast %struct.S6* %agg.result to i8*
    812   %s6.sroa.0.0..sroa_cast2 = bitcast %struct.S6* %s1 to i8*
          ; Both copies read from %s2; the first writes the sret result, the second
          ; writes the byval argument %s1.
    813   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
    814   call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %s6.sroa.0.0..sroa_cast2, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
    815 
    816   ret void
    817 }
    818 
    819 ; Function Attrs: nounwind uwtable
        ; test_limit_all: stores %x2 into field 3 of %s1 (byte offset 12) and calls
        ; @bar; when %x > 17 (signed) it also stores %x into field 1 (byte offset 4)
        ; and calls @bar again. It then performs two 16-byte copies: %s4 -> %s3 and
        ; %s1 -> %s2. Only %s1 is marked noalias.
        ; Expected code: all four run configurations copy %s1 with a single 16-byte
        ; load/store pair, i.e. the copy is left unsplit.
        ; NOTE(review): presumably the call that follows each store puts the store out
        ; of reach of the pass being tested - confirm against the pass sources.
    820 define void @test_limit_all(%struct.S* noalias  %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
    821 ; CHECK-LABEL: test_limit_all:
    822 ; CHECK:       # %bb.0: # %entry
    823 ; CHECK-NEXT:    pushq %rbp
    824 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
    825 ; CHECK-NEXT:    pushq %r15
    826 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
    827 ; CHECK-NEXT:    pushq %r14
    828 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
    829 ; CHECK-NEXT:    pushq %r12
    830 ; CHECK-NEXT:    .cfi_def_cfa_offset 40
    831 ; CHECK-NEXT:    pushq %rbx
    832 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
    833 ; CHECK-NEXT:    .cfi_offset %rbx, -48
    834 ; CHECK-NEXT:    .cfi_offset %r12, -40
    835 ; CHECK-NEXT:    .cfi_offset %r14, -32
    836 ; CHECK-NEXT:    .cfi_offset %r15, -24
    837 ; CHECK-NEXT:    .cfi_offset %rbp, -16
    838 ; CHECK-NEXT:    movq %r8, %r15
    839 ; CHECK-NEXT:    movq %rcx, %r14
    840 ; CHECK-NEXT:    movl %edx, %ebp
    841 ; CHECK-NEXT:    movq %rsi, %r12
    842 ; CHECK-NEXT:    movq %rdi, %rbx
    843 ; CHECK-NEXT:    movl %r9d, 12(%rdi)
    844 ; CHECK-NEXT:    callq bar
    845 ; CHECK-NEXT:    cmpl $18, %ebp
    846 ; CHECK-NEXT:    jl .LBB9_2
    847 ; CHECK-NEXT:  # %bb.1: # %if.then
    848 ; CHECK-NEXT:    movl %ebp, 4(%rbx)
    849 ; CHECK-NEXT:    movq %rbx, %rdi
    850 ; CHECK-NEXT:    callq bar
    851 ; CHECK-NEXT:  .LBB9_2: # %if.end
    852 ; CHECK-NEXT:    movups (%r15), %xmm0
    853 ; CHECK-NEXT:    movups %xmm0, (%r14)
    854 ; CHECK-NEXT:    movups (%rbx), %xmm0
    855 ; CHECK-NEXT:    movups %xmm0, (%r12)
    856 ; CHECK-NEXT:    popq %rbx
    857 ; CHECK-NEXT:    .cfi_def_cfa_offset 40
    858 ; CHECK-NEXT:    popq %r12
    859 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
    860 ; CHECK-NEXT:    popq %r14
    861 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
    862 ; CHECK-NEXT:    popq %r15
    863 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
    864 ; CHECK-NEXT:    popq %rbp
    865 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
    866 ; CHECK-NEXT:    retq
    867 ;
    868 ; DISABLED-LABEL: test_limit_all:
    869 ; DISABLED:       # %bb.0: # %entry
    870 ; DISABLED-NEXT:    pushq %rbp
    871 ; DISABLED-NEXT:    .cfi_def_cfa_offset 16
    872 ; DISABLED-NEXT:    pushq %r15
    873 ; DISABLED-NEXT:    .cfi_def_cfa_offset 24
    874 ; DISABLED-NEXT:    pushq %r14
    875 ; DISABLED-NEXT:    .cfi_def_cfa_offset 32
    876 ; DISABLED-NEXT:    pushq %r12
    877 ; DISABLED-NEXT:    .cfi_def_cfa_offset 40
    878 ; DISABLED-NEXT:    pushq %rbx
    879 ; DISABLED-NEXT:    .cfi_def_cfa_offset 48
    880 ; DISABLED-NEXT:    .cfi_offset %rbx, -48
    881 ; DISABLED-NEXT:    .cfi_offset %r12, -40
    882 ; DISABLED-NEXT:    .cfi_offset %r14, -32
    883 ; DISABLED-NEXT:    .cfi_offset %r15, -24
    884 ; DISABLED-NEXT:    .cfi_offset %rbp, -16
    885 ; DISABLED-NEXT:    movq %r8, %r15
    886 ; DISABLED-NEXT:    movq %rcx, %r14
    887 ; DISABLED-NEXT:    movl %edx, %ebp
    888 ; DISABLED-NEXT:    movq %rsi, %r12
    889 ; DISABLED-NEXT:    movq %rdi, %rbx
    890 ; DISABLED-NEXT:    movl %r9d, 12(%rdi)
    891 ; DISABLED-NEXT:    callq bar
    892 ; DISABLED-NEXT:    cmpl $18, %ebp
    893 ; DISABLED-NEXT:    jl .LBB9_2
    894 ; DISABLED-NEXT:  # %bb.1: # %if.then
    895 ; DISABLED-NEXT:    movl %ebp, 4(%rbx)
    896 ; DISABLED-NEXT:    movq %rbx, %rdi
    897 ; DISABLED-NEXT:    callq bar
    898 ; DISABLED-NEXT:  .LBB9_2: # %if.end
    899 ; DISABLED-NEXT:    movups (%r15), %xmm0
    900 ; DISABLED-NEXT:    movups %xmm0, (%r14)
    901 ; DISABLED-NEXT:    movups (%rbx), %xmm0
    902 ; DISABLED-NEXT:    movups %xmm0, (%r12)
    903 ; DISABLED-NEXT:    popq %rbx
    904 ; DISABLED-NEXT:    .cfi_def_cfa_offset 40
    905 ; DISABLED-NEXT:    popq %r12
    906 ; DISABLED-NEXT:    .cfi_def_cfa_offset 32
    907 ; DISABLED-NEXT:    popq %r14
    908 ; DISABLED-NEXT:    .cfi_def_cfa_offset 24
    909 ; DISABLED-NEXT:    popq %r15
    910 ; DISABLED-NEXT:    .cfi_def_cfa_offset 16
    911 ; DISABLED-NEXT:    popq %rbp
    912 ; DISABLED-NEXT:    .cfi_def_cfa_offset 8
    913 ; DISABLED-NEXT:    retq
    914 ;
    915 ; CHECK-AVX2-LABEL: test_limit_all:
    916 ; CHECK-AVX2:       # %bb.0: # %entry
    917 ; CHECK-AVX2-NEXT:    pushq %rbp
    918 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
    919 ; CHECK-AVX2-NEXT:    pushq %r15
    920 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
    921 ; CHECK-AVX2-NEXT:    pushq %r14
    922 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
    923 ; CHECK-AVX2-NEXT:    pushq %r12
    924 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
    925 ; CHECK-AVX2-NEXT:    pushq %rbx
    926 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 48
    927 ; CHECK-AVX2-NEXT:    .cfi_offset %rbx, -48
    928 ; CHECK-AVX2-NEXT:    .cfi_offset %r12, -40
    929 ; CHECK-AVX2-NEXT:    .cfi_offset %r14, -32
    930 ; CHECK-AVX2-NEXT:    .cfi_offset %r15, -24
    931 ; CHECK-AVX2-NEXT:    .cfi_offset %rbp, -16
    932 ; CHECK-AVX2-NEXT:    movq %r8, %r15
    933 ; CHECK-AVX2-NEXT:    movq %rcx, %r14
    934 ; CHECK-AVX2-NEXT:    movl %edx, %ebp
    935 ; CHECK-AVX2-NEXT:    movq %rsi, %r12
    936 ; CHECK-AVX2-NEXT:    movq %rdi, %rbx
    937 ; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
    938 ; CHECK-AVX2-NEXT:    callq bar
    939 ; CHECK-AVX2-NEXT:    cmpl $18, %ebp
    940 ; CHECK-AVX2-NEXT:    jl .LBB9_2
    941 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
    942 ; CHECK-AVX2-NEXT:    movl %ebp, 4(%rbx)
    943 ; CHECK-AVX2-NEXT:    movq %rbx, %rdi
    944 ; CHECK-AVX2-NEXT:    callq bar
    945 ; CHECK-AVX2-NEXT:  .LBB9_2: # %if.end
    946 ; CHECK-AVX2-NEXT:    vmovups (%r15), %xmm0
    947 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r14)
    948 ; CHECK-AVX2-NEXT:    vmovups (%rbx), %xmm0
    949 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r12)
    950 ; CHECK-AVX2-NEXT:    popq %rbx
    951 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
    952 ; CHECK-AVX2-NEXT:    popq %r12
    953 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
    954 ; CHECK-AVX2-NEXT:    popq %r14
    955 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
    956 ; CHECK-AVX2-NEXT:    popq %r15
    957 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
    958 ; CHECK-AVX2-NEXT:    popq %rbp
    959 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
    960 ; CHECK-AVX2-NEXT:    retq
    961 ;
    962 ; CHECK-AVX512-LABEL: test_limit_all:
    963 ; CHECK-AVX512:       # %bb.0: # %entry
    964 ; CHECK-AVX512-NEXT:    pushq %rbp
    965 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
    966 ; CHECK-AVX512-NEXT:    pushq %r15
    967 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
    968 ; CHECK-AVX512-NEXT:    pushq %r14
    969 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
    970 ; CHECK-AVX512-NEXT:    pushq %r12
    971 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
    972 ; CHECK-AVX512-NEXT:    pushq %rbx
    973 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 48
    974 ; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -48
    975 ; CHECK-AVX512-NEXT:    .cfi_offset %r12, -40
    976 ; CHECK-AVX512-NEXT:    .cfi_offset %r14, -32
    977 ; CHECK-AVX512-NEXT:    .cfi_offset %r15, -24
    978 ; CHECK-AVX512-NEXT:    .cfi_offset %rbp, -16
    979 ; CHECK-AVX512-NEXT:    movq %r8, %r15
    980 ; CHECK-AVX512-NEXT:    movq %rcx, %r14
    981 ; CHECK-AVX512-NEXT:    movl %edx, %ebp
    982 ; CHECK-AVX512-NEXT:    movq %rsi, %r12
    983 ; CHECK-AVX512-NEXT:    movq %rdi, %rbx
    984 ; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
    985 ; CHECK-AVX512-NEXT:    callq bar
    986 ; CHECK-AVX512-NEXT:    cmpl $18, %ebp
    987 ; CHECK-AVX512-NEXT:    jl .LBB9_2
    988 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
    989 ; CHECK-AVX512-NEXT:    movl %ebp, 4(%rbx)
    990 ; CHECK-AVX512-NEXT:    movq %rbx, %rdi
    991 ; CHECK-AVX512-NEXT:    callq bar
    992 ; CHECK-AVX512-NEXT:  .LBB9_2: # %if.end
    993 ; CHECK-AVX512-NEXT:    vmovups (%r15), %xmm0
    994 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r14)
    995 ; CHECK-AVX512-NEXT:    vmovups (%rbx), %xmm0
    996 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r12)
    997 ; CHECK-AVX512-NEXT:    popq %rbx
    998 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
    999 ; CHECK-AVX512-NEXT:    popq %r12
   1000 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
   1001 ; CHECK-AVX512-NEXT:    popq %r14
   1002 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
   1003 ; CHECK-AVX512-NEXT:    popq %r15
   1004 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
   1005 ; CHECK-AVX512-NEXT:    popq %rbp
   1006 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
   1007 ; CHECK-AVX512-NEXT:    retq
   1008 entry:
   1009   %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
          ; Unconditional store to field 3 of %s1, followed directly by a call.
   1010   store i32 %x2, i32* %d, align 4
   1011   tail call void @bar(%struct.S* %s1) #3
   1012   %cmp = icmp sgt i32 %x, 17
   1013   br i1 %cmp, label %if.then, label %if.end
   1014 
   1015 if.then:                                          ; preds = %entry
   1016   %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
          ; Conditional store to field 1 of %s1, again followed directly by a call.
   1017   store i32 %x, i32* %b, align 4
   1018   tail call void @bar(%struct.S* nonnull %s1) #3
   1019   br label %if.end
   1020 
   1021 if.end:                                           ; preds = %if.then, %entry
   1022   %0 = bitcast %struct.S* %s3 to i8*
   1023   %1 = bitcast %struct.S* %s4 to i8*
          ; First copy does not touch %s1; the second one reads the 16 bytes of %s1
          ; that contain both stored fields.
   1024   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
   1025   %2 = bitcast %struct.S* %s2 to i8*
   1026   %3 = bitcast %struct.S* %s1 to i8*
   1027   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
   1028   ret void
   1029 }
   1030 
   1031 ; Function Attrs: nounwind uwtable
        ; test_limit_one_pred: stores %x2 into field 3 of %s1 (byte offset 12) with no
        ; call afterwards; when %x > 17 (signed) it also stores %x into field 1 (byte
        ; offset 4) and then calls @bar. It then performs two 16-byte copies:
        ; %s4 -> %s3 and %s1 -> %s2. Only %s1 is marked noalias.
        ; Expected code: in the default, AVX2 and AVX512 runs the copy of %s1 is
        ; emitted as 8 + 4 + 4 bytes, so the dword at offset 12 gets a load of its own
        ; while bytes 0-7 (which contain field 1) are moved with one 8-byte load. In
        ; the run that passes --x86-disable-avoid-SFB the copy stays a single 16-byte
        ; load/store pair.
   1032 define void @test_limit_one_pred(%struct.S* noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
   1033 ; CHECK-LABEL: test_limit_one_pred:
   1034 ; CHECK:       # %bb.0: # %entry
   1035 ; CHECK-NEXT:    pushq %r15
   1036 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
   1037 ; CHECK-NEXT:    pushq %r14
   1038 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
   1039 ; CHECK-NEXT:    pushq %r12
   1040 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
   1041 ; CHECK-NEXT:    pushq %rbx
   1042 ; CHECK-NEXT:    .cfi_def_cfa_offset 40
   1043 ; CHECK-NEXT:    pushq %rax
   1044 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
   1045 ; CHECK-NEXT:    .cfi_offset %rbx, -40
   1046 ; CHECK-NEXT:    .cfi_offset %r12, -32
   1047 ; CHECK-NEXT:    .cfi_offset %r14, -24
   1048 ; CHECK-NEXT:    .cfi_offset %r15, -16
   1049 ; CHECK-NEXT:    movq %r8, %r12
   1050 ; CHECK-NEXT:    movq %rcx, %r15
   1051 ; CHECK-NEXT:    movq %rsi, %r14
   1052 ; CHECK-NEXT:    movq %rdi, %rbx
   1053 ; CHECK-NEXT:    movl %r9d, 12(%rdi)
   1054 ; CHECK-NEXT:    cmpl $18, %edx
   1055 ; CHECK-NEXT:    jl .LBB10_2
   1056 ; CHECK-NEXT:  # %bb.1: # %if.then
   1057 ; CHECK-NEXT:    movl %edx, 4(%rbx)
   1058 ; CHECK-NEXT:    movq %rbx, %rdi
   1059 ; CHECK-NEXT:    callq bar
   1060 ; CHECK-NEXT:  .LBB10_2: # %if.end
   1061 ; CHECK-NEXT:    movups (%r12), %xmm0
   1062 ; CHECK-NEXT:    movups %xmm0, (%r15)
   1063 ; CHECK-NEXT:    movq (%rbx), %rax
   1064 ; CHECK-NEXT:    movq %rax, (%r14)
   1065 ; CHECK-NEXT:    movl 8(%rbx), %eax
   1066 ; CHECK-NEXT:    movl %eax, 8(%r14)
   1067 ; CHECK-NEXT:    movl 12(%rbx), %eax
   1068 ; CHECK-NEXT:    movl %eax, 12(%r14)
   1069 ; CHECK-NEXT:    addq $8, %rsp
   1070 ; CHECK-NEXT:    .cfi_def_cfa_offset 40
   1071 ; CHECK-NEXT:    popq %rbx
   1072 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
   1073 ; CHECK-NEXT:    popq %r12
   1074 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
   1075 ; CHECK-NEXT:    popq %r14
   1076 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
   1077 ; CHECK-NEXT:    popq %r15
   1078 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
   1079 ; CHECK-NEXT:    retq
   1080 ;
   1081 ; DISABLED-LABEL: test_limit_one_pred:
   1082 ; DISABLED:       # %bb.0: # %entry
   1083 ; DISABLED-NEXT:    pushq %r15
   1084 ; DISABLED-NEXT:    .cfi_def_cfa_offset 16
   1085 ; DISABLED-NEXT:    pushq %r14
   1086 ; DISABLED-NEXT:    .cfi_def_cfa_offset 24
   1087 ; DISABLED-NEXT:    pushq %r12
   1088 ; DISABLED-NEXT:    .cfi_def_cfa_offset 32
   1089 ; DISABLED-NEXT:    pushq %rbx
   1090 ; DISABLED-NEXT:    .cfi_def_cfa_offset 40
   1091 ; DISABLED-NEXT:    pushq %rax
   1092 ; DISABLED-NEXT:    .cfi_def_cfa_offset 48
   1093 ; DISABLED-NEXT:    .cfi_offset %rbx, -40
   1094 ; DISABLED-NEXT:    .cfi_offset %r12, -32
   1095 ; DISABLED-NEXT:    .cfi_offset %r14, -24
   1096 ; DISABLED-NEXT:    .cfi_offset %r15, -16
   1097 ; DISABLED-NEXT:    movq %r8, %r15
   1098 ; DISABLED-NEXT:    movq %rcx, %r14
   1099 ; DISABLED-NEXT:    movq %rsi, %r12
   1100 ; DISABLED-NEXT:    movq %rdi, %rbx
   1101 ; DISABLED-NEXT:    movl %r9d, 12(%rdi)
   1102 ; DISABLED-NEXT:    cmpl $18, %edx
   1103 ; DISABLED-NEXT:    jl .LBB10_2
   1104 ; DISABLED-NEXT:  # %bb.1: # %if.then
   1105 ; DISABLED-NEXT:    movl %edx, 4(%rbx)
   1106 ; DISABLED-NEXT:    movq %rbx, %rdi
   1107 ; DISABLED-NEXT:    callq bar
   1108 ; DISABLED-NEXT:  .LBB10_2: # %if.end
   1109 ; DISABLED-NEXT:    movups (%r15), %xmm0
   1110 ; DISABLED-NEXT:    movups %xmm0, (%r14)
   1111 ; DISABLED-NEXT:    movups (%rbx), %xmm0
   1112 ; DISABLED-NEXT:    movups %xmm0, (%r12)
   1113 ; DISABLED-NEXT:    addq $8, %rsp
   1114 ; DISABLED-NEXT:    .cfi_def_cfa_offset 40
   1115 ; DISABLED-NEXT:    popq %rbx
   1116 ; DISABLED-NEXT:    .cfi_def_cfa_offset 32
   1117 ; DISABLED-NEXT:    popq %r12
   1118 ; DISABLED-NEXT:    .cfi_def_cfa_offset 24
   1119 ; DISABLED-NEXT:    popq %r14
   1120 ; DISABLED-NEXT:    .cfi_def_cfa_offset 16
   1121 ; DISABLED-NEXT:    popq %r15
   1122 ; DISABLED-NEXT:    .cfi_def_cfa_offset 8
   1123 ; DISABLED-NEXT:    retq
   1124 ;
   1125 ; CHECK-AVX2-LABEL: test_limit_one_pred:
   1126 ; CHECK-AVX2:       # %bb.0: # %entry
   1127 ; CHECK-AVX2-NEXT:    pushq %r15
   1128 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
   1129 ; CHECK-AVX2-NEXT:    pushq %r14
   1130 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
   1131 ; CHECK-AVX2-NEXT:    pushq %r12
   1132 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
   1133 ; CHECK-AVX2-NEXT:    pushq %rbx
   1134 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
   1135 ; CHECK-AVX2-NEXT:    pushq %rax
   1136 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 48
   1137 ; CHECK-AVX2-NEXT:    .cfi_offset %rbx, -40
   1138 ; CHECK-AVX2-NEXT:    .cfi_offset %r12, -32
   1139 ; CHECK-AVX2-NEXT:    .cfi_offset %r14, -24
   1140 ; CHECK-AVX2-NEXT:    .cfi_offset %r15, -16
   1141 ; CHECK-AVX2-NEXT:    movq %r8, %r12
   1142 ; CHECK-AVX2-NEXT:    movq %rcx, %r15
   1143 ; CHECK-AVX2-NEXT:    movq %rsi, %r14
   1144 ; CHECK-AVX2-NEXT:    movq %rdi, %rbx
   1145 ; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
   1146 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
   1147 ; CHECK-AVX2-NEXT:    jl .LBB10_2
   1148 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
   1149 ; CHECK-AVX2-NEXT:    movl %edx, 4(%rbx)
   1150 ; CHECK-AVX2-NEXT:    movq %rbx, %rdi
   1151 ; CHECK-AVX2-NEXT:    callq bar
   1152 ; CHECK-AVX2-NEXT:  .LBB10_2: # %if.end
   1153 ; CHECK-AVX2-NEXT:    vmovups (%r12), %xmm0
   1154 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r15)
   1155 ; CHECK-AVX2-NEXT:    movq (%rbx), %rax
   1156 ; CHECK-AVX2-NEXT:    movq %rax, (%r14)
   1157 ; CHECK-AVX2-NEXT:    movl 8(%rbx), %eax
   1158 ; CHECK-AVX2-NEXT:    movl %eax, 8(%r14)
   1159 ; CHECK-AVX2-NEXT:    movl 12(%rbx), %eax
   1160 ; CHECK-AVX2-NEXT:    movl %eax, 12(%r14)
   1161 ; CHECK-AVX2-NEXT:    addq $8, %rsp
   1162 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
   1163 ; CHECK-AVX2-NEXT:    popq %rbx
   1164 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
   1165 ; CHECK-AVX2-NEXT:    popq %r12
   1166 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
   1167 ; CHECK-AVX2-NEXT:    popq %r14
   1168 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
   1169 ; CHECK-AVX2-NEXT:    popq %r15
   1170 ; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
   1171 ; CHECK-AVX2-NEXT:    retq
   1172 ;
   1173 ; CHECK-AVX512-LABEL: test_limit_one_pred:
   1174 ; CHECK-AVX512:       # %bb.0: # %entry
   1175 ; CHECK-AVX512-NEXT:    pushq %r15
   1176 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
   1177 ; CHECK-AVX512-NEXT:    pushq %r14
   1178 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
   1179 ; CHECK-AVX512-NEXT:    pushq %r12
   1180 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
   1181 ; CHECK-AVX512-NEXT:    pushq %rbx
   1182 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
   1183 ; CHECK-AVX512-NEXT:    pushq %rax
   1184 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 48
   1185 ; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -40
   1186 ; CHECK-AVX512-NEXT:    .cfi_offset %r12, -32
   1187 ; CHECK-AVX512-NEXT:    .cfi_offset %r14, -24
   1188 ; CHECK-AVX512-NEXT:    .cfi_offset %r15, -16
   1189 ; CHECK-AVX512-NEXT:    movq %r8, %r12
   1190 ; CHECK-AVX512-NEXT:    movq %rcx, %r15
   1191 ; CHECK-AVX512-NEXT:    movq %rsi, %r14
   1192 ; CHECK-AVX512-NEXT:    movq %rdi, %rbx
   1193 ; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
   1194 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
   1195 ; CHECK-AVX512-NEXT:    jl .LBB10_2
   1196 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
   1197 ; CHECK-AVX512-NEXT:    movl %edx, 4(%rbx)
   1198 ; CHECK-AVX512-NEXT:    movq %rbx, %rdi
   1199 ; CHECK-AVX512-NEXT:    callq bar
   1200 ; CHECK-AVX512-NEXT:  .LBB10_2: # %if.end
   1201 ; CHECK-AVX512-NEXT:    vmovups (%r12), %xmm0
   1202 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r15)
   1203 ; CHECK-AVX512-NEXT:    movq (%rbx), %rax
   1204 ; CHECK-AVX512-NEXT:    movq %rax, (%r14)
   1205 ; CHECK-AVX512-NEXT:    movl 8(%rbx), %eax
   1206 ; CHECK-AVX512-NEXT:    movl %eax, 8(%r14)
   1207 ; CHECK-AVX512-NEXT:    movl 12(%rbx), %eax
   1208 ; CHECK-AVX512-NEXT:    movl %eax, 12(%r14)
   1209 ; CHECK-AVX512-NEXT:    addq $8, %rsp
   1210 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
   1211 ; CHECK-AVX512-NEXT:    popq %rbx
   1212 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
   1213 ; CHECK-AVX512-NEXT:    popq %r12
   1214 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
   1215 ; CHECK-AVX512-NEXT:    popq %r14
   1216 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
   1217 ; CHECK-AVX512-NEXT:    popq %r15
   1218 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
   1219 ; CHECK-AVX512-NEXT:    retq
   1220 entry:
   1221   %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
          ; Unconditional store to field 3 of %s1; no call follows it in this block.
   1222   store i32 %x2, i32* %d, align 4
   1223   %cmp = icmp sgt i32 %x, 17
   1224   br i1 %cmp, label %if.then, label %if.end
   1225 
   1226 if.then:                                          ; preds = %entry
   1227   %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
          ; Conditional store to field 1 of %s1, followed directly by a call.
   1228   store i32 %x, i32* %b, align 4
   1229   tail call void @bar(%struct.S* nonnull %s1) #3
   1230   br label %if.end
   1231 
   1232 if.end:                                           ; preds = %if.then, %entry
   1233   %0 = bitcast %struct.S* %s3 to i8*
   1234   %1 = bitcast %struct.S* %s4 to i8*
          ; First copy does not touch %s1; the second one reads the 16 bytes of %s1
          ; that contain both stored fields.
   1235   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
   1236   %2 = bitcast %struct.S* %s2 to i8*
   1237   %3 = bitcast %struct.S* %s1 to i8*
   1238   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
   1239   ret void
   1240 }
   1241 
   1242 
   1243 declare void @bar(%struct.S*) local_unnamed_addr #1 ; external callee; the test_limit_* functions call it after storing into %s1
   1244 
   1245 
   1246 ; Function Attrs: argmemonly nounwind
        ; Declared with five operands (dest, src, i64 length, i32, i1). Every call in
        ; this part of the file passes a constant length (16 or 32) and `i1 false` as
        ; the last operand.
        ; NOTE(review): the i32 operand looks like the legacy explicit-alignment
        ; argument of older LLVM releases - confirm against the LangRef of the
        ; targeted version.
   1247 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
   1248 
        ; Attribute group #0 is the one attached to the test functions defined in this
        ; part of the file.
        ; NOTE(review): groups #1 and #3 are referenced here but their definitions are
        ; not visible in this part of the file - confirm they exist or are intended
        ; to be empty.
   1249 attributes #0 = { nounwind uwtable "target-cpu"="x86-64" }
   1250 
   1251 %struct.S7 = type { float, float, float , float, float, float, float, float } ; eight floats, 32 bytes; field N is at byte offset 4*N
   1252 
   1253 ; Function Attrs: nounwind uwtable
        ; test_conditional_block_float: when %x > 17 (signed), stores the constant
        ; float 1.0 into field 1 of %s1 (byte offset 4 of %struct.S7). It then performs
        ; two 32-byte copies: %s4 -> %s3 and %s1 -> %s2. Only %s1 is marked noalias;
        ; the float parameter %y is not used by the body.
        ; Expected code for the copy of %s1:
        ;   - default run: 4 + 4 + 8 bytes through integer registers plus one 16-byte
        ;     vector move for the upper half;
        ;   - AVX2 and AVX512 runs: 4 + 4 bytes, one 16-byte vector move at offset 8,
        ;     then 8 bytes at offset 24;
        ;   - run with --x86-disable-avoid-SFB: two 16-byte vector moves.
        ; In every split form the dword at offset 4 is read by its own 4-byte load.
   1254 define void @test_conditional_block_float(%struct.S7* nocapture noalias %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 {
   1255 ; CHECK-LABEL: test_conditional_block_float:
   1256 ; CHECK:       # %bb.0: # %entry
   1257 ; CHECK-NEXT:    cmpl $18, %edx
   1258 ; CHECK-NEXT:    jl .LBB11_2
   1259 ; CHECK-NEXT:  # %bb.1: # %if.then
   1260 ; CHECK-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
   1261 ; CHECK-NEXT:  .LBB11_2: # %if.end
   1262 ; CHECK-NEXT:    movups (%r8), %xmm0
   1263 ; CHECK-NEXT:    movups 16(%r8), %xmm1
   1264 ; CHECK-NEXT:    movups %xmm1, 16(%rcx)
   1265 ; CHECK-NEXT:    movups %xmm0, (%rcx)
   1266 ; CHECK-NEXT:    movl (%rdi), %eax
   1267 ; CHECK-NEXT:    movl 4(%rdi), %ecx
   1268 ; CHECK-NEXT:    movq 8(%rdi), %rdx
   1269 ; CHECK-NEXT:    movups 16(%rdi), %xmm0
   1270 ; CHECK-NEXT:    movups %xmm0, 16(%rsi)
   1271 ; CHECK-NEXT:    movl %eax, (%rsi)
   1272 ; CHECK-NEXT:    movl %ecx, 4(%rsi)
   1273 ; CHECK-NEXT:    movq %rdx, 8(%rsi)
   1274 ; CHECK-NEXT:    retq
   1275 ;
   1276 ; DISABLED-LABEL: test_conditional_block_float:
   1277 ; DISABLED:       # %bb.0: # %entry
   1278 ; DISABLED-NEXT:    cmpl $18, %edx
   1279 ; DISABLED-NEXT:    jl .LBB11_2
   1280 ; DISABLED-NEXT:  # %bb.1: # %if.then
   1281 ; DISABLED-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
   1282 ; DISABLED-NEXT:  .LBB11_2: # %if.end
   1283 ; DISABLED-NEXT:    movups (%r8), %xmm0
   1284 ; DISABLED-NEXT:    movups 16(%r8), %xmm1
   1285 ; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
   1286 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
   1287 ; DISABLED-NEXT:    movups (%rdi), %xmm0
   1288 ; DISABLED-NEXT:    movups 16(%rdi), %xmm1
   1289 ; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
   1290 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
   1291 ; DISABLED-NEXT:    retq
   1292 ;
   1293 ; CHECK-AVX2-LABEL: test_conditional_block_float:
   1294 ; CHECK-AVX2:       # %bb.0: # %entry
   1295 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
   1296 ; CHECK-AVX2-NEXT:    jl .LBB11_2
   1297 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
   1298 ; CHECK-AVX2-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
   1299 ; CHECK-AVX2-NEXT:  .LBB11_2: # %if.end
   1300 ; CHECK-AVX2-NEXT:    vmovups (%r8), %ymm0
   1301 ; CHECK-AVX2-NEXT:    vmovups %ymm0, (%rcx)
   1302 ; CHECK-AVX2-NEXT:    movl (%rdi), %eax
   1303 ; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
   1304 ; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
   1305 ; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
   1306 ; CHECK-AVX2-NEXT:    vmovups 8(%rdi), %xmm0
   1307 ; CHECK-AVX2-NEXT:    vmovups %xmm0, 8(%rsi)
   1308 ; CHECK-AVX2-NEXT:    movq 24(%rdi), %rax
   1309 ; CHECK-AVX2-NEXT:    movq %rax, 24(%rsi)
   1310 ; CHECK-AVX2-NEXT:    vzeroupper
   1311 ; CHECK-AVX2-NEXT:    retq
   1312 ;
   1313 ; CHECK-AVX512-LABEL: test_conditional_block_float:
   1314 ; CHECK-AVX512:       # %bb.0: # %entry
   1315 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
   1316 ; CHECK-AVX512-NEXT:    jl .LBB11_2
   1317 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
   1318 ; CHECK-AVX512-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
   1319 ; CHECK-AVX512-NEXT:  .LBB11_2: # %if.end
   1320 ; CHECK-AVX512-NEXT:    vmovups (%r8), %ymm0
   1321 ; CHECK-AVX512-NEXT:    vmovups %ymm0, (%rcx)
   1322 ; CHECK-AVX512-NEXT:    movl (%rdi), %eax
   1323 ; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
   1324 ; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
   1325 ; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
   1326 ; CHECK-AVX512-NEXT:    vmovups 8(%rdi), %xmm0
   1327 ; CHECK-AVX512-NEXT:    vmovups %xmm0, 8(%rsi)
   1328 ; CHECK-AVX512-NEXT:    movq 24(%rdi), %rax
   1329 ; CHECK-AVX512-NEXT:    movq %rax, 24(%rsi)
   1330 ; CHECK-AVX512-NEXT:    vzeroupper
   1331 ; CHECK-AVX512-NEXT:    retq
   1332 entry:
   1333   %cmp = icmp sgt i32 %x, 17
   1334   br i1 %cmp, label %if.then, label %if.end
   1335 
   1336 if.then:                                          ; preds = %entry
   1337   %b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1
          ; Conditional 4-byte store of 1.0 into %s1; the expected assembly emits it
          ; as an integer immediate store of 0x3F800000.
   1338   store float 1.0, float* %b, align 4
   1339   br label %if.end
   1340 
   1341 if.end:                                           ; preds = %if.then, %entry
   1342   %0 = bitcast %struct.S7* %s3 to i8*
   1343   %1 = bitcast %struct.S7* %s4 to i8*
          ; First copy does not touch %s1; the second one reads all 32 bytes of %s1,
          ; including the conditionally stored field.
   1344   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
   1345   %2 = bitcast %struct.S7* %s2 to i8*
   1346   %3 = bitcast %struct.S7* %s1 to i8*
   1347   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
   1348   ret void
   1349 }
   1350 
   1351 %struct.S8 = type { i64, i64, i64, i64, i64, i64 } ; six i64 fields, 48 bytes; field N is at byte offset 8*N
   1352 
   1353 ; Function Attrs: nounwind uwtable
   1354 define void @test_conditional_block_ymm(%struct.S8* nocapture noalias %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 {
   1355 ; CHECK-LABEL: test_conditional_block_ymm:
   1356 ; CHECK:       # %bb.0: # %entry
   1357 ; CHECK-NEXT:    cmpl $18, %edx
   1358 ; CHECK-NEXT:    jl .LBB12_2
   1359 ; CHECK-NEXT:  # %bb.1: # %if.then
   1360 ; CHECK-NEXT:    movq $1, 8(%rdi)
   1361 ; CHECK-NEXT:  .LBB12_2: # %if.end
   1362 ; CHECK-NEXT:    movups (%r8), %xmm0
   1363 ; CHECK-NEXT:    movups 16(%r8), %xmm1
   1364 ; CHECK-NEXT:    movups %xmm1, 16(%rcx)
   1365 ; CHECK-NEXT:    movups %xmm0, (%rcx)
   1366 ; CHECK-NEXT:    movq (%rdi), %rax
   1367 ; CHECK-NEXT:    movq 8(%rdi), %rcx
   1368 ; CHECK-NEXT:    movups 16(%rdi), %xmm0
   1369 ; CHECK-NEXT:    movups %xmm0, 16(%rsi)
   1370 ; CHECK-NEXT:    movq %rax, (%rsi)
   1371 ; CHECK-NEXT:    movq %rcx, 8(%rsi)
   1372 ; CHECK-NEXT:    retq
   1373 ;
   1374 ; DISABLED-LABEL: test_conditional_block_ymm:
   1375 ; DISABLED:       # %bb.0: # %entry
   1376 ; DISABLED-NEXT:    cmpl $18, %edx
   1377 ; DISABLED-NEXT:    jl .LBB12_2
   1378 ; DISABLED-NEXT:  # %bb.1: # %if.then
   1379 ; DISABLED-NEXT:    movq $1, 8(%rdi)
   1380 ; DISABLED-NEXT:  .LBB12_2: # %if.end
   1381 ; DISABLED-NEXT:    movups (%r8), %xmm0
   1382 ; DISABLED-NEXT:    movups 16(%r8), %xmm1
   1383 ; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
   1384 ; DISABLED-NEXT:    movups %xmm0, (%rcx)
   1385 ; DISABLED-NEXT:    movups (%rdi), %xmm0
   1386 ; DISABLED-NEXT:    movups 16(%rdi), %xmm1
   1387 ; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
   1388 ; DISABLED-NEXT:    movups %xmm0, (%rsi)
   1389 ; DISABLED-NEXT:    retq
   1390 ;
   1391 ; CHECK-AVX2-LABEL: test_conditional_block_ymm:
   1392 ; CHECK-AVX2:       # %bb.0: # %entry
   1393 ; CHECK-AVX2-NEXT:    cmpl $18, %edx
   1394 ; CHECK-AVX2-NEXT:    jl .LBB12_2
   1395 ; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
   1396 ; CHECK-AVX2-NEXT:    movq $1, 8(%rdi)
   1397 ; CHECK-AVX2-NEXT:  .LBB12_2: # %if.end
   1398 ; CHECK-AVX2-NEXT:    vmovups (%r8), %ymm0
   1399 ; CHECK-AVX2-NEXT:    vmovups %ymm0, (%rcx)
   1400 ; CHECK-AVX2-NEXT:    movq (%rdi), %rax
   1401 ; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
   1402 ; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
   1403 ; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
   1404 ; CHECK-AVX2-NEXT:    vmovups 16(%rdi), %xmm0
   1405 ; CHECK-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
   1406 ; CHECK-AVX2-NEXT:    vzeroupper
   1407 ; CHECK-AVX2-NEXT:    retq
   1408 ;
   1409 ; CHECK-AVX512-LABEL: test_conditional_block_ymm:
   1410 ; CHECK-AVX512:       # %bb.0: # %entry
   1411 ; CHECK-AVX512-NEXT:    cmpl $18, %edx
   1412 ; CHECK-AVX512-NEXT:    jl .LBB12_2
   1413 ; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
   1414 ; CHECK-AVX512-NEXT:    movq $1, 8(%rdi)
   1415 ; CHECK-AVX512-NEXT:  .LBB12_2: # %if.end
   1416 ; CHECK-AVX512-NEXT:    vmovups (%r8), %ymm0
   1417 ; CHECK-AVX512-NEXT:    vmovups %ymm0, (%rcx)
   1418 ; CHECK-AVX512-NEXT:    movq (%rdi), %rax
   1419 ; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
   1420 ; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
   1421 ; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
   1422 ; CHECK-AVX512-NEXT:    vmovups 16(%rdi), %xmm0
   1423 ; CHECK-AVX512-NEXT:    vmovups %xmm0, 16(%rsi)
   1424 ; CHECK-AVX512-NEXT:    vzeroupper
   1425 ; CHECK-AVX512-NEXT:    retq
   1426 entry:
   1427   %cmp = icmp sgt i32 %x, 17
   1428   br i1 %cmp, label %if.then, label %if.end
   1429 
   1430 if.then:                                          ; preds = %entry
   1431   %b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1
   1432   store i64 1, i64* %b, align 4
   1433   br label %if.end
   1434 
   1435 if.end:                                           ; preds = %if.then, %entry
   1436   %0 = bitcast %struct.S8* %s3 to i8*
   1437   %1 = bitcast %struct.S8* %s4 to i8*
   1438   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
   1439   %2 = bitcast %struct.S8* %s2 to i8*
   1440   %3 = bitcast %struct.S8* %s1 to i8*
   1441   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
   1442   ret void
   1443 }
   1444 
   1445 define dso_local void @test_alias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
        ; 4-byte store followed by a 16-byte copy whose source and destination
        ; overlap.
        ;
        ; The i32 %x is stored at %A (bytes 0..3). Then 16 bytes are copied from %A
        ; (bytes 0..15) to %A + 4 (bytes 4..19), so the destination range overlaps
        ; the source range.
        ;
        ; Expected code: in every llc invocation listed at the top of the file the
        ; copy remains one 16-byte load and one 16-byte store (movups, or vmovups
        ; with -mcpu=core-avx2 / -mcpu=skx). The output is the same with and
        ; without --x86-disable-avoid-SFB, i.e. the copy is not split here.
   1446 ; CHECK-LABEL: test_alias:
   1447 ; CHECK:       # %bb.0: # %entry
   1448 ; CHECK-NEXT:    movl %esi, (%rdi)
   1449 ; CHECK-NEXT:    movups (%rdi), %xmm0
   1450 ; CHECK-NEXT:    movups %xmm0, 4(%rdi)
   1451 ; CHECK-NEXT:    retq
   1452 ;
   1453 ; DISABLED-LABEL: test_alias:
   1454 ; DISABLED:       # %bb.0: # %entry
   1455 ; DISABLED-NEXT:    movl %esi, (%rdi)
   1456 ; DISABLED-NEXT:    movups (%rdi), %xmm0
   1457 ; DISABLED-NEXT:    movups %xmm0, 4(%rdi)
   1458 ; DISABLED-NEXT:    retq
   1459 ;
   1460 ; CHECK-AVX2-LABEL: test_alias:
   1461 ; CHECK-AVX2:       # %bb.0: # %entry
   1462 ; CHECK-AVX2-NEXT:    movl %esi, (%rdi)
   1463 ; CHECK-AVX2-NEXT:    vmovups (%rdi), %xmm0
   1464 ; CHECK-AVX2-NEXT:    vmovups %xmm0, 4(%rdi)
   1465 ; CHECK-AVX2-NEXT:    retq
   1466 ;
   1467 ; CHECK-AVX512-LABEL: test_alias:
   1468 ; CHECK-AVX512:       # %bb.0: # %entry
   1469 ; CHECK-AVX512-NEXT:    movl %esi, (%rdi)
   1470 ; CHECK-AVX512-NEXT:    vmovups (%rdi), %xmm0
   1471 ; CHECK-AVX512-NEXT:    vmovups %xmm0, 4(%rdi)
   1472 ; CHECK-AVX512-NEXT:    retq
   1473 entry:
          ; 4-byte store of %x to the first four bytes of %A.
   1474   %a = bitcast i8* %A to i32*
   1475   store i32 %x, i32* %a, align 4
          ; Destination is %A + 4, which lies inside the 16-byte source range.
   1476   %add.ptr = getelementptr inbounds i8, i8* %A, i64 4
   1477   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
   1478   ret void
   1479 }
   1480 
   1481 ; Function Attrs: nounwind uwtable
        ; 4-byte store followed by a 16-byte copy whose source and destination do
        ; not overlap.
        ;
        ; The i32 %x is stored at %A (bytes 0..3). Then 16 bytes are copied from %A
        ; (bytes 0..15) to %A + 20 (bytes 20..35); the two ranges are disjoint.
        ; %A itself carries no noalias attribute, so the test name presumably
        ; refers to these disjoint ranges.
        ;
        ; Expected code, per llc invocation listed at the top of the file:
        ;  - default, -mcpu=core-avx2 and -mcpu=skx: the copy is split into a
        ;    4-byte move at offset 0, an 8-byte move at offset 4 and a 4-byte move
        ;    at offset 12, so the first load has the same width and address as the
        ;    preceding 4-byte store.
        ;  - with --x86-disable-avoid-SFB: the copy stays as one 16-byte movups
        ;    load and one 16-byte movups store.
   1482 define dso_local void @test_noalias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
   1483 ; CHECK-LABEL: test_noalias:
   1484 ; CHECK:       # %bb.0: # %entry
   1485 ; CHECK-NEXT:    movl %esi, (%rdi)
   1486 ; CHECK-NEXT:    movl (%rdi), %eax
   1487 ; CHECK-NEXT:    movl %eax, 20(%rdi)
   1488 ; CHECK-NEXT:    movq 4(%rdi), %rax
   1489 ; CHECK-NEXT:    movq %rax, 24(%rdi)
   1490 ; CHECK-NEXT:    movl 12(%rdi), %eax
   1491 ; CHECK-NEXT:    movl %eax, 32(%rdi)
   1492 ; CHECK-NEXT:    retq
   1493 ;
   1494 ; DISABLED-LABEL: test_noalias:
   1495 ; DISABLED:       # %bb.0: # %entry
   1496 ; DISABLED-NEXT:    movl %esi, (%rdi)
   1497 ; DISABLED-NEXT:    movups (%rdi), %xmm0
   1498 ; DISABLED-NEXT:    movups %xmm0, 20(%rdi)
   1499 ; DISABLED-NEXT:    retq
   1500 ;
   1501 ; CHECK-AVX2-LABEL: test_noalias:
   1502 ; CHECK-AVX2:       # %bb.0: # %entry
   1503 ; CHECK-AVX2-NEXT:    movl %esi, (%rdi)
   1504 ; CHECK-AVX2-NEXT:    movl (%rdi), %eax
   1505 ; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
   1506 ; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
   1507 ; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
   1508 ; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
   1509 ; CHECK-AVX2-NEXT:    movl %eax, 32(%rdi)
   1510 ; CHECK-AVX2-NEXT:    retq
   1511 ;
   1512 ; CHECK-AVX512-LABEL: test_noalias:
   1513 ; CHECK-AVX512:       # %bb.0: # %entry
   1514 ; CHECK-AVX512-NEXT:    movl %esi, (%rdi)
   1515 ; CHECK-AVX512-NEXT:    movl (%rdi), %eax
   1516 ; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
   1517 ; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
   1518 ; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
   1519 ; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
   1520 ; CHECK-AVX512-NEXT:    movl %eax, 32(%rdi)
   1521 ; CHECK-AVX512-NEXT:    retq
   1522 entry:
          ; 4-byte store of %x to the first four bytes of %A.
   1523   %a = bitcast i8* %A to i32*
   1524   store i32 %x, i32* %a, align 4
          ; Destination is %A + 20, past the end of the 16-byte source range.
   1525   %add.ptr = getelementptr inbounds i8, i8* %A, i64 20
   1526   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
   1527   ret void
   1528 }
   1529 
   1530 
   1531 
   1532