Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
      3 ; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
      4 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
      5 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
      6 
      7 ; ModuleID = '../testSFB/testOverlapBlocks.c'
      8 source_filename = "../testSFB/testOverlapBlocks.c"
      9 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
     10 target triple = "x86_64-unknown-linux-gnu"
     11 
     12 ; Function Attrs: nounwind uwtable
        ; test_overlap_1: two 16-byte memcpy calls that both read [A-16, A), each
        ; preceded by narrower stores into that same byte range.
        ;   * Before the first copy (A-16 -> A): one i32 store at A-8.
        ;   * Before the second copy (A-16 -> A+16): i64 stores at A-9 and A-16
        ;     and an i8 store at A-1.
        ; Expected output per prefix, as recorded in the assertions below:
        ;   * CHECK / CHECK-AVX2 / CHECK-AVX512: each copy is emitted as several
        ;     narrower integer load/store pairs (8+4+4 bytes, then 8+4+2+1+1).
        ;   * DISABLED (run with --x86-disable-avoid-SFB): each copy stays a single
        ;     movups load / movups store pair.
     13 define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
     14 ; CHECK-LABEL: test_overlap_1:
     15 ; CHECK:       # %bb.0: # %entry
     16 ; CHECK-NEXT:    movl $7, -8(%rdi)
     17 ; CHECK-NEXT:    movq -16(%rdi), %rax
     18 ; CHECK-NEXT:    movq %rax, (%rdi)
     19 ; CHECK-NEXT:    movl -8(%rdi), %eax
     20 ; CHECK-NEXT:    movl %eax, 8(%rdi)
     21 ; CHECK-NEXT:    movl -4(%rdi), %eax
     22 ; CHECK-NEXT:    movl %eax, 12(%rdi)
     23 ; CHECK-NEXT:    movslq %esi, %rax
     24 ; CHECK-NEXT:    movq %rax, -9(%rdi)
     25 ; CHECK-NEXT:    movq %rax, -16(%rdi)
     26 ; CHECK-NEXT:    movb $0, -1(%rdi)
     27 ; CHECK-NEXT:    movq -16(%rdi), %rax
     28 ; CHECK-NEXT:    movq %rax, 16(%rdi)
     29 ; CHECK-NEXT:    movl -8(%rdi), %eax
     30 ; CHECK-NEXT:    movl %eax, 24(%rdi)
     31 ; CHECK-NEXT:    movzwl -4(%rdi), %eax
     32 ; CHECK-NEXT:    movw %ax, 28(%rdi)
     33 ; CHECK-NEXT:    movb -2(%rdi), %al
     34 ; CHECK-NEXT:    movb %al, 30(%rdi)
     35 ; CHECK-NEXT:    movb -1(%rdi), %al
     36 ; CHECK-NEXT:    movb %al, 31(%rdi)
     37 ; CHECK-NEXT:    retq
     38 ;
     39 ; DISABLED-LABEL: test_overlap_1:
     40 ; DISABLED:       # %bb.0: # %entry
     41 ; DISABLED-NEXT:    movl $7, -8(%rdi)
     42 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
     43 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
     44 ; DISABLED-NEXT:    movslq %esi, %rax
     45 ; DISABLED-NEXT:    movq %rax, -9(%rdi)
     46 ; DISABLED-NEXT:    movq %rax, -16(%rdi)
     47 ; DISABLED-NEXT:    movb $0, -1(%rdi)
     48 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
     49 ; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
     50 ; DISABLED-NEXT:    retq
     51 ;
     52 ; CHECK-AVX2-LABEL: test_overlap_1:
     53 ; CHECK-AVX2:       # %bb.0: # %entry
     54 ; CHECK-AVX2-NEXT:    movl $7, -8(%rdi)
     55 ; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
     56 ; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
     57 ; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
     58 ; CHECK-AVX2-NEXT:    movl %eax, 8(%rdi)
     59 ; CHECK-AVX2-NEXT:    movl -4(%rdi), %eax
     60 ; CHECK-AVX2-NEXT:    movl %eax, 12(%rdi)
     61 ; CHECK-AVX2-NEXT:    movslq %esi, %rax
     62 ; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
     63 ; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
     64 ; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
     65 ; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
     66 ; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
     67 ; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
     68 ; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
     69 ; CHECK-AVX2-NEXT:    movzwl -4(%rdi), %eax
     70 ; CHECK-AVX2-NEXT:    movw %ax, 28(%rdi)
     71 ; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
     72 ; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
     73 ; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
     74 ; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
     75 ; CHECK-AVX2-NEXT:    retq
     76 ;
     77 ; CHECK-AVX512-LABEL: test_overlap_1:
     78 ; CHECK-AVX512:       # %bb.0: # %entry
     79 ; CHECK-AVX512-NEXT:    movl $7, -8(%rdi)
     80 ; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
     81 ; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
     82 ; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
     83 ; CHECK-AVX512-NEXT:    movl %eax, 8(%rdi)
     84 ; CHECK-AVX512-NEXT:    movl -4(%rdi), %eax
     85 ; CHECK-AVX512-NEXT:    movl %eax, 12(%rdi)
     86 ; CHECK-AVX512-NEXT:    movslq %esi, %rax
     87 ; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
     88 ; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
     89 ; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
     90 ; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
     91 ; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
     92 ; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
     93 ; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
     94 ; CHECK-AVX512-NEXT:    movzwl -4(%rdi), %eax
     95 ; CHECK-AVX512-NEXT:    movw %ax, 28(%rdi)
     96 ; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
     97 ; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
     98 ; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
     99 ; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
    100 ; CHECK-AVX512-NEXT:    retq
    101 entry:
          ; %add.ptr = A-16 is the source of both 16-byte copies below.
    102   %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
          ; 4-byte store of 7 at A-8, inside the range the first memcpy reads.
    103   %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
    104   %0 = bitcast i8* %add.ptr1 to i32*
    105   store i32 7, i32* %0, align 4
          ; First copy: 16 bytes from A-16 to A.
    106   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
          ; Three more stores into [A-16, A): sext(x) as i64 at A-9 and at A-16,
          ; then a zero byte at A-1.
    107   %conv = sext i32 %x to i64
    108   %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
    109   %1 = bitcast i8* %add.ptr2 to i64*
    110   store i64 %conv, i64* %1, align 8
    111   %2 = bitcast i8* %add.ptr to i64*
    112   store i64 %conv, i64* %2, align 8
    113   %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
    114   store i8 0, i8* %add.ptr5, align 1
          ; Second copy: 16 bytes from A-16 to A+16.
    115   %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
    116   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
    117   ret void
    118 }
    119 
    120 ; Function Attrs: argmemonly nounwind
        ; Declaration of the memcpy intrinsic called by every test function in this
        ; file. Arguments are destination pointer, source pointer, i64 byte count,
        ; and a trailing i1 (the is-volatile flag per the LLVM Language Reference);
        ; every call here passes a byte count of 16 and false for the i1.
    121 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
    122 
    123 ; Function Attrs: nounwind uwtable
        ; test_overlap_2: two 16-byte memcpy calls that both read [A-16, A), each
        ; preceded by stores into that range.
        ;   * Before the first copy (A-16 -> A): one i64 store at A-16.
        ;   * Before the second copy (A-16 -> A+16): an i64 store at A-8 followed by
        ;     an i32 store at A-12.
        ; Expected output per prefix, as recorded in the assertions below:
        ;   * CHECK / CHECK-AVX2 / CHECK-AVX512: the first copy is emitted as two
        ;     8-byte moves, the second as 4+4+8 byte moves.
        ;   * DISABLED (run with --x86-disable-avoid-SFB): each copy stays a single
        ;     movups load / movups store pair.
    124 define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
    125 ; CHECK-LABEL: test_overlap_2:
    126 ; CHECK:       # %bb.0: # %entry
    127 ; CHECK-NEXT:    movslq %esi, %rax
    128 ; CHECK-NEXT:    movq %rax, -16(%rdi)
    129 ; CHECK-NEXT:    movq -16(%rdi), %rcx
    130 ; CHECK-NEXT:    movq %rcx, (%rdi)
    131 ; CHECK-NEXT:    movq -8(%rdi), %rcx
    132 ; CHECK-NEXT:    movq %rcx, 8(%rdi)
    133 ; CHECK-NEXT:    movq %rax, -8(%rdi)
    134 ; CHECK-NEXT:    movl $7, -12(%rdi)
    135 ; CHECK-NEXT:    movl -16(%rdi), %eax
    136 ; CHECK-NEXT:    movl %eax, 16(%rdi)
    137 ; CHECK-NEXT:    movl -12(%rdi), %eax
    138 ; CHECK-NEXT:    movl %eax, 20(%rdi)
    139 ; CHECK-NEXT:    movq -8(%rdi), %rax
    140 ; CHECK-NEXT:    movq %rax, 24(%rdi)
    141 ; CHECK-NEXT:    retq
    142 ;
    143 ; DISABLED-LABEL: test_overlap_2:
    144 ; DISABLED:       # %bb.0: # %entry
    145 ; DISABLED-NEXT:    movslq %esi, %rax
    146 ; DISABLED-NEXT:    movq %rax, -16(%rdi)
    147 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    148 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
    149 ; DISABLED-NEXT:    movq %rax, -8(%rdi)
    150 ; DISABLED-NEXT:    movl $7, -12(%rdi)
    151 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    152 ; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
    153 ; DISABLED-NEXT:    retq
    154 ;
    155 ; CHECK-AVX2-LABEL: test_overlap_2:
    156 ; CHECK-AVX2:       # %bb.0: # %entry
    157 ; CHECK-AVX2-NEXT:    movslq %esi, %rax
    158 ; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
    159 ; CHECK-AVX2-NEXT:    movq -16(%rdi), %rcx
    160 ; CHECK-AVX2-NEXT:    movq %rcx, (%rdi)
    161 ; CHECK-AVX2-NEXT:    movq -8(%rdi), %rcx
    162 ; CHECK-AVX2-NEXT:    movq %rcx, 8(%rdi)
    163 ; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
    164 ; CHECK-AVX2-NEXT:    movl $7, -12(%rdi)
    165 ; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
    166 ; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
    167 ; CHECK-AVX2-NEXT:    movl -12(%rdi), %eax
    168 ; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
    169 ; CHECK-AVX2-NEXT:    movq -8(%rdi), %rax
    170 ; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
    171 ; CHECK-AVX2-NEXT:    retq
    172 ;
    173 ; CHECK-AVX512-LABEL: test_overlap_2:
    174 ; CHECK-AVX512:       # %bb.0: # %entry
    175 ; CHECK-AVX512-NEXT:    movslq %esi, %rax
    176 ; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
    177 ; CHECK-AVX512-NEXT:    movq -16(%rdi), %rcx
    178 ; CHECK-AVX512-NEXT:    movq %rcx, (%rdi)
    179 ; CHECK-AVX512-NEXT:    movq -8(%rdi), %rcx
    180 ; CHECK-AVX512-NEXT:    movq %rcx, 8(%rdi)
    181 ; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
    182 ; CHECK-AVX512-NEXT:    movl $7, -12(%rdi)
    183 ; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
    184 ; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
    185 ; CHECK-AVX512-NEXT:    movl -12(%rdi), %eax
    186 ; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
    187 ; CHECK-AVX512-NEXT:    movq -8(%rdi), %rax
    188 ; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
    189 ; CHECK-AVX512-NEXT:    retq
    190 entry:
          ; %add.ptr = A-16 is the source of both 16-byte copies below.
    191   %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
          ; 8-byte store of sext(x) at A-16, the first half of the first copy's source.
    192   %conv = sext i32 %x to i64
    193   %0 = bitcast i8* %add.ptr to i64*
    194   store i64 %conv, i64* %0, align 8
          ; First copy: 16 bytes from A-16 to A.
    195   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
          ; 8-byte store at A-8, then a 4-byte store of 7 at A-12.
    196   %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8
    197   %1 = bitcast i8* %add.ptr3 to i64*
    198   store i64 %conv, i64* %1, align 8
    199   %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12
    200   %2 = bitcast i8* %add.ptr4 to i32*
    201   store i32 7, i32* %2, align 4
          ; Second copy: 16 bytes from A-16 to A+16.
    202   %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
    203   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
    204   ret void
    205 }
    206 
    207 ; Function Attrs: nounwind uwtable
        ; test_overlap_3: same store/copy sequence as test_overlap_1 except that the
        ; first i32 store is at A-10 instead of A-8.
        ;   * Before the first copy (A-16 -> A): one i32 store at A-10.
        ;   * Before the second copy (A-16 -> A+16): i64 stores at A-9 and A-16
        ;     and an i8 store at A-1.
        ; Expected output per prefix, as recorded in the assertions below:
        ;   * CHECK / CHECK-AVX2 / CHECK-AVX512: the first copy is emitted as
        ;     4+2+4+4+2 byte moves, the second as 8+2+4+1+1 byte moves.
        ;   * DISABLED (run with --x86-disable-avoid-SFB): each copy stays a single
        ;     movups load / movups store pair.
    208 define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
    209 ; CHECK-LABEL: test_overlap_3:
    210 ; CHECK:       # %bb.0: # %entry
    211 ; CHECK-NEXT:    movl $7, -10(%rdi)
    212 ; CHECK-NEXT:    movl -16(%rdi), %eax
    213 ; CHECK-NEXT:    movl %eax, (%rdi)
    214 ; CHECK-NEXT:    movzwl -12(%rdi), %eax
    215 ; CHECK-NEXT:    movw %ax, 4(%rdi)
    216 ; CHECK-NEXT:    movl -10(%rdi), %eax
    217 ; CHECK-NEXT:    movl %eax, 6(%rdi)
    218 ; CHECK-NEXT:    movl -6(%rdi), %eax
    219 ; CHECK-NEXT:    movl %eax, 10(%rdi)
    220 ; CHECK-NEXT:    movzwl -2(%rdi), %eax
    221 ; CHECK-NEXT:    movw %ax, 14(%rdi)
    222 ; CHECK-NEXT:    movslq %esi, %rax
    223 ; CHECK-NEXT:    movq %rax, -9(%rdi)
    224 ; CHECK-NEXT:    movq %rax, -16(%rdi)
    225 ; CHECK-NEXT:    movb $0, -1(%rdi)
    226 ; CHECK-NEXT:    movq -16(%rdi), %rax
    227 ; CHECK-NEXT:    movq %rax, 16(%rdi)
    228 ; CHECK-NEXT:    movzwl -8(%rdi), %eax
    229 ; CHECK-NEXT:    movw %ax, 24(%rdi)
    230 ; CHECK-NEXT:    movl -6(%rdi), %eax
    231 ; CHECK-NEXT:    movl %eax, 26(%rdi)
    232 ; CHECK-NEXT:    movb -2(%rdi), %al
    233 ; CHECK-NEXT:    movb %al, 30(%rdi)
    234 ; CHECK-NEXT:    movb -1(%rdi), %al
    235 ; CHECK-NEXT:    movb %al, 31(%rdi)
    236 ; CHECK-NEXT:    retq
    237 ;
    238 ; DISABLED-LABEL: test_overlap_3:
    239 ; DISABLED:       # %bb.0: # %entry
    240 ; DISABLED-NEXT:    movl $7, -10(%rdi)
    241 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    242 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
    243 ; DISABLED-NEXT:    movslq %esi, %rax
    244 ; DISABLED-NEXT:    movq %rax, -9(%rdi)
    245 ; DISABLED-NEXT:    movq %rax, -16(%rdi)
    246 ; DISABLED-NEXT:    movb $0, -1(%rdi)
    247 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    248 ; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
    249 ; DISABLED-NEXT:    retq
    250 ;
    251 ; CHECK-AVX2-LABEL: test_overlap_3:
    252 ; CHECK-AVX2:       # %bb.0: # %entry
    253 ; CHECK-AVX2-NEXT:    movl $7, -10(%rdi)
    254 ; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
    255 ; CHECK-AVX2-NEXT:    movl %eax, (%rdi)
    256 ; CHECK-AVX2-NEXT:    movzwl -12(%rdi), %eax
    257 ; CHECK-AVX2-NEXT:    movw %ax, 4(%rdi)
    258 ; CHECK-AVX2-NEXT:    movl -10(%rdi), %eax
    259 ; CHECK-AVX2-NEXT:    movl %eax, 6(%rdi)
    260 ; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
    261 ; CHECK-AVX2-NEXT:    movl %eax, 10(%rdi)
    262 ; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
    263 ; CHECK-AVX2-NEXT:    movw %ax, 14(%rdi)
    264 ; CHECK-AVX2-NEXT:    movslq %esi, %rax
    265 ; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
    266 ; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
    267 ; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
    268 ; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
    269 ; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
    270 ; CHECK-AVX2-NEXT:    movzwl -8(%rdi), %eax
    271 ; CHECK-AVX2-NEXT:    movw %ax, 24(%rdi)
    272 ; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
    273 ; CHECK-AVX2-NEXT:    movl %eax, 26(%rdi)
    274 ; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
    275 ; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
    276 ; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
    277 ; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
    278 ; CHECK-AVX2-NEXT:    retq
    279 ;
    280 ; CHECK-AVX512-LABEL: test_overlap_3:
    281 ; CHECK-AVX512:       # %bb.0: # %entry
    282 ; CHECK-AVX512-NEXT:    movl $7, -10(%rdi)
    283 ; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
    284 ; CHECK-AVX512-NEXT:    movl %eax, (%rdi)
    285 ; CHECK-AVX512-NEXT:    movzwl -12(%rdi), %eax
    286 ; CHECK-AVX512-NEXT:    movw %ax, 4(%rdi)
    287 ; CHECK-AVX512-NEXT:    movl -10(%rdi), %eax
    288 ; CHECK-AVX512-NEXT:    movl %eax, 6(%rdi)
    289 ; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
    290 ; CHECK-AVX512-NEXT:    movl %eax, 10(%rdi)
    291 ; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
    292 ; CHECK-AVX512-NEXT:    movw %ax, 14(%rdi)
    293 ; CHECK-AVX512-NEXT:    movslq %esi, %rax
    294 ; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
    295 ; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
    296 ; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
    297 ; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
    298 ; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
    299 ; CHECK-AVX512-NEXT:    movzwl -8(%rdi), %eax
    300 ; CHECK-AVX512-NEXT:    movw %ax, 24(%rdi)
    301 ; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
    302 ; CHECK-AVX512-NEXT:    movl %eax, 26(%rdi)
    303 ; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
    304 ; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
    305 ; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
    306 ; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
    307 ; CHECK-AVX512-NEXT:    retq
    308 entry:
          ; %add.ptr = A-16 is the source of both 16-byte copies below.
    309   %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
          ; 4-byte store of 7 at A-10, inside the range the first memcpy reads.
    310   %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10
    311   %0 = bitcast i8* %add.ptr1 to i32*
    312   store i32 7, i32* %0, align 4
          ; First copy: 16 bytes from A-16 to A.
    313   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
          ; Three more stores into [A-16, A): sext(x) as i64 at A-9 and at A-16,
          ; then a zero byte at A-1.
    314   %conv = sext i32 %x to i64
    315   %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
    316   %1 = bitcast i8* %add.ptr2 to i64*
    317   store i64 %conv, i64* %1, align 8
    318   %2 = bitcast i8* %add.ptr to i64*
    319   store i64 %conv, i64* %2, align 8
    320   %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
    321   store i8 0, i8* %add.ptr5, align 1
          ; Second copy: 16 bytes from A-16 to A+16.
    322   %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
    323   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
    324   ret void
    325 }
    326 
    327 ; Function Attrs: nounwind uwtable
        ; test_overlap_4: the first 16-byte memcpy (A-16 -> A) has no store before it
        ; in this function; the second (A-16 -> A+16) follows three stores into
        ; [A-16, A): an i64 at A-8, an i32 at A-16 and an i32 at A-11.
        ; Expected output per prefix, as recorded in the assertions below:
        ;   * All prefixes keep the first copy as one 16-byte vector load/store
        ;     (movups for CHECK and DISABLED, vmovups for CHECK-AVX2/CHECK-AVX512).
        ;   * CHECK / CHECK-AVX2 / CHECK-AVX512: the second copy is emitted as
        ;     4+1+4+4+2+1 byte moves.
        ;   * DISABLED (run with --x86-disable-avoid-SFB): the second copy stays a
        ;     single movups load / movups store pair.
    328 define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
    329 ; CHECK-LABEL: test_overlap_4:
    330 ; CHECK:       # %bb.0: # %entry
    331 ; CHECK-NEXT:    movups -16(%rdi), %xmm0
    332 ; CHECK-NEXT:    movups %xmm0, (%rdi)
    333 ; CHECK-NEXT:    movslq %esi, %rax
    334 ; CHECK-NEXT:    movq %rax, -8(%rdi)
    335 ; CHECK-NEXT:    movl %eax, -16(%rdi)
    336 ; CHECK-NEXT:    movl $0, -11(%rdi)
    337 ; CHECK-NEXT:    movl -16(%rdi), %eax
    338 ; CHECK-NEXT:    movl %eax, 16(%rdi)
    339 ; CHECK-NEXT:    movb -12(%rdi), %al
    340 ; CHECK-NEXT:    movb %al, 20(%rdi)
    341 ; CHECK-NEXT:    movl -11(%rdi), %eax
    342 ; CHECK-NEXT:    movl %eax, 21(%rdi)
    343 ; CHECK-NEXT:    movl -7(%rdi), %eax
    344 ; CHECK-NEXT:    movl %eax, 25(%rdi)
    345 ; CHECK-NEXT:    movzwl -3(%rdi), %eax
    346 ; CHECK-NEXT:    movw %ax, 29(%rdi)
    347 ; CHECK-NEXT:    movb -1(%rdi), %al
    348 ; CHECK-NEXT:    movb %al, 31(%rdi)
    349 ; CHECK-NEXT:    retq
    350 ;
    351 ; DISABLED-LABEL: test_overlap_4:
    352 ; DISABLED:       # %bb.0: # %entry
    353 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    354 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
    355 ; DISABLED-NEXT:    movslq %esi, %rax
    356 ; DISABLED-NEXT:    movq %rax, -8(%rdi)
    357 ; DISABLED-NEXT:    movl %eax, -16(%rdi)
    358 ; DISABLED-NEXT:    movl $0, -11(%rdi)
    359 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    360 ; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
    361 ; DISABLED-NEXT:    retq
    362 ;
    363 ; CHECK-AVX2-LABEL: test_overlap_4:
    364 ; CHECK-AVX2:       # %bb.0: # %entry
    365 ; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
    366 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
    367 ; CHECK-AVX2-NEXT:    movslq %esi, %rax
    368 ; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
    369 ; CHECK-AVX2-NEXT:    movl %eax, -16(%rdi)
    370 ; CHECK-AVX2-NEXT:    movl $0, -11(%rdi)
    371 ; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
    372 ; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
    373 ; CHECK-AVX2-NEXT:    movb -12(%rdi), %al
    374 ; CHECK-AVX2-NEXT:    movb %al, 20(%rdi)
    375 ; CHECK-AVX2-NEXT:    movl -11(%rdi), %eax
    376 ; CHECK-AVX2-NEXT:    movl %eax, 21(%rdi)
    377 ; CHECK-AVX2-NEXT:    movl -7(%rdi), %eax
    378 ; CHECK-AVX2-NEXT:    movl %eax, 25(%rdi)
    379 ; CHECK-AVX2-NEXT:    movzwl -3(%rdi), %eax
    380 ; CHECK-AVX2-NEXT:    movw %ax, 29(%rdi)
    381 ; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
    382 ; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
    383 ; CHECK-AVX2-NEXT:    retq
    384 ;
    385 ; CHECK-AVX512-LABEL: test_overlap_4:
    386 ; CHECK-AVX512:       # %bb.0: # %entry
    387 ; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
    388 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
    389 ; CHECK-AVX512-NEXT:    movslq %esi, %rax
    390 ; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
    391 ; CHECK-AVX512-NEXT:    movl %eax, -16(%rdi)
    392 ; CHECK-AVX512-NEXT:    movl $0, -11(%rdi)
    393 ; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
    394 ; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
    395 ; CHECK-AVX512-NEXT:    movb -12(%rdi), %al
    396 ; CHECK-AVX512-NEXT:    movb %al, 20(%rdi)
    397 ; CHECK-AVX512-NEXT:    movl -11(%rdi), %eax
    398 ; CHECK-AVX512-NEXT:    movl %eax, 21(%rdi)
    399 ; CHECK-AVX512-NEXT:    movl -7(%rdi), %eax
    400 ; CHECK-AVX512-NEXT:    movl %eax, 25(%rdi)
    401 ; CHECK-AVX512-NEXT:    movzwl -3(%rdi), %eax
    402 ; CHECK-AVX512-NEXT:    movw %ax, 29(%rdi)
    403 ; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
    404 ; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
    405 ; CHECK-AVX512-NEXT:    retq
    406 entry:
          ; %add.ptr = A-16 is the source of both 16-byte copies below.
    407   %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
          ; First copy: 16 bytes from A-16 to A, with no store ahead of it.
    408   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
          ; Stores into [A-16, A): sext(x) as i64 at A-8, x as i32 at A-16,
          ; then a zero i32 at A-11.
    409   %conv = sext i32 %x to i64
    410   %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
    411   %0 = bitcast i8* %add.ptr1 to i64*
    412   store i64 %conv, i64* %0, align 8
    413   %1 = bitcast i8* %add.ptr to i32*
    414   store i32 %x, i32* %1, align 4
    415   %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11
    416   %2 = bitcast i8* %add.ptr3 to i32*
    417   store i32 0, i32* %2, align 4
          ; Second copy: 16 bytes from A-16 to A+16.
    418   %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16
    419   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
    420   ret void
    421 }
    422 
    423 ; Function Attrs: nounwind uwtable
        ; test_overlap_5: the first 16-byte memcpy (A-16 -> A) has no store before it
        ; in this function; the second (A-16 -> A+16) follows three stores into
        ; [A-16, A): an i64 at A-16, an i8 at A-14 and an i8 at A-11.
        ; Expected output per prefix, as recorded in the assertions below:
        ;   * All prefixes keep the first copy as one 16-byte vector load/store
        ;     (movups for CHECK and DISABLED, vmovups for CHECK-AVX2/CHECK-AVX512).
        ;   * CHECK / CHECK-AVX2 / CHECK-AVX512: the second copy is emitted as
        ;     2+1+2+1+8+2 byte moves.
        ;   * DISABLED (run with --x86-disable-avoid-SFB): the second copy stays a
        ;     single movups load / movups store pair.
    424 define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
    425 ; CHECK-LABEL: test_overlap_5:
    426 ; CHECK:       # %bb.0: # %entry
    427 ; CHECK-NEXT:    movups -16(%rdi), %xmm0
    428 ; CHECK-NEXT:    movups %xmm0, (%rdi)
    429 ; CHECK-NEXT:    movslq %esi, %rax
    430 ; CHECK-NEXT:    movq %rax, -16(%rdi)
    431 ; CHECK-NEXT:    movb %al, -14(%rdi)
    432 ; CHECK-NEXT:    movb $0, -11(%rdi)
    433 ; CHECK-NEXT:    movzwl -16(%rdi), %eax
    434 ; CHECK-NEXT:    movw %ax, 16(%rdi)
    435 ; CHECK-NEXT:    movb -14(%rdi), %al
    436 ; CHECK-NEXT:    movb %al, 18(%rdi)
    437 ; CHECK-NEXT:    movzwl -13(%rdi), %eax
    438 ; CHECK-NEXT:    movw %ax, 19(%rdi)
    439 ; CHECK-NEXT:    movb -11(%rdi), %al
    440 ; CHECK-NEXT:    movb %al, 21(%rdi)
    441 ; CHECK-NEXT:    movq -10(%rdi), %rax
    442 ; CHECK-NEXT:    movq %rax, 22(%rdi)
    443 ; CHECK-NEXT:    movzwl -2(%rdi), %eax
    444 ; CHECK-NEXT:    movw %ax, 30(%rdi)
    445 ; CHECK-NEXT:    retq
    446 ;
    447 ; DISABLED-LABEL: test_overlap_5:
    448 ; DISABLED:       # %bb.0: # %entry
    449 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    450 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
    451 ; DISABLED-NEXT:    movslq %esi, %rax
    452 ; DISABLED-NEXT:    movq %rax, -16(%rdi)
    453 ; DISABLED-NEXT:    movb %al, -14(%rdi)
    454 ; DISABLED-NEXT:    movb $0, -11(%rdi)
    455 ; DISABLED-NEXT:    movups -16(%rdi), %xmm0
    456 ; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
    457 ; DISABLED-NEXT:    retq
    458 ;
    459 ; CHECK-AVX2-LABEL: test_overlap_5:
    460 ; CHECK-AVX2:       # %bb.0: # %entry
    461 ; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
    462 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
    463 ; CHECK-AVX2-NEXT:    movslq %esi, %rax
    464 ; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
    465 ; CHECK-AVX2-NEXT:    movb %al, -14(%rdi)
    466 ; CHECK-AVX2-NEXT:    movb $0, -11(%rdi)
    467 ; CHECK-AVX2-NEXT:    movzwl -16(%rdi), %eax
    468 ; CHECK-AVX2-NEXT:    movw %ax, 16(%rdi)
    469 ; CHECK-AVX2-NEXT:    movb -14(%rdi), %al
    470 ; CHECK-AVX2-NEXT:    movb %al, 18(%rdi)
    471 ; CHECK-AVX2-NEXT:    movzwl -13(%rdi), %eax
    472 ; CHECK-AVX2-NEXT:    movw %ax, 19(%rdi)
    473 ; CHECK-AVX2-NEXT:    movb -11(%rdi), %al
    474 ; CHECK-AVX2-NEXT:    movb %al, 21(%rdi)
    475 ; CHECK-AVX2-NEXT:    movq -10(%rdi), %rax
    476 ; CHECK-AVX2-NEXT:    movq %rax, 22(%rdi)
    477 ; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
    478 ; CHECK-AVX2-NEXT:    movw %ax, 30(%rdi)
    479 ; CHECK-AVX2-NEXT:    retq
    480 ;
    481 ; CHECK-AVX512-LABEL: test_overlap_5:
    482 ; CHECK-AVX512:       # %bb.0: # %entry
    483 ; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
    484 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
    485 ; CHECK-AVX512-NEXT:    movslq %esi, %rax
    486 ; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
    487 ; CHECK-AVX512-NEXT:    movb %al, -14(%rdi)
    488 ; CHECK-AVX512-NEXT:    movb $0, -11(%rdi)
    489 ; CHECK-AVX512-NEXT:    movzwl -16(%rdi), %eax
    490 ; CHECK-AVX512-NEXT:    movw %ax, 16(%rdi)
    491 ; CHECK-AVX512-NEXT:    movb -14(%rdi), %al
    492 ; CHECK-AVX512-NEXT:    movb %al, 18(%rdi)
    493 ; CHECK-AVX512-NEXT:    movzwl -13(%rdi), %eax
    494 ; CHECK-AVX512-NEXT:    movw %ax, 19(%rdi)
    495 ; CHECK-AVX512-NEXT:    movb -11(%rdi), %al
    496 ; CHECK-AVX512-NEXT:    movb %al, 21(%rdi)
    497 ; CHECK-AVX512-NEXT:    movq -10(%rdi), %rax
    498 ; CHECK-AVX512-NEXT:    movq %rax, 22(%rdi)
    499 ; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
    500 ; CHECK-AVX512-NEXT:    movw %ax, 30(%rdi)
    501 ; CHECK-AVX512-NEXT:    retq
    502 entry:
          ; %add.ptr = A-16 is the source of both 16-byte copies below.
    503   %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
          ; First copy: 16 bytes from A-16 to A, with no store ahead of it.
    504   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
          ; Stores into [A-16, A): sext(x) as i64 at A-16, the low byte of x at
          ; A-14, then a zero byte at A-11.
    505   %conv = sext i32 %x to i64
    506   %0 = bitcast i8* %add.ptr to i64*
    507   store i64 %conv, i64* %0, align 8
    508   %conv2 = trunc i32 %x to i8
    509   %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14
    510   store i8 %conv2, i8* %add.ptr3, align 1
    511   %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11
    512   store i8 0, i8* %add.ptr4, align 1
          ; Second copy: 16 bytes from A-16 to A+16.
    513   %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
    514   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
    515   ret void
    516 }
    517 
    518 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
    519 attributes #1 = { argmemonly nounwind }
    520 
    521 
    522