; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512

; Overview: every function below copies the 16 bytes at [A-16, A) with
; llvm.memcpy twice (first to A, then to A+16) and writes into that source
; range with narrower stores around the copies. The four llc invocations above
; compile the same IR as: plain x86_64-linux, with --x86-disable-avoid-SFB,
; with -mcpu=core-avx2, and with -mcpu=skx.
; In the expectations, the run with --x86-disable-avoid-SFB keeps each copy as
; one 16-byte movups load/store pair, while the other three runs break a copy
; into smaller moves whenever stores into the source range precede it.
; Review note - "SFB" presumably stands for store-forwarding block; confirm
; against the X86 backend pass that owns the flag.

; ModuleID = '../testSFB/testOverlapBlocks.c'
source_filename = "../testSFB/testOverlapBlocks.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; test_overlap_1: an i32 store at A-8 precedes the first copy; an i64 store at
; A-9, an i64 store at A-16 and an i8 store at A-1 precede the second copy.
; Expected default output: first copy split into 8+4+4 bytes, second copy
; split into 8+4+2+1+1 bytes.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $7, -8(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: movl -8(%rdi), %eax
; CHECK-NEXT: movl %eax, 8(%rdi)
; CHECK-NEXT: movl -4(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -9(%rdi)
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movb $0, -1(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rax
; CHECK-NEXT: movq %rax, 16(%rdi)
; CHECK-NEXT: movl -8(%rdi), %eax
; CHECK-NEXT: movl %eax, 24(%rdi)
; CHECK-NEXT: movzwl -4(%rdi), %eax
; CHECK-NEXT: movw %ax, 28(%rdi)
; CHECK-NEXT: movb -2(%rdi), %al
; CHECK-NEXT: movb %al, 30(%rdi)
; CHECK-NEXT: movb -1(%rdi), %al
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_1:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $7, -8(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -9(%rdi)
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movb $0, -1(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_1:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $7, -8(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rdi)
; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%rdi)
; CHECK-AVX2-NEXT: movl -4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 28(%rdi)
; CHECK-AVX2-NEXT: movb -2(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
; CHECK-AVX2-NEXT: movb -1(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_1:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $7, -8(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rdi)
; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%rdi)
; CHECK-AVX512-NEXT: movl -4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 28(%rdi)
; CHECK-AVX512-NEXT: movb -2(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
; CHECK-AVX512-NEXT: movb -1(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
  %0 = bitcast i8* %add.ptr1 to i32*
  store i32 7, i32* %0, align 4
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
  %1 = bitcast i8* %add.ptr2 to i64*
  store i64 %conv, i64* %1, align 8
  %2 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %2, align 8
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
  store i8 0, i8* %add.ptr5, align 1
  %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1

; test_overlap_2: an i64 store at A-16 precedes the first copy; an i64 store
; at A-8 and an i32 store at A-12 precede the second copy.
; Expected default output: first copy split into 8+8 bytes, second copy split
; into 4+4+8 bytes.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: movq -8(%rdi), %rcx
; CHECK-NEXT: movq %rcx, 8(%rdi)
; CHECK-NEXT: movq %rax, -8(%rdi)
; CHECK-NEXT: movl $7, -12(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, 16(%rdi)
; CHECK-NEXT: movl -12(%rdi), %eax
; CHECK-NEXT: movl %eax, 20(%rdi)
; CHECK-NEXT: movq -8(%rdi), %rax
; CHECK-NEXT: movq %rax, 24(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_2:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movq %rax, -8(%rdi)
; DISABLED-NEXT: movl $7, -12(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_2:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rcx
; CHECK-AVX2-NEXT: movq %rcx, (%rdi)
; CHECK-AVX2-NEXT: movq -8(%rdi), %rcx
; CHECK-AVX2-NEXT: movq %rcx, 8(%rdi)
; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT: movl $7, -12(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT: movl -12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX2-NEXT: movq -8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_2:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rcx
; CHECK-AVX512-NEXT: movq %rcx, (%rdi)
; CHECK-AVX512-NEXT: movq -8(%rdi), %rcx
; CHECK-AVX512-NEXT: movq %rcx, 8(%rdi)
; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT: movl $7, -12(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT: movl -12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX512-NEXT: movq -8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  %conv = sext i32 %x to i64
  %0 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %0, align 8
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8
  %1 = bitcast i8* %add.ptr3 to i64*
  store i64 %conv, i64* %1, align 8
  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12
  %2 = bitcast i8* %add.ptr4 to i32*
  store i32 7, i32* %2, align 4
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_3: same shape as test_overlap_1, except the i32 store before
; the first copy lands at A-10 instead of A-8.
; Expected default output: first copy split into 4+2+4+4+2 bytes, second copy
; split into 8+2+4+1+1 bytes.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $7, -10(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movzwl -12(%rdi), %eax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: movl -10(%rdi), %eax
; CHECK-NEXT: movl %eax, 6(%rdi)
; CHECK-NEXT: movl -6(%rdi), %eax
; CHECK-NEXT: movl %eax, 10(%rdi)
; CHECK-NEXT: movzwl -2(%rdi), %eax
; CHECK-NEXT: movw %ax, 14(%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -9(%rdi)
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movb $0, -1(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rax
; CHECK-NEXT: movq %rax, 16(%rdi)
; CHECK-NEXT: movzwl -8(%rdi), %eax
; CHECK-NEXT: movw %ax, 24(%rdi)
; CHECK-NEXT: movl -6(%rdi), %eax
; CHECK-NEXT: movl %eax, 26(%rdi)
; CHECK-NEXT: movb -2(%rdi), %al
; CHECK-NEXT: movb %al, 30(%rdi)
; CHECK-NEXT: movb -1(%rdi), %al
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_3:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $7, -10(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -9(%rdi)
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movb $0, -1(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_3:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $7, -10(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rdi)
; CHECK-AVX2-NEXT: movzwl -12(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 4(%rdi)
; CHECK-AVX2-NEXT: movl -10(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 6(%rdi)
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 10(%rdi)
; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 14(%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT: movzwl -8(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 24(%rdi)
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 26(%rdi)
; CHECK-AVX2-NEXT: movb -2(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
; CHECK-AVX2-NEXT: movb -1(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_3:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $7, -10(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rdi)
; CHECK-AVX512-NEXT: movzwl -12(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 4(%rdi)
; CHECK-AVX512-NEXT: movl -10(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 6(%rdi)
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 10(%rdi)
; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 14(%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT: movzwl -8(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 24(%rdi)
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 26(%rdi)
; CHECK-AVX512-NEXT: movb -2(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
; CHECK-AVX512-NEXT: movb -1(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10
  %0 = bitcast i8* %add.ptr1 to i32*
  store i32 7, i32* %0, align 4
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
  %1 = bitcast i8* %add.ptr2 to i64*
  store i64 %conv, i64* %1, align 8
  %2 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %2, align 8
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
  store i8 0, i8* %add.ptr5, align 1
  %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_4: the first copy has no store before it in this function and
; is expected to stay a single 16-byte movups/vmovups pair in every run. An
; i64 store at A-8, an i32 store at A-16 and an i32 store at A-11 precede the
; second copy.
; Expected default output: second copy split into 4+1+4+4+2+1 bytes.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movups -16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -8(%rdi)
; CHECK-NEXT: movl %eax, -16(%rdi)
; CHECK-NEXT: movl $0, -11(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, 16(%rdi)
; CHECK-NEXT: movb -12(%rdi), %al
; CHECK-NEXT: movb %al, 20(%rdi)
; CHECK-NEXT: movl -11(%rdi), %eax
; CHECK-NEXT: movl %eax, 21(%rdi)
; CHECK-NEXT: movl -7(%rdi), %eax
; CHECK-NEXT: movl %eax, 25(%rdi)
; CHECK-NEXT: movzwl -3(%rdi), %eax
; CHECK-NEXT: movw %ax, 29(%rdi)
; CHECK-NEXT: movb -1(%rdi), %al
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_4:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -8(%rdi)
; DISABLED-NEXT: movl %eax, -16(%rdi)
; DISABLED-NEXT: movl $0, -11(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_4:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT: movl %eax, -16(%rdi)
; CHECK-AVX2-NEXT: movl $0, -11(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT: movb -12(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 20(%rdi)
; CHECK-AVX2-NEXT: movl -11(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 21(%rdi)
; CHECK-AVX2-NEXT: movl -7(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 25(%rdi)
; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 29(%rdi)
; CHECK-AVX2-NEXT: movb -1(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_4:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT: movl %eax, -16(%rdi)
; CHECK-AVX512-NEXT: movl $0, -11(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT: movb -12(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 20(%rdi)
; CHECK-AVX512-NEXT: movl -11(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 21(%rdi)
; CHECK-AVX512-NEXT: movl -7(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 25(%rdi)
; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 29(%rdi)
; CHECK-AVX512-NEXT: movb -1(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
  %0 = bitcast i8* %add.ptr1 to i64*
  store i64 %conv, i64* %0, align 8
  %1 = bitcast i8* %add.ptr to i32*
  store i32 %x, i32* %1, align 4
  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11
  %2 = bitcast i8* %add.ptr3 to i32*
  store i32 0, i32* %2, align 4
  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_5: the first copy has no store before it in this function and
; is expected to stay a single 16-byte movups/vmovups pair in every run. An
; i64 store at A-16 and single-byte stores at A-14 and A-11 precede the second
; copy.
; Expected default output: second copy split into 2+1+2+1+8+2 bytes.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movups -16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movb %al, -14(%rdi)
; CHECK-NEXT: movb $0, -11(%rdi)
; CHECK-NEXT: movzwl -16(%rdi), %eax
; CHECK-NEXT: movw %ax, 16(%rdi)
; CHECK-NEXT: movb -14(%rdi), %al
; CHECK-NEXT: movb %al, 18(%rdi)
; CHECK-NEXT: movzwl -13(%rdi), %eax
; CHECK-NEXT: movw %ax, 19(%rdi)
; CHECK-NEXT: movb -11(%rdi), %al
; CHECK-NEXT: movb %al, 21(%rdi)
; CHECK-NEXT: movq -10(%rdi), %rax
; CHECK-NEXT: movq %rax, 22(%rdi)
; CHECK-NEXT: movzwl -2(%rdi), %eax
; CHECK-NEXT: movw %ax, 30(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_5:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movb %al, -14(%rdi)
; DISABLED-NEXT: movb $0, -11(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_5:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movb %al, -14(%rdi)
; CHECK-AVX2-NEXT: movb $0, -11(%rdi)
; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 16(%rdi)
; CHECK-AVX2-NEXT: movb -14(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 18(%rdi)
; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 19(%rdi)
; CHECK-AVX2-NEXT: movb -11(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 21(%rdi)
; CHECK-AVX2-NEXT: movq -10(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 22(%rdi)
; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 30(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_5:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movb %al, -14(%rdi)
; CHECK-AVX512-NEXT: movb $0, -11(%rdi)
; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 16(%rdi)
; CHECK-AVX512-NEXT: movb -14(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 18(%rdi)
; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 19(%rdi)
; CHECK-AVX512-NEXT: movb -11(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 21(%rdi)
; CHECK-AVX512-NEXT: movq -10(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 22(%rdi)
; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 30(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %0 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %0, align 8
  %conv2 = trunc i32 %x to i8
  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14
  store i8 %conv2, i8* %add.ptr3, align 1
  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11
  store i8 0, i8* %add.ptr4, align 1
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }