; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; https://llvm.org/bugs/show_bug.cgi?id=27100
;
; Verify how non-zero memsets of 16..256 bytes are lowered per subtarget:
; scalar 8-byte stores when unaligned 16-byte ops are slow (SSE), unaligned
; xmm stores when they are fast (SSE2FAST), and ymm stores on AVX/AVX2.
; The first group uses a constant fill byte (42) via __memset_chk; the
; second group repeats the sizes with a non-constant fill byte, which must
; first be splatted into a register.

define void @memset_16_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_16_nonzero_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %xmm0, (%rdi)
; AVX-NEXT:    retq
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 16, i64 -1)
  ret void
}

define void @memset_32_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 24(%rdi)
; SSE-NEXT:    movq %rax, 16(%rdi)
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_32_nonzero_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 32, i64 -1)
  ret void
}

define void @memset_64_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 56(%rdi)
; SSE-NEXT:    movq %rax, 48(%rdi)
; SSE-NEXT:    movq %rax, 40(%rdi)
; SSE-NEXT:    movq %rax, 32(%rdi)
; SSE-NEXT:    movq %rax, 24(%rdi)
; SSE-NEXT:    movq %rax, 16(%rdi)
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_64_nonzero_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_64_nonzero_bytes:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
  ret void
}

define void @memset_128_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT:    movq %rax, 120(%rdi)
; SSE-NEXT:    movq %rax, 112(%rdi)
; SSE-NEXT:    movq %rax, 104(%rdi)
; SSE-NEXT:    movq %rax, 96(%rdi)
; SSE-NEXT:    movq %rax, 88(%rdi)
; SSE-NEXT:    movq %rax, 80(%rdi)
; SSE-NEXT:    movq %rax, 72(%rdi)
; SSE-NEXT:    movq %rax, 64(%rdi)
; SSE-NEXT:    movq %rax, 56(%rdi)
; SSE-NEXT:    movq %rax, 48(%rdi)
; SSE-NEXT:    movq %rax, 40(%rdi)
; SSE-NEXT:    movq %rax, 32(%rdi)
; SSE-NEXT:    movq %rax, 24(%rdi)
; SSE-NEXT:    movq %rax, 16(%rdi)
; SSE-NEXT:    movq %rax, 8(%rdi)
; SSE-NEXT:    movq %rax, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_128_nonzero_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_128_nonzero_bytes:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
  ret void
}

define void @memset_256_nonzero_bytes(i8* %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    .cfi_def_cfa_offset 16
; SSE-NEXT:    movl $42, %esi
; SSE-NEXT:    movl $256, %edx # imm = 0x100
; SSE-NEXT:    callq memset
; SSE-NEXT:    popq %rax
; SSE-NEXT:    .cfi_def_cfa_offset 8
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_256_nonzero_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT:    movups %xmm0, 240(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 224(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 208(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 192(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 176(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 160(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 144(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 128(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movups %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX-LABEL: memset_256_nonzero_bytes:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT:    vmovups %ymm0, 224(%rdi)
; AVX-NEXT:    vmovups %ymm0, 192(%rdi)
; AVX-NEXT:    vmovups %ymm0, 160(%rdi)
; AVX-NEXT:    vmovups %ymm0, 128(%rdi)
; AVX-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
  ret void
}

declare i8* @__memset_chk(i8*, i32, i64, i64)

; Repeat with a non-constant value for the stores.

define void @memset_16_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_16_nonconst_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX2-NEXT:    retq
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 16, i1 false)
  ret void
}

define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 24(%rdi)
; SSE-NEXT:    movq %rcx, 16(%rdi)
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_32_nonconst_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 32, i1 false)
  ret void
}

define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 56(%rdi)
; SSE-NEXT:    movq %rcx, 48(%rdi)
; SSE-NEXT:    movq %rcx, 40(%rdi)
; SSE-NEXT:    movq %rcx, 32(%rdi)
; SSE-NEXT:    movq %rcx, 24(%rdi)
; SSE-NEXT:    movq %rcx, 16(%rdi)
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_64_nonconst_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i1 false)
  ret void
}

define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT:    imulq %rax, %rcx
; SSE-NEXT:    movq %rcx, 120(%rdi)
; SSE-NEXT:    movq %rcx, 112(%rdi)
; SSE-NEXT:    movq %rcx, 104(%rdi)
; SSE-NEXT:    movq %rcx, 96(%rdi)
; SSE-NEXT:    movq %rcx, 88(%rdi)
; SSE-NEXT:    movq %rcx, 80(%rdi)
; SSE-NEXT:    movq %rcx, 72(%rdi)
; SSE-NEXT:    movq %rcx, 64(%rdi)
; SSE-NEXT:    movq %rcx, 56(%rdi)
; SSE-NEXT:    movq %rcx, 48(%rdi)
; SSE-NEXT:    movq %rcx, 40(%rdi)
; SSE-NEXT:    movq %rcx, 32(%rdi)
; SSE-NEXT:    movq %rcx, 24(%rdi)
; SSE-NEXT:    movq %rcx, 16(%rdi)
; SSE-NEXT:    movq %rcx, 8(%rdi)
; SSE-NEXT:    movq %rcx, (%rdi)
; SSE-NEXT:    retq
;
; SSE2FAST-LABEL: memset_128_nonconst_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT:    movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i1 false)
  ret void
}

define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $256, %edx # imm = 0x100
; SSE-NEXT:    jmp memset # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonconst_bytes:
; SSE2FAST:       # %bb.0:
; SSE2FAST-NEXT:    movd %esi, %xmm0
; SSE2FAST-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT:    movdqu %xmm0, 240(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 224(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 208(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 192(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 176(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 160(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 144(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 128(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT:    movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT:    retq
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, 224(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 192(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 160(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 128(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 96(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovups %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %esi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i1 false)
  ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1