; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64

; Codegen tests for the SSE4.2 string-compare intrinsics on i686 and x86-64.
;
; Two families are covered: pcmpestri (lengths passed explicitly as i32
; arguments) and pcmpistri (no length arguments). Each family is exercised
; with vector arguments ("reg") and with vectors loaded through pointers with
; align 1 ("mem"), in three shapes:
;   *_eq_*   - result of the "...ric128" intrinsic compared against 0
;   *_idx_*  - result of the "...ri128" intrinsic returned directly
;   *_diff_* - result of the "...ri128" intrinsic used as an extractelement
;              index into both inputs (the two elements are subtracted),
;              unless it equals the element count of the vector
;
; The i8 tests pass imm8 24 and compare the index against 16. The i16 tests
; pass imm8 25 (24 | 1; per the Intel SDM, bit 0 of imm8 selects 16-bit
; elements) and compare the index against 8, so that the index handed to
; extractelement <8 x i16> stays within the eight lanes.

declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8>, i8)
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8>, i8)
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8>, i8)

;
; pcmpestri, i8 elements
;

; Flag result, register operands: returns (pcmpestric128(...) == 0).
define i1 @pcmpestri_reg_eq_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result, register operands: returns pcmpestri128(...) unchanged.
define i32 @pcmpestri_reg_idx_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; Index result used as a variable extractelement index, register operands.
; Returns 0 when the index is 16, otherwise zext(lhs[idx] - rhs[idx]).
define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 12(%ebp), %edx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB2_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB2_3
; X32-NEXT:  .LBB2_2: # %compare
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:  .LBB2_3: # %exit
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB2_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB2_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; Flag result with both vectors loaded from memory (align 1).
define i1 @pcmpestri_mem_eq_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $24, (%ecx), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result with both vectors loaded from memory (align 1).
define i32 @pcmpestri_mem_idx_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $24, (%ecx), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; Index-then-extract with both vectors loaded from memory (align 1). The
; loaded values are used by both the intrinsic call and the extractelements.
define i32 @pcmpestri_mem_diff_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 20(%ebp), %edx
; X32-NEXT:    movl 16(%ebp), %ecx
; X32-NEXT:    movl 8(%ebp), %esi
; X32-NEXT:    movdqu (%esi), %xmm1
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpestri $24, %xmm0, %xmm1
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB5_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB5_3
; X32-NEXT:  .LBB5_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:  .LBB5_3: # %exit
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    leal -4(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rdx), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB5_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB5_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

;
; pcmpestri, i16 elements (inputs are bitcast from <8 x i16> to <16 x i8>)
;
; NOTE(review): the three register-operand tests below previously passed
; imm8 24 and compared the index against 16, unlike the mem_*_i16 tests. They
; now use 25 and 8. The assertion immediates were adjusted by hand to match;
; re-run utils/update_llc_test_checks.py to confirm.

; Flag result, register operands, imm8 25.
define i1 @pcmpestri_reg_eq_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result, register operands, imm8 25.
define i32 @pcmpestri_reg_idx_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  ret i32 %idx
}

; Index-then-extract on <8 x i16>, register operands. The index is compared
; against 8 before it is used, so extractelement is only reached with an
; index other than the lane count.
define i32 @pcmpestri_reg_diff_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 12(%ebp), %edx
; X32-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB8_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB8_3
; X32-NEXT:  .LBB8_2: # %compare
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:  .LBB8_3: # %exit
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB8_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB8_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; Flag result, <8 x i16> vectors loaded from memory (align 1), imm8 25.
define i1 @pcmpestri_mem_eq_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $25, (%ecx), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result, <8 x i16> vectors loaded from memory (align 1), imm8 25.
define i32 @pcmpestri_mem_idx_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $25, (%ecx), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  ret i32 %idx
}

; Index-then-extract on <8 x i16> vectors loaded from memory (align 1).
define i32 @pcmpestri_mem_diff_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 20(%ebp), %edx
; X32-NEXT:    movl 16(%ebp), %ecx
; X32-NEXT:    movl 8(%ebp), %esi
; X32-NEXT:    movdqu (%esi), %xmm1
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpestri $25, %xmm0, %xmm1
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB11_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB11_3
; X32-NEXT:  .LBB11_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:  .LBB11_3: # %exit
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    leal -4(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rdx), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB11_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB11_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

;
; pcmpistri, i8 elements
;

; Flag result, register operands: returns (pcmpistric128(...) == 0).
define i1 @pcmpistri_reg_eq_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result, register operands: returns pcmpistri128(...) unchanged.
define i32 @pcmpistri_reg_idx_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  ret i32 %idx
}

; Index-then-extract, register operands.
; Returns 0 when the index is 16, otherwise zext(lhs[idx] - rhs[idx]).
define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB14_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB14_2: # %compare
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB14_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB14_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; Flag result with both vectors loaded from memory (align 1).
define i1 @pcmpistri_mem_eq_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $24, (%eax), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $24, (%rsi), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result with both vectors loaded from memory (align 1).
define i32 @pcmpistri_mem_idx_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $24, (%eax), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $24, (%rsi), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  ret i32 %idx
}

; Index-then-extract with both vectors loaded from memory (align 1). The
; loaded values are used by both the intrinsic call and the extractelements.
define i32 @pcmpistri_mem_diff_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 8(%ebp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm1
; X32-NEXT:    movdqu (%eax), %xmm0
; X32-NEXT:    pcmpistri $24, %xmm0, %xmm1
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB17_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB17_3
; X32-NEXT:  .LBB17_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:  .LBB17_3: # %exit
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rsi), %xmm0
; X64-NEXT:    pcmpistri $24, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB17_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB17_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

;
; pcmpistri, i16 elements (inputs are bitcast from <8 x i16> to <16 x i8>)
;
; NOTE(review): the three register-operand tests below previously passed
; imm8 24 and compared the index against 16, unlike the mem_*_i16 tests. They
; now use 25 and 8. The assertion immediates were adjusted by hand to match;
; re-run utils/update_llc_test_checks.py to confirm.

; Flag result, register operands, imm8 25.
define i1 @pcmpistri_reg_eq_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result, register operands, imm8 25.
define i32 @pcmpistri_reg_idx_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  ret i32 %idx
}

; Index-then-extract on <8 x i16>, register operands. The index is compared
; against 8 before it is used, so extractelement is only reached with an
; index other than the lane count.
define i32 @pcmpistri_reg_diff_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB20_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB20_2: # %compare
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB20_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB20_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; Flag result, <8 x i16> vectors loaded from memory (align 1), imm8 25.
define i1 @pcmpistri_mem_eq_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $25, (%eax), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $25, (%rsi), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Index result, <8 x i16> vectors loaded from memory (align 1), imm8 25.
define i32 @pcmpistri_mem_idx_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $25, (%eax), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $25, (%rsi), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  ret i32 %idx
}

; Index-then-extract on <8 x i16> vectors loaded from memory (align 1).
define i32 @pcmpistri_mem_diff_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 8(%ebp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm1
; X32-NEXT:    movdqu (%eax), %xmm0
; X32-NEXT:    pcmpistri $25, %xmm0, %xmm1
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB23_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB23_3
; X32-NEXT:  .LBB23_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:  .LBB23_3: # %exit
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rsi), %xmm0
; X64-NEXT:    pcmpistri $25, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB23_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB23_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

;
; Combined results
;

; Calls both the flag ("...ric128") and index ("...ri128") intrinsics with
; identical operands and stores each result; the assertions expect a single
; pcmpestri instruction to serve both calls.
define void @pcmpestr_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i32* %iptr, i32* %fptr) nounwind {
; X32-LABEL: pcmpestr_index_flag:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    setb %bl
; X32-NEXT:    movl %ecx, (%edi)
; X32-NEXT:    movl %ebx, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestr_index_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rcx, %r8
; X64-NEXT:    movq %rdx, %r9
; X64-NEXT:    xorl %r10d, %r10d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setb %r10b
; X64-NEXT:    movl %ecx, (%r9)
; X64-NEXT:    movl %r10d, (%r8)
; X64-NEXT:    retq
entry:
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  store i32 %index, i32* %iptr
  store i32 %flag, i32* %fptr
  ret void
}

define void @pcmpestr_mask_flag(<16 
x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %fptr) nounwind { 1009 ; X32-LABEL: pcmpestr_mask_flag: 1010 ; X32: # %bb.0: # %entry 1011 ; X32-NEXT: pushl %ebx 1012 ; X32-NEXT: pushl %esi 1013 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1014 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 1015 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1016 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1017 ; X32-NEXT: xorl %ebx, %ebx 1018 ; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 1019 ; X32-NEXT: setb %bl 1020 ; X32-NEXT: movdqa %xmm0, (%esi) 1021 ; X32-NEXT: movl %ebx, (%ecx) 1022 ; X32-NEXT: popl %esi 1023 ; X32-NEXT: popl %ebx 1024 ; X32-NEXT: retl 1025 ; 1026 ; X64-LABEL: pcmpestr_mask_flag: 1027 ; X64: # %bb.0: # %entry 1028 ; X64-NEXT: movq %rdx, %r8 1029 ; X64-NEXT: xorl %r9d, %r9d 1030 ; X64-NEXT: movl %edi, %eax 1031 ; X64-NEXT: movl %esi, %edx 1032 ; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 1033 ; X64-NEXT: setb %r9b 1034 ; X64-NEXT: movdqa %xmm0, (%r8) 1035 ; X64-NEXT: movl %r9d, (%rcx) 1036 ; X64-NEXT: retq 1037 entry: 1038 %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1039 %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1040 store <16 x i8> %mask, <16 x i8>* %mptr 1041 store i32 %flag, i32* %fptr 1042 ret void 1043 } 1044 1045 define void @pcmpestr_mask_index(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %iptr) nounwind { 1046 ; X32-LABEL: pcmpestr_mask_index: 1047 ; X32: # %bb.0: # %entry 1048 ; X32-NEXT: pushl %edi 1049 ; X32-NEXT: pushl %esi 1050 ; X32-NEXT: movdqa %xmm0, %xmm2 1051 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1052 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1053 ; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 1054 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 1055 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi 1056 ; X32-NEXT: pcmpestri $24, %xmm1, %xmm2 1057 ; X32-NEXT: movdqa %xmm0, (%edi) 1058 
; X32-NEXT: movl %ecx, (%esi) 1059 ; X32-NEXT: popl %esi 1060 ; X32-NEXT: popl %edi 1061 ; X32-NEXT: retl 1062 ; 1063 ; X64-LABEL: pcmpestr_mask_index: 1064 ; X64: # %bb.0: # %entry 1065 ; X64-NEXT: movq %rcx, %r8 1066 ; X64-NEXT: movq %rdx, %r9 1067 ; X64-NEXT: movdqa %xmm0, %xmm2 1068 ; X64-NEXT: movl %edi, %eax 1069 ; X64-NEXT: movl %esi, %edx 1070 ; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 1071 ; X64-NEXT: pcmpestri $24, %xmm1, %xmm2 1072 ; X64-NEXT: movdqa %xmm0, (%r9) 1073 ; X64-NEXT: movl %ecx, (%r8) 1074 ; X64-NEXT: retq 1075 entry: 1076 %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1077 %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1078 store <16 x i8> %mask, <16 x i8>* %mptr 1079 store i32 %index, i32* %iptr 1080 ret void 1081 } 1082 1083 define void @pcmpestr_mask_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { 1084 ; X32-LABEL: pcmpestr_mask_index_flag: 1085 ; X32: # %bb.0: # %entry 1086 ; X32-NEXT: pushl %ebp 1087 ; X32-NEXT: pushl %ebx 1088 ; X32-NEXT: pushl %edi 1089 ; X32-NEXT: pushl %esi 1090 ; X32-NEXT: movdqa %xmm0, %xmm2 1091 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1092 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1093 ; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 1094 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 1095 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi 1096 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp 1097 ; X32-NEXT: xorl %ebx, %ebx 1098 ; X32-NEXT: pcmpestri $24, %xmm1, %xmm2 1099 ; X32-NEXT: setb %bl 1100 ; X32-NEXT: movdqa %xmm0, (%ebp) 1101 ; X32-NEXT: movl %ecx, (%edi) 1102 ; X32-NEXT: movl %ebx, (%esi) 1103 ; X32-NEXT: popl %esi 1104 ; X32-NEXT: popl %edi 1105 ; X32-NEXT: popl %ebx 1106 ; X32-NEXT: popl %ebp 1107 ; X32-NEXT: retl 1108 ; 1109 ; X64-LABEL: pcmpestr_mask_index_flag: 1110 ; X64: # %bb.0: # %entry 1111 ; X64-NEXT: movq %rcx, %r9 
1112 ; X64-NEXT: movq %rdx, %r10 1113 ; X64-NEXT: movdqa %xmm0, %xmm2 1114 ; X64-NEXT: movl %edi, %eax 1115 ; X64-NEXT: movl %esi, %edx 1116 ; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 1117 ; X64-NEXT: xorl %esi, %esi 1118 ; X64-NEXT: pcmpestri $24, %xmm1, %xmm2 1119 ; X64-NEXT: setb %sil 1120 ; X64-NEXT: movdqa %xmm0, (%r10) 1121 ; X64-NEXT: movl %ecx, (%r9) 1122 ; X64-NEXT: movl %esi, (%r8) 1123 ; X64-NEXT: retq 1124 entry: 1125 %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1126 %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1127 %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1128 store <16 x i8> %mask, <16 x i8>* %mptr 1129 store i32 %index, i32* %iptr 1130 store i32 %flag, i32* %fptr 1131 ret void 1132 } 1133 1134 define void @pcmpistr_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, i32* %iptr, i32* %fptr) nounwind { 1135 ; X32-LABEL: pcmpistr_index_flag: 1136 ; X32: # %bb.0: # %entry 1137 ; X32-NEXT: pushl %ebx 1138 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1139 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1140 ; X32-NEXT: xorl %ebx, %ebx 1141 ; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 1142 ; X32-NEXT: setb %bl 1143 ; X32-NEXT: movl %ecx, (%edx) 1144 ; X32-NEXT: movl %ebx, (%eax) 1145 ; X32-NEXT: popl %ebx 1146 ; X32-NEXT: retl 1147 ; 1148 ; X64-LABEL: pcmpistr_index_flag: 1149 ; X64: # %bb.0: # %entry 1150 ; X64-NEXT: xorl %eax, %eax 1151 ; X64-NEXT: pcmpistri $24, %xmm1, %xmm0 1152 ; X64-NEXT: setb %al 1153 ; X64-NEXT: movl %ecx, (%rdi) 1154 ; X64-NEXT: movl %eax, (%rsi) 1155 ; X64-NEXT: retq 1156 entry: 1157 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1158 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1159 store i32 %index, i32* %iptr 1160 store i32 %flag, i32* %fptr 1161 ret void 1162 } 1163 
; Mask + flag using the pcmpistr* intrinsics, which take no length operands.
; Calls pcmpistric128 (flag) and pcmpistrm128 (mask) with identical operands
; and immediate. The expected assembly contains a single pcmpistrm: the mask
; is stored from %xmm0 and the flag is produced by the setb that follows it.
define void @pcmpistr_mask_flag(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %fptr) nounwind {
; X32-LABEL: pcmpistr_mask_flag:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    pcmpistrm $24, %xmm1, %xmm0
; X32-NEXT:    setb %dl
; X32-NEXT:    movdqa %xmm0, (%ecx)
; X32-NEXT:    movl %edx, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistr_mask_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistrm $24, %xmm1, %xmm0
; X64-NEXT:    setb %al
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movl %eax, (%rsi)
; X64-NEXT:    retq
entry:
  %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  store <16 x i8> %mask, <16 x i8>* %mptr
  store i32 %flag, i32* %fptr
  ret void
}

; Mask + index using the pcmpistr* intrinsics.
; Calls pcmpistri128 (index) and pcmpistrm128 (mask) with identical operands
; and immediate. The expected assembly contains both instructions, with
; pcmpistri emitted first and pcmpistrm second, each operating on %xmm0 and
; %xmm1 directly (no register copy is expected here). The mask is stored from
; %xmm0 and the index from %ecx.
define void @pcmpistr_mask_index(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %iptr) nounwind {
; X32-LABEL: pcmpistr_mask_index:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    pcmpistrm $24, %xmm1, %xmm0
; X32-NEXT:    movdqa %xmm0, (%edx)
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistr_mask_index:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    pcmpistrm $24, %xmm1, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movl %ecx, (%rsi)
; X64-NEXT:    retq
entry:
  %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  store <16 x i8> %mask, <16 x i8>* %mptr
  store i32 %index, i32* %iptr
  ret void
}

; Mask + index + flag using the pcmpistr* intrinsics.
; Calls all three intrinsics (pcmpistri128, pcmpistrm128, pcmpistric128) with
; identical operands and immediate. The expected assembly contains one
; pcmpistrm on %xmm0 and one pcmpistri on a copy of %xmm0 held in %xmm2; the
; flag is produced by the setb placed directly after the pcmpistri.
define void @pcmpistr_mask_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind {
; X32-LABEL: pcmpistr_mask_index_flag:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %esi
; X32-NEXT:    movdqa %xmm0, %xmm2
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    pcmpistrm $24, %xmm1, %xmm0
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm2
; X32-NEXT:    setb %bl
; X32-NEXT:    movdqa %xmm0, (%esi)
; X32-NEXT:    movl %ecx, (%edx)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistr_mask_index_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    pcmpistrm $24, %xmm1, %xmm0
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm2
; X64-NEXT:    setb %al
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movl %ecx, (%rsi)
; X64-NEXT:    movl %eax, (%rdx)
; X64-NEXT:    retq
entry:
  %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  store <16 x i8> %mask, <16 x i8>* %mptr
  store i32 %index, i32* %iptr
  store i32 %flag, i32* %fptr
  ret void
}

; Make sure we don't fold loads when we need to emit pcmpistrm and pcmpistri.
; Mask + index + flag where the rhs operand comes from memory.
; %rhs is loaded once from %rhsptr (align 1) and passed to all three
; pcmpistr* intrinsics. The expected assembly performs one movdqu into %xmm2
; and uses that register as the operand of both pcmpistrm and pcmpistri,
; rather than giving either instruction a memory operand. The lhs is copied
; from %xmm0 to %xmm1 for the pcmpistri, and the flag is produced by the setb
; that follows it.
define void @pcmpistr_mask_index_flag_load(<16 x i8> %lhs, <16 x i8>* %rhsptr, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind {
; X32-LABEL: pcmpistr_mask_index_flag_load:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %esi
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm2
; X32-NEXT:    pcmpistrm $24, %xmm2, %xmm0
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpistri $24, %xmm2, %xmm1
; X32-NEXT:    setb %bl
; X32-NEXT:    movdqa %xmm0, (%esi)
; X32-NEXT:    movl %ecx, (%edx)
; X32-NEXT:    movl %ebx, (%eax)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistr_mask_index_flag_load:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    movdqu (%rdi), %xmm2
; X64-NEXT:    pcmpistrm $24, %xmm2, %xmm0
; X64-NEXT:    xorl %edi, %edi
; X64-NEXT:    pcmpistri $24, %xmm2, %xmm1
; X64-NEXT:    setb %dil
; X64-NEXT:    movdqa %xmm0, (%rsi)
; X64-NEXT:    movl %ecx, (%rdx)
; X64-NEXT:    movl %edi, (%rax)
; X64-NEXT:    retq
entry:
  ; Single load whose value feeds all three intrinsic calls below.
  %rhs = load <16 x i8>, <16 x i8>* %rhsptr, align 1
  %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  store <16 x i8> %mask, <16 x i8>* %mptr
  store i32 %index, i32* %iptr
  store i32 %flag, i32* %fptr
  ret void
}

; Make sure we don't fold nontemporal loads.
; Flag-only compare whose rhs operand is a nontemporal load.
; %rhs is loaded from %rhsptr with align 16 and a !nontemporal attachment,
; then passed to pcmpestric128, whose result is returned. The expected
; assembly keeps the load as a separate movntdqa into %xmm1 and gives
; pcmpestri the register operand; the returned flag is produced by setb.
define i32 @pcmpestri_nontemporal(<16 x i8> %lhs, i32 %lhs_len, <16 x i8>* %rhsptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_nontemporal:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movntdqa (%ecx), %xmm1
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    setb %bl
; X32-NEXT:    movl %ebx, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_nontemporal:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movntdqa (%rsi), %xmm1
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setb %sil
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    retq
entry:
  ; The !nontemporal attachment on this load is what the test exercises.
  %rhs = load <16 x i8>, <16 x i8>* %rhsptr, align 16, !nontemporal !0
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %flag
}

; Metadata node referenced by the !nontemporal attachment on the load above.
!0 = !{ i32 1 }