; (code-viewer navigation header removed) LLVM CodeGen/X86 regression test
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      3 
      4 declare void @f()
        ; Two <4 x i32> compares produce <4 x i1> masks that must survive the
        ; call to @f.  The expected SKX code spills each mask register (%k0) to
        ; the stack with kmovw before the call, reloads into %k0/%k1 afterwards,
        ; ORs them with korw, and widens the result back to a vector with
        ; vpmovm2d.
      5 define <4 x i1> @test_4i1(<4 x i32> %a, <4 x i32> %b) {
      6 ; CHECK-LABEL: test_4i1:
      7 ; CHECK:       ## BB#0:
      8 ; CHECK-NEXT:    pushq %rax
      9 ; CHECK-NEXT:  Ltmp0:
     10 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
     11 ; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
     12 ; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
     13 ; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
     14 ; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
     15 ; CHECK-NEXT:    callq _f
     16 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
     17 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
     18 ; CHECK-NEXT:    korw %k1, %k0, %k0
     19 ; CHECK-NEXT:    vpmovm2d %k0, %xmm0
     20 ; CHECK-NEXT:    popq %rax
     21 ; CHECK-NEXT:    retq
     22 
     23   %cmp_res = icmp ugt <4 x i32> %a, %b
     24   %cmp_res2 = icmp sgt <4 x i32> %a, %b
     25   call void @f()
     26   %res = or <4 x i1> %cmp_res, %cmp_res2
     27   ret <4 x i1> %res
     28 }
     29 
        ; Same spill-across-call pattern for <8 x i1>: both compare masks are
        ; saved with kmovw around the call to @f.  Note the generated code ORs
        ; with korb (byte-granularity mask OR) and expands with vpmovm2w, i.e.
        ; the <8 x i1> result is materialized as eight i16 lanes in %xmm0.
     30 define <8 x i1> @test_8i1(<8 x i32> %a, <8 x i32> %b) {
     31 ; CHECK-LABEL: test_8i1:
     32 ; CHECK:       ## BB#0:
     33 ; CHECK-NEXT:    pushq %rax
     34 ; CHECK-NEXT:  Ltmp1:
     35 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
     36 ; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k0
     37 ; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
     38 ; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
     39 ; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
     40 ; CHECK-NEXT:    callq _f
     41 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
     42 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
     43 ; CHECK-NEXT:    korb %k1, %k0, %k0
     44 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
     45 ; CHECK-NEXT:    popq %rax
     46 ; CHECK-NEXT:    retq
     47 
     48   %cmp_res = icmp ugt <8 x i32> %a, %b
     49   %cmp_res2 = icmp sgt <8 x i32> %a, %b
     50   call void @f()
     51   %res = or <8 x i1> %cmp_res, %cmp_res2
     52   ret <8 x i1> %res
     53 }
     54 
        ; <16 x i1> case: the compares now use full 512-bit (zmm) sources, but
        ; the 16-bit masks still fit the 2-byte kmovw spill slots.  After the
        ; call the masks are ORed with korw and expanded to sixteen byte lanes
        ; in %xmm0 with vpmovm2b.
     55 define <16 x i1> @test_16i1(<16 x i32> %a, <16 x i32> %b) {
     56 ; CHECK-LABEL: test_16i1:
     57 ; CHECK:       ## BB#0:
     58 ; CHECK-NEXT:    pushq %rax
     59 ; CHECK-NEXT:  Ltmp2:
     60 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
     61 ; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
     62 ; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
     63 ; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
     64 ; CHECK-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
     65 ; CHECK-NEXT:    callq _f
     66 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
     67 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1 ## 2-byte Folded Reload
     68 ; CHECK-NEXT:    korw %k1, %k0, %k0
     69 ; CHECK-NEXT:    vpmovm2b %k0, %xmm0
     70 ; CHECK-NEXT:    popq %rax
     71 ; CHECK-NEXT:    retq
     72   %cmp_res = icmp ugt <16 x i32> %a, %b
     73   %cmp_res2 = icmp sgt <16 x i32> %a, %b
     74   call void @f()
     75   %res = or <16 x i1> %cmp_res, %cmp_res2
     76   ret <16 x i1> %res
     77 }
     78 
        ; <32 x i1> case: 32-bit masks need 4-byte kmovd spill/reload slots
        ; (one at a positive offset, one at (%rsp)).  The reloaded masks are
        ; combined with kord and expanded to 32 byte lanes in %ymm0 with
        ; vpmovm2b.
     79 define <32 x i1> @test_32i1(<32 x i16> %a, <32 x i16> %b) {
     80 ; CHECK-LABEL: test_32i1:
     81 ; CHECK:       ## BB#0:
     82 ; CHECK-NEXT:    pushq %rax
     83 ; CHECK-NEXT:  Ltmp3:
     84 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
     85 ; CHECK-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0
     86 ; CHECK-NEXT:    kmovd %k0, {{[0-9]+}}(%rsp) ## 4-byte Folded Spill
     87 ; CHECK-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0
     88 ; CHECK-NEXT:    kmovd %k0, (%rsp) ## 4-byte Folded Spill
     89 ; CHECK-NEXT:    callq _f
     90 ; CHECK-NEXT:    kmovd {{[0-9]+}}(%rsp), %k0 ## 4-byte Folded Reload
     91 ; CHECK-NEXT:    kmovd (%rsp), %k1 ## 4-byte Folded Reload
     92 ; CHECK-NEXT:    kord %k1, %k0, %k0
     93 ; CHECK-NEXT:    vpmovm2b %k0, %ymm0
     94 ; CHECK-NEXT:    popq %rax
     95 ; CHECK-NEXT:    retq
     96   %cmp_res = icmp ugt <32 x i16> %a, %b
     97   %cmp_res2 = icmp sgt <32 x i16> %a, %b
     98   call void @f()
     99   %res = or <32 x i1> %cmp_res, %cmp_res2
    100   ret <32 x i1> %res
    101 }
    102 
        ; <64 x i1> case: 64-bit masks require 8-byte kmovq spill slots, so the
        ; prologue reserves a 24-byte frame (subq/addq $24) instead of a single
        ; push.  Masks are ORed with korq and expanded to 64 byte lanes in
        ; %zmm0 with vpmovm2b.
    103 define <64 x i1> @test_64i1(<64 x i8> %a, <64 x i8> %b) {
    104 ; CHECK-LABEL: test_64i1:
    105 ; CHECK:       ## BB#0:
    106 ; CHECK-NEXT:    subq $24, %rsp
    107 ; CHECK-NEXT:  Ltmp4:
    108 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
    109 ; CHECK-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
    110 ; CHECK-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Folded Spill
    111 ; CHECK-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0
    112 ; CHECK-NEXT:    kmovq %k0, {{[0-9]+}}(%rsp) ## 8-byte Folded Spill
    113 ; CHECK-NEXT:    callq _f
    114 ; CHECK-NEXT:    kmovq {{[0-9]+}}(%rsp), %k0 ## 8-byte Folded Reload
    115 ; CHECK-NEXT:    kmovq {{[0-9]+}}(%rsp), %k1 ## 8-byte Folded Reload
    116 ; CHECK-NEXT:    korq %k1, %k0, %k0
    117 ; CHECK-NEXT:    vpmovm2b %k0, %zmm0
    118 ; CHECK-NEXT:    addq $24, %rsp
    119 ; CHECK-NEXT:    retq
    120 
    121   %cmp_res = icmp ugt <64 x i8> %a, %b
    122   %cmp_res2 = icmp sgt <64 x i8> %a, %b
    123   call void @f()
    124   %res = or <64 x i1> %cmp_res, %cmp_res2
    125   ret <64 x i1> %res
    126 }
    127