Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
      3 
      4 define <4 x float> @select_mask_add_ss(<4 x float> %w, i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; lane 0 = (%u & 1) ? %a[0] + %b[0] : %w[0]; lanes 1-3 come from %a
      5 ; CHECK-LABEL: select_mask_add_ss:
      6 ; CHECK:       ## %bb.0: ## %entry
      7 ; CHECK-NEXT:    kmovw %edi, %k1
      8 ; CHECK-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1}
      9 ; CHECK-NEXT:    retq
     10 entry:
     11   %b.lo = extractelement <4 x float> %b, i32 0
     12   %a.lo = extractelement <4 x float> %a, i32 0
     13   %sum = fadd float %a.lo, %b.lo
     14   %bit0 = and i8 %u, 1 ; keep only mask bit 0
     15   %bit0.clear = icmp eq i8 %bit0, 0
     16   %w.lo = extractelement <4 x float> %w, i32 0
     17   %lane0 = select i1 %bit0.clear, float %w.lo, float %sum ; bit clear -> pass lane 0 of %w through
     18   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
     19   ret <4 x float> %res
     20 }
     21 
     22 define <4 x float> @select_maskz_add_ss(i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; lane 0 = (%u & 1) ? %a[0] + %b[0] : 0.0; lanes 1-3 come from %a
     23 ; CHECK-LABEL: select_maskz_add_ss:
     24 ; CHECK:       ## %bb.0: ## %entry
     25 ; CHECK-NEXT:    kmovw %edi, %k1
     26 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z}
     27 ; CHECK-NEXT:    retq
     28 entry:
     29   %b.lo = extractelement <4 x float> %b, i32 0
     30   %a.lo = extractelement <4 x float> %a, i32 0
     31   %sum = fadd float %a.lo, %b.lo
     32   %bit0 = and i8 %u, 1 ; keep only mask bit 0
     33   %bit0.clear = icmp eq i8 %bit0, 0
     34   %lane0 = select i1 %bit0.clear, float 0.000000e+00, float %sum ; bit clear -> zero lane 0
     35   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
     36   ret <4 x float> %res
     37 }
     38 
     39 define <4 x float> @select_mask_sub_ss(<4 x float> %w, i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; lane 0 = (%u & 1) ? %a[0] - %b[0] : %w[0]; lanes 1-3 come from %a
     40 ; CHECK-LABEL: select_mask_sub_ss:
     41 ; CHECK:       ## %bb.0: ## %entry
     42 ; CHECK-NEXT:    kmovw %edi, %k1
     43 ; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1}
     44 ; CHECK-NEXT:    retq
     45 entry:
     46   %b.lo = extractelement <4 x float> %b, i32 0
     47   %a.lo = extractelement <4 x float> %a, i32 0
     48   %diff = fsub float %a.lo, %b.lo
     49   %bit0 = and i8 %u, 1 ; keep only mask bit 0
     50   %bit0.clear = icmp eq i8 %bit0, 0
     51   %w.lo = extractelement <4 x float> %w, i32 0
     52   %lane0 = select i1 %bit0.clear, float %w.lo, float %diff ; bit clear -> pass lane 0 of %w through
     53   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
     54   ret <4 x float> %res
     55 }
     56 
     57 define <4 x float> @select_maskz_sub_ss(i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; lane 0 = (%u & 1) ? %a[0] - %b[0] : 0.0; lanes 1-3 come from %a
     58 ; CHECK-LABEL: select_maskz_sub_ss:
     59 ; CHECK:       ## %bb.0: ## %entry
     60 ; CHECK-NEXT:    kmovw %edi, %k1
     61 ; CHECK-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z}
     62 ; CHECK-NEXT:    retq
     63 entry:
     64   %b.lo = extractelement <4 x float> %b, i32 0
     65   %a.lo = extractelement <4 x float> %a, i32 0
     66   %diff = fsub float %a.lo, %b.lo
     67   %bit0 = and i8 %u, 1 ; keep only mask bit 0
     68   %bit0.clear = icmp eq i8 %bit0, 0
     69   %lane0 = select i1 %bit0.clear, float 0.000000e+00, float %diff ; bit clear -> zero lane 0
     70   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
     71   ret <4 x float> %res
     72 }
     73 
     74 define <4 x float> @select_mask_mul_ss(<4 x float> %w, i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; lane 0 = (%u & 1) ? %a[0] * %b[0] : %w[0]; lanes 1-3 come from %a
     75 ; CHECK-LABEL: select_mask_mul_ss:
     76 ; CHECK:       ## %bb.0: ## %entry
     77 ; CHECK-NEXT:    kmovw %edi, %k1
     78 ; CHECK-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1}
     79 ; CHECK-NEXT:    retq
     80 entry:
     81   %b.lo = extractelement <4 x float> %b, i32 0
     82   %a.lo = extractelement <4 x float> %a, i32 0
     83   %prod = fmul float %a.lo, %b.lo
     84   %bit0 = and i8 %u, 1 ; keep only mask bit 0
     85   %bit0.clear = icmp eq i8 %bit0, 0
     86   %w.lo = extractelement <4 x float> %w, i32 0
     87   %lane0 = select i1 %bit0.clear, float %w.lo, float %prod ; bit clear -> pass lane 0 of %w through
     88   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
     89   ret <4 x float> %res
     90 }
     91 
     92 define <4 x float> @select_maskz_mul_ss(i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; lane 0 = (%u & 1) ? %a[0] * %b[0] : 0.0; lanes 1-3 come from %a
     93 ; CHECK-LABEL: select_maskz_mul_ss:
     94 ; CHECK:       ## %bb.0: ## %entry
     95 ; CHECK-NEXT:    kmovw %edi, %k1
     96 ; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z}
     97 ; CHECK-NEXT:    retq
     98 entry:
     99   %b.lo = extractelement <4 x float> %b, i32 0
    100   %a.lo = extractelement <4 x float> %a, i32 0
    101   %prod = fmul float %a.lo, %b.lo
    102   %bit0 = and i8 %u, 1 ; keep only mask bit 0
    103   %bit0.clear = icmp eq i8 %bit0, 0
    104   %lane0 = select i1 %bit0.clear, float 0.000000e+00, float %prod ; bit clear -> zero lane 0
    105   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
    106   ret <4 x float> %res
    107 }
    108 
    109 ; Make sure we don't crash by trying to truncate the 'and' instruction from i4 to i8; a mask type narrower than i8 has to be extended instead.
    110 define <4 x float> @select_mask_add_ss_small_mask_type(<4 x float> %w, i4 %u, <4 x float> %a, <4 x float> %b) { ; i4-mask variant: lane 0 = (%u & 1) ? %a[0] + %b[0] : %w[0]; lanes 1-3 come from %a
    111 ; CHECK-LABEL: select_mask_add_ss_small_mask_type:
    112 ; CHECK:       ## %bb.0: ## %entry
    113 ; CHECK-NEXT:    kmovw %edi, %k1
    114 ; CHECK-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1}
    115 ; CHECK-NEXT:    retq
    116 entry:
    117   %b.lo = extractelement <4 x float> %b, i32 0
    118   %a.lo = extractelement <4 x float> %a, i32 0
    119   %sum = fadd float %a.lo, %b.lo
    120   %bit0 = and i4 %u, 1 ; keep only bit 0 of the i4 mask
    121   %bit0.clear = icmp eq i4 %bit0, 0
    122   %w.lo = extractelement <4 x float> %w, i32 0
    123   %lane0 = select i1 %bit0.clear, float %w.lo, float %sum ; bit clear -> pass lane 0 of %w through
    124   %res = insertelement <4 x float> %a, float %lane0, i32 0 ; remaining lanes are taken from %a
    125   ret <4 x float> %res
    126 }
    127 
    128