Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s
      3 
      4 ; 256-bit vectors (ymm registers)
      5 
      6 define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; register-register add of two <32 x i8> vectors
      7 ; CHECK-LABEL: vpaddb256_test:
      8 ; CHECK:       # %bb.0:
      9 ; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
     10 ; CHECK-NEXT:    retq
     11   %sum = add <32 x i8> %i, %j ; expected asm above: a single vpaddb on ymm registers
     12   ret <32 x i8> %sum
     13 }
     14 
     15 define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind { ; <32 x i8> add whose second operand comes from memory
     16 ; CHECK-LABEL: vpaddb256_fold_test:
     17 ; CHECK:       # %bb.0:
     18 ; CHECK-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
     19 ; CHECK-NEXT:    retq
     20   %rhs = load <32 x i8>, <32 x i8>* %j, align 4 ; align 4 is below the 32-byte vector size; the load is still expected to fold into vpaddb
     21   %sum = add <32 x i8> %i, %rhs
     22   ret <32 x i8> %sum
     23 }
     24 
     25 define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; register-register add of two <16 x i16> vectors
     26 ; CHECK-LABEL: vpaddw256_test:
     27 ; CHECK:       # %bb.0:
     28 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
     29 ; CHECK-NEXT:    retq
     30   %sum = add <16 x i16> %i, %j ; expected asm above: a single vpaddw on ymm registers
     31   ret <16 x i16> %sum
     32 }
     33 
     34 define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind { ; <16 x i16> add whose second operand comes from memory
     35 ; CHECK-LABEL: vpaddw256_fold_test:
     36 ; CHECK:       # %bb.0:
     37 ; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
     38 ; CHECK-NEXT:    retq
     39   %rhs = load <16 x i16>, <16 x i16>* %j, align 4 ; align 4 is below the 32-byte vector size; the load is still expected to fold into vpaddw
     40   %sum = add <16 x i16> %i, %rhs
     41   ret <16 x i16> %sum
     42 }
     43 
     44 define <16 x i16> @vpaddw256_mask_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone { ; merge-masked <16 x i16> add, register operands
     45 ; CHECK-LABEL: vpaddw256_mask_test:
     46 ; CHECK:       # %bb.0:
     47 ; CHECK-NEXT:    vptestmw %ymm2, %ymm2, %k1
     48 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1}
     49 ; CHECK-NEXT:    retq
     50   %active = icmp ne <16 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
     51   %sum = add <16 x i16> %i, %j
     52   %merged = select <16 x i1> %active, <16 x i16> %sum, <16 x i16> %i ; inactive lanes keep %i (the {%k1} form in the expected asm)
     53   ret <16 x i16> %merged
     54 }
     55 
     56 define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone { ; zero-masked <16 x i16> add, register operands
     57 ; CHECK-LABEL: vpaddw256_maskz_test:
     58 ; CHECK:       # %bb.0:
     59 ; CHECK-NEXT:    vptestmw %ymm2, %ymm2, %k1
     60 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z}
     61 ; CHECK-NEXT:    retq
     62   %active = icmp ne <16 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
     63   %sum = add <16 x i16> %i, %j
     64   %zeroed = select <16 x i1> %active, <16 x i16> %sum, <16 x i16> zeroinitializer ; inactive lanes become zero (the {z} form in the expected asm)
     65   ret <16 x i16> %zeroed
     66 }
     67 
     68 define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind { ; merge-masked <16 x i16> add with a memory operand; not readnone because the body loads through %j.ptr
     69 ; CHECK-LABEL: vpaddw256_mask_fold_test:
     70 ; CHECK:       # %bb.0:
     71 ; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1
     72 ; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1}
     73 ; CHECK-NEXT:    retq
     74   %mask = icmp ne <16 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
     75   %j = load <16 x i16>, <16 x i16>* %j.ptr ; expected to fold into the masked vpaddw as its memory operand
     76   %x = add <16 x i16> %i, %j
     77   %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i ; inactive lanes keep %i (merge masking)
     78   ret <16 x i16> %r
     79 }
     80 
     81 define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind { ; zero-masked <16 x i16> add with a memory operand; not readnone because the body loads through %j.ptr
     82 ; CHECK-LABEL: vpaddw256_maskz_fold_test:
     83 ; CHECK:       # %bb.0:
     84 ; CHECK-NEXT:    vptestmw %ymm1, %ymm1, %k1
     85 ; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z}
     86 ; CHECK-NEXT:    retq
     87   %mask = icmp ne <16 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
     88   %j = load <16 x i16>, <16 x i16>* %j.ptr ; expected to fold into the masked vpaddw as its memory operand
     89   %x = add <16 x i16> %i, %j
     90   %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer ; inactive lanes become zero (zero masking)
     91   ret <16 x i16> %r
     92 }
     93 
     94 define <32 x i8> @vpsubb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; register-register subtract of two <32 x i8> vectors
     95 ; CHECK-LABEL: vpsubb256_test:
     96 ; CHECK:       # %bb.0:
     97 ; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
     98 ; CHECK-NEXT:    retq
     99   %diff = sub <32 x i8> %i, %j ; computes %i - %j per lane; expected asm above is a single vpsubb
    100   ret <32 x i8> %diff
    101 }
    102 
    103 define <16 x i16> @vpsubw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; register-register subtract of two <16 x i16> vectors
    104 ; CHECK-LABEL: vpsubw256_test:
    105 ; CHECK:       # %bb.0:
    106 ; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
    107 ; CHECK-NEXT:    retq
    108   %diff = sub <16 x i16> %i, %j ; computes %i - %j per lane; expected asm above is a single vpsubw
    109   ret <16 x i16> %diff
    110 }
    111 
    112 define <16 x i16> @vpmullw256_test(<16 x i16> %i, <16 x i16> %j) { ; register-register multiply of two <16 x i16> vectors
    113 ; CHECK-LABEL: vpmullw256_test:
    114 ; CHECK:       # %bb.0:
    115 ; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
    116 ; CHECK-NEXT:    retq
    117   %prod = mul <16 x i16> %i, %j ; result keeps the i16 lane width; expected asm above is a single vpmullw
    118   ret <16 x i16> %prod
    119 }
    120 
    121 ; 128-bit vectors (xmm registers)
    122 
    123 define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone { ; register-register add of two <16 x i8> vectors
    124 ; CHECK-LABEL: vpaddb128_test:
    125 ; CHECK:       # %bb.0:
    126 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
    127 ; CHECK-NEXT:    retq
    128   %sum = add <16 x i8> %i, %j ; expected asm above: a single vpaddb on xmm registers
    129   ret <16 x i8> %sum
    130 }
    131 
    132 define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind { ; <16 x i8> add whose second operand comes from memory
    133 ; CHECK-LABEL: vpaddb128_fold_test:
    134 ; CHECK:       # %bb.0:
    135 ; CHECK-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
    136 ; CHECK-NEXT:    retq
    137   %rhs = load <16 x i8>, <16 x i8>* %j, align 4 ; align 4 is below the 16-byte vector size; the load is still expected to fold into vpaddb
    138   %sum = add <16 x i8> %i, %rhs
    139   ret <16 x i8> %sum
    140 }
    141 
    142 define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; register-register add of two <8 x i16> vectors
    143 ; CHECK-LABEL: vpaddw128_test:
    144 ; CHECK:       # %bb.0:
    145 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
    146 ; CHECK-NEXT:    retq
    147   %sum = add <8 x i16> %i, %j ; expected asm above: a single vpaddw on xmm registers
    148   ret <8 x i16> %sum
    149 }
    150 
    151 define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind { ; <8 x i16> add whose second operand comes from memory
    152 ; CHECK-LABEL: vpaddw128_fold_test:
    153 ; CHECK:       # %bb.0:
    154 ; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
    155 ; CHECK-NEXT:    retq
    156   %rhs = load <8 x i16>, <8 x i16>* %j, align 4 ; align 4 is below the 16-byte vector size; the load is still expected to fold into vpaddw
    157   %sum = add <8 x i16> %i, %rhs
    158   ret <8 x i16> %sum
    159 }
    160 
    161 define <8 x i16> @vpaddw128_mask_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone { ; merge-masked <8 x i16> add, register operands
    162 ; CHECK-LABEL: vpaddw128_mask_test:
    163 ; CHECK:       # %bb.0:
    164 ; CHECK-NEXT:    vptestmw %xmm2, %xmm2, %k1
    165 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1}
    166 ; CHECK-NEXT:    retq
    167   %active = icmp ne <8 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
    168   %sum = add <8 x i16> %i, %j
    169   %merged = select <8 x i1> %active, <8 x i16> %sum, <8 x i16> %i ; inactive lanes keep %i (the {%k1} form in the expected asm)
    170   ret <8 x i16> %merged
    171 }
    172 
    173 define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone { ; zero-masked <8 x i16> add, register operands
    174 ; CHECK-LABEL: vpaddw128_maskz_test:
    175 ; CHECK:       # %bb.0:
    176 ; CHECK-NEXT:    vptestmw %xmm2, %xmm2, %k1
    177 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z}
    178 ; CHECK-NEXT:    retq
    179   %active = icmp ne <8 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
    180   %sum = add <8 x i16> %i, %j
    181   %zeroed = select <8 x i1> %active, <8 x i16> %sum, <8 x i16> zeroinitializer ; inactive lanes become zero (the {z} form in the expected asm)
    182   ret <8 x i16> %zeroed
    183 }
    184 
    185 define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind { ; merge-masked <8 x i16> add with a memory operand; not readnone because the body loads through %j.ptr
    186 ; CHECK-LABEL: vpaddw128_mask_fold_test:
    187 ; CHECK:       # %bb.0:
    188 ; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1
    189 ; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1}
    190 ; CHECK-NEXT:    retq
    191   %mask = icmp ne <8 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
    192   %j = load <8 x i16>, <8 x i16>* %j.ptr ; expected to fold into the masked vpaddw as its memory operand
    193   %x = add <8 x i16> %i, %j
    194   %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i ; inactive lanes keep %i (merge masking)
    195   ret <8 x i16> %r
    196 }
    197 
    198 define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind { ; zero-masked <8 x i16> add with a memory operand; not readnone because the body loads through %j.ptr
    199 ; CHECK-LABEL: vpaddw128_maskz_fold_test:
    200 ; CHECK:       # %bb.0:
    201 ; CHECK-NEXT:    vptestmw %xmm1, %xmm1, %k1
    202 ; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z}
    203 ; CHECK-NEXT:    retq
    204   %mask = icmp ne <8 x i16> %mask1, zeroinitializer ; a lane is active where %mask1 is non-zero
    205   %j = load <8 x i16>, <8 x i16>* %j.ptr ; expected to fold into the masked vpaddw as its memory operand
    206   %x = add <8 x i16> %i, %j
    207   %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer ; inactive lanes become zero (zero masking)
    208   ret <8 x i16> %r
    209 }
    210 
    211 define <16 x i8> @vpsubb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone { ; register-register subtract of two <16 x i8> vectors
    212 ; CHECK-LABEL: vpsubb128_test:
    213 ; CHECK:       # %bb.0:
    214 ; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
    215 ; CHECK-NEXT:    retq
    216   %diff = sub <16 x i8> %i, %j ; computes %i - %j per lane; expected asm above is a single vpsubb
    217   ret <16 x i8> %diff
    218 }
    219 
    220 define <8 x i16> @vpsubw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; register-register subtract of two <8 x i16> vectors
    221 ; CHECK-LABEL: vpsubw128_test:
    222 ; CHECK:       # %bb.0:
    223 ; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
    224 ; CHECK-NEXT:    retq
    225   %diff = sub <8 x i16> %i, %j ; computes %i - %j per lane; expected asm above is a single vpsubw
    226   ret <8 x i16> %diff
    227 }
    228 
    229 define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) { ; register-register multiply of two <8 x i16> vectors
    230 ; CHECK-LABEL: vpmullw128_test:
    231 ; CHECK:       # %bb.0:
    232 ; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
    233 ; CHECK-NEXT:    retq
    234   %prod = mul <8 x i16> %i, %j ; result keeps the i16 lane width; expected asm above is a single vpmullw
    235   ret <8 x i16> %prod
    236 }
    237 
    238