; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx     | FileCheck %s --check-prefixes=ANY,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2    | FileCheck %s --check-prefixes=ANY,INT256,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=ANY,INT256,AVX512
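; ANY = checks shared by all three runs; INT256 = the AVX2/AVX512 runs, where 256-bit integer ops are available.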

define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: andpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: andps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %1
}

define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: xorpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: xorpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %xor.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: xorps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: xorps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %xor.i to <8 x float>
  ret <8 x float> %1
}

define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: orpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: orpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorpd {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %y to <4 x i64>
  %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
  %1 = bitcast <4 x i64> %or.i to <4 x double>
  ; add forces execution domain
  %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %2
}

define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: orps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
; ANY-LABEL: orps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %y to <8 x i32>
  %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
  %1 = bitcast <8 x i32> %or.i to <8 x float>
  ret <8 x float> %1
}

define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andnotpd256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnpd %ymm0, %ymm1, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %x to <4 x i64>
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %y to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
; ANY-LABEL: andnotpd256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnpd (%rdi), %ymm0, %ymm0
; ANY-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
  %0 = bitcast <4 x double> %y to <4 x i64>
  %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %tmp2 to <4 x i64>
  %and.i = and <4 x i64> %1, %neg.i
  %2 = bitcast <4 x i64> %and.i to <4 x double>
  ; add forces execution domain
  %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}

define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
; ANY-LABEL: andnotps256:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; ANY-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %x to <8 x i32>
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %y to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
; ANY-LABEL: andnotps256fold:
; ANY:       # %bb.0: # %entry
; ANY-NEXT:    vandnps (%rdi), %ymm0, %ymm0
; ANY-NEXT:    retq
entry:
  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
  %0 = bitcast <8 x float> %y to <8 x i32>
  %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %tmp2 to <8 x i32>
  %and.i = and <8 x i32> %1, %neg.i
  %2 = bitcast <8 x i32> %and.i to <8 x float>
  ret <8 x float> %2
}

;;; Test that basic 2 x i64 logic ops use the integer version on AVX

define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
; ANY-LABEL: vpandn:
; ANY:       # %bb.0:
; ANY-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; ANY-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
; ANY-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; ANY-NEXT:    retq
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
; ANY-LABEL: vpand:
; ANY:       # %bb.0:
; ANY-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; ANY-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; ANY-NEXT:    vpand %xmm1, %xmm0, %xmm0
; ANY-NEXT:    retq
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
; AVX1-LABEL: and_xor_splat1_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandnps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_xor_splat1_v4i32:
; INT256:       # %bb.0:
; INT256-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; INT256-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; INT256-NEXT:    retq
  %xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %and
}

define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
; AVX1-LABEL: and_xor_splat1_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandnps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_xor_splat1_v4i64:
; INT256:       # %bb.0:
; INT256-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; INT256-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; INT256-NEXT:    retq
  %xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  %and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %and
}

; PR37749 - https://bugs.llvm.org/show_bug.cgi?id=37749
; For AVX1, we don't want a 256-bit logic op that requires insert/extract around the surrounding 128-bit ops.

define <8 x i32> @and_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: and_disguised_i8_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_disguised_i8_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = and <8 x i32> %a, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @or_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: or_disguised_i8_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: or_disguised_i8_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; INT256-NEXT:    vpor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = or <8 x i32> %a, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @xor_disguised_i8_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: xor_disguised_i8_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1095216660735,1095216660735]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: xor_disguised_i8_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; INT256-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = xor <8 x i32> %a, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @and_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: and_disguised_i16_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0],xmm1[1],xmm3[2],xmm1[3],xmm3[4],xmm1[5],xmm3[6],xmm1[7]
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: and_disguised_i16_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; INT256-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = and <8 x i32> %a, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @or_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: or_disguised_i16_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [281470681808895,281470681808895]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: or_disguised_i16_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
; INT256-NEXT:    vpor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = or <8 x i32> %a, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}

define <8 x i32> @xor_disguised_i16_elts(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: xor_disguised_i16_elts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [281470681808895,281470681808895]
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; INT256-LABEL: xor_disguised_i16_elts:
; INT256:       # %bb.0:
; INT256-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
; INT256-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; INT256-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; INT256-NEXT:    retq
  %a = add <8 x i32> %x, %y
  %l = xor <8 x i32> %a, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %t = add <8 x i32> %l, %z
  ret <8 x i32> %t
}
    483