; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

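; Verify that 256-bit integer add, sub, and mul are each selected as a single
; AVX2 instruction on ymm registers wherever such an instruction exists.
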
; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

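; There is no byte-element multiply in AVX2, so a v16i8 mul is widened: both
; operands are sign-extended to v16i16 (vpmovsxbw), multiplied with vpmullw,
; and the low byte of each word lane is gathered back with vpshufb/vpunpcklqdq.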
; CHECK: mul-v16i8
; CHECK:       # BB#0:
; CHECK-NEXT:  vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT:  vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:  vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:  vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:  vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT:  vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT:  vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:  vzeroupper
; CHECK-NEXT:  retq
define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

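; Same widening strategy for v32i8, applied to each 128-bit half separately:
; the high halves are extracted with vextracti128, multiplied as v16i16, and
; the two truncated results are rejoined with vinserti128.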
; CHECK: mul-v32i8
; CHECK:       # BB#0:
; CHECK-NEXT:  vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT:  vpmovsxbw %xmm2, %ymm2
; CHECK-NEXT:  vextracti128 $1, %ymm0, %xmm3
; CHECK-NEXT:  vpmovsxbw %xmm3, %ymm3
; CHECK-NEXT:  vpmullw %ymm2, %ymm3, %ymm2
; CHECK-NEXT:  vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT:  vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT:  vpshufb %xmm4, %xmm3, %xmm3
; CHECK-NEXT:  vpshufb %xmm4, %xmm2, %xmm2
; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT:  vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT:  vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:  vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:  vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:  vpshufb %xmm4, %xmm1, %xmm1
; CHECK-NEXT:  vpshufb %xmm4, %xmm0, %xmm0
; CHECK-NEXT:  vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:  vinserti128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT:  retq
define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

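; AVX2 has no 64-bit element multiply either. A v4i64 mul is expanded into
; 32x32->64 partial products: vpmuludq multiplies the low halves, and the two
; cross products are shifted into the high half and accumulated with vpaddq.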
; CHECK: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

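; Multiply by a splat of 2 is turned into an add of the value to itself.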
; CHECK: mul_const1
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

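; Multiply by a splat power of two becomes a shift: x * 4 -> x << 2.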
; CHECK: mul_const2
; CHECK: vpsllq  $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

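; Likewise for v16i16: x * 8 -> x << 3.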
; CHECK: mul_const3
; CHECK: vpsllw  $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

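; Multiply by -1 is negation: materialize zero with vpxor, then subtract.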
; CHECK: mul_const4
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

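; Multiply by zero folds away entirely; only a zeroed register remains.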
; CHECK: mul_const5
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

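; A non-splat constant vector cannot use a uniform shift, so the general
; vpmulld is kept.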
; CHECK: mul_const6
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

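; v8i64 is wider than a ymm register, so the multiply-by-2 is legalized into
; two 256-bit halves, each lowered as an add of the half to itself.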
; CHECK: mul_const7
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

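; The 128-bit case takes the same shift lowering: x * 8 -> x << 3.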
; CHECK: mul_const8
; CHECK: vpsllw  $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

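; Only one lane is nonzero, so this is not a splat shift; vpmulld is kept.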
; CHECK: mul_const9
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

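; A splat of 0x01010101 is not a power of two, so the multiply stays vpmulld.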
; CHECK: mul_const10
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const10(<4 x i32> %x) {
  ; %x * 0x01010101
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

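; Same for a splat of 0x80808080.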
; CHECK: mul_const11
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const11(<4 x i32> %x) {
  ; %x * 0x80808080
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}