; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: mul-v16i8
; CHECK: # BB#0:
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

; CHECK: mul-v32i8
; CHECK: # BB#0:
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retq
define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: mul_const1
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

; CHECK: mul_const2
; CHECK: vpsllq $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

; CHECK: mul_const3
; CHECK: vpsllw $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

; CHECK: mul_const4
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

; CHECK: mul_const5
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; CHECK: mul_const6
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

; CHECK: mul_const7
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

; CHECK: mul_const8
; CHECK: vpsllw $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

; CHECK: mul_const9
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; CHECK: mul_const10
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const10(<4 x i32> %x) {
  ; %x * 0x01010101
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

; CHECK: mul_const11
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const11(<4 x i32> %x) {
  ; %x * 0x80808080
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}