; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Codegen test for vector shift operations with AVX2 enabled (see the llc
; invocation above). Every function is anchored with a label directive so
; that each instruction pattern can only be satisfied inside the function
; it is meant to verify, and so that a failure is reported against the
; right function.

;;; Variable (per-element) shift left, shift amounts in a register.

; <4 x i32> shl by a vector amount.
; CHECK-LABEL: variable_shl0:
; CHECK: psllvd
; CHECK: ret
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

; <8 x i32> shl by a vector amount.
; CHECK-LABEL: variable_shl1:
; CHECK: psllvd
; CHECK: ret
define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

; <2 x i64> shl by a vector amount.
; CHECK-LABEL: variable_shl2:
; CHECK: psllvq
; CHECK: ret
define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

; <4 x i64> shl by a vector amount.
; CHECK-LABEL: variable_shl3:
; CHECK: psllvq
; CHECK: ret
define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

;;; Variable (per-element) logical shift right, shift amounts in a register.

; <4 x i32> lshr by a vector amount.
; CHECK-LABEL: variable_srl0:
; CHECK: psrlvd
; CHECK: ret
define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; <8 x i32> lshr by a vector amount.
; CHECK-LABEL: variable_srl1:
; CHECK: psrlvd
; CHECK: ret
define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

; <2 x i64> lshr by a vector amount.
; CHECK-LABEL: variable_srl2:
; CHECK: psrlvq
; CHECK: ret
define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

; <4 x i64> lshr by a vector amount.
; CHECK-LABEL: variable_srl3:
; CHECK: psrlvq
; CHECK: ret
define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

;;; Variable (per-element) arithmetic shift right, shift amounts in a register.

; <4 x i32> ashr by a vector amount.
; CHECK-LABEL: variable_sra0:
; CHECK: vpsravd
; CHECK: ret
define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

; <8 x i32> ashr by a vector amount.
; CHECK-LABEL: variable_sra1:
; CHECK: vpsravd
; CHECK: ret
define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left
; 256-bit shifts by a splat constant of 2; each is expected to use the
; immediate-count shift for its element width.

; CHECK-LABEL: vshift00:
; CHECK: vpslld
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift01:
; CHECK: vpsllw
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2,
                           i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; CHECK-LABEL: vshift02:
; CHECK: vpsllq
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right

; CHECK-LABEL: vshift03:
; CHECK: vpsrld
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift04:
; CHECK: vpsrlw
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2,
                            i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

; CHECK-LABEL: vshift05:
; CHECK: vpsrlq
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right

; CHECK-LABEL: vshift06:
; CHECK: vpsrad
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

; CHECK-LABEL: vshift07:
; CHECK: vpsraw
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2,
                            i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

;;; Variable shifts whose shift-amount vector is loaded from memory.
; The "(%" in each pattern requires the shift instruction's first operand
; to be a memory operand, i.e. the load is expected to be folded.

; CHECK-LABEL: variable_sra0_load:
; CHECK: vpsravd (%
; CHECK: ret
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_sra1_load:
; CHECK: vpsravd (%
; CHECK: ret
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl0_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_shl1_load:
; CHECK: vpsllvd (%
; CHECK: ret
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_shl2_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_shl3_load:
; CHECK: vpsllvq (%
; CHECK: ret
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

; CHECK-LABEL: variable_srl0_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

; CHECK-LABEL: variable_srl1_load:
; CHECK: vpsrlvd (%
; CHECK: ret
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

; CHECK-LABEL: variable_srl2_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

; CHECK-LABEL: variable_srl3_load:
; CHECK: vpsrlvq (%
; CHECK: ret
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

;;; <32 x i8> shifts by a splat constant.
; The patterns below expect these to be built from 16-bit word shifts plus
; fix-up instructions rather than from a byte-element shift instruction.

; shl by 3: word shift left followed by vpand.
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: shl9:
; CHECK: vpsllw $3
; CHECK: vpand
; CHECK: ret
}

; lshr by 3: word shift right followed by vpand.
define <32 x i8> @shr9(<32 x i8> %A) nounwind {
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: shr9:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: ret
}

; ashr by 7 (element width minus one): expected as vpxor + vpcmpgtb,
; presumably a signed compare against a zeroed register.
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                           i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
; CHECK-LABEL: sra_v32i8_7:
; CHECK: vpxor
; CHECK: vpcmpgtb
; CHECK: ret
}

; ashr by 3: logical word shift right, vpand, then vpxor/vpsubb.
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
; CHECK-LABEL: sra_v32i8:
; CHECK: vpsrlw $3
; CHECK: vpand
; CHECK: vpxor
; CHECK: vpsubb
; CHECK: ret
}

;;; trunc followed by sext back to the original element type.
; Expected as a shift-left / arithmetic-shift-right pair, with no
; vinsertf128 anywhere in the function.

; CHECK-LABEL: sext_v16i16:
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK-NOT: vinsertf128
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

; CHECK-LABEL: sext_v8i32:
; CHECK: vpslld
; CHECK: vpsrad
; CHECK-NOT: vinsertf128
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

;;; Variable shifts of <8 x i16>.
; The patterns expect both operands to be extended into ymm registers with
; 32-bit lanes, shifted with the 32-bit variable shift, and then passed
; through vpshufb and vpermq (presumably to narrow the result back to
; 16-bit elements).

; shl: amount and value are both zero-extended.
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_shl16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsllvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

; ashr: amount is zero-extended, value is sign-extended.
define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_ashr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovsxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsravd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

; lshr: amount and value are both zero-extended.
define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: variable_lshr16:
; CHECK-DAG: vpmovzxwd %xmm1, [[AMT:%ymm[0-9]+]]
; CHECK-DAG: vpmovzxwd %xmm0, [[LHS:%ymm[0-9]+]]
; CHECK: vpsrlvd [[AMT]], [[LHS]], {{%ymm[0-9]+}}
; CHECK: vpshufb
; CHECK: vpermq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}