; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector shift left instructions.
;
; Every run shares the common prefix and adds one subtarget prefix (SSE2,
; SSE41, AVX or AVX2). The two XOP runs (bdver2 and bdver4) share the XOP
; prefix and additionally use XOPAVX and XOPAVX2 respectively, so a test only
; needs the split prefixes where the two XOP subtargets are expected to differ.

;
;
; Variable Shifts
;

define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 20 for instruction: %shift
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i8> %a, %b
  ret <16 x i8> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;
; The shift amount is element 0 of %b broadcast to every lane with a
; zeroinitializer shuffle mask.
;

define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = shl <2 x i64> %a, %splat
  ret <2 x i64> %shift
}

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i32> %a, %splat
  ret <4 x i32> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 20 for instruction: %shift
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i16> %a, %splat
  ret <8 x i16> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i8> %a, %splat
  ret <16 x i8> %shift
}

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
  ret <4 x i64> %shift
}

define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 6 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}

define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 12 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %shift
}

define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}

define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <16 x i16> %shift
}

define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}

define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}

define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}

define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}

;
; Special Cases
;

; We always emit a single pmullw in the case of v8i16 vector shifts by
; non-uniform constant.

define <8 x i16> @test1(<8 x i16> %a) {
  %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <8 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test1':
; CHECK: Found an estimated cost of 1 for instruction: %shl


; Same element type as 'test1', but the constant shift vector contains undef
; and negative elements. The expected cost is the same on every subtarget.

define <8 x i16> @test2(<8 x i16> %a) {
  %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
  ret <8 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test2':
; CHECK: Found an estimated cost of 1 for instruction: %shl


; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
; Make sure that the estimated cost is always 1 except for the case where
; we only have SSE2 support. With SSE2, we are forced to special lower the
; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle.

define <4 x i32> @test3(<4 x i32> %a) {
  %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
  ret <4 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test3':
; SSE2: Found an estimated cost of 6 for instruction: %shl
; SSE41: Found an estimated cost of 1 for instruction: %shl
; AVX: Found an estimated cost of 1 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOP: Found an estimated cost of 1 for instruction: %shl


define <4 x i32> @test4(<4 x i32> %a) {
  %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
  ret <4 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test4':
; SSE2: Found an estimated cost of 6 for instruction: %shl
; SSE41: Found an estimated cost of 1 for instruction: %shl
; AVX: Found an estimated cost of 1 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOP: Found an estimated cost of 1 for instruction: %shl


; On AVX2 we are able to lower the following shift into a single
; vpsllvq. Therefore, the expected cost is only 1. Both XOP subtargets are
; also expected to report a cost of 1.
; On SSE2, SSE4.1 and AVX this shift is scalarized as the target does not
; support vpsllv instructions.

define <2 x i64> @test5(<2 x i64> %a) {
  %shl = shl <2 x i64> %a, <i64 2, i64 3>
  ret <2 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test5':
; SSE2: Found an estimated cost of 4 for instruction: %shl
; SSE41: Found an estimated cost of 4 for instruction: %shl
; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOP: Found an estimated cost of 1 for instruction: %shl


; v16i16 and v8i32 shift left by non-uniform constant are lowered into
; vector multiply instructions. With AVX (but not AVX2), the vector multiply
; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
;
; With AVX2, instruction vpmullw works with 256bit quantities and
; therefore there is no need to split the resulting vector multiply into
; a sequence of two multiplies.
;
; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
; the cost computed in the case of 'test1'. That is because the backend
; simply emits 2 pmullw with no extract/insert.


define <16 x i16> @test6(<16 x i16> %a) {
  %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <16 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test6':
; SSE2: Found an estimated cost of 2 for instruction: %shl
; SSE41: Found an estimated cost of 2 for instruction: %shl
; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOPAVX: Found an estimated cost of 2 for instruction: %shl
; XOPAVX2: Found an estimated cost of 1 for instruction: %shl


; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
; the cost computed in the case of 'test3'. That is because the multiply
; is type-legalized into two v4i32 vector multiplies.

define <8 x i32> @test7(<8 x i32> %a) {
  %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
  ret <8 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test7':
; SSE2: Found an estimated cost of 12 for instruction: %shl
; SSE41: Found an estimated cost of 2 for instruction: %shl
; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOPAVX: Found an estimated cost of 2 for instruction: %shl
; XOPAVX2: Found an estimated cost of 1 for instruction: %shl


; On AVX2 we are able to lower the following shift into a single
; vpsllvq. Therefore, the expected cost is only 1. The XOP subtargets are
; expected to report 2 (XOPAVX) and 1 (XOPAVX2).
; On SSE2, SSE4.1 and AVX this shift is scalarized as the target does not
; support vpsllv instructions.

define <4 x i64> @test8(<4 x i64> %a) {
  %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
  ret <4 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test8':
; SSE2: Found an estimated cost of 8 for instruction: %shl
; SSE41: Found an estimated cost of 8 for instruction: %shl
; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOPAVX: Found an estimated cost of 2 for instruction: %shl
; XOPAVX2: Found an estimated cost of 1 for instruction: %shl


; Same as 'test6', with the difference that the cost is double.

define <32 x i16> @test9(<32 x i16> %a) {
  %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <32 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test9':
; SSE2: Found an estimated cost of 4 for instruction: %shl
; SSE41: Found an estimated cost of 4 for instruction: %shl
; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
; XOPAVX: Found an estimated cost of 4 for instruction: %shl
; XOPAVX2: Found an estimated cost of 2 for instruction: %shl


; Same as 'test7', except that now the cost is double.

define <16 x i32> @test10(<16 x i32> %a) {
  %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
  ret <16 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test10':
; SSE2: Found an estimated cost of 24 for instruction: %shl
; SSE41: Found an estimated cost of 4 for instruction: %shl
; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
; XOPAVX: Found an estimated cost of 4 for instruction: %shl
; XOPAVX2: Found an estimated cost of 2 for instruction: %shl


; On AVX2 we are able to lower the following shift into a sequence of
; two vpsllvq instructions. Therefore, the expected cost is only 2. The XOP
; subtargets are expected to report 4 (XOPAVX) and 2 (XOPAVX2).
; On SSE2, SSE4.1 and AVX this shift is scalarized as we don't have vpsllv
; instructions.

define <8 x i64> @test11(<8 x i64> %a) {
  %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
  ret <8 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test11':
; SSE2: Found an estimated cost of 16 for instruction: %shl
; SSE41: Found an estimated cost of 16 for instruction: %shl
; AVX: Found an estimated cost of 16 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
; XOPAVX: Found an estimated cost of 4 for instruction: %shl
; XOPAVX2: Found an estimated cost of 2 for instruction: %shl