1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2 ; RUN: opt < %s -instcombine -S | FileCheck %s 3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 5 ; 6 ; ASHR - Immediate 7 ; 8 9 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { 10 ; CHECK-LABEL: @sse2_psrai_w_0( 11 ; CHECK-NEXT: ret <8 x i16> %v 12 ; 13 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0) 14 ret <8 x i16> %1 15 } 16 17 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { 18 ; CHECK-LABEL: @sse2_psrai_w_15( 19 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 20 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 21 ; 22 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15) 23 ret <8 x i16> %1 24 } 25 26 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { 27 ; CHECK-LABEL: @sse2_psrai_w_64( 28 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 29 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 30 ; 31 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64) 32 ret <8 x i16> %1 33 } 34 35 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { 36 ; CHECK-LABEL: @sse2_psrai_d_0( 37 ; CHECK-NEXT: ret <4 x i32> %v 38 ; 39 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0) 40 ret <4 x i32> %1 41 } 42 43 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { 44 ; CHECK-LABEL: @sse2_psrai_d_15( 45 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15> 46 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 47 ; 48 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15) 49 ret <4 x i32> %1 50 } 51 52 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { 53 ; CHECK-LABEL: @sse2_psrai_d_64( 54 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31> 55 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 56 ; 57 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64) 58 ret <4 x i32> %1 59 } 60 61 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { 62 ; CHECK-LABEL: @avx2_psrai_w_0( 63 ; CHECK-NEXT: ret <16 x i16> %v 64 ; 65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0) 66 ret <16 x i16> %1 67 } 68 69 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { 70 ; CHECK-LABEL: @avx2_psrai_w_15( 71 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 72 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 73 ; 74 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15) 75 ret <16 x i16> %1 76 } 77 78 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { 79 ; CHECK-LABEL: @avx2_psrai_w_64( 80 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 81 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 82 ; 83 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64) 84 ret <16 x i16> %1 85 } 86 87 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { 88 ; CHECK-LABEL: @avx2_psrai_d_0( 89 ; CHECK-NEXT: ret <8 x i32> %v 90 ; 91 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0) 92 ret <8 x i32> %1 93 } 94 95 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { 96 ; CHECK-LABEL: @avx2_psrai_d_15( 97 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 98 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 99 ; 100 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15) 101 ret <8 x i32> %1 102 } 103 104 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { 105 ; CHECK-LABEL: @avx2_psrai_d_64( 106 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 107 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 108 ; 109 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64) 110 ret <8 x i32> %1 111 } 112 113 ; 114 ; LSHR - Immediate 115 ; 116 117 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { 118 ; CHECK-LABEL: @sse2_psrli_w_0( 119 ; CHECK-NEXT: ret <8 x i16> %v 120 ; 121 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0) 122 ret <8 x i16> %1 123 } 124 125 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { 126 ; CHECK-LABEL: @sse2_psrli_w_15( 127 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 128 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 129 ; 130 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15) 131 ret <8 x i16> %1 132 } 133 134 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) { 135 ; CHECK-LABEL: @sse2_psrli_w_64( 136 ; CHECK-NEXT: ret <8 x i16> zeroinitializer 137 ; 138 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64) 139 ret <8 x i16> %1 140 } 141 142 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { 143 ; CHECK-LABEL: @sse2_psrli_d_0( 144 ; CHECK-NEXT: ret <4 x i32> %v 145 ; 146 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0) 147 ret <4 x i32> %1 148 } 149 150 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { 151 ; CHECK-LABEL: @sse2_psrli_d_15( 152 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15> 153 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 154 ; 155 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15) 156 ret <4 x i32> %1 157 } 158 159 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) { 160 ; CHECK-LABEL: @sse2_psrli_d_64( 161 ; CHECK-NEXT: ret <4 x i32> zeroinitializer 162 ; 163 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64) 164 ret <4 x i32> %1 165 } 166 167 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { 168 ; CHECK-LABEL: @sse2_psrli_q_0( 169 ; CHECK-NEXT: ret <2 x i64> %v 170 ; 171 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0) 172 ret <2 x i64> %1 173 } 174 175 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { 176 ; CHECK-LABEL: @sse2_psrli_q_15( 177 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15> 178 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 179 ; 180 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15) 181 ret <2 x i64> %1 182 } 183 184 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) { 185 ; CHECK-LABEL: @sse2_psrli_q_64( 186 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 187 ; 188 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64) 189 ret <2 x i64> %1 190 } 191 192 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { 193 ; CHECK-LABEL: @avx2_psrli_w_0( 194 ; CHECK-NEXT: ret <16 x i16> %v 195 ; 196 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0) 197 ret <16 x i16> %1 198 } 199 200 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { 201 ; CHECK-LABEL: @avx2_psrli_w_15( 202 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 203 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 204 ; 205 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15) 206 ret <16 x i16> %1 207 } 208 209 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) { 210 ; CHECK-LABEL: @avx2_psrli_w_64( 211 ; CHECK-NEXT: ret <16 x i16> zeroinitializer 212 ; 213 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64) 214 ret <16 x i16> %1 215 } 216 217 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { 218 ; CHECK-LABEL: @avx2_psrli_d_0( 219 ; CHECK-NEXT: ret <8 x i32> %v 220 ; 221 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0) 222 ret <8 x i32> %1 223 } 224 225 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { 226 ; CHECK-LABEL: @avx2_psrli_d_15( 227 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 228 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 229 ; 230 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15) 231 ret <8 x i32> %1 232 } 233 234 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) { 235 ; CHECK-LABEL: @avx2_psrli_d_64( 236 ; CHECK-NEXT: ret <8 x i32> zeroinitializer 237 ; 238 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64) 239 ret <8 x i32> %1 240 } 241 242 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { 243 ; CHECK-LABEL: @avx2_psrli_q_0( 244 ; CHECK-NEXT: ret <4 x i64> %v 245 ; 246 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0) 247 ret <4 x i64> %1 248 } 249 250 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { 251 ; CHECK-LABEL: @avx2_psrli_q_15( 252 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15> 253 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 254 ; 255 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15) 256 ret <4 x i64> %1 257 } 258 259 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) { 260 ; CHECK-LABEL: @avx2_psrli_q_64( 261 ; CHECK-NEXT: ret <4 x i64> zeroinitializer 262 ; 263 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64) 264 ret <4 x i64> %1 265 } 266 267 ; 268 ; SHL - Immediate 269 ; 270 271 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { 272 ; CHECK-LABEL: @sse2_pslli_w_0( 273 ; CHECK-NEXT: ret <8 x i16> %v 274 ; 275 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0) 276 ret <8 x i16> %1 277 } 278 279 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { 280 ; CHECK-LABEL: @sse2_pslli_w_15( 281 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 282 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 283 ; 284 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15) 285 ret <8 x i16> %1 286 } 287 288 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) { 289 ; CHECK-LABEL: @sse2_pslli_w_64( 290 ; CHECK-NEXT: ret <8 x i16> zeroinitializer 291 ; 292 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64) 293 ret <8 x i16> %1 294 } 295 296 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { 297 ; CHECK-LABEL: @sse2_pslli_d_0( 298 ; CHECK-NEXT: ret <4 x i32> %v 299 ; 300 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0) 301 ret <4 x i32> %1 302 } 303 304 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { 305 ; CHECK-LABEL: @sse2_pslli_d_15( 306 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15> 307 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 308 ; 309 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15) 310 ret <4 x i32> %1 311 } 312 313 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) { 314 ; CHECK-LABEL: @sse2_pslli_d_64( 315 ; CHECK-NEXT: ret <4 x i32> zeroinitializer 316 ; 317 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64) 318 ret <4 x i32> %1 319 } 320 321 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { 322 ; CHECK-LABEL: @sse2_pslli_q_0( 323 ; CHECK-NEXT: ret <2 x i64> %v 324 ; 325 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0) 326 ret <2 x i64> %1 327 } 328 329 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { 330 ; CHECK-LABEL: @sse2_pslli_q_15( 331 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15> 332 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 333 ; 334 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15) 335 ret <2 x i64> %1 336 } 337 338 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) { 339 ; CHECK-LABEL: @sse2_pslli_q_64( 340 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 341 ; 342 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64) 343 ret <2 x i64> %1 344 } 345 346 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { 347 ; CHECK-LABEL: @avx2_pslli_w_0( 348 ; CHECK-NEXT: ret <16 x i16> %v 349 ; 350 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0) 351 ret <16 x i16> %1 352 } 353 354 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { 355 ; CHECK-LABEL: @avx2_pslli_w_15( 356 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 357 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 358 ; 359 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15) 360 ret <16 x i16> %1 361 } 362 363 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) { 364 ; CHECK-LABEL: @avx2_pslli_w_64( 365 ; CHECK-NEXT: ret <16 x i16> zeroinitializer 366 ; 367 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64) 368 ret <16 x i16> %1 369 } 370 371 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { 372 ; CHECK-LABEL: @avx2_pslli_d_0( 373 ; CHECK-NEXT: ret <8 x i32> %v 374 ; 375 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0) 376 ret <8 x i32> %1 377 } 378 379 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { 380 ; CHECK-LABEL: @avx2_pslli_d_15( 381 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 382 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 383 ; 384 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15) 385 ret <8 x i32> %1 386 } 387 388 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) { 389 ; CHECK-LABEL: @avx2_pslli_d_64( 390 ; CHECK-NEXT: ret <8 x i32> zeroinitializer 391 ; 392 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64) 393 ret <8 x i32> %1 394 } 395 396 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { 397 ; CHECK-LABEL: @avx2_pslli_q_0( 398 ; CHECK-NEXT: ret <4 x i64> %v 399 ; 400 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0) 401 ret <4 x i64> %1 402 } 403 404 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { 405 ; CHECK-LABEL: @avx2_pslli_q_15( 406 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15> 407 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 408 ; 409 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15) 410 ret <4 x i64> %1 411 } 412 413 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) { 414 ; CHECK-LABEL: @avx2_pslli_q_64( 415 ; CHECK-NEXT: ret <4 x i64> zeroinitializer 416 ; 417 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64) 418 ret <4 x i64> %1 419 } 420 421 ; 422 ; ASHR - Constant Vector 423 ; 424 425 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { 426 ; CHECK-LABEL: @sse2_psra_w_0( 427 ; CHECK-NEXT: ret <8 x i16> %v 428 ; 429 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer) 430 ret <8 x i16> %1 431 } 432 433 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { 434 ; CHECK-LABEL: @sse2_psra_w_15( 435 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 436 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 437 ; 438 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 439 ret <8 x i16> %1 440 } 441 442 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { 443 ; CHECK-LABEL: @sse2_psra_w_15_splat( 444 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 445 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 446 ; 447 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>) 448 ret <8 x i16> %1 449 } 450 451 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { 452 ; CHECK-LABEL: @sse2_psra_w_64( 453 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 454 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 455 ; 456 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 457 ret <8 x i16> %1 458 } 459 460 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { 461 ; CHECK-LABEL: @sse2_psra_d_0( 462 ; CHECK-NEXT: ret <4 x i32> %v 463 ; 464 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer) 465 ret <4 x i32> %1 466 } 467 468 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { 469 ; CHECK-LABEL: @sse2_psra_d_15( 470 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15> 471 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 472 ; 473 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>) 474 ret <4 x i32> %1 475 } 476 477 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { 478 ; CHECK-LABEL: @sse2_psra_d_15_splat( 479 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31> 480 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 481 ; 482 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 483 ret <4 x i32> %1 484 } 485 486 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { 487 ; CHECK-LABEL: @sse2_psra_d_64( 488 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31> 489 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 490 ; 491 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>) 492 ret <4 x i32> %1 493 } 494 495 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { 496 ; CHECK-LABEL: @avx2_psra_w_0( 497 ; CHECK-NEXT: ret <16 x i16> %v 498 ; 499 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer) 500 ret <16 x i16> %1 501 } 502 503 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { 504 ; CHECK-LABEL: @avx2_psra_w_15( 505 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 506 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 507 ; 508 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 509 ret <16 x i16> %1 510 } 511 512 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { 513 ; CHECK-LABEL: @avx2_psra_w_15_splat( 514 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 515 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 516 ; 517 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>) 518 ret <16 x i16> %1 519 } 520 521 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { 522 ; CHECK-LABEL: @avx2_psra_w_64( 523 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 524 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 525 ; 526 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 527 ret <16 x i16> %1 528 } 529 530 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { 531 ; CHECK-LABEL: @avx2_psra_d_0( 532 ; CHECK-NEXT: ret <8 x i32> %v 533 ; 534 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer) 535 ret <8 x i32> %1 536 } 537 538 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { 539 ; CHECK-LABEL: @avx2_psra_d_15( 540 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 541 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 542 ; 543 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>) 544 ret <8 x i32> %1 545 } 546 547 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { 548 ; CHECK-LABEL: @avx2_psra_d_15_splat( 549 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 550 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 551 ; 552 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 553 ret <8 x i32> %1 554 } 555 556 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { 557 ; CHECK-LABEL: @avx2_psra_d_64( 558 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 559 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 560 ; 561 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>) 562 ret <8 x i32> %1 563 } 564 565 ; 566 ; LSHR - Constant Vector 567 ; 568 569 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { 570 ; CHECK-LABEL: @sse2_psrl_w_0( 571 ; CHECK-NEXT: ret <8 x i16> %v 572 ; 573 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer) 574 ret <8 x i16> %1 575 } 576 577 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { 578 ; CHECK-LABEL: @sse2_psrl_w_15( 579 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 580 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 581 ; 582 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 583 ret <8 x i16> %1 584 } 585 586 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) { 587 ; CHECK-LABEL: @sse2_psrl_w_15_splat( 588 ; CHECK-NEXT: ret <8 x i16> zeroinitializer 589 ; 590 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>) 591 ret <8 x i16> %1 592 } 593 594 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) { 595 ; CHECK-LABEL: @sse2_psrl_w_64( 596 ; CHECK-NEXT: ret <8 x i16> zeroinitializer 597 ; 598 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 599 ret <8 x i16> %1 600 } 601 602 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { 603 ; CHECK-LABEL: @sse2_psrl_d_0( 604 ; CHECK-NEXT: ret <4 x i32> %v 605 ; 606 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer) 607 ret <4 x i32> %1 608 } 609 610 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { 611 ; CHECK-LABEL: @sse2_psrl_d_15( 612 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15> 613 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 614 ; 615 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>) 616 ret <4 x i32> %1 617 } 618 619 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) { 620 ; CHECK-LABEL: @sse2_psrl_d_15_splat( 621 ; CHECK-NEXT: ret <4 x i32> zeroinitializer 622 ; 623 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 624 ret <4 x i32> %1 625 } 626 627 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) { 628 ; CHECK-LABEL: @sse2_psrl_d_64( 629 ; CHECK-NEXT: ret <4 x i32> zeroinitializer 630 ; 631 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>) 632 ret <4 x i32> %1 633 } 634 635 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { 636 ; CHECK-LABEL: @sse2_psrl_q_0( 637 ; CHECK-NEXT: ret <2 x i64> %v 638 ; 639 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer) 640 ret <2 x i64> %1 641 } 642 643 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { 644 ; CHECK-LABEL: @sse2_psrl_q_15( 645 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15> 646 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 647 ; 648 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>) 649 ret <2 x i64> %1 650 } 651 652 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) { 653 ; CHECK-LABEL: @sse2_psrl_q_64( 654 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 655 ; 656 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>) 657 ret <2 x i64> %1 658 } 659 660 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { 661 ; CHECK-LABEL: @avx2_psrl_w_0( 662 ; CHECK-NEXT: ret <16 x i16> %v 663 ; 664 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer) 665 ret <16 x i16> %1 666 } 667 668 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { 669 ; CHECK-LABEL: @avx2_psrl_w_15( 670 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 671 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 672 ; 673 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 674 ret <16 x i16> %1 675 } 676 677 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) { 678 ; CHECK-LABEL: @avx2_psrl_w_15_splat( 679 ; CHECK-NEXT: ret <16 x i16> zeroinitializer 680 ; 681 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>) 682 ret <16 x i16> %1 683 } 684 685 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) { 686 ; CHECK-LABEL: @avx2_psrl_w_64( 687 ; CHECK-NEXT: ret <16 x i16> zeroinitializer 688 ; 689 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 690 ret <16 x i16> %1 691 } 692 693 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { 694 ; CHECK-LABEL: @avx2_psrl_d_0( 695 ; CHECK-NEXT: ret <8 x i32> %v 696 ; 697 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer) 698 ret <8 x i32> %1 699 } 700 701 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { 702 ; CHECK-LABEL: @avx2_psrl_d_15( 703 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 704 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 705 ; 706 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>) 707 ret <8 x i32> %1 708 } 709 710 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) { 711 ; CHECK-LABEL: @avx2_psrl_d_15_splat( 712 ; CHECK-NEXT: ret <8 x i32> zeroinitializer 713 ; 714 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 715 ret <8 x i32> %1 716 } 717 718 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) { 719 ; CHECK-LABEL: @avx2_psrl_d_64( 720 ; CHECK-NEXT: ret <8 x i32> zeroinitializer 721 ; 722 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>) 723 ret <8 x i32> %1 724 } 725 726 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { 727 ; CHECK-LABEL: @avx2_psrl_q_0( 728 ; CHECK-NEXT: ret <4 x i64> %v 729 ; 730 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer) 731 ret <4 x i64> %1 732 } 733 734 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { 735 ; CHECK-LABEL: @avx2_psrl_q_15( 736 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15> 737 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 738 ; 739 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>) 740 ret <4 x i64> %1 741 } 742 743 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) { 744 ; CHECK-LABEL: @avx2_psrl_q_64( 745 ; CHECK-NEXT: ret <4 x i64> zeroinitializer 746 ; 747 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>) 748 ret <4 x i64> %1 749 } 750 751 ; 752 ; SHL - Constant Vector 753 ; 754 755 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { 756 ; CHECK-LABEL: @sse2_psll_w_0( 757 ; CHECK-NEXT: ret <8 x i16> %v 758 ; 759 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer) 760 ret <8 x i16> %1 761 } 762 763 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { 764 ; CHECK-LABEL: @sse2_psll_w_15( 765 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 766 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 767 ; 768 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 769 ret <8 x i16> %1 770 } 771 772 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) { 773 ; CHECK-LABEL: @sse2_psll_w_15_splat( 774 ; CHECK-NEXT: ret <8 x i16> zeroinitializer 775 ; 776 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>) 777 ret <8 x i16> %1 778 } 779 780 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) { 781 ; CHECK-LABEL: @sse2_psll_w_64( 782 ; CHECK-NEXT: ret <8 x i16> zeroinitializer 783 ; 784 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 785 ret <8 x i16> %1 786 } 787 788 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) { 789 ; CHECK-LABEL: @sse2_psll_d_0( 790 ; CHECK-NEXT: ret <4 x i32> %v 791 ; 792 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer) 793 ret <4 x i32> %1 794 } 795 796 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) { 797 ; CHECK-LABEL: @sse2_psll_d_15( 798 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15> 799 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 800 ; 801 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>) 802 ret <4 x i32> %1 803 } 804 805 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) { 806 ; CHECK-LABEL: @sse2_psll_d_15_splat( 807 ; CHECK-NEXT: ret <4 x i32> zeroinitializer 808 ; 809 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 810 ret <4 x i32> %1 811 } 812 813 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) { 814 ; CHECK-LABEL: @sse2_psll_d_64( 815 ; CHECK-NEXT: ret <4 x i32> zeroinitializer 816 ; 817 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>) 818 ret <4 x i32> %1 819 } 820 821 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) { 822 ; CHECK-LABEL: @sse2_psll_q_0( 823 ; CHECK-NEXT: ret <2 x i64> %v 824 ; 825 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer) 826 ret <2 x i64> %1 827 } 828 829 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) { 830 ; CHECK-LABEL: @sse2_psll_q_15( 831 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15> 832 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 833 ; 834 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>) 835 ret <2 x i64> %1 836 } 837 838 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) { 839 ; CHECK-LABEL: @sse2_psll_q_64( 840 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 841 ; 842 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>) 843 ret <2 x i64> %1 844 } 845 846 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) { 847 ; CHECK-LABEL: @avx2_psll_w_0( 848 ; CHECK-NEXT: ret <16 x i16> %v 849 ; 850 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer) 851 ret <16 x i16> %1 852 } 853 854 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) { 855 ; CHECK-LABEL: @avx2_psll_w_15( 856 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> 857 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 858 ; 859 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 860 ret <16 x i16> %1 861 } 862 863 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) { 864 ; CHECK-LABEL: @avx2_psll_w_15_splat( 865 ; CHECK-NEXT: ret <16 x i16> zeroinitializer 866 ; 867 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>) 868 ret <16 x i16> %1 869 } 870 871 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) { 872 ; CHECK-LABEL: @avx2_psll_w_64( 873 ; CHECK-NEXT: ret <16 x i16> zeroinitializer 874 ; 875 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>) 876 ret <16 x i16> %1 877 } 878 879 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) { 880 ; CHECK-LABEL: @avx2_psll_d_0( 881 ; CHECK-NEXT: ret <8 x i32> %v 882 ; 883 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer) 884 ret <8 x i32> %1 885 } 886 887 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) { 888 ; CHECK-LABEL: @avx2_psll_d_15( 889 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 890 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 891 ; 892 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>) 893 ret <8 x i32> %1 894 } 895 896 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) { 897 ; CHECK-LABEL: @avx2_psll_d_15_splat( 898 ; CHECK-NEXT: ret <8 x i32> zeroinitializer 899 ; 900 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>) 901 ret <8 x i32> %1 902 } 903 904 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) { 905 ; CHECK-LABEL: @avx2_psll_d_64( 906 ; CHECK-NEXT: ret <8 x i32> zeroinitializer 907 ; 908 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>) 909 ret <8 x i32> %1 910 } 911 912 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) { 913 ; CHECK-LABEL: @avx2_psll_q_0( 914 ; CHECK-NEXT: ret <4 x i64> %v 915 ; 916 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer) 917 ret <4 x i64> %1 918 } 919 920 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) { 921 ; CHECK-LABEL: @avx2_psll_q_15( 922 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15> 923 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 924 ; 925 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>) 926 ret <4 x i64> %1 927 } 928 929 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) { 930 ; CHECK-LABEL: @avx2_psll_q_64( 931 ; CHECK-NEXT: ret <4 x i64> zeroinitializer 932 ; 933 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>) 934 ret <4 x i64> %1 935 } 936 937 ; 938 ; ASHR - Constant Per-Element Vector 939 ; 940 941 define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) { 942 ; CHECK-LABEL: @avx2_psrav_d_128_0( 943 ; CHECK-NEXT: ret <4 x i32> %v 944 ; 945 %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer) 946 ret <4 x i32> %1 947 } 948 949 define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) { 950 ; CHECK-LABEL: @avx2_psrav_d_256_0( 951 ; CHECK-NEXT: ret <8 x i32> %v 952 ; 953 %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) 954 ret <8 x i32> %1 955 } 956 957 define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) { 958 ; CHECK-LABEL: @avx2_psrav_d_128_var( 959 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31> 960 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 961 ; 962 %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 963 ret <4 x i32> %1 964 } 965 966 define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) { 967 ; CHECK-LABEL: @avx2_psrav_d_256_var( 968 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 969 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 970 ; 971 %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>) 972 ret <8 x i32> %1 973 } 974 975 define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) { 976 ; CHECK-LABEL: @avx2_psrav_d_128_allbig( 977 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef> 978 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 979 ; 980 %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) 981 ret <4 x i32> %1 982 } 983 984 define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) { 985 ; CHECK-LABEL: @avx2_psrav_d_256_allbig( 986 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> 987 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 988 ; 989 %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) 990 ret <8 x i32> %1 991 } 992 993 define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) { 994 ; CHECK-LABEL: @avx2_psrav_d_128_undef( 995 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31> 996 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 997 ; 998 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0 999 %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1) 1000 ret <4 x i32> %2 1001 } 1002 1003 define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) { 1004 ; CHECK-LABEL: @avx2_psrav_d_256_undef( 1005 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 1006 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1007 ; 1008 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1 1009 %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1) 1010 ret <8 x i32> %2 1011 } 1012 1013 ; 1014 ; LSHR - Constant Per-Element Vector 1015 ; 1016 1017 define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) { 1018 ; CHECK-LABEL: @avx2_psrlv_d_128_0( 1019 ; CHECK-NEXT: ret <4 x i32> %v 1020 ; 1021 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer) 1022 ret <4 x i32> %1 1023 } 1024 1025 define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) { 1026 ; CHECK-LABEL: @avx2_psrlv_d_256_0( 1027 ; CHECK-NEXT: ret <8 x i32> %v 1028 ; 1029 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) 1030 ret <8 x i32> %1 1031 } 1032 1033 define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) { 1034 ; CHECK-LABEL: @avx2_psrlv_d_128_var( 1035 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31> 1036 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1037 ; 1038 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) 1039 ret <4 x i32> %1 1040 } 1041 1042 define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) { 1043 ; CHECK-LABEL: @avx2_psrlv_d_256_var( 1044 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 1045 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1046 ; 1047 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) 1048 ret <8 x i32> %1 1049 } 1050 1051 define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) { 1052 ; CHECK-LABEL: @avx2_psrlv_d_128_big( 1053 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 1054 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1055 ; 1056 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 1057 ret <4 x i32> %1 1058 } 1059 1060 define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) { 1061 ; CHECK-LABEL: @avx2_psrlv_d_256_big( 1062 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 1063 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1064 ; 1065 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 1066 ret <8 x i32> %1 1067 } 1068 1069 define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) { 1070 ; CHECK-LABEL: @avx2_psrlv_d_128_allbig( 1071 ; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef> 1072 ; 1073 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) 1074 ret <4 x i32> %1 1075 } 1076 1077 define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) { 1078 ; CHECK-LABEL: @avx2_psrlv_d_256_allbig( 1079 ; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1080 ; 1081 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) 1082 ret <8 x i32> %1 1083 } 1084 1085 define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) { 1086 ; CHECK-LABEL: @avx2_psrlv_d_128_undef( 1087 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31> 1088 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1089 ; 1090 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0 1091 %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1) 1092 ret <4 x i32> %2 1093 } 1094 1095 define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) { 1096 ; CHECK-LABEL: @avx2_psrlv_d_256_undef( 1097 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> 1098 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1099 ; 1100 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1 1101 %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1) 1102 ret <8 x i32> %2 1103 } 1104 1105 define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) { 1106 ; CHECK-LABEL: @avx2_psrlv_q_128_0( 1107 ; CHECK-NEXT: ret <2 x i64> %v 1108 ; 1109 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer) 1110 ret <2 x i64> %1 1111 } 1112 1113 define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) { 1114 ; CHECK-LABEL: @avx2_psrlv_q_256_0( 1115 ; CHECK-NEXT: ret <4 x i64> %v 1116 ; 1117 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) 1118 ret <4 x i64> %1 1119 } 1120 1121 define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) { 1122 ; CHECK-LABEL: @avx2_psrlv_q_128_var( 1123 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8> 1124 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1125 ; 1126 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) 1127 ret <2 x i64> %1 1128 } 1129 1130 define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) { 1131 ; CHECK-LABEL: @avx2_psrlv_q_256_var( 1132 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31> 1133 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1134 ; 1135 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) 1136 ret <4 x i64> %1 1137 } 1138 1139 define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) { 1140 ; CHECK-LABEL: @avx2_psrlv_q_128_big( 1141 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>) 1142 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1143 ; 1144 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>) 1145 ret <2 x i64> %1 1146 } 1147 1148 define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) { 1149 ; CHECK-LABEL: @avx2_psrlv_q_256_big( 1150 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>) 1151 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1152 ; 1153 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>) 1154 ret <4 x i64> %1 1155 } 1156 1157 define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) { 1158 ; CHECK-LABEL: @avx2_psrlv_q_128_allbig( 1159 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 1160 ; 1161 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) 1162 ret <2 x i64> %1 1163 } 1164 1165 define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { 1166 ; CHECK-LABEL: @avx2_psrlv_q_256_allbig( 1167 ; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0> 1168 ; 1169 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>) 1170 ret <4 x i64> %1 1171 } 1172 1173 define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { 1174 ; CHECK-LABEL: @avx2_psrlv_q_128_undef( 1175 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 undef> 1176 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1177 ; 1178 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1 1179 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1) 1180 ret <2 x i64> %2 1181 } 1182 1183 define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { 1184 ; CHECK-LABEL: @avx2_psrlv_q_256_undef( 1185 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31> 1186 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1187 ; 1188 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 1189 %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1) 1190 ret <4 x i64> %2 1191 } 1192 1193 ; 1194 ; SHL - Constant Per-Element Vector 1195 ; 1196 1197 define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { 1198 ; CHECK-LABEL: @avx2_psllv_d_128_0( 1199 ; CHECK-NEXT: ret <4 x i32> %v 1200 ; 1201 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) 1202 ret <4 x i32> %1 1203 } 1204 1205 define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) { 1206 ; CHECK-LABEL: @avx2_psllv_d_256_0( 1207 ; CHECK-NEXT: ret <8 x i32> %v 1208 ; 1209 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) 1210 ret <8 x i32> %1 1211 } 1212 1213 define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { 1214 ; CHECK-LABEL: @avx2_psllv_d_128_var( 1215 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31> 1216 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1217 ; 1218 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) 1219 ret <4 x i32> %1 1220 } 1221 1222 define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) { 1223 ; CHECK-LABEL: @avx2_psllv_d_256_var( 1224 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 1225 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1226 ; 1227 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) 1228 ret <8 x i32> %1 1229 } 1230 1231 define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) { 1232 ; CHECK-LABEL: @avx2_psllv_d_128_big( 1233 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 1234 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1235 ; 1236 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 1237 ret <4 x i32> %1 1238 } 1239 1240 define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) { 1241 ; CHECK-LABEL: @avx2_psllv_d_256_big( 1242 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 1243 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1244 ; 1245 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 1246 ret <8 x i32> %1 1247 } 1248 1249 define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { 1250 ; CHECK-LABEL: @avx2_psllv_d_128_allbig( 1251 ; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef> 1252 ; 1253 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) 1254 ret <4 x i32> %1 1255 } 1256 1257 define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { 1258 ; CHECK-LABEL: @avx2_psllv_d_256_allbig( 1259 ; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1260 ; 1261 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) 1262 ret <8 x i32> %1 1263 } 1264 1265 define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { 1266 ; CHECK-LABEL: @avx2_psllv_d_128_undef( 1267 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31> 1268 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1269 ; 1270 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0 1271 %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1) 1272 ret <4 x i32> %2 1273 } 1274 1275 define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { 1276 ; CHECK-LABEL: @avx2_psllv_d_256_undef( 1277 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> 1278 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1279 ; 1280 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1 1281 %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1) 1282 ret <8 x i32> %2 1283 } 1284 1285 define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { 1286 ; CHECK-LABEL: @avx2_psllv_q_128_0( 1287 ; CHECK-NEXT: ret <2 x i64> %v 1288 ; 1289 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) 1290 ret <2 x i64> %1 1291 } 1292 1293 define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) { 1294 ; CHECK-LABEL: @avx2_psllv_q_256_0( 1295 ; CHECK-NEXT: ret <4 x i64> %v 1296 ; 1297 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) 1298 ret <4 x i64> %1 1299 } 1300 1301 define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { 1302 ; CHECK-LABEL: @avx2_psllv_q_128_var( 1303 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8> 1304 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1305 ; 1306 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) 1307 ret <2 x i64> %1 1308 } 1309 1310 define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) { 1311 ; CHECK-LABEL: @avx2_psllv_q_256_var( 1312 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31> 1313 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1314 ; 1315 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) 1316 ret <4 x i64> %1 1317 } 1318 1319 define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) { 1320 ; CHECK-LABEL: @avx2_psllv_q_128_big( 1321 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>) 1322 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1323 ; 1324 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>) 1325 ret <2 x i64> %1 1326 } 1327 1328 define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) { 1329 ; CHECK-LABEL: @avx2_psllv_q_256_big( 1330 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>) 1331 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1332 ; 1333 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>) 1334 ret <4 x i64> %1 1335 } 1336 1337 define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { 1338 ; CHECK-LABEL: @avx2_psllv_q_128_allbig( 1339 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 1340 ; 1341 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) 1342 ret <2 x i64> %1 1343 } 1344 1345 define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { 1346 ; CHECK-LABEL: @avx2_psllv_q_256_allbig( 1347 ; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0> 1348 ; 1349 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>) 1350 ret <4 x i64> %1 1351 } 1352 1353 define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { 1354 ; CHECK-LABEL: @avx2_psllv_q_128_undef( 1355 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 undef> 1356 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1357 ; 1358 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1 1359 %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1) 1360 ret <2 x i64> %2 1361 } 1362 1363 define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) { 1364 ; CHECK-LABEL: @avx2_psllv_q_256_undef( 1365 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31> 1366 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1367 ; 1368 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 1369 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1) 1370 ret <4 x i64> %2 1371 } 1372 1373 ; 1374 ; Vector Demanded Bits 1375 ; 1376 1377 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) { 1378 ; CHECK-LABEL: @sse2_psra_w_var( 1379 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a) 1380 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 1381 ; 1382 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1383 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) 1384 ret <8 x i16> %2 1385 } 1386 1387 define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { 1388 ; CHECK-LABEL: @sse2_psra_w_var_bc( 1389 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16> 1390 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]]) 1391 ; CHECK-NEXT: ret <8 x i16> [[TMP2]] 1392 ; 1393 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1394 %2 = bitcast <2 x i64> %1 to <8 x i16> 1395 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2) 1396 ret <8 x i16> %3 1397 } 1398 1399 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) { 1400 ; CHECK-LABEL: @sse2_psra_d_var( 1401 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a) 1402 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1403 ; 1404 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1405 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) 1406 ret <4 x i32> %2 1407 } 1408 1409 define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { 1410 ; CHECK-LABEL: @sse2_psra_d_var_bc( 1411 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32> 1412 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]]) 1413 ; CHECK-NEXT: ret <4 x i32> [[TMP2]] 1414 ; 1415 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1416 %2 = bitcast <8 x i16> %1 to <4 x i32> 1417 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2) 1418 ret <4 x i32> %3 1419 } 1420 1421 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) { 1422 ; CHECK-LABEL: @avx2_psra_w_var( 1423 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a) 1424 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 1425 ; 1426 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1427 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1) 1428 ret <16 x i16> %2 1429 } 1430 1431 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) { 1432 ; CHECK-LABEL: @avx2_psra_d_var( 1433 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a) 1434 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1435 ; 1436 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1437 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) 1438 ret <8 x i32> %2 1439 } 1440 1441 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) { 1442 ; CHECK-LABEL: @sse2_psrl_w_var( 1443 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a) 1444 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 1445 ; 1446 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1447 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1) 1448 ret <8 x i16> %2 1449 } 1450 1451 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) { 1452 ; CHECK-LABEL: @sse2_psrl_d_var( 1453 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a) 1454 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1455 ; 1456 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1457 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) 1458 ret <4 x i32> %2 1459 } 1460 1461 define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) { 1462 ; CHECK-LABEL: @sse2_psrl_q_var( 1463 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a) 1464 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1465 ; 1466 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1467 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1) 1468 ret <2 x i64> %2 1469 } 1470 1471 define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) { 1472 ; CHECK-LABEL: @avx2_psrl_w_var( 1473 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a) 1474 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 1475 ; 1476 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1477 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) 1478 ret <16 x i16> %2 1479 } 1480 1481 define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { 1482 ; CHECK-LABEL: @avx2_psrl_w_var_bc( 1483 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16> 1484 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]]) 1485 ; CHECK-NEXT: ret <16 x i16> [[TMP2]] 1486 ; 1487 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1488 %2 = bitcast <16 x i8> %1 to <8 x i16> 1489 %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2) 1490 ret <16 x i16> %3 1491 } 1492 1493 define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) { 1494 ; CHECK-LABEL: @avx2_psrl_d_var( 1495 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a) 1496 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1497 ; 1498 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1499 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) 1500 ret <8 x i32> %2 1501 } 1502 1503 define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { 1504 ; CHECK-LABEL: @avx2_psrl_d_var_bc( 1505 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32> 1506 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]]) 1507 ; CHECK-NEXT: ret <8 x i32> [[TMP2]] 1508 ; 1509 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1510 %2 = bitcast <2 x i64> %1 to <4 x i32> 1511 %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2) 1512 ret <8 x i32> %3 1513 } 1514 1515 define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) { 1516 ; CHECK-LABEL: @avx2_psrl_q_var( 1517 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a) 1518 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1519 ; 1520 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1521 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) 1522 ret <4 x i64> %2 1523 } 1524 1525 define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) { 1526 ; CHECK-LABEL: @sse2_psll_w_var( 1527 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a) 1528 ; CHECK-NEXT: ret <8 x i16> [[TMP1]] 1529 ; 1530 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1531 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1) 1532 ret <8 x i16> %2 1533 } 1534 1535 define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) { 1536 ; CHECK-LABEL: @sse2_psll_d_var( 1537 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a) 1538 ; CHECK-NEXT: ret <4 x i32> [[TMP1]] 1539 ; 1540 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1541 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1) 1542 ret <4 x i32> %2 1543 } 1544 1545 define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) { 1546 ; CHECK-LABEL: @sse2_psll_q_var( 1547 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a) 1548 ; CHECK-NEXT: ret <2 x i64> [[TMP1]] 1549 ; 1550 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1551 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) 1552 ret <2 x i64> %2 1553 } 1554 1555 define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) { 1556 ; CHECK-LABEL: @avx2_psll_w_var( 1557 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a) 1558 ; CHECK-NEXT: ret <16 x i16> [[TMP1]] 1559 ; 1560 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1561 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) 1562 ret <16 x i16> %2 1563 } 1564 1565 define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) { 1566 ; CHECK-LABEL: @avx2_psll_d_var( 1567 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a) 1568 ; CHECK-NEXT: ret <8 x i32> [[TMP1]] 1569 ; 1570 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 1571 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1) 1572 ret <8 x i32> %2 1573 } 1574 1575 define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { 1576 ; CHECK-LABEL: @avx2_psll_q_var( 1577 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a) 1578 ; CHECK-NEXT: ret <4 x i64> [[TMP1]] 1579 ; 1580 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 1581 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1) 1582 ret <4 x i64> %2 1583 } 1584 1585 ; 1586 ; Constant Folding 1587 ; 1588 1589 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) { 1590 ; CHECK-LABEL: @test_sse2_psra_w_0( 1591 ; CHECK-NEXT: ret <8 x i16> %A 1592 ; 1593 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0) 1594 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 1595 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0) 1596 ret <8 x i16> %3 1597 } 1598 1599 define <8 x i16> @test_sse2_psra_w_8() { 1600 ; CHECK-LABEL: @test_sse2_psra_w_8( 1601 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16> 1602 ; 1603 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16> 1604 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3) 1605 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 1606 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2) 1607 ret <8 x i16> %4 1608 } 1609 1610 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) { 1611 ; CHECK-LABEL: @test_sse2_psra_d_0( 1612 ; CHECK-NEXT: ret <4 x i32> %A 1613 ; 1614 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0) 1615 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>) 1616 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0) 1617 ret <4 x i32> %3 1618 } 1619 1620 define <4 x i32> @sse2_psra_d_8() { 1621 ; CHECK-LABEL: @sse2_psra_d_8( 1622 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608> 1623 ; 1624 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32> 1625 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3) 1626 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>) 1627 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2) 1628 ret <4 x i32> %4 1629 } 1630 1631 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) { 1632 ; CHECK-LABEL: @test_avx2_psra_w_0( 1633 ; CHECK-NEXT: ret <16 x i16> %A 1634 ; 1635 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0) 1636 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 1637 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0) 1638 ret <16 x i16> %3 1639 } 1640 1641 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) { 1642 ; CHECK-LABEL: @test_avx2_psra_w_8( 1643 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16> 1644 ; 1645 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16> 1646 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3) 1647 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 1648 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2) 1649 ret <16 x i16> %4 1650 } 1651 1652 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) { 1653 ; CHECK-LABEL: @test_avx2_psra_d_0( 1654 ; CHECK-NEXT: ret <8 x i32> %A 1655 ; 1656 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0) 1657 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>) 1658 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0) 1659 ret <8 x i32> %3 1660 } 1661 1662 define <8 x i32> @test_avx2_psra_d_8() { 1663 ; CHECK-LABEL: @test_avx2_psra_d_8( 1664 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608> 1665 ; 1666 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32> 1667 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3) 1668 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>) 1669 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2) 1670 ret <8 x i32> %4 1671 } 1672 1673 ; 1674 ; Old Tests 1675 ; 1676 1677 define <2 x i64> @test_sse2_1() { 1678 ; CHECK-LABEL: @test_sse2_1( 1679 ; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624> 1680 ; 1681 %S = bitcast i32 1 to i32 1682 %1 = zext i32 %S to i64 1683 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1684 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1685 %4 = bitcast <2 x i64> %3 to <8 x i16> 1686 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4) 1687 %6 = bitcast <8 x i16> %5 to <4 x i32> 1688 %7 = bitcast <2 x i64> %3 to <4 x i32> 1689 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7) 1690 %9 = bitcast <4 x i32> %8 to <2 x i64> 1691 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3) 1692 %11 = bitcast <2 x i64> %10 to <8 x i16> 1693 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S) 1694 %13 = bitcast <8 x i16> %12 to <4 x i32> 1695 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S) 1696 %15 = bitcast <4 x i32> %14 to <2 x i64> 1697 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) 1698 ret <2 x i64> %16 1699 } 1700 1701 define <4 x i64> @test_avx2_1() { 1702 ; CHECK-LABEL: @test_avx2_1( 1703 ; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256> 1704 ; 1705 %S = bitcast i32 1 to i32 1706 %1 = zext i32 %S to i64 1707 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1708 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1709 %4 = bitcast <2 x i64> %3 to <8 x i16> 1710 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4) 1711 %6 = bitcast <16 x i16> %5 to <8 x i32> 1712 %7 = bitcast <2 x i64> %3 to <4 x i32> 1713 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7) 1714 %9 = bitcast <8 x i32> %8 to <4 x i64> 1715 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3) 1716 %11 = bitcast <4 x i64> %10 to <16 x i16> 1717 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S) 1718 %13 = bitcast <16 x i16> %12 to <8 x i32> 1719 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S) 1720 %15 = bitcast <8 x i32> %14 to <4 x i64> 1721 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S) 1722 ret <4 x i64> %16 1723 } 1724 1725 define <2 x i64> @test_sse2_0() { 1726 ; CHECK-LABEL: @test_sse2_0( 1727 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 1728 ; 1729 %S = bitcast i32 128 to i32 1730 %1 = zext i32 %S to i64 1731 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1732 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1733 %4 = bitcast <2 x i64> %3 to <8 x i16> 1734 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4) 1735 %6 = bitcast <8 x i16> %5 to <4 x i32> 1736 %7 = bitcast <2 x i64> %3 to <4 x i32> 1737 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7) 1738 %9 = bitcast <4 x i32> %8 to <2 x i64> 1739 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3) 1740 %11 = bitcast <2 x i64> %10 to <8 x i16> 1741 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S) 1742 %13 = bitcast <8 x i16> %12 to <4 x i32> 1743 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S) 1744 %15 = bitcast <4 x i32> %14 to <2 x i64> 1745 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) 1746 ret <2 x i64> %16 1747 } 1748 1749 define <4 x i64> @test_avx2_0() { 1750 ; CHECK-LABEL: @test_avx2_0( 1751 ; CHECK-NEXT: ret <4 x i64> zeroinitializer 1752 ; 1753 %S = bitcast i32 128 to i32 1754 %1 = zext i32 %S to i64 1755 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1756 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1757 %4 = bitcast <2 x i64> %3 to <8 x i16> 1758 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4) 1759 %6 = bitcast <16 x i16> %5 to <8 x i32> 1760 %7 = bitcast <2 x i64> %3 to <4 x i32> 1761 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7) 1762 %9 = bitcast <8 x i32> %8 to <4 x i64> 1763 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3) 1764 %11 = bitcast <4 x i64> %10 to <16 x i16> 1765 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S) 1766 %13 = bitcast <16 x i16> %12 to <8 x i32> 1767 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S) 1768 %15 = bitcast <8 x i32> %14 to <4 x i64> 1769 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S) 1770 ret <4 x i64> %16 1771 } 1772 define <2 x i64> @test_sse2_psrl_1() { 1773 ; CHECK-LABEL: @test_sse2_psrl_1( 1774 ; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020> 1775 ; 1776 %S = bitcast i32 1 to i32 1777 %1 = zext i32 %S to i64 1778 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1779 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1780 %4 = bitcast <2 x i64> %3 to <8 x i16> 1781 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4) 1782 %6 = bitcast <8 x i16> %5 to <4 x i32> 1783 %7 = bitcast <2 x i64> %3 to <4 x i32> 1784 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7) 1785 %9 = bitcast <4 x i32> %8 to <2 x i64> 1786 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3) 1787 %11 = bitcast <2 x i64> %10 to <8 x i16> 1788 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S) 1789 %13 = bitcast <8 x i16> %12 to <4 x i32> 1790 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S) 1791 %15 = bitcast <4 x i32> %14 to <2 x i64> 1792 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S) 1793 ret <2 x i64> %16 1794 } 1795 1796 define <4 x i64> @test_avx2_psrl_1() { 1797 ; CHECK-LABEL: @test_avx2_psrl_1( 1798 ; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128> 1799 ; 1800 %S = bitcast i32 1 to i32 1801 %1 = zext i32 %S to i64 1802 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1803 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1804 %4 = bitcast <2 x i64> %3 to <8 x i16> 1805 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4) 1806 %6 = bitcast <16 x i16> %5 to <8 x i32> 1807 %7 = bitcast <2 x i64> %3 to <4 x i32> 1808 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7) 1809 %9 = bitcast <8 x i32> %8 to <4 x i64> 1810 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3) 1811 %11 = bitcast <4 x i64> %10 to <16 x i16> 1812 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S) 1813 %13 = bitcast <16 x i16> %12 to <8 x i32> 1814 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S) 1815 %15 = bitcast <8 x i32> %14 to <4 x i64> 1816 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S) 1817 ret <4 x i64> %16 1818 } 1819 1820 define <2 x i64> @test_sse2_psrl_0() { 1821 ; CHECK-LABEL: @test_sse2_psrl_0( 1822 ; CHECK-NEXT: ret <2 x i64> zeroinitializer 1823 ; 1824 %S = bitcast i32 128 to i32 1825 %1 = zext i32 %S to i64 1826 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1827 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1828 %4 = bitcast <2 x i64> %3 to <8 x i16> 1829 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4) 1830 %6 = bitcast <8 x i16> %5 to <4 x i32> 1831 %7 = bitcast <2 x i64> %3 to <4 x i32> 1832 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7) 1833 %9 = bitcast <4 x i32> %8 to <2 x i64> 1834 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3) 1835 %11 = bitcast <2 x i64> %10 to <8 x i16> 1836 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S) 1837 %13 = bitcast <8 x i16> %12 to <4 x i32> 1838 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S) 1839 %15 = bitcast <4 x i32> %14 to <2 x i64> 1840 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S) 1841 ret <2 x i64> %16 1842 } 1843 1844 define <4 x i64> @test_avx2_psrl_0() { 1845 ; CHECK-LABEL: @test_avx2_psrl_0( 1846 ; CHECK-NEXT: ret <4 x i64> zeroinitializer 1847 ; 1848 %S = bitcast i32 128 to i32 1849 %1 = zext i32 %S to i64 1850 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1851 %3 = insertelement <2 x i64> %2, i64 0, i32 1 1852 %4 = bitcast <2 x i64> %3 to <8 x i16> 1853 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4) 1854 %6 = bitcast <16 x i16> %5 to <8 x i32> 1855 %7 = bitcast <2 x i64> %3 to <4 x i32> 1856 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7) 1857 %9 = bitcast <8 x i32> %8 to <4 x i64> 1858 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3) 1859 %11 = bitcast <4 x i64> %10 to <16 x i16> 1860 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S) 1861 %13 = bitcast <16 x i16> %12 to <8 x i32> 1862 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S) 1863 %15 = bitcast <8 x i32> %14 to <4 x i64> 1864 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S) 1865 ret <4 x i64> %16 1866 } 1867 1868 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1 1869 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1 1870 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1 1871 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1 1872 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1 1873 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1 1874 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1 1875 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1 1876 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1 1877 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1 1878 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1 1879 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1 1880 1881 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1 1882 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1 1883 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1 1884 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1 1885 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1 1886 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1 1887 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1 1888 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1 1889 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1 1890 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1 1891 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1 1892 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1 1893 1894 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1 1895 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1 1896 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1 1897 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1 1898 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1 1899 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1 1900 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1 1901 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1 1902 1903 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1 1904 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1 1905 1906 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1 1907 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1 1908 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1 1909 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1 1910 1911 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1 1912 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1 1913 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1 1914 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1 1915 1916 attributes #1 = { nounwind readnone } 1917