Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=-sse4.1 < %s | FileCheck %s
      2 
      3 ; Verify that target-specific packed vector shifts by an immediate count are
      4 ; folded into a simple build_vector when every element of the shifted input
      5 ; vector is either a constant or undef.
      6 
      7 define <8 x i16> @test1() {
          ; pslli.w (16-bit lanes, shift left by immediate 3) applied to an
          ; all-constant vector. The directives below require that no psll
          ; instruction is emitted and that a movaps is directly followed by ret.
          ; The label pattern ends in ':' because the bare name is also a prefix
          ; of test10 through test16 and would not identify this function uniquely.
      8   %shl = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
      9   ret <8 x i16> %shl
     10 }
     11 ; CHECK-LABEL: test1:
     12 ; CHECK-NOT: psll
     13 ; CHECK: movaps
     14 ; CHECK-NEXT: ret
     15 
     16 define <8 x i16> @test2() {
          ; psrli.w (16-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psrl instruction is
          ; emitted and that a movaps is directly followed by ret.
     17   %lshr = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
     18   ret <8 x i16> %lshr
     19 }
     20 ; CHECK-LABEL: test2
     21 ; CHECK-NOT: psrl
     22 ; CHECK: movaps
     23 ; CHECK-NEXT: ret
     24 
     25 define <8 x i16> @test3() {
          ; psrai.w (16-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psra instruction is
          ; emitted and that a movaps is directly followed by ret.
     26   %ashr = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
     27   ret <8 x i16> %ashr
     28 }
     29 ; CHECK-LABEL: test3
     30 ; CHECK-NOT: psra
     31 ; CHECK: movaps
     32 ; CHECK-NEXT: ret
     33 
     34 define <4 x i32> @test4() {
          ; pslli.d (32-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psll instruction is
          ; emitted and that a movaps is directly followed by ret.
     35   %shl = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
     36   ret <4 x i32> %shl
     37 }
     38 ; CHECK-LABEL: test4
     39 ; CHECK-NOT: psll
     40 ; CHECK: movaps
     41 ; CHECK-NEXT: ret
     42 
     43 define <4 x i32> @test5() {
          ; psrli.d (32-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psrl instruction is
          ; emitted and that a movaps is directly followed by ret.
     44   %lshr = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
     45   ret <4 x i32> %lshr
     46 }
     47 ; CHECK-LABEL: test5
     48 ; CHECK-NOT: psrl
     49 ; CHECK: movaps
     50 ; CHECK-NEXT: ret
     51 
     52 define <4 x i32> @test6() {
          ; psrai.d (32-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psra instruction is
          ; emitted and that a movaps is directly followed by ret.
     53   %ashr = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
     54   ret <4 x i32> %ashr
     55 }
     56 ; CHECK-LABEL: test6
     57 ; CHECK-NOT: psra
     58 ; CHECK: movaps
     59 ; CHECK-NEXT: ret
     60 
     61 define <2 x i64> @test7() {
          ; pslli.q (64-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psll instruction is
          ; emitted and that a movaps is directly followed by ret.
     62   %shl = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
     63   ret <2 x i64> %shl
     64 }
     65 ; CHECK-LABEL: test7
     66 ; CHECK-NOT: psll
     67 ; CHECK: movaps
     68 ; CHECK-NEXT: ret
     69 
     70 define <2 x i64> @test8() {
          ; psrli.q (64-bit lanes, shift by immediate 3) applied to an all-constant
          ; vector. The directives below require that no psrl instruction is
          ; emitted and that a movaps is directly followed by ret.
     71   %lshr = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
     72   ret <2 x i64> %lshr
     73 }
     74 ; CHECK-LABEL: test8
     75 ; CHECK-NOT: psrl
     76 ; CHECK: movaps
     77 ; CHECK-NEXT: ret
     78 
     79 define <8 x i16> @test9() {
          ; psrai.w (shift by immediate 3) on a vector that mixes constant and
          ; undef lanes. The directives below require that no psra instruction is
          ; emitted and that a movaps is directly followed by ret.
     80   %ashr = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
     81   ret <8 x i16> %ashr
     82 }
     83 ; CHECK-LABEL: test9
     84 ; CHECK-NOT: psra
     85 ; CHECK: movaps
     86 ; CHECK-NEXT: ret
     87 
     88 define <4 x i32> @test10() {
          ; psrai.d (shift by immediate 3) on a vector that mixes constant and
          ; undef lanes. The directives below require that no psra instruction is
          ; emitted and that a movaps is directly followed by ret.
     89   %ashr = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
     90   ret <4 x i32> %ashr
     91 }
     92 ; CHECK-LABEL: test10
     93 ; CHECK-NOT: psra
     94 ; CHECK: movaps
     95 ; CHECK-NEXT: ret
     96 
     97 define <2 x i64> @test11() {
          ; psrli.q (shift by immediate 3) on a vector whose first lane is undef
          ; and whose second lane is constant. The directives below require that
          ; no psrl instruction is emitted and that a movaps is directly followed
          ; by ret.
     98   %lshr = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
     99   ret <2 x i64> %lshr
    100 }
    101 ; CHECK-LABEL: test11
    102 ; CHECK-NOT: psrl
    103 ; CHECK: movaps
    104 ; CHECK-NEXT: ret
    105 
    106 define <8 x i16> @test12() {
          ; pslli.w (shift by immediate 3) on a vector that mixes constant and
          ; undef lanes. This function previously repeated the psrai.w call of
          ; test9 verbatim, which left pslli.w as the only declared intrinsic
          ; without an undef-lane case. The directives below require that no psll
          ; instruction is emitted and that a movaps is directly followed by ret.
    107   %shl = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
    108   ret <8 x i16> %shl
    109 }
    110 ; CHECK-LABEL: test12
    111 ; CHECK-NOT: psll
    112 ; CHECK: movaps
    113 ; CHECK-NEXT: ret
    114 
    115 define <4 x i32> @test13() {
          ; psrli.d (shift by immediate 3) on a vector that mixes constant and
          ; undef lanes. The directives below require that no psrl instruction is
          ; emitted and that a movaps is directly followed by ret.
    116   %lshr = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
    117   ret <4 x i32> %lshr
    118 }
    119 ; CHECK-LABEL: test13
    120 ; CHECK-NOT: psrl
    121 ; CHECK: movaps
    122 ; CHECK-NEXT: ret
    123 
    124 define <8 x i16> @test14() {
          ; psrli.w (shift by immediate 3) on a vector that mixes constant and
          ; undef lanes. The directives below require that no psrl instruction is
          ; emitted and that a movaps is directly followed by ret.
    125   %lshr = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
    126   ret <8 x i16> %lshr
    127 }
    128 ; CHECK-LABEL: test14
    129 ; CHECK-NOT: psrl
    130 ; CHECK: movaps
    131 ; CHECK-NEXT: ret
    132 
    133 define <4 x i32> @test15() {
          ; pslli.d (shift by immediate 3) on a vector that mixes constant and
          ; undef lanes. The directives below require that no psll instruction is
          ; emitted and that a movaps is directly followed by ret.
    134   %shl = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
    135   ret <4 x i32> %shl
    136 }
    137 ; CHECK-LABEL: test15
    138 ; CHECK-NOT: psll
    139 ; CHECK: movaps
    140 ; CHECK-NEXT: ret
    141 
    142 define <2 x i64> @test16() {
          ; pslli.q (shift by immediate 3) on a vector whose first lane is undef
          ; and whose second lane is constant. The directives below require that
          ; no psll instruction is emitted and that a movaps is directly followed
          ; by ret.
    143   %shl = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
    144   ret <2 x i64> %shl
    145 }
    146 ; CHECK-LABEL: test16
    147 ; CHECK-NOT: psll
    148 ; CHECK: movaps
    149 ; CHECK-NEXT: ret
    150 
    151 
    152 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
    153 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
    154 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
        ; The pslli.* declarations are grouped above; psrli.* and psrai.* follow.
        ; Each intrinsic takes the vector operand and an i32 shift count.
    155 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
    156 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
    157 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)
    158 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
    159 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
    160 
    161