; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=-sse4.1 < %s | FileCheck %s

; Verify that target-specific packed vector shifts by an immediate count are
; folded into a simple build_vector when every element of the shifted input
; vector is either a constant or undef.
;
; Each test expects the shift instruction to disappear and the function body
; to consist of a single constant load (movaps) followed by the return.
; Labels are matched with a trailing ':' so that, for example, the label for
; test1 cannot accidentally match test10 through test16.

;
; Inputs made entirely of constants.
;

; Logical left shift of 16-bit lanes.
define <8 x i16> @test1() {
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
  ret <8 x i16> %1
}
; CHECK-LABEL: test1:
; CHECK-NOT: psll
; CHECK: movaps
; CHECK-NEXT: ret

; Logical right shift of 16-bit lanes.
define <8 x i16> @test2() {
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}
; CHECK-LABEL: test2:
; CHECK-NOT: psrl
; CHECK: movaps
; CHECK-NEXT: ret

; Arithmetic right shift of 16-bit lanes.
define <8 x i16> @test3() {
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}
; CHECK-LABEL: test3:
; CHECK-NOT: psra
; CHECK: movaps
; CHECK-NEXT: ret

; Logical left shift of 32-bit lanes.
define <4 x i32> @test4() {
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
  ret <4 x i32> %1
}
; CHECK-LABEL: test4:
; CHECK-NOT: psll
; CHECK: movaps
; CHECK-NEXT: ret

; Logical right shift of 32-bit lanes.
define <4 x i32> @test5() {
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}
; CHECK-LABEL: test5:
; CHECK-NOT: psrl
; CHECK: movaps
; CHECK-NEXT: ret

; Arithmetic right shift of 32-bit lanes.
define <4 x i32> @test6() {
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}
; CHECK-LABEL: test6:
; CHECK-NOT: psra
; CHECK: movaps
; CHECK-NEXT: ret

; Logical left shift of 64-bit lanes.
define <2 x i64> @test7() {
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
  ret <2 x i64> %1
}
; CHECK-LABEL: test7:
; CHECK-NOT: psll
; CHECK: movaps
; CHECK-NEXT: ret

; Logical right shift of 64-bit lanes.
define <2 x i64> @test8() {
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
  ret <2 x i64> %1
}
; CHECK-LABEL: test8:
; CHECK-NOT: psrl
; CHECK: movaps
; CHECK-NEXT: ret

;
; Inputs mixing constants and undef elements.
;

; Arithmetic right shift of 16-bit lanes, some lanes undef.
define <8 x i16> @test9() {
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}
; CHECK-LABEL: test9:
; CHECK-NOT: psra
; CHECK: movaps
; CHECK-NEXT: ret

; Arithmetic right shift of 32-bit lanes, some lanes undef.
define <4 x i32> @test10() {
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}
; CHECK-LABEL: test10:
; CHECK-NOT: psra
; CHECK: movaps
; CHECK-NEXT: ret

; Logical right shift of 64-bit lanes, one lane undef.
define <2 x i64> @test11() {
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}
; CHECK-LABEL: test11:
; CHECK-NOT: psrl
; CHECK: movaps
; CHECK-NEXT: ret

; Logical left shift of 16-bit lanes, some lanes undef.
; This previously repeated the psrai.w call of test9 verbatim; it now covers
; pslli.w, which otherwise has no test with undef input lanes in this file.
define <8 x i16> @test12() {
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}
; CHECK-LABEL: test12:
; CHECK-NOT: psll
; CHECK: movaps
; CHECK-NEXT: ret

; Logical right shift of 32-bit lanes, some lanes undef.
define <4 x i32> @test13() {
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}
; CHECK-LABEL: test13:
; CHECK-NOT: psrl
; CHECK: movaps
; CHECK-NEXT: ret

; Logical right shift of 16-bit lanes, some lanes undef.
define <8 x i16> @test14() {
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}
; CHECK-LABEL: test14:
; CHECK-NOT: psrl
; CHECK: movaps
; CHECK-NEXT: ret

; Logical left shift of 32-bit lanes, some lanes undef.
define <4 x i32> @test15() {
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}
; CHECK-LABEL: test15:
; CHECK-NOT: psll
; CHECK: movaps
; CHECK-NEXT: ret

; Logical left shift of 64-bit lanes, one lane undef.
define <2 x i64> @test16() {
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}
; CHECK-LABEL: test16:
; CHECK-NOT: psll
; CHECK: movaps
; CHECK-NEXT: ret


; SSE2 shift-by-immediate intrinsics exercised above.
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)
