; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Verify that we correctly fold target-specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the input
; vector to the packed shift are all constants or undef.
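; For example, in @test1 each lane of <1,2,4,8,1,2,4,8> is shifted left by 3,
; so the whole intrinsic call folds to the constant <8,16,32,64,8,16,32,64>
; and is materialized as a single constant-pool load (the movaps checked below).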

define <8 x i16> @test1() {
; X32-LABEL: test1:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test2() {
; X32-LABEL: test2:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test3() {
; X32-LABEL: test3:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test4() {
; X32-LABEL: test4:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test5() {
; X32-LABEL: test5:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test6() {
; X32-LABEL: test6:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}
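
; The <2 x i64> cases fold the same way, but on the 32-bit target the folded
; 64-bit constants are printed as four 32-bit lanes (low half first), e.g.
; [8,0,16,0] on X32 versus [8,16] on X64 in @test7. In the later tests, undef
; input elements also fold and appear as 'u' lanes in the checked constant.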
define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
  ret <2 x i64> %1
}

define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
  ret <2 x i64> %1
}

define <8 x i16> @test9() {
; X32-LABEL: test9:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test9:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test10() {
; X32-LABEL: test10:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test10:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,u,3,0>
; X32-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,3>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

define <8 x i16> @test12() {
; X32-LABEL: test12:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test12:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test13() {
; X32-LABEL: test13:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test13:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <8 x i16> @test14() {
; X32-LABEL: test14:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test14:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test15() {
; X32-LABEL: test15:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X32-NEXT:    retl
;
; X64-LABEL: test15:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32:       # %bb.0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,u,248,0>
; X32-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,248>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)