; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Verify that we correctly fold target specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the vector
; in input to the packed shift are all constants or undef.

define <8 x i16> @test1() {
; X32-LABEL: test1:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test2() {
; X32-LABEL: test2:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <8 x i16> @test3() {
; X32-LABEL: test3:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test4() {
; X32-LABEL: test4:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16,32,64]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test5() {
; X32-LABEL: test5:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <4 x i32> @test6() {
; X32-LABEL: test6:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT:    psllq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [8,16]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
  ret <2 x i64> %1
}

define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT:    psrlq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
  ret <2 x i64> %1
}
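
; A quick sanity check of the arithmetic in the tests above: the fold simply
; applies the immediate shift to every constant lane at compile time, e.g. in
; test1
;   <1,2,4,8,1,2,4,8> << 3  ==>  <8,16,32,64,8,16,32,64>
; so both runs just load the folded constant with movaps. Note that the X32
; run does not fold the v2i64 cases (test7/test8) and still emits the
; psllq/psrlq on a loaded constant.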

define <8 x i16> @test9() {
; X32-LABEL: test9:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test9:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test10() {
; X32-LABEL: test10:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test10:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = <u,u,31,0>
; X32-NEXT:    psrlq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test11:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,3>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

define <8 x i16> @test12() {
; X32-LABEL: test12:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test12:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test13() {
; X32-LABEL: test13:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X32-NEXT:    retl
;
; X64-LABEL: test13:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,1,u,4>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}

define <8 x i16> @test14() {
; X32-LABEL: test14:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X32-NEXT:    retl
;
; X64-LABEL: test14:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
; X64-NEXT:    retq
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
  ret <8 x i16> %1
}

define <4 x i32> @test15() {
; X32-LABEL: test15:
; X32:       # BB#0:
; X32-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X32-NEXT:    retl
;
; X64-LABEL: test15:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,64,u,256>
; X64-NEXT:    retq
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
  ret <4 x i32> %1
}
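
; As the checks above show, undef input lanes are preserved as undef ('u')
; lanes in the folded build_vector: in test9 the defined lanes of
; <15,8,undef,undef,31,undef,64,128> shifted right by 3 fold to
; <1,1,u,u,3,u,8,16>, while the undef lanes stay undef.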

define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32:       # BB#0:
; X32-NEXT:    movdqa {{.*#+}} xmm0 = <u,u,31,0>
; X32-NEXT:    psllq $3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test16:
; X64:       # BB#0:
; X64-NEXT:    movaps {{.*#+}} xmm0 = <u,248>
; X64-NEXT:    retq
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
  ret <2 x i64> %1
}

declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32)
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32)
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32)
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32)
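
; The declarations above cover all eight SSE2 immediate-count shift
; intrinsics exercised by these tests. There is no v2i64 arithmetic-shift
; case because SSE2 provides no psrai.q counterpart; immediate arithmetic
; shifts exist only for the 16-bit and 32-bit element types.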