; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 -mcpu=corei7 | FileCheck %s

; SSE2 Logical Shift Left
;
; Each element width (i16, i32, i64) is exercised with three uniform constant
; shift amounts: zero, one, and (element width - 1).

; Left shift of every i16 lane by 0; the directives below expect no
; "psllw $0" before the return.
define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
entry:
  %shifted = shl <8 x i16> %InVec, zeroinitializer
  ret <8 x i16> %shifted
}

; CHECK-LABEL: test_sllw_1:
; CHECK-NOT: psllw $0, %xmm0
; CHECK: ret

; Left shift of every i16 lane by 1; expected to be emitted as an add of the
; register to itself.
define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
entry:
  %shifted = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shifted
}

; CHECK-LABEL: test_sllw_2:
; CHECK: paddw %xmm0, %xmm0
; CHECK-NEXT: ret

; Left shift of every i16 lane by 15, the largest in-range amount for i16.
define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
entry:
  %shifted = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %shifted
}

; CHECK-LABEL: test_sllw_3:
; CHECK: psllw $15, %xmm0
; CHECK-NEXT: ret

; Left shift of every i32 lane by 0; the directives below expect no
; "pslld $0" before the return.
define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
entry:
  %shifted = shl <4 x i32> %InVec, zeroinitializer
  ret <4 x i32> %shifted
}

; CHECK-LABEL: test_slld_1:
; CHECK-NOT: pslld $0, %xmm0
; CHECK: ret

; Left shift of every i32 lane by 1; expected to be emitted as an add of the
; register to itself.
define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
entry:
  %shifted = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shifted
}

; CHECK-LABEL: test_slld_2:
; CHECK: paddd %xmm0, %xmm0
; CHECK-NEXT: ret

; Left shift of every i32 lane by 31, the largest in-range amount for i32.
define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
entry:
  %shifted = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %shifted
}

; CHECK-LABEL: test_slld_3:
; CHECK: pslld $31, %xmm0
; CHECK-NEXT: ret

; Left shift of every i64 lane by 0; the directives below expect no
; "psllq $0" before the return.
define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
entry:
  %shifted = shl <2 x i64> %InVec, zeroinitializer
  ret <2 x i64> %shifted
}

; CHECK-LABEL: test_sllq_1:
; CHECK-NOT: psllq $0, %xmm0
; CHECK: ret

; Left shift of every i64 lane by 1; expected to be emitted as an add of the
; register to itself.
define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
entry:
  %shifted = shl <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %shifted
}

; CHECK-LABEL: test_sllq_2:
; CHECK: paddq %xmm0, %xmm0
; CHECK-NEXT: ret

; Left shift of every i64 lane by 63, the largest in-range amount for i64.
define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
entry:
  %shl = shl <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %shl
}

; CHECK-LABEL: test_sllq_3:
; CHECK: psllq $63, %xmm0
; CHECK-NEXT: ret

; SSE2 Arithmetic Shift
;
; Only i16 and i32 element widths are exercised in this section.

; Arithmetic right shift of every i16 lane by 0; no "psraw $0" is expected
; before the return.
define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
entry:
  %sra = ashr <8 x i16> %InVec, zeroinitializer
  ret <8 x i16> %sra
}

; CHECK-LABEL: test_sraw_1:
; CHECK-NOT: psraw $0, %xmm0
; CHECK: ret

; Arithmetic right shift of every i16 lane by 1.
define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
entry:
  %sra = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %sra
}

; CHECK-LABEL: test_sraw_2:
; CHECK: psraw $1, %xmm0
; CHECK-NEXT: ret

; Arithmetic right shift of every i16 lane by 15.
define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
entry:
  %sra = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %sra
}

; CHECK-LABEL: test_sraw_3:
; CHECK: psraw $15, %xmm0
; CHECK-NEXT: ret

; Arithmetic right shift of every i32 lane by 0; no "psrad $0" is expected
; before the return.
define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
entry:
  %sra = ashr <4 x i32> %InVec, zeroinitializer
  ret <4 x i32> %sra
}

; CHECK-LABEL: test_srad_1:
; CHECK-NOT: psrad $0, %xmm0
; CHECK: ret

; Arithmetic right shift of every i32 lane by 1.
define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
entry:
  %sra = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sra
}

; CHECK-LABEL: test_srad_2:
; CHECK: psrad $1, %xmm0
; CHECK-NEXT: ret

; Arithmetic right shift of every i32 lane by 31.
define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
entry:
  %sra = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %sra
}

; CHECK-LABEL: test_srad_3:
; CHECK: psrad $31, %xmm0
; CHECK-NEXT: ret

; SSE2 Logical Shift Right

; Logical right shift of every i16 lane by 0; no "psrlw $0" is expected
; before the return.
define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
entry:
  %srl = lshr <8 x i16> %InVec, zeroinitializer
  ret <8 x i16> %srl
}

; CHECK-LABEL: test_srlw_1:
; CHECK-NOT: psrlw $0, %xmm0
; CHECK: ret

; Logical right shift of every i16 lane by 1.
define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
entry:
  %srl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %srl
}

; CHECK-LABEL: test_srlw_2:
; CHECK: psrlw $1, %xmm0
; CHECK-NEXT: ret

; Logical right shift of every i16 lane by 15.
define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
entry:
  %srl = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %srl
}

; CHECK-LABEL: test_srlw_3:
; CHECK: psrlw $15, %xmm0
; CHECK-NEXT: ret

; Logical right shift of every i32 lane by 0; no "psrld $0" is expected
; before the return.
define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
entry:
  %srl = lshr <4 x i32> %InVec, zeroinitializer
  ret <4 x i32> %srl
}

; CHECK-LABEL: test_srld_1:
; CHECK-NOT: psrld $0, %xmm0
; CHECK: ret

; Logical right shift of every i32 lane by 1.
define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
entry:
  %srl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %srl
}

; CHECK-LABEL: test_srld_2:
; CHECK: psrld $1, %xmm0
; CHECK-NEXT: ret

; Logical right shift of every i32 lane by 31.
define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
entry:
  %srl = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl
}

; CHECK-LABEL: test_srld_3:
; CHECK: psrld $31, %xmm0
; CHECK-NEXT: ret

; Logical right shift of every i64 lane by 0; no "psrlq $0" is expected
; before the return.
define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
entry:
  %srl = lshr <2 x i64> %InVec, zeroinitializer
  ret <2 x i64> %srl
}

; CHECK-LABEL: test_srlq_1:
; CHECK-NOT: psrlq $0, %xmm0
; CHECK: ret

; Logical right shift of every i64 lane by 1.
define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
entry:
  %srl = lshr <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %srl
}

; CHECK-LABEL: test_srlq_2:
; CHECK: psrlq $1, %xmm0
; CHECK-NEXT: ret

; Logical right shift of every i64 lane by 63.
define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
entry:
  %srl = lshr <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %srl
}

; CHECK-LABEL: test_srlq_3:
; CHECK: psrlq $63, %xmm0
; CHECK-NEXT: ret


; Chained constant shifts. From here on the expectations are written above
; each function rather than below it.

; ashr by 2 followed by ashr by 4: a single shift by 6 is expected.
; CHECK-LABEL: sra_sra_v4i32:
; CHECK: psrad $6, %xmm0
; CHECK-NEXT: retq
define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
  %sra0 = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %sra1 = ashr <4 x i32> %sra0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %sra1
}

; lshr by 2 followed by lshr by 4: a single shift by 6 is expected.
; CHECK-LABEL: @srl_srl_v4i32
; CHECK: psrld $6, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
  %srl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl1
}

; shl by 4 followed by lshr by 4 (clears the top four bits of each lane):
; a single mask instruction is expected.
; CHECK-LABEL: @srl_shl_v4i32
; CHECK: andps
; CHECK-NEXT: retq
define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
  %shl = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %srl = lshr <4 x i32> %shl, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl
}

; Variable ashr followed by lshr by 31: only the "psrld $31" is expected, so
; the variable shift by %y must not appear in the output.
; CHECK-LABEL: @srl_sra_31_v4i32
; CHECK: psrld $31, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
  %sra = ashr <4 x i32> %x, %y
  %srl1 = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl1
}

; shl by 2 followed by shl by 4: a single shift by 6 is expected.
; CHECK-LABEL: @shl_shl_v4i32
; CHECK: pslld $6, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
  %shl0 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl1
}

; ashr by 4 followed by shl by 4 (clears the low four bits of each lane):
; a single mask instruction is expected.
; CHECK-LABEL: @shl_sra_v4i32
; CHECK: andps
; CHECK-NEXT: ret
define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
  %sra = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %shl = shl <4 x i32> %sra, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl
}

; lshr by 2 followed by shl by 5: a net left shift by 3 plus a mask is
; expected.
; CHECK-LABEL: @shl_srl_v4i32
; CHECK: pslld $3, %xmm0
; CHECK-NEXT: pand
; CHECK-NEXT: ret
define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
  %srl = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl = shl <4 x i32> %srl, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shl
}

; lshr by 2 on i16 lanes, zero-extend to i32, then shl by 2: a single mask
; instruction is expected.
; CHECK-LABEL: @shl_zext_srl_v4i32
; CHECK: andps
; CHECK-NEXT: ret
define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
  %srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %zext = zext <4 x i16> %srl to <4 x i32>
  %shl = shl <4 x i32> %zext, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %shl
}

; lshr by 16 on i32 lanes, truncate to i16, then ashr by 3: a single
; arithmetic shift by 19 is expected.
; The function is anchored with the label form of the directive, as every
; other function in this file is, so its expectations are matched only
; within this function's own output.
; CHECK-LABEL: @sra_trunc_srl_v4i32
; CHECK: psrad $19, %xmm0
; CHECK-NEXT: retq
define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
  %srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %trunc = trunc <4 x i32> %srl to <4 x i16>
  %sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %sra
}

; shl by 2 on i16 lanes, zero-extend to i32, then shl by 17: a mask followed
; by a single shift by 19 is expected.
; CHECK-LABEL: @shl_zext_shl_v4i32
; CHECK: pand
; CHECK-NEXT: pslld $19, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
  %shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %ext = zext <4 x i16> %shl0 to <4 x i32>
  %shl1 = shl <4 x i32> %ext, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %shl1
}

; Single arithmetic right shift of every i32 lane by 3.
; CHECK-LABEL: @sra_v4i32
; CHECK: psrad $3, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
  %sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %sra
}

; Single logical right shift of every i32 lane by 3.
; CHECK-LABEL: @srl_v4i32
; CHECK: psrld $3, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
  %srl = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %srl
}

; CHECK-LABEL: @shl_v4i32
; CHECK: pslld $3, %xmm0
; CHECK-NEXT: ret
; Single left shift of every i32 lane by 3; the expected output for this
; function is listed immediately above.
define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
  %shl = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shl
}