; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Splat patterns below

; (x << 2) ^ (x << 1): splat v4i32 shifts lower to immediate-count pslld,
; with the shift-by-1 strength-reduced to paddd.
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
; X32-LABEL: shl4:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    pslld $2, %xmm1
; X32-NEXT:    paddd %xmm0, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    pslld $2, %xmm1
; X64-NEXT:    paddd %xmm0, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; Splat v4i32 logical right shifts lower to immediate-count psrld.
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
; X32-LABEL: shr4:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrld $2, %xmm1
; X32-NEXT:    psrld $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrld $2, %xmm1
; X64-NEXT:    psrld $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; Splat v4i32 arithmetic right shifts lower to immediate-count psrad.
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
; X32-LABEL: sra4:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrad $2, %xmm1
; X32-NEXT:    psrad $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrad $2, %xmm1
; X64-NEXT:    psrad $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; Splat v2i64 left shifts lower to immediate-count psllq.
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
; X32-LABEL: shl2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psllq $2, %xmm1
; X32-NEXT:    psllq $9, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllq $2, %xmm1
; X64-NEXT:    psllq $9, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <2 x i64> %A, < i64 2, i64 2>
  %C = shl <2 x i64> %A, < i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; Splat v2i64 logical right shifts lower to immediate-count psrlq.
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
; X32-LABEL: shr2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlq $8, %xmm1
; X32-NEXT:    psrlq $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $8, %xmm1
; X64-NEXT:    psrlq $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <2 x i64> %A, < i64 8, i64 8>
  %C = lshr <2 x i64> %A, < i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}


; Splat v8i16 left shifts lower to psllw, with shift-by-1 becoming paddw.
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
; X32-LABEL: shl8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psllw $2, %xmm1
; X32-NEXT:    paddw %xmm0, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllw $2, %xmm1
; X64-NEXT:    paddw %xmm0, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; Splat v8i16 logical right shifts lower to immediate-count psrlw.
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
; X32-LABEL: shr8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlw $2, %xmm1
; X32-NEXT:    psrlw $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlw $2, %xmm1
; X64-NEXT:    psrlw $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; Splat v8i16 arithmetic right shifts lower to immediate-count psraw.
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
; X32-LABEL: sra8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psraw $2, %xmm1
; X32-NEXT:    psraw $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psraw $2, %xmm1
; X64-NEXT:    psraw $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; non-splat test


; Non-uniform v8i16 left shifts lower to pmullw against constant-pool
; power-of-two vectors instead of per-lane shifts.
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
; X32-LABEL: sll8_nosplat:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X32-NEXT:    pmullw %xmm0, %xmm1
; X32-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sll8_nosplat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X64-NEXT:    pmullw %xmm0, %xmm1
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}


; Non-uniform v2i64 right shifts: per-lane psrlq results blended with movsd.
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X32-LABEL: shr2_nosplat:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm2
; X32-NEXT:    psrlq $8, %xmm2
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlq $1, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; X32-NEXT:    xorpd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr2_nosplat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    psrlq $8, %xmm2
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $1, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; X64-NEXT:    xorpd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <2 x i64> %A, < i64 8, i64 1>
  %C = lshr <2 x i64> %A, < i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}


; Other shifts

; Illegal-type v2i32 splat left shifts: promoted and lowered via psllq.
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
; X32-LABEL: shl2_other:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psllq $2, %xmm1
; X32-NEXT:    psllq $9, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl2_other:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllq $2, %xmm1
; X64-NEXT:    psllq $9, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <2 x i32> %A, < i32 2, i32 2>
  %C = shl <2 x i32> %A, < i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; Illegal-type v2i32 splat logical right shifts: the lanes are masked
; (pand) before the promoted psrlq.
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
; X32-LABEL: shr2_other:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlq $8, %xmm1
; X32-NEXT:    psrlq $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr2_other:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $8, %xmm1
; X64-NEXT:    psrlq $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <2 x i32> %A, < i32 8, i32 8>
  %C = lshr <2 x i32> %A, < i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; v16i8 has no byte shift instruction: splat shl lowers to psllw + pand mask.
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32:       # %bb.0:
; X32-NEXT:    psllw $3, %xmm0
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    psllw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}

; v16i8 splat lshr lowers to psrlw + pand mask.
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $3, %xmm0
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}

; v16i8 ashr by 7 (sign splat) lowers to pcmpgtb against zero.
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8_7:
; X32:       # %bb.0:
; X32-NEXT:    pxor %xmm1, %xmm1
; X32-NEXT:    pcmpgtb %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v16i8_7:
; X64:       # %bb.0:
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
}

; v16i8 splat ashr lowers to psrlw + pand, then sign-extends via the
; xor/sub trick against a splat of the shifted-down sign bit (16 = 0x80 >> 3).
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $3, %xmm0
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    psubb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    retq
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}