; X86 vector shift-by-immediate lowering tests (SSE2).
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
      4 
; Splat shift-amount patterns below (every lane shifted by the same constant).
      6 
; shl4: splat shift-left on <4 x i32> by constants 2 and 1, XORed together.
; The shift-by-2 stays an immediate pslld; the shift-by-1 is expected to
; lower to paddd (x << 1 == x + x).
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
; X32-LABEL: shl4:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    pslld $2, %xmm1
; X32-NEXT:    paddd %xmm0, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    pslld $2, %xmm1
; X64-NEXT:    paddd %xmm0, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}
     29 
; shr4: splat logical shift-right on <4 x i32>; both amounts should lower to
; immediate psrld instructions.
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
; X32-LABEL: shr4:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrld $2, %xmm1
; X32-NEXT:    psrld $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrld $2, %xmm1
; X64-NEXT:    psrld $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}
     52 
; sra4: splat arithmetic shift-right on <4 x i32>; both amounts should lower
; to immediate psrad instructions.
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
; X32-LABEL: sra4:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrad $2, %xmm1
; X32-NEXT:    psrad $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrad $2, %xmm1
; X64-NEXT:    psrad $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}
     75 
; shl2: splat shift-left on <2 x i64>; both amounts should lower to immediate
; psllq instructions.
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
; X32-LABEL: shl2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psllq $2, %xmm1
; X32-NEXT:    psllq $9, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllq $2, %xmm1
; X64-NEXT:    psllq $9, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <2 x i64> %A,  < i64 2, i64 2>
  %C = shl <2 x i64> %A,  < i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}
     98 
; shr2: splat logical shift-right on <2 x i64>; both amounts should lower to
; immediate psrlq instructions.
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
; X32-LABEL: shr2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlq $8, %xmm1
; X32-NEXT:    psrlq $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $8, %xmm1
; X64-NEXT:    psrlq $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <2 x i64> %A,  < i64 8, i64 8>
  %C = lshr <2 x i64> %A,  < i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}
    121 
    122 
; shl8: splat shift-left on <8 x i16>; shift-by-2 uses immediate psllw while
; shift-by-1 is expected to lower to paddw (x << 1 == x + x).
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
; X32-LABEL: shl8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psllw $2, %xmm1
; X32-NEXT:    paddw %xmm0, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllw $2, %xmm1
; X64-NEXT:    paddw %xmm0, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}
    145 
; shr8: splat logical shift-right on <8 x i16>; both amounts should lower to
; immediate psrlw instructions.
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
; X32-LABEL: shr8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlw $2, %xmm1
; X32-NEXT:    psrlw $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlw $2, %xmm1
; X64-NEXT:    psrlw $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}
    168 
; sra8: splat arithmetic shift-right on <8 x i16>; both amounts should lower
; to immediate psraw instructions.
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
; X32-LABEL: sra8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psraw $2, %xmm1
; X32-NEXT:    psraw $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psraw $2, %xmm1
; X64-NEXT:    psraw $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}
    191 
; Non-splat shift-amount patterns below (per-lane constants differ).
    193 
    194 
; sll8_nosplat: non-uniform shift-left on <8 x i16>. SSE2 has no per-lane
; i16 shift, so each shl-by-constant is expected to lower to a pmullw by a
; vector of power-of-two multipliers (e.g. [2,4,8,64,4,4,4,4] for the first).
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
; X32-LABEL: sll8_nosplat:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X32-NEXT:    pmullw %xmm0, %xmm1
; X32-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sll8_nosplat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4]
; X64-NEXT:    pmullw %xmm0, %xmm1
; X64-NEXT:    pmullw {{.*}}(%rip), %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}
    217 
    218 
; shr2_nosplat: non-uniform logical shift-right on <2 x i64>. Each lshr with
; distinct lane amounts is expected to lower to two scalar psrlq shifts whose
; results are blended per-lane with movsd before the final xor.
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X32-LABEL: shr2_nosplat:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm2
; X32-NEXT:    psrlq $8, %xmm2
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlq $1, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; X32-NEXT:    xorpd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr2_nosplat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    psrlq $8, %xmm2
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $1, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; X64-NEXT:    xorpd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <2 x i64> %A,  < i64 8, i64 1>
  %C = lshr <2 x i64> %A,  < i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}
    249 
    250 
; Shifts on vector types that are not legal for SSE2 (widened during legalization).
    252 
; shl2_other: splat shift-left on the illegal type <2 x i32>; the checks show
; it is widened so the shifts execute as 64-bit-element psllq instructions.
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
; X32-LABEL: shl2_other:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psllq $2, %xmm1
; X32-NEXT:    psllq $9, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl2_other:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psllq $2, %xmm1
; X64-NEXT:    psllq $9, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = shl <2 x i32> %A,  < i32 2, i32 2>
  %C = shl <2 x i32> %A,  < i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}
    275 
; shr2_other: splat logical shift-right on the illegal type <2 x i32>.
; The checks show a pand of a constant before the psrlq shifts — presumably
; masking the upper half of each widened 64-bit lane so no bits leak across
; the 32-bit element boundary (TODO confirm against the constant pool value).
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
; X32-LABEL: shr2_other:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    movdqa %xmm0, %xmm1
; X32-NEXT:    psrlq $8, %xmm1
; X32-NEXT:    psrlq $1, %xmm0
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr2_other:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrlq $8, %xmm1
; X64-NEXT:    psrlq $1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %B = lshr <2 x i32> %A,  < i32 8, i32 8>
  %C = lshr <2 x i32> %A,  < i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}
    300 
; shl9: splat shift-left on <16 x i8>. SSE2 has no 8-bit shift, so this is
; expected to lower to a 16-bit psllw followed by a pand that clears the bits
; shifted in from the neighboring byte.
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32:       # %bb.0:
; X32-NEXT:    psllw $3, %xmm0
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    psllw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}
    316 
; shr9: splat logical shift-right on <16 x i8>. With no 8-bit shift in SSE2,
; this should lower to a 16-bit psrlw plus a pand masking off the bits pulled
; in from the neighboring byte.
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $3, %xmm0
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    retq
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}
    332 
; sra_v16i8_7: arithmetic shift-right by 7 on <16 x i8> replicates each sign
; bit across the byte, which is equivalent to (A < 0) ? -1 : 0 per lane —
; expected to lower to pcmpgtb against zero instead of any shift.
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8_7:
; X32:       # %bb.0:
; X32-NEXT:    pxor %xmm1, %xmm1
; X32-NEXT:    pcmpgtb %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v16i8_7:
; X64:       # %bb.0:
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
}
    350 
; sra_v16i8: arithmetic shift-right by 3 on <16 x i8>. SSE2 has no psrab, so
; the expected lowering is the classic trick: logical shift (psrlw + pand),
; then xor/psubb with 16 = 0x80 >> 3 to re-extend the shifted sign bit.
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
; X32-LABEL: sra_v16i8:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $3, %xmm0
; X32-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT:    pxor %xmm1, %xmm0
; X32-NEXT:    psubb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $3, %xmm0
; X64-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    retq
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
}
    372