; (Source-browser navigation residue, not part of the test: Home | History | Annotate | Download | only in X86)
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s
      2 
      3 ; Splat patterns below
      4 
      5 
      6 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
      7 entry:
      8 ; CHECK:      shl4
      9 ; CHECK:      pslld
     10 ; CHECK:      padd
     11 ; CHECK:      ret
     12   %B = shl <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
     13   %C = shl <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
     14   %K = xor <4 x i32> %B, %C
     15   ret <4 x i32> %K
     16 }
     17 
     18 define <4 x i32> @shr4(<4 x i32> %A) nounwind {
     19 entry:
     20 ; CHECK:      shr4
     21 ; CHECK:      psrld
     22 ; CHECK-NEXT: psrld
     23 ; CHECK:      ret
     24   %B = lshr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
     25   %C = lshr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
     26   %K = xor <4 x i32> %B, %C
     27   ret <4 x i32> %K
     28 }
     29 
     30 define <4 x i32> @sra4(<4 x i32> %A) nounwind {
     31 entry:
     32 ; CHECK:      sra4
     33 ; CHECK:      psrad
     34 ; CHECK-NEXT: psrad
     35 ; CHECK:      ret
     36   %B = ashr <4 x i32> %A,  < i32 2, i32 2, i32 2, i32 2>
     37   %C = ashr <4 x i32> %A,  < i32 1, i32 1, i32 1, i32 1>
     38   %K = xor <4 x i32> %B, %C
     39   ret <4 x i32> %K
     40 }
     41 
     42 define <2 x i64> @shl2(<2 x i64> %A) nounwind {
     43 entry:
     44 ; CHECK:      shl2
     45 ; CHECK:      psllq
     46 ; CHECK-NEXT: psllq
     47 ; CHECK:      ret
     48   %B = shl <2 x i64> %A,  < i64 2, i64 2>
     49   %C = shl <2 x i64> %A,  < i64 9, i64 9>
     50   %K = xor <2 x i64> %B, %C
     51   ret <2 x i64> %K
     52 }
     53 
     54 define <2 x i64> @shr2(<2 x i64> %A) nounwind {
     55 entry:
     56 ; CHECK:      shr2
     57 ; CHECK:      psrlq
     58 ; CHECK-NEXT: psrlq
     59 ; CHECK:      ret
     60   %B = lshr <2 x i64> %A,  < i64 8, i64 8>
     61   %C = lshr <2 x i64> %A,  < i64 1, i64 1>
     62   %K = xor <2 x i64> %B, %C
     63   ret <2 x i64> %K
     64 }
     65 
     66 
     67 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
     68 entry:
     69 ; CHECK:      shl8
     70 ; CHECK:      psllw
     71 ; CHECK:      padd
     72 ; CHECK:      ret
     73   %B = shl <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     74   %C = shl <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
     75   %K = xor <8 x i16> %B, %C
     76   ret <8 x i16> %K
     77 }
     78 
     79 define <8 x i16> @shr8(<8 x i16> %A) nounwind {
     80 entry:
     81 ; CHECK:      shr8
     82 ; CHECK:      psrlw
     83 ; CHECK-NEXT: psrlw
     84 ; CHECK:      ret
     85   %B = lshr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     86   %C = lshr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
     87   %K = xor <8 x i16> %B, %C
     88   ret <8 x i16> %K
     89 }
     90 
     91 define <8 x i16> @sra8(<8 x i16> %A) nounwind {
     92 entry:
     93 ; CHECK:      sra8
     94 ; CHECK:      psraw
     95 ; CHECK-NEXT: psraw
     96 ; CHECK:      ret
     97   %B = ashr <8 x i16> %A,  < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     98   %C = ashr <8 x i16> %A,  < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
     99   %K = xor <8 x i16> %B, %C
    100   ret <8 x i16> %K
    101 }
    102 
; Non-splat patterns below
    104 
    105 
    106 define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
    107 entry:
    108 ; CHECK: sll8_nosplat
    109 ; CHECK-NOT: psll
    110 ; CHECK-NOT: psll
    111 ; CHECK:      ret
    112   %B = shl <8 x i16> %A,  < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
    113   %C = shl <8 x i16> %A,  < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
    114   %K = xor <8 x i16> %B, %C
    115   ret <8 x i16> %K
    116 }
    117 
    118 
    119 define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
    120 entry:
    121 ; CHECK-LABEL: shr2_nosplat
    122 ; CHECK:       movdqa %xmm0, %xmm1
    123 ; CHECK-NEXT:  psrlq  $1, %xmm1
    124 ; CHECK-NEXT:  movdqa %xmm0, %xmm2
    125 ; CHECK-NEXT:  psrlq  $8, %xmm2
    126 ; CHECK-NEXT:  movsd  {{.*#+}} xmm0 = xmm1[0],xmm0[1]
    127 ; CHECK-NEXT:  movsd  {{.*#+}} xmm1 = xmm2[0],xmm1[1]
    128 ; CHECK-NEXT:  xorpd  %xmm0, %xmm1
    129 ; CHECK-NEXT:  movapd %xmm1, %xmm0
    130 ; CHECK-NEXT:  ret
    131   %B = lshr <2 x i64> %A,  < i64 8, i64 1>
    132   %C = lshr <2 x i64> %A,  < i64 1, i64 0>
    133   %K = xor <2 x i64> %B, %C
    134   ret <2 x i64> %K
    135 }
    136 
    137 
    138 ; Other shifts
    139 
    140 define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
    141 entry:
    142 ; CHECK: shl2_other
    143 ; CHECK: psllq
    144 ; CHECK: ret
    145   %B = shl <2 x i32> %A,  < i32 2, i32 2>
    146   %C = shl <2 x i32> %A,  < i32 9, i32 9>
    147   %K = xor <2 x i32> %B, %C
    148   ret <2 x i32> %K
    149 }
    150 
    151 define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
    152 entry:
    153 ; CHECK: shr2_other
    154 ; CHECK: psrlq
    155 ; CHECK: ret
    156   %B = lshr <2 x i32> %A,  < i32 8, i32 8>
    157   %C = lshr <2 x i32> %A,  < i32 1, i32 1>
    158   %K = xor <2 x i32> %B, %C
    159   ret <2 x i32> %K
    160 }
    161 
    162 define <16 x i8> @shl9(<16 x i8> %A) nounwind {
    163   %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    164   ret <16 x i8> %B
    165 ; CHECK-LABEL: shl9:
    166 ; CHECK: psllw $3
    167 ; CHECK: pand
    168 ; CHECK: ret
    169 }
    170 
    171 define <16 x i8> @shr9(<16 x i8> %A) nounwind {
    172   %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    173   ret <16 x i8> %B
    174 ; CHECK-LABEL: shr9:
    175 ; CHECK: psrlw $3
    176 ; CHECK: pand
    177 ; CHECK: ret
    178 }
    179 
    180 define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
    181   %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
    182   ret <16 x i8> %B
    183 ; CHECK-LABEL: sra_v16i8_7:
    184 ; CHECK: pxor
    185 ; CHECK: pcmpgtb
    186 ; CHECK: ret
    187 }
    188 
    189 define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
    190   %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    191   ret <16 x i8> %B
    192 ; CHECK-LABEL: sra_v16i8:
    193 ; CHECK: psrlw $3
    194 ; CHECK: pand
    195 ; CHECK: pxor
    196 ; CHECK: psubb
    197 ; CHECK: ret
    198 }
    199