; X86 AVX vector-shift lowering tests.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

;;; Shift left
      5 define <8 x i32> @vshift00(<8 x i32> %a) {
      6 ; CHECK-LABEL: vshift00:
      7 ; CHECK:       # BB#0:
      8 ; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
      9 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     10 ; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
     11 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     12 ; CHECK-NEXT:    retq
     13   %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
     14 2>
     15   ret <8 x i32> %s
     16 }
     17 
     18 define <16 x i16> @vshift01(<16 x i16> %a) {
     19 ; CHECK-LABEL: vshift01:
     20 ; CHECK:       # BB#0:
     21 ; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
     22 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     23 ; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
     24 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     25 ; CHECK-NEXT:    retq
     26   %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     27   ret <16 x i16> %s
     28 }
     29 
     30 define <4 x i64> @vshift02(<4 x i64> %a) {
     31 ; CHECK-LABEL: vshift02:
     32 ; CHECK:       # BB#0:
     33 ; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
     34 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     35 ; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
     36 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     37 ; CHECK-NEXT:    retq
     38   %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
     39   ret <4 x i64> %s
     40 }
     41 
;;; Logical Shift right
     43 define <8 x i32> @vshift03(<8 x i32> %a) {
     44 ; CHECK-LABEL: vshift03:
     45 ; CHECK:       # BB#0:
     46 ; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
     47 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     48 ; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
     49 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     50 ; CHECK-NEXT:    retq
     51   %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
     52 2>
     53   ret <8 x i32> %s
     54 }
     55 
     56 define <16 x i16> @vshift04(<16 x i16> %a) {
     57 ; CHECK-LABEL: vshift04:
     58 ; CHECK:       # BB#0:
     59 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
     60 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     61 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
     62 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     63 ; CHECK-NEXT:    retq
     64   %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     65   ret <16 x i16> %s
     66 }
     67 
     68 define <4 x i64> @vshift05(<4 x i64> %a) {
     69 ; CHECK-LABEL: vshift05:
     70 ; CHECK:       # BB#0:
     71 ; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
     72 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     73 ; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
     74 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     75 ; CHECK-NEXT:    retq
     76   %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
     77   ret <4 x i64> %s
     78 }
     79 
;;; Arithmetic Shift right
     81 define <8 x i32> @vshift06(<8 x i32> %a) {
     82 ; CHECK-LABEL: vshift06:
     83 ; CHECK:       # BB#0:
     84 ; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
     85 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     86 ; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
     87 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     88 ; CHECK-NEXT:    retq
     89   %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
     90 2>
     91   ret <8 x i32> %s
     92 }
     93 
     94 define <16 x i16> @vshift07(<16 x i16> %a) {
     95 ; CHECK-LABEL: vshift07:
     96 ; CHECK:       # BB#0:
     97 ; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
     98 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     99 ; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
    100 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    101 ; CHECK-NEXT:    retq
    102   %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
    103   ret <16 x i16> %s
    104 }
    105 
    106 define <32 x i8> @vshift09(<32 x i8> %a) {
    107 ; CHECK-LABEL: vshift09:
    108 ; CHECK:       # BB#0:
    109 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    110 ; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
    111 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
    112 ; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
    113 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
    114 ; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
    115 ; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    116 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
    117 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    118 ; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
    119 ; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    120 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    121 ; CHECK-NEXT:    retq
    122   %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
    123   ret <32 x i8> %s
    124 }
    125 
    126 define <32 x i8> @vshift10(<32 x i8> %a) {
    127 ; CHECK-LABEL: vshift10:
    128 ; CHECK:       # BB#0:
    129 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    130 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    131 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
    132 ; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
    133 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    134 ; CHECK-NEXT:    retq
    135   %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
    136   ret <32 x i8> %s
    137 }
    138 
    139 define <32 x i8> @vshift11(<32 x i8> %a) {
    140 ; CHECK-LABEL: vshift11:
    141 ; CHECK:       # BB#0:
    142 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    143 ; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
    144 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
    145 ; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
    146 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
    147 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    148 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    149 ; CHECK-NEXT:    retq
    150   %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
    151   ret <32 x i8> %s
    152 }
    153 
    154 define <32 x i8> @vshift12(<32 x i8> %a) {
    155 ; CHECK-LABEL: vshift12:
    156 ; CHECK:       # BB#0:
    157 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    158 ; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
    159 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    160 ; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
    161 ; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
    162 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    163 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    164 ; CHECK-NEXT:    retq
    165   %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
    166   ret <32 x i8> %s
    167 }
    168 
;;; Support variable shifts
    170 define <8 x i32> @vshift08(<8 x i32> %a)  {
    171 ; CHECK-LABEL: vshift08:
    172 ; CHECK:       # BB#0:
    173 ; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
    174 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
    175 ; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
    176 ; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
    177 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    178 ; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
    179 ; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
    180 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    181 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    182 ; CHECK-NEXT:    retq
    183   %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
    184   ret <8 x i32> %bitop
    185 }
    186 
; PR15141
    188 define <4 x i32> @vshift13(<4 x i32> %in) {
    189 ; CHECK-LABEL: vshift13:
    190 ; CHECK:       # BB#0:
    191 ; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
    192 ; CHECK-NEXT:    retq
    193   %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
    194   ret <4 x i32> %T
    195 }
    196 
;;; Uses shifts for sign extension
    198 define <16 x i16> @sext_v16i16(<16 x i16> %a)  {
    199 ; CHECK-LABEL: sext_v16i16:
    200 ; CHECK:       # BB#0:
    201 ; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
    202 ; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
    203 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    204 ; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
    205 ; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
    206 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    207 ; CHECK-NEXT:    retq
    208   %b = trunc <16 x i16> %a to <16 x i8>
    209   %c = sext <16 x i8> %b to <16 x i16>
    210   ret <16 x i16> %c
    211 }
    212 
    213 define <8 x i32> @sext_v8i32(<8 x i32> %a)  {
    214 ; CHECK-LABEL: sext_v8i32:
    215 ; CHECK:       # BB#0:
    216 ; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
    217 ; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
    218 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    219 ; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
    220 ; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
    221 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    222 ; CHECK-NEXT:    retq
    223   %b = trunc <8 x i32> %a to <8 x i16>
    224   %c = sext <8 x i16> %b to <8 x i32>
    225   ret <8 x i32> %c
    226 }
    227