; Home | History | Annotate | Download | only in X86
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

      4 ;;; Shift left
      5 define <8 x i32> @vshift00(<8 x i32> %a) {
      6 ; CHECK-LABEL: vshift00:
      7 ; CHECK:       # BB#0:
      8 ; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
      9 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     10 ; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
     11 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     12 ; CHECK-NEXT:    retq
     13   %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
     14   ret <8 x i32> %s
     15 }
     16 
     17 define <16 x i16> @vshift01(<16 x i16> %a) {
     18 ; CHECK-LABEL: vshift01:
     19 ; CHECK:       # BB#0:
     20 ; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
     21 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     22 ; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
     23 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     24 ; CHECK-NEXT:    retq
     25   %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     26   ret <16 x i16> %s
     27 }
     28 
     29 define <4 x i64> @vshift02(<4 x i64> %a) {
     30 ; CHECK-LABEL: vshift02:
     31 ; CHECK:       # BB#0:
     32 ; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
     33 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     34 ; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
     35 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     36 ; CHECK-NEXT:    retq
     37   %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
     38   ret <4 x i64> %s
     39 }
     40 
     41 ;;; Logical Shift right
     42 define <8 x i32> @vshift03(<8 x i32> %a) {
     43 ; CHECK-LABEL: vshift03:
     44 ; CHECK:       # BB#0:
     45 ; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
     46 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     47 ; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
     48 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     49 ; CHECK-NEXT:    retq
     50   %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
     51   ret <8 x i32> %s
     52 }
     53 
     54 define <16 x i16> @vshift04(<16 x i16> %a) {
     55 ; CHECK-LABEL: vshift04:
     56 ; CHECK:       # BB#0:
     57 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
     58 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     59 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
     60 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     61 ; CHECK-NEXT:    retq
     62   %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
     63   ret <16 x i16> %s
     64 }
     65 
     66 define <4 x i64> @vshift05(<4 x i64> %a) {
     67 ; CHECK-LABEL: vshift05:
     68 ; CHECK:       # BB#0:
     69 ; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
     70 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     71 ; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
     72 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     73 ; CHECK-NEXT:    retq
     74   %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
     75   ret <4 x i64> %s
     76 }
     77 
     78 ;;; Arithmetic Shift right
     79 define <8 x i32> @vshift06(<8 x i32> %a) {
     80 ; CHECK-LABEL: vshift06:
     81 ; CHECK:       # BB#0:
     82 ; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
     83 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     84 ; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
     85 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     86 ; CHECK-NEXT:    retq
     87   %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
     88   ret <8 x i32> %s
     89 }
     90 
     91 define <16 x i16> @vshift07(<16 x i16> %a) {
     92 ; CHECK-LABEL: vshift07:
     93 ; CHECK:       # BB#0:
     94 ; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
     95 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
     96 ; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
     97 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     98 ; CHECK-NEXT:    retq
     99   %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
    100   ret <16 x i16> %s
    101 }
    102 
    103 define <32 x i8> @vshift09(<32 x i8> %a) {
    104 ; CHECK-LABEL: vshift09:
    105 ; CHECK:       # BB#0:
    106 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    107 ; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
    108 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
    109 ; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
    110 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
    111 ; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
    112 ; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    113 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
    114 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    115 ; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
    116 ; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    117 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    118 ; CHECK-NEXT:    retq
    119   %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
    120   ret <32 x i8> %s
    121 }
    122 
    123 define <32 x i8> @vshift10(<32 x i8> %a) {
    124 ; CHECK-LABEL: vshift10:
    125 ; CHECK:       # BB#0:
    126 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    127 ; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
    128 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
    129 ; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
    130 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    131 ; CHECK-NEXT:    retq
    132   %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
    133   ret <32 x i8> %s
    134 }
    135 
    136 define <32 x i8> @vshift11(<32 x i8> %a) {
    137 ; CHECK-LABEL: vshift11:
    138 ; CHECK:       # BB#0:
    139 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    140 ; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
    141 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
    142 ; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
    143 ; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
    144 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    145 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    146 ; CHECK-NEXT:    retq
    147   %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
    148   ret <32 x i8> %s
    149 }
    150 
    151 define <32 x i8> @vshift12(<32 x i8> %a) {
    152 ; CHECK-LABEL: vshift12:
    153 ; CHECK:       # BB#0:
    154 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    155 ; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
    156 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
    157 ; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
    158 ; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
    159 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    160 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    161 ; CHECK-NEXT:    retq
    162   %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
    163   ret <32 x i8> %s
    164 }
    165 
    166 ;;; Support variable shifts
    167 define <8 x i32> @vshift08(<8 x i32> %a)  {
    168 ; CHECK-LABEL: vshift08:
    169 ; CHECK:       # BB#0:
    170 ; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
    171 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
    172 ; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
    173 ; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
    174 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    175 ; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
    176 ; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
    177 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    178 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    179 ; CHECK-NEXT:    retq
    180   %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
    181   ret <8 x i32> %bitop
    182 }
    183 
    184 ; PR15141
    185 define <4 x i32> @vshift13(<4 x i32> %in) {
    186 ; CHECK-LABEL: vshift13:
    187 ; CHECK:       # BB#0:
    188 ; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
    189 ; CHECK-NEXT:    retq
    190   %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
    191   ret <4 x i32> %T
    192 }
    193 
    194 ;;; Uses shifts for sign extension
    195 define <16 x i16> @sext_v16i16(<16 x i16> %a)  {
    196 ; CHECK-LABEL: sext_v16i16:
    197 ; CHECK:       # BB#0:
    198 ; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
    199 ; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
    200 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    201 ; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
    202 ; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
    203 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    204 ; CHECK-NEXT:    retq
    205   %b = trunc <16 x i16> %a to <16 x i8>
    206   %c = sext <16 x i8> %b to <16 x i16>
    207   ret <16 x i16> %c
    208 }
    209 
    210 define <8 x i32> @sext_v8i32(<8 x i32> %a)  {
    211 ; CHECK-LABEL: sext_v8i32:
    212 ; CHECK:       # BB#0:
    213 ; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
    214 ; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
    215 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    216 ; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
    217 ; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
    218 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    219 ; CHECK-NEXT:    retq
    220   %b = trunc <8 x i32> %a to <8 x i16>
    221   %c = sext <8 x i16> %b to <8 x i32>
    222   ret <8 x i32> %c
    223 }
    224