; NOTE(review): removed stray code-browser navigation chrome ("Home | History |
; Annotate | Download | only in X86") so the file parses as LLVM IR.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X32,X32-SLOW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X32,X32-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X64,X64-FAST

      7 define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
      8 ; X32-SLOW-LABEL: trunc4:
      9 ; X32-SLOW:       # %bb.0:
     10 ; X32-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
     11 ; X32-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
     12 ; X32-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     13 ; X32-SLOW-NEXT:    vzeroupper
     14 ; X32-SLOW-NEXT:    retl
     15 ;
     16 ; X32-FAST-LABEL: trunc4:
     17 ; X32-FAST:       # %bb.0:
     18 ; X32-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
     19 ; X32-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     20 ; X32-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     21 ; X32-FAST-NEXT:    vzeroupper
     22 ; X32-FAST-NEXT:    retl
     23 ;
     24 ; X64-SLOW-LABEL: trunc4:
     25 ; X64-SLOW:       # %bb.0:
     26 ; X64-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
     27 ; X64-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
     28 ; X64-SLOW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     29 ; X64-SLOW-NEXT:    vzeroupper
     30 ; X64-SLOW-NEXT:    retq
     31 ;
     32 ; X64-FAST-LABEL: trunc4:
     33 ; X64-FAST:       # %bb.0:
     34 ; X64-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
     35 ; X64-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     36 ; X64-FAST-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     37 ; X64-FAST-NEXT:    vzeroupper
     38 ; X64-FAST-NEXT:    retq
     39   %B = trunc <4 x i64> %A to <4 x i32>
     40   ret <4 x i32>%B
     41 }
     42 
     43 define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
     44 ; X32-LABEL: trunc8:
     45 ; X32:       # %bb.0:
     46 ; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
     47 ; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
     48 ; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     49 ; X32-NEXT:    vzeroupper
     50 ; X32-NEXT:    retl
     51 ;
     52 ; X64-LABEL: trunc8:
     53 ; X64:       # %bb.0:
     54 ; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
     55 ; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
     56 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     57 ; X64-NEXT:    vzeroupper
     58 ; X64-NEXT:    retq
     59   %B = trunc <8 x i32> %A to <8 x i16>
     60   ret <8 x i16>%B
     61 }
     62 
     63 define <4 x i64> @sext4(<4 x i32> %A) nounwind {
     64 ; X32-LABEL: sext4:
     65 ; X32:       # %bb.0:
     66 ; X32-NEXT:    vpmovsxdq %xmm0, %ymm0
     67 ; X32-NEXT:    retl
     68 ;
     69 ; X64-LABEL: sext4:
     70 ; X64:       # %bb.0:
     71 ; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
     72 ; X64-NEXT:    retq
     73   %B = sext <4 x i32> %A to <4 x i64>
     74   ret <4 x i64>%B
     75 }
     76 
     77 define <8 x i32> @sext8(<8 x i16> %A) nounwind {
     78 ; X32-LABEL: sext8:
     79 ; X32:       # %bb.0:
     80 ; X32-NEXT:    vpmovsxwd %xmm0, %ymm0
     81 ; X32-NEXT:    retl
     82 ;
     83 ; X64-LABEL: sext8:
     84 ; X64:       # %bb.0:
     85 ; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
     86 ; X64-NEXT:    retq
     87   %B = sext <8 x i16> %A to <8 x i32>
     88   ret <8 x i32>%B
     89 }
     90 
     91 define <4 x i64> @zext4(<4 x i32> %A) nounwind {
     92 ; X32-LABEL: zext4:
     93 ; X32:       # %bb.0:
     94 ; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
     95 ; X32-NEXT:    retl
     96 ;
     97 ; X64-LABEL: zext4:
     98 ; X64:       # %bb.0:
     99 ; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    100 ; X64-NEXT:    retq
    101   %B = zext <4 x i32> %A to <4 x i64>
    102   ret <4 x i64>%B
    103 }
    104 
    105 define <8 x i32> @zext8(<8 x i16> %A) nounwind {
    106 ; X32-LABEL: zext8:
    107 ; X32:       # %bb.0:
    108 ; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    109 ; X32-NEXT:    retl
    110 ;
    111 ; X64-LABEL: zext8:
    112 ; X64:       # %bb.0:
    113 ; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    114 ; X64-NEXT:    retq
    115   %B = zext <8 x i16> %A to <8 x i32>
    116   ret <8 x i32>%B
    117 }
    118 
    119 define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
    120 ; X32-LABEL: zext_8i8_8i32:
    121 ; X32:       # %bb.0:
    122 ; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
    123 ; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    124 ; X32-NEXT:    retl
    125 ;
    126 ; X64-LABEL: zext_8i8_8i32:
    127 ; X64:       # %bb.0:
    128 ; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
    129 ; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    130 ; X64-NEXT:    retq
    131   %B = zext <8 x i8> %A to <8 x i32>
    132   ret <8 x i32>%B
    133 }
    134 
    135 define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
    136 ; X32-LABEL: zext_16i8_16i16:
    137 ; X32:       # %bb.0:
    138 ; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    139 ; X32-NEXT:    retl
    140 ;
    141 ; X64-LABEL: zext_16i8_16i16:
    142 ; X64:       # %bb.0:
    143 ; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    144 ; X64-NEXT:    retq
    145   %t = zext <16 x i8> %z to <16 x i16>
    146   ret <16 x i16> %t
    147 }
    148 
    149 define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
    150 ; X32-LABEL: sext_16i8_16i16:
    151 ; X32:       # %bb.0:
    152 ; X32-NEXT:    vpmovsxbw %xmm0, %ymm0
    153 ; X32-NEXT:    retl
    154 ;
    155 ; X64-LABEL: sext_16i8_16i16:
    156 ; X64:       # %bb.0:
    157 ; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
    158 ; X64-NEXT:    retq
    159   %t = sext <16 x i8> %z to <16 x i16>
    160   ret <16 x i16> %t
    161 }
    162 
    163 define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
    164 ; X32-LABEL: trunc_16i16_16i8:
    165 ; X32:       # %bb.0:
    166 ; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
    167 ; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
    168 ; X32-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    169 ; X32-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    170 ; X32-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    171 ; X32-NEXT:    vzeroupper
    172 ; X32-NEXT:    retl
    173 ;
    174 ; X64-LABEL: trunc_16i16_16i8:
    175 ; X64:       # %bb.0:
    176 ; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
    177 ; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
    178 ; X64-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
    179 ; X64-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
    180 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    181 ; X64-NEXT:    vzeroupper
    182 ; X64-NEXT:    retq
    183   %t = trunc <16 x i16> %z to <16 x i8>
    184   ret <16 x i8> %t
    185 }
    186 
    187 define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
    188 ; X32-LABEL: load_sext_test1:
    189 ; X32:       # %bb.0:
    190 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    191 ; X32-NEXT:    vpmovsxdq (%eax), %ymm0
    192 ; X32-NEXT:    retl
    193 ;
    194 ; X64-LABEL: load_sext_test1:
    195 ; X64:       # %bb.0:
    196 ; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
    197 ; X64-NEXT:    retq
    198  %X = load <4 x i32>, <4 x i32>* %ptr
    199  %Y = sext <4 x i32> %X to <4 x i64>
    200  ret <4 x i64>%Y
    201 }
    202 
    203 define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
    204 ; X32-LABEL: load_sext_test2:
    205 ; X32:       # %bb.0:
    206 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    207 ; X32-NEXT:    vpmovsxbq (%eax), %ymm0
    208 ; X32-NEXT:    retl
    209 ;
    210 ; X64-LABEL: load_sext_test2:
    211 ; X64:       # %bb.0:
    212 ; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
    213 ; X64-NEXT:    retq
    214  %X = load <4 x i8>, <4 x i8>* %ptr
    215  %Y = sext <4 x i8> %X to <4 x i64>
    216  ret <4 x i64>%Y
    217 }
    218 
    219 define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
    220 ; X32-LABEL: load_sext_test3:
    221 ; X32:       # %bb.0:
    222 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    223 ; X32-NEXT:    vpmovsxwq (%eax), %ymm0
    224 ; X32-NEXT:    retl
    225 ;
    226 ; X64-LABEL: load_sext_test3:
    227 ; X64:       # %bb.0:
    228 ; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
    229 ; X64-NEXT:    retq
    230  %X = load <4 x i16>, <4 x i16>* %ptr
    231  %Y = sext <4 x i16> %X to <4 x i64>
    232  ret <4 x i64>%Y
    233 }
    234 
    235 define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
    236 ; X32-LABEL: load_sext_test4:
    237 ; X32:       # %bb.0:
    238 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    239 ; X32-NEXT:    vpmovsxwd (%eax), %ymm0
    240 ; X32-NEXT:    retl
    241 ;
    242 ; X64-LABEL: load_sext_test4:
    243 ; X64:       # %bb.0:
    244 ; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
    245 ; X64-NEXT:    retq
    246  %X = load <8 x i16>, <8 x i16>* %ptr
    247  %Y = sext <8 x i16> %X to <8 x i32>
    248  ret <8 x i32>%Y
    249 }
    250 
    251 define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
    252 ; X32-LABEL: load_sext_test5:
    253 ; X32:       # %bb.0:
    254 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
    255 ; X32-NEXT:    vpmovsxbd (%eax), %ymm0
    256 ; X32-NEXT:    retl
    257 ;
    258 ; X64-LABEL: load_sext_test5:
    259 ; X64:       # %bb.0:
    260 ; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
    261 ; X64-NEXT:    retq
    262  %X = load <8 x i8>, <8 x i8>* %ptr
    263  %Y = sext <8 x i8> %X to <8 x i32>
    264  ret <8 x i32>%Y
    265 }