Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx | FileCheck %s
      3 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
      4 
      ; Signed int-to-float: sitofp of an <8 x i8> argument to <8 x float>.
      ; The default llc configuration is expected to sign-extend each lane with a
      ; vpslld/vpsrad pair (shift count 24); the widening-legalization
      ; configuration is expected to use vpmovsxbd instead.
      5 define <8 x float> @cvt_v8i8_v8f32(<8 x i8> %src) {
      6 ; CHECK-LABEL: cvt_v8i8_v8f32:
      7 ; CHECK:       ## %bb.0:
      8 ; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
      9 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
     10 ; CHECK-NEXT:    vpslld $24, %xmm0, %xmm0
     11 ; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
     12 ; CHECK-NEXT:    vpslld $24, %xmm1, %xmm1
     13 ; CHECK-NEXT:    vpsrad $24, %xmm1, %xmm1
     14 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     15 ; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
     16 ; CHECK-NEXT:    retl
     17 ;
     18 ; CHECK-WIDE-LABEL: cvt_v8i8_v8f32:
     19 ; CHECK-WIDE:       ## %bb.0:
     20 ; CHECK-WIDE-NEXT:    vpmovsxbd %xmm0, %xmm1
     21 ; CHECK-WIDE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
     22 ; CHECK-WIDE-NEXT:    vpmovsxbd %xmm0, %xmm0
     23 ; CHECK-WIDE-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     24 ; CHECK-WIDE-NEXT:    vcvtdq2ps %ymm0, %ymm0
     25 ; CHECK-WIDE-NEXT:    retl
     26   %res = sitofp <8 x i8> %src to <8 x float>
     27   ret <8 x float> %res
     28 }
     29 
     ; Signed int-to-float: sitofp of an <8 x i16> argument to <8 x float>.
     ; Both llc configurations are expected to emit the same sequence:
     ; two vpmovsxwd extensions joined by vinsertf128, then one vcvtdq2ps.
     30 define <8 x float> @cvt_v8i16_v8f32(<8 x i16> %src) {
     31 ; CHECK-LABEL: cvt_v8i16_v8f32:
     32 ; CHECK:       ## %bb.0:
     33 ; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm1
     34 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     35 ; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
     36 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     37 ; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
     38 ; CHECK-NEXT:    retl
     39 ;
     40 ; CHECK-WIDE-LABEL: cvt_v8i16_v8f32:
     41 ; CHECK-WIDE:       ## %bb.0:
     42 ; CHECK-WIDE-NEXT:    vpmovsxwd %xmm0, %xmm1
     43 ; CHECK-WIDE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     44 ; CHECK-WIDE-NEXT:    vpmovsxwd %xmm0, %xmm0
     45 ; CHECK-WIDE-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     46 ; CHECK-WIDE-NEXT:    vcvtdq2ps %ymm0, %ymm0
     47 ; CHECK-WIDE-NEXT:    retl
     48   %res = sitofp <8 x i16> %src to <8 x float>
     49   ret <8 x float> %res
     50 }
     51 
     ; Signed int-to-float: sitofp of a <4 x i8> argument to <4 x float>.
     ; Default configuration: vpslld/vpsrad by 24 before vcvtdq2ps.
     ; Widening-legalization configuration: a single vpmovsxbd before vcvtdq2ps.
     52 define <4 x float> @cvt_v4i8_v4f32(<4 x i8> %src) {
     53 ; CHECK-LABEL: cvt_v4i8_v4f32:
     54 ; CHECK:       ## %bb.0:
     55 ; CHECK-NEXT:    vpslld $24, %xmm0, %xmm0
     56 ; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
     57 ; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
     58 ; CHECK-NEXT:    retl
     59 ;
     60 ; CHECK-WIDE-LABEL: cvt_v4i8_v4f32:
     61 ; CHECK-WIDE:       ## %bb.0:
     62 ; CHECK-WIDE-NEXT:    vpmovsxbd %xmm0, %xmm0
     63 ; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
     64 ; CHECK-WIDE-NEXT:    retl
     65   %res = sitofp <4 x i8> %src to <4 x float>
     66   ret <4 x float> %res
     67 }
     68 
     ; Signed int-to-float: sitofp of a <4 x i16> argument to <4 x float>.
     ; Default configuration: vpslld/vpsrad by 16 before vcvtdq2ps.
     ; Widening-legalization configuration: a single vpmovsxwd before vcvtdq2ps.
     69 define <4 x float> @cvt_v4i16_v4f32(<4 x i16> %src) {
     70 ; CHECK-LABEL: cvt_v4i16_v4f32:
     71 ; CHECK:       ## %bb.0:
     72 ; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
     73 ; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
     74 ; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
     75 ; CHECK-NEXT:    retl
     76 ;
     77 ; CHECK-WIDE-LABEL: cvt_v4i16_v4f32:
     78 ; CHECK-WIDE:       ## %bb.0:
     79 ; CHECK-WIDE-NEXT:    vpmovsxwd %xmm0, %xmm0
     80 ; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
     81 ; CHECK-WIDE-NEXT:    retl
     82   %res = sitofp <4 x i16> %src to <4 x float>
     83   ret <4 x float> %res
     84 }
     85 
     ; Unsigned int-to-float: uitofp of an <8 x i8> argument to <8 x float>.
     ; Default configuration: the input is first masked with vpand against the
     ; constant-pool entry LCPI4_0, then zero-extended with vpmovzxwd.
     ; Widening-legalization configuration: zero-extends with vpmovzxbd and
     ; needs no mask.
     86 define <8 x float> @cvt_v8u8_v8f32(<8 x i8> %src) {
     87 ; CHECK-LABEL: cvt_v8u8_v8f32:
     88 ; CHECK:       ## %bb.0:
     89 ; CHECK-NEXT:    vpand LCPI4_0, %xmm0, %xmm0
     90 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
     91 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
     92 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
     93 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     94 ; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
     95 ; CHECK-NEXT:    retl
     96 ;
     97 ; CHECK-WIDE-LABEL: cvt_v8u8_v8f32:
     98 ; CHECK-WIDE:       ## %bb.0:
     99 ; CHECK-WIDE-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    100 ; CHECK-WIDE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
    101 ; CHECK-WIDE-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    102 ; CHECK-WIDE-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    103 ; CHECK-WIDE-NEXT:    vcvtdq2ps %ymm0, %ymm0
    104 ; CHECK-WIDE-NEXT:    retl
    105   %res = uitofp <8 x i8> %src to <8 x float>
    106   ret <8 x float> %res
    107 }
    108 
    ; Unsigned int-to-float: uitofp of an <8 x i16> argument to <8 x float>.
    ; Both llc configurations are expected to emit the same sequence:
    ; two vpmovzxwd extensions joined by vinsertf128, then one vcvtdq2ps.
    109 define <8 x float> @cvt_v8u16_v8f32(<8 x i16> %src) {
    110 ; CHECK-LABEL: cvt_v8u16_v8f32:
    111 ; CHECK:       ## %bb.0:
    112 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    113 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    114 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    115 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    116 ; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
    117 ; CHECK-NEXT:    retl
    118 ;
    119 ; CHECK-WIDE-LABEL: cvt_v8u16_v8f32:
    120 ; CHECK-WIDE:       ## %bb.0:
    121 ; CHECK-WIDE-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    122 ; CHECK-WIDE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
    123 ; CHECK-WIDE-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    124 ; CHECK-WIDE-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    125 ; CHECK-WIDE-NEXT:    vcvtdq2ps %ymm0, %ymm0
    126 ; CHECK-WIDE-NEXT:    retl
    127   %res = uitofp <8 x i16> %src to <8 x float>
    128   ret <8 x float> %res
    129 }
    130 
    ; Unsigned int-to-float: uitofp of a <4 x i8> argument to <4 x float>.
    ; Default configuration: vandps against the constant-pool entry LCPI6_0,
    ; then vcvtdq2ps.
    ; Widening-legalization configuration: vpmovzxbd, then vcvtdq2ps.
    131 define <4 x float> @cvt_v4u8_v4f32(<4 x i8> %src) {
    132 ; CHECK-LABEL: cvt_v4u8_v4f32:
    133 ; CHECK:       ## %bb.0:
    134 ; CHECK-NEXT:    vandps LCPI6_0, %xmm0, %xmm0
    135 ; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
    136 ; CHECK-NEXT:    retl
    137 ;
    138 ; CHECK-WIDE-LABEL: cvt_v4u8_v4f32:
    139 ; CHECK-WIDE:       ## %bb.0:
    140 ; CHECK-WIDE-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    141 ; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
    142 ; CHECK-WIDE-NEXT:    retl
    143   %res = uitofp <4 x i8> %src to <4 x float>
    144   ret <4 x float> %res
    145 }
    146 
    ; Unsigned int-to-float: uitofp of a <4 x i16> argument to <4 x float>.
    ; Default configuration: vpxor builds a zero register and vpblendw takes
    ; the odd 16-bit words from it before vcvtdq2ps.
    ; Widening-legalization configuration: vpmovzxwd, then vcvtdq2ps.
    147 define <4 x float> @cvt_v4u16_v4f32(<4 x i16> %src) {
    148 ; CHECK-LABEL: cvt_v4u16_v4f32:
    149 ; CHECK:       ## %bb.0:
    150 ; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    151 ; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
    152 ; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
    153 ; CHECK-NEXT:    retl
    154 ;
    155 ; CHECK-WIDE-LABEL: cvt_v4u16_v4f32:
    156 ; CHECK-WIDE:       ## %bb.0:
    157 ; CHECK-WIDE-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    158 ; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
    159 ; CHECK-WIDE-NEXT:    retl
    160   %res = uitofp <4 x i16> %src to <4 x float>
    161   ret <4 x float> %res
    162 }
    163 
    ; Float-to-signed-int: fptosi of an <8 x float> argument to <8 x i8>.
    ; Default configuration: one vcvttps2dq on the full ymm register, then
    ; vextractf128 + vpackssdw.
    ; Widening-legalization configuration: the expected output converts one
    ; element at a time (eight vcvttss2si) and assembles the bytes with
    ; vmovd/vpinsrb.
    164 define <8 x i8> @cvt_v8f32_v8i8(<8 x float> %src) {
    165 ; CHECK-LABEL: cvt_v8f32_v8i8:
    166 ; CHECK:       ## %bb.0:
    167 ; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
    168 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    169 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    170 ; CHECK-NEXT:    vzeroupper
    171 ; CHECK-NEXT:    retl
    172 ;
    173 ; CHECK-WIDE-LABEL: cvt_v8f32_v8i8:
    174 ; CHECK-WIDE:       ## %bb.0:
    175 ; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
    176 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
    177 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
    178 ; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm1
    179 ; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
    180 ; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
    181 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    182 ; CHECK-WIDE-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
    183 ; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
    184 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    185 ; CHECK-WIDE-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
    186 ; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm0
    187 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %eax
    188 ; CHECK-WIDE-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
    189 ; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
    190 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    191 ; CHECK-WIDE-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
    192 ; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
    193 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    194 ; CHECK-WIDE-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
    195 ; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    196 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %eax
    197 ; CHECK-WIDE-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm0
    198 ; CHECK-WIDE-NEXT:    vzeroupper
    199 ; CHECK-WIDE-NEXT:    retl
    200   %res = fptosi <8 x float> %src to <8 x i8>
    201   ret <8 x i8> %res
    202 }
    203 
    ; Float-to-signed-int: fptosi of an <8 x float> argument to <8 x i16>.
    ; Both llc configurations are expected to emit the same sequence:
    ; vcvttps2dq, vextractf128, vpackssdw, vzeroupper.
    204 define <8 x i16> @cvt_v8f32_v8i16(<8 x float> %src) {
    205 ; CHECK-LABEL: cvt_v8f32_v8i16:
    206 ; CHECK:       ## %bb.0:
    207 ; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
    208 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    209 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    210 ; CHECK-NEXT:    vzeroupper
    211 ; CHECK-NEXT:    retl
    212 ;
    213 ; CHECK-WIDE-LABEL: cvt_v8f32_v8i16:
    214 ; CHECK-WIDE:       ## %bb.0:
    215 ; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
    216 ; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
    217 ; CHECK-WIDE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    218 ; CHECK-WIDE-NEXT:    vzeroupper
    219 ; CHECK-WIDE-NEXT:    retl
    220   %res = fptosi <8 x float> %src to <8 x i16>
    221   ret <8 x i16> %res
    222 }
    223 
    ; Float-to-signed-int: fptosi of a <4 x float> argument to <4 x i8>.
    ; Default configuration: a single vcvttps2dq.
    ; Widening-legalization configuration: the expected output converts one
    ; element at a time (four vcvttss2si) and assembles the bytes with
    ; vmovd/vpinsrb.
    224 define <4 x i8> @cvt_v4f32_v4i8(<4 x float> %src) {
    225 ; CHECK-LABEL: cvt_v4f32_v4i8:
    226 ; CHECK:       ## %bb.0:
    227 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    228 ; CHECK-NEXT:    retl
    229 ;
    230 ; CHECK-WIDE-LABEL: cvt_v4f32_v4i8:
    231 ; CHECK-WIDE:       ## %bb.0:
    232 ; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
    233 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
    234 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
    235 ; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm1
    236 ; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
    237 ; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
    238 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    239 ; CHECK-WIDE-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
    240 ; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    241 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %eax
    242 ; CHECK-WIDE-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm0
    243 ; CHECK-WIDE-NEXT:    retl
    244   %res = fptosi <4 x float> %src to <4 x i8>
    245   ret <4 x i8> %res
    246 }
    247 
    ; Float-to-signed-int: fptosi of a <4 x float> argument to <4 x i16>.
    ; Default configuration: a single vcvttps2dq.
    ; Widening-legalization configuration: the expected output converts through
    ; ymm0, packs with vextractf128 + vpackssdw, and ends with vzeroupper.
    248 define <4 x i16> @cvt_v4f32_v4i16(<4 x float> %src) {
    249 ; CHECK-LABEL: cvt_v4f32_v4i16:
    250 ; CHECK:       ## %bb.0:
    251 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    252 ; CHECK-NEXT:    retl
    253 ;
    254 ; CHECK-WIDE-LABEL: cvt_v4f32_v4i16:
    255 ; CHECK-WIDE:       ## %bb.0:
    256 ; CHECK-WIDE-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
    257 ; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
    258 ; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
    259 ; CHECK-WIDE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    260 ; CHECK-WIDE-NEXT:    vzeroupper
    261 ; CHECK-WIDE-NEXT:    retl
    262   %res = fptosi <4 x float> %src to <4 x i16>
    263   ret <4 x i16> %res
    264 }
    265 
    ; Float-to-unsigned-int: fptoui of an <8 x float> argument to <8 x i8>.
    ; Default configuration: one vcvttps2dq on the full ymm register, then
    ; vextractf128 + vpackusdw.
    ; Widening-legalization configuration: the expected output converts one
    ; element at a time (eight vcvttss2si) and assembles the bytes with
    ; vmovd/vpinsrb.
    266 define <8 x i8> @cvt_v8f32_v8u8(<8 x float> %src) {
    267 ; CHECK-LABEL: cvt_v8f32_v8u8:
    268 ; CHECK:       ## %bb.0:
    269 ; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
    270 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    271 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
    272 ; CHECK-NEXT:    vzeroupper
    273 ; CHECK-NEXT:    retl
    274 ;
    275 ; CHECK-WIDE-LABEL: cvt_v8f32_v8u8:
    276 ; CHECK-WIDE:       ## %bb.0:
    277 ; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
    278 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
    279 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
    280 ; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm1
    281 ; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
    282 ; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
    283 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    284 ; CHECK-WIDE-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
    285 ; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
    286 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    287 ; CHECK-WIDE-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
    288 ; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm0
    289 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %eax
    290 ; CHECK-WIDE-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
    291 ; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
    292 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    293 ; CHECK-WIDE-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
    294 ; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
    295 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    296 ; CHECK-WIDE-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
    297 ; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    298 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %eax
    299 ; CHECK-WIDE-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm0
    300 ; CHECK-WIDE-NEXT:    vzeroupper
    301 ; CHECK-WIDE-NEXT:    retl
    302   %res = fptoui <8 x float> %src to <8 x i8>
    303   ret <8 x i8> %res
    304 }
    305 
    ; Float-to-unsigned-int: fptoui of an <8 x float> argument to <8 x i16>.
    ; Both llc configurations are expected to emit the same sequence:
    ; vcvttps2dq, vextractf128, vpackusdw, vzeroupper.
    306 define <8 x i16> @cvt_v8f32_v8u16(<8 x float> %src) {
    307 ; CHECK-LABEL: cvt_v8f32_v8u16:
    308 ; CHECK:       ## %bb.0:
    309 ; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
    310 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
    311 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
    312 ; CHECK-NEXT:    vzeroupper
    313 ; CHECK-NEXT:    retl
    314 ;
    315 ; CHECK-WIDE-LABEL: cvt_v8f32_v8u16:
    316 ; CHECK-WIDE:       ## %bb.0:
    317 ; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
    318 ; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
    319 ; CHECK-WIDE-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
    320 ; CHECK-WIDE-NEXT:    vzeroupper
    321 ; CHECK-WIDE-NEXT:    retl
    322   %res = fptoui <8 x float> %src to <8 x i16>
    323   ret <8 x i16> %res
    324 }
    325 
    ; Float-to-unsigned-int: fptoui of a <4 x float> argument to <4 x i8>.
    ; Default configuration: a single vcvttps2dq.
    ; Widening-legalization configuration: the expected output converts one
    ; element at a time (four vcvttss2si) and assembles the bytes with
    ; vmovd/vpinsrb.
    326 define <4 x i8> @cvt_v4f32_v4u8(<4 x float> %src) {
    327 ; CHECK-LABEL: cvt_v4f32_v4u8:
    328 ; CHECK:       ## %bb.0:
    329 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    330 ; CHECK-NEXT:    retl
    331 ;
    332 ; CHECK-WIDE-LABEL: cvt_v4f32_v4u8:
    333 ; CHECK-WIDE:       ## %bb.0:
    334 ; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
    335 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
    336 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
    337 ; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm1
    338 ; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
    339 ; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
    340 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
    341 ; CHECK-WIDE-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
    342 ; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
    343 ; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %eax
    344 ; CHECK-WIDE-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm0
    345 ; CHECK-WIDE-NEXT:    retl
    346   %res = fptoui <4 x float> %src to <4 x i8>
    347   ret <4 x i8> %res
    348 }
    349 
    ; Float-to-unsigned-int: fptoui of a <4 x float> argument to <4 x i16>.
    ; Default configuration: a single vcvttps2dq.
    ; Widening-legalization configuration: the expected output converts through
    ; ymm0, packs with vextractf128 + vpackusdw, and ends with vzeroupper.
    350 define <4 x i16> @cvt_v4f32_v4u16(<4 x float> %src) {
    351 ; CHECK-LABEL: cvt_v4f32_v4u16:
    352 ; CHECK:       ## %bb.0:
    353 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    354 ; CHECK-NEXT:    retl
    355 ;
    356 ; CHECK-WIDE-LABEL: cvt_v4f32_v4u16:
    357 ; CHECK-WIDE:       ## %bb.0:
    358 ; CHECK-WIDE-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
    359 ; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
    360 ; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
    361 ; CHECK-WIDE-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
    362 ; CHECK-WIDE-NEXT:    vzeroupper
    363 ; CHECK-WIDE-NEXT:    retl
    364   %res = fptoui <4 x float> %src to <4 x i16>
    365   ret <4 x i16> %res
    366 }
    367 
    368