Home | History | Annotate | Download | only in X86
      ; Test driver lines. llc is invoked for the x86_64 triple with
      ; -enable-unsafe-fp-math against six feature sets (baseline, sse4.1, avx,
      ; avx2, avx512f, avx512vl) and each output is piped to FileCheck.
      ; The baseline and sse4.1 invocations share the SSE prefix, and the first
      ; three invocations also enable the CST prefix that guards the
      ; constant-pool expectations further down.
      1 ; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math \
      2 ; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST
      3 ; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+sse4.1 \
      4 ; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST
      5 ; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx \
      6 ; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST
      7 ; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx2 \
      8 ; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
      9 ; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f \
     10 ; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
     11 ; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512vl \
     12 ; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
     13 
     ; Constant-pool expectations placed ahead of test_uitofp_v4i32_to_v4f32.
     ; MASKCSTADDR and FPMASKCSTADDR capture the .LCPI labels of the 0xffff
     ; mask and of the float 65536 multiplier so that later directives can
     ; refer to those labels by name.
     ; Under the CST prefix each constant is checked as four .long entries,
     ; mask first. Under the AVX2 prefix each constant is checked as a single
     ; .long entry, and the multiplier is expected before the mask.
     14 ; CST: [[MASKCSTADDR:.LCPI[0-9_]+]]:
     15 ; CST-NEXT: .long 65535 # 0xffff
     16 ; CST-NEXT: .long 65535 # 0xffff
     17 ; CST-NEXT: .long 65535 # 0xffff
     18 ; CST-NEXT: .long 65535 # 0xffff
     19
     20 ; CST: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
     21 ; CST-NEXT: .long 1199570944 # float 65536
     22 ; CST-NEXT: .long 1199570944 # float 65536
     23 ; CST-NEXT: .long 1199570944 # float 65536
     24 ; CST-NEXT: .long 1199570944 # float 65536
     25
     26 ; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
     27 ; AVX2-NEXT: .long 1199570944 # float 65536
     28
     29 ; AVX2: [[MASKCSTADDR:.LCPI[0-9_]+]]:
     30 ; AVX2-NEXT: .long 65535 # 0xffff
     31 
     ; Codegen test for uitofp from <4 x i32> to <4 x float>. The IR body is a
     ; single uitofp followed by a ret; everything else is the expected
     ; assembly for each FileCheck prefix.
     ;
     ; Summary of the expectations spelled out by the directives in this block
     ;   SSE, AVX and AVX2 prefixes - no unsigned convert instruction appears.
     ;     The input is split into its low 16 bits (and with 65535) and its
     ;     high 16 bits (logical shift right by 16). Each half goes through
     ;     cvtdq2ps, the high half is multiplied by 65536.0, and the two
     ;     partial results are added.
     ;   AVX2 prefix - same arithmetic, with both constants loaded through
     ;     broadcast instructions.
     ;   AVX512F prefix - one vcvtudq2ps on zmm registers, bracketed by kill
     ;     annotations.
     ;   AVX512VL prefix - one vcvtudq2ps on xmm registers.
     32 define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
     33 ; SSE-LABEL: test_uitofp_v4i32_to_v4f32:
     34 ; SSE:       # BB#0:
     35 ; SSE-NEXT:    movaps {{.*#+}} xmm1 = [65535,65535,65535,65535]
     36 ; SSE-NEXT:    andps %xmm0, %xmm1
     37 ; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
     38 ; SSE-NEXT:    psrld $16, %xmm0
     39 ; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
     40 ; SSE-NEXT:    mulps [[FPMASKCSTADDR]](%rip), %xmm0
     41 ; SSE-NEXT:    addps %xmm1, %xmm0
     42 ; SSE-NEXT:    retq
     43 ;
     44 ; AVX-LABEL: test_uitofp_v4i32_to_v4f32:
     45 ; AVX:       # BB#0:
     46 ; AVX-NEXT:    vandps [[MASKCSTADDR]](%rip), %xmm0, %xmm1
     47 ; AVX-NEXT:    vcvtdq2ps %xmm1, %xmm1
     48 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
     49 ; AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
     50 ; AVX-NEXT:    vmulps [[FPMASKCSTADDR]](%rip), %xmm0, %xmm0
     51 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
     52 ; AVX-NEXT:    retq
     53 ;
     54 ; AVX2-LABEL: test_uitofp_v4i32_to_v4f32:
     55 ; AVX2:       # BB#0:
     56 ; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
     57 ; AVX2-NEXT:    vcvtdq2ps %xmm1, %xmm1
     58 ; AVX2-NEXT:    vbroadcastss [[FPMASKCSTADDR]](%rip), %xmm2
     59 ; AVX2-NEXT:    vmulps %xmm2, %xmm1, %xmm1
     60 ; AVX2-NEXT:    vpbroadcastd [[MASKCSTADDR]](%rip), %xmm2
     61 ; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
     62 ; AVX2-NEXT:    vcvtdq2ps %xmm0, %xmm0
     63 ; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
     64 ; AVX2-NEXT:    retq
     65 ;
     66 ; AVX512F-LABEL: test_uitofp_v4i32_to_v4f32:
     67 ; AVX512F:       # BB#0:
     68 ; AVX512F-NEXT:    # kill
     69 ; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
     70 ; AVX512F-NEXT:    # kill
     71 ; AVX512F-NEXT:    retq
     72 ;
     73 ; AVX512VL-LABEL: test_uitofp_v4i32_to_v4f32:
     74 ; AVX512VL:       # BB#0:
     75 ; AVX512VL-NEXT:    vcvtudq2ps %xmm0, %xmm0
     76 ; AVX512VL-NEXT:    retq
     77   %tmp = uitofp <4 x i32> %arg to <4 x float>
     78   ret <4 x float> %tmp
     79 }
     80 
     ; Constant-pool expectations placed ahead of test_uitofp_v8i32_to_v8f32.
     ; MASKCSTADDR_v8 and FPMASKCSTADDR_v8 capture the .LCPI labels of the
     ; 0xffff mask and of the float 65536 multiplier for use by the directives
     ; of the v8 test.
     ; Under the AVX prefix four .long entries are checked after each label,
     ; mask first. Under the AVX2 prefix a single .long entry is checked after
     ; each label, and the multiplier is expected before the mask.
     81 ; AVX: [[MASKCSTADDR_v8:.LCPI[0-9_]+]]:
     82 ; AVX-NEXT: .long 65535 # 0xffff
     83 ; AVX-NEXT: .long 65535 # 0xffff
     84 ; AVX-NEXT: .long 65535 # 0xffff
     85 ; AVX-NEXT: .long 65535 # 0xffff
     86
     87 ; AVX: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]:
     88 ; AVX-NEXT: .long 1199570944 # float 65536
     89 ; AVX-NEXT: .long 1199570944 # float 65536
     90 ; AVX-NEXT: .long 1199570944 # float 65536
     91 ; AVX-NEXT: .long 1199570944 # float 65536
     92
     93 ; AVX2: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]:
     94 ; AVX2-NEXT: .long 1199570944 # float 65536
     95
     96 ; AVX2: [[MASKCSTADDR_v8:.LCPI[0-9_]+]]:
     97 ; AVX2-NEXT: .long 65535 # 0xffff
     98 
     ; Codegen test for uitofp from <8 x i32> to <8 x float>. The IR body is a
     ; single uitofp followed by a ret; everything else is the expected
     ; assembly for each FileCheck prefix.
     ;
     ; Summary of the expectations spelled out by the directives in this block
     ;   SSE prefix - the value arrives in xmm0 and xmm1 and each register gets
     ;     the same sequence (shift right by 16, cvtdq2ps, multiply by 65536.0,
     ;     then mask with 65535, cvtdq2ps, add). The two constants are loaded
     ;     once into xmm3 and xmm4 and reused for the second register.
     ;   AVX prefix - the mask, converts, multiply and add run on ymm
     ;     registers, but the shift by 16 is done on each 128-bit half
     ;     separately (vextractf128, vpsrld on xmm, vinsertf128).
     ;   AVX2 prefix - the whole sequence runs on ymm registers, with both
     ;     constants loaded through broadcast instructions.
     ;   AVX512F prefix - one vcvtudq2ps on zmm registers, bracketed by kill
     ;     annotations.
     ;   AVX512VL prefix - one vcvtudq2ps on ymm registers.
     99 define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
    100 ; SSE-LABEL: test_uitofp_v8i32_to_v8f32:
    101 ; SSE:       # BB#0:
    102 ; SSE-NEXT:    movdqa %xmm0, %xmm2
    103 ; SSE-NEXT:    psrld $16, %xmm2
    104 ; SSE-NEXT:    cvtdq2ps %xmm2, %xmm2
    105 ; SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.553600e+04,6.553600e+04,6.553600e+04,6.553600e+04]
    106 ; SSE-NEXT:    mulps %xmm3, %xmm2
    107 ; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
    108 ; SSE-NEXT:    pand %xmm4, %xmm0
    109 ; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
    110 ; SSE-NEXT:    addps %xmm2, %xmm0
    111 ; SSE-NEXT:    movdqa %xmm1, %xmm2
    112 ; SSE-NEXT:    psrld $16, %xmm2
    113 ; SSE-NEXT:    cvtdq2ps %xmm2, %xmm2
    114 ; SSE-NEXT:    mulps %xmm3, %xmm2
    115 ; SSE-NEXT:    pand %xmm4, %xmm1
    116 ; SSE-NEXT:    cvtdq2ps %xmm1, %xmm1
    117 ; SSE-NEXT:    addps %xmm2, %xmm1
    118 ; SSE-NEXT:    retq
    119 ;
    120 ; AVX-LABEL: test_uitofp_v8i32_to_v8f32:
    121 ; AVX:       # BB#0:
    122 ; AVX-NEXT:    vandps [[MASKCSTADDR_v8]](%rip), %ymm0, %ymm1
    123 ; AVX-NEXT:    vcvtdq2ps %ymm1, %ymm1
    124 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm2
    125 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
    126 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
    127 ; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    128 ; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0
    129 ; AVX-NEXT:    vmulps [[FPMASKCSTADDR_v8]](%rip), %ymm0, %ymm0
    130 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
    131 ; AVX-NEXT:    retq
    132 ;
    133 ; AVX2-LABEL: test_uitofp_v8i32_to_v8f32:
    134 ; AVX2:       # BB#0:
    135 ; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm1
    136 ; AVX2-NEXT:    vcvtdq2ps %ymm1, %ymm1
    137 ; AVX2-NEXT:    vbroadcastss [[FPMASKCSTADDR_v8]](%rip), %ymm2
    138 ; AVX2-NEXT:    vmulps %ymm2, %ymm1, %ymm1
    139 ; AVX2-NEXT:    vpbroadcastd [[MASKCSTADDR_v8]](%rip), %ymm2
    140 ; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
    141 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
    142 ; AVX2-NEXT:    vaddps %ymm0, %ymm1, %ymm0
    143 ; AVX2-NEXT:    retq
    144 ;
    145 ; AVX512F-LABEL: test_uitofp_v8i32_to_v8f32:
    146 ; AVX512F:       # BB#0:
    147 ; AVX512F-NEXT:    # kill
    148 ; AVX512F-NEXT:    vcvtudq2ps %zmm0, %zmm0
    149 ; AVX512F-NEXT:    # kill
    150 ; AVX512F-NEXT:    retq
    151 ;
    152 ; AVX512VL-LABEL: test_uitofp_v8i32_to_v8f32:
    153 ; AVX512VL:       # BB#0:
    154 ; AVX512VL-NEXT:    vcvtudq2ps %ymm0, %ymm0
    155 ; AVX512VL-NEXT:    retq
    156   %tmp = uitofp <8 x i32> %arg to <8 x float>
    157   ret <8 x float> %tmp
    158 }
    159