Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST %s
      2 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck --check-prefix=CHECK --check-prefix=SSE41 --check-prefix=CST  %s
      3 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST %s
      4 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX2 %s
      5 
      6 ; Check that the constants used in the vectors are the right ones.
      7 ; SSE: [[MASKCSTADDR:LCPI0_[0-9]+]]:
      8 ; SSE-NEXT: .long	65535                   ## 0xffff
      9 ; SSE-NEXT: .long	65535                   ## 0xffff
     10 ; SSE-NEXT: .long	65535                   ## 0xffff
     11 ; SSE-NEXT: .long	65535                   ## 0xffff
     12 
     13 ; CST: [[LOWCSTADDR:LCPI0_[0-9]+]]:
     14 ; CST-NEXT: .long	1258291200              ## 0x4b000000
     15 ; CST-NEXT: .long	1258291200              ## 0x4b000000
     16 ; CST-NEXT: .long	1258291200              ## 0x4b000000
     17 ; CST-NEXT: .long	1258291200              ## 0x4b000000
     18 
     19 ; CST: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
     20 ; CST-NEXT: .long	1392508928              ## 0x53000000
     21 ; CST-NEXT: .long	1392508928              ## 0x53000000
     22 ; CST-NEXT: .long	1392508928              ## 0x53000000
     23 ; CST-NEXT: .long	1392508928              ## 0x53000000
     24 
     25 ; CST: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
     26 ; CST-NEXT: .long	3539992704              ## float -5.49764202E+11
     27 ; CST-NEXT: .long	3539992704              ## float -5.49764202E+11
     28 ; CST-NEXT: .long	3539992704              ## float -5.49764202E+11
     29 ; CST-NEXT: .long	3539992704              ## float -5.49764202E+11
     30 
     31 ; AVX2: [[LOWCSTADDR:LCPI0_[0-9]+]]:
     32 ; AVX2-NEXT: .long	1258291200              ## 0x4b000000
     33 
     34 ; AVX2: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
     35 ; AVX2-NEXT: .long	1392508928              ## 0x53000000
     36 
     37 ; AVX2: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
     38 ; AVX2-NEXT: .long	3539992704              ## float -5.49764202E+11
     39 
     40 define <4 x float> @test1(<4 x i32> %A) nounwind {
     41 ; CHECK-LABEL: test1:
     42 ; Verifies the instruction sequence emitted for uitofp <4 x i32> to <4 x float> under each llc configuration above.
     43 ; SSE: movdqa [[MASKCSTADDR]](%rip), [[MASK:%xmm[0-9]+]]
     44 ; SSE-NEXT: pand %xmm0, [[MASK]]
     45 ; The pand above leaves the low 16 bits of each lane of %xmm0 in MASK; the por
     46 ; below ORs in the LOWCSTADDR constant, so MASK then holds the low-half vector.
     47 ; SSE-NEXT: por [[LOWCSTADDR]](%rip), [[MASK]]
     48 ; SSE-NEXT: psrld $16, %xmm0
     49 ; SSE-NEXT: por [[HIGHCSTADDR]](%rip), %xmm0
     50 ; SSE-NEXT: addps [[MAGICCSTADDR]](%rip), %xmm0
     51 ; SSE-NEXT: addps [[MASK]], %xmm0
     52 ; SSE-NEXT: retq
     53 ;
     54 ; Currently we commute the operands of the first blend (immediate $85 rather
     55 ; than $170); this could be improved to match the lowering of the second blend.
     56 ; SSE41: movdqa [[LOWCSTADDR]](%rip), [[LOWVEC:%xmm[0-9]+]]
     57 ; SSE41-NEXT: pblendw $85, %xmm0, [[LOWVEC]]
     58 ; SSE41-NEXT: psrld $16, %xmm0
     59 ; SSE41-NEXT: pblendw $170, [[HIGHCSTADDR]](%rip), %xmm0
     60 ; SSE41-NEXT: addps [[MAGICCSTADDR]](%rip), %xmm0
     61 ; SSE41-NEXT: addps [[LOWVEC]], %xmm0
     62 ; SSE41-NEXT: retq
     63 ;
     64 ; AVX: vpblendw $170, [[LOWCSTADDR]](%rip), %xmm0, [[LOWVEC:%xmm[0-9]+]]
     65 ; AVX-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
     66 ; AVX-NEXT: vpblendw $170, [[HIGHCSTADDR]](%rip), [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
     67 ; AVX-NEXT: vaddps [[MAGICCSTADDR]](%rip), [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
     68 ; AVX-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
     69 ; AVX-NEXT: retq
     70 ;
     71 ; The lowering for AVX2 is a bit messy, because we select broadcast
     72 ; instructions, instead of folding the constant loads.
     73 ; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%xmm[0-9]+]]
     74 ; AVX2-NEXT: vpblendw $170, [[LOWCST]], %xmm0, [[LOWVEC:%xmm[0-9]+]]
     75 ; AVX2-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
     76 ; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%xmm[0-9]+]]
     77 ; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
     78 ; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%xmm[0-9]+]]
     79 ; AVX2-NEXT: vaddps [[MAGICCST]], [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
     80 ; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
     81 ; AVX2-NEXT: retq
     82   %C = uitofp <4 x i32> %A to <4 x float>
     83   ret <4 x float> %C
     84 }
     85 
     86 ; Match the AVX2 constants used in the next function
     87 ; AVX2: [[LOWCSTADDR:LCPI1_[0-9]+]]:
     88 ; AVX2-NEXT: .long	1258291200              ## 0x4b000000
     89 
     90 ; AVX2: [[HIGHCSTADDR:LCPI1_[0-9]+]]:
     91 ; AVX2-NEXT: .long	1392508928              ## 0x53000000
     92 
     93 ; AVX2: [[MAGICCSTADDR:LCPI1_[0-9]+]]:
     94 ; AVX2-NEXT: .long	3539992704              ## float -5.49764202E+11
     95 
     96 define <8 x float> @test2(<8 x i32> %A) nounwind {
     97 ; CHECK-LABEL: test2:
     98 ; Legalization splits the conversion into 2 x <4 x i32> on anything prior to AVX.
     99 ; The constants used by the vector instructions are shared between the
    100 ; two sequences of instructions.
    101 ;
    102 ; SSE: movdqa {{.*#+}} [[MASK:xmm[0-9]+]] = [65535,65535,65535,65535]
    103 ; SSE-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
    104 ; SSE-NEXT: pand %[[MASK]], [[VECLOW]]
    105 ; SSE-NEXT: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
    106 ; SSE-NEXT: por %[[LOWCST]], [[VECLOW]]
    107 ; SSE-NEXT: psrld $16, %xmm0
    108 ; SSE-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
    109 ; SSE-NEXT: por %[[HIGHCST]], %xmm0
    110 ; SSE-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
    111 ; SSE-NEXT: addps %[[MAGICCST]], %xmm0
    112 ; SSE-NEXT: addps [[VECLOW]], %xmm0
    113 ; MASK is the low vector of the second part after this point.
    114 ; SSE-NEXT: pand %xmm1, %[[MASK]]
    115 ; SSE-NEXT: por %[[LOWCST]], %[[MASK]]
    116 ; SSE-NEXT: psrld $16, %xmm1
    117 ; SSE-NEXT: por %[[HIGHCST]], %xmm1
    118 ; SSE-NEXT: addps %[[MAGICCST]], %xmm1
    119 ; SSE-NEXT: addps %[[MASK]], %xmm1
    120 ; SSE-NEXT: retq
    121 ;
    122 ; SSE41: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
    123 ; SSE41-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
    124 ; SSE41-NEXT: pblendw $170, %[[LOWCST]], [[VECLOW]]
    125 ; SSE41-NEXT: psrld $16, %xmm0
    126 ; SSE41-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
    127 ; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm0
    128 ; SSE41-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
    129 ; SSE41-NEXT: addps %[[MAGICCST]], %xmm0
    130 ; SSE41-NEXT: addps [[VECLOW]], %xmm0
    131 ; LOWCST is the low vector of the second part after this point.
    132 ; The operands of the blend are inverted because we reuse xmm1
    133 ; in the next shift.
    134 ; SSE41-NEXT: pblendw $85, %xmm1, %[[LOWCST]]
    135 ; SSE41-NEXT: psrld $16, %xmm1
    136 ; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm1
    137 ; SSE41-NEXT: addps %[[MAGICCST]], %xmm1
    138 ; SSE41-NEXT: addps %[[LOWCST]], %xmm1
    139 ; SSE41-NEXT: retq
    140 ;
    141 ; Test that we are not lowering the uitofp to scalar conversions (no cvtsd2ss).
    142 ; AVX-NOT: cvtsd2ss
    143 ; AVX: retq
    144 ;
    145 ; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%ymm[0-9]+]]
    146 ; AVX2-NEXT: vpblendw $170, [[LOWCST]], %ymm0, [[LOWVEC:%ymm[0-9]+]]
    147 ; AVX2-NEXT: vpsrld $16, %ymm0, [[SHIFTVEC:%ymm[0-9]+]]
    148 ; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%ymm[0-9]+]]
    149 ; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%ymm[0-9]+]]
    150 ; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%ymm[0-9]+]]
    151 ; AVX2-NEXT: vaddps [[MAGICCST]], [[HIGHVEC]], [[TMP:%ymm[0-9]+]]
    152 ; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %ymm0
    153 ; AVX2-NEXT: retq
    154   %C = uitofp <8 x i32> %A to <8 x float>
    155   ret <8 x float> %C
    156 }
    157 
    158 define <4 x double> @test3(<4 x i32> %arg) {
    159 ; CHECK-LABEL: test3:
    160 ; Regression test: this used to crash because we were custom lowering it as if
    161 ; it was a conversion from <4 x i32> to <4 x float> rather than to <4 x double>.
    162 ; AVX: vcvtdq2pd
    163 ; AVX2: vcvtdq2pd
    164 ; CHECK: retq
    165   %tmp = uitofp <4 x i32> %arg to <4 x double>
    166   ret <4 x double> %tmp
    167 }
    168