; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck --check-prefix=CHECK --check-prefix=SSE41 --check-prefix=CST %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX2 %s

; Check that the constants used in the vectors are the right ones.
; SSE: [[MASKCSTADDR:LCPI0_[0-9]+]]:
; SSE-NEXT: .long 65535 ## 0xffff
; SSE-NEXT: .long 65535 ## 0xffff
; SSE-NEXT: .long 65535 ## 0xffff
; SSE-NEXT: .long 65535 ## 0xffff

; CST: [[LOWCSTADDR:LCPI0_[0-9]+]]:
; CST-NEXT: .long 1258291200 ## 0x4b000000
; CST-NEXT: .long 1258291200 ## 0x4b000000
; CST-NEXT: .long 1258291200 ## 0x4b000000
; CST-NEXT: .long 1258291200 ## 0x4b000000

; CST: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
; CST-NEXT: .long 1392508928 ## 0x53000000
; CST-NEXT: .long 1392508928 ## 0x53000000
; CST-NEXT: .long 1392508928 ## 0x53000000
; CST-NEXT: .long 1392508928 ## 0x53000000

; CST: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
; CST-NEXT: .long 3539992704 ## float -5.49764202E+11

; AVX2: [[LOWCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long 1258291200 ## 0x4b000000

; AVX2: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long 1392508928 ## 0x53000000

; AVX2: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long 3539992704 ## float -5.49764202E+11

define <4 x float> @test1(<4 x i32> %A) nounwind {
; CHECK-LABEL: test1:
;
; SSE: movdqa [[MASKCSTADDR]](%rip), [[MASK:%xmm[0-9]+]]
; SSE-NEXT: pand %xmm0, [[MASK]]
; After this instruction, MASK holds the low 16 bits of each lane of
; the vector.
; SSE-NEXT: por [[LOWCSTADDR]](%rip), [[MASK]]
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: por [[HIGHCSTADDR]](%rip), %xmm0
; SSE-NEXT: addps [[MAGICCSTADDR]](%rip), %xmm0
; SSE-NEXT: addps [[MASK]], %xmm0
; SSE-NEXT: retq
;
; Currently we commute the arguments of the first blend, but this could be
; improved to match the lowering of the second blend.
; SSE41: movdqa [[LOWCSTADDR]](%rip), [[LOWVEC:%xmm[0-9]+]]
; SSE41-NEXT: pblendw $85, %xmm0, [[LOWVEC]]
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pblendw $170, [[HIGHCSTADDR]](%rip), %xmm0
; SSE41-NEXT: addps [[MAGICCSTADDR]](%rip), %xmm0
; SSE41-NEXT: addps [[LOWVEC]], %xmm0
; SSE41-NEXT: retq
;
; AVX: vpblendw $170, [[LOWCSTADDR]](%rip), %xmm0, [[LOWVEC:%xmm[0-9]+]]
; AVX-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
; AVX-NEXT: vpblendw $170, [[HIGHCSTADDR]](%rip), [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
; AVX-NEXT: vaddps [[MAGICCSTADDR]](%rip), [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
; AVX-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
; AVX-NEXT: retq
;
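; For reference, the uitofp lowerings checked in test1 and test2 all
; implement the same split-and-add trick. A rough scalar equivalent in
; C-like pseudocode (an illustrative sketch only; the names lo, hi, MAGIC,
; and asfloat are made up and are not part of the checks):
;   lo = x & 0xffff;  // asfloat(0x4b000000 | lo) == 2^23 + lo
;   hi = x >> 16;     // asfloat(0x53000000 | hi) == 2^39 + (hi << 16)
;   r  = (asfloat(0x53000000 | hi) + MAGIC) + asfloat(0x4b000000 | lo);
; where MAGIC == asfloat(0xd3000080) == -(2^39 + 2^23) == -5.49764202E+11,
; so the single add of MAGIC cancels the 2^39 and 2^23 bias terms of the
; two halves and leaves (hi << 16) + lo == x, converted to float.
;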
; The lowering for AVX2 is a bit messy because we select broadcast
; instructions instead of folding the constant loads.
; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%xmm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[LOWCST]], %xmm0, [[LOWVEC:%xmm[0-9]+]]
; AVX2-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%xmm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%xmm[0-9]+]]
; AVX2-NEXT: vaddps [[MAGICCST]], [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
; AVX2-NEXT: retq
  %C = uitofp <4 x i32> %A to <4 x float>
  ret <4 x float> %C
}

; Match the AVX2 constants used in the next function.
; AVX2: [[LOWCSTADDR:LCPI1_[0-9]+]]:
; AVX2-NEXT: .long 1258291200 ## 0x4b000000

; AVX2: [[HIGHCSTADDR:LCPI1_[0-9]+]]:
; AVX2-NEXT: .long 1392508928 ## 0x53000000

; AVX2: [[MAGICCSTADDR:LCPI1_[0-9]+]]:
; AVX2-NEXT: .long 3539992704 ## float -5.49764202E+11

define <8 x float> @test2(<8 x i32> %A) nounwind {
; CHECK-LABEL: test2:
; Legalization will break this into 2 x <4 x i32> on anything prior to AVX.
; The constants used in the vector instructions are shared between the two
; sequences of instructions (see the note at the end of the file).
;
; SSE: movdqa {{.*#+}} [[MASK:xmm[0-9]+]] = [65535,65535,65535,65535]
; SSE-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
; SSE-NEXT: pand %[[MASK]], [[VECLOW]]
; SSE-NEXT: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
; SSE-NEXT: por %[[LOWCST]], [[VECLOW]]
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
; SSE-NEXT: por %[[HIGHCST]], %xmm0
; SSE-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
; SSE-NEXT: addps %[[MAGICCST]], %xmm0
; SSE-NEXT: addps [[VECLOW]], %xmm0
; MASK is the low vector of the second part after this point.
; SSE-NEXT: pand %xmm1, %[[MASK]]
; SSE-NEXT: por %[[LOWCST]], %[[MASK]]
; SSE-NEXT: psrld $16, %xmm1
; SSE-NEXT: por %[[HIGHCST]], %xmm1
; SSE-NEXT: addps %[[MAGICCST]], %xmm1
; SSE-NEXT: addps %[[MASK]], %xmm1
; SSE-NEXT: retq
;
; SSE41: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
; SSE41-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
; SSE41-NEXT: pblendw $170, %[[LOWCST]], [[VECLOW]]
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm0
; SSE41-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
; SSE41-NEXT: addps %[[MAGICCST]], %xmm0
; SSE41-NEXT: addps [[VECLOW]], %xmm0
; LOWCST is the low vector of the second part after this point.
; The operands of the blend are inverted because we reuse xmm1
; in the next shift.
; SSE41-NEXT: pblendw $85, %xmm1, %[[LOWCST]]
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm1
; SSE41-NEXT: addps %[[MAGICCST]], %xmm1
; SSE41-NEXT: addps %[[LOWCST]], %xmm1
; SSE41-NEXT: retq
;
; Check that we are not lowering uitofp to scalar conversions.
; AVX-NOT: cvtsd2ss
; AVX: retq
;
; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%ymm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[LOWCST]], %ymm0, [[LOWVEC:%ymm[0-9]+]]
; AVX2-NEXT: vpsrld $16, %ymm0, [[SHIFTVEC:%ymm[0-9]+]]
; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%ymm[0-9]+]]
; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%ymm[0-9]+]]
; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%ymm[0-9]+]]
; AVX2-NEXT: vaddps [[MAGICCST]], [[HIGHVEC]], [[TMP:%ymm[0-9]+]]
; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %ymm0
; AVX2-NEXT: retq
  %C = uitofp <8 x i32> %A to <8 x float>
  ret <8 x float> %C
}

define <4 x double> @test3(<4 x i32> %arg) {
; CHECK-LABEL: test3:
; This test used to crash because we were custom lowering it as if it were
; a conversion between <4 x i32> and <4 x float>.
; AVX: vcvtdq2pd
; AVX2: vcvtdq2pd
; CHECK: retq
  %tmp = uitofp <4 x i32> %arg to <4 x double>
  ret <4 x double> %tmp
}
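
; Note on test2 (referenced above): on targets without AVX, the <8 x i32>
; operand is legalized as two <4 x i32> halves, so the scalar sketch shown
; before test1's AVX2 checks applies twice, once per half (xmm0 and xmm1),
; with the constant vectors materialized once and reused by both halves.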