; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

;PR29078

define <2 x double> @mask_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X32-SSE-LABEL: mask_sitofp_2i64_2f64:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: mask_sitofp_2i64_2f64:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; X32-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_sitofp_2i64_2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_sitofp_2i64_2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; X64-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %and = and <2 x i64> %a, <i64 255, i64 65535>
  %cvt = sitofp <2 x i64> %and to <2 x double>
  ret <2 x double> %cvt
}

define <2 x double> @mask_uitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X32-SSE-LABEL: mask_uitofp_2i64_2f64:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: mask_uitofp_2i64_2f64:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; X32-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_uitofp_2i64_2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_uitofp_2i64_2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8,9],zero,zero,xmm0[u,u,u,u,u,u,u,u]
; X64-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %and = and <2 x i64> %a, <i64 255, i64 65535>
  %cvt = uitofp <2 x i64> %and to <2 x double>
  ret <2 x double> %cvt
}

define <4 x float> @mask_sitofp_4i64_4f32(<4 x i64> %a) nounwind {
; X32-SSE-LABEL: mask_sitofp_4i64_4f32:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-SSE-NEXT:    andps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: mask_sitofp_4i64_4f32:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_sitofp_4i64_4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_sitofp_4i64_4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %and = and <4 x i64> %a, <i64 127, i64 255, i64 4095, i64 65535>
  %cvt = sitofp <4 x i64> %and to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @mask_uitofp_4i64_4f32(<4 x i64> %a) nounwind {
; X32-SSE-LABEL: mask_uitofp_4i64_4f32:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-SSE-NEXT:    andps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: mask_uitofp_4i64_4f32:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_uitofp_4i64_4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_uitofp_4i64_4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
  %and = and <4 x i64> %a, <i64 127, i64 255, i64 4095, i64 65535>
  %cvt = uitofp <4 x i64> %and to <4 x float>
  ret <4 x float> %cvt
}

define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X32-SSE-LABEL: clamp_sitofp_2i64_2f64:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
; X32-SSE-NEXT:    pxor %xmm1, %xmm2
; X32-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [2147483393,4294967295,2147483393,4294967295]
; X32-SSE-NEXT:    movdqa %xmm2, %xmm4
; X32-SSE-NEXT:    pcmpgtd %xmm3, %xmm4
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X32-SSE-NEXT:    pcmpeqd %xmm3, %xmm2
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X32-SSE-NEXT:    pand %xmm5, %xmm2
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X32-SSE-NEXT:    por %xmm2, %xmm3
; X32-SSE-NEXT:    pand %xmm3, %xmm0
; X32-SSE-NEXT:    pandn {{\.LCPI.*}}, %xmm3
; X32-SSE-NEXT:    por %xmm0, %xmm3
; X32-SSE-NEXT:    pxor %xmm3, %xmm1
; X32-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483903,0,2147483903,0]
; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
; X32-SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; X32-SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X32-SSE-NEXT:    pand %xmm4, %xmm0
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X32-SSE-NEXT:    por %xmm0, %xmm1
; X32-SSE-NEXT:    pand %xmm1, %xmm3
; X32-SSE-NEXT:    pandn {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT:    por %xmm3, %xmm1
; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X32-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: clamp_sitofp_2i64_2f64:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [4294967041,4294967295,4294967041,4294967295]
; X32-AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X32-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X32-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,0,255,0]
; X32-AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X32-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X32-AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: clamp_sitofp_2i64_2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
; X64-SSE-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE-NEXT:    pxor %xmm1, %xmm2
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
; X64-SSE-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE-NEXT:    pand %xmm5, %xmm2
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE-NEXT:    por %xmm2, %xmm3
; X64-SSE-NEXT:    pand %xmm3, %xmm0
; X64-SSE-NEXT:    pandn {{.*}}(%rip), %xmm3
; X64-SSE-NEXT:    por %xmm0, %xmm3
; X64-SSE-NEXT:    pxor %xmm3, %xmm1
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
; X64-SSE-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE-NEXT:    pand %xmm4, %xmm0
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE-NEXT:    por %xmm0, %xmm1
; X64-SSE-NEXT:    pand %xmm1, %xmm3
; X64-SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    por %xmm3, %xmm1
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: clamp_sitofp_2i64_2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [18446744073709551361,18446744073709551361]
; X64-AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255]
; X64-AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %clo = icmp slt <2 x i64> %a, <i64 -255, i64 -255>
  %lo = select <2 x i1> %clo, <2 x i64> <i64 -255, i64 -255>, <2 x i64> %a
  %chi = icmp sgt <2 x i64> %lo, <i64 255, i64 255>
  %hi = select <2 x i1> %chi, <2 x i64> <i64 255, i64 255>, <2 x i64> %lo
  %cvt = sitofp <2 x i64> %hi to <2 x double>
  ret <2 x double> %cvt
}