; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s

; Cost-model expectations for vector sitofp (signed integer to floating point).
; The core2 invocation is verified with the SSE2 prefix and the knl invocation
; with the AVX512F prefix. Function names are matched with LABEL directives so
; that each cost check is confined to the output of its own function.

;
; i8 -> double
;

define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
; SSE2-LABEL: sitofpv2i8v2double
; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i8> %a to <2 x double>
  ret <2 x double> %1
}

define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
; SSE2-LABEL: sitofpv4i8v4double
; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i8> %a to <4 x double>
  ret <4 x double> %1
}

define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
; SSE2-LABEL: sitofpv8i8v8double
; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %1
}

define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
; SSE2-LABEL: sitofpv16i8v16double
; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i8> %a to <16 x double>
  ret <16 x double> %1
}

define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
; SSE2-LABEL: sitofpv32i8v32double
; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i8> %a to <32 x double>
  ret <32 x double> %1
}

;
; i16 -> double
;

define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
; SSE2-LABEL: sitofpv2i16v2double
; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i16> %a to <2 x double>
  ret <2 x double> %1
}

define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
; SSE2-LABEL: sitofpv4i16v4double
; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i16> %a to <4 x double>
  ret <4 x double> %1
}

define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
; SSE2-LABEL: sitofpv8i16v8double
; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %1
}

define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
; SSE2-LABEL: sitofpv16i16v16double
; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %1
}

define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
; SSE2-LABEL: sitofpv32i16v32double
; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i16> %a to <32 x double>
  ret <32 x double> %1
}

;
; i32 -> double
;

define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
; SSE2-LABEL: sitofpv2i32v2double
; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i32> %a to <2 x double>
  ret <2 x double> %1
}

define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
; SSE2-LABEL: sitofpv4i32v4double
; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %1
}

define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
; SSE2-LABEL: sitofpv8i32v8double
; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %1
}

define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
; SSE2-LABEL: sitofpv16i32v16double
; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %1
}

define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
; SSE2-LABEL: sitofpv32i32v32double
; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i32> %a to <32 x double>
  ret <32 x double> %1
}

;
; i64 -> double
;

define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
; SSE2-LABEL: sitofpv2i64v2double
; SSE2: cost of 20 {{.*}} sitofp
  %1 = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %1
}

define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
; SSE2-LABEL: sitofpv4i64v4double
; SSE2: cost of 40 {{.*}} sitofp
  %1 = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %1
}

; The check comments for this function used to follow the instruction; they
; are placed ahead of it here to match every other function in the file.
define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
; SSE2-LABEL: sitofpv8i64v8double
; SSE2: cost of 80 {{.*}} sitofp
  %1 = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %1
}

define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
; SSE2-LABEL: sitofpv16i64v16double
; SSE2: cost of 160 {{.*}} sitofp
  %1 = sitofp <16 x i64> %a to <16 x double>
  ret <16 x double> %1
}

define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
; SSE2-LABEL: sitofpv32i64v32double
; SSE2: cost of 320 {{.*}} sitofp
  %1 = sitofp <32 x i64> %a to <32 x double>
  ret <32 x double> %1
}

;
; i8 -> float
;

define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
; SSE2-LABEL: sitofpv2i8v2float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i8> %a to <2 x float>
  ret <2 x float> %1
}

define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
; SSE2-LABEL: sitofpv4i8v4float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <4 x i8> %a to <4 x float>
  ret <4 x float> %1
}

define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
; SSE2-LABEL: sitofpv8i8v8float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <8 x i8> %a to <8 x float>
  ret <8 x float> %1
}

define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
; SSE2-LABEL: sitofpv16i8v16float
; SSE2: cost of 8 {{.*}} sitofp
  %1 = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %1
}

define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
; SSE2-LABEL: sitofpv32i8v32float
; SSE2: cost of 16 {{.*}} sitofp
  %1 = sitofp <32 x i8> %a to <32 x float>
  ret <32 x float> %1
}

;
; i16 -> float (2, 4 and 8 lanes)
;

define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
; SSE2-LABEL: sitofpv2i16v2float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i16> %a to <2 x float>
  ret <2 x float> %1
}

define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
; SSE2-LABEL: sitofpv4i16v4float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <4 x i16> %a to <4 x float>
  ret <4 x float> %1
}

define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
; SSE2-LABEL: sitofpv8i16v8float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <8 x i16> %a to <8 x float>
  ret <8 x float> %1
}

;
; i16 -> float (16 and 32 lanes)
;
; Function names checked under the SSE2 prefix use LABEL directives, like the
; AVX512F checks further down, so each cost check is confined to the output of
; its own function.

define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
; SSE2-LABEL: sitofpv16i16v16float
; SSE2: cost of 30 {{.*}} sitofp
  %1 = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %1
}

define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
; SSE2-LABEL: sitofpv32i16v32float
; SSE2: cost of 60 {{.*}} sitofp
  %1 = sitofp <32 x i16> %a to <32 x float>
  ret <32 x float> %1
}

;
; i32 -> float
;

define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
; SSE2-LABEL: sitofpv2i32v2float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i32> %a to <2 x float>
  ret <2 x float> %1
}

define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
; SSE2-LABEL: sitofpv4i32v4float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %1
}

define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
; SSE2-LABEL: sitofpv8i32v8float
; SSE2: cost of 30 {{.*}} sitofp
  %1 = sitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %1
}

define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
; SSE2-LABEL: sitofpv16i32v16float
; SSE2: cost of 60 {{.*}} sitofp
  %1 = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %1
}

define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
; SSE2-LABEL: sitofpv32i32v32float
; SSE2: cost of 120 {{.*}} sitofp
  %1 = sitofp <32 x i32> %a to <32 x float>
  ret <32 x float> %1
}

;
; i64 -> float
;

define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
; SSE2-LABEL: sitofpv2i64v2float
; SSE2: cost of 15 {{.*}} sitofp
  %1 = sitofp <2 x i64> %a to <2 x float>
  ret <2 x float> %1
}

define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
; SSE2-LABEL: sitofpv4i64v4float
; SSE2: cost of 30 {{.*}} sitofp
  %1 = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %1
}

define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
; SSE2-LABEL: sitofpv8i64v8float
; SSE2: cost of 60 {{.*}} sitofp
  %1 = sitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %1
}

define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
; SSE2-LABEL: sitofpv16i64v16float
; SSE2: cost of 120 {{.*}} sitofp
  %1 = sitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %1
}

define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
; SSE2-LABEL: sitofpv32i64v32float
; SSE2: cost of 240 {{.*}} sitofp
  %1 = sitofp <32 x i64> %a to <32 x float>
  ret <32 x float> %1
}

;
; Functions checked only under the AVX512F prefix (the -mcpu=knl invocation).
; Their check comments are placed ahead of each define.
;

; AVX512F-LABEL: sitofp_16i8_float
; AVX512F: cost of 2 {{.*}} sitofp
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
  %1 = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %1
}

; AVX512F-LABEL: sitofp_16i16_float
; AVX512F: cost of 2 {{.*}} sitofp
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
  %1 = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %1
}

; AVX512F-LABEL: sitofp_8i8_double
; AVX512F: cost of 2 {{.*}} sitofp
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
  %1 = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %1
}

; AVX512F-LABEL: sitofp_8i16_double
; AVX512F: cost of 2 {{.*}} sitofp
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
  %1 = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %1
}

; The i1 vector operand is produced by an fcmp on the function argument.
; AVX512F-LABEL: sitofp_8i1_double
; AVX512F: cost of 4 {{.*}} sitofp
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
  %1 = sitofp <8 x i1> %cmpres to <8 x double>
  ret <8 x double> %1
}

; The i1 vector operand is produced by an fcmp on the function argument.
; AVX512F-LABEL: sitofp_16i1_float
; AVX512F: cost of 3 {{.*}} sitofp
define <16 x float> @sitofp_16i1_float(<16 x float> %a) {
  %cmpres = fcmp ogt <16 x float> %a, zeroinitializer
  %1 = sitofp <16 x i1> %cmpres to <16 x float>
  ret <16 x float> %1
}