; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s

; Codegen smoke tests for the llvm.x86.sse2.* intrinsics. Every function calls
; exactly one intrinsic and FileCheck looks for the instruction(s) expected
; from it. The same patterns serve both llc invocations above, so they are
; written as bare mnemonics (presumably so that a VEX "v" prefix produced by
; the -mcpu=knl run still matches as a substring -- confirm against real
; output before tightening).
;
; Each function's patterns are anchored with a label directive on the function
; name so that they cannot be satisfied by a neighbouring function's output.
;
; NOTE(review): the cvt* mnemonics checked below are substrings of their own
; function names, so those patterns may also be satisfied by the text of the
; label line itself; consider matching an operand as well.

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK: addsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

; Predicate immediate 7 is expected to print as the "ord" comparison.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK: cmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK: cmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

; Ordered scalar compares: a comisd followed by materialising the flag as i32.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: comisd
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK: comisd
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK: comisd
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: comisd
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

; The "less than" result is expected via sbb/and rather than setcc/movzbl.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: comisd
; CHECK: sbbl %eax, %eax
; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: comisd
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

; Conversions.
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: cvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK: cvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK: cvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK: cvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK: cvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK: cvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK: cvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; The negative pattern uses the SSE prefix, which only the first llc
; invocation enables: that run must not emit a three-register form.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK: cvtsd2ss
; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

; The integer operand is the constant 7, so a movl is expected before the convert.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK: movl
; CHECK: cvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK: cvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK: cvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK: cvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK: cvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

; Scalar/packed double arithmetic.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK: divsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK: maxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK: maxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK: minpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK: minsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK: movmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK: mulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

; Integer pack with saturation.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK: packssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

; Packed integer tests: pack, saturating add, average, multiply-add, min/max.
; Each function calls one intrinsic; its patterns are anchored to the function
; label so they cannot be satisfied by output belonging to another function.

define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK: packsswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK: packuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK: paddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK: paddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK: paddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK: paddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK: pavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK: pavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK: pmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK: pmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK: pmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK: pminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

; Remaining SSE2 intrinsic tests: integer min/mask/multiply, shifts,
; saturating subtract, sqrt, unaligned/low stores, unordered compares, pause
; and the immediate shuffles. Each function calls one intrinsic; its patterns
; are anchored to the function label so they cannot be satisfied by output
; belonging to another function.

define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK: pminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK: pmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK: pmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK: pmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK: pmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK: psadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

; Shifts. The psll/psra/psrl forms take the count as a vector operand; the
; pslli/psrai/psrli forms take it as an i32 (the constant 7 here). Both forms
; are checked against the same mnemonic.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

; Saturating subtract.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK: psubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK: psubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK: psubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK: psubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK: sqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK: sqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

; Stores. A movl is expected first in each of these (presumably the load of
; the pointer argument from the stack on i386 -- confirm), then the store.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: movl
; CHECK: movlps
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind

define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: movl
; CHECK: movdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: movl
; CHECK: movupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK: subsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

; Unordered scalar compares: a ucomisd followed by materialising the flag.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: ucomisd
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK: ucomisd
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
; CHECK: ucomisd
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK: ucomisd
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK: ucomisd
; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK: ucomisd
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; NOTE(review): "pause" is a substring of this function's own name, so the
; pattern below may be satisfied by the label line rather than by an emitted
; instruction; consider tightening it once the exact output is confirmed.
define void @test_x86_sse2_pause() {
; CHECK-LABEL: test_x86_sse2_pause:
; CHECK: pause
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind

; Immediate shuffles: the i8 control value 27 must appear as the $27 immediate.
define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; CHECK-LABEL: test_x86_sse2_pshuf_d:
; CHECK: pshufd $27
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufl_w:
; CHECK: pshuflw $27
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufh_w:
; CHECK: pshufhw $27
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone