; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s

; SSE2 intrinsic lowering tests (AVX disabled so the legacy SSE encodings are
; selected). Each test is anchored on its own function label so that the
; instruction patterns below it can only be satisfied by that function's
; output, never by a later function in the same assembly stream.

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK: addsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; Predicate immediate 7 is expected to print as the "ord" mnemonic form.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK: cmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; Scalar variant of the compare above; same predicate immediate.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK: cmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; The comi* tests expect a comisd followed by materialising the flag result
; into an i32 (setcc + movzbl, or the sbbl/andl sequence for "lt").
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK: comisd
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK: comisd
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK: comisd
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK: comisd
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK: comisd
; CHECK: sbbl %eax, %eax
; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK: comisd
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; Conversion intrinsics: each is expected to select the same-named instruction.
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK: cvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK: cvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK: cvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK: cvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK: cvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK: cvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK: cvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; The negative pattern rejects a three-register-operand form of cvtsd2ss;
; this run line disables AVX, so only the two-operand form is acceptable.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK: cvtsd2ss
; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; The integer operand is the constant 7, so a movl is expected before the
; conversion instruction.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK: movl
; CHECK: cvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK: cvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK: cvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK: cvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK: cvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; Scalar/packed double arithmetic and min/max.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK: divsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK: maxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK: maxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK: minpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK: minsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK: movmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK: mulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK: packssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; Integer SSE2 intrinsic tests. Every test is anchored on its own function
; label so that an instruction pattern cannot be satisfied by the output of a
; different function (several mnemonics here are prefixes of one another, and
; one is a substring of its own test's symbol name).

define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK: packsswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK: packuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK: paddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK: paddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK: paddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK: paddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK: pavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK: pavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK: pmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK: pmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK: pmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK: pminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK: pminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK: pmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK: pmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK: pmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK: pmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK: psadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; Shift tests. Note that "pslld"/"psrld" are textual prefixes of
; "pslldq"/"psrldq", which is why the per-function anchors matter here.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq:
; CHECK: pslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq_bs:
; CHECK: pslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrl_dq:
; CHECK: psrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrl_dq_bs:
; CHECK: psrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK: psubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK: psubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK: psubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK: psubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK: sqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK: sqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; Store tests: the movl is the expected load of the pointer argument before
; the store instruction itself.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK: movl
; CHECK: movq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK: movl
; CHECK: movdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK: movl
; CHECK: movupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK: subsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; The ucomi* tests mirror the comi* tests but expect ucomisd.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK: ucomisd
; CHECK: sete
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK: ucomisd
; CHECK: setae
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
; CHECK: ucomisd
; CHECK: seta
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK: ucomisd
; CHECK: setbe
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK: ucomisd
; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK: ucomisd
; CHECK: setne
; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; The mnemonic is a substring of this function's own symbol name, so the
; label anchor is required for the instruction pattern to be meaningful.
define void @test_x86_sse2_pause() {
; CHECK-LABEL: test_x86_sse2_pause:
; CHECK: pause
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind

define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; CHECK-LABEL: test_x86_sse2_pshuf_d:
; CHECK: pshufd $27
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone

; Word-shuffle intrinsics. Both tests pass the immediate 27 (0b00011011) and
; expect it to appear unchanged as the instruction's immediate operand.
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

; Low-word shuffle.
define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufl_w:
; CHECK: pshuflw $27
entry:
  %shuffled = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %shuffled
}

; High-word shuffle.
define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufh_w:
; CHECK: pshufhw $27
entry:
  %shuffled = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %shuffled
}