; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL

; Every function below calls exactly one llvm.x86.sse2.* intrinsic. The check
; lines inside each function pin the assembly expected from the two llc
; invocations above; they are script-generated (see the NOTE line) and should
; be regenerated with that script rather than edited by hand.

; add.sd: expects addsd, or vaddsd with -mcpu=knl.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE: ## BB#0:
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_add_sd:
; KNL: ## BB#0:
; KNL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; cmp.pd with immediate 7: expects cmpordpd, or vcmpordpd with -mcpu=knl.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_pd:
; SSE: ## BB#0:
; SSE-NEXT: cmpordpd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cmp_pd:
; KNL: ## BB#0:
; KNL-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; cmp.sd with immediate 7: expects cmpordsd, or vcmpordsd with -mcpu=knl.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_sd:
; SSE: ## BB#0:
; SSE-NEXT: cmpordsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cmp_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; comieq.sd: expects (v)comisd, then setnp and sete combined with andb.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comieq_sd:
; SSE: ## BB#0:
; SSE-NEXT: comisd %xmm1, %xmm0
; SSE-NEXT: setnp %al
; SSE-NEXT: sete %cl
; SSE-NEXT: andb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comieq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcomisd %xmm1, %xmm0
; KNL-NEXT: setnp %al
; KNL-NEXT: sete %cl
; KNL-NEXT: andb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


; comige.sd: expects (v)comisd %xmm1, %xmm0 followed by setae.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comige_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comisd %xmm1, %xmm0
; SSE-NEXT: setae %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comige_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vcomisd %xmm1, %xmm0
; KNL-NEXT: setae %al
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


; comigt.sd: expects (v)comisd %xmm1, %xmm0 followed by seta.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comigt_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comisd %xmm1, %xmm0
; SSE-NEXT: seta %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comigt_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vcomisd %xmm1, %xmm0
; KNL-NEXT: seta %al
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


; comile.sd: expects the operands swapped, (v)comisd %xmm0, %xmm1, then setae.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comile_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comisd %xmm0, %xmm1
; SSE-NEXT: setae %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comile_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vcomisd %xmm0, %xmm1
; KNL-NEXT: setae %al
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; comilt.sd: expects the operands swapped, (v)comisd %xmm0, %xmm1, then seta.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comilt_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comisd %xmm0, %xmm1
; SSE-NEXT: seta %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comilt_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vcomisd %xmm0, %xmm1
; KNL-NEXT: seta %al
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


; comineq.sd: expects (v)comisd, then setp and setne combined with orb.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comineq_sd:
; SSE: ## BB#0:
; SSE-NEXT: comisd %xmm1, %xmm0
; SSE-NEXT: setp %al
; SSE-NEXT: setne %cl
; SSE-NEXT: orb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_comineq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vcomisd %xmm1, %xmm0
; KNL-NEXT: setp %al
; KNL-NEXT: setne %cl
; KNL-NEXT: orb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; cvtdq2ps: expects cvtdq2ps, or vcvtdq2ps with -mcpu=knl.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE: ## BB#0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtdq2ps:
; KNL: ## BB#0:
; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


; cvtpd2dq: expects cvtpd2dq, or vcvtpd2dq with -mcpu=knl.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2dq:
; SSE: ## BB#0:
; SSE-NEXT: cvtpd2dq %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtpd2dq:
; KNL: ## BB#0:
; KNL-NEXT: vcvtpd2dq %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


; cvtpd2ps: expects cvtpd2ps, or vcvtpd2ps with -mcpu=knl.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2ps:
; SSE: ## BB#0:
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtpd2ps:
; KNL: ## BB#0:
; KNL-NEXT: vcvtpd2ps %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


; cvtps2dq: expects cvtps2dq, or vcvtps2dq with -mcpu=knl.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2dq:
; SSE: ## BB#0:
; SSE-NEXT: cvtps2dq %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtps2dq:
; KNL: ## BB#0:
; KNL-NEXT: vcvtps2dq %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


; cvtsd2si: expects cvtsd2si into %eax, or vcvtsd2si with -mcpu=knl.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtsd2si:
; SSE: ## BB#0:
; SSE-NEXT: cvtsd2si %xmm0, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2si:
; KNL: ## BB#0:
; KNL-NEXT: vcvtsd2si %xmm0, %eax
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; cvtsd2ss: expects cvtsd2ss, or vcvtsd2ss with -mcpu=knl.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cvtsd2ss:
; SSE: ## BB#0:
; SSE-NEXT: cvtsd2ss %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2ss:
; KNL: ## BB#0:
; KNL-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %result
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; cvtsi2sd: expects (v)cvtsi2sdl reading the i32 operand from an %esp-relative slot.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; SSE-LABEL: test_x86_sse2_cvtsi2sd:
; SSE: ## BB#0:
; SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtsi2sd:
; KNL: ## BB#0:
; KNL-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


; cvtss2sd: expects cvtss2sd, or vcvtss2sd with -mcpu=knl.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE: ## BB#0:
; SSE-NEXT: cvtss2sd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvtss2sd:
; KNL: ## BB#0:
; KNL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


; cvttpd2dq: expects cvttpd2dq, or vcvttpd2dq with -mcpu=knl.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttpd2dq:
; SSE: ## BB#0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvttpd2dq:
; KNL: ## BB#0:
; KNL-NEXT: vcvttpd2dq %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


; cvttsd2si: expects cvttsd2si into %eax, or vcvttsd2si with -mcpu=knl.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttsd2si:
; SSE: ## BB#0:
; SSE-NEXT: cvttsd2si %xmm0, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_cvttsd2si:
; KNL: ## BB#0:
; KNL-NEXT: vcvttsd2si %xmm0, %eax
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; div.sd: expects divsd, or vdivsd with -mcpu=knl.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE: ## BB#0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_div_sd:
; KNL: ## BB#0:
; KNL-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone


; max.pd: expects maxpd, or vmaxpd with -mcpu=knl.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_max_pd:
; SSE: ## BB#0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_max_pd:
; KNL: ## BB#0:
; KNL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


; max.sd: expects maxsd, or vmaxsd with -mcpu=knl.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_max_sd:
; SSE: ## BB#0:
; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_max_sd:
; KNL: ## BB#0:
; KNL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


; min.pd: expects minpd, or vminpd with -mcpu=knl.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_min_pd:
; SSE: ## BB#0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_min_pd:
; KNL: ## BB#0:
; KNL-NEXT: vminpd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


; min.sd: expects minsd, or vminsd with -mcpu=knl.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_min_sd:
; SSE: ## BB#0:
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_min_sd:
; KNL: ## BB#0:
; KNL-NEXT: vminsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


; movmsk.pd: expects movmskpd into %eax, or vmovmskpd with -mcpu=knl.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_movmsk_pd:
; SSE: ## BB#0:
; SSE-NEXT: movmskpd %xmm0, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_movmsk_pd:
; KNL: ## BB#0:
; KNL-NEXT: vmovmskpd %xmm0, %eax
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone


; mul.sd: expects mulsd, or vmulsd with -mcpu=knl.
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE: ## BB#0:
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_mul_sd:
; KNL: ## BB#0:
; KNL-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %result
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; packssdw.128: expects packssdw, or vpackssdw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_packssdw_128:
; SSE: ## BB#0:
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_packssdw_128:
; KNL: ## BB#0:
; KNL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; packsswb.128: expects packsswb, or vpacksswb with -mcpu=knl.
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packsswb_128:
; SSE: ## BB#0:
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_packsswb_128:
; KNL: ## BB#0:
; KNL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; packuswb.128: expects packuswb, or vpackuswb with -mcpu=knl.
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packuswb_128:
; SSE: ## BB#0:
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_packuswb_128:
; KNL: ## BB#0:
; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; padds.b: expects paddsb, or vpaddsb with -mcpu=knl.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_padds_b:
; SSE: ## BB#0:
; SSE-NEXT: paddsb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_padds_b:
; KNL: ## BB#0:
; KNL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


; padds.w: expects paddsw, or vpaddsw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_padds_w:
; SSE: ## BB#0:
; SSE-NEXT: paddsw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_padds_w:
; KNL: ## BB#0:
; KNL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


; paddus.b: expects paddusb, or vpaddusb with -mcpu=knl.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b:
; SSE: ## BB#0:
; SSE-NEXT: paddusb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_paddus_b:
; KNL: ## BB#0:
; KNL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


; paddus.w: expects paddusw, or vpaddusw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_w:
; SSE: ## BB#0:
; SSE-NEXT: paddusw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_paddus_w:
; KNL: ## BB#0:
; KNL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


; pavg.b: expects pavgb, or vpavgb with -mcpu=knl.
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_b:
; SSE: ## BB#0:
; SSE-NEXT: pavgb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pavg_b:
; KNL: ## BB#0:
; KNL-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


; pavg.w: expects pavgw, or vpavgw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_w:
; SSE: ## BB#0:
; SSE-NEXT: pavgw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pavg_w:
; KNL: ## BB#0:
; KNL-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmadd.wd: expects pmaddwd, or vpmaddwd with -mcpu=knl.
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmadd_wd:
; SSE: ## BB#0:
; SSE-NEXT: pmaddwd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmadd_wd:
; KNL: ## BB#0:
; KNL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


; pmaxs.w: expects pmaxsw, or vpmaxsw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmaxs_w:
; SSE: ## BB#0:
; SSE-NEXT: pmaxsw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmaxs_w:
; KNL: ## BB#0:
; KNL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmaxu.b: expects pmaxub, or vpmaxub with -mcpu=knl.
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pmaxu_b:
; SSE: ## BB#0:
; SSE-NEXT: pmaxub %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmaxu_b:
; KNL: ## BB#0:
; KNL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


; pmins.w: expects pminsw, or vpminsw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmins_w:
; SSE: ## BB#0:
; SSE-NEXT: pminsw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmins_w:
; KNL: ## BB#0:
; KNL-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


; pminu.b: expects pminub, or vpminub with -mcpu=knl.
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pminu_b:
; SSE: ## BB#0:
; SSE-NEXT: pminub %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pminu_b:
; KNL: ## BB#0:
; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


; pmovmskb.128: expects pmovmskb into %eax, or vpmovmskb with -mcpu=knl.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse2_pmovmskb_128:
; SSE: ## BB#0:
; SSE-NEXT: pmovmskb %xmm0, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmovmskb_128:
; KNL: ## BB#0:
; KNL-NEXT: vpmovmskb %xmm0, %eax
; KNL-NEXT: retl
  %result = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %result
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


; pmulh.w: expects pmulhw, or vpmulhw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulh_w:
; SSE: ## BB#0:
; SSE-NEXT: pmulhw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmulh_w:
; KNL: ## BB#0:
; KNL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmulhu.w: expects pmulhuw, or vpmulhuw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulhu_w:
; SSE: ## BB#0:
; SSE-NEXT: pmulhuw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmulhu_w:
; KNL: ## BB#0:
; KNL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmulu.dq: expects pmuludq, or vpmuludq with -mcpu=knl.
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE: ## BB#0:
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pmulu_dq:
; KNL: ## BB#0:
; KNL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %result
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


; psad.bw: expects psadbw, or vpsadbw with -mcpu=knl.
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psad_bw:
; SSE: ## BB#0:
; SSE-NEXT: psadbw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psad_bw:
; KNL: ## BB#0:
; KNL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %result
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; psll.d: expects register-count pslld, or vpslld with -mcpu=knl.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psll_d:
; SSE: ## BB#0:
; SSE-NEXT: pslld %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psll_d:
; KNL: ## BB#0:
; KNL-NEXT: vpslld %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


; psll.q: expects register-count psllq, or vpsllq with -mcpu=knl.
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psll_q:
; SSE: ## BB#0:
; SSE-NEXT: psllq %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psll_q:
; KNL: ## BB#0:
; KNL-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


; psll.w: expects register-count psllw, or vpsllw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psll_w:
; SSE: ## BB#0:
; SSE-NEXT: psllw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psll_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


; pslli.d with count 7: expects pslld $7, or vpslld $7 with -mcpu=knl.
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_d:
; SSE: ## BB#0:
; SSE-NEXT: pslld $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pslli_d:
; KNL: ## BB#0:
; KNL-NEXT: vpslld $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


; pslli.q with count 7: expects psllq $7, or vpsllq $7 with -mcpu=knl.
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_q:
; SSE: ## BB#0:
; SSE-NEXT: psllq $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pslli_q:
; KNL: ## BB#0:
; KNL-NEXT: vpsllq $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %result
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


; pslli.w with count 7: expects psllw $7, or vpsllw $7 with -mcpu=knl.
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_w:
; SSE: ## BB#0:
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pslli_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


; psra.d: expects register-count psrad, or vpsrad with -mcpu=knl.
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psra_d:
; SSE: ## BB#0:
; SSE-NEXT: psrad %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psra_d:
; KNL: ## BB#0:
; KNL-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


; psra.w: expects register-count psraw, or vpsraw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psra_w:
; SSE: ## BB#0:
; SSE-NEXT: psraw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psra_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


; psrai.d with count 7: expects psrad $7, or vpsrad $7 with -mcpu=knl.
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_d:
; SSE: ## BB#0:
; SSE-NEXT: psrad $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrai_d:
; KNL: ## BB#0:
; KNL-NEXT: vpsrad $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


; psrai.w with count 7: expects psraw $7, or vpsraw $7 with -mcpu=knl.
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_w:
; SSE: ## BB#0:
; SSE-NEXT: psraw $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrai_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsraw $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


; psrl.d: expects register-count psrld, or vpsrld with -mcpu=knl.
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_d:
; SSE: ## BB#0:
; SSE-NEXT: psrld %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrl_d:
; KNL: ## BB#0:
; KNL-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


; psrl.q: expects register-count psrlq, or vpsrlq with -mcpu=knl.
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_q:
; SSE: ## BB#0:
; SSE-NEXT: psrlq %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrl_q:
; KNL: ## BB#0:
; KNL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


; psrl.w: expects register-count psrlw, or vpsrlw with -mcpu=knl.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_w:
; SSE: ## BB#0:
; SSE-NEXT: psrlw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrl_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


; psrli.d with count 7: expects psrld $7, or vpsrld $7 with -mcpu=knl.
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_d:
; SSE: ## BB#0:
; SSE-NEXT: psrld $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrli_d:
; KNL: ## BB#0:
; KNL-NEXT: vpsrld $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %result
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


; psrli.q with count 7: expects psrlq $7, or vpsrlq $7 with -mcpu=knl.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_q:
; SSE: ## BB#0:
; SSE-NEXT: psrlq $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrli_q:
; KNL: ## BB#0:
; KNL-NEXT: vpsrlq $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %result = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %result
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_w:
; SSE: ## BB#0:
1003 ; SSE-NEXT: psrlw $7, %xmm0 1004 ; SSE-NEXT: retl 1005 ; 1006 ; KNL-LABEL: test_x86_sse2_psrli_w: 1007 ; KNL: ## BB#0: 1008 ; KNL-NEXT: vpsrlw $7, %xmm0, %xmm0 1009 ; KNL-NEXT: retl 1010 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] 1011 ret <8 x i16> %res 1012 } 1013 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone 1014 1015 1016 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { 1017 ; SSE-LABEL: test_x86_sse2_psubs_b: 1018 ; SSE: ## BB#0: 1019 ; SSE-NEXT: psubsb %xmm1, %xmm0 1020 ; SSE-NEXT: retl 1021 ; 1022 ; KNL-LABEL: test_x86_sse2_psubs_b: 1023 ; KNL: ## BB#0: 1024 ; KNL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 1025 ; KNL-NEXT: retl 1026 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1027 ret <16 x i8> %res 1028 } 1029 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone 1030 1031 1032 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { 1033 ; SSE-LABEL: test_x86_sse2_psubs_w: 1034 ; SSE: ## BB#0: 1035 ; SSE-NEXT: psubsw %xmm1, %xmm0 1036 ; SSE-NEXT: retl 1037 ; 1038 ; KNL-LABEL: test_x86_sse2_psubs_w: 1039 ; KNL: ## BB#0: 1040 ; KNL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 1041 ; KNL-NEXT: retl 1042 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1043 ret <8 x i16> %res 1044 } 1045 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone 1046 1047 1048 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { 1049 ; SSE-LABEL: test_x86_sse2_psubus_b: 1050 ; SSE: ## BB#0: 1051 ; SSE-NEXT: psubusb %xmm1, %xmm0 1052 ; SSE-NEXT: retl 1053 ; 1054 ; KNL-LABEL: test_x86_sse2_psubus_b: 1055 ; KNL: ## BB#0: 1056 ; KNL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 1057 ; KNL-NEXT: retl 1058 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1059 ret <16 x i8> 
%res 1060 } 1061 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone 1062 1063 1064 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { 1065 ; SSE-LABEL: test_x86_sse2_psubus_w: 1066 ; SSE: ## BB#0: 1067 ; SSE-NEXT: psubusw %xmm1, %xmm0 1068 ; SSE-NEXT: retl 1069 ; 1070 ; KNL-LABEL: test_x86_sse2_psubus_w: 1071 ; KNL: ## BB#0: 1072 ; KNL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 1073 ; KNL-NEXT: retl 1074 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1075 ret <8 x i16> %res 1076 } 1077 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone 1078 1079 1080 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { 1081 ; SSE-LABEL: test_x86_sse2_sqrt_pd: 1082 ; SSE: ## BB#0: 1083 ; SSE-NEXT: sqrtpd %xmm0, %xmm0 1084 ; SSE-NEXT: retl 1085 ; 1086 ; KNL-LABEL: test_x86_sse2_sqrt_pd: 1087 ; KNL: ## BB#0: 1088 ; KNL-NEXT: vsqrtpd %xmm0, %xmm0 1089 ; KNL-NEXT: retl 1090 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] 1091 ret <2 x double> %res 1092 } 1093 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 1094 1095 1096 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { 1097 ; SSE-LABEL: test_x86_sse2_sqrt_sd: 1098 ; SSE: ## BB#0: 1099 ; SSE-NEXT: sqrtsd %xmm0, %xmm0 1100 ; SSE-NEXT: retl 1101 ; 1102 ; KNL-LABEL: test_x86_sse2_sqrt_sd: 1103 ; KNL: ## BB#0: 1104 ; KNL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 1105 ; KNL-NEXT: retl 1106 %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] 1107 ret <2 x double> %res 1108 } 1109 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 1110 1111 1112 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { 1113 ; SSE-LABEL: test_x86_sse2_sub_sd: 1114 ; SSE: ## BB#0: 1115 ; SSE-NEXT: subsd %xmm1, %xmm0 1116 ; SSE-NEXT: retl 1117 ; 1118 ; KNL-LABEL: 
test_x86_sse2_sub_sd: 1119 ; KNL: ## BB#0: 1120 ; KNL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 1121 ; KNL-NEXT: retl 1122 %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 1123 ret <2 x double> %res 1124 } 1125 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone 1126 1127 1128 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { 1129 ; SSE-LABEL: test_x86_sse2_ucomieq_sd: 1130 ; SSE: ## BB#0: 1131 ; SSE-NEXT: ucomisd %xmm1, %xmm0 1132 ; SSE-NEXT: setnp %al 1133 ; SSE-NEXT: sete %cl 1134 ; SSE-NEXT: andb %al, %cl 1135 ; SSE-NEXT: movzbl %cl, %eax 1136 ; SSE-NEXT: retl 1137 ; 1138 ; KNL-LABEL: test_x86_sse2_ucomieq_sd: 1139 ; KNL: ## BB#0: 1140 ; KNL-NEXT: vucomisd %xmm1, %xmm0 1141 ; KNL-NEXT: setnp %al 1142 ; KNL-NEXT: sete %cl 1143 ; KNL-NEXT: andb %al, %cl 1144 ; KNL-NEXT: movzbl %cl, %eax 1145 ; KNL-NEXT: retl 1146 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 1147 ret i32 %res 1148 } 1149 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 1150 1151 1152 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { 1153 ; SSE-LABEL: test_x86_sse2_ucomige_sd: 1154 ; SSE: ## BB#0: 1155 ; SSE-NEXT: xorl %eax, %eax 1156 ; SSE-NEXT: ucomisd %xmm1, %xmm0 1157 ; SSE-NEXT: setae %al 1158 ; SSE-NEXT: retl 1159 ; 1160 ; KNL-LABEL: test_x86_sse2_ucomige_sd: 1161 ; KNL: ## BB#0: 1162 ; KNL-NEXT: xorl %eax, %eax 1163 ; KNL-NEXT: vucomisd %xmm1, %xmm0 1164 ; KNL-NEXT: setae %al 1165 ; KNL-NEXT: retl 1166 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 1167 ret i32 %res 1168 } 1169 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone 1170 1171 1172 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { 1173 ; SSE-LABEL: test_x86_sse2_ucomigt_sd: 1174 ; SSE: ## BB#0: 1175 ; 
SSE-NEXT: xorl %eax, %eax 1176 ; SSE-NEXT: ucomisd %xmm1, %xmm0 1177 ; SSE-NEXT: seta %al 1178 ; SSE-NEXT: retl 1179 ; 1180 ; KNL-LABEL: test_x86_sse2_ucomigt_sd: 1181 ; KNL: ## BB#0: 1182 ; KNL-NEXT: xorl %eax, %eax 1183 ; KNL-NEXT: vucomisd %xmm1, %xmm0 1184 ; KNL-NEXT: seta %al 1185 ; KNL-NEXT: retl 1186 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 1187 ret i32 %res 1188 } 1189 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone 1190 1191 1192 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { 1193 ; SSE-LABEL: test_x86_sse2_ucomile_sd: 1194 ; SSE: ## BB#0: 1195 ; SSE-NEXT: xorl %eax, %eax 1196 ; SSE-NEXT: ucomisd %xmm0, %xmm1 1197 ; SSE-NEXT: setae %al 1198 ; SSE-NEXT: retl 1199 ; 1200 ; KNL-LABEL: test_x86_sse2_ucomile_sd: 1201 ; KNL: ## BB#0: 1202 ; KNL-NEXT: xorl %eax, %eax 1203 ; KNL-NEXT: vucomisd %xmm0, %xmm1 1204 ; KNL-NEXT: setae %al 1205 ; KNL-NEXT: retl 1206 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 1207 ret i32 %res 1208 } 1209 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone 1210 1211 1212 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { 1213 ; SSE-LABEL: test_x86_sse2_ucomilt_sd: 1214 ; SSE: ## BB#0: 1215 ; SSE-NEXT: xorl %eax, %eax 1216 ; SSE-NEXT: ucomisd %xmm0, %xmm1 1217 ; SSE-NEXT: seta %al 1218 ; SSE-NEXT: retl 1219 ; 1220 ; KNL-LABEL: test_x86_sse2_ucomilt_sd: 1221 ; KNL: ## BB#0: 1222 ; KNL-NEXT: xorl %eax, %eax 1223 ; KNL-NEXT: vucomisd %xmm0, %xmm1 1224 ; KNL-NEXT: seta %al 1225 ; KNL-NEXT: retl 1226 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 1227 ret i32 %res 1228 } 1229 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone 1230 1231 1232 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { 1233 ; SSE-LABEL: 
test_x86_sse2_ucomineq_sd: 1234 ; SSE: ## BB#0: 1235 ; SSE-NEXT: ucomisd %xmm1, %xmm0 1236 ; SSE-NEXT: setp %al 1237 ; SSE-NEXT: setne %cl 1238 ; SSE-NEXT: orb %al, %cl 1239 ; SSE-NEXT: movzbl %cl, %eax 1240 ; SSE-NEXT: retl 1241 ; 1242 ; KNL-LABEL: test_x86_sse2_ucomineq_sd: 1243 ; KNL: ## BB#0: 1244 ; KNL-NEXT: vucomisd %xmm1, %xmm0 1245 ; KNL-NEXT: setp %al 1246 ; KNL-NEXT: setne %cl 1247 ; KNL-NEXT: orb %al, %cl 1248 ; KNL-NEXT: movzbl %cl, %eax 1249 ; KNL-NEXT: retl 1250 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1] 1251 ret i32 %res 1252 } 1253 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone 1254 1255 define void @test_x86_sse2_pause() { 1256 ; SSE-LABEL: test_x86_sse2_pause: 1257 ; SSE: ## BB#0: 1258 ; SSE-NEXT: pause 1259 ; SSE-NEXT: retl 1260 ; 1261 ; KNL-LABEL: test_x86_sse2_pause: 1262 ; KNL: ## BB#0: 1263 ; KNL-NEXT: pause 1264 ; KNL-NEXT: retl 1265 tail call void @llvm.x86.sse2.pause() 1266 ret void 1267 } 1268 declare void @llvm.x86.sse2.pause() nounwind 1269