; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckldq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhdq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; CHECK: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; CHECK: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pshufw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: test21_2
; CHECK: pshufw
; CHECK: movd
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
; CHECK: cvtpi2ps
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone