Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i386-pc-win32       -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq  | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-win32        -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq  | FileCheck %s --check-prefix=WIN64
      4 ; RUN: llc < %s -mtriple=x86_64-linux-gnu    -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq  | FileCheck %s --check-prefix=LINUXOSX64
      5 
      6 ; Test regcall when receiving/returning i1
        ; The callee adds 1 to its i1 argument and returns the sum. For every RUN
        ; configuration the expected code is a single in-place `incb %al`, i.e. the
        ; incoming argument and the returned value use the same register.
      7 define x86_regcallcc i1 @test_argReti1(i1 %a)  {
      8 ; X32-LABEL: test_argReti1:
      9 ; X32:       # %bb.0:
     10 ; X32-NEXT:    incb %al
     11 ; X32-NEXT:    # kill: def $al killed $al killed $eax
     12 ; X32-NEXT:    retl
     13 ;
     14 ; WIN64-LABEL: test_argReti1:
     15 ; WIN64:       # %bb.0:
     16 ; WIN64-NEXT:    incb %al
     17 ; WIN64-NEXT:    # kill: def $al killed $al killed $eax
     18 ; WIN64-NEXT:    retq
     19 ;
     20 ; LINUXOSX64-LABEL: test_argReti1:
     21 ; LINUXOSX64:       # %bb.0:
     22 ; LINUXOSX64-NEXT:    incb %al
     23 ; LINUXOSX64-NEXT:    # kill: def $al killed $al killed $eax
     24 ; LINUXOSX64-NEXT:    retq
     25   %add = add i1 %a, 1
     26   ret i1 %add
     27 }
     28 
     29 ; Test regcall when passing/retrieving i1
        ; The caller adds 1, calls test_argReti1 with the result, then adds 1 to the
        ; returned value. The checks expect the outgoing value to be zero-extended
        ; into %eax (movzbl) before the call, and the call to be bracketed by a
        ; push/pop of the stack pointer register.
     30 define x86_regcallcc i1 @test_CallargReti1(i1 %a)  {
     31 ; X32-LABEL: test_CallargReti1:
     32 ; X32:       # %bb.0:
     33 ; X32-NEXT:    pushl %esp
     34 ; X32-NEXT:    incb %al
     35 ; X32-NEXT:    movzbl %al, %eax
     36 ; X32-NEXT:    calll _test_argReti1
     37 ; X32-NEXT:    incb %al
     38 ; X32-NEXT:    popl %esp
     39 ; X32-NEXT:    retl
     40 ;
     41 ; WIN64-LABEL: test_CallargReti1:
     42 ; WIN64:       # %bb.0:
     43 ; WIN64-NEXT:    pushq %rsp
     44 ; WIN64-NEXT:    .seh_pushreg 4
     45 ; WIN64-NEXT:    .seh_endprologue
     46 ; WIN64-NEXT:    incb %al
     47 ; WIN64-NEXT:    movzbl %al, %eax
     48 ; WIN64-NEXT:    callq test_argReti1
     49 ; WIN64-NEXT:    incb %al
     50 ; WIN64-NEXT:    popq %rsp
     51 ; WIN64-NEXT:    retq
     52 ; WIN64-NEXT:    .seh_handlerdata
     53 ; WIN64-NEXT:    .text
     54 ; WIN64-NEXT:    .seh_endproc
     55 ;
     56 ; LINUXOSX64-LABEL: test_CallargReti1:
     57 ; LINUXOSX64:       # %bb.0:
     58 ; LINUXOSX64-NEXT:    pushq %rsp
     59 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
     60 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
     61 ; LINUXOSX64-NEXT:    incb %al
     62 ; LINUXOSX64-NEXT:    movzbl %al, %eax
     63 ; LINUXOSX64-NEXT:    callq test_argReti1
     64 ; LINUXOSX64-NEXT:    incb %al
     65 ; LINUXOSX64-NEXT:    popq %rsp
     66 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
     67 ; LINUXOSX64-NEXT:    retq
     68   %b = add i1 %a, 1
     69   %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
     70   %d = add i1 %c, 1
     71   ret i1 %d
     72 }
     73 
     74 ; Test regcall when receiving/returning i8
        ; The callee adds 1 to its i8 argument and returns the sum. The expected code
        ; is identical on all three RUN configurations: an in-place `incb %al`.
     75 define x86_regcallcc i8 @test_argReti8(i8 %a)  {
     76 ; X32-LABEL: test_argReti8:
     77 ; X32:       # %bb.0:
     78 ; X32-NEXT:    incb %al
     79 ; X32-NEXT:    # kill: def $al killed $al killed $eax
     80 ; X32-NEXT:    retl
     81 ;
     82 ; WIN64-LABEL: test_argReti8:
     83 ; WIN64:       # %bb.0:
     84 ; WIN64-NEXT:    incb %al
     85 ; WIN64-NEXT:    # kill: def $al killed $al killed $eax
     86 ; WIN64-NEXT:    retq
     87 ;
     88 ; LINUXOSX64-LABEL: test_argReti8:
     89 ; LINUXOSX64:       # %bb.0:
     90 ; LINUXOSX64-NEXT:    incb %al
     91 ; LINUXOSX64-NEXT:    # kill: def $al killed $al killed $eax
     92 ; LINUXOSX64-NEXT:    retq
     93   %add = add i8 %a, 1
     94   ret i8 %add
     95 }
     96 
     97 ; Test regcall when passing/retrieving i8
        ; The caller adds 1, calls test_argReti8 with the result, then adds 1 to the
        ; returned value. As in the i1 caller, the checks expect a movzbl of %al into
        ; %eax before the call and a push/pop of the stack pointer register around it.
     98 define x86_regcallcc i8 @test_CallargReti8(i8 %a)  {
     99 ; X32-LABEL: test_CallargReti8:
    100 ; X32:       # %bb.0:
    101 ; X32-NEXT:    pushl %esp
    102 ; X32-NEXT:    incb %al
    103 ; X32-NEXT:    movzbl %al, %eax
    104 ; X32-NEXT:    calll _test_argReti8
    105 ; X32-NEXT:    incb %al
    106 ; X32-NEXT:    popl %esp
    107 ; X32-NEXT:    retl
    108 ;
    109 ; WIN64-LABEL: test_CallargReti8:
    110 ; WIN64:       # %bb.0:
    111 ; WIN64-NEXT:    pushq %rsp
    112 ; WIN64-NEXT:    .seh_pushreg 4
    113 ; WIN64-NEXT:    .seh_endprologue
    114 ; WIN64-NEXT:    incb %al
    115 ; WIN64-NEXT:    movzbl %al, %eax
    116 ; WIN64-NEXT:    callq test_argReti8
    117 ; WIN64-NEXT:    incb %al
    118 ; WIN64-NEXT:    popq %rsp
    119 ; WIN64-NEXT:    retq
    120 ; WIN64-NEXT:    .seh_handlerdata
    121 ; WIN64-NEXT:    .text
    122 ; WIN64-NEXT:    .seh_endproc
    123 ;
    124 ; LINUXOSX64-LABEL: test_CallargReti8:
    125 ; LINUXOSX64:       # %bb.0:
    126 ; LINUXOSX64-NEXT:    pushq %rsp
    127 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    128 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    129 ; LINUXOSX64-NEXT:    incb %al
    130 ; LINUXOSX64-NEXT:    movzbl %al, %eax
    131 ; LINUXOSX64-NEXT:    callq test_argReti8
    132 ; LINUXOSX64-NEXT:    incb %al
    133 ; LINUXOSX64-NEXT:    popq %rsp
    134 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    135 ; LINUXOSX64-NEXT:    retq
    136   %b = add i8 %a, 1
    137   %c = call x86_regcallcc i8 @test_argReti8(i8 %b)
    138   %d = add i8 %c, 1
    139   ret i8 %d
    140 }
    141 
    142 ; Test regcall when receiving/returning i16
        ; The callee adds 1 to its i16 argument and returns the sum. The checks expect
        ; a 32-bit `incl %eax` (not a 16-bit increment) on every configuration, with
        ; the kill annotation marking %ax as the part of %eax that is returned.
    143 define x86_regcallcc i16 @test_argReti16(i16 %a)  {
    144 ; X32-LABEL: test_argReti16:
    145 ; X32:       # %bb.0:
    146 ; X32-NEXT:    incl %eax
    147 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
    148 ; X32-NEXT:    retl
    149 ;
    150 ; WIN64-LABEL: test_argReti16:
    151 ; WIN64:       # %bb.0:
    152 ; WIN64-NEXT:    incl %eax
    153 ; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
    154 ; WIN64-NEXT:    retq
    155 ;
    156 ; LINUXOSX64-LABEL: test_argReti16:
    157 ; LINUXOSX64:       # %bb.0:
    158 ; LINUXOSX64-NEXT:    incl %eax
    159 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
    160 ; LINUXOSX64-NEXT:    retq
    161   %add = add i16 %a, 1
    162   ret i16 %add
    163 }
    164 
    165 ; Test regcall when passing/retrieving i16
        ; The caller adds 1, calls test_argReti16 with the result, then adds 1 to the
        ; returned value. Both increments are expected as 32-bit `incl %eax`; unlike
        ; the i1/i8 callers, no explicit extension instruction is expected before the
        ; call.
    166 define x86_regcallcc i16 @test_CallargReti16(i16 %a)  {
    167 ; X32-LABEL: test_CallargReti16:
    168 ; X32:       # %bb.0:
    169 ; X32-NEXT:    pushl %esp
    170 ; X32-NEXT:    incl %eax
    171 ; X32-NEXT:    calll _test_argReti16
    172 ; X32-NEXT:    # kill: def $ax killed $ax def $eax
    173 ; X32-NEXT:    incl %eax
    174 ; X32-NEXT:    # kill: def $ax killed $ax killed $eax
    175 ; X32-NEXT:    popl %esp
    176 ; X32-NEXT:    retl
    177 ;
    178 ; WIN64-LABEL: test_CallargReti16:
    179 ; WIN64:       # %bb.0:
    180 ; WIN64-NEXT:    pushq %rsp
    181 ; WIN64-NEXT:    .seh_pushreg 4
    182 ; WIN64-NEXT:    .seh_endprologue
    183 ; WIN64-NEXT:    incl %eax
    184 ; WIN64-NEXT:    callq test_argReti16
    185 ; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
    186 ; WIN64-NEXT:    incl %eax
    187 ; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
    188 ; WIN64-NEXT:    popq %rsp
    189 ; WIN64-NEXT:    retq
    190 ; WIN64-NEXT:    .seh_handlerdata
    191 ; WIN64-NEXT:    .text
    192 ; WIN64-NEXT:    .seh_endproc
    193 ;
    194 ; LINUXOSX64-LABEL: test_CallargReti16:
    195 ; LINUXOSX64:       # %bb.0:
    196 ; LINUXOSX64-NEXT:    pushq %rsp
    197 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    198 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    199 ; LINUXOSX64-NEXT:    incl %eax
    200 ; LINUXOSX64-NEXT:    callq test_argReti16
    201 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
    202 ; LINUXOSX64-NEXT:    incl %eax
    203 ; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
    204 ; LINUXOSX64-NEXT:    popq %rsp
    205 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    206 ; LINUXOSX64-NEXT:    retq
    207   %b = add i16 %a, 1
    208   %c = call x86_regcallcc i16 @test_argReti16(i16 %b)
    209   %d = add i16 %c, 1
    210   ret i16 %d
    211 }
    212 
    213 ; Test regcall when receiving/returning i32
        ; The callee adds 1 to its i32 argument and returns the sum. The expected code
        ; on every configuration is `incl %eax` followed directly by the return.
    214 define x86_regcallcc i32 @test_argReti32(i32 %a)  {
    215 ; X32-LABEL: test_argReti32:
    216 ; X32:       # %bb.0:
    217 ; X32-NEXT:    incl %eax
    218 ; X32-NEXT:    retl
    219 ;
    220 ; WIN64-LABEL: test_argReti32:
    221 ; WIN64:       # %bb.0:
    222 ; WIN64-NEXT:    incl %eax
    223 ; WIN64-NEXT:    retq
    224 ;
    225 ; LINUXOSX64-LABEL: test_argReti32:
    226 ; LINUXOSX64:       # %bb.0:
    227 ; LINUXOSX64-NEXT:    incl %eax
    228 ; LINUXOSX64-NEXT:    retq
    229   %add = add i32 %a, 1
    230   ret i32 %add
    231 }
    232 
    233 ; Test regcall when passing/retrieving i32
        ; The caller adds 1, calls test_argReti32 with the result, then adds 1 to the
        ; returned value. The checks expect `incl %eax` immediately before and after
        ; the call, with a push/pop of the stack pointer register around it.
    234 define x86_regcallcc i32 @test_CallargReti32(i32 %a)  {
    235 ; X32-LABEL: test_CallargReti32:
    236 ; X32:       # %bb.0:
    237 ; X32-NEXT:    pushl %esp
    238 ; X32-NEXT:    incl %eax
    239 ; X32-NEXT:    calll _test_argReti32
    240 ; X32-NEXT:    incl %eax
    241 ; X32-NEXT:    popl %esp
    242 ; X32-NEXT:    retl
    243 ;
    244 ; WIN64-LABEL: test_CallargReti32:
    245 ; WIN64:       # %bb.0:
    246 ; WIN64-NEXT:    pushq %rsp
    247 ; WIN64-NEXT:    .seh_pushreg 4
    248 ; WIN64-NEXT:    .seh_endprologue
    249 ; WIN64-NEXT:    incl %eax
    250 ; WIN64-NEXT:    callq test_argReti32
    251 ; WIN64-NEXT:    incl %eax
    252 ; WIN64-NEXT:    popq %rsp
    253 ; WIN64-NEXT:    retq
    254 ; WIN64-NEXT:    .seh_handlerdata
    255 ; WIN64-NEXT:    .text
    256 ; WIN64-NEXT:    .seh_endproc
    257 ;
    258 ; LINUXOSX64-LABEL: test_CallargReti32:
    259 ; LINUXOSX64:       # %bb.0:
    260 ; LINUXOSX64-NEXT:    pushq %rsp
    261 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    262 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    263 ; LINUXOSX64-NEXT:    incl %eax
    264 ; LINUXOSX64-NEXT:    callq test_argReti32
    265 ; LINUXOSX64-NEXT:    incl %eax
    266 ; LINUXOSX64-NEXT:    popq %rsp
    267 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    268 ; LINUXOSX64-NEXT:    retq
    269   %b = add i32 %a, 1
    270   %c = call x86_regcallcc i32 @test_argReti32(i32 %b)
    271   %d = add i32 %c, 1
    272   ret i32 %d
    273 }
    274 
    275 ; Test regcall when receiving/returning i64
        ; The callee adds the constant 4294967299 (0x100000003) to its i64 argument.
        ; Both 32-bit halves of that constant are non-zero, so on the 32-bit target
        ; the checks expect an add/adc pair on %eax (low half, +3) and %ecx (high
        ; half, +1); the 64-bit targets materialize the constant with movabsq and add
        ; it to %rax.
    276 define x86_regcallcc i64 @test_argReti64(i64 %a)  {
    277 ; X32-LABEL: test_argReti64:
    278 ; X32:       # %bb.0:
    279 ; X32-NEXT:    addl $3, %eax
    280 ; X32-NEXT:    adcl $1, %ecx
    281 ; X32-NEXT:    retl
    282 ;
    283 ; WIN64-LABEL: test_argReti64:
    284 ; WIN64:       # %bb.0:
    285 ; WIN64-NEXT:    movabsq $4294967299, %rcx # imm = 0x100000003
    286 ; WIN64-NEXT:    addq %rcx, %rax
    287 ; WIN64-NEXT:    retq
    288 ;
    289 ; LINUXOSX64-LABEL: test_argReti64:
    290 ; LINUXOSX64:       # %bb.0:
    291 ; LINUXOSX64-NEXT:    movabsq $4294967299, %rcx # imm = 0x100000003
    292 ; LINUXOSX64-NEXT:    addq %rcx, %rax
    293 ; LINUXOSX64-NEXT:    retq
    294   %add = add i64 %a, 4294967299
    295   ret i64 %add
    296 }
    297 
    298 ; Test regcall when passing/retrieving i64
        ; The caller adds 1, calls test_argReti64 with the result, then adds 1 to the
        ; returned value. On the 32-bit target each increment is expected as an
        ; addl/adcl pair over %eax and %ecx; on the 64-bit targets it is a single
        ; `incq %rax`.
    299 define x86_regcallcc i64 @test_CallargReti64(i64 %a)  {
    300 ; X32-LABEL: test_CallargReti64:
    301 ; X32:       # %bb.0:
    302 ; X32-NEXT:    pushl %esp
    303 ; X32-NEXT:    addl $1, %eax
    304 ; X32-NEXT:    adcl $0, %ecx
    305 ; X32-NEXT:    calll _test_argReti64
    306 ; X32-NEXT:    addl $1, %eax
    307 ; X32-NEXT:    adcl $0, %ecx
    308 ; X32-NEXT:    popl %esp
    309 ; X32-NEXT:    retl
    310 ;
    311 ; WIN64-LABEL: test_CallargReti64:
    312 ; WIN64:       # %bb.0:
    313 ; WIN64-NEXT:    pushq %rsp
    314 ; WIN64-NEXT:    .seh_pushreg 4
    315 ; WIN64-NEXT:    .seh_endprologue
    316 ; WIN64-NEXT:    incq %rax
    317 ; WIN64-NEXT:    callq test_argReti64
    318 ; WIN64-NEXT:    incq %rax
    319 ; WIN64-NEXT:    popq %rsp
    320 ; WIN64-NEXT:    retq
    321 ; WIN64-NEXT:    .seh_handlerdata
    322 ; WIN64-NEXT:    .text
    323 ; WIN64-NEXT:    .seh_endproc
    324 ;
    325 ; LINUXOSX64-LABEL: test_CallargReti64:
    326 ; LINUXOSX64:       # %bb.0:
    327 ; LINUXOSX64-NEXT:    pushq %rsp
    328 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    329 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    330 ; LINUXOSX64-NEXT:    incq %rax
    331 ; LINUXOSX64-NEXT:    callq test_argReti64
    332 ; LINUXOSX64-NEXT:    incq %rax
    333 ; LINUXOSX64-NEXT:    popq %rsp
    334 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    335 ; LINUXOSX64-NEXT:    retq
    336   %b = add i64 %a, 1
    337   %c = call x86_regcallcc i64 @test_argReti64(i64 %b)
    338   %d = add i64 %c, 1
    339   ret i64 %d
    340 }
    341 
    342 ; Test regcall when receiving/returning float
        ; The callee adds 1.0 to its float argument. The checks expect one vaddss that
        ; reads and writes %xmm0 with the constant as a memory operand: an absolute
        ; __real@3f800000 reference on the 32-bit target and a %rip-relative reference
        ; on the 64-bit targets.
    343 define x86_regcallcc float @test_argRetFloat(float %a)  {
    344 ; X32-LABEL: test_argRetFloat:
    345 ; X32:       # %bb.0:
    346 ; X32-NEXT:    vaddss __real@3f800000, %xmm0, %xmm0
    347 ; X32-NEXT:    retl
    348 ;
    349 ; WIN64-LABEL: test_argRetFloat:
    350 ; WIN64:       # %bb.0:
    351 ; WIN64-NEXT:    vaddss __real@{{.*}}(%rip), %xmm0, %xmm0
    352 ; WIN64-NEXT:    retq
    353 ;
    354 ; LINUXOSX64-LABEL: test_argRetFloat:
    355 ; LINUXOSX64:       # %bb.0:
    356 ; LINUXOSX64-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
    357 ; LINUXOSX64-NEXT:    retq
    358   %add = fadd float 1.0, %a
    359   ret float %add
    360 }
    361 
    362 ; Test regcall when passing/retrieving float
        ; The caller adds 1.0 before and after calling test_argRetFloat. The checks
        ; expect the constant to be loaded once into %xmm4 (32-bit target) or %xmm8
        ; (64-bit targets) and reused after the call without a reload; that register
        ; is spilled to the stack in the prologue and restored in the epilogue.
    363 define x86_regcallcc float @test_CallargRetFloat(float %a)  {
    364 ; X32-LABEL: test_CallargRetFloat:
    365 ; X32:       # %bb.0:
    366 ; X32-NEXT:    pushl %esp
    367 ; X32-NEXT:    subl $24, %esp
    368 ; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
    369 ; X32-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
    370 ; X32-NEXT:    vaddss %xmm4, %xmm0, %xmm0
    371 ; X32-NEXT:    calll _test_argRetFloat
    372 ; X32-NEXT:    vaddss %xmm4, %xmm0, %xmm0
    373 ; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
    374 ; X32-NEXT:    addl $24, %esp
    375 ; X32-NEXT:    popl %esp
    376 ; X32-NEXT:    retl
    377 ;
    378 ; WIN64-LABEL: test_CallargRetFloat:
    379 ; WIN64:       # %bb.0:
    380 ; WIN64-NEXT:    pushq %rsp
    381 ; WIN64-NEXT:    .seh_pushreg 4
    382 ; WIN64-NEXT:    subq $16, %rsp
    383 ; WIN64-NEXT:    .seh_stackalloc 16
    384 ; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
    385 ; WIN64-NEXT:    .seh_savexmm 8, 0
    386 ; WIN64-NEXT:    .seh_endprologue
    387 ; WIN64-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
    388 ; WIN64-NEXT:    vaddss %xmm8, %xmm0, %xmm0
    389 ; WIN64-NEXT:    callq test_argRetFloat
    390 ; WIN64-NEXT:    vaddss %xmm8, %xmm0, %xmm0
    391 ; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
    392 ; WIN64-NEXT:    addq $16, %rsp
    393 ; WIN64-NEXT:    popq %rsp
    394 ; WIN64-NEXT:    retq
    395 ; WIN64-NEXT:    .seh_handlerdata
    396 ; WIN64-NEXT:    .text
    397 ; WIN64-NEXT:    .seh_endproc
    398 ;
    399 ; LINUXOSX64-LABEL: test_CallargRetFloat:
    400 ; LINUXOSX64:       # %bb.0:
    401 ; LINUXOSX64-NEXT:    pushq %rsp
    402 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    403 ; LINUXOSX64-NEXT:    subq $16, %rsp
    404 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
    405 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
    406 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    407 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
    408 ; LINUXOSX64-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
    409 ; LINUXOSX64-NEXT:    vaddss %xmm8, %xmm0, %xmm0
    410 ; LINUXOSX64-NEXT:    callq test_argRetFloat
    411 ; LINUXOSX64-NEXT:    vaddss %xmm8, %xmm0, %xmm0
    412 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
    413 ; LINUXOSX64-NEXT:    addq $16, %rsp
    414 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    415 ; LINUXOSX64-NEXT:    popq %rsp
    416 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    417 ; LINUXOSX64-NEXT:    retq
    418   %b = fadd float 1.0, %a
    419   %c = call x86_regcallcc float @test_argRetFloat(float %b)
    420   %d = fadd float 1.0, %c
    421   ret float %d
    422 }
    423 
    424 ; Test regcall when receiving/returning double
        ; The callee adds 1.0 to its double argument. The checks expect one vaddsd
        ; that reads and writes %xmm0 with the constant as a memory operand: an
        ; absolute __real@3ff0000000000000 reference on the 32-bit target and a
        ; %rip-relative reference on the 64-bit targets.
    425 define x86_regcallcc double @test_argRetDouble(double %a)  {
    426 ; X32-LABEL: test_argRetDouble:
    427 ; X32:       # %bb.0:
    428 ; X32-NEXT:    vaddsd __real@3ff0000000000000, %xmm0, %xmm0
    429 ; X32-NEXT:    retl
    430 ;
    431 ; WIN64-LABEL: test_argRetDouble:
    432 ; WIN64:       # %bb.0:
    433 ; WIN64-NEXT:    vaddsd __real@{{.*}}(%rip), %xmm0, %xmm0
    434 ; WIN64-NEXT:    retq
    435 ;
    436 ; LINUXOSX64-LABEL: test_argRetDouble:
    437 ; LINUXOSX64:       # %bb.0:
    438 ; LINUXOSX64-NEXT:    vaddsd {{.*}}(%rip), %xmm0, %xmm0
    439 ; LINUXOSX64-NEXT:    retq
    440   %add = fadd double %a, 1.0
    441   ret double %add
    442 }
    443 
    444 ; Test regcall when passing/retrieving double
        ; The caller adds 1.0 before and after calling test_argRetDouble. As in the
        ; float caller, the checks expect the constant to be loaded once into %xmm4
        ; (32-bit target) or %xmm8 (64-bit targets) and reused after the call, with
        ; that register spilled in the prologue and restored in the epilogue.
    445 define x86_regcallcc double @test_CallargRetDouble(double %a)  {
    446 ; X32-LABEL: test_CallargRetDouble:
    447 ; X32:       # %bb.0:
    448 ; X32-NEXT:    pushl %esp
    449 ; X32-NEXT:    subl $24, %esp
    450 ; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
    451 ; X32-NEXT:    vmovsd {{.*#+}} xmm4 = mem[0],zero
    452 ; X32-NEXT:    vaddsd %xmm4, %xmm0, %xmm0
    453 ; X32-NEXT:    calll _test_argRetDouble
    454 ; X32-NEXT:    vaddsd %xmm4, %xmm0, %xmm0
    455 ; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
    456 ; X32-NEXT:    addl $24, %esp
    457 ; X32-NEXT:    popl %esp
    458 ; X32-NEXT:    retl
    459 ;
    460 ; WIN64-LABEL: test_CallargRetDouble:
    461 ; WIN64:       # %bb.0:
    462 ; WIN64-NEXT:    pushq %rsp
    463 ; WIN64-NEXT:    .seh_pushreg 4
    464 ; WIN64-NEXT:    subq $16, %rsp
    465 ; WIN64-NEXT:    .seh_stackalloc 16
    466 ; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
    467 ; WIN64-NEXT:    .seh_savexmm 8, 0
    468 ; WIN64-NEXT:    .seh_endprologue
    469 ; WIN64-NEXT:    vmovsd {{.*#+}} xmm8 = mem[0],zero
    470 ; WIN64-NEXT:    vaddsd %xmm8, %xmm0, %xmm0
    471 ; WIN64-NEXT:    callq test_argRetDouble
    472 ; WIN64-NEXT:    vaddsd %xmm8, %xmm0, %xmm0
    473 ; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
    474 ; WIN64-NEXT:    addq $16, %rsp
    475 ; WIN64-NEXT:    popq %rsp
    476 ; WIN64-NEXT:    retq
    477 ; WIN64-NEXT:    .seh_handlerdata
    478 ; WIN64-NEXT:    .text
    479 ; WIN64-NEXT:    .seh_endproc
    480 ;
    481 ; LINUXOSX64-LABEL: test_CallargRetDouble:
    482 ; LINUXOSX64:       # %bb.0:
    483 ; LINUXOSX64-NEXT:    pushq %rsp
    484 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    485 ; LINUXOSX64-NEXT:    subq $16, %rsp
    486 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
    487 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
    488 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    489 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
    490 ; LINUXOSX64-NEXT:    vmovsd {{.*#+}} xmm8 = mem[0],zero
    491 ; LINUXOSX64-NEXT:    vaddsd %xmm8, %xmm0, %xmm0
    492 ; LINUXOSX64-NEXT:    callq test_argRetDouble
    493 ; LINUXOSX64-NEXT:    vaddsd %xmm8, %xmm0, %xmm0
    494 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
    495 ; LINUXOSX64-NEXT:    addq $16, %rsp
    496 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    497 ; LINUXOSX64-NEXT:    popq %rsp
    498 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    499 ; LINUXOSX64-NEXT:    retq
    500   %b = fadd double 1.0, %a
    501   %c = call x86_regcallcc double @test_argRetDouble(double %b)
    502   %d = fadd double 1.0, %c
    503   ret double %d
    504 }
    505 
    506 ; Test regcall when receiving/returning long double
        ; The callee doubles its x86_fp80 argument (%a0 + %a0). On every configuration
        ; the checks expect a single x87 `fadd %st(0), %st(0)` with no loads or
        ; stores, so the value is operated on in place at the top of the x87 stack.
        ; This is the only function in this part of the file declared nounwind.
    507 define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind {
    508 ; X32-LABEL: test_argRetf80:
    509 ; X32:       # %bb.0:
    510 ; X32-NEXT:    fadd %st(0), %st(0)
    511 ; X32-NEXT:    retl
    512 ;
    513 ; WIN64-LABEL: test_argRetf80:
    514 ; WIN64:       # %bb.0:
    515 ; WIN64-NEXT:    fadd %st(0), %st(0)
    516 ; WIN64-NEXT:    retq
    517 ;
    518 ; LINUXOSX64-LABEL: test_argRetf80:
    519 ; LINUXOSX64:       # %bb.0:
    520 ; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
    521 ; LINUXOSX64-NEXT:    retq
    522   %r0 = fadd x86_fp80 %a0, %a0
    523   ret x86_fp80 %r0
    524 }
    525 
    526 ; Test regcall when passing/retrieving long double
        ; The caller doubles its x86_fp80 argument, calls test_argRetf80 with the
        ; result, then doubles the returned value. The checks expect
        ; `fadd %st(0), %st(0)` directly before and after the call, with no x87 loads
        ; or stores in between.
    527 define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a)  {
    528 ; X32-LABEL: test_CallargRetf80:
    529 ; X32:       # %bb.0:
    530 ; X32-NEXT:    pushl %esp
    531 ; X32-NEXT:    fadd %st(0), %st(0)
    532 ; X32-NEXT:    calll _test_argRetf80
    533 ; X32-NEXT:    fadd %st(0), %st(0)
    534 ; X32-NEXT:    popl %esp
    535 ; X32-NEXT:    retl
    536 ;
    537 ; WIN64-LABEL: test_CallargRetf80:
    538 ; WIN64:       # %bb.0:
    539 ; WIN64-NEXT:    pushq %rsp
    540 ; WIN64-NEXT:    .seh_pushreg 4
    541 ; WIN64-NEXT:    .seh_endprologue
    542 ; WIN64-NEXT:    fadd %st(0), %st(0)
    543 ; WIN64-NEXT:    callq test_argRetf80
    544 ; WIN64-NEXT:    fadd %st(0), %st(0)
    545 ; WIN64-NEXT:    popq %rsp
    546 ; WIN64-NEXT:    retq
    547 ; WIN64-NEXT:    .seh_handlerdata
    548 ; WIN64-NEXT:    .text
    549 ; WIN64-NEXT:    .seh_endproc
    550 ;
    551 ; LINUXOSX64-LABEL: test_CallargRetf80:
    552 ; LINUXOSX64:       # %bb.0:
    553 ; LINUXOSX64-NEXT:    pushq %rsp
    554 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    555 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    556 ; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
    557 ; LINUXOSX64-NEXT:    callq test_argRetf80
    558 ; LINUXOSX64-NEXT:    fadd %st(0), %st(0)
    559 ; LINUXOSX64-NEXT:    popq %rsp
    560 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    561 ; LINUXOSX64-NEXT:    retq
    562   %b = fadd x86_fp80 %a, %a
    563   %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b)
    564   %d = fadd x86_fp80 %c, %c
    565   ret x86_fp80 %d
    566 }
    567 
    568 ; Test regcall when receiving/returning pointer
        ; The callee converts its pointer argument to i32, adds 1, and converts the
        ; result back to a pointer. Because the arithmetic is done on i32, the checks
        ; expect a 32-bit `incl %eax` even on the 64-bit targets.
    569 define x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %a)  {
    570 ; X32-LABEL: test_argRetPointer:
    571 ; X32:       # %bb.0:
    572 ; X32-NEXT:    incl %eax
    573 ; X32-NEXT:    retl
    574 ;
    575 ; WIN64-LABEL: test_argRetPointer:
    576 ; WIN64:       # %bb.0:
    577 ; WIN64-NEXT:    incl %eax
    578 ; WIN64-NEXT:    retq
    579 ;
    580 ; LINUXOSX64-LABEL: test_argRetPointer:
    581 ; LINUXOSX64:       # %bb.0:
    582 ; LINUXOSX64-NEXT:    incl %eax
    583 ; LINUXOSX64-NEXT:    retq
    584   %b = ptrtoint [4 x i32]* %a to i32
    585   %c = add i32 %b, 1
    586   %d = inttoptr i32 %c to [4 x i32]*
    587   ret [4 x i32]* %d
    588 }
    589 
    590 ; Test regcall when passing/retrieving pointer
        ; The caller round-trips its pointer through i32 to add 1, calls
        ; test_argRetPointer with the result, then repeats the same i32 increment on
        ; the returned pointer. The checks expect `incl %eax` before and after the
        ; call on every configuration, including the 64-bit ones.
    591 define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a)  {
    592 ; X32-LABEL: test_CallargRetPointer:
    593 ; X32:       # %bb.0:
    594 ; X32-NEXT:    pushl %esp
    595 ; X32-NEXT:    incl %eax
    596 ; X32-NEXT:    calll _test_argRetPointer
    597 ; X32-NEXT:    incl %eax
    598 ; X32-NEXT:    popl %esp
    599 ; X32-NEXT:    retl
    600 ;
    601 ; WIN64-LABEL: test_CallargRetPointer:
    602 ; WIN64:       # %bb.0:
    603 ; WIN64-NEXT:    pushq %rsp
    604 ; WIN64-NEXT:    .seh_pushreg 4
    605 ; WIN64-NEXT:    .seh_endprologue
    606 ; WIN64-NEXT:    incl %eax
    607 ; WIN64-NEXT:    callq test_argRetPointer
    608 ; WIN64-NEXT:    incl %eax
    609 ; WIN64-NEXT:    popq %rsp
    610 ; WIN64-NEXT:    retq
    611 ; WIN64-NEXT:    .seh_handlerdata
    612 ; WIN64-NEXT:    .text
    613 ; WIN64-NEXT:    .seh_endproc
    614 ;
    615 ; LINUXOSX64-LABEL: test_CallargRetPointer:
    616 ; LINUXOSX64:       # %bb.0:
    617 ; LINUXOSX64-NEXT:    pushq %rsp
    618 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    619 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    620 ; LINUXOSX64-NEXT:    incl %eax
    621 ; LINUXOSX64-NEXT:    callq test_argRetPointer
    622 ; LINUXOSX64-NEXT:    incl %eax
    623 ; LINUXOSX64-NEXT:    popq %rsp
    624 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    625 ; LINUXOSX64-NEXT:    retq
    626   %b = ptrtoint [4 x i32]* %a to i32
    627   %c = add i32 %b, 1
    628   %d = inttoptr i32 %c to [4 x i32]*
    629   %e = call x86_regcallcc [4 x i32]* @test_argRetPointer([4 x i32]* %d)
    630   %f = ptrtoint [4 x i32]* %e to i32
    631   %g = add i32 %f, 1
    632   %h = inttoptr i32 %g to [4 x i32]*
    633   ret [4 x i32]* %h
    634 }
    635 
    636 ; Test regcall when receiving/returning 128 bit vector
        ; The callee selects between its two <4 x i32> arguments using an undef mask.
        ; On every configuration the checks expect one masked blend (vpblendmd) of
        ; %xmm0 and %xmm1 under %k1, written to %xmm0.
    637 define x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %b)  {
    638 ; X32-LABEL: test_argRet128Vector:
    639 ; X32:       # %bb.0:
    640 ; X32-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
    641 ; X32-NEXT:    retl
    642 ;
    643 ; WIN64-LABEL: test_argRet128Vector:
    644 ; WIN64:       # %bb.0:
    645 ; WIN64-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
    646 ; WIN64-NEXT:    retq
    647 ;
    648 ; LINUXOSX64-LABEL: test_argRet128Vector:
    649 ; LINUXOSX64:       # %bb.0:
    650 ; LINUXOSX64-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
    651 ; LINUXOSX64-NEXT:    retq
    652   %d = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b
    653   ret <4 x i32> %d
    654 }
    655 
    656 ; Test regcall when passing/retrieving 128 bit vector
        ; The caller passes %a as both arguments of test_argRet128Vector, then selects
        ; between %a and the call result using an undef mask. The checks expect %xmm0
        ; to be copied to %xmm1 for the second argument and also to %xmm4 (32-bit
        ; target) or %xmm8 (64-bit targets), which holds %a across the call for the
        ; final masked move; that register is spilled and restored around the body.
    657 define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a)  {
    658 ; X32-LABEL: test_CallargRet128Vector:
    659 ; X32:       # %bb.0:
    660 ; X32-NEXT:    pushl %esp
    661 ; X32-NEXT:    subl $24, %esp
    662 ; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
    663 ; X32-NEXT:    vmovdqa %xmm0, %xmm4
    664 ; X32-NEXT:    vmovdqa %xmm0, %xmm1
    665 ; X32-NEXT:    calll _test_argRet128Vector
    666 ; X32-NEXT:    vmovdqa32 %xmm4, %xmm0 {%k1}
    667 ; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
    668 ; X32-NEXT:    addl $24, %esp
    669 ; X32-NEXT:    popl %esp
    670 ; X32-NEXT:    retl
    671 ;
    672 ; WIN64-LABEL: test_CallargRet128Vector:
    673 ; WIN64:       # %bb.0:
    674 ; WIN64-NEXT:    pushq %rsp
    675 ; WIN64-NEXT:    .seh_pushreg 4
    676 ; WIN64-NEXT:    subq $16, %rsp
    677 ; WIN64-NEXT:    .seh_stackalloc 16
    678 ; WIN64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
    679 ; WIN64-NEXT:    .seh_savexmm 8, 0
    680 ; WIN64-NEXT:    .seh_endprologue
    681 ; WIN64-NEXT:    vmovdqa %xmm0, %xmm8
    682 ; WIN64-NEXT:    vmovdqa %xmm0, %xmm1
    683 ; WIN64-NEXT:    callq test_argRet128Vector
    684 ; WIN64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
    685 ; WIN64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
    686 ; WIN64-NEXT:    addq $16, %rsp
    687 ; WIN64-NEXT:    popq %rsp
    688 ; WIN64-NEXT:    retq
    689 ; WIN64-NEXT:    .seh_handlerdata
    690 ; WIN64-NEXT:    .text
    691 ; WIN64-NEXT:    .seh_endproc
    692 ;
    693 ; LINUXOSX64-LABEL: test_CallargRet128Vector:
    694 ; LINUXOSX64:       # %bb.0:
    695 ; LINUXOSX64-NEXT:    pushq %rsp
    696 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    697 ; LINUXOSX64-NEXT:    subq $16, %rsp
    698 ; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
    699 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
    700 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    701 ; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -32
    702 ; LINUXOSX64-NEXT:    vmovdqa %xmm0, %xmm8
    703 ; LINUXOSX64-NEXT:    vmovdqa %xmm0, %xmm1
    704 ; LINUXOSX64-NEXT:    callq test_argRet128Vector
    705 ; LINUXOSX64-NEXT:    vmovdqa32 %xmm8, %xmm0 {%k1}
    706 ; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
    707 ; LINUXOSX64-NEXT:    addq $16, %rsp
    708 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    709 ; LINUXOSX64-NEXT:    popq %rsp
    710 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    711 ; LINUXOSX64-NEXT:    retq
    712   %b = call x86_regcallcc <4 x i32> @test_argRet128Vector(<4 x i32> %a, <4 x i32> %a)
    713   %c = select <4 x i1> undef , <4 x i32> %a, <4 x i32> %b
    714   ret <4 x i32> %c
    715 }
    716 
    717 ; Test regcall when receiving/returning 256 bit vector
        ; The callee selects between its two <8 x i32> arguments using an undef mask.
        ; On every configuration the checks expect one masked blend (vpblendmd) of
        ; %ymm0 and %ymm1 under %k1, written to %ymm0.
    718 define x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %b)  {
    719 ; X32-LABEL: test_argRet256Vector:
    720 ; X32:       # %bb.0:
    721 ; X32-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
    722 ; X32-NEXT:    retl
    723 ;
    724 ; WIN64-LABEL: test_argRet256Vector:
    725 ; WIN64:       # %bb.0:
    726 ; WIN64-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
    727 ; WIN64-NEXT:    retq
    728 ;
    729 ; LINUXOSX64-LABEL: test_argRet256Vector:
    730 ; LINUXOSX64:       # %bb.0:
    731 ; LINUXOSX64-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
    732 ; LINUXOSX64-NEXT:    retq
    733   %d = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b
    734   ret <8 x i32> %d
    735 }
    736 
    737 ; Test regcall when passing/retrieving 256 bit vector
        ; The caller passes %a as both arguments of test_argRet256Vector, then selects
        ; between %a and the call result using an undef mask. Unlike the 128-bit
        ; caller, the checks expect %a to be spilled to the stack before the call and
        ; reloaded into %ymm1 afterwards rather than kept live in a vector register.
    738 define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i32> %a)  {
    739 ; X32-LABEL: test_CallargRet256Vector:
    740 ; X32:       # %bb.0:
    741 ; X32-NEXT:    pushl %esp
    742 ; X32-NEXT:    subl $56, %esp
    743 ; X32-NEXT:    vmovdqu %ymm0, (%esp) # 32-byte Spill
    744 ; X32-NEXT:    vmovdqa %ymm0, %ymm1
    745 ; X32-NEXT:    calll _test_argRet256Vector
    746 ; X32-NEXT:    vmovdqu (%esp), %ymm1 # 32-byte Reload
    747 ; X32-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
    748 ; X32-NEXT:    addl $56, %esp
    749 ; X32-NEXT:    popl %esp
    750 ; X32-NEXT:    retl
    751 ;
    752 ; WIN64-LABEL: test_CallargRet256Vector:
    753 ; WIN64:       # %bb.0:
    754 ; WIN64-NEXT:    pushq %rsp
    755 ; WIN64-NEXT:    .seh_pushreg 4
    756 ; WIN64-NEXT:    subq $48, %rsp
    757 ; WIN64-NEXT:    .seh_stackalloc 48
    758 ; WIN64-NEXT:    .seh_endprologue
    759 ; WIN64-NEXT:    vmovdqu %ymm0, (%rsp) # 32-byte Spill
    760 ; WIN64-NEXT:    vmovdqa %ymm0, %ymm1
    761 ; WIN64-NEXT:    callq test_argRet256Vector
    762 ; WIN64-NEXT:    vmovdqu (%rsp), %ymm1 # 32-byte Reload
    763 ; WIN64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
    764 ; WIN64-NEXT:    addq $48, %rsp
    765 ; WIN64-NEXT:    popq %rsp
    766 ; WIN64-NEXT:    retq
    767 ; WIN64-NEXT:    .seh_handlerdata
    768 ; WIN64-NEXT:    .text
    769 ; WIN64-NEXT:    .seh_endproc
    770 ;
    771 ; LINUXOSX64-LABEL: test_CallargRet256Vector:
    772 ; LINUXOSX64:       # %bb.0:
    773 ; LINUXOSX64-NEXT:    pushq %rsp
    774 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    775 ; LINUXOSX64-NEXT:    subq $48, %rsp
    776 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 64
    777 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
    778 ; LINUXOSX64-NEXT:    vmovdqu %ymm0, (%rsp) # 32-byte Spill
    779 ; LINUXOSX64-NEXT:    vmovdqa %ymm0, %ymm1
    780 ; LINUXOSX64-NEXT:    callq test_argRet256Vector
    781 ; LINUXOSX64-NEXT:    vmovdqu (%rsp), %ymm1 # 32-byte Reload
    782 ; LINUXOSX64-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1}
    783 ; LINUXOSX64-NEXT:    addq $48, %rsp
    784 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
    785 ; LINUXOSX64-NEXT:    popq %rsp
    786 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
    787 ; LINUXOSX64-NEXT:    retq
    788   %b = call x86_regcallcc <8 x i32> @test_argRet256Vector(<8 x i32> %a, <8 x i32> %a)
    789   %c = select <8 x i1> undef , <8 x i32> %a, <8 x i32> %b
    790   ret <8 x i32> %c
    791 }
    792 
    793 ; Test regcall when receiving/returning 512 bit vector
        ; The callee selects between its two <16 x i32> arguments using an undef mask.
        ; On every configuration the checks expect one masked blend (vpblendmd) of
        ; %zmm0 and %zmm1 under %k1, written to %zmm0.
    794 define x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %b)  {
    795 ; X32-LABEL: test_argRet512Vector:
    796 ; X32:       # %bb.0:
    797 ; X32-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    798 ; X32-NEXT:    retl
    799 ;
    800 ; WIN64-LABEL: test_argRet512Vector:
    801 ; WIN64:       # %bb.0:
    802 ; WIN64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    803 ; WIN64-NEXT:    retq
    804 ;
    805 ; LINUXOSX64-LABEL: test_argRet512Vector:
    806 ; LINUXOSX64:       # %bb.0:
    807 ; LINUXOSX64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    808 ; LINUXOSX64-NEXT:    retq
    809   %d = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b
    810   ret <16 x i32> %d
    811 }
    812 
     813 ; Test regcall when passing/retrieving a 512-bit vector.
         ; Calls test_argRet512Vector with %a as both arguments, then selects
         ; between %a and the call result under an undef mask. The expected code
         ; copies zmm0 to zmm1 to form the second argument, and keeps %a alive
         ; across the call by spilling zmm0 to the stack and reloading it into zmm1.
         ; NOTE(review): the expected prologue/epilogue pushes and pops the stack
         ; pointer itself on all three targets; this mirrors the autogenerated
         ; output - confirm it is intended before relying on it.
     814 define x86_regcallcc <16 x i32> @test_CallargRet512Vector(<16 x i32> %a)  {
     815 ; X32-LABEL: test_CallargRet512Vector:
     816 ; X32:       # %bb.0:
     817 ; X32-NEXT:    pushl %esp
     818 ; X32-NEXT:    subl $120, %esp
     819 ; X32-NEXT:    vmovdqu64 %zmm0, (%esp) # 64-byte Spill
     820 ; X32-NEXT:    vmovdqa64 %zmm0, %zmm1
     821 ; X32-NEXT:    calll _test_argRet512Vector
     822 ; X32-NEXT:    vmovdqu64 (%esp), %zmm1 # 64-byte Reload
     823 ; X32-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
     824 ; X32-NEXT:    addl $120, %esp
     825 ; X32-NEXT:    popl %esp
     826 ; X32-NEXT:    retl
     827 ;
     828 ; WIN64-LABEL: test_CallargRet512Vector:
     829 ; WIN64:       # %bb.0:
     830 ; WIN64-NEXT:    pushq %rsp
     831 ; WIN64-NEXT:    .seh_pushreg 4
     832 ; WIN64-NEXT:    subq $112, %rsp
     833 ; WIN64-NEXT:    .seh_stackalloc 112
     834 ; WIN64-NEXT:    .seh_endprologue
     835 ; WIN64-NEXT:    vmovdqu64 %zmm0, (%rsp) # 64-byte Spill
     836 ; WIN64-NEXT:    vmovdqa64 %zmm0, %zmm1
     837 ; WIN64-NEXT:    callq test_argRet512Vector
     838 ; WIN64-NEXT:    vmovdqu64 (%rsp), %zmm1 # 64-byte Reload
     839 ; WIN64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
     840 ; WIN64-NEXT:    addq $112, %rsp
     841 ; WIN64-NEXT:    popq %rsp
     842 ; WIN64-NEXT:    retq
     843 ; WIN64-NEXT:    .seh_handlerdata
     844 ; WIN64-NEXT:    .text
     845 ; WIN64-NEXT:    .seh_endproc
     846 ;
     847 ; LINUXOSX64-LABEL: test_CallargRet512Vector:
     848 ; LINUXOSX64:       # %bb.0:
     849 ; LINUXOSX64-NEXT:    pushq %rsp
     850 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
     851 ; LINUXOSX64-NEXT:    subq $112, %rsp
     852 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 128
     853 ; LINUXOSX64-NEXT:    .cfi_offset %rsp, -16
     854 ; LINUXOSX64-NEXT:    vmovdqu64 %zmm0, (%rsp) # 64-byte Spill
     855 ; LINUXOSX64-NEXT:    vmovdqa64 %zmm0, %zmm1
     856 ; LINUXOSX64-NEXT:    callq test_argRet512Vector
     857 ; LINUXOSX64-NEXT:    vmovdqu64 (%rsp), %zmm1 # 64-byte Reload
     858 ; LINUXOSX64-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
     859 ; LINUXOSX64-NEXT:    addq $112, %rsp
     860 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
     861 ; LINUXOSX64-NEXT:    popq %rsp
     862 ; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
     863 ; LINUXOSX64-NEXT:    retq
     864   %b = call x86_regcallcc <16 x i32> @test_argRet512Vector(<16 x i32> %a, <16 x i32> %a)
     865   %c = select <16 x i1> undef , <16 x i32> %a, <16 x i32> %b
     866   ret <16 x i32> %c
     867 }
    868 
     869 ; Test regcall when running multiple input parameters - callee saved xmms.
         ; Computes ((%a + %b) - (%a * %b)) + %c on <32 x float> values. In the
         ; expected code each argument occupies two zmm registers (%a in zmm0/zmm1,
         ; %b in zmm2/zmm3, %c in zmm4/zmm5) and zmm6/zmm7 hold the temporaries.
         ; Only the i386 checks spill and reload xmm6/xmm7 around that use; the
         ; Win64 and Linux x86-64 checks use zmm6/zmm7 without saving them.
     870 define x86_regcallcc <32 x float> @testf32_inp(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
     871 ; X32-LABEL: testf32_inp:
     872 ; X32:       # %bb.0:
     873 ; X32-NEXT:    subl $44, %esp
     874 ; X32-NEXT:    vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
     875 ; X32-NEXT:    vmovups %xmm6, (%esp) # 16-byte Spill
     876 ; X32-NEXT:    vaddps %zmm2, %zmm0, %zmm6
     877 ; X32-NEXT:    vaddps %zmm3, %zmm1, %zmm7
     878 ; X32-NEXT:    vmulps %zmm2, %zmm0, %zmm0
     879 ; X32-NEXT:    vsubps %zmm0, %zmm6, %zmm0
     880 ; X32-NEXT:    vmulps %zmm3, %zmm1, %zmm1
     881 ; X32-NEXT:    vsubps %zmm1, %zmm7, %zmm1
     882 ; X32-NEXT:    vaddps %zmm4, %zmm0, %zmm0
     883 ; X32-NEXT:    vaddps %zmm5, %zmm1, %zmm1
     884 ; X32-NEXT:    vmovups (%esp), %xmm6 # 16-byte Reload
     885 ; X32-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
     886 ; X32-NEXT:    addl $44, %esp
     887 ; X32-NEXT:    retl
     888 ;
     889 ; WIN64-LABEL: testf32_inp:
     890 ; WIN64:       # %bb.0:
     891 ; WIN64-NEXT:    vaddps %zmm2, %zmm0, %zmm6
     892 ; WIN64-NEXT:    vaddps %zmm3, %zmm1, %zmm7
     893 ; WIN64-NEXT:    vmulps %zmm2, %zmm0, %zmm0
     894 ; WIN64-NEXT:    vsubps %zmm0, %zmm6, %zmm0
     895 ; WIN64-NEXT:    vmulps %zmm3, %zmm1, %zmm1
     896 ; WIN64-NEXT:    vsubps %zmm1, %zmm7, %zmm1
     897 ; WIN64-NEXT:    vaddps %zmm4, %zmm0, %zmm0
     898 ; WIN64-NEXT:    vaddps %zmm5, %zmm1, %zmm1
     899 ; WIN64-NEXT:    retq
     900 ;
     901 ; LINUXOSX64-LABEL: testf32_inp:
     902 ; LINUXOSX64:       # %bb.0:
     903 ; LINUXOSX64-NEXT:    vaddps %zmm2, %zmm0, %zmm6
     904 ; LINUXOSX64-NEXT:    vaddps %zmm3, %zmm1, %zmm7
     905 ; LINUXOSX64-NEXT:    vmulps %zmm2, %zmm0, %zmm0
     906 ; LINUXOSX64-NEXT:    vsubps %zmm0, %zmm6, %zmm0
     907 ; LINUXOSX64-NEXT:    vmulps %zmm3, %zmm1, %zmm1
     908 ; LINUXOSX64-NEXT:    vsubps %zmm1, %zmm7, %zmm1
     909 ; LINUXOSX64-NEXT:    vaddps %zmm4, %zmm0, %zmm0
     910 ; LINUXOSX64-NEXT:    vaddps %zmm5, %zmm1, %zmm1
     911 ; LINUXOSX64-NEXT:    retq
     912   %x1 = fadd <32 x float> %a, %b
     913   %x2 = fmul <32 x float> %a, %b
     914   %x3 = fsub <32 x float> %x1, %x2
     915   %x4 = fadd <32 x float> %x3, %c
     916   ret <32 x float> %x4
     917 }
    918 
     919 ; Test regcall when running multiple input parameters - callee saved GPRs.
         ; Returns, summed over the argument pairs (1,2), (3,4) and (5,6):
         ;   (a_i - a_j) * (b_i - b_j) + (a_i + a_j) * (b_i + b_j)
         ; The expected code takes %a1-%a5 in eax, ecx, edx, edi and esi on i386
         ; and reads the remaining seven arguments from the stack; Win64 receives
         ; all twelve in registers; Linux x86-64 receives eleven in registers and
         ; loads %b6 from the stack. Each target pushes and pops the extra
         ; registers it uses as temporaries (ebp/ebx, r13/rbp/rbx, rbp/rbx).
     920 define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
     921 ; X32-LABEL: testi32_inp:
     922 ; X32:       # %bb.0:
     923 ; X32-NEXT:    pushl %ebp
     924 ; X32-NEXT:    pushl %ebx
     925 ; X32-NEXT:    subl $20, %esp
     926 ; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
     927 ; X32-NEXT:    movl %edi, %esi
     928 ; X32-NEXT:    movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
     929 ; X32-NEXT:    movl %edx, %ebx
     930 ; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill
     931 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
     932 ; X32-NEXT:    movl %eax, %edx
     933 ; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
     934 ; X32-NEXT:    subl %ecx, %edx
     935 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
     936 ; X32-NEXT:    movl %edi, %ebp
     937 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
     938 ; X32-NEXT:    imull %ebp, %edx
     939 ; X32-NEXT:    subl %esi, %ebx
     940 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
     941 ; X32-NEXT:    movl %esi, %ecx
     942 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
     943 ; X32-NEXT:    imull %ebx, %ecx
     944 ; X32-NEXT:    addl %ecx, %edx
     945 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
     946 ; X32-NEXT:    movl %ebx, %ebp
     947 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ebp
     948 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
     949 ; X32-NEXT:    movl %ecx, %eax
     950 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
     951 ; X32-NEXT:    imull %ebp, %eax
     952 ; X32-NEXT:    addl %eax, %edx
     953 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
     954 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax # 4-byte Folded Reload
     955 ; X32-NEXT:    movl (%esp), %ebp # 4-byte Reload
     956 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
     957 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %ebx
     958 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %edi
     959 ; X32-NEXT:    imull %eax, %edi
     960 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %esi
     961 ; X32-NEXT:    imull %ebp, %esi
     962 ; X32-NEXT:    addl %edi, %esi
     963 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
     964 ; X32-NEXT:    imull %ebx, %ecx
     965 ; X32-NEXT:    addl %esi, %ecx
     966 ; X32-NEXT:    addl %ecx, %edx
     967 ; X32-NEXT:    movl %edx, %eax
     968 ; X32-NEXT:    addl $20, %esp
     969 ; X32-NEXT:    popl %ebx
     970 ; X32-NEXT:    popl %ebp
     971 ; X32-NEXT:    retl
     972 ;
     973 ; WIN64-LABEL: testi32_inp:
     974 ; WIN64:       # %bb.0:
     975 ; WIN64-NEXT:    pushq %r13
     976 ; WIN64-NEXT:    pushq %rbp
     977 ; WIN64-NEXT:    pushq %rbx
     978 ; WIN64-NEXT:    movl %eax, %r13d
     979 ; WIN64-NEXT:    subl %ecx, %eax
     980 ; WIN64-NEXT:    movl %edx, %ebp
     981 ; WIN64-NEXT:    subl %edi, %ebp
     982 ; WIN64-NEXT:    movl %r9d, %ebx
     983 ; WIN64-NEXT:    subl %r10d, %ebx
     984 ; WIN64-NEXT:    imull %ebx, %eax
     985 ; WIN64-NEXT:    movl %r11d, %ebx
     986 ; WIN64-NEXT:    subl %r12d, %ebx
     987 ; WIN64-NEXT:    imull %ebp, %ebx
     988 ; WIN64-NEXT:    movl %esi, %ebp
     989 ; WIN64-NEXT:    subl %r8d, %ebp
     990 ; WIN64-NEXT:    addl %ebx, %eax
     991 ; WIN64-NEXT:    movl %r14d, %ebx
     992 ; WIN64-NEXT:    subl %r15d, %ebx
     993 ; WIN64-NEXT:    imull %ebp, %ebx
     994 ; WIN64-NEXT:    addl %ebx, %eax
     995 ; WIN64-NEXT:    addl %ecx, %r13d
     996 ; WIN64-NEXT:    addl %edi, %edx
     997 ; WIN64-NEXT:    addl %r8d, %esi
     998 ; WIN64-NEXT:    addl %r10d, %r9d
     999 ; WIN64-NEXT:    imull %r13d, %r9d
    1000 ; WIN64-NEXT:    addl %r12d, %r11d
    1001 ; WIN64-NEXT:    imull %edx, %r11d
    1002 ; WIN64-NEXT:    addl %r9d, %r11d
    1003 ; WIN64-NEXT:    addl %r15d, %r14d
    1004 ; WIN64-NEXT:    imull %esi, %r14d
    1005 ; WIN64-NEXT:    addl %r11d, %r14d
    1006 ; WIN64-NEXT:    addl %r14d, %eax
    1007 ; WIN64-NEXT:    popq %rbx
    1008 ; WIN64-NEXT:    popq %rbp
    1009 ; WIN64-NEXT:    popq %r13
    1010 ; WIN64-NEXT:    retq
    1011 ;
    1012 ; LINUXOSX64-LABEL: testi32_inp:
    1013 ; LINUXOSX64:       # %bb.0:
    1014 ; LINUXOSX64-NEXT:    pushq %rbp
    1015 ; LINUXOSX64-NEXT:    pushq %rbx
    1016 ; LINUXOSX64-NEXT:    movl %eax, %r10d
    1017 ; LINUXOSX64-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
    1018 ; LINUXOSX64-NEXT:    subl %ecx, %eax
    1019 ; LINUXOSX64-NEXT:    movl %edx, %ebx
    1020 ; LINUXOSX64-NEXT:    subl %edi, %ebx
    1021 ; LINUXOSX64-NEXT:    movl %r9d, %ebp
    1022 ; LINUXOSX64-NEXT:    subl %r12d, %ebp
    1023 ; LINUXOSX64-NEXT:    imull %ebp, %eax
    1024 ; LINUXOSX64-NEXT:    movl %r13d, %ebp
    1025 ; LINUXOSX64-NEXT:    subl %r14d, %ebp
    1026 ; LINUXOSX64-NEXT:    imull %ebx, %ebp
    1027 ; LINUXOSX64-NEXT:    movl %esi, %ebx
    1028 ; LINUXOSX64-NEXT:    subl %r8d, %ebx
    1029 ; LINUXOSX64-NEXT:    addl %ebp, %eax
    1030 ; LINUXOSX64-NEXT:    movl %r15d, %ebp
    1031 ; LINUXOSX64-NEXT:    subl %r11d, %ebp
    1032 ; LINUXOSX64-NEXT:    imull %ebx, %ebp
    1033 ; LINUXOSX64-NEXT:    addl %ebp, %eax
    1034 ; LINUXOSX64-NEXT:    addl %ecx, %r10d
    1035 ; LINUXOSX64-NEXT:    addl %edi, %edx
    1036 ; LINUXOSX64-NEXT:    addl %r8d, %esi
    1037 ; LINUXOSX64-NEXT:    addl %r12d, %r9d
    1038 ; LINUXOSX64-NEXT:    imull %r10d, %r9d
    1039 ; LINUXOSX64-NEXT:    addl %r14d, %r13d
    1040 ; LINUXOSX64-NEXT:    imull %edx, %r13d
    1041 ; LINUXOSX64-NEXT:    addl %r9d, %r13d
    1042 ; LINUXOSX64-NEXT:    addl %r11d, %r15d
    1043 ; LINUXOSX64-NEXT:    imull %esi, %r15d
    1044 ; LINUXOSX64-NEXT:    addl %r13d, %r15d
    1045 ; LINUXOSX64-NEXT:    addl %r15d, %eax
    1046 ; LINUXOSX64-NEXT:    popq %rbx
    1047 ; LINUXOSX64-NEXT:    popq %rbp
    1048 ; LINUXOSX64-NEXT:    retq
    1049   %x1 = sub i32 %a1, %a2
    1050   %x2 = sub i32 %a3, %a4
    1051   %x3 = sub i32 %a5, %a6
    1052   %y1 = sub i32 %b1, %b2
    1053   %y2 = sub i32 %b3, %b4
    1054   %y3 = sub i32 %b5, %b6
    1055   %v1 = add i32 %a1, %a2
    1056   %v2 = add i32 %a3, %a4
    1057   %v3 = add i32 %a5, %a6
    1058   %w1 = add i32 %b1, %b2
    1059   %w2 = add i32 %b3, %b4
    1060   %w3 = add i32 %b5, %b6
    1061   %s1 = mul i32 %x1, %y1
    1062   %s2 = mul i32 %x2, %y2
    1063   %s3 = mul i32 %x3, %y3
    1064   %t1 = mul i32 %v1, %w1
    1065   %t2 = mul i32 %v2, %w2
    1066   %t3 = mul i32 %v3, %w3
    1067   %m1 = add i32 %s1, %s2
    1068   %m2 = add i32 %m1, %s3
    1069   %n1 = add i32 %t1, %t2
    1070   %n2 = add i32 %n1, %t3
    1071   %r1 = add i32 %m2, %n2
    1072   ret i32 %r1
    1073 }
   1074 
    1075 ; Test that parameters that overflow the available registers are passed on the stack.
         ; Sums nine <32 x float> arguments, each of which takes two zmm-sized
         ; halves. The i386 checks take the first four arguments in zmm0-zmm7 and
         ; read the other five from ebp-relative stack slots; the Win64 and Linux
         ; x86-64 checks take eight arguments in zmm0-zmm15 and read only %c2 from
         ; rbp-relative stack slots. All three targets expect the stack pointer to
         ; be realigned with an AND against -64.
    1076 define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a0, <32 x float> %b0, <32 x float> %c0, <32 x float> %a1, <32 x float> %b1, <32 x float> %c1, <32 x float> %a2, <32 x float> %b2, <32 x float> %c2) nounwind {
    1077 ; X32-LABEL: testf32_stack:
    1078 ; X32:       # %bb.0:
    1079 ; X32-NEXT:    pushl %ebp
    1080 ; X32-NEXT:    movl %esp, %ebp
    1081 ; X32-NEXT:    andl $-64, %esp
    1082 ; X32-NEXT:    subl $64, %esp
    1083 ; X32-NEXT:    vaddps %zmm3, %zmm1, %zmm1
    1084 ; X32-NEXT:    vaddps %zmm2, %zmm0, %zmm0
    1085 ; X32-NEXT:    vaddps %zmm0, %zmm4, %zmm0
    1086 ; X32-NEXT:    vaddps %zmm1, %zmm5, %zmm1
    1087 ; X32-NEXT:    vaddps %zmm1, %zmm7, %zmm1
    1088 ; X32-NEXT:    vaddps %zmm0, %zmm6, %zmm0
    1089 ; X32-NEXT:    vaddps 8(%ebp), %zmm0, %zmm0
    1090 ; X32-NEXT:    vaddps 72(%ebp), %zmm1, %zmm1
    1091 ; X32-NEXT:    vaddps 200(%ebp), %zmm1, %zmm1
    1092 ; X32-NEXT:    vaddps 136(%ebp), %zmm0, %zmm0
    1093 ; X32-NEXT:    vaddps 264(%ebp), %zmm0, %zmm0
    1094 ; X32-NEXT:    vaddps 328(%ebp), %zmm1, %zmm1
    1095 ; X32-NEXT:    vaddps 456(%ebp), %zmm1, %zmm1
    1096 ; X32-NEXT:    vaddps 392(%ebp), %zmm0, %zmm0
    1097 ; X32-NEXT:    vaddps 520(%ebp), %zmm0, %zmm0
    1098 ; X32-NEXT:    vaddps 584(%ebp), %zmm1, %zmm1
    1099 ; X32-NEXT:    movl %ebp, %esp
    1100 ; X32-NEXT:    popl %ebp
    1101 ; X32-NEXT:    retl
    1102 ;
    1103 ; WIN64-LABEL: testf32_stack:
    1104 ; WIN64:       # %bb.0:
    1105 ; WIN64-NEXT:    pushq %rbp
    1106 ; WIN64-NEXT:    subq $48, %rsp
    1107 ; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
    1108 ; WIN64-NEXT:    andq $-64, %rsp
    1109 ; WIN64-NEXT:    vaddps %zmm3, %zmm1, %zmm1
    1110 ; WIN64-NEXT:    vaddps %zmm2, %zmm0, %zmm0
    1111 ; WIN64-NEXT:    vaddps %zmm0, %zmm4, %zmm0
    1112 ; WIN64-NEXT:    vaddps %zmm1, %zmm5, %zmm1
    1113 ; WIN64-NEXT:    vaddps %zmm1, %zmm7, %zmm1
    1114 ; WIN64-NEXT:    vaddps %zmm0, %zmm6, %zmm0
    1115 ; WIN64-NEXT:    vaddps %zmm0, %zmm8, %zmm0
    1116 ; WIN64-NEXT:    vaddps %zmm1, %zmm9, %zmm1
    1117 ; WIN64-NEXT:    vaddps %zmm1, %zmm11, %zmm1
    1118 ; WIN64-NEXT:    vaddps %zmm0, %zmm10, %zmm0
    1119 ; WIN64-NEXT:    vaddps %zmm0, %zmm12, %zmm0
    1120 ; WIN64-NEXT:    vaddps %zmm1, %zmm13, %zmm1
    1121 ; WIN64-NEXT:    vaddps %zmm1, %zmm15, %zmm1
    1122 ; WIN64-NEXT:    vaddps %zmm0, %zmm14, %zmm0
    1123 ; WIN64-NEXT:    vaddps 16(%rbp), %zmm0, %zmm0
    1124 ; WIN64-NEXT:    vaddps 80(%rbp), %zmm1, %zmm1
    1125 ; WIN64-NEXT:    movq %rbp, %rsp
    1126 ; WIN64-NEXT:    popq %rbp
    1127 ; WIN64-NEXT:    retq
    1128 ;
    1129 ; LINUXOSX64-LABEL: testf32_stack:
    1130 ; LINUXOSX64:       # %bb.0:
    1131 ; LINUXOSX64-NEXT:    pushq %rbp
    1132 ; LINUXOSX64-NEXT:    movq %rsp, %rbp
    1133 ; LINUXOSX64-NEXT:    andq $-64, %rsp
    1134 ; LINUXOSX64-NEXT:    subq $64, %rsp
    1135 ; LINUXOSX64-NEXT:    vaddps %zmm3, %zmm1, %zmm1
    1136 ; LINUXOSX64-NEXT:    vaddps %zmm2, %zmm0, %zmm0
    1137 ; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm4, %zmm0
    1138 ; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm5, %zmm1
    1139 ; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm7, %zmm1
    1140 ; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm6, %zmm0
    1141 ; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm8, %zmm0
    1142 ; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm9, %zmm1
    1143 ; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm11, %zmm1
    1144 ; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm10, %zmm0
    1145 ; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm12, %zmm0
    1146 ; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm13, %zmm1
    1147 ; LINUXOSX64-NEXT:    vaddps %zmm1, %zmm15, %zmm1
    1148 ; LINUXOSX64-NEXT:    vaddps %zmm0, %zmm14, %zmm0
    1149 ; LINUXOSX64-NEXT:    vaddps 16(%rbp), %zmm0, %zmm0
    1150 ; LINUXOSX64-NEXT:    vaddps 80(%rbp), %zmm1, %zmm1
    1151 ; LINUXOSX64-NEXT:    movq %rbp, %rsp
    1152 ; LINUXOSX64-NEXT:    popq %rbp
    1153 ; LINUXOSX64-NEXT:    retq
    1154   %x1 = fadd <32 x float> %a0, %b0
    1155   %x2 = fadd <32 x float> %c0, %x1
    1156   %x3 = fadd <32 x float> %a1, %x2
    1157   %x4 = fadd <32 x float> %b1, %x3
    1158   %x5 = fadd <32 x float> %c1, %x4
    1159   %x6 = fadd <32 x float> %a2, %x5
    1160   %x7 = fadd <32 x float> %b2, %x6
    1161   %x8 = fadd <32 x float> %c2, %x7
    1162   ret <32 x float> %x8
    1163 }
   1164 
    1165 ; Test regcall when passing/retrieving mixed types.
         ; The arguments are unnamed: %0 double, %1 float, %2 i8, %3 i32, %4 i64,
         ; %5 i16, %6 i32*. Each value (and the i32 loaded through %6) is converted
         ; to double, the doubles are summed, and the sum is converted to i32 with
         ; fptosi. In the 64-bit checks the arguments arrive in xmm0, xmm1, eax,
         ; ecx, rdx, edi and rsi. In the i386 checks the i64 is assembled from edx
         ; and edi, the i16 arrives in esi, and the pointer is loaded from the
         ; stack into ebx.
         ; Attribute group #0 is defined outside the visible part of this file.
    1166 define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 {
    1167 ; X32-LABEL: test_argRetMixTypes:
    1168 ; X32:       # %bb.0:
    1169 ; X32-NEXT:    pushl %ebx
    1170 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
    1171 ; X32-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
    1172 ; X32-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
    1173 ; X32-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm1
    1174 ; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1175 ; X32-NEXT:    vcvtsi2sdl %ecx, %xmm2, %xmm1
    1176 ; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1177 ; X32-NEXT:    vmovd %edx, %xmm1
    1178 ; X32-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
    1179 ; X32-NEXT:    vcvtqq2pd %ymm1, %ymm1
    1180 ; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1181 ; X32-NEXT:    vcvtsi2sdl %esi, %xmm2, %xmm1
    1182 ; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1183 ; X32-NEXT:    vcvtsi2sdl (%ebx), %xmm2, %xmm1
    1184 ; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1185 ; X32-NEXT:    vcvttsd2si %xmm0, %eax
    1186 ; X32-NEXT:    popl %ebx
    1187 ; X32-NEXT:    vzeroupper
    1188 ; X32-NEXT:    retl
    1189 ;
    1190 ; WIN64-LABEL: test_argRetMixTypes:
    1191 ; WIN64:       # %bb.0:
    1192 ; WIN64-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
    1193 ; WIN64-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
    1194 ; WIN64-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm1
    1195 ; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1196 ; WIN64-NEXT:    vcvtsi2sdl %ecx, %xmm2, %xmm1
    1197 ; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1198 ; WIN64-NEXT:    vcvtsi2sdq %rdx, %xmm2, %xmm1
    1199 ; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1200 ; WIN64-NEXT:    vcvtsi2sdl %edi, %xmm2, %xmm1
    1201 ; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1202 ; WIN64-NEXT:    vcvtsi2sdl (%rsi), %xmm2, %xmm1
    1203 ; WIN64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1204 ; WIN64-NEXT:    vcvttsd2si %xmm0, %eax
    1205 ; WIN64-NEXT:    retq
    1206 ;
    1207 ; LINUXOSX64-LABEL: test_argRetMixTypes:
    1208 ; LINUXOSX64:       # %bb.0:
    1209 ; LINUXOSX64-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
    1210 ; LINUXOSX64-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
    1211 ; LINUXOSX64-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm1
    1212 ; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1213 ; LINUXOSX64-NEXT:    vcvtsi2sdl %ecx, %xmm2, %xmm1
    1214 ; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1215 ; LINUXOSX64-NEXT:    vcvtsi2sdq %rdx, %xmm2, %xmm1
    1216 ; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1217 ; LINUXOSX64-NEXT:    vcvtsi2sdl %edi, %xmm2, %xmm1
    1218 ; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1219 ; LINUXOSX64-NEXT:    vcvtsi2sdl (%rsi), %xmm2, %xmm1
    1220 ; LINUXOSX64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
    1221 ; LINUXOSX64-NEXT:    vcvttsd2si %xmm0, %eax
    1222 ; LINUXOSX64-NEXT:    retq
    1223   %8 = fpext float %1 to double
    1224   %9 = fadd double %8, %0
    1225   %10 = sitofp i8 %2 to double
    1226   %11 = fadd double %9, %10
    1227   %12 = sitofp i32 %3 to double
    1228   %13 = fadd double %11, %12
    1229   %14 = sitofp i64 %4 to double
    1230   %15 = fadd double %13, %14
    1231   %16 = sitofp i16 %5 to double
    1232   %17 = fadd double %15, %16
    1233   %18 = load i32, i32* %6, align 4
    1234   %19 = sitofp i32 %18 to double
    1235   %20 = fadd double %17, %19
    1236   %21 = fptosi double %20 to i32
    1237   ret i32 %21
    1238 }
   1239 
    1240 %struct.complex = type { float, double, i32, i8, i64}
    1241 
         ; Test regcall when returning a struct with several fields of mixed types.
         ; Returns { %0, %1 + 5.0, 4, 7, 999 }; arguments %2, %3 and %4 are unused.
         ; The expected code returns the fields in separate registers: the double
         ; sum in xmm1, 4 in eax, 7 in cl and 999 in edx. No instruction is expected
         ; for the float field, which is passed through unchanged.
         ; NOTE(review): the i386 checks additionally zero edi - presumably the
         ; upper half of the i64 field; confirm against the regcall convention.
         ; Attribute group #0 is defined outside the visible part of this file.
    1242 define x86_regcallcc %struct.complex @test_argMultiRet(float, double, i32, i8, i64) local_unnamed_addr #0 {
    1243 ; X32-LABEL: test_argMultiRet:
    1244 ; X32:       # %bb.0:
    1245 ; X32-NEXT:    vaddsd __real@4014000000000000, %xmm1, %xmm1
    1246 ; X32-NEXT:    movl $4, %eax
    1247 ; X32-NEXT:    movb $7, %cl
    1248 ; X32-NEXT:    movl $999, %edx # imm = 0x3E7
    1249 ; X32-NEXT:    xorl %edi, %edi
    1250 ; X32-NEXT:    retl
    1251 ;
    1252 ; WIN64-LABEL: test_argMultiRet:
    1253 ; WIN64:       # %bb.0:
    1254 ; WIN64-NEXT:    vaddsd __real@{{.*}}(%rip), %xmm1, %xmm1
    1255 ; WIN64-NEXT:    movl $4, %eax
    1256 ; WIN64-NEXT:    movb $7, %cl
    1257 ; WIN64-NEXT:    movl $999, %edx # imm = 0x3E7
    1258 ; WIN64-NEXT:    retq
    1259 ;
    1260 ; LINUXOSX64-LABEL: test_argMultiRet:
    1261 ; LINUXOSX64:       # %bb.0:
    1262 ; LINUXOSX64-NEXT:    vaddsd {{.*}}(%rip), %xmm1, %xmm1
    1263 ; LINUXOSX64-NEXT:    movl $4, %eax
    1264 ; LINUXOSX64-NEXT:    movb $7, %cl
    1265 ; LINUXOSX64-NEXT:    movl $999, %edx # imm = 0x3E7
    1266 ; LINUXOSX64-NEXT:    retq
    1267   %6 = fadd double %1, 5.000000e+00
    1268   %7 = insertvalue %struct.complex undef, float %0, 0
    1269   %8 = insertvalue %struct.complex %7, double %6, 1
    1270   %9 = insertvalue %struct.complex %8, i32 4, 2
    1271   %10 = insertvalue %struct.complex %9, i8 7, 3
    1272   %11 = insertvalue %struct.complex %10, i64 999, 4
    1273   ret %struct.complex %11
    1274 }
   1275