; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -O0 | FileCheck %s

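; The IR below looks like clang's unoptimized (-O0) expansion of an
; _mm512_set_ps-style compound literal: the sixteen scalar arguments are
; stored to per-parameter allocas (%__A.addr.i .. %__P.addr.i), reloaded,
; and inserted one element at a time into a <16 x float>, so that element 0
; receives %f1 and element 15 receives %f16. A hypothetical C-level source
; sketch (an assumption for illustration, not part of the original test):
;
;   __m512 makefloat(float f1, float f2, /* ... */ float f15, float f16) {
;     return _mm512_set_ps(f16, f15, f14, f13, f12, f11, f10, f9,
;                          f8, f7, f6, f5, f4, f3, f2, f1);
;   }
;
; The CHECK lines pin down the -O0 lowering: scalar vmovss spills and
; reloads, vinsertps chains building each 128-bit lane, then vinsertf128
; and vinsertf64x4 assembling the full 512-bit result.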
define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6, float %f7, float %f8, float %f9, float %f10, float %f11, float %f12, float %f13, float %f14, float %f15, float %f16) #0 {
; CHECK-LABEL: makefloat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    .cfi_def_cfa_register %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $320, %rsp # imm = 0x140
; CHECK-NEXT:    vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm16, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm17, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm18, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm19, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm20, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm21, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm22, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss %xmm23, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovaps %zmm21, %zmm0
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps %zmm20, %zmm0
; CHECK-NEXT:    vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vmovaps %zmm22, %zmm1
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT:    vmovaps %zmm23, %zmm1
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; CHECK-NEXT:    vmovaps %zmm17, %zmm1
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps %zmm16, %zmm0
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vmovaps %zmm18, %zmm1
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; CHECK-NEXT:    vmovaps %zmm19, %zmm1
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; CHECK-NEXT:    # implicit-def: $ymm1
; CHECK-NEXT:    vmovaps %xmm0, %xmm1
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm7[0]
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 4-byte Reload
; CHECK-NEXT:    # xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 4-byte Reload
; CHECK-NEXT:    # xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm4[0,1],xmm2[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; CHECK-NEXT:    # implicit-def: $ymm3
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm3
; CHECK-NEXT:    # implicit-def: $zmm24
; CHECK-NEXT:    vmovaps %zmm3, %zmm24
; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
; CHECK-NEXT:    vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %zmm0
; CHECK-NEXT:    vmovss %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    vmovss %xmm14, (%rsp) # 4-byte Spill
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
; CHECK-NEXT:    retq
entry:
  %__A.addr.i = alloca float, align 4
  %__B.addr.i = alloca float, align 4
  %__C.addr.i = alloca float, align 4
  %__D.addr.i = alloca float, align 4
  %__E.addr.i = alloca float, align 4
  %__F.addr.i = alloca float, align 4
  %__G.addr.i = alloca float, align 4
  %__H.addr.i = alloca float, align 4
  %__I.addr.i = alloca float, align 4
  %__J.addr.i = alloca float, align 4
  %__K.addr.i = alloca float, align 4
  %__L.addr.i = alloca float, align 4
  %__M.addr.i = alloca float, align 4
  %__N.addr.i = alloca float, align 4
  %__O.addr.i = alloca float, align 4
  %__P.addr.i = alloca float, align 4
  %.compoundliteral.i = alloca <16 x float>, align 64
  %f1.addr = alloca float, align 4
  %f2.addr = alloca float, align 4
  %f3.addr = alloca float, align 4
  %f4.addr = alloca float, align 4
  %f5.addr = alloca float, align 4
  %f6.addr = alloca float, align 4
  %f7.addr = alloca float, align 4
  %f8.addr = alloca float, align 4
  %f9.addr = alloca float, align 4
  %f10.addr = alloca float, align 4
  %f11.addr = alloca float, align 4
  %f12.addr = alloca float, align 4
  %f13.addr = alloca float, align 4
  %f14.addr = alloca float, align 4
  %f15.addr = alloca float, align 4
  %f16.addr = alloca float, align 4
  store float %f1, float* %f1.addr, align 4
  store float %f2, float* %f2.addr, align 4
  store float %f3, float* %f3.addr, align 4
  store float %f4, float* %f4.addr, align 4
  store float %f5, float* %f5.addr, align 4
  store float %f6, float* %f6.addr, align 4
  store float %f7, float* %f7.addr, align 4
  store float %f8, float* %f8.addr, align 4
  store float %f9, float* %f9.addr, align 4
  store float %f10, float* %f10.addr, align 4
  store float %f11, float* %f11.addr, align 4
  store float %f12, float* %f12.addr, align 4
  store float %f13, float* %f13.addr, align 4
  store float %f14, float* %f14.addr, align 4
  store float %f15, float* %f15.addr, align 4
  store float %f16, float* %f16.addr, align 4
  %0 = load float, float* %f16.addr, align 4
  %1 = load float, float* %f15.addr, align 4
  %2 = load float, float* %f14.addr, align 4
  %3 = load float, float* %f13.addr, align 4
  %4 = load float, float* %f12.addr, align 4
  %5 = load float, float* %f11.addr, align 4
  %6 = load float, float* %f10.addr, align 4
  %7 = load float, float* %f9.addr, align 4
  %8 = load float, float* %f8.addr, align 4
  %9 = load float, float* %f7.addr, align 4
  %10 = load float, float* %f6.addr, align 4
  %11 = load float, float* %f5.addr, align 4
  %12 = load float, float* %f4.addr, align 4
  %13 = load float, float* %f3.addr, align 4
  %14 = load float, float* %f2.addr, align 4
  %15 = load float, float* %f1.addr, align 4
  store float %0, float* %__A.addr.i, align 4
  store float %1, float* %__B.addr.i, align 4
  store float %2, float* %__C.addr.i, align 4
  store float %3, float* %__D.addr.i, align 4
  store float %4, float* %__E.addr.i, align 4
  store float %5, float* %__F.addr.i, align 4
  store float %6, float* %__G.addr.i, align 4
  store float %7, float* %__H.addr.i, align 4
  store float %8, float* %__I.addr.i, align 4
  store float %9, float* %__J.addr.i, align 4
  store float %10, float* %__K.addr.i, align 4
  store float %11, float* %__L.addr.i, align 4
  store float %12, float* %__M.addr.i, align 4
  store float %13, float* %__N.addr.i, align 4
  store float %14, float* %__O.addr.i, align 4
  store float %15, float* %__P.addr.i, align 4
  %16 = load float, float* %__P.addr.i, align 4
  %vecinit.i = insertelement <16 x float> undef, float %16, i32 0
  %17 = load float, float* %__O.addr.i, align 4
  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %17, i32 1
  %18 = load float, float* %__N.addr.i, align 4
  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %18, i32 2
  %19 = load float, float* %__M.addr.i, align 4
  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %19, i32 3
  %20 = load float, float* %__L.addr.i, align 4
  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %20, i32 4
  %21 = load float, float* %__K.addr.i, align 4
  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %21, i32 5
  %22 = load float, float* %__J.addr.i, align 4
  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %22, i32 6
  %23 = load float, float* %__I.addr.i, align 4
  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %23, i32 7
  %24 = load float, float* %__H.addr.i, align 4
  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %24, i32 8
  %25 = load float, float* %__G.addr.i, align 4
  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %25, i32 9
  %26 = load float, float* %__F.addr.i, align 4
  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %26, i32 10
  %27 = load float, float* %__E.addr.i, align 4
  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %27, i32 11
  %28 = load float, float* %__D.addr.i, align 4
  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %28, i32 12
  %29 = load float, float* %__C.addr.i, align 4
  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %29, i32 13
  %30 = load float, float* %__B.addr.i, align 4
  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %30, i32 14
  %31 = load float, float* %__A.addr.i, align 4
  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %31, i32 15
  store <16 x float> %vecinit15.i, <16 x float>* %.compoundliteral.i, align 64
  %32 = load <16 x float>, <16 x float>* %.compoundliteral.i, align 64
  ret <16 x float> %32
}