Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SI-MINREG %s
      2 ; RUN: llc -march=amdgcn -mcpu=tahiti -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,SI-MAXOCC %s
      3 ; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-minreg -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,VI-MINREG %s
      4 ; RUN: llc -march=amdgcn -mcpu=fiji -misched=gcn-max-occupancy-experimental -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,VI-MAXOCC %s
      5 
      6 ; SI-MINREG: NumSgprs: {{[1-9]$}}
      7 ; SI-MINREG: NumVgprs: {{[1-9]$}}
      8 
      9 ; SI-MAXOCC: NumSgprs: {{[0-4][0-9]$}}
     10 ; SI-MAXOCC: NumVgprs: {{[0-4][0-9]$}}
     11 
     12 ; stores may alias loads
     13 ; VI: NumSgprs: {{[0-9]$}}
     14 ; VI: NumVgprs: {{[1-3][0-9]$}}
     15 
     16 define amdgpu_kernel void @load_fma_store(float addrspace(3)* nocapture readonly %in_arg, float addrspace(1)* nocapture %out_arg) {
          ; Test kernel for the RUN/CHECK lines at the top of this file, which compile
          ; it with -misched=gcn-minreg and -misched=gcn-max-occupancy-experimental
          ; and check the reported NumSgprs / NumVgprs.
          ;
          ; It performs 30 independent res.k = fmuladd(a.k, b.k, c.k) computations.
          ; The 90 inputs are read through %in_arg (addrspace(3), read-only) and the
          ; 30 results are written through %out_arg (addrspace(1)).
          ;
          ; The IR is laid out in five phases: input addresses, loads, fmuladds,
          ; output addresses, stores. Every load therefore appears before the first
          ; fmuladd, and every fmuladd before the first store.
          ; NOTE(review): the checked register counts presumably depend on this
          ; input order -- confirm before reordering or regenerating the body.
     17 bb:
          ; Phase 1: input addresses (offsets are in float elements from %in_arg).
          ; For triple k in 0..29:
          ;   adr.a.k = 20004 + 768*k
          ;   adr.b.k = adr.a.k + 248
          ;   adr.c.k = adr.a.k + 504
     18   %adr.a.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20004
     19   %adr.b.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20252
     20   %adr.c.0 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20508
     21   %adr.a.1 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 20772
     22   %adr.b.1 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21020
     23   %adr.c.1 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21276
     24   %adr.a.2 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21540
     25   %adr.b.2 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 21788
     26   %adr.c.2 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22044
     27   %adr.a.3 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22308
     28   %adr.b.3 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22556
     29   %adr.c.3 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 22812
     30   %adr.a.4 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23076
     31   %adr.b.4 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23324
     32   %adr.c.4 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23580
     33   %adr.a.5 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 23844
     34   %adr.b.5 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24092
     35   %adr.c.5 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24348
     36   %adr.a.6 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24612
     37   %adr.b.6 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 24860
     38   %adr.c.6 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25116
     39   %adr.a.7 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25380
     40   %adr.b.7 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25628
     41   %adr.c.7 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 25884
     42   %adr.a.8 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26148
     43   %adr.b.8 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26396
     44   %adr.c.8 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26652
     45   %adr.a.9 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 26916
     46   %adr.b.9 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27164
     47   %adr.c.9 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27420
     48   %adr.a.10 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27684
     49   %adr.b.10 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 27932
     50   %adr.c.10 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28188
     51   %adr.a.11 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28452
     52   %adr.b.11 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28700
     53   %adr.c.11 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 28956
     54   %adr.a.12 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29220
     55   %adr.b.12 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29468
     56   %adr.c.12 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29724
     57   %adr.a.13 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 29988
     58   %adr.b.13 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 30236
     59   %adr.c.13 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 30492
     60   %adr.a.14 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 30756
     61   %adr.b.14 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31004
     62   %adr.c.14 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31260
     63   %adr.a.15 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31524
     64   %adr.b.15 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 31772
     65   %adr.c.15 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32028
     66   %adr.a.16 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32292
     67   %adr.b.16 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32540
     68   %adr.c.16 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 32796
     69   %adr.a.17 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33060
     70   %adr.b.17 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33308
     71   %adr.c.17 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33564
     72   %adr.a.18 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 33828
     73   %adr.b.18 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34076
     74   %adr.c.18 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34332
     75   %adr.a.19 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34596
     76   %adr.b.19 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 34844
     77   %adr.c.19 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35100
     78   %adr.a.20 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35364
     79   %adr.b.20 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35612
     80   %adr.c.20 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 35868
     81   %adr.a.21 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36132
     82   %adr.b.21 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36380
     83   %adr.c.21 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36636
     84   %adr.a.22 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 36900
     85   %adr.b.22 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37148
     86   %adr.c.22 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37404
     87   %adr.a.23 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37668
     88   %adr.b.23 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 37916
     89   %adr.c.23 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38172
     90   %adr.a.24 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38436
     91   %adr.b.24 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38684
     92   %adr.c.24 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 38940
     93   %adr.a.25 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39204
     94   %adr.b.25 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39452
     95   %adr.c.25 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39708
     96   %adr.a.26 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 39972
     97   %adr.b.26 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40220
     98   %adr.c.26 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40476
     99   %adr.a.27 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40740
    100   %adr.b.27 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 40988
    101   %adr.c.27 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 41244
    102   %adr.a.28 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 41508
    103   %adr.b.28 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 41756
    104   %adr.c.28 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42012
    105   %adr.a.29 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42276
    106   %adr.b.29 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42524
    107   %adr.c.29 = getelementptr inbounds float, float addrspace(3)* %in_arg, i32 42780
          ; Phase 2: load all 90 input floats (align 4) from the addresses above.
    108   %a.0 = load float, float addrspace(3)* %adr.a.0, align 4
    109   %b.0 = load float, float addrspace(3)* %adr.b.0, align 4
    110   %c.0 = load float, float addrspace(3)* %adr.c.0, align 4
    111   %a.1 = load float, float addrspace(3)* %adr.a.1, align 4
    112   %b.1 = load float, float addrspace(3)* %adr.b.1, align 4
    113   %c.1 = load float, float addrspace(3)* %adr.c.1, align 4
    114   %a.2 = load float, float addrspace(3)* %adr.a.2, align 4
    115   %b.2 = load float, float addrspace(3)* %adr.b.2, align 4
    116   %c.2 = load float, float addrspace(3)* %adr.c.2, align 4
    117   %a.3 = load float, float addrspace(3)* %adr.a.3, align 4
    118   %b.3 = load float, float addrspace(3)* %adr.b.3, align 4
    119   %c.3 = load float, float addrspace(3)* %adr.c.3, align 4
    120   %a.4 = load float, float addrspace(3)* %adr.a.4, align 4
    121   %b.4 = load float, float addrspace(3)* %adr.b.4, align 4
    122   %c.4 = load float, float addrspace(3)* %adr.c.4, align 4
    123   %a.5 = load float, float addrspace(3)* %adr.a.5, align 4
    124   %b.5 = load float, float addrspace(3)* %adr.b.5, align 4
    125   %c.5 = load float, float addrspace(3)* %adr.c.5, align 4
    126   %a.6 = load float, float addrspace(3)* %adr.a.6, align 4
    127   %b.6 = load float, float addrspace(3)* %adr.b.6, align 4
    128   %c.6 = load float, float addrspace(3)* %adr.c.6, align 4
    129   %a.7 = load float, float addrspace(3)* %adr.a.7, align 4
    130   %b.7 = load float, float addrspace(3)* %adr.b.7, align 4
    131   %c.7 = load float, float addrspace(3)* %adr.c.7, align 4
    132   %a.8 = load float, float addrspace(3)* %adr.a.8, align 4
    133   %b.8 = load float, float addrspace(3)* %adr.b.8, align 4
    134   %c.8 = load float, float addrspace(3)* %adr.c.8, align 4
    135   %a.9 = load float, float addrspace(3)* %adr.a.9, align 4
    136   %b.9 = load float, float addrspace(3)* %adr.b.9, align 4
    137   %c.9 = load float, float addrspace(3)* %adr.c.9, align 4
    138   %a.10 = load float, float addrspace(3)* %adr.a.10, align 4
    139   %b.10 = load float, float addrspace(3)* %adr.b.10, align 4
    140   %c.10 = load float, float addrspace(3)* %adr.c.10, align 4
    141   %a.11 = load float, float addrspace(3)* %adr.a.11, align 4
    142   %b.11 = load float, float addrspace(3)* %adr.b.11, align 4
    143   %c.11 = load float, float addrspace(3)* %adr.c.11, align 4
    144   %a.12 = load float, float addrspace(3)* %adr.a.12, align 4
    145   %b.12 = load float, float addrspace(3)* %adr.b.12, align 4
    146   %c.12 = load float, float addrspace(3)* %adr.c.12, align 4
    147   %a.13 = load float, float addrspace(3)* %adr.a.13, align 4
    148   %b.13 = load float, float addrspace(3)* %adr.b.13, align 4
    149   %c.13 = load float, float addrspace(3)* %adr.c.13, align 4
    150   %a.14 = load float, float addrspace(3)* %adr.a.14, align 4
    151   %b.14 = load float, float addrspace(3)* %adr.b.14, align 4
    152   %c.14 = load float, float addrspace(3)* %adr.c.14, align 4
    153   %a.15 = load float, float addrspace(3)* %adr.a.15, align 4
    154   %b.15 = load float, float addrspace(3)* %adr.b.15, align 4
    155   %c.15 = load float, float addrspace(3)* %adr.c.15, align 4
    156   %a.16 = load float, float addrspace(3)* %adr.a.16, align 4
    157   %b.16 = load float, float addrspace(3)* %adr.b.16, align 4
    158   %c.16 = load float, float addrspace(3)* %adr.c.16, align 4
    159   %a.17 = load float, float addrspace(3)* %adr.a.17, align 4
    160   %b.17 = load float, float addrspace(3)* %adr.b.17, align 4
    161   %c.17 = load float, float addrspace(3)* %adr.c.17, align 4
    162   %a.18 = load float, float addrspace(3)* %adr.a.18, align 4
    163   %b.18 = load float, float addrspace(3)* %adr.b.18, align 4
    164   %c.18 = load float, float addrspace(3)* %adr.c.18, align 4
    165   %a.19 = load float, float addrspace(3)* %adr.a.19, align 4
    166   %b.19 = load float, float addrspace(3)* %adr.b.19, align 4
    167   %c.19 = load float, float addrspace(3)* %adr.c.19, align 4
    168   %a.20 = load float, float addrspace(3)* %adr.a.20, align 4
    169   %b.20 = load float, float addrspace(3)* %adr.b.20, align 4
    170   %c.20 = load float, float addrspace(3)* %adr.c.20, align 4
    171   %a.21 = load float, float addrspace(3)* %adr.a.21, align 4
    172   %b.21 = load float, float addrspace(3)* %adr.b.21, align 4
    173   %c.21 = load float, float addrspace(3)* %adr.c.21, align 4
    174   %a.22 = load float, float addrspace(3)* %adr.a.22, align 4
    175   %b.22 = load float, float addrspace(3)* %adr.b.22, align 4
    176   %c.22 = load float, float addrspace(3)* %adr.c.22, align 4
    177   %a.23 = load float, float addrspace(3)* %adr.a.23, align 4
    178   %b.23 = load float, float addrspace(3)* %adr.b.23, align 4
    179   %c.23 = load float, float addrspace(3)* %adr.c.23, align 4
    180   %a.24 = load float, float addrspace(3)* %adr.a.24, align 4
    181   %b.24 = load float, float addrspace(3)* %adr.b.24, align 4
    182   %c.24 = load float, float addrspace(3)* %adr.c.24, align 4
    183   %a.25 = load float, float addrspace(3)* %adr.a.25, align 4
    184   %b.25 = load float, float addrspace(3)* %adr.b.25, align 4
    185   %c.25 = load float, float addrspace(3)* %adr.c.25, align 4
    186   %a.26 = load float, float addrspace(3)* %adr.a.26, align 4
    187   %b.26 = load float, float addrspace(3)* %adr.b.26, align 4
    188   %c.26 = load float, float addrspace(3)* %adr.c.26, align 4
    189   %a.27 = load float, float addrspace(3)* %adr.a.27, align 4
    190   %b.27 = load float, float addrspace(3)* %adr.b.27, align 4
    191   %c.27 = load float, float addrspace(3)* %adr.c.27, align 4
    192   %a.28 = load float, float addrspace(3)* %adr.a.28, align 4
    193   %b.28 = load float, float addrspace(3)* %adr.b.28, align 4
    194   %c.28 = load float, float addrspace(3)* %adr.c.28, align 4
    195   %a.29 = load float, float addrspace(3)* %adr.a.29, align 4
    196   %b.29 = load float, float addrspace(3)* %adr.b.29, align 4
    197   %c.29 = load float, float addrspace(3)* %adr.c.29, align 4
          ; Phase 3: res.k = llvm.fmuladd(a.k, b.k, c.k). Each call uses only its
          ; own triple, so the 30 computations have no data dependences on each other.
    198   %res.0 = tail call float @llvm.fmuladd.f32(float %a.0, float %b.0, float %c.0)
    199   %res.1 = tail call float @llvm.fmuladd.f32(float %a.1, float %b.1, float %c.1)
    200   %res.2 = tail call float @llvm.fmuladd.f32(float %a.2, float %b.2, float %c.2)
    201   %res.3 = tail call float @llvm.fmuladd.f32(float %a.3, float %b.3, float %c.3)
    202   %res.4 = tail call float @llvm.fmuladd.f32(float %a.4, float %b.4, float %c.4)
    203   %res.5 = tail call float @llvm.fmuladd.f32(float %a.5, float %b.5, float %c.5)
    204   %res.6 = tail call float @llvm.fmuladd.f32(float %a.6, float %b.6, float %c.6)
    205   %res.7 = tail call float @llvm.fmuladd.f32(float %a.7, float %b.7, float %c.7)
    206   %res.8 = tail call float @llvm.fmuladd.f32(float %a.8, float %b.8, float %c.8)
    207   %res.9 = tail call float @llvm.fmuladd.f32(float %a.9, float %b.9, float %c.9)
    208   %res.10 = tail call float @llvm.fmuladd.f32(float %a.10, float %b.10, float %c.10)
    209   %res.11 = tail call float @llvm.fmuladd.f32(float %a.11, float %b.11, float %c.11)
    210   %res.12 = tail call float @llvm.fmuladd.f32(float %a.12, float %b.12, float %c.12)
    211   %res.13 = tail call float @llvm.fmuladd.f32(float %a.13, float %b.13, float %c.13)
    212   %res.14 = tail call float @llvm.fmuladd.f32(float %a.14, float %b.14, float %c.14)
    213   %res.15 = tail call float @llvm.fmuladd.f32(float %a.15, float %b.15, float %c.15)
    214   %res.16 = tail call float @llvm.fmuladd.f32(float %a.16, float %b.16, float %c.16)
    215   %res.17 = tail call float @llvm.fmuladd.f32(float %a.17, float %b.17, float %c.17)
    216   %res.18 = tail call float @llvm.fmuladd.f32(float %a.18, float %b.18, float %c.18)
    217   %res.19 = tail call float @llvm.fmuladd.f32(float %a.19, float %b.19, float %c.19)
    218   %res.20 = tail call float @llvm.fmuladd.f32(float %a.20, float %b.20, float %c.20)
    219   %res.21 = tail call float @llvm.fmuladd.f32(float %a.21, float %b.21, float %c.21)
    220   %res.22 = tail call float @llvm.fmuladd.f32(float %a.22, float %b.22, float %c.22)
    221   %res.23 = tail call float @llvm.fmuladd.f32(float %a.23, float %b.23, float %c.23)
    222   %res.24 = tail call float @llvm.fmuladd.f32(float %a.24, float %b.24, float %c.24)
    223   %res.25 = tail call float @llvm.fmuladd.f32(float %a.25, float %b.25, float %c.25)
    224   %res.26 = tail call float @llvm.fmuladd.f32(float %a.26, float %b.26, float %c.26)
    225   %res.27 = tail call float @llvm.fmuladd.f32(float %a.27, float %b.27, float %c.27)
    226   %res.28 = tail call float @llvm.fmuladd.f32(float %a.28, float %b.28, float %c.28)
    227   %res.29 = tail call float @llvm.fmuladd.f32(float %a.29, float %b.29, float %c.29)
          ; Phase 4: output addresses. Result k goes to float element 2*k of
          ; %out_arg (even indices 0..58); the odd elements in between are not written.
    228   %adr.res.0 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 0
    229   %adr.res.1 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 2
    230   %adr.res.2 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 4
    231   %adr.res.3 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 6
    232   %adr.res.4 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 8
    233   %adr.res.5 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 10
    234   %adr.res.6 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 12
    235   %adr.res.7 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 14
    236   %adr.res.8 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 16
    237   %adr.res.9 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 18
    238   %adr.res.10 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 20
    239   %adr.res.11 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 22
    240   %adr.res.12 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 24
    241   %adr.res.13 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 26
    242   %adr.res.14 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 28
    243   %adr.res.15 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 30
    244   %adr.res.16 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 32
    245   %adr.res.17 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 34
    246   %adr.res.18 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 36
    247   %adr.res.19 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 38
    248   %adr.res.20 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 40
    249   %adr.res.21 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 42
    250   %adr.res.22 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 44
    251   %adr.res.23 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 46
    252   %adr.res.24 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 48
    253   %adr.res.25 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 50
    254   %adr.res.26 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 52
    255   %adr.res.27 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 54
    256   %adr.res.28 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 56
    257   %adr.res.29 = getelementptr inbounds float, float addrspace(1)* %out_arg, i64 58
          ; Phase 5: store the 30 results (align 4) through the addresses above.
    258   store float %res.0, float addrspace(1)* %adr.res.0, align 4
    259   store float %res.1, float addrspace(1)* %adr.res.1, align 4
    260   store float %res.2, float addrspace(1)* %adr.res.2, align 4
    261   store float %res.3, float addrspace(1)* %adr.res.3, align 4
    262   store float %res.4, float addrspace(1)* %adr.res.4, align 4
    263   store float %res.5, float addrspace(1)* %adr.res.5, align 4
    264   store float %res.6, float addrspace(1)* %adr.res.6, align 4
    265   store float %res.7, float addrspace(1)* %adr.res.7, align 4
    266   store float %res.8, float addrspace(1)* %adr.res.8, align 4
    267   store float %res.9, float addrspace(1)* %adr.res.9, align 4
    268   store float %res.10, float addrspace(1)* %adr.res.10, align 4
    269   store float %res.11, float addrspace(1)* %adr.res.11, align 4
    270   store float %res.12, float addrspace(1)* %adr.res.12, align 4
    271   store float %res.13, float addrspace(1)* %adr.res.13, align 4
    272   store float %res.14, float addrspace(1)* %adr.res.14, align 4
    273   store float %res.15, float addrspace(1)* %adr.res.15, align 4
    274   store float %res.16, float addrspace(1)* %adr.res.16, align 4
    275   store float %res.17, float addrspace(1)* %adr.res.17, align 4
    276   store float %res.18, float addrspace(1)* %adr.res.18, align 4
    277   store float %res.19, float addrspace(1)* %adr.res.19, align 4
    278   store float %res.20, float addrspace(1)* %adr.res.20, align 4
    279   store float %res.21, float addrspace(1)* %adr.res.21, align 4
    280   store float %res.22, float addrspace(1)* %adr.res.22, align 4
    281   store float %res.23, float addrspace(1)* %adr.res.23, align 4
    282   store float %res.24, float addrspace(1)* %adr.res.24, align 4
    283   store float %res.25, float addrspace(1)* %adr.res.25, align 4
    284   store float %res.26, float addrspace(1)* %adr.res.26, align 4
    285   store float %res.27, float addrspace(1)* %adr.res.27, align 4
    286   store float %res.28, float addrspace(1)* %adr.res.28, align 4
    287   store float %res.29, float addrspace(1)* %adr.res.29, align 4
    288   ret void
    289 }
    290 declare float @llvm.fmuladd.f32(float, float, float) #0 ; intrinsic called for every %res.N in @load_fma_store
    291 attributes #0 = { nounwind readnone } ; attribute group #0, applied to the declaration above
    292