; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
;
; Verify that register pressure heuristics are working in MachineScheduler.
;
; We can further reduce spills in this case with a global register
; pressure heuristic, like Sethi-Ullman numbers or biasing toward
; scheduled subtrees. However, these heuristics are only marginally
; beneficial on x86_64 and exacerbate register pressure in other,
; more complex cases.
;
; CHECK: @wrap_mul4
; CHECK: 23 regalloc - Number of spills inserted
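;
; For reference, @wrap_mul4 below is a fully unrolled 4x4 double-precision
; matrix multiply, Out = A * B, with all three matrices row-major. A minimal
; C sketch of the presumed source follows (the original source is not part
; of this test, so the loop structure and names are an assumption):
;
;   void wrap_mul4(double *Out, double A[4][4], double B[4][4]) {
;     for (int i = 0; i < 4; ++i)        /* row of A / Out */
;       for (int j = 0; j < 4; ++j) {    /* column of B / Out */
;         double s = 0.0;
;         for (int k = 0; k < 4; ++k)
;           s += A[i][k] * B[k][j];      /* dot(A row i, B column j) */
;         Out[i * 4 + j] = s;
;       }
;   }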

define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
entry:
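  ; Row 0 of Out: load row 0 of A and all 16 elements of B, then form the
  ; dot product of A's row 0 with each column of B.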
  %arrayidx1.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 0
  %0 = load double, double* %arrayidx1.i, align 8
  %arrayidx3.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 0
  %1 = load double, double* %arrayidx3.i, align 8
  %mul.i = fmul double %0, %1
  %arrayidx5.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 1
  %2 = load double, double* %arrayidx5.i, align 8
  %arrayidx7.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 0
  %3 = load double, double* %arrayidx7.i, align 8
  %mul8.i = fmul double %2, %3
  %add.i = fadd double %mul.i, %mul8.i
  %arrayidx10.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 2
  %4 = load double, double* %arrayidx10.i, align 8
  %arrayidx12.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 0
  %5 = load double, double* %arrayidx12.i, align 8
  %mul13.i = fmul double %4, %5
  %add14.i = fadd double %add.i, %mul13.i
  %arrayidx16.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 3
  %6 = load double, double* %arrayidx16.i, align 8
  %arrayidx18.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 0
  %7 = load double, double* %arrayidx18.i, align 8
  %mul19.i = fmul double %6, %7
  %add20.i = fadd double %add14.i, %mul19.i
  %arrayidx25.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 1
  %8 = load double, double* %arrayidx25.i, align 8
  %mul26.i = fmul double %0, %8
  %arrayidx30.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 1
  %9 = load double, double* %arrayidx30.i, align 8
  %mul31.i = fmul double %2, %9
  %add32.i = fadd double %mul26.i, %mul31.i
  %arrayidx36.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 1
  %10 = load double, double* %arrayidx36.i, align 8
  %mul37.i = fmul double %4, %10
  %add38.i = fadd double %add32.i, %mul37.i
  %arrayidx42.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 1
  %11 = load double, double* %arrayidx42.i, align 8
  %mul43.i = fmul double %6, %11
  %add44.i = fadd double %add38.i, %mul43.i
  %arrayidx49.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 2
  %12 = load double, double* %arrayidx49.i, align 8
  %mul50.i = fmul double %0, %12
  %arrayidx54.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 2
  %13 = load double, double* %arrayidx54.i, align 8
  %mul55.i = fmul double %2, %13
  %add56.i = fadd double %mul50.i, %mul55.i
  %arrayidx60.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 2
  %14 = load double, double* %arrayidx60.i, align 8
  %mul61.i = fmul double %4, %14
  %add62.i = fadd double %add56.i, %mul61.i
  %arrayidx66.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 2
  %15 = load double, double* %arrayidx66.i, align 8
  %mul67.i = fmul double %6, %15
  %add68.i = fadd double %add62.i, %mul67.i
  %arrayidx73.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 3
  %16 = load double, double* %arrayidx73.i, align 8
  %mul74.i = fmul double %0, %16
  %arrayidx78.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 3
  %17 = load double, double* %arrayidx78.i, align 8
  %mul79.i = fmul double %2, %17
  %add80.i = fadd double %mul74.i, %mul79.i
  %arrayidx84.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 3
  %18 = load double, double* %arrayidx84.i, align 8
  %mul85.i = fmul double %4, %18
  %add86.i = fadd double %add80.i, %mul85.i
  %arrayidx90.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 3
  %19 = load double, double* %arrayidx90.i, align 8
  %mul91.i = fmul double %6, %19
  %add92.i = fadd double %add86.i, %mul91.i
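  ; Row 1 of Out: load row 1 of A and reuse the B columns loaded above.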
  %arrayidx95.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 0
  %20 = load double, double* %arrayidx95.i, align 8
  %mul98.i = fmul double %1, %20
  %arrayidx100.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 1
  %21 = load double, double* %arrayidx100.i, align 8
  %mul103.i = fmul double %3, %21
  %add104.i = fadd double %mul98.i, %mul103.i
  %arrayidx106.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 2
  %22 = load double, double* %arrayidx106.i, align 8
  %mul109.i = fmul double %5, %22
  %add110.i = fadd double %add104.i, %mul109.i
  %arrayidx112.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 3
  %23 = load double, double* %arrayidx112.i, align 8
  %mul115.i = fmul double %7, %23
  %add116.i = fadd double %add110.i, %mul115.i
  %mul122.i = fmul double %8, %20
  %mul127.i = fmul double %9, %21
  %add128.i = fadd double %mul122.i, %mul127.i
  %mul133.i = fmul double %10, %22
  %add134.i = fadd double %add128.i, %mul133.i
  %mul139.i = fmul double %11, %23
  %add140.i = fadd double %add134.i, %mul139.i
  %mul146.i = fmul double %12, %20
  %mul151.i = fmul double %13, %21
  %add152.i = fadd double %mul146.i, %mul151.i
  %mul157.i = fmul double %14, %22
  %add158.i = fadd double %add152.i, %mul157.i
  %mul163.i = fmul double %15, %23
  %add164.i = fadd double %add158.i, %mul163.i
  %mul170.i = fmul double %16, %20
  %mul175.i = fmul double %17, %21
  %add176.i = fadd double %mul170.i, %mul175.i
  %mul181.i = fmul double %18, %22
  %add182.i = fadd double %add176.i, %mul181.i
  %mul187.i = fmul double %19, %23
  %add188.i = fadd double %add182.i, %mul187.i
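  ; Row 2 of Out: A's row 2 against the same B columns.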
  %arrayidx191.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 0
  %24 = load double, double* %arrayidx191.i, align 8
  %mul194.i = fmul double %1, %24
  %arrayidx196.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 1
  %25 = load double, double* %arrayidx196.i, align 8
  %mul199.i = fmul double %3, %25
  %add200.i = fadd double %mul194.i, %mul199.i
  %arrayidx202.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 2
  %26 = load double, double* %arrayidx202.i, align 8
  %mul205.i = fmul double %5, %26
  %add206.i = fadd double %add200.i, %mul205.i
  %arrayidx208.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 3
  %27 = load double, double* %arrayidx208.i, align 8
  %mul211.i = fmul double %7, %27
  %add212.i = fadd double %add206.i, %mul211.i
  %mul218.i = fmul double %8, %24
  %mul223.i = fmul double %9, %25
  %add224.i = fadd double %mul218.i, %mul223.i
  %mul229.i = fmul double %10, %26
  %add230.i = fadd double %add224.i, %mul229.i
  %mul235.i = fmul double %11, %27
  %add236.i = fadd double %add230.i, %mul235.i
  %mul242.i = fmul double %12, %24
  %mul247.i = fmul double %13, %25
  %add248.i = fadd double %mul242.i, %mul247.i
  %mul253.i = fmul double %14, %26
  %add254.i = fadd double %add248.i, %mul253.i
  %mul259.i = fmul double %15, %27
  %add260.i = fadd double %add254.i, %mul259.i
  %mul266.i = fmul double %16, %24
  %mul271.i = fmul double %17, %25
  %add272.i = fadd double %mul266.i, %mul271.i
  %mul277.i = fmul double %18, %26
  %add278.i = fadd double %add272.i, %mul277.i
  %mul283.i = fmul double %19, %27
  %add284.i = fadd double %add278.i, %mul283.i
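  ; Row 3 of Out: A's row 3 against the same B columns.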
  %arrayidx287.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 0
  %28 = load double, double* %arrayidx287.i, align 8
  %mul290.i = fmul double %1, %28
  %arrayidx292.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 1
  %29 = load double, double* %arrayidx292.i, align 8
  %mul295.i = fmul double %3, %29
  %add296.i = fadd double %mul290.i, %mul295.i
  %arrayidx298.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 2
  %30 = load double, double* %arrayidx298.i, align 8
  %mul301.i = fmul double %5, %30
  %add302.i = fadd double %add296.i, %mul301.i
  %arrayidx304.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 3
  %31 = load double, double* %arrayidx304.i, align 8
  %mul307.i = fmul double %7, %31
  %add308.i = fadd double %add302.i, %mul307.i
  %mul314.i = fmul double %8, %28
  %mul319.i = fmul double %9, %29
  %add320.i = fadd double %mul314.i, %mul319.i
  %mul325.i = fmul double %10, %30
  %add326.i = fadd double %add320.i, %mul325.i
  %mul331.i = fmul double %11, %31
  %add332.i = fadd double %add326.i, %mul331.i
  %mul338.i = fmul double %12, %28
  %mul343.i = fmul double %13, %29
  %add344.i = fadd double %mul338.i, %mul343.i
  %mul349.i = fmul double %14, %30
  %add350.i = fadd double %add344.i, %mul349.i
  %mul355.i = fmul double %15, %31
  %add356.i = fadd double %add350.i, %mul355.i
  %mul362.i = fmul double %16, %28
  %mul367.i = fmul double %17, %29
  %add368.i = fadd double %mul362.i, %mul367.i
  %mul373.i = fmul double %18, %30
  %add374.i = fadd double %add368.i, %mul373.i
  %mul379.i = fmul double %19, %31
  %add380.i = fadd double %add374.i, %mul379.i
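  ; Store all 16 results to Out in row-major order.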
  store double %add20.i, double* %Out, align 8
  %Res.i.sroa.1.8.idx2 = getelementptr inbounds double, double* %Out, i64 1
  store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8
  %Res.i.sroa.2.16.idx4 = getelementptr inbounds double, double* %Out, i64 2
  store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8
  %Res.i.sroa.3.24.idx6 = getelementptr inbounds double, double* %Out, i64 3
  store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8
  %Res.i.sroa.4.32.idx8 = getelementptr inbounds double, double* %Out, i64 4
  store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8
  %Res.i.sroa.5.40.idx10 = getelementptr inbounds double, double* %Out, i64 5
  store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8
  %Res.i.sroa.6.48.idx12 = getelementptr inbounds double, double* %Out, i64 6
  store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8
  %Res.i.sroa.7.56.idx14 = getelementptr inbounds double, double* %Out, i64 7
  store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8
  %Res.i.sroa.8.64.idx16 = getelementptr inbounds double, double* %Out, i64 8
  store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8
  %Res.i.sroa.9.72.idx18 = getelementptr inbounds double, double* %Out, i64 9
  store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8
  %Res.i.sroa.10.80.idx20 = getelementptr inbounds double, double* %Out, i64 10
  store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8
  %Res.i.sroa.11.88.idx22 = getelementptr inbounds double, double* %Out, i64 11
  store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8
  %Res.i.sroa.12.96.idx24 = getelementptr inbounds double, double* %Out, i64 12
  store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8
  %Res.i.sroa.13.104.idx26 = getelementptr inbounds double, double* %Out, i64 13
  store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8
  %Res.i.sroa.14.112.idx28 = getelementptr inbounds double, double* %Out, i64 14
  store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8
  %Res.i.sroa.15.120.idx30 = getelementptr inbounds double, double* %Out, i64 15
  store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8
  ret void
}

attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }