; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@b = common global [4 x i32] zeroinitializer, align 16
@c = common global [4 x i32] zeroinitializer, align 16
@d = common global [4 x i32] zeroinitializer, align 16
@e = common global [4 x i32] zeroinitializer, align 16
@a = common global [4 x i32] zeroinitializer, align 16
@fb = common global [4 x float] zeroinitializer, align 16
@fc = common global [4 x float] zeroinitializer, align 16
@fa = common global [4 x float] zeroinitializer, align 16
@fd = common global [4 x float] zeroinitializer, align 16

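; Check vectorization of the following scalar pattern (C-like pseudocode,
; reconstructed from the IR in @addsub below). The even lanes add the two
; partial sums and the odd lanes subtract them, so the expected output is one
; vector add, one vector sub, and a shufflevector whose mask <0, 5, 2, 7>
; takes lanes 0 and 2 from the add result and lanes 1 and 3 from the sub result:
;  a[0] = (b[0] + c[0]) + (d[0] + e[0]);
;  a[1] = (b[1] + c[1]) - (d[1] + e[1]);
;  a[2] = (b[2] + c[2]) + (d[2] + e[2]);
;  a[3] = (b[3] + c[3]) - (d[3] + e[3]);
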
; CHECK-LABEL: @addsub
; CHECK: %5 = add nsw <4 x i32> %3, %4
; CHECK: %6 = add nsw <4 x i32> %2, %5
; CHECK: %7 = sub nsw <4 x i32> %2, %5
; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>

; Function Attrs: nounwind uwtable
define void @addsub() #0 {
entry:
  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
  %add = add nsw i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
  %add1 = add nsw i32 %2, %3
  %add2 = add nsw i32 %add, %add1
  store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
  %add3 = add nsw i32 %4, %5
  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
  %add4 = add nsw i32 %6, %7
  %sub = sub nsw i32 %add3, %add4
  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
  %add5 = add nsw i32 %8, %9
  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
  %add6 = add nsw i32 %10, %11
  %add7 = add nsw i32 %add5, %add6
  store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
  %add8 = add nsw i32 %12, %13
  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
  %add9 = add nsw i32 %14, %15
  %sub10 = sub nsw i32 %add8, %add9
  store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
  ret void
}

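; Same pattern as @addsub with the sub and add lanes swapped (C-like
; pseudocode, reconstructed from the IR in @subadd below):
;  a[0] = (b[0] + c[0]) - (d[0] + e[0]);
;  a[1] = (b[1] + c[1]) + (d[1] + e[1]);
;  a[2] = (b[2] + c[2]) - (d[2] + e[2]);
;  a[3] = (b[3] + c[3]) + (d[3] + e[3]);
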
; CHECK-LABEL: @subadd
; CHECK:  %5 = add nsw <4 x i32> %3, %4
; CHECK:  %6 = sub nsw <4 x i32> %2, %5
; CHECK:  %7 = add nsw <4 x i32> %2, %5
; CHECK:  %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>

; Function Attrs: nounwind uwtable
define void @subadd() #0 {
entry:
  %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4
  %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4
  %add = add nsw i32 %0, %1
  %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4
  %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4
  %add1 = add nsw i32 %2, %3
  %sub = sub nsw i32 %add, %add1
  store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4
  %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4
  %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4
  %add2 = add nsw i32 %4, %5
  %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4
  %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4
  %add3 = add nsw i32 %6, %7
  %add4 = add nsw i32 %add2, %add3
  store i32 %add4, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4
  %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4
  %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4
  %add5 = add nsw i32 %8, %9
  %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4
  %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4
  %add6 = add nsw i32 %10, %11
  %sub7 = sub nsw i32 %add5, %add6
  store i32 %sub7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4
  %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4
  %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4
  %add8 = add nsw i32 %12, %13
  %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4
  %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4
  %add9 = add nsw i32 %14, %15
  %add10 = add nsw i32 %add8, %add9
  store i32 %add10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4
  ret void
}

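; Check vectorization of the following alternating fadd/fsub pattern (C-like
; pseudocode, reconstructed from the IR in @faddfsub below):
;  fa[0] = fb[0] + fc[0];
;  fa[1] = fb[1] - fc[1];
;  fa[2] = fb[2] + fc[2];
;  fa[3] = fb[3] - fc[3];
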
; CHECK-LABEL: @faddfsub
; CHECK: %2 = fadd <4 x float> %0, %1
; CHECK: %3 = fsub <4 x float> %0, %1
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @faddfsub() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %add = fadd float %0, %1
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %sub = fsub float %2, %3
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %add1 = fadd float %4, %5
  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %sub2 = fsub float %6, %7
  store float %sub2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

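; Check vectorization of the following alternating fsub/fadd pattern (C-like
; pseudocode, reconstructed from the IR in @fsubfadd below):
;  fa[0] = fb[0] - fc[0];
;  fa[1] = fb[1] + fc[1];
;  fa[2] = fb[2] - fc[2];
;  fa[3] = fb[3] + fc[3];
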
; CHECK-LABEL: @fsubfadd
; CHECK: %2 = fsub <4 x float> %0, %1
; CHECK: %3 = fadd <4 x float> %0, %1
; CHECK: %4 = shufflevector <4 x float> %2, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; Function Attrs: nounwind uwtable
define void @fsubfadd() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %sub = fsub float %0, %1
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %add = fadd float %2, %3
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %sub1 = fsub float %4, %5
  store float %sub1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %add2 = fadd float %6, %7
  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

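; The following pattern (C-like pseudocode, reconstructed from the IR in
; @No_faddfsub below) is add, add, add, sub rather than a strictly alternating
; add/sub sequence, so it must not be turned into a vector add/sub plus
; shuffle:
;  fa[0] = fb[0] + fc[0];
;  fa[1] = fb[1] + fc[1];
;  fa[2] = fb[2] + fc[2];
;  fa[3] = fb[3] - fc[3];
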
; CHECK-LABEL: @No_faddfsub
; CHECK-NOT: fadd <4 x float>
; CHECK-NOT: fsub <4 x float>
; CHECK-NOT: shufflevector
; Function Attrs: nounwind uwtable
define void @No_faddfsub() #0 {
entry:
  %0 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %add = fadd float %0, %1
  store float %add, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %add1 = fadd float %2, %3
  store float %add1, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %add2 = fadd float %4, %5
  store float %add2, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  %sub = fsub float %6, %7
  store float %sub, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for float data type:
;  fc[0] = fb[0]+fa[0]; //swapped fb and fa
;  fc[1] = fa[1]-fb[1];
;  fc[2] = fa[2]+fb[2];
;  fc[3] = fa[3]-fb[3];

; CHECK-LABEL: @reorder_alt
; CHECK: %3 = fadd <4 x float> %1, %2
; CHECK: %4 = fsub <4 x float> %1, %2
; CHECK: %5 = shufflevector <4 x float> %3, <4 x float> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
define void @reorder_alt() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %3 = fadd float %1, %2
  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %6 = fsub float %4, %5
  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %9 = fadd float %7, %8
  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %12 = fsub float %10, %11
  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for float data type:
;  fc[0] = fa[0]+(fb[0]-fd[0]);
;  fc[1] = fa[1]-(fb[1]+fd[1]);
;  fc[2] = fa[2]+(fb[2]-fd[2]);
;  fc[3] = fa[3]-(fd[3]+fb[3]); //swapped fd and fb

; CHECK-LABEL: @reorder_alt_subTree
; CHECK: %4 = fsub <4 x float> %3, %2
; CHECK: %5 = fadd <4 x float> %3, %2
; CHECK: %6 = shufflevector <4 x float> %4, <4 x float> %5, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK: %7 = fadd <4 x float> %1, %6
; CHECK: %8 = fsub <4 x float> %1, %6
; CHECK: %9 = shufflevector <4 x float> %7, <4 x float> %8, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
define void @reorder_alt_subTree() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 0), align 4
  %4 = fsub float %2, %3
  %5 = fadd float %1, %4
  store float %5, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %6 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 1), align 4
  %9 = fadd float %7, %8
  %10 = fsub float %6, %9
  store float %10, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %12 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %13 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 2), align 4
  %14 = fsub float %12, %13
  %15 = fadd float %11, %14
  store float %15, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %16 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %17 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fd, i32 0, i64 3), align 4
  %18 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %19 = fadd float %17, %18
  %20 = fsub float %16, %19
  store float %20, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

; Check vectorization of the following code for double data type:
;  c[0] = (a[0]+b[0])-d[0];
;  c[1] = d[1]+(a[1]+b[1]); //swapped d[1] and (a[1]+b[1])

; CHECK-LABEL: @reorder_alt_rightsubTree
; CHECK: fadd <2 x double>
; CHECK: fsub <2 x double>
; CHECK: shufflevector <2 x double>
define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) {
  %1 = load double, double* %a
  %2 = load double, double* %b
  %3 = fadd double %1, %2
  %4 = load double, double* %d
  %5 = fsub double %3, %4
  store double %5, double* %c
  %6 = getelementptr inbounds double, double* %d, i64 1
  %7 = load double, double* %6
  %8 = getelementptr inbounds double, double* %a, i64 1
  %9 = load double, double* %8
  %10 = getelementptr inbounds double, double* %b, i64 1
  %11 = load double, double* %10
  %12 = fadd double %9, %11
  %13 = fadd double %7, %12
  %14 = getelementptr inbounds double, double* %c, i64 1
  store double %13, double* %14
  ret void
}

; Don't vectorize the following code for float data type, as fsub is not commutative:
;  fc[0] = fb[0]+fa[0];
;  fc[1] = fa[1]-fb[1];
;  fc[2] = fa[2]+fb[2];
;  fc[3] = fb[3]-fa[3];
;  In the code above the operands of the 1st operation can be swapped because
;  fadd is commutative, but the operands of the 4th cannot because fsub is not.

; CHECK-LABEL: @no_vec_shuff_reorder
; CHECK-NOT: fadd <4 x float>
; CHECK-NOT: fsub <4 x float>
; CHECK-NOT: shufflevector
define void @no_vec_shuff_reorder() #0 {
  %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 0), align 4
  %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 0), align 4
  %3 = fadd float %1, %2
  store float %3, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 0), align 4
  %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 1), align 4
  %5 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 1), align 4
  %6 = fsub float %4, %5
  store float %6, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 1), align 4
  %7 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 2), align 4
  %8 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 2), align 4
  %9 = fadd float %7, %8
  store float %9, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 2), align 4
  %10 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fb, i32 0, i64 3), align 4
  %11 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @fa, i32 0, i64 3), align 4
  %12 = fsub float %10, %11
  store float %12, float* getelementptr inbounds ([4 x float], [4 x float]* @fc, i32 0, i64 3), align 4
  ret void
}

attributes #0 = { nounwind }