Home | History | Annotate | Download | only in ScalarRepl
      1 ; RUN: opt < %s -scalarrepl -S | FileCheck %s
      2 
      3 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"  ; leading "E" selects big-endian; pointers are 64-bit
      4 target triple = "x86_64-apple-darwin10.0.0"  ; NOTE(review): big-endian layout paired with an x86_64 triple - presumably harmless for this pass, confirm
      5 
      6 ; CHECK: @test1
      7 ; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
      8 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
      9 ; CHECK: memset
     10 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
     11 
     12 ; Split the array, but don't replace the memset with an insertelement,
     13 ; as it is not at a constant offset.
     14 ; The load, however, can be replaced with an extractelement.
     15 define float @test1(i32 %idx1, i32 %idx2) {
     16 entry:
     17   %0 = alloca [4 x <4 x float>]  ; stack array of four 4-float vectors
     18   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0  ; zero the whole array
     19   %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1  ; lane %idx1 of array element 0
     20   %cast = bitcast float* %ptr1 to i8*  ; memset takes an i8* destination
     21   call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)  ; zero 4 bytes (one float) at the dynamic lane address
     22   %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2  ; lane %idx2 of array element 1 (a different element than the memset)
     23   %ret = load float* %ptr2  ; dynamic-lane read; expected to become an extractelement
     24   ret float %ret
     25 }
     26 
     27 ; CHECK: @test2
     28 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
     29 ; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
     30 
     31 ; Do SROA on the array when it has dynamic vector reads and writes.
     32 define float @test2(i32 %idx1, i32 %idx2) {
     33 entry:
     34   %0 = alloca [4 x <4 x float>]  ; stack array of four 4-float vectors
     35   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0  ; zero the whole array
     36   %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1  ; lane %idx1 of array element 0
     37   store float 1.0, float* %ptr1  ; dynamic-lane write; expected to become an insertelement
     38   %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2  ; lane %idx2 of the same array element 0
     39   %ret = load float* %ptr2  ; dynamic-lane read; expected to extract from the inserted vector
     40   ret float %ret
     41 }
     42 
     43 ; CHECK: test3
     44 ; CHECK: %0 = alloca [4 x <4 x float>]
     45 ; CHECK-NOT: alloca
     46 
     47 ; Don't do SROA on a dynamically indexed vector when it spans
     48 ; more than one array element of the alloca array it is within.
     49 define float @test3(i32 %idx1, i32 %idx2) {
     50 entry:
     51   %0 = alloca [4 x <4 x float>]  ; stack array of four 4-float vectors
     52   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0  ; initial store uses the array type (contrast with test4)
     53   %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*  ; view the whole array as a single 16-lane vector
     54   %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1  ; dynamic lane that may fall in any of the four array elements
     55   store float 1.0, float* %ptr1
     56   %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2  ; second dynamic lane over the same 16-lane view
     57   %ret = load float* %ptr2
     58   ret float %ret
     59 }
     60 
     61 ; CHECK: test4
     62 ; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
     63 ; CHECK: extractelement <16 x float> %0, i32 %idx2
     64 
     65 ; Don't do SROA on a dynamically indexed vector when it spans
     66 ; more than one array element of the alloca array it is within.
     67 ; However, unlike test3, the store is on the vector type,
     68 ; so SROA will convert the large alloca into the large vector
     69 ; type and do all accesses with insertelement/extractelement.
     70 define float @test4(i32 %idx1, i32 %idx2) {
     71 entry:
     72   %0 = alloca [4 x <4 x float>]  ; stack array of four 4-float vectors
     73   %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*  ; view the whole array as a single 16-lane vector
     74   store <16 x float> zeroinitializer, <16 x float>* %bigvec  ; initial store uses the 16-lane vector type (the difference from test3)
     75   %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1  ; dynamic lane of the 16-lane view
     76   store float 1.0, float* %ptr1  ; expected to become an insertelement on <16 x float>
     77   %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2  ; second dynamic lane of the 16-lane view
     78   %ret = load float* %ptr2  ; expected to become an extractelement on <16 x float>
     79   ret float %ret
     80 }
     81 
     82 ; CHECK: @test5
     83 ; CHECK: %0 = alloca [4 x <4 x float>]
     84 ; CHECK-NOT: alloca
     85 
     86 ; Don't do SROA as there is a second dynamically indexed array
     87 ; which may span multiple elements of the alloca.
     88 define float @test5(i32 %idx1, i32 %idx2) {
     89 entry:
     90   %0 = alloca [4 x <4 x float>]  ; stack array of four 4-float vectors
     91   store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0  ; zero the whole array
     92   %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1  ; lane %idx1 of array element 0
     93   %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*  ; reinterpret the dynamic lane address as an array holding a 2-lane vector
     94   %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1  ; second dynamic index on top of the first; the result has no uses in this function
     95   store float 1.0, float* %ptr1  ; the write goes through %ptr1, not %ptr3
     96   %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2  ; lane %idx2 of array element 0
     97   %ret = load float* %ptr4
     98   ret float %ret
     99 }
    100 
    101 ; CHECK: test6
    102 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
    103 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
    104 
    105 %vector.pair = type { %vector.anon, %vector.anon }  ; two wrapped vectors side by side
    106 %vector.anon = type { %vector }  ; one extra level of struct nesting around %vector
    107 %vector = type { <4 x float> }  ; innermost struct: a single 4-float vector
    108 
    109 ; Dynamic GEPs on vectors were crashing when the vector was inside a struct
    110 ; as the new GEP for the new alloca might not include all the indices from
    111 ; the original GEP, just the indices it needs to get to the correct offset of
    112 ; some type, not necessarily the dynamic vector.
    113 ; This test makes sure we don't have this crash.
    114 define float @test6(i32 %idx1, i32 %idx2) {
    115 entry:
    116   %0 = alloca %vector.pair  ; struct of two nested single-vector structs
    117   store %vector.pair zeroinitializer, %vector.pair* %0  ; zero both vectors
    118   %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1  ; path: pair field 0 -> %vector.anon field 0 -> %vector field 0 -> lane %idx1
    119   store float 1.0, float* %ptr1  ; dynamic-lane write into the first vector
    120   %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2  ; path: pair field 1 -> %vector.anon field 0 -> %vector field 0 -> lane %idx2
    121   %ret = load float* %ptr2  ; dynamic-lane read from the second vector, which is never written after the zero store
    122   ret float %ret
    123 }
    124 
    125 ; CHECK: test7
    126 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
    127 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
    128 
    129 %array.pair = type { [2 x %array.anon], %array.anon }  ; field 0: array of two wrappers; field 1: a single wrapper
    130 %array.anon = type { [2 x %vector] }  ; wrapper struct around an array of two %vector structs
    131 
    132 ; This is the same as test6 and tests the same crash, but on arrays.
    133 define float @test7(i32 %idx1, i32 %idx2) {
    134 entry:
    135   %0 = alloca %array.pair  ; struct mixing arrays of wrappers and a single wrapper
    136   store %array.pair zeroinitializer, %array.pair* %0  ; zero every vector in the struct
    137   %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1  ; path: pair field 0 -> array elt 0 -> %array.anon field 0 -> array elt 0 -> %vector field 0 -> lane %idx1
    138   store float 1.0, float* %ptr1  ; dynamic-lane write into a vector under field 0
    139   %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2  ; path: pair field 1 -> %array.anon field 0 -> array elt 0 -> %vector field 0 -> lane %idx2
    140   %ret = load float* %ptr2  ; dynamic-lane read from a vector under field 1, which is never written after the zero store
    141   ret float %ret
    142 }
    143 
    144 ; CHECK: test8
    145 ; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
    146 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
    147 ; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
    148 ; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
    149 
    150 ; Do SROA on the vector when it has dynamic vector reads and writes
    151 ; from a non-zero offset.
    152 define float @test8(i32 %idx1, i32 %idx2) {
    153 entry:
    154   %0 = alloca <4 x float>  ; a single 4-float vector on the stack
    155   store <4 x float> zeroinitializer, <4 x float>* %0  ; zero all four lanes
    156   %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1  ; constant address of lane 1
    157   %ptr2 = bitcast float* %ptr1 to <3 x float>*  ; view lanes 1..3 as a 3-lane vector
    158   %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1  ; dynamic lane of that view, i.e. lane %idx1 + 1 of %0
    159   store float 1.0, float* %ptr3  ; expected to become an insertelement at index %idx1 + 1
    160   %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2  ; constant address of lane 2
    161   %ptr5 = bitcast float* %ptr4 to <2 x float>*  ; view lanes 2..3 as a 2-lane vector
    162   %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2  ; dynamic lane of that view, i.e. lane %idx2 + 2 of %0
    163   %ret = load float* %ptr6  ; expected to become an extractelement at index %idx2 + 2
    164   ret float %ret
    165 }
    166 
    167 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)  ; used by test1; operands presumably (dest, fill byte, length, alignment, isvolatile) - confirm against the LangRef for this LLVM version
    168