Home | History | Annotate | Download | only in AMDGPU
      ; Test driver: runs opt for the amdgcn-amd-amdhsa triple with basicaa and the
      ; load-store vectorizer enabled, then pipes the textual IR into FileCheck.
      1 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
      2 
      ; Pointer widths that matter for the tests in this file, as declared below:
      ; addrspace(1) pointers are 64 bits (p1:64:64) and addrspace(3) pointers are
      ; 32 bits (p3:32:32).
      3 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
      4 
      ; Work-item id intrinsic declaration; no function visible in this file calls it.
      5 declare i32 @llvm.amdgcn.workitem.id.x() #1
      6 
      ; Two adjacent loads of 64-bit addrspace(1) pointers (%b and %b + 1 element)
      ; and two adjacent null-pointer stores (%a and %a + 1 element).
      ; Expected output: one <2 x i64> load whose lanes are converted back to
      ; pointers with inttoptr, and one <2 x i64> zeroinitializer store.
      7 ; CHECK-LABEL: @merge_v2p1i8(
      8 ; CHECK: load <2 x i64>
      9 ; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
     10 ; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
     11 ; CHECK: store <2 x i64> zeroinitializer
     12 define void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 {
     13 entry:
      ; Addresses of the second pointer slot of each array.
     14   %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
     15   %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1
     16 
     17   %ld.c = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, align 4
     18   %ld.c.idx.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b.1, align 4
     19 
     20   store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a, align 4
     21   store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a.1, align 4
     22 
     23   ret void
     24 }
     25 
     ; Same shape as the addrspace(1) pointer-pair test, but with 32-bit
     ; addrspace(3) pointers: two adjacent pointer loads and two adjacent
     ; null-pointer stores.
     ; Expected output: one <2 x i32> load whose lanes are converted back to
     ; pointers with inttoptr, and one <2 x i32> zeroinitializer store.
     26 ; CHECK-LABEL: @merge_v2p3i8(
     27 ; CHECK: load <2 x i32>
     28 ; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
     29 ; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
     30 ; CHECK: store <2 x i32> zeroinitializer
     31 define void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 {
     32 entry:
     ; Addresses of the second pointer slot of each array.
     33   %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1
     34   %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1
     35 
     36   %ld.c = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, align 4
     37   %ld.c.idx.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b.1, align 4
     38 
     39   store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a, align 4
     40   store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a.1, align 4
     41 
     42   ret void
     43 }
     44 
     ; Mixed-type load pair: an i64 from %a followed by a 64-bit addrspace(1)
     ; pointer from the next i64 slot (%a + 1 element).
     ; Expected output: a single <2 x i64> load; lane 1 is extracted and turned
     ; into the pointer with inttoptr.
     45 ; CHECK-LABEL: @merge_load_i64_ptr64(
     46 ; CHECK: load <2 x i64>
     47 ; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
     48 ; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
     49 define void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 {
     50 entry:
     51   %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
     ; Reinterpret the second i64 slot as a slot holding a pointer.
     52   %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*
     53 
     54   %ld.0 = load i64, i64 addrspace(1)* %a
     55   %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast
     56 
     57   ret void
     58 }
     59 
     ; Mixed-type load pair in the opposite order: a 64-bit addrspace(1) pointer
     ; from %a followed by an i64 from the next i64 slot (%a + 1 element).
     ; Expected output: a single <2 x i64> load; lane 0 is extracted and turned
     ; into the pointer with inttoptr.
     60 ; CHECK-LABEL: @merge_load_ptr64_i64(
     61 ; CHECK: load <2 x i64>
     62 ; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
     63 ; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)*
     64 define void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 {
     65 entry:
     ; Reinterpret the first i64 slot as a slot holding a pointer.
     66   %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
     67   %a.1 =  getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
     68 
     69   %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
     70   %ld.1 = load i64, i64 addrspace(1)* %a.1
     71 
     72   ret void
     73 }
     74 
     ; Mixed-type store pair: the 64-bit addrspace(1) pointer %ptr0 goes to %a and
     ; the i64 %val1 goes to the next i64 slot (%a + 1 element).
     ; Expected output: %ptr0 is converted with ptrtoint, inserted into lane 0 of
     ; a <2 x i64>, and the pair is written with one <2 x i64> store.
     75 ; CHECK-LABEL: @merge_store_ptr64_i64(
     76 ; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
     77 ; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0
     78 ; CHECK: store <2 x i64>
     79 define void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
     80 entry:
     ; Reinterpret the first i64 slot as a slot holding a pointer.
     81   %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
     82   %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
     83 
     84 
     85   store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
     86   store i64 %val1, i64 addrspace(1)* %a.1
     87 
     88   ret void
     89 }
     90 
     ; Mixed-type store pair in the opposite order: the i64 %val0 goes to %a and
     ; the 64-bit addrspace(1) pointer %ptr1 goes to the next pointer slot.
     ; Expected output: %ptr1 is converted with ptrtoint, inserted into lane 1 of
     ; a <2 x i64>, and the pair is written with one <2 x i64> store.
     91 ; CHECK-LABEL: @merge_store_i64_ptr64(
     92 ; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
     93 ; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1]], i32 1
     94 ; CHECK: store <2 x i64>
     95 define void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 {
     96 entry:
     97   %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
     ; Reinterpret the first pointer slot as an i64 slot.
     98   %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)*
     99 
    100   store i64 %val0, i64 addrspace(1)* %a.cast
    101   store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1
    102 
    103   ret void
    104 }
    105 
    ; 32-bit variant of the mixed-type load pair: an i32 from %a followed by a
    ; 32-bit addrspace(3) pointer from the next i32 slot (%a + 1 element).
    ; Expected output: a single <2 x i32> load; lane 1 is extracted and turned
    ; into the pointer with inttoptr.
    106 ; CHECK-LABEL: @merge_load_i32_ptr32(
    107 ; CHECK: load <2 x i32>
    108 ; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 1
    109 ; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)*
    110 define void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 {
    111 entry:
    112   %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
    ; Reinterpret the second i32 slot as a slot holding a pointer.
    113   %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)*
    114 
    115   %ld.0 = load i32, i32 addrspace(3)* %a
    116   %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.1.cast
    117 
    118   ret void
    119 }
    120 
    ; 32-bit mixed-type load pair, pointer first: a 32-bit addrspace(3) pointer
    ; from %a followed by an i32 from the next i32 slot (%a + 1 element).
    ; Expected output: a single <2 x i32> load; lane 0 is extracted and turned
    ; into the pointer with inttoptr.
    121 ; CHECK-LABEL: @merge_load_ptr32_i32(
    122 ; CHECK: load <2 x i32>
    123 ; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 0
    124 ; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)*
    125 define void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 {
    126 entry:
    ; Reinterpret the first i32 slot as a slot holding a pointer.
    127   %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
    128   %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
    129 
    130   %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.cast
    131   %ld.1 = load i32, i32 addrspace(3)* %a.1
    132 
    133   ret void
    134 }
    135 
    ; 32-bit mixed-type store pair: the addrspace(3) pointer %ptr0 goes to %a and
    ; the i32 %val1 goes to the next i32 slot (%a + 1 element).
    ; Expected output: %ptr0 is converted with ptrtoint, inserted into lane 0 of
    ; a <2 x i32>, and the pair is written with one <2 x i32> store.
    136 ; CHECK-LABEL: @merge_store_ptr32_i32(
    137 ; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
    138 ; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
    139 ; CHECK: store <2 x i32>
    140 define void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
    141 entry:
    ; Reinterpret the first i32 slot as a slot holding a pointer.
    142   %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
    143   %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
    144 
    145   store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(3)* %a.cast
    146   store i32 %val1, i32 addrspace(3)* %a.1
    147 
    148   ret void
    149 }
    150 
    ; 32-bit mixed-type store pair, integer first: the i32 %val0 goes to %a and
    ; the addrspace(3) pointer %ptr1 goes to the next pointer slot.
    ; Expected output: %ptr1 is converted with ptrtoint, inserted into lane 1 of
    ; a <2 x i32>, and the pair is written with one <2 x i32> store.
    151 ; CHECK-LABEL: @merge_store_i32_ptr32(
    152 ; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32
    153 ; CHECK: insertelement <2 x i32> %{{[0-9]+}}, i32 [[ELT1]], i32 1
    154 ; CHECK: store <2 x i32>
    155 define void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 {
    156 entry:
    157   %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1
    ; Reinterpret the first pointer slot as an i32 slot.
    158   %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)*
    159 
    160   store i32 %val0, i32 addrspace(3)* %a.cast
    161   store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(3)* %a.1
    162 
    163   ret void
    164 }
    165 
    ; Negative test: stores a 32-bit addrspace(3) pointer at %a and an i64 at
    ; %a + 8 bytes, both into addrspace(1) memory. The directives expect both
    ; scalar stores to appear in the output, in the original order.
    ; NOTE(review): the pointer store covers bytes 0-3 and the i64 store starts
    ; at byte 8, so the two accesses are not contiguous - confirm that this
    ; still exercises the intended pointer/integer size mismatch.
    166 ; CHECK-LABEL: @no_merge_store_ptr32_i64(
    167 ; CHECK: store i8 addrspace(3)*
    168 ; CHECK: store i64
    169 define void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 {
    170 entry:
    ; Reinterpret the first i64 slot as a slot holding a 32-bit pointer.
    171   %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
    172   %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
    173 
    174 
    175   store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(1)* %a.cast
    176   store i64 %val1, i64 addrspace(1)* %a.1
    177 
    178   ret void
    179 }
    180 
    ; Negative test: stores an i64 at %a and a 32-bit addrspace(3) pointer at the
    ; next pointer slot, both into addrspace(1) memory. The directives expect
    ; both scalar stores to appear in the output, in the original order.
    ; NOTE(review): %a.1 advances by one 4-byte pointer element, so the pointer
    ; store lands at bytes 4-7, inside the 8 bytes written by the i64 store -
    ; confirm that the overlap is intended.
    181 ; CHECK-LABEL: @no_merge_store_i64_ptr32(
    182 ; CHECK: store i64
    183 ; CHECK: store i8 addrspace(3)*
    184 define void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 {
    185 entry:
    186   %a.1 =  getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1
    ; Reinterpret the first pointer slot as an i64 slot.
    187   %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)*
    188 
    189   store i64 %val0, i64 addrspace(1)* %a.cast
    190   store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(1)* %a.1
    191 
    192   ret void
    193 }
    194 
    ; Negative test: loads an i64 from %a (bytes 0-7) and a 32-bit addrspace(3)
    ; pointer from %a + 8 bytes, both from addrspace(1) memory. The directives
    ; expect both scalar loads to appear in the output, in the original order.
    195 ; CHECK-LABEL: @no_merge_load_i64_ptr32(
    196 ; CHECK: load i64,
    197 ; CHECK: load i8 addrspace(3)*,
    198 define void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 {
    199 entry:
    200   %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
    ; Reinterpret the second i64 slot as a slot holding a 32-bit pointer.
    201   %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)*
    202 
    203   %ld.0 = load i64, i64 addrspace(1)* %a
    204   %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.1.cast
    205 
    206   ret void
    207 }
    208 
    ; Negative test: loads a 32-bit addrspace(3) pointer from %a and an i64 from
    ; %a + 8 bytes, both from addrspace(1) memory. The directives expect both
    ; scalar loads to appear in the output, in the original order.
    ; NOTE(review): the pointer load covers bytes 0-3 and the i64 load starts at
    ; byte 8, so the two accesses are not contiguous - confirm that this still
    ; exercises the intended pointer/integer size mismatch.
    209 ; CHECK-LABEL: @no_merge_load_ptr32_i64(
    210 ; CHECK: load i8 addrspace(3)*,
    211 ; CHECK: load i64,
    212 define void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 {
    213 entry:
    ; Reinterpret the first i64 slot as a slot holding a 32-bit pointer.
    214   %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
    215   %a.1 =  getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
    216 
    217   %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.cast
    218   %ld.1 = load i64, i64 addrspace(1)* %a.1
    219 
    220   ret void
    221 }
    222 
    ; Inputs are already <2 x pointer> vectors: two adjacent vector loads from the
    ; noalias %b and two adjacent zeroinitializer vector stores to the noalias %a.
    ; The directives below pin the current behavior, in which the two loads and
    ; the two stores stay separate <2 x i8 addrspace(1)*> accesses rather than
    ; being combined into wider ones (see the XXX note).
    223 ; XXX - This isn't merged for some reason
    224 ; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
    225 ; CHECK: load <2 x i8 addrspace(1)*>
    226 ; CHECK: load <2 x i8 addrspace(1)*>
    227 ; CHECK: store <2 x i8 addrspace(1)*>
    228 ; CHECK: store <2 x i8 addrspace(1)*>
    229 define void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 {
    230 entry:
    ; Addresses of the second <2 x pointer> element of each array.
    231   %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1
    232   %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1
    233 
    234   %ld.c = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, align 4
    235   %ld.c.idx.1 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b.1, align 4
    236 
    237   store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a, align 4
    238   store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a.1, align 4
    239   ret void
    240 }
    241 
    ; Pointer/floating-point load pair: a 64-bit addrspace(1) pointer from %a
    ; followed by a double from the next double slot (%a + 1 element).
    ; Expected output: a single <2 x i64> load; lane 0 is converted to the
    ; pointer with inttoptr and lane 1 is converted to double with bitcast.
    242 ; CHECK-LABEL: @merge_load_ptr64_f64(
    243 ; CHECK: load <2 x i64>
    244 ; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
    245 ; CHECK: [[ELT0_INT:%[0-9]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)*
    246 ; CHECK: [[ELT1_INT:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
    247 ; CHECK: bitcast i64 [[ELT1_INT]] to double
    248 define void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 {
    249 entry:
    ; Reinterpret the first double slot as a slot holding a pointer.
    250   %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
    251   %a.1 =  getelementptr inbounds double, double addrspace(1)* %a, i64 1
    252 
    253   %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
    254   %ld.1 = load double, double addrspace(1)* %a.1
    255 
    256   ret void
    257 }
    258 
    ; Floating-point/pointer load pair: a double from %a followed by a 64-bit
    ; addrspace(1) pointer from the next double slot (%a + 1 element).
    ; Expected output: a single <2 x i64> load; lane 0 is converted to double
    ; with bitcast and lane 1 is converted to the pointer with inttoptr.
    259 ; CHECK-LABEL: @merge_load_f64_ptr64(
    260 ; CHECK: load <2 x i64>
    261 ; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
    262 ; CHECK: bitcast i64 [[ELT0]] to double
    263 ; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
    264 ; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
    265 define void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 {
    266 entry:
    267   %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
    ; Reinterpret the second double slot as a slot holding a pointer.
    268   %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*
    269 
    270   %ld.0 = load double, double addrspace(1)* %a
    271   %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast
    272 
    273   ret void
    274 }
    275 
    ; Pointer/floating-point store pair: the 64-bit addrspace(1) pointer %ptr0
    ; goes to %a and the double %val1 goes to the next double slot.
    ; Expected output: %ptr0 is converted with ptrtoint into lane 0, %val1 is
    ; bitcast to i64 into lane 1, and the pair is written with one <2 x i64>
    ; store.
    276 ; CHECK-LABEL: @merge_store_ptr64_f64(
    277 ; CHECK: [[ELT0_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
    278 ; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
    279 ; CHECK: [[ELT1_INT:%[0-9]+]] = bitcast double %val1 to i64
    280 ; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
    281 ; CHECK: store <2 x i64>
    282 define void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 {
    283 entry:
    ; Reinterpret the first double slot as a slot holding a pointer.
    284   %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
    285   %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
    286 
    287   store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
    288   store double %val1, double addrspace(1)* %a.1
    289 
    290   ret void
    291 }
    292 
    ; Floating-point/pointer store pair: the double %val0 goes to %a and the
    ; 64-bit addrspace(1) pointer %ptr1 goes to the next pointer slot.
    ; Expected output: %val0 is bitcast to i64 into lane 0, %ptr1 is converted
    ; with ptrtoint into lane 1, and the pair is written with one <2 x i64>
    ; store.
    293 ; CHECK-LABEL: @merge_store_f64_ptr64(
    294 ; CHECK: [[ELT0_INT:%[0-9]+]] = bitcast double %val0 to i64
    295 ; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
    296 ; CHECK: [[ELT1_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
    297 ; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
    298 ; CHECK: store <2 x i64>
    299 define void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 {
    300 entry:
    301   %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
    ; Reinterpret the first pointer slot as a double slot.
    302   %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)*
    303 
    304   store double %val0, double addrspace(1)* %a.cast
    305   store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1
    306 
    307   ret void
    308 }
    309 
    ; Attribute groups: #0 is attached to the test function definitions in this
    ; file, #1 to the llvm.amdgcn.workitem.id.x declaration.
    310 attributes #0 = { nounwind }
    311 attributes #1 = { nounwind readnone }
    312