Home | History | Annotate | Download | only in R600
      1 ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
      2 
      3 ; FIXME: Broken on evergreen
      4 ; FIXME: For some reason the 8- and 16-element vectors are being stored as
      5 ; individual elements instead of with 128-bit stores.
      6 
      7 
      8 ; FIXME: Why is the constant moved into the intermediate register and
      9 ; not just directly into the vector component?
     10 
     11 ; SI-LABEL: @insertelement_v4f32_0:
     12 ; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
     13 ; V_MOV_B32_e32
     14 ; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
     15 ; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
     16 ; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
     17 define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind { ; constant-index insert into lane 0; only the label line above is an active check, the instruction patterns carry no check prefix
     18   %lane0.set = insertelement <4 x float> %a, float 5.000000e+00, i32 0 ; overwrite element 0 of %a with 5.0
     19   store <4 x float> %lane0.set, <4 x float> addrspace(1)* %out, align 16 ; write the whole updated vector to %out
     20   ret void
     21 }
     22 
     23 ; SI-LABEL: @insertelement_v4f32_1:
     24 define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind { ; constant-index insert into lane 1; only the function label is checked
     25   %lane1.set = insertelement <4 x float> %a, float 5.000000e+00, i32 1 ; overwrite element 1 of %a with 5.0
     26   store <4 x float> %lane1.set, <4 x float> addrspace(1)* %out, align 16 ; write the whole updated vector to %out
     27   ret void
     28 }
     29 
     30 ; SI-LABEL: @insertelement_v4f32_2:
     31 define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind { ; constant-index insert into lane 2; only the function label is checked
     32   %lane2.set = insertelement <4 x float> %a, float 5.000000e+00, i32 2 ; overwrite element 2 of %a with 5.0
     33   store <4 x float> %lane2.set, <4 x float> addrspace(1)* %out, align 16 ; write the whole updated vector to %out
     34   ret void
     35 }
     36 
     37 ; SI-LABEL: @insertelement_v4f32_3:
     38 define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind { ; constant-index insert into lane 3 (the last lane); only the function label is checked
     39   %lane3.set = insertelement <4 x float> %a, float 5.000000e+00, i32 3 ; overwrite element 3 of %a with 5.0
     40   store <4 x float> %lane3.set, <4 x float> addrspace(1)* %out, align 16 ; write the whole updated vector to %out
     41   ret void
     42 }
     43 
     44 ; SI-LABEL: @insertelement_v4i32_0:
     45 define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind { ; integer variant of the constant-index insert; only the function label is checked
     46   %lane0.set = insertelement <4 x i32> %a, i32 999, i32 0 ; overwrite element 0 of %a with the constant 999
     47   store <4 x i32> %lane0.set, <4 x i32> addrspace(1)* %out, align 16 ; write the whole updated vector to %out
     48   ret void
     49 }
     50 
     51 ; SI-LABEL: @dynamic_insertelement_v2f32:
     52 ; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
     53 ; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
     54 ; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
     55 define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind { ; the lane index %b is a function argument, not a constant
     56   %updated = insertelement <2 x float> %a, float 5.000000e+00, i32 %b ; put 5.0 into the element of %a selected by %b
     57   store <2 x float> %updated, <2 x float> addrspace(1)* %out, align 8 ; the checks above expect one DWORDX2 store starting at the V_MOVRELD destination register
     58   ret void
     59 }
     60 
     61 ; SI-LABEL: @dynamic_insertelement_v4f32:
     62 ; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
     63 ; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
     64 ; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
     65 define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind { ; the lane index %b is a function argument, not a constant
     66   %updated = insertelement <4 x float> %a, float 5.000000e+00, i32 %b ; put 5.0 into the element of %a selected by %b
     67   store <4 x float> %updated, <4 x float> addrspace(1)* %out, align 16 ; the checks above expect one DWORDX4 store starting at the V_MOVRELD destination register
     68   ret void
     69 }
     70 
     71 ; SI-LABEL: @dynamic_insertelement_v8f32:
     72 ; FIXMESI: BUFFER_STORE_DWORDX4
     73 ; FIXMESI: BUFFER_STORE_DWORDX4
     74 define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind { ; only the label is checked; the two store patterns above use a different prefix and are inactive
     75   %updated = insertelement <8 x float> %a, float 5.000000e+00, i32 %b ; put 5.0 into the element of %a selected by %b
     76   store <8 x float> %updated, <8 x float> addrspace(1)* %out, align 32 ; 32-byte vector written to %out
     77   ret void
     78 }
     79 
     80 ; SI-LABEL: @dynamic_insertelement_v16f32:
     81 ; FIXMESI: BUFFER_STORE_DWORDX4
     82 ; FIXMESI: BUFFER_STORE_DWORDX4
     83 ; FIXMESI: BUFFER_STORE_DWORDX4
     84 ; FIXMESI: BUFFER_STORE_DWORDX4
     85 define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind { ; only the label is checked; the four store patterns above use a different prefix and are inactive
     86   %updated = insertelement <16 x float> %a, float 5.000000e+00, i32 %b ; put 5.0 into the element of %a selected by %b
     87   store <16 x float> %updated, <16 x float> addrspace(1)* %out, align 64 ; 64-byte vector written to %out
     88   ret void
     89 }
     90 
     91 ; SI-LABEL: @dynamic_insertelement_v2i32:
     92 ; SI: BUFFER_STORE_DWORDX2
     93 define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind { ; integer variant; the lane index %b is a function argument
     94   %updated = insertelement <2 x i32> %a, i32 5, i32 %b ; put 5 into the element of %a selected by %b
     95   store <2 x i32> %updated, <2 x i32> addrspace(1)* %out, align 8 ; the check above expects a DWORDX2 store
     96   ret void
     97 }
     98 
     99 ; SI-LABEL: @dynamic_insertelement_v4i32:
    100 ; SI: BUFFER_STORE_DWORDX4
    101 define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind { ; integer variant; the lane index %b is a function argument
    102   %updated = insertelement <4 x i32> %a, i32 5, i32 %b ; put 5 into the element of %a selected by %b
    103   store <4 x i32> %updated, <4 x i32> addrspace(1)* %out, align 16 ; the check above expects a DWORDX4 store
    104   ret void
    105 }
    106 
    107 ; SI-LABEL: @dynamic_insertelement_v8i32:
    108 ; FIXMESI: BUFFER_STORE_DWORDX4
    109 ; FIXMESI: BUFFER_STORE_DWORDX4
    110 define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind { ; only the label is checked; the two store patterns above use a different prefix and are inactive
    111   %updated = insertelement <8 x i32> %a, i32 5, i32 %b ; put 5 into the element of %a selected by %b
    112   store <8 x i32> %updated, <8 x i32> addrspace(1)* %out, align 32 ; 32-byte vector written to %out
    113   ret void
    114 }
    115 
    116 ; SI-LABEL: @dynamic_insertelement_v16i32:
    117 ; FIXMESI: BUFFER_STORE_DWORDX4
    118 ; FIXMESI: BUFFER_STORE_DWORDX4
    119 ; FIXMESI: BUFFER_STORE_DWORDX4
    120 ; FIXMESI: BUFFER_STORE_DWORDX4
    121 define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind { ; only the label is checked; the four store patterns above use a different prefix and are inactive
    122   %updated = insertelement <16 x i32> %a, i32 5, i32 %b ; put 5 into the element of %a selected by %b
    123   store <16 x i32> %updated, <16 x i32> addrspace(1)* %out, align 64 ; 64-byte vector written to %out
    124   ret void
    125 }
    126 
    127 
    128 ; SI-LABEL: @dynamic_insertelement_v2i16:
    129 ; FIXMESI: BUFFER_STORE_DWORDX2
    130 define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind { ; only the label is checked; NOTE(review): the inactive pattern above names a DWORDX2 store for a 4-byte vector - confirm before enabling
    131   %updated = insertelement <2 x i16> %a, i16 5, i32 %b ; put 5 into the 16-bit element of %a selected by %b
    132   store <2 x i16> %updated, <2 x i16> addrspace(1)* %out, align 8 ; NOTE(review): align 8 is larger than the 4-byte <2 x i16> - confirm this is intended
    133   ret void
    134 }
    135 
    136 ; SI-LABEL: @dynamic_insertelement_v4i16:
    137 ; FIXMESI: BUFFER_STORE_DWORDX4
    138 define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind { ; only the label is checked; NOTE(review): the inactive pattern above names a DWORDX4 store for an 8-byte vector - confirm before enabling
    139   %updated = insertelement <4 x i16> %a, i16 5, i32 %b ; put 5 into the 16-bit element of %a selected by %b
    140   store <4 x i16> %updated, <4 x i16> addrspace(1)* %out, align 16 ; NOTE(review): align 16 is larger than the 8-byte <4 x i16> - confirm this is intended
    141   ret void
    142 }
    143 
    144 
    145 ; SI-LABEL: @dynamic_insertelement_v2i8:
    146 ; FIXMESI: BUFFER_STORE_USHORT
    147 define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind { ; only the label is checked; the store pattern above uses a different prefix and is inactive
    148   %updated = insertelement <2 x i8> %a, i8 5, i32 %b ; put 5 into the 8-bit element of %a selected by %b
    149   store <2 x i8> %updated, <2 x i8> addrspace(1)* %out, align 8 ; NOTE(review): align 8 is larger than the 2-byte <2 x i8> - confirm this is intended
    150   ret void
    151 }
    152 
    153 ; SI-LABEL: @dynamic_insertelement_v4i8:
    154 ; FIXMESI: BUFFER_STORE_DWORD
    155 define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind { ; only the label is checked; the store pattern above uses a different prefix and is inactive
    156   %updated = insertelement <4 x i8> %a, i8 5, i32 %b ; put 5 into the 8-bit element of %a selected by %b
    157   store <4 x i8> %updated, <4 x i8> addrspace(1)* %out, align 16 ; NOTE(review): align 16 is larger than the 4-byte <4 x i8> - confirm this is intended
    158   ret void
    159 }
    160 
    161 ; SI-LABEL: @dynamic_insertelement_v8i8:
    162 ; FIXMESI: BUFFER_STORE_DWORDX2
    163 define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind { ; only the label is checked; the store pattern above uses a different prefix and is inactive
    164   %updated = insertelement <8 x i8> %a, i8 5, i32 %b ; put 5 into the 8-bit element of %a selected by %b
    165   store <8 x i8> %updated, <8 x i8> addrspace(1)* %out, align 16 ; NOTE(review): align 16 is larger than the 8-byte <8 x i8> - confirm this is intended
    166   ret void
    167 }
    168 
    169 ; SI-LABEL: @dynamic_insertelement_v16i8:
    170 ; FIXMESI: BUFFER_STORE_DWORDX4
    171 define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind { ; only the label is checked; the store pattern above uses a different prefix and is inactive
    172   %updated = insertelement <16 x i8> %a, i8 5, i32 %b ; put 5 into the 8-bit element of %a selected by %b
    173   store <16 x i8> %updated, <16 x i8> addrspace(1)* %out, align 16 ; 16-byte vector written to %out
    174   ret void
    175 }
    176 
    177 ; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
    178 ; the compiler doesn't crash.
    179 ; SI-LABEL: @insert_split_bb:
    180 define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) { ; the vector is started in entry and completed in either %if or %else; %b is unused
    181 entry:
    182   %0 = insertelement <2 x i32> undef, i32 %a, i32 0 ; lane 0 = %a, lane 1 is still undef
    183   %1 = icmp eq i32 %a, 0
    184   br i1 %1, label %if, label %else ; %a == 0 takes the %if path
    185 
    186 if:
    187   %2 = load i32 addrspace(1)* %in ; first i32 at %in
    188   %3 = insertelement <2 x i32> %0, i32 %2, i32 1 ; lane 1 = loaded value
    189   br label %endif
    190 
    191 else:
    192   %4 = getelementptr i32 addrspace(1)* %in, i32 1 ; address of the second i32 at %in
    193   %5 = load i32 addrspace(1)* %4
    194   %6 = insertelement <2 x i32> %0, i32 %5, i32 1 ; lane 1 = loaded value
    195   br label %endif
    196 
    197 endif:
    198   %7 = phi <2 x i32> [%3, %if], [%6, %else] ; pick the vector finished on whichever path ran
    199   store <2 x i32> %7, <2 x i32> addrspace(1)* %out ; no explicit alignment on this store
    200   ret void
    201 }
    202