; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
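; Check that uitofp from i8 (scalar and small vectors) selects the
; V_CVT_F32_UBYTE{0,1,2,3} instructions directly, instead of a separate
; zero-extend (BFE/LSHR/AND) followed by V_CVT_F32_U32.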

; SI-LABEL: @load_i8_to_f32:
; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORD [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
  %load = load i8 addrspace(1)* %in, align 1
  %cvt = uitofp i8 %load to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

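; Both bytes come in with a single BUFFER_LOAD_USHORT; V_CVT_F32_UBYTE0 and
; V_CVT_F32_UBYTE1 pick the individual bytes out of the 16-bit value, so no
; masking or shifting should be needed.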
; SI-LABEL: @load_v2i8_to_v2f32:
; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-NOT: AND
; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <2 x i8> addrspace(1)* %in, align 1
  %cvt = uitofp <2 x i8> %load to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
  ret void
}

; SI-LABEL: @load_v3i8_to_v3f32:
; SI-NOT: BFE
; SI-NOT: V_CVT_F32_UBYTE3_e32
; SI-DAG: V_CVT_F32_UBYTE2_e32
; SI-DAG: V_CVT_F32_UBYTE1_e32
; SI-DAG: V_CVT_F32_UBYTE0_e32
; SI: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:[0-9]+\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <3 x i8> addrspace(1)* %in, align 1
  %cvt = uitofp <3 x i8> %load to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
  ret void
}

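; A full dword load; each byte should be converted by the matching
; V_CVT_F32_UBYTE{0,1,2,3} applied directly to the loaded register.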
; SI-LABEL: @load_v4i8_to_v4f32:
; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <4 x i8> addrspace(1)* %in, align 1
  %cvt = uitofp <4 x i8> %load to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
; for each component, but computeKnownBits doesn't handle vectors very
; well.

; SI-LABEL: @load_v4i8_to_v4f32_2_uses:
; SI: BUFFER_LOAD_UBYTE
; SI: V_CVT_F32_UBYTE0_e32
; SI: BUFFER_LOAD_UBYTE
; SI: V_CVT_F32_UBYTE0_e32
; SI: BUFFER_LOAD_UBYTE
; SI: V_CVT_F32_UBYTE0_e32
; SI: BUFFER_LOAD_UBYTE
; SI: V_CVT_F32_UBYTE0_e32

; XXX - replace with this when v4i8 loads aren't scalarized anymore.
; XSI: BUFFER_LOAD_DWORD
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; XSI: V_CVT_F32_U32_e32
; SI: S_ENDPGM
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <4 x i8> addrspace(1)* %in, align 4
  %cvt = uitofp <4 x i8> %load to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
  store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4
  ret void
}

; Make sure this doesn't crash.
; SI-LABEL: @load_v7i8_to_v7f32:
; SI: S_ENDPGM
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <7 x i8> addrspace(1)* %in, align 1
  %cvt = uitofp <7 x i8> %load to <7 x float>
  store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
  ret void
}

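; The eight bytes arrive as two dwords (BUFFER_LOAD_DWORDX2); each half should
; feed all four V_CVT_F32_UBYTE variants with no extra extraction code.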
; SI-LABEL: @load_v8i8_to_v8f32:
; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
; SI-NOT: BFE
; SI-NOT: LSHR
; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-NOT: BFE
; SI-NOT: LSHR
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <8 x i8> addrspace(1)* %in, align 1
  %cvt = uitofp <8 x i8> %load to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
  ret void
}

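; The zext_inreg mask ('and %add, 255') should fold away: V_CVT_F32_UBYTE0
; converts only the low byte of the add result, so no separate AND is needed.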
; SI-LABEL: @i8_zext_inreg_i32_to_f32:
; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: BUFFER_STORE_DWORD [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 2
  %inreg = and i32 %add, 255
  %cvt = uitofp i32 %inreg to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

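; Same pattern with the byte taken from bits [15:8] (mask with 0xff00, then
; shift right by 8); only the label is checked here.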
; SI-LABEL: @i8_zext_inreg_hi1_to_f32:
define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
  %load = load i32 addrspace(1)* %in, align 4
  %inreg = and i32 %load, 65280
  %shr = lshr i32 %inreg, 8
  %cvt = uitofp i32 %shr to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

; We don't catch these cases because of the explicit zext, but instcombine
; removes the zext, so it shouldn't really matter.
define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
  %load = load i8 addrspace(1)* %in, align 1
  %ext = zext i8 %load to i32
  %cvt = uitofp i32 %ext to float
  store float %cvt, float addrspace(1)* %out, align 4
  ret void
}

define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  %load = load <4 x i8> addrspace(1)* %in, align 1
  %ext = zext <4 x i8> %load to <4 x i32>
  %cvt = uitofp <4 x i32> %ext to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  ret void
}