Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
      3 
      4 ; SI-LABEL: {{^}}load_i8_to_f32:
        ; Scalar case: a single i8 is loaded, converted with uitofp and stored.
        ; The byte load is expected to feed v_cvt_f32_ubyte0 directly; the two
        ; negative checks reject any bfe or lshr between the load and the convert.
      5 ; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
      6 ; SI-NOT: bfe
      7 ; SI-NOT: lshr
      8 ; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
      9 ; SI: buffer_store_dword [[CONV]],
     10 define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
     11   %load = load i8, i8 addrspace(1)* %in, align 1
     12   %cvt = uitofp i8 %load to float
     13   store float %cvt, float addrspace(1)* %out, align 4
     14   ret void
     15 }
     16 
     17 ; SI-LABEL: {{^}}load_v2i8_to_v2f32:
        ; <2 x i8> case: one ushort load is expected, and both bytes are converted
        ; straight from the loaded register (ubyte0 and ubyte1, in either order).
        ; The ubyte0 result must be the low register and the ubyte1 result the high
        ; register of the range written by the single dwordx2 store.
     18 ; SI: buffer_load_ushort [[LD:v[0-9]+]]
     19 ; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LD]]
     20 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LD]]
     21 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
     22 define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
     23   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
     24   %cvt = uitofp <2 x i8> %load to <2 x float>
     25   store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
     26   ret void
     27 }
     28 
     29 ; SI-LABEL: {{^}}load_v3i8_to_v3f32:
        ; <3 x i8> case: one dword load is expected and bytes 0-2 are converted
        ; from that same register. The negative check rejects a conversion of the
        ; fourth byte (ubyte3) between the load and the other conversions.
        ; Only the dwordx2 store of the ubyte0/ubyte1 results is checked; the store
        ; of the third element is not verified here.
     30 ; SI: buffer_load_dword [[VAL:v[0-9]+]]
     31 ; SI-NOT: v_cvt_f32_ubyte3_e32
     32 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[VAL]]
     33 ; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[VAL]]
     34 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[VAL]]
     35 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
     36 define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
     37   %load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
     38   %cvt = uitofp <3 x i8> %load to <3 x float>
     39   store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
     40   ret void
     41 }
     42 
     43 ; SI-LABEL: {{^}}load_v4i8_to_v4f32:
        ; <4 x i8> loaded with align 4: one dword load is expected and all four
        ; bytes are converted from that same register (ubyte0..ubyte3, any order),
        ; with no bfe or lshr between the load and the conversions. The dwordx4
        ; store must cover the range from the ubyte0 result to the ubyte3 result.
     44 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
     45 ; SI-NOT: bfe
     46 ; SI-NOT: lshr
     47 ; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
     48 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
     49 ; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
     50 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
     51 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
     52 define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
     53   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
     54   %cvt = uitofp <4 x i8> %load to <4 x float>
     55   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
     56   ret void
     57 }
     58 
     59 ; This should not be adding instructions to shift into the correct
     60 ; position in the word for the component.
        ;
        ; Same <4 x i8> to <4 x float> conversion as load_v4i8_to_v4f32, but the
        ; load is only align 1. The checks expect four separate byte loads and four
        ; ubyte0 conversions. A v_lshlrev_b32 and a v_or_b32 are also expected in
        ; the output; NOTE(review): presumably this is the byte packing that the
        ; FIXME below refers to -- confirm.
        ; LORESULT and HIRESULT are captured but the final store check does not
        ; use them, so the stored register range is not verified.
     61 
     62 ; FIXME: Packing bytes
     63 ; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
     64 ; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
     65 ; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
     66 ; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
     67 ; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
     68 ; SI-DAG: v_lshlrev_b32
     69 ; SI-DAG: v_or_b32
     70 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]],
     71 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}},
     72 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}},
     73 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]]
     74 
     75 ; SI: buffer_store_dwordx4
     76 define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
     77   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
     78   %cvt = uitofp <4 x i8> %load to <4 x float>
     79   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
     80   ret void
     81 }
     82 
     83 ; Instructions still emitted to repack bytes for add use.
        ;
        ; %load has two users here: the uitofp (checked as the four
        ; v_cvt_f32_ubyte0..3 conversions) and a <4 x i8> add of 9 whose result is
        ; stored to %out2. The shift/and/add checks cover the code generated for
        ; the add. Both stores must appear before s_endpgm, the dwordx4 store of
        ; the floats first and the dword store of the add result second.
     84 ; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
     85 ; SI: buffer_load_dword
     86 ; SI-DAG: v_cvt_f32_ubyte0_e32
     87 ; SI-DAG: v_cvt_f32_ubyte1_e32
     88 ; SI-DAG: v_cvt_f32_ubyte2_e32
     89 ; SI-DAG: v_cvt_f32_ubyte3_e32
     90 
     91 ; SI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 24
     92 ; SI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16
     93 ; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16
     94 ; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 8
     95 ; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffff,
     96 ; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff00,
     97 ; SI-DAG: v_add_i32
     98 
     99 ; SI: buffer_store_dwordx4
    100 ; SI: buffer_store_dword
    101 
    102 ; SI: s_endpgm
    103 define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
    104   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
    105   %cvt = uitofp <4 x i8> %load to <4 x float>
    106   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
    107   %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
    108   store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4
    109   ret void
    110 }
    111 
    112 ; Make sure this doesn't crash.
        ;
        ; A 7-element vector loaded with align 1. No instruction pattern is
        ; verified: the checks only require that the function label and a
        ; following s_endpgm appear in the output.
    113 ; SI-LABEL: {{^}}load_v7i8_to_v7f32:
    114 ; SI: s_endpgm
    115 define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
    116   %load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
    117   %cvt = uitofp <7 x i8> %load to <7 x float>
    118   store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
    119   ret void
    120 }
    121 
    122 ; SI-LABEL: {{^}}load_v8i8_to_v8f32:
        ; <8 x i8> loaded with align 8: one dwordx2 load is expected, and bytes 0-3
        ; of both the low and the high loaded register are converted directly
        ; (eight conversions, any order). No bfe or lshr may appear between the
        ; load and the conversions, nor between the conversions and the first
        ; store. The result is written with two dwordx4 stores.
    123 ; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
    124 ; SI-NOT: bfe
    125 ; SI-NOT: lshr
    126 ; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
    127 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
    128 ; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
    129 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
    130 ; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
    131 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
    132 ; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
    133 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
    134 ; SI-NOT: bfe
    135 ; SI-NOT: lshr
    136 ; SI: buffer_store_dwordx4
    137 ; SI: buffer_store_dwordx4
    138 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
    139   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
    140   %cvt = uitofp <8 x i8> %load to <8 x float>
    141   store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
    142   ret void
    143 }
    144 
    145 ; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
        ; The uitofp operand is an i32 masked with 255 after adding 2 to the loaded
        ; value. The checks expect v_cvt_f32_ubyte0 to consume the add result on
        ; the line immediately after the add, so no separate instruction for the
        ; mask may sit between them.
    146 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
    147 ; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
    148 ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
    149 ; SI: buffer_store_dword [[CONV]],
    150 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    151   %load = load i32, i32 addrspace(1)* %in, align 4
    152   %add = add i32 %load, 2
    153   %inreg = and i32 %add, 255
    154   %cvt = uitofp i32 %inreg to float
    155   store float %cvt, float addrspace(1)* %out, align 4
    156   ret void
    157 }
    158 
    159 ; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
        ; Byte 1 of the loaded i32 is isolated as (x & 65280) >> 8, i.e. mask with
        ; 0xff00 first and then shift right by 8, before the uitofp.
        ; Only the function label is checked; no instruction pattern is verified.
    160 define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    161   %load = load i32, i32 addrspace(1)* %in, align 4
    162   %inreg = and i32 %load, 65280
    163   %shr = lshr i32 %inreg, 8
    164   %cvt = uitofp i32 %shr to float
    165   store float %cvt, float addrspace(1)* %out, align 4
    166   ret void
    167 }
    168 
    169 ; We don't get these ones because of the zext, but instcombine removes
    170 ; them so it shouldn't really matter.
        ;
        ; Here the i8 load is explicitly zero-extended to i32 before the uitofp.
        ; Only the function label is checked; no instruction pattern is verified.
    171 ; SI-LABEL: {{^}}i8_zext_i32_to_f32:
    172 define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
    173   %load = load i8, i8 addrspace(1)* %in, align 1
    174   %ext = zext i8 %load to i32
    175   %cvt = uitofp i32 %ext to float
    176   store float %cvt, float addrspace(1)* %out, align 4
    177   ret void
    178 }
    179 
    180 ; SI-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
        ; Vector form of the explicit-zext pattern: a <4 x i8> loaded with align 1
        ; is zero-extended to <4 x i32> and then converted with uitofp.
        ; Only the function label is checked; no instruction pattern is verified.
    181 define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
    182   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
    183   %ext = zext <4 x i8> %load to <4 x i32>
    184   %cvt = uitofp <4 x i32> %ext to <4 x float>
    185   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
    186   ret void
    187 }
    188 
    189 ; SI-LABEL: {{^}}extract_byte0_to_f32:
        ; Byte 0 of a loaded i32 is selected with "and 255" and converted.
        ; The negative check requires that the loaded register is not mentioned
        ; again before v_cvt_f32_ubyte0 reads it, so the mask must fold into the
        ; conversion rather than being applied to the register first.
    190 ; SI: buffer_load_dword [[VAL:v[0-9]+]]
    191 ; SI-NOT: [[VAL]]
    192 ; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[VAL]]
    193 ; SI: buffer_store_dword [[CONV]]
    194 define void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    195   %val = load i32, i32 addrspace(1)* %in
    196   %and = and i32 %val, 255
    197   %cvt = uitofp i32 %and to float
    198   store float %cvt, float addrspace(1)* %out
    199   ret void
    200 }
    201 
    202 ; SI-LABEL: {{^}}extract_byte1_to_f32:
        ; Byte 1 of a loaded i32 is selected with "lshr 8" then "and 255".
        ; The negative check requires that the loaded register is not mentioned
        ; again before v_cvt_f32_ubyte1 reads it, so both the shift and the mask
        ; must fold into the conversion.
    203 ; SI: buffer_load_dword [[VAL:v[0-9]+]]
    204 ; SI-NOT: [[VAL]]
    205 ; SI: v_cvt_f32_ubyte1_e32 [[CONV:v[0-9]+]], [[VAL]]
    206 ; SI: buffer_store_dword [[CONV]]
    207 define void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    208   %val = load i32, i32 addrspace(1)* %in
    209   %srl = lshr i32 %val, 8
    210   %and = and i32 %srl, 255
    211   %cvt = uitofp i32 %and to float
    212   store float %cvt, float addrspace(1)* %out
    213   ret void
    214 }
    215 
    216 ; SI-LABEL: {{^}}extract_byte2_to_f32:
        ; Byte 2 of a loaded i32 is selected with "lshr 16" then "and 255".
        ; The negative check requires that the loaded register is not mentioned
        ; again before v_cvt_f32_ubyte2 reads it, so both the shift and the mask
        ; must fold into the conversion.
    217 ; SI: buffer_load_dword [[VAL:v[0-9]+]]
    218 ; SI-NOT: [[VAL]]
    219 ; SI: v_cvt_f32_ubyte2_e32 [[CONV:v[0-9]+]], [[VAL]]
    220 ; SI: buffer_store_dword [[CONV]]
    221 define void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    222   %val = load i32, i32 addrspace(1)* %in
    223   %srl = lshr i32 %val, 16
    224   %and = and i32 %srl, 255
    225   %cvt = uitofp i32 %and to float
    226   store float %cvt, float addrspace(1)* %out
    227   ret void
    228 }
    229 
    230 ; SI-LABEL: {{^}}extract_byte3_to_f32:
        ; Byte 3 of a loaded i32 is selected with "lshr 24" then "and 255".
        ; The negative check requires that the loaded register is not mentioned
        ; again before v_cvt_f32_ubyte3 reads it, so both the shift and the mask
        ; must fold into the conversion.
    231 ; SI: buffer_load_dword [[VAL:v[0-9]+]]
    232 ; SI-NOT: [[VAL]]
    233 ; SI: v_cvt_f32_ubyte3_e32 [[CONV:v[0-9]+]], [[VAL]]
    234 ; SI: buffer_store_dword [[CONV]]
    235 define void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
    236   %val = load i32, i32 addrspace(1)* %in
    237   %srl = lshr i32 %val, 24
    238   %and = and i32 %srl, 255
    239   %cvt = uitofp i32 %and to float
    240   store float %cvt, float addrspace(1)* %out
    241   ret void
    242 }
    243