; AMDGPU codegen tests: sub-dword loads that are expected to select s_load_dword, run with the CodeGenPrepare constant-load widening option set to 0.
      1 ; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
      2 ; RUN: llc -amdgpu-codegenprepare-widen-constant-loads=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
      3 
        ; i16 load (align 4) whose address is the kernel argument itself, in
        ; addrspace(4). The checks expect one scalar dword load, with the add and
        ; the or then applied directly to the loaded register.
      4 ; GCN-LABEL: {{^}}widen_i16_constant_load:
      5 ; GCN: s_load_dword [[VAL:s[0-9]+]]
      6 ; GCN: s_addk_i32 [[VAL]], 0x3e7
      7 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 4
      8 define amdgpu_kernel void @widen_i16_constant_load(i16 addrspace(4)* %arg) {
      9   %load = load i16, i16 addrspace(4)* %arg, align 4
     10   %add = add i16 %load, 999 ; 999 == 0x3e7, the immediate in the s_addk_i32 check
     11   %or = or i16 %add, 4
     12   store i16 %or, i16 addrspace(1)* null ; result is stored through a null addrspace(1) pointer
     13   ret void
     14 }
     15 
        ; Same i16 load as above, but zero-extended to i32 before the arithmetic.
        ; The checks expect the dword load to be followed by an explicit mask with
        ; 0xffff, and the add/or to use the masked register.
     16 ; GCN-LABEL: {{^}}widen_i16_constant_load_zext_i32:
     17 ; GCN: s_load_dword [[VAL:s[0-9]+]]
     18 ; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[VAL]], 0xffff{{$}}
     19 ; GCN: s_addk_i32 [[TRUNC]], 0x3e7
     20 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[TRUNC]], 4
     21 define amdgpu_kernel void @widen_i16_constant_load_zext_i32(i16 addrspace(4)* %arg) {
     22   %load = load i16, i16 addrspace(4)* %arg, align 4
     23   %ext = zext i16 %load to i32 ; corresponds to the s_and_b32 ..., 0xffff check
     24   %add = add i32 %ext, 999 ; 999 == 0x3e7
     25   %or = or i32 %add, 4
     26   store i32 %or, i32 addrspace(1)* null
     27   ret void
     28 }
     29 
        ; Sign-extending variant: the i16 load is sign-extended to i32 before the
        ; add/or. The checks expect s_sext_i32_i16 on the register produced by the
        ; dword load, and the arithmetic on the extended register.
     30 ; GCN-LABEL: {{^}}widen_i16_constant_load_sext_i32:
     31 ; GCN: s_load_dword [[VAL:s[0-9]+]]
     32 ; GCN: s_sext_i32_i16 [[EXT:s[0-9]+]], [[VAL]]
     33 ; GCN: s_addk_i32 [[EXT]], 0x3e7
     34 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[EXT]], 4
     35 define amdgpu_kernel void @widen_i16_constant_load_sext_i32(i16 addrspace(4)* %arg) {
     36   %load = load i16, i16 addrspace(4)* %arg, align 4
     37   %ext = sext i16 %load to i32 ; corresponds to the s_sext_i32_i16 check
     38   %add = add i32 %ext, 999 ; 999 == 0x3e7
     39   %or = or i32 %add, 4
     40   store i32 %or, i32 addrspace(1)* null
     41   ret void
     42 }
     43 
        ; Non-power-of-two integer width: an i17 load (align 4) with add 34 / or 4,
        ; stored back as i17. The checks expect a dword load, 32-bit scalar add and
        ; or, and then an s_bfe_u32 on the or result.
        ; NOTE(review): 0x10010 presumably encodes offset 16 / width 1, i.e. the
        ; extraction of bit 16 (the top bit of the i17) for the store - confirm
        ; against the ISA description of s_bfe_u32.
     44 ; GCN-LABEL: {{^}}widen_i17_constant_load:
     45 ; GCN: s_load_dword [[VAL:s[0-9]+]]
     46 ; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 34
     47 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[ADD]], 4
     48 ; GCN: s_bfe_u32 s{{[0-9]+}}, [[OR]], 0x10010
     49 define amdgpu_kernel void @widen_i17_constant_load(i17 addrspace(4)* %arg) {
     50   %load = load i17, i17 addrspace(4)* %arg, align 4
     51   %add = add i17 %load, 34
     52   %or = or i17 %add, 4
     53   store i17 %or, i17 addrspace(1)* null
     54   ret void
     55 }
     56 
        ; Half-precision load (align 4) followed by fadd 4.0. Both runs expect a
        ; scalar dword load. The SI prefix (the tahiti run) then expects a convert
        ; to f32 and an f32 add; the VI prefix (the tonga run) expects a single
        ; v_add_f16 that reads the loaded scalar register directly.
     57 ; GCN-LABEL: {{^}}widen_f16_constant_load:
     58 ; GCN: s_load_dword [[VAL:s[0-9]+]]
     59 ; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[VAL]]
     60 ; SI: v_add_f32_e32 [[ADD:v[0-9]+]], 4.0, [[CVT]]
     61 
     62 ; VI: v_add_f16_e64 [[ADD:v[0-9]+]], [[VAL]], 4.0
     63 define amdgpu_kernel void @widen_f16_constant_load(half addrspace(4)* %arg) {
     64   %load = load half, half addrspace(4)* %arg, align 4
     65   %add = fadd half %load, 4.0
     66   store half %add, half addrspace(1)* null
     67   ret void
     68 }
     69 
        ; Vector case: a <2 x i8> load (align 4) with a per-element add and or.
        ; Both runs expect a scalar dword load. The SI checks list only scalar ALU
        ; instructions, while the VI checks contain vector (v_*) instructions with
        ; sdwa forms - presumably what the FIXME below refers to.
     70 ; FIXME: valu usage on VI
     71 ; GCN-LABEL: {{^}}widen_v2i8_constant_load:
     72 ; GCN: s_load_dword [[VAL:s[0-9]+]]
     73 
     74 ; SI: s_add_i32
     75 ; SI: s_or_b32
     76 ; SI: s_addk_i32
     77 ; SI: s_and_b32
     78 ; SI: s_or_b32
     79 ; SI: s_or_b32
     80 
     81 ; VI: s_add_i32
     82 ; VI: v_add_u32_sdwa
     83 ; VI: v_or_b32_sdwa
     84 ; VI: v_or_b32_e32
     85 define amdgpu_kernel void @widen_v2i8_constant_load(<2 x i8> addrspace(4)* %arg) {
     86   %load = load <2 x i8>, <2 x i8> addrspace(4)* %arg, align 4
     87   %add = add <2 x i8> %load, <i8 12, i8 44> ; different constant per element
     88   %or = or <2 x i8> %add, <i8 4, i8 3>
     89   store <2 x i8> %or, <2 x i8> addrspace(1)* null
     90   ret void
     91 }
     92 
        ; Negative case: the load address is the argument pointer indexed by the
        ; workitem id, so it differs per workitem. The check expects a 16-bit
        ; buffer_ or flat_ load (ushort) for it.
        ; NOTE(review): the load keeps align 4 like the other tests, presumably so
        ; that the per-workitem address is the only difference - confirm.
     93 ; GCN-LABEL: {{^}}no_widen_i16_constant_divergent_load:
     94 ; GCN: {{buffer|flat}}_load_ushort
     95 define amdgpu_kernel void @no_widen_i16_constant_divergent_load(i16 addrspace(4)* %arg) {
     96   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     97   %tid.ext = zext i32 %tid to i64
     98   %gep.arg = getelementptr inbounds i16, i16 addrspace(4)* %arg, i64 %tid.ext ; address = %arg + workitem id * sizeof(i16)
     99   %load = load i16, i16 addrspace(4)* %gep.arg, align 4
    100   %add = add i16 %load, 999
    101   %or = or i16 %add, 4
    102   store i16 %or, i16 addrspace(1)* null
    103   ret void
    104 }
    105 
        ; Single-bit case: an i1 load (align 4) and-ed with true and stored back.
        ; The checks expect a scalar dword load followed by a mask of the loaded
        ; register with 1.
    106 ; GCN-LABEL: {{^}}widen_i1_constant_load:
    107 ; GCN: s_load_dword [[VAL:s[0-9]+]]
    108 ; GCN: s_and_b32 {{s[0-9]+}}, [[VAL]], 1{{$}}
    109 define amdgpu_kernel void @widen_i1_constant_load(i1 addrspace(4)* %arg) {
    110   %load = load i1, i1 addrspace(4)* %arg, align 4
    111   %and = and i1 %load, true
    112   store i1 %and, i1 addrspace(1)* null
    113   ret void
    114 }
    115 
        ; i16 load (align 4) zero-extended before add 999 / or 4. The checks expect
        ; a dword load, a mask with 0xffff, and the add/or on the masked register.
        ; NOTE(review): the function name says i64, but the IR extends to i32 and
        ; stores an i32; apart from the value name %zext, the body and checks match
        ; @widen_i16_constant_load_zext_i32. Confirm whether an i64 extension was
        ; intended (the expected instructions would need to be regenerated).
    116 ; GCN-LABEL: {{^}}widen_i16_zextload_i64_constant_load:
    117 ; GCN: s_load_dword [[VAL:s[0-9]+]]
    118 ; GCN: s_and_b32 [[TRUNC:s[0-9]+]], [[VAL]], 0xffff{{$}}
    119 ; GCN: s_addk_i32 [[TRUNC]], 0x3e7
    120 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[TRUNC]], 4
    121 define amdgpu_kernel void @widen_i16_zextload_i64_constant_load(i16 addrspace(4)* %arg) {
    122   %load = load i16, i16 addrspace(4)* %arg, align 4
    123   %zext = zext i16 %load to i32
    124   %add = add i32 %zext, 999 ; 999 == 0x3e7
    125   %or = or i32 %add, 4
    126   store i32 %or, i32 addrspace(1)* null
    127   ret void
    128 }
    129 
        ; i1 load (align 4) zero-extended to i64, then a 64-bit add of 999. The
        ; checks expect a dword load, a mask with 1, and the 64-bit add as an
        ; s_add_u32 / s_addc_u32 pair where the carry instruction has 0, 0 as its
        ; source operands.
    130 ; GCN-LABEL: {{^}}widen_i1_zext_to_i64_constant_load:
    131 ; GCN: s_load_dword [[VAL:s[0-9]+]]
    132 ; GCN: s_and_b32 [[AND:s[0-9]+]], [[VAL]], 1
    133 ; GCN: s_add_u32 [[ADD:s[0-9]+]], [[AND]], 0x3e7
    134 ; GCN: s_addc_u32 s{{[0-9]+}}, 0, 0
    135 define amdgpu_kernel void @widen_i1_zext_to_i64_constant_load(i1 addrspace(4)* %arg) {
    136   %load = load i1, i1 addrspace(4)* %arg, align 4
    137   %zext = zext i1 %load to i64
    138   %add = add i64 %zext, 999 ; 999 == 0x3e7, the immediate in the s_add_u32 check
    139   store i64 %add, i64 addrspace(1)* null
    140   ret void
    141 }
    142 
        ; Same IR and expected instructions as @widen_i16_constant_load, except
        ; that the pointer argument is in addrspace(6) instead of addrspace(4).
        ; NOTE(review): addrspace(6) is presumably the 32-bit constant address
        ; space the "constant32" in the name refers to - confirm.
    143 ; GCN-LABEL: {{^}}widen_i16_constant32_load:
    144 ; GCN: s_load_dword [[VAL:s[0-9]+]]
    145 ; GCN: s_addk_i32 [[VAL]], 0x3e7
    146 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 4
    147 define amdgpu_kernel void @widen_i16_constant32_load(i16 addrspace(6)* %arg) {
    148   %load = load i16, i16 addrspace(6)* %arg, align 4
    149   %add = add i16 %load, 999 ; 999 == 0x3e7
    150   %or = or i16 %add, 4
    151   store i16 %or, i16 addrspace(1)* null
    152   ret void
    153 }
    154 
        ; i16 load (align 4) from an addrspace(1) pointer argument, tagged with
        ; !invariant.load. The checks expect the same scalar dword load and scalar
        ; add/or sequence as the addrspace(4) tests; note the or constant here is 1
        ; rather than 4.
    155 ; GCN-LABEL: {{^}}widen_i16_global_invariant_load:
    156 ; GCN: s_load_dword [[VAL:s[0-9]+]]
    157 ; GCN: s_addk_i32 [[VAL]], 0x3e7
    158 ; GCN: s_or_b32 [[OR:s[0-9]+]], [[VAL]], 1
    159 define amdgpu_kernel void @widen_i16_global_invariant_load(i16 addrspace(1)* %arg) {
    160   %load = load i16, i16 addrspace(1)* %arg, align 4, !invariant.load !0
    161   %add = add i16 %load, 999 ; 999 == 0x3e7
    162   %or = or i16 %add, 1
    163   store i16 %or, i16 addrspace(1)* null
    164   ret void
    165 }
    166 
        ; Intrinsic called by @no_widen_i16_constant_divergent_load to obtain the
        ; workitem id used as the load index.
    167 declare i32 @llvm.amdgcn.workitem.id.x()
    168 
    169 !0 = !{} ; empty metadata node; the !invariant.load operand in @widen_i16_global_invariant_load
    170