Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 
      3 declare i32 @llvm.r600.read.tidig.x() nounwind readnone  ; intrinsic whose i32 result the buffer_load tests below use as an element index (presumably the work-item id in X -- confirm)
      4 
      5 ; Make sure we don't turn the 32-bit argument load into a 16-bit
      6 ; load. There aren't extending scalar loads, so that would require
      7 ; using a buffer_load instruction.
      8 
      9 ; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
     10 ; SI: s_load_dword s
     11 ; SI: buffer_store_short v
     12 define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
     13   %lo16 = trunc i32 %arg to i16  ; narrow the i32 argument to its low 16 bits
     14   store i16 %lo16, i16 addrspace(1)* %out  ; 16-bit store of the narrowed value through %out
     15   ret void
     16 }
     17 
     18 ; It should be OK (and probably performance neutral) to reduce this,
     19 ; but we don't know if the load is uniform yet.
     20 
     21 ; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
     22 ; SI: buffer_load_dword v
     23 ; SI: buffer_store_short v
     24 define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     25   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
     26   %src = getelementptr i32, i32 addrspace(1)* %in, i32 %idx  ; address of in[idx]
     27   %dst = getelementptr i16, i16 addrspace(1)* %out, i32 %idx  ; address of out[idx]
     28   %wide = load i32, i32 addrspace(1)* %src  ; full 32-bit load from memory
     29   %lo16 = trunc i32 %wide to i16  ; only the low 16 bits of the loaded value are used
     30   store i16 %lo16, i16 addrspace(1)* %dst
     31   ret void
     32 }
     33 
     34 ; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
     35 ; SI: s_load_dword s
     36 ; SI: buffer_store_byte v
     37 define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
     38   %lo8 = trunc i32 %arg to i8  ; narrow the i32 argument to its low 8 bits
     39   store i8 %lo8, i8 addrspace(1)* %out  ; byte store of the narrowed value through %out
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
     44 ; SI: buffer_load_dword v
     45 ; SI: buffer_store_byte v
     46 define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     47   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
     48   %src = getelementptr i32, i32 addrspace(1)* %in, i32 %idx  ; address of in[idx]
     49   %dst = getelementptr i8, i8 addrspace(1)* %out, i32 %idx  ; address of out[idx]
     50   %wide = load i32, i32 addrspace(1)* %src  ; full 32-bit load from memory
     51   %lo8 = trunc i32 %wide to i8  ; only the low 8 bits of the loaded value are used
     52   store i8 %lo8, i8 addrspace(1)* %dst
     53   ret void
     54 }
     55 
     56 ; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
     57 ; SI: s_load_dword s
     58 ; SI: buffer_store_byte v
     59 define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
     60   %bit0 = trunc i32 %arg to i1  ; keep only bit 0 of the i32 argument
     61   store i1 %bit0, i1 addrspace(1)* %out  ; i1 store; the checks above expect a byte-sized store instruction
     62   ret void
     63 }
     64 
     65 ; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
     66 ; SI: buffer_load_dword v
     67 ; SI: buffer_store_byte v
     68 define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     69   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
     70   %src = getelementptr i32, i32 addrspace(1)* %in, i32 %idx  ; address of in[idx]
     71   %dst = getelementptr i1, i1 addrspace(1)* %out, i32 %idx  ; address of out[idx]
     72   %wide = load i32, i32 addrspace(1)* %src  ; full 32-bit load from memory
     73   %bit0 = trunc i32 %wide to i1  ; only bit 0 of the loaded value is used
     74   store i1 %bit0, i1 addrspace(1)* %dst
     75   ret void
     76 }
     77 
     78 ; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
     79 ; SI: s_load_dword s
     80 ; SI: buffer_store_dword v
     81 define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
     82   %lo32 = trunc i64 %arg to i32  ; narrow the i64 argument to its low 32 bits
     83   store i32 %lo32, i32 addrspace(1)* %out  ; 32-bit store of the narrowed value through %out
     84   ret void
     85 }
     86 
     87 ; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
     88 ; SI: buffer_load_dword v
     89 ; SI: buffer_store_dword v
     90 define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
     91   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
     92   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %idx  ; address of in[idx]
     93   %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %idx  ; address of out[idx]
     94   %wide = load i64, i64 addrspace(1)* %src  ; 64-bit load in the IR; the checks above expect a single dword load
     95   %lo32 = trunc i64 %wide to i32  ; only the low 32 bits of the loaded value are used
     96   store i32 %lo32, i32 addrspace(1)* %dst
     97   ret void
     98 }
     99 
    100 ; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
    101 ; SI: s_load_dword s
    102 ; SI: buffer_store_dword v
    103 define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
    104   %shifted = lshr i64 %arg, 32  ; logical shift: bits 63..32 of the argument move down to bits 31..0
    105   %hi32 = trunc i64 %shifted to i32  ; result is the upper 32 bits of the original i64 argument
    106   store i32 %hi32, i32 addrspace(1)* %out
    107   ret void
    108 }
    109 
    110 ; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
    111 ; SI: buffer_load_dword v
    112 ; SI: buffer_store_dword v
    113 define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
    114   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
    115   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %idx  ; address of in[idx]
    116   %dst = getelementptr i32, i32 addrspace(1)* %out, i32 %idx  ; address of out[idx]
    117   %wide = load i64, i64 addrspace(1)* %src  ; 64-bit load in the IR; the checks above expect a single dword load
    118   %shifted = lshr i64 %wide, 32  ; logical shift: bits 63..32 move down to bits 31..0
    119   %hi32 = trunc i64 %shifted to i32  ; result is the upper 32 bits of the loaded value
    120   store i32 %hi32, i32 addrspace(1)* %dst
    121   ret void
    122 }
    123 
    124 ; Might as well reduce to 8-bit loads.
    125 ; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
    126 ; SI: s_load_dword s
    127 ; SI: buffer_store_byte v
    128 define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
    129   %lo8 = trunc i16 %arg to i8  ; narrow the i16 argument to its low 8 bits
    130   store i8 %lo8, i8 addrspace(1)* %out  ; byte store of the narrowed value through %out
    131   ret void
    132 }
    133 
    134 ; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
    135 ; SI: buffer_load_ubyte v
    136 ; SI: buffer_store_byte v
    137 define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
    138   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
    139   %src = getelementptr i16, i16 addrspace(1)* %in, i32 %idx  ; address of in[idx]
    140   %dst = getelementptr i8, i8 addrspace(1)* %out, i32 %idx  ; address of out[idx]
    141   %wide = load i16, i16 addrspace(1)* %src  ; 16-bit load in the IR; the checks above expect it to shrink to a ubyte load
    142   %lo8 = trunc i16 %wide to i8  ; only the low 8 bits of the loaded value are used
    143   store i8 %lo8, i8 addrspace(1)* %dst
    144   ret void
    145 }
    146 
    147 ; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
    148 ; SI: s_load_dword s
    149 ; SI: buffer_store_byte v
    150 define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
    151   %shifted = lshr i64 %arg, 32  ; logical shift: bits 63..32 of the argument move down to bits 31..0
    152   %byte = trunc i64 %shifted to i8  ; result is bits 39..32 of the original i64 argument
    153   store i8 %byte, i8 addrspace(1)* %out
    154   ret void
    155 }
    156 
    157 ; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
    158 ; SI: buffer_load_dword v
    159 ; SI: buffer_store_byte v
    160 define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
    161   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
    162   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %idx  ; address of in[idx]
    163   %dst = getelementptr i8, i8 addrspace(1)* %out, i32 %idx  ; address of out[idx]
    164   %wide = load i64, i64 addrspace(1)* %src  ; 64-bit load in the IR; the checks above expect a single dword load
    165   %shifted = lshr i64 %wide, 32  ; logical shift: bits 63..32 move down to bits 31..0
    166   %byte = trunc i64 %shifted to i8  ; result is bits 39..32 of the loaded value
    167   store i8 %byte, i8 addrspace(1)* %dst
    168   ret void
    169 }
    170 
    171 ; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
    172 ; SI: s_load_dword s
    173 ; SI: buffer_store_byte v
    174 define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
    175   %lo8 = trunc i64 %arg to i8  ; narrow the i64 argument to its low 8 bits
    176   store i8 %lo8, i8 addrspace(1)* %out  ; byte store of the narrowed value through %out
    177   ret void
    178 }
    179 
    180 ; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
    181 ; SI: buffer_load_dword v
    182 ; SI: buffer_store_byte v
    183 define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
    184   %idx = call i32 @llvm.r600.read.tidig.x() nounwind readnone  ; element index supplied by the intrinsic
    185   %src = getelementptr i64, i64 addrspace(1)* %in, i32 %idx  ; address of in[idx]
    186   %dst = getelementptr i8, i8 addrspace(1)* %out, i32 %idx  ; address of out[idx]
    187   %wide = load i64, i64 addrspace(1)* %src  ; 64-bit load in the IR; the checks above expect a single dword load
    188   %lo8 = trunc i64 %wide to i8  ; only the low 8 bits of the loaded value are used
    189   store i8 %lo8, i8 addrspace(1)* %dst
    190   ret void
    191 }
    192 
    193 ; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16:
    194 ; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
    195 ; SI: s_waitcnt lgkmcnt(0)
    196 ; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
    197 define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
    198 entry:
    199   %val = load i32, i32 addrspace(2)* %in  ; 32-bit load through the addrspace(2) pointer; the checks above expect a scalar dword load that is not narrowed
    200   %mask = and i32 %val, 65535  ; 65535 == 0xffff: keep the low 16 bits, expected to stay an explicit s_and_b32 on the loaded register
    201   store i32 %mask, i32 addrspace(1)* %out  ; write the masked 32-bit value through %out
    202   ret void
    203 }
    204