; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      3 
; The vector extracts should be pushed through the bitcasts so that the
; vectors can be broken down and the shared components can be CSE'd.
      6 
      7 ; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
      8 ; GCN: buffer_store_dwordx4
      9 ; GCN: buffer_store_dwordx4
     10 ; GCN-NOT: v_mov_b32
     11 ; GCN: buffer_store_dwordx4
     12 ; GCN-NOT: v_mov_b32
     13 ; GCN: buffer_store_dwordx4
     14 define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
     15   %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
     16   store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
     17 
     18   %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
     19   store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
     20   ret void
     21 }
     22 
     23 ; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
     24 ; GCN: buffer_store_dwordx4
     25 ; GCN: buffer_store_dwordx4
     26 ; GCN-NOT: v_mov_b32
     27 ; GCN: buffer_store_dwordx4
     28 ; GCN-NOT: v_mov_b32
     29 ; GCN: buffer_store_dwordx4
     30 define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
     31   %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
     32   store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
     33 
     34   %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
     35   store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
     36   ret void
     37 }
     38 
     39 ; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
     40 ; GCN: buffer_store_dwordx4
     41 ; GCN: buffer_store_dwordx4
     42 ; GCN-NOT: v_mov_b32
     43 ; GCN: buffer_store_dwordx4
     44 ; GCN-NOT: v_mov_b32
     45 ; GCN: buffer_store_dwordx4
     46 define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
     47   %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
     48   store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out
     49 
     50   %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
     51   store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
     52   ret void
     53 }
     54 
     55 ; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
     56 ; GCN: buffer_store_dwordx4
     57 ; GCN: buffer_store_dwordx4
     58 ; GCN-NOT: v_mov_b32
     59 ; GCN: buffer_store_dwordx4
     60 ; GCN-NOT: v_mov_b32
     61 ; GCN: buffer_store_dwordx4
     62 define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
     63   %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
     64   store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
     65 
     66   %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
     67   store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
     72 ; GCN-NOT: store_dword
     73 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
     74   %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
     75   %bc = bitcast i64 %undef to <2 x i32>
     76   store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
     77   ret void
     78 }
     79 
     80 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
     81 ; GCN-NOT: store_dword
     82 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
     83   %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
     84   %bc = bitcast i64 %undef to <2 x i32>
     85   %elt1 = extractelement <2 x i32> %bc, i32 1
     86   store volatile i32 %elt1, i32 addrspace(1)* %out
     87   ret void
     88 }
     89 
     90 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
     91 
     92 attributes #0 = { nounwind }
     93 attributes #1 = { nounwind readnone convergent }
     94