; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s

; Instruction-selection checks for stores of <3 x i64> values, and of values
; truncated from <3 x i64>, into addrspace(1) ("global" kernels) and
; addrspace(3) ("local" kernels). The three invocations above differ only in
; -mcpu / -mattr; every directive in this file uses the shared prefix.
; NOTE(review): the per-target prefixes (SI, CI, VI) are enabled on the command
; lines but no directive in the visible text uses them - confirm that is
; intentional.

; 32-byte-aligned store to addrspace(1): expected as one dwordx4 store at
; offset 0 plus one dwordx2 store at offset 16, matched in either order.
; GCN-LABEL: {{^}}global_store_v3i64:
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
  ret void
}

; Same store with align 1: 24 single-byte stores are expected (3 elements x
; 8 bytes), listed below as six groups of four.
; GCN-LABEL: {{^}}global_store_v3i64_unaligned:
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
  ret void
}

; 32-byte-aligned store to addrspace(3): a ds_write2_b64 followed by a
; ds_write_b64, checked in that order.
; GCN-LABEL: {{^}}local_store_v3i64:
; GCN: ds_write2_b64
; GCN: ds_write_b64
define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
  ret void
}

; addrspace(3) store with align 1: 24 ds_write_b8 instructions are expected,
; again listed as six groups of four.
; GCN-LABEL: {{^}}local_store_v3i64_unaligned:
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
define amdgpu_kernel void @local_store_v3i64_unaligned(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 1
  ret void
}

; Truncate to <3 x i32>, then store to addrspace(1): a dwordx2 store and a
; single dword store, in either order.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i32:
; GCN-DAG: buffer_store_dwordx2
; GCN-DAG: buffer_store_dword v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i64> %x) {
  %narrow = trunc <3 x i64> %x to <3 x i32>
  store <3 x i32> %narrow, <3 x i32> addrspace(1)* %out
  ret void
}

; Truncate to <3 x i16>, then store to addrspace(1): a short store and a dword
; store, in either order.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i16:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_dword v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i64> %x) {
  %narrow = trunc <3 x i64> %x to <3 x i16>
  store <3 x i16> %narrow, <3 x i16> addrspace(1)* %out
  ret void
}


; Truncate to <3 x i8>, then store to addrspace(1): a short store and a byte
; store, in either order.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i8:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_byte v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i8(<3 x i8> addrspace(1)* %out, <3 x i64> %x) {
  %narrow = trunc <3 x i64> %x to <3 x i8>
  store <3 x i8> %narrow, <3 x i8> addrspace(1)* %out
  ret void
}

; Truncate to <3 x i1>, then store to addrspace(1): three byte-store patterns.
; NOTE(review): the overlap flag on the command lines lets identical DAG-style
; patterns match the same output line, so these three directives may not prove
; that three distinct stores are emitted - confirm whether that is acceptable.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i1:
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i1(<3 x i1> addrspace(1)* %out, <3 x i64> %x) {
  %narrow = trunc <3 x i64> %x to <3 x i1>
  store <3 x i1> %narrow, <3 x i1> addrspace(1)* %out
  ret void
}