Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
      5 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
      6 
      7 ; Check that ds_read_b128/ds_write_b128 are selected when -mattr=+enable-ds128 is set.
      8 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
      9 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
     10 
     11 ; FUNC-LABEL: {{^}}local_load_f64:
     12 ; SICIVI: s_mov_b32 m0
     13 ; GFX9-NOT: m0
     14 ; On the SICIVI runs m0 must be written before the LDS read; on GFX9 m0 must not appear before it.
     15 ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
     16 ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
     17 
     18 ; EG: LDS_READ_RET
     19 ; EG: LDS_READ_RET
     20 define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 {
     21   %ld = load double, double addrspace(3)* %in
     22   store double %ld, double addrspace(3)* %out
     23   ret void
     24 }
     25 
     26 ; FUNC-LABEL: {{^}}local_load_v2f64:
     27 ; SICIVI: s_mov_b32 m0
     28 ; GFX9-NOT: m0
     29 ; On the SICIVI runs m0 must be written before the LDS read; on GFX9 m0 must not appear before it.
     30 ; GCN: ds_read2_b64
     31 
     32 ; EG: LDS_READ_RET
     33 ; EG: LDS_READ_RET
     34 ; EG: LDS_READ_RET
     35 ; EG: LDS_READ_RET
     36 define amdgpu_kernel void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 {
     37 entry:
     38   %ld = load <2 x double>, <2 x double> addrspace(3)* %in
     39   store <2 x double> %ld, <2 x double> addrspace(3)* %out
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}local_load_v3f64:
     44 ; SICIVI: s_mov_b32 m0
     45 ; GFX9-NOT: m0
     46 ; On the SICIVI runs m0 must be written before the LDS reads; on GFX9 m0 must not appear before them.
     47 ; GCN-DAG: ds_read2_b64
     48 ; GCN-DAG: ds_read_b64
     49 
     50 ; EG: LDS_READ_RET
     51 ; EG: LDS_READ_RET
     52 ; EG: LDS_READ_RET
     53 ; EG: LDS_READ_RET
     54 ; EG: LDS_READ_RET
     55 ; EG: LDS_READ_RET
     56 define amdgpu_kernel void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 {
     57 entry:
     58   %ld = load <3 x double>, <3 x double> addrspace(3)* %in
     59   store <3 x double> %ld, <3 x double> addrspace(3)* %out
     60   ret void
     61 }
     62 
     63 ; FUNC-LABEL: {{^}}local_load_v4f64:
     64 ; SICIVI: s_mov_b32 m0
     65 ; GFX9-NOT: m0
     66 ; On the SICIVI runs m0 must be written before the LDS reads; on GFX9 m0 must not appear before them.
     67 ; GCN: ds_read2_b64
     68 ; GCN: ds_read2_b64
     69 
     70 ; EG: LDS_READ_RET
     71 ; EG: LDS_READ_RET
     72 ; EG: LDS_READ_RET
     73 ; EG: LDS_READ_RET
     74 
     75 ; EG: LDS_READ_RET
     76 ; EG: LDS_READ_RET
     77 ; EG: LDS_READ_RET
     78 ; EG: LDS_READ_RET
     79 define amdgpu_kernel void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 {
     80 entry:
     81   %ld = load <4 x double>, <4 x double> addrspace(3)* %in
     82   store <4 x double> %ld, <4 x double> addrspace(3)* %out
     83   ret void
     84 }
     85 
     86 ; FUNC-LABEL: {{^}}local_load_v8f64:
     87 ; SICIVI: s_mov_b32 m0
     88 ; GFX9-NOT: m0
     89 ; On the SICIVI runs m0 must be written before the LDS reads; on GFX9 m0 must not appear before them.
     90 ; GCN: ds_read2_b64
     91 ; GCN: ds_read2_b64
     92 ; GCN: ds_read2_b64
     93 ; GCN: ds_read2_b64
     94 
     95 ; EG: LDS_READ_RET
     96 ; EG: LDS_READ_RET
     97 ; EG: LDS_READ_RET
     98 ; EG: LDS_READ_RET
     99 ; EG: LDS_READ_RET
    100 ; EG: LDS_READ_RET
    101 ; EG: LDS_READ_RET
    102 ; EG: LDS_READ_RET
    103 ; EG: LDS_READ_RET
    104 ; EG: LDS_READ_RET
    105 ; EG: LDS_READ_RET
    106 ; EG: LDS_READ_RET
    107 ; EG: LDS_READ_RET
    108 ; EG: LDS_READ_RET
    109 ; EG: LDS_READ_RET
    110 ; EG: LDS_READ_RET
    111 define amdgpu_kernel void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 {
    112 entry:
    113   %ld = load <8 x double>, <8 x double> addrspace(3)* %in
    114   store <8 x double> %ld, <8 x double> addrspace(3)* %out
    115   ret void
    116 }
    117 
    118 ; FUNC-LABEL: {{^}}local_load_v16f64:
    119 ; SICIVI: s_mov_b32 m0
    120 ; GFX9-NOT: m0
    121 ; On the SICIVI runs m0 must be written before the LDS reads; on GFX9 m0 must not appear before them.
    122 ; GCN: ds_read2_b64
    123 ; GCN: ds_read2_b64
    124 ; GCN: ds_read2_b64
    125 ; GCN: ds_read2_b64
    126 ; GCN: ds_read2_b64
    127 ; GCN: ds_read2_b64
    128 ; GCN: ds_read2_b64
    129 ; GCN: ds_read2_b64
    130 
    131 ; EG: LDS_READ_RET
    132 ; EG: LDS_READ_RET
    133 ; EG: LDS_READ_RET
    134 ; EG: LDS_READ_RET
    135 
    136 ; EG: LDS_READ_RET
    137 ; EG: LDS_READ_RET
    138 ; EG: LDS_READ_RET
    139 ; EG: LDS_READ_RET
    140 
    141 ; EG: LDS_READ_RET
    142 ; EG: LDS_READ_RET
    143 ; EG: LDS_READ_RET
    144 ; EG: LDS_READ_RET
    145 
    146 ; EG: LDS_READ_RET
    147 ; EG: LDS_READ_RET
    148 ; EG: LDS_READ_RET
    149 ; EG: LDS_READ_RET
    150 
    151 ; EG: LDS_READ_RET
    152 ; EG: LDS_READ_RET
    153 ; EG: LDS_READ_RET
    154 ; EG: LDS_READ_RET
    155 
    156 ; EG: LDS_READ_RET
    157 ; EG: LDS_READ_RET
    158 ; EG: LDS_READ_RET
    159 ; EG: LDS_READ_RET
    160 
    161 ; EG: LDS_READ_RET
    162 ; EG: LDS_READ_RET
    163 ; EG: LDS_READ_RET
    164 ; EG: LDS_READ_RET
    165 
    166 ; EG: LDS_READ_RET
    167 ; EG: LDS_READ_RET
    168 ; EG: LDS_READ_RET
    169 ; EG: LDS_READ_RET
    170 define amdgpu_kernel void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 {
    171 entry:
    172   %ld = load <16 x double>, <16 x double> addrspace(3)* %in
    173   store <16 x double> %ld, <16 x double> addrspace(3)* %out
    174   ret void
    175 }
    176 
    177 ; A 16-byte-aligned <2 x double> load/store should select the 128-bit DS instructions on the CIVI runs.
    178 ; FUNC-LABEL: {{^}}local_load_v2f64_to_128:
    179 
    180 ; CIVI: ds_read_b128
    181 ; CIVI: ds_write_b128
    182 
    183 ; EG: LDS_READ_RET
    184 ; EG: LDS_READ_RET
    185 ; EG: LDS_READ_RET
    186 ; EG: LDS_READ_RET
    187 define amdgpu_kernel void @local_load_v2f64_to_128(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) {
    188 entry:
    189   %vec = load <2 x double>, <2 x double> addrspace(3)* %in, align 16
    190   store <2 x double> %vec, <2 x double> addrspace(3)* %out, align 16
    191   ret void
    192 }
    193 
    194 attributes #0 = { nounwind } ; attached to every kernel in this file except local_load_v2f64_to_128
    195