Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s
      2 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s
      3 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s
      4 
      5 ; GCN: 'extractelement_v2i32'
      6 ; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i32>
        ; Constant-index extract from <2 x i32>: read the vector at %vaddr, take
        ; element 1 and write it to %out. All RUN lines expect a cost of 0.
      7 define amdgpu_kernel void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
      8   %src = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
      9   %lane1 = extractelement <2 x i32> %src, i32 1
     10   store i32 %lane1, i32 addrspace(1)* %out
     11   ret void
     12 }
     13 
     14 ; GCN: 'extractelement_v2f32'
     15 ; GCN: estimated cost of 0 for {{.*}} extractelement <2 x float>
        ; Constant-index extract from <2 x float>: read the vector at %vaddr, take
        ; element 1 and write it to %out. All RUN lines expect a cost of 0.
     16 define amdgpu_kernel void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) {
     17   %src = load <2 x float>, <2 x float> addrspace(1)* %vaddr
     18   %lane1 = extractelement <2 x float> %src, i32 1
     19   store float %lane1, float addrspace(1)* %out
     20   ret void
     21 }
     22 
     23 ; GCN: 'extractelement_v3i32'
     24 ; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i32>
        ; Constant-index extract from <3 x i32>: read the vector at %vaddr, take
        ; element 1 and write it to %out. All RUN lines expect a cost of 0.
     25 define amdgpu_kernel void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) {
     26   %src = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
     27   %lane1 = extractelement <3 x i32> %src, i32 1
     28   store i32 %lane1, i32 addrspace(1)* %out
     29   ret void
     30 }
     31 
     32 ; GCN: 'extractelement_v4i32'
     33 ; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i32>
        ; Constant-index extract from <4 x i32>: read the vector at %vaddr, take
        ; element 1 and write it to %out. All RUN lines expect a cost of 0.
     34 define amdgpu_kernel void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) {
     35   %src = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
     36   %lane1 = extractelement <4 x i32> %src, i32 1
     37   store i32 %lane1, i32 addrspace(1)* %out
     38   ret void
     39 }
     40 
     41 ; GCN: 'extractelement_v8i32'
     42 ; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i32>
        ; Constant-index extract from <8 x i32>: read the vector at %vaddr, take
        ; element 1 and write it to %out. All RUN lines expect a cost of 0.
     43 define amdgpu_kernel void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr) {
     44   %src = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
     45   %lane1 = extractelement <8 x i32> %src, i32 1
     46   store i32 %lane1, i32 addrspace(1)* %out
     47   ret void
     48 }
     49 
     50 ; Dynamic (non-constant) index: the expected cost is non-zero (checked as 2 below).
     51 ; GCN: 'extractelement_v8i32_dynindex'
     52 ; GCN: estimated cost of 2 for {{.*}} extractelement <8 x i32>
        ; Variable-index extract from <8 x i32>: the element index is the %idx
        ; kernel argument rather than a constant. All RUN lines expect a cost of 2.
     53 define amdgpu_kernel void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) {
     54   %src = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
     55   %lane = extractelement <8 x i32> %src, i32 %idx
     56   store i32 %lane, i32 addrspace(1)* %out
     57   ret void
     58 }
     59 
     60 ; GCN: 'extractelement_v2i64'
     61 ; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i64>
        ; Constant-index extract from <2 x i64> (index given as i64 1): read the
        ; vector at %vaddr and write the element to %out. All RUN lines expect 0.
     62 define amdgpu_kernel void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
     63   %src = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
     64   %lane1 = extractelement <2 x i64> %src, i64 1
     65   store i64 %lane1, i64 addrspace(1)* %out
     66   ret void
     67 }
     68 
     69 ; GCN: 'extractelement_v3i64'
     70 ; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i64>
        ; Constant-index extract from <3 x i64> (index given as i64 1): read the
        ; vector at %vaddr and write the element to %out. All RUN lines expect 0.
     71 define amdgpu_kernel void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) {
     72   %src = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
     73   %lane1 = extractelement <3 x i64> %src, i64 1
     74   store i64 %lane1, i64 addrspace(1)* %out
     75   ret void
     76 }
     77 
     78 ; GCN: 'extractelement_v4i64'
     79 ; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i64>
        ; Constant-index extract from <4 x i64> (index given as i64 1): read the
        ; vector at %vaddr and write the element to %out. All RUN lines expect 0.
     80 define amdgpu_kernel void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) {
     81   %src = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
     82   %lane1 = extractelement <4 x i64> %src, i64 1
     83   store i64 %lane1, i64 addrspace(1)* %out
     84   ret void
     85 }
     86 
     87 ; GCN: 'extractelement_v8i64'
     88 ; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i64>
        ; Constant-index extract from <8 x i64> (index given as i64 1): read the
        ; vector at %vaddr and write the element to %out. All RUN lines expect 0.
     89 define amdgpu_kernel void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) {
     90   %src = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
     91   %lane1 = extractelement <8 x i64> %src, i64 1
     92   store i64 %lane1, i64 addrspace(1)* %out
     93   ret void
     94 }
     95 
     96 ; GCN: 'extractelement_v4i8'
     97 ; GCN: estimated cost of 1 for {{.*}} extractelement <4 x i8>
        ; Constant-index extract from <4 x i8> (index given as i8 1): read the
        ; vector at %vaddr and write the element to %out. All RUN lines expect 1.
     98 define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) {
     99   %src = load <4 x i8>, <4 x i8> addrspace(1)* %vaddr
    100   %lane1 = extractelement <4 x i8> %src, i8 1
    101   store i8 %lane1, i8 addrspace(1)* %out
    102   ret void
    103 }
    104 
    105 ; GCN: 'extractelement_0_v2i16':
    106 ; CI: estimated cost of 1 for {{.*}} extractelement <2 x i16> %vec, i16 0
    107 ; VI: estimated cost of 0 for {{.*}} extractelement <2 x i16>
    108 ; GFX9: estimated cost of 0 for {{.*}} extractelement <2 x i16>
        ; Extracts element 0 (constant index) of a <2 x i16> loaded from %vaddr and
        ; stores it to %out. The expected cost differs per RUN line: 1 for the run
        ; without -mcpu, 0 for the -mcpu=fiji and -mcpu=gfx900 runs.
        ; NOTE: the first check above matches the operand text "%vec, i16 0"
        ; literally, so the value name %vec must not be renamed.
    109 define amdgpu_kernel void @extractelement_0_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
    110   %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
    111   %elt = extractelement <2 x i16> %vec, i16 0
    112   store i16 %elt, i16 addrspace(1)* %out
    113   ret void
    114 }
    115 
    116 ; GCN: 'extractelement_1_v2i16':
    117 ; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16>
        ; Constant-index extract of element 1 from <2 x i16>: read the vector at
        ; %vaddr and write the element to %out. All RUN lines expect a cost of 1.
    118 define amdgpu_kernel void @extractelement_1_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
    119   %src = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
    120   %lane1 = extractelement <2 x i16> %src, i16 1
    121   store i16 %lane1, i16 addrspace(1)* %out
    122   ret void
    123 }
    124 
    125 ; GCN: 'extractelement_var_v2i16'
    126 ; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16>
        ; Variable-index extract from <2 x i16>: the element index is the %idx
        ; kernel argument rather than a constant. All RUN lines expect a cost of 1.
    127 define amdgpu_kernel void @extractelement_var_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, i32 %idx) {
    128   %src = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
    129   %lane = extractelement <2 x i16> %src, i32 %idx
    130   store i16 %lane, i16 addrspace(1)* %out
    131   ret void
    132 }
    133