Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 
      6 ; FUNC-LABEL: {{^}}or_v2i32:
      7 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
      8 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
      9 
     10 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     11 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     12 define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ; stores (%in[0] | %in[1]) to %out, element-wise
     13   %rhs.ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 ; address of the second <2 x i32> behind %in
     14   %lhs = load <2 x i32>, <2 x i32> addrspace(1)* %in
     15   %rhs = load <2 x i32>, <2 x i32> addrspace(1)* %rhs.ptr
     16   %ored = or <2 x i32> %lhs, %rhs ; both operands come from memory
     17   store <2 x i32> %ored, <2 x i32> addrspace(1)* %out
     18   ret void
     19 }
     20 
     21 ; FUNC-LABEL: {{^}}or_v4i32:
     22 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     23 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     24 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     25 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     26 
     27 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     28 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     29 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     30 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     31 define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; stores (%in[0] | %in[1]) to %out, element-wise
     32   %rhs.ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 ; address of the second <4 x i32> behind %in
     33   %lhs = load <4 x i32>, <4 x i32> addrspace(1)* %in
     34   %rhs = load <4 x i32>, <4 x i32> addrspace(1)* %rhs.ptr
     35   %ored = or <4 x i32> %lhs, %rhs ; both operands come from memory
     36   store <4 x i32> %ored, <4 x i32> addrspace(1)* %out
     37   ret void
     38 }
     39 
     40 ; FUNC-LABEL: {{^}}scalar_or_i32:
     41 ; SI: s_or_b32
     42 define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { ; stores (%a | %b) to %out
     43   %combined = or i32 %a, %b ; both operands are by-value arguments, nothing is loaded
     44   store i32 %combined, i32 addrspace(1)* %out
     45   ret void
     46 }
     47 
     48 ; FUNC-LABEL: {{^}}vector_or_i32:
     49 ; SI: v_or_b32_e32 v{{[0-9]}}
     50 define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) { ; stores (*%a | %b) to %out
     51   %lhs = load i32, i32 addrspace(1)* %a ; left operand comes from memory
     52   %combined = or i32 %lhs, %b ; right operand is the by-value argument
     53   store i32 %combined, i32 addrspace(1)* %out
     54   ret void
     55 }
     56 
     57 ; FUNC-LABEL: {{^}}scalar_or_literal_i32:
     58 ; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
     59 define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) { ; stores (%a | 99999) to %out
     60   %with.literal = or i32 %a, 99999 ; 99999 == 0x1869f, the literal the check line above expects
     61   store i32 %with.literal, i32 addrspace(1)* %out, align 4
     62   ret void
     63 }
     64 
     65 ; FUNC-LABEL: {{^}}vector_or_literal_i32:
     66 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
     67 define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { ; stores (*%a | 65535) to %out; %b is never referenced
     68   %lhs = load i32, i32 addrspace(1)* %a, align 4
     69   %with.literal = or i32 %lhs, 65535 ; 65535 == 0xffff, the literal the check line above expects
     70   store i32 %with.literal, i32 addrspace(1)* %out, align 4
     71   ret void
     72 }
     73 
     74 ; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
     75 ; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
     76 define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { ; stores (*%a | 4) to %out; %b is never referenced
     77   %lhs = load i32, i32 addrspace(1)* %a, align 4
     78   %with.imm = or i32 %lhs, 4 ; the check line above expects 4 as a direct operand of the OR
     79   store i32 %with.imm, i32 addrspace(1)* %out, align 4
     80   ret void
     81 }
     82 
     83 ; FUNC-LABEL: {{^}}scalar_or_i64:
     84 ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
     85 ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
     86 
     87 ; SI: s_or_b64
     88 define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { ; stores (%a | %b) to %out
     89   %combined = or i64 %a, %b ; both 64-bit operands are by-value arguments
     90   store i64 %combined, i64 addrspace(1)* %out
     91   ret void
     92 }
     93 
     94 ; FUNC-LABEL: {{^}}vector_or_i64:
     95 ; SI: v_or_b32_e32 v{{[0-9]}}
     96 ; SI: v_or_b32_e32 v{{[0-9]}}
     97 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { ; stores (*%a | *%b) to %out
     98   %loada = load i64, i64 addrspace(1)* %a, align 8
     99   %loadb = load i64, i64 addrspace(1)* %b, align 8 ; was loaded from %a, which made the OR (x | x) and left %b unused
    100   %or = or i64 %loada, %loadb ; two distinct memory operands, one per pointer argument
    101   store i64 %or, i64 addrspace(1)* %out
    102   ret void
    103 }
    104 
    105 ; FUNC-LABEL: {{^}}scalar_vector_or_i64:
    106 ; SI: v_or_b32_e32 v{{[0-9]}}
    107 ; SI: v_or_b32_e32 v{{[0-9]}}
    108 define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) { ; stores (*%a | %b) to %out
    109   %lhs = load i64, i64 addrspace(1)* %a ; left operand comes from memory
    110   %combined = or i64 %lhs, %b ; right operand is the by-value argument
    111   store i64 %combined, i64 addrspace(1)* %out
    112   ret void
    113 }
    114 
    115 ; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
    116 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
    117 ; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
    118 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
    119 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
    120 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
    121 ; SI: s_endpgm
    122 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { ; stores (*%a | 22470723082367) to %out; %b is never referenced
    123   %lhs = load i64, i64 addrspace(1)* %a, align 8
    124   %with.literal = or i64 %lhs, 22470723082367 ; == 0x0000146fdf77987f: high half 0x146f, low half 0xdf77987f, as in the check lines above
    125   store i64 %with.literal, i64 addrspace(1)* %out
    126   ret void
    127 }
    128 
    129 ; FIXME: The v_or_b32 with 0 for the high half of the i64 immediate is a no-op and should really be removed.
    130 ; FUNC-LABEL: {{^}}vector_or_i64_imm:
    131 ; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
    132 ; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
    133 ; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
    134 ; SI: s_endpgm
    135 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { ; stores (*%a | 8) to %out; %b is never referenced
    136   %lhs = load i64, i64 addrspace(1)* %a, align 8
    137   %with.imm = or i64 %lhs, 8 ; low 32 bits of the constant are 8, high 32 bits are 0
    138   store i64 %with.imm, i64 addrspace(1)* %out
    139   ret void
    140 }
    141 
    142 ; FUNC-LABEL: {{^}}trunc_i64_or_to_i32:
    143 ; SI: s_load_dword s[[SREG0:[0-9]+]]
    144 ; SI: s_load_dword s[[SREG1:[0-9]+]]
    145 ; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
    146 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
    147 ; SI: buffer_store_dword [[VRESULT]],
    148 define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { ; stores the i32 truncation of (%b | %a) to %out
    149   %wide.or = or i64 %b, %a ; operand order (%b first) kept as in the original test
    150   %narrow = trunc i64 %wide.or to i32 ; only the truncated value is stored
    151   store i32 %narrow, i32 addrspace(1)* %out, align 8
    152   ret void
    153 }
    154 
    155 ; FUNC-LABEL: {{^}}or_i1:
    156 ; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
    157 
    158 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
    159 define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { ; stores zext((*%in0 >= 0.0) | (*%in1 >= 0.0)) to %out
    160   %x = load float, float addrspace(1)* %in0
    161   %y = load float, float addrspace(1)* %in1
    162   %x.ge0 = fcmp oge float %x, 0.000000e+00 ; ordered compare
    163   %y.ge0 = fcmp oge float %y, 0.000000e+00
    164   %either = or i1 %x.ge0, %y.ge0 ; the OR under test operates on the two i1 compare results
    165   %widened = zext i1 %either to i32
    166   store i32 %widened, i32 addrspace(1)* %out
    167   ret void
    168 }
    169 
    170 ; FUNC-LABEL: {{^}}s_or_i1:
    171 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
    172 define void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) { ; stores ((%a == %b) | (%c == %d)) to %out as an i1
    173   %ab.eq = icmp eq i32 %a, %b ; all four compared values are by-value arguments
    174   %cd.eq = icmp eq i32 %c, %d
    175   %either = or i1 %ab.eq, %cd.eq
    176   store i1 %either, i1 addrspace(1)* %out
    177   ret void
    178 }
    179