Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
      2 
      3 ; Test combine to reduce the width of a 64-bit shift to 32-bit if
      4 ; truncated to 16-bit.
      5 
      6 ; GCN-LABEL: {{^}}trunc_srl_i64_16_to_i16:
      7 ; GCN: s_waitcnt
      8 ; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
      9 ; GCN-NEXT: s_setpc_b64
     10 define i16 @trunc_srl_i64_16_to_i16(i64 %x) { ; Logical shift right by constant 16, then truncate to i16; the checks above expect a single 32-bit shift of v0.
     11   %shift = lshr i64 %x, 16 ; 64-bit shift in the IR; only bits 16..31 of %x survive the truncation below
     12   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits of the shifted value
     13   ret i16 %trunc
     14 }
     15 
     16 ; GCN-LABEL: {{^}}trunc_srl_i64_17_to_i16:
     17 ; GCN: s_waitcnt
     18 ; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
     19 ; GCN-NEXT: s_setpc_b64
     20 define i16 @trunc_srl_i64_17_to_i16(i64 %x) { ; Negative case for the narrowing: shift by 17, so the checks above expect the full 64-bit shift to remain.
     21   %shift = lshr i64 %x, 17 ; the truncated result needs bits 17..32 of %x; bit 32 lies outside the low 32 bits
     22   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits of the shifted value
     23   ret i16 %trunc
     24 }
     25 
     26 ; GCN-LABEL: {{^}}trunc_srl_i55_16_to_i15:
     27 ; GCN: s_waitcnt
     28 ; GCN-NEXT: v_lshrrev_b32_e32 v0, 15, v0
     29 ; GCN-NEXT: v_add_u16_e32 v0, 4, v0
     30 ; GCN-NEXT: s_setpc_b64
     31 define i15 @trunc_srl_i55_16_to_i15(i55 %x) { ; Non-power-of-two widths: i55 shifted right then truncated to i15; the checks above expect a 32-bit shift followed by a 16-bit add.
     32   %shift = lshr i55 %x, 15 ; NOTE(review): the function name says 16 but the shift amount (here and in the checks) is 15 - confirm which is intended
     33   %trunc = trunc i55 %shift to i15 ; keeps bits 15..29 of %x, all within the low 32 bits
     34   %add = add i15 %trunc, 4 ; extra use of the truncated value; matched by the v_add_u16 check line
     35   ret i15 %add
     36 }
     37 
     38 ; GCN-LABEL: {{^}}trunc_sra_i64_16_to_i16:
     39 ; GCN: s_waitcnt
     40 ; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
     41 ; GCN-NEXT: s_setpc_b64
     42 define i16 @trunc_sra_i64_16_to_i16(i64 %x) { ; Arithmetic-shift variant of the shift-by-16 case; the checks above expect a 32-bit logical shift of v0.
     43   %shift = ashr i64 %x, 16 ; sign fill only affects the top bits of the i64; the kept bits are 16..31 of %x
     44   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits, so the sign-filled bits are discarded
     45   ret i16 %trunc
     46 }
     47 
     48 ; GCN-LABEL: {{^}}trunc_sra_i64_17_to_i16:
     49 ; GCN: s_waitcnt
     50 ; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
     51 ; GCN-NEXT: s_setpc_b64
     52 define i16 @trunc_sra_i64_17_to_i16(i64 %x) { ; Arithmetic-shift variant of the shift-by-17 case; the checks above expect a 64-bit logical shift (not narrowed to 32 bits).
     53   %shift = ashr i64 %x, 17 ; the truncated result needs bits 17..32 of %x; bit 32 lies outside the low 32 bits
     54   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits, so the sign-filled bits are discarded
     55   ret i16 %trunc
     56 }
     57 
     58 ; GCN-LABEL: {{^}}trunc_shl_i64_16_to_i16:
     59 ; GCN: s_waitcnt
     60 ; GCN-NEXT: v_mov_b32_e32 v0, 0
     61 ; GCN-NEXT: s_setpc_b64
     62 define i16 @trunc_shl_i64_16_to_i16(i64 %x) { ; Left-shift variant: the checks above expect the whole computation to fold to the constant 0.
     63   %shift = shl i64 %x, 16 ; shifting left by 16 leaves the low 16 bits all zero
     64   %trunc = trunc i64 %shift to i16 ; only those zero bits are kept
     65   ret i16 %trunc
     66 }
     67 
     68 ; GCN-LABEL: {{^}}trunc_shl_i64_17_to_i16:
     69 ; GCN: s_waitcnt
     70 ; GCN-NEXT: v_mov_b32_e32 v0, 0
     71 ; GCN-NEXT: s_setpc_b64
     72 define i16 @trunc_shl_i64_17_to_i16(i64 %x) { ; Left shift by 17: as with 16, the checks above expect the result to fold to the constant 0.
     73   %shift = shl i64 %x, 17 ; shifting left by 17 leaves the low 17 bits all zero
     74   %trunc = trunc i64 %shift to i16 ; only zero bits are kept
     75   ret i16 %trunc
     76 }
     77 
     78 ; GCN-LABEL: {{^}}trunc_srl_v2i64_16_to_v2i16:
     79 ; GCN: s_waitcnt
     80 ; GCN-DAG: v_lshrrev_b32_e32 v0, 16, v0
     81 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000
     82 ; GCN: v_and_or_b32 v0, v2, [[MASK]], v0
     83 ; GCN-NEXT: s_setpc_b64
     84 define <2 x i16> @trunc_srl_v2i64_16_to_v2i16(<2 x i64> %x) { ; Vector form of the shift-by-16 case; the checks above expect a 32-bit shift of v0, a 0xffff0000 mask, and one v_and_or_b32 producing v0.
     85   %shift = lshr <2 x i64> %x, <i64 16, i64 16> ; both lanes shifted by the same constant 16
     86   %trunc = trunc <2 x i64> %shift to <2 x i16> ; presumably v2 in the checks holds the low dword of lane 1, whose bits 16..31 are kept in place by the mask - confirm against the calling convention
     87   ret <2 x i16> %trunc
     88 }
     89 
     90 ; GCN-LABEL: {{^}}s_trunc_srl_i64_16_to_i16:
     91 ; GCN: s_load_dword [[VAL:s[0-9]+]]
     92 ; GCN: s_lshr_b32 [[VAL_SHIFT:s[0-9]+]], [[VAL]], 16
     93 ; GCN: s_or_b32 [[RESULT:s[0-9]+]], [[VAL_SHIFT]], 4
     94 ; GCN: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
     95 ; GCN: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
     96 define amdgpu_kernel void @s_trunc_srl_i64_16_to_i16(i64 %x) { ; Kernel (scalar) form of the shift-by-16 case; the checks above expect a single-dword load, a 32-bit scalar shift and or, then a 16-bit global store.
     97   %shift = lshr i64 %x, 16 ; only bits 16..31 of %x survive the truncation below
     98   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits of the shifted value
     99   %add = or i16 %trunc, 4 ; note: despite the name %add this is a bitwise or, matched by the s_or_b32 check line
    100   store i16 %add, i16 addrspace(1)* undef ; store to an undef addrspace(1) pointer so the result has a use; the address itself is not meaningful
    101   ret void
    102 }
    103 
    104 ; GCN-LABEL: {{^}}trunc_srl_i64_var_mask15_to_i16:
    105 ; GCN: s_waitcnt
    106 ; GCN-NEXT: v_and_b32_e32 v1, 15, v2
    107 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0
    108 ; GCN-NEXT: s_setpc_b64
    109 define i16 @trunc_srl_i64_var_mask15_to_i16(i64 %x, i64 %amt) { ; Variable shift amount bounded to 0..15; the checks above expect a 32-bit mask and a 32-bit shift of v0.
    110   %amt.masked = and i64 %amt, 15 ; bounds the shift amount to at most 15
    111   %shift = lshr i64 %x, %amt.masked ; with amount <= 15 the kept bits are at most bit 30 of %x, inside the low 32 bits
    112   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits of the shifted value
    113   ret i16 %trunc
    114 }
    115 
    116 ; GCN-LABEL: {{^}}trunc_srl_i64_var_mask16_to_i16:
    117 ; GCN: s_waitcnt
    118 ; GCN-NEXT: v_and_b32_e32 v2, 16, v2
    119 ; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
    120 ; GCN-NEXT: s_setpc_b64
    121 define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) { ; Variable shift amount masked with 16; the checks above expect the 64-bit shift to remain.
    122   %amt.masked = and i64 %amt, 16 ; the masked amount is either 0 or 16
    123   %shift = lshr i64 %x, %amt.masked ; NOTE(review): amounts 0 or 16 would still keep the needed bits within the low 32, yet the checks pin the 64-bit form - presumably the combine's bound on the amount is conservative; confirm
    124   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits of the shifted value
    125   ret i16 %trunc
    126 }
    127 
    128 ; GCN-LABEL: {{^}}trunc_srl_i64_var_mask31_to_i16:
    129 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    130 ; GCN-NEXT: v_and_b32_e32 v2, 31, v2
    131 ; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
    132 ; GCN-NEXT: s_setpc_b64 s[30:31]
    133 define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) { ; Variable shift amount bounded to 0..31; the checks above expect the 64-bit shift to remain and spell out the full waitcnt and return operands.
    134   %amt.masked = and i64 %amt, 31 ; bounds the shift amount to at most 31
    135   %shift = lshr i64 %x, %amt.masked ; with amount up to 31 the kept bits can reach bit 46 of %x, beyond the low 32 bits
    136   %trunc = trunc i64 %shift to i16 ; keeps the low 16 bits of the shifted value
    137   ret i16 %trunc
    138 }
    139