; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Test combine to reduce the width of a 64-bit shift to 32-bit if
; truncated to 16-bit.
;
; Each function below shifts a wide integer and truncates the result. The
; check lines pin whether the expected code uses a 32-bit shift of the low
; dword or keeps the full 64-bit shift.

; Logical shift right by 16, truncated to i16. The result is bits 16..31 of
; the input, all of which live in the low dword, so a single 32-bit shift is
; expected.
; GCN-LABEL: {{^}}trunc_srl_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_16_to_i16(i64 %x) {
  %shift = lshr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Logical shift right by 17, truncated to i16. The result is bits 17..32 of
; the input, which straddle both dwords, so the 64-bit shift is expected to
; remain.
; GCN-LABEL: {{^}}trunc_srl_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_17_to_i16(i64 %x) {
  %shift = lshr i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Non-power-of-two integer widths (i55 truncated to i15). Despite the "_16_"
; in the function name, the shift amount used here is 15; the expected
; output is a 32-bit shift by 15 followed by the 16-bit add.
; GCN-LABEL: {{^}}trunc_srl_i55_16_to_i15:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 15, v0
; GCN-NEXT: v_add_u16_e32 v0, 4, v0
; GCN-NEXT: s_setpc_b64
define i15 @trunc_srl_i55_16_to_i15(i55 %x) {
  %shift = lshr i55 %x, 15
  %trunc = trunc i55 %shift to i15
  %add = add i15 %trunc, 4
  ret i15 %add
}

; Arithmetic shift right by 16, truncated to i16. The sign-extended bits are
; discarded by the truncation, so a 32-bit logical shift is expected.
; GCN-LABEL: {{^}}trunc_sra_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_sra_i64_16_to_i16(i64 %x) {
  %shift = ashr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Arithmetic shift right by 17, truncated to i16. The kept bits span both
; dwords, so a 64-bit shift is expected; the expected instruction is the
; logical form because the sign bits are truncated away.
; GCN-LABEL: {{^}}trunc_sra_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_lshrrev_b64 v[0:1], 17, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_sra_i64_17_to_i16(i64 %x) {
  %shift = ashr i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Shift left by 16, truncated to i16. The low 16 bits of the shifted value
; are all zero, so the expected output is a constant 0.
; GCN-LABEL: {{^}}trunc_shl_i64_16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_shl_i64_16_to_i16(i64 %x) {
  %shift = shl i64 %x, 16
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Shift left by 17, truncated to i16. As with the shift by 16, the low 16
; bits are zero and a constant 0 is expected.
; GCN-LABEL: {{^}}trunc_shl_i64_17_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_shl_i64_17_to_i16(i64 %x) {
  %shift = shl i64 %x, 17
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Vector form of the shift-by-16 case. The expected output shifts v0 with a
; 32-bit shift and merges in the upper half of v2 (presumably the low dword
; of the second element; confirm against the calling convention) through a
; 0xffff0000 mask.
; GCN-LABEL: {{^}}trunc_srl_v2i64_16_to_v2i16:
; GCN: s_waitcnt
; GCN-DAG: v_lshrrev_b32_e32 v0, 16, v0
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000
; GCN: v_and_or_b32 v0, v2, [[MASK]], v0
; GCN-NEXT: s_setpc_b64
define <2 x i16> @trunc_srl_v2i64_16_to_v2i16(<2 x i64> %x) {
  %shift = lshr <2 x i64> %x, <i64 16, i64 16>
  %trunc = trunc <2 x i64> %shift to <2 x i16>
  ret <2 x i16> %trunc
}

; Kernel variant of the shift-by-16 case. The expected output loads a single
; dword, performs the shift and the or with scalar instructions, then stores
; the 16-bit result.
; GCN-LABEL: {{^}}s_trunc_srl_i64_16_to_i16:
; GCN: s_load_dword [[VAL:s[0-9]+]]
; GCN: s_lshr_b32 [[VAL_SHIFT:s[0-9]+]], [[VAL]], 16
; GCN: s_or_b32 [[RESULT:s[0-9]+]], [[VAL_SHIFT]], 4
; GCN: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
; GCN: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
define amdgpu_kernel void @s_trunc_srl_i64_16_to_i16(i64 %x) {
  %shift = lshr i64 %x, 16
  %trunc = trunc i64 %shift to i16
  %or = or i16 %trunc, 4
  store i16 %or, i16 addrspace(1)* undef
  ret void
}

; Variable shift amount masked with 15, so it is known to be in 0..15. The
; kept bits never leave the low dword and a 32-bit shift is expected.
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask15_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 15, v2
; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_var_mask15_to_i16(i64 %x, i64 %amt) {
  %amt.masked = and i64 %amt, 15
  %shift = lshr i64 %x, %amt.masked
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Variable shift amount masked with 16 (amount is either 0 or 16). The
; expected output keeps the 64-bit shift.
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask16_to_i16:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v2, 16, v2
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
; GCN-NEXT: s_setpc_b64
define i16 @trunc_srl_i64_var_mask16_to_i16(i64 %x, i64 %amt) {
  %amt.masked = and i64 %amt, 16
  %shift = lshr i64 %x, %amt.masked
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}

; Variable shift amount masked with 31, so it is known to be in 0..31. The
; kept bits can reach into the high dword and the 64-bit shift is expected
; to remain.
; GCN-LABEL: {{^}}trunc_srl_i64_var_mask31_to_i16:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 31, v2
; GCN-NEXT: v_lshrrev_b64 v[0:1], v2, v[0:1]
; GCN-NEXT: s_setpc_b64 s[30:31]
define i16 @trunc_srl_i64_var_mask31_to_i16(i64 %x, i64 %amt) {
  %amt.masked = and i64 %amt, 31
  %shift = lshr i64 %x, %amt.masked
  %trunc = trunc i64 %shift to i16
  ret i16 %trunc
}