Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=FAST64 -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=SLOW64 -check-prefix=GCN %s
      3 
      4 
      5 ; lshr (i64 x), c: c > 32 => reg_sequence lshr (i32 hi_32(x)), (c - 32), 0
      6 ; GCN-LABEL: {{^}}lshr_i64_35:
      7 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
      8 ; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
      9 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
     10 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     11 define void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 logical shift right by the constant 35
     12   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     13   %shr = lshr i64 %wide, 35 ; 35 - 32 = 3, the amount the check lines above expect on a 32-bit shift
     14   store i64 %shr, i64 addrspace(1)* %out ; 64-bit result written to %out
     15   ret void
     16 }
     17 
     18 ; GCN-LABEL: {{^}}lshr_i64_63:
     19 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
     20 ; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
     21 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
     22 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     23 define void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 logical shift right by the constant 63
     24   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     25   %shr = lshr i64 %wide, 63 ; 63 - 32 = 31, the amount the check lines above expect on a 32-bit shift
     26   store i64 %shr, i64 addrspace(1)* %out ; 64-bit result written to %out
     27   ret void
     28 }
     29 
     30 ; GCN-LABEL: {{^}}lshr_i64_33:
     31 ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
     32 ; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
     33 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
     34 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     35 define void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 logical shift right by the constant 33
     36   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     37   %shr = lshr i64 %wide, 33 ; 33 - 32 = 1, the amount the check lines above expect on a 32-bit shift
     38   store i64 %shr, i64 addrspace(1)* %out ; 64-bit result written to %out
     39   ret void
     40 }
     41 
     42 ; GCN-LABEL: {{^}}lshr_i64_32:
     43 ; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
     44 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
     45 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     46 define void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 logical shift right by exactly 32
     47   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     48   %shr = lshr i64 %wide, 32 ; the check lines above expect no shift instruction: one loaded dword becomes the low half, zero the high half
     49   store i64 %shr, i64 addrspace(1)* %out ; 64-bit result written to %out
     50   ret void
     51 }
     52 
     53 ; Make sure the and of the constant doesn't prevent bfe from forming
     54 ; after 64-bit shift is split.
     55 
     56 ; GCN-LABEL: {{^}}lshr_and_i64_35:
     57 ; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
     58 ; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[HI]], 8, 23
     59 ; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
     60 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
     61 define void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; mask followed by a logical right shift; note the shift amount used below is 40 even though the name says 35
     62   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     63   %masked = and i64 %wide, 9223372036854775807 ; 0x7fffffffffffffff
     64   %shr = lshr i64 %masked, 40 ; 40 - 32 = 8 is the field offset, 31 - 8 = 23 the field width in the expected bit-field extract
     65   store i64 %shr, i64 addrspace(1)* %out ; 64-bit result written to %out
     66   ret void
     67 }
     68 
     69 ; shl (i64 x), c: c > 32 => reg_sequence 0, (shl (i32 lo_32(x)), (c - 32))
     70 
     71 ; GCN-LABEL: {{^}}shl_i64_const_35:
     72 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
     73 ; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 3, [[VAL]]
     74 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
     75 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     76 define void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by the constant 35
     77   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     78   %shifted = shl i64 %wide, 35 ; 35 - 32 = 3, the amount the check lines above expect on a 32-bit shift
     79   store i64 %shifted, i64 addrspace(1)* %out ; 64-bit result written to %out
     80   ret void
     81 }
     82 
     83 ; GCN-LABEL: {{^}}shl_i64_const_32:
     84 ; GCN-DAG: buffer_load_dword v[[HI:[0-9]+]]
     85 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
     86 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     87 define void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by exactly 32
     88   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
     89   %shifted = shl i64 %wide, 32 ; the check lines above expect no shift instruction: one loaded dword becomes the high half, zero the low half
     90   store i64 %shifted, i64 addrspace(1)* %out ; 64-bit result written to %out
     91   ret void
     92 }
     93 
     94 ; GCN-LABEL: {{^}}shl_i64_const_63:
     95 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
     96 ; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 31, [[VAL]]
     97 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
     98 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
     99 define void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by the constant 63
    100   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
    101   %shifted = shl i64 %wide, 63 ; 63 - 32 = 31, the amount the check lines above expect on a 32-bit shift
    102   store i64 %shifted, i64 addrspace(1)* %out ; 64-bit result written to %out
    103   ret void
    104 }
    105 
    106 ; ashr (i64 x), 63 => reg_sequence (ashr (i32 hi_32(x)), 31), (ashr (i32 hi_32(x)), 31)
    107 
    108 ; GCN-LABEL: {{^}}ashr_i64_const_32:
    109 define void @ashr_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 arithmetic shift right by exactly 32; only the label line above is checked for this function
    110   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
    111   %sar = ashr i64 %wide, 32 ; sign-propagating shift
    112   store i64 %sar, i64 addrspace(1)* %out ; 64-bit result written to %out
    113   ret void
    114 }
    115 
    116 ; GCN-LABEL: {{^}}ashr_i64_const_63:
    117 define void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 arithmetic shift right by 63; only the label line above is checked for this function
    118   %wide = load i64, i64 addrspace(1)* %in ; full 64-bit source read from %in
    119   %sar = ashr i64 %wide, 63 ; every result bit is a copy of the source sign bit
    120   store i64 %sar, i64 addrspace(1)* %out ; 64-bit result written to %out
    121   ret void
    122 }
    123 
    124 ; GCN-LABEL: {{^}}trunc_shl_31_i32_i64:
    125 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    126 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 31, [[VAL]]
    127 ; GCN: buffer_store_dword [[SHL]]
    128 define void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 31 whose only use is a truncation to i32
    129   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    130   %shifted = shl i64 %wide, 31 ; shift performed at 64-bit width in the IR
    131   %narrow = trunc i64 %shifted to i32 ; keep the low 32 bits only
    132   store i32 %narrow, i32 addrspace(1)* %out ; the check lines above expect a 32-bit shift feeding a dword store
    133   ret void
    134 }
    135 
    136 ; GCN-LABEL: {{^}}trunc_shl_15_i16_i64:
    137 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    138 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
    139 ; GCN: buffer_store_short [[SHL]]
    140 define void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 15 whose only use is a truncation to i16
    141   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    142   %shifted = shl i64 %wide, 15 ; shift performed at 64-bit width in the IR
    143   %narrow = trunc i64 %shifted to i16 ; keep the low 16 bits only
    144   store i16 %narrow, i16 addrspace(1)* %out ; the check lines above expect a 32-bit shift feeding a short store
    145   ret void
    146 }
    147 
    148 ; GCN-LABEL: {{^}}trunc_shl_15_i16_i32:
    149 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    150 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
    151 ; GCN: buffer_store_short [[SHL]]
    152 define void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addrspace(1)* %in) { ; i32 shift left by 15 followed by a truncation to i16
    153   %word = load i32, i32 addrspace(1)* %in ; 32-bit source read from %in
    154   %shifted = shl i32 %word, 15 ; shift already at 32-bit width
    155   %narrow = trunc i32 %shifted to i16 ; keep the low 16 bits only
    156   store i16 %narrow, i16 addrspace(1)* %out ; the check lines above expect a short store of the shifted register
    157   ret void
    158 }
    159 
    160 ; GCN-LABEL: {{^}}trunc_shl_7_i8_i64:
    161 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    162 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 7, [[VAL]]
    163 ; GCN: buffer_store_byte [[SHL]]
    164 define void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 7 whose only use is a truncation to i8
    165   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    166   %shifted = shl i64 %wide, 7 ; shift performed at 64-bit width in the IR
    167   %narrow = trunc i64 %shifted to i8 ; keep the low 8 bits only
    168   store i8 %narrow, i8 addrspace(1)* %out ; the check lines above expect a 32-bit shift feeding a byte store
    169   ret void
    170 }
    171 
    172 ; GCN-LABEL: {{^}}trunc_shl_1_i2_i64:
    173 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    174 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
    175 ; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 2, [[SHL]]
    176 ; GCN: buffer_store_byte [[AND]]
    177 define void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 1 whose only use is a truncation to the non-byte-sized type i2
    178   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    179   %shifted = shl i64 %wide, 1 ; shift performed at 64-bit width in the IR
    180   %narrow = trunc i64 %shifted to i2 ; keep the low 2 bits only
    181   store i2 %narrow, i2 addrspace(1)* %out ; the check lines above expect an and with 2 before a byte store
    182   ret void
    183 }
    184 
    185 ; GCN-LABEL: {{^}}trunc_shl_1_i32_i64:
    186 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    187 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
    188 ; GCN: buffer_store_dword [[SHL]]
    189 define void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 1 whose only use is a truncation to i32
    190   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    191   %shifted = shl i64 %wide, 1 ; shift performed at 64-bit width in the IR
    192   %narrow = trunc i64 %shifted to i32 ; keep the low 32 bits only
    193   store i32 %narrow, i32 addrspace(1)* %out ; the check lines above expect a 32-bit shift feeding a dword store
    194   ret void
    195 }
    196 
    197 ; GCN-LABEL: {{^}}trunc_shl_16_i32_i64:
    198 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
    199 ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[VAL]]
    200 ; GCN: buffer_store_dword [[SHL]]
    201 define void @trunc_shl_16_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 16 whose only use is a truncation to i32
    202   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    203   %shifted = shl i64 %wide, 16 ; shift performed at 64-bit width in the IR
    204   %narrow = trunc i64 %shifted to i32 ; keep the low 32 bits only
    205   store i32 %narrow, i32 addrspace(1)* %out ; the check lines above expect a 32-bit shift feeding a dword store
    206   ret void
    207 }
    208 
    209 ; GCN-LABEL: {{^}}trunc_shl_33_i32_i64:
    210 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
    211 ; GCN: buffer_store_dword [[ZERO]]
    212 define void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 33 followed by a truncation to i32
    213   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    214   %shifted = shl i64 %wide, 33 ; shifting by more than 32 leaves the low 32 bits all zero
    215   %narrow = trunc i64 %shifted to i32 ; keep the low 32 bits only
    216   store i32 %narrow, i32 addrspace(1)* %out ; the check lines above expect a constant zero to be stored
    217   ret void
    218 }
    219 
    220 ; GCN-LABEL: {{^}}trunc_shl_16_v2i32_v2i64:
    221 ; GCN: buffer_load_dwordx4 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
    222 ; GCN-DAG: v_lshlrev_b32_e32 v[[RESHI:[0-9]+]], 16, v{{[0-9]+}}
    223 ; GCN-DAG: v_lshlrev_b32_e32 v[[RESLO:[0-9]+]], 16, v[[LO]]
    224 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
    225 define void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { ; two-element vector form: shift each i64 lane left by 16, then truncate each lane to i32
    226   %wide = load <2 x i64>, <2 x i64> addrspace(1)* %in ; both 64-bit lanes read from %in
    227   %shifted = shl <2 x i64> %wide, <i64 16, i64 16> ; same shift amount in both lanes
    228   %narrow = trunc <2 x i64> %shifted to <2 x i32> ; keep the low 32 bits of each lane
    229   store <2 x i32> %narrow, <2 x i32> addrspace(1)* %out ; the check lines above expect two 32-bit shifts feeding one two-dword store
    230   ret void
    231 }
    232 
    233 ; GCN-LABEL: {{^}}trunc_shl_31_i32_i64_multi_use:
    234 ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
    235 ; GCN: v_lshl_b64 v{{\[}}[[RESLO:[0-9]+]]:[[RESHI:[0-9]+]]{{\]}}, [[VAL]], 31
    236 ; GCN: buffer_store_dword v[[RESLO]]
    237 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
    238 define void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out, i64 addrspace(1)* %in) { ; i64 shift left by 31 whose result is used both truncated and at full width
    239   %wide = load i64, i64 addrspace(1)* %in ; 64-bit source read from %in
    240   %shifted = shl i64 %wide, 31 ; the check lines above expect this to remain a single 64-bit shift
    241   %narrow = trunc i64 %shifted to i32 ; first use: low 32 bits only
    242   store volatile i32 %narrow, i32 addrspace(1)* %out ; truncated value written to %out
    243   store volatile i64 %shifted, i64 addrspace(1)* %in ; second use: full 64-bit value written back to %in
    244   ret void
    245 }
    246