Home | History | Annotate | Download | only in AMDGPU
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      4 
      5 ; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
      6 ; but with all of the 64-bit tests and the tests involving loads dropped.
      7 
      8 ; Patterns:
      9 ;   a) x &  ((1 << nbits) - 1)
     10 ;   b) x & ~(-1 << nbits)
     11 ;   c) x &  (-1 >> (32 - nbits))
     12 ;   d) x << (32 - nbits) >> (32 - nbits)
     13 ; are equivalent (here >> denotes a logical right shift).
     14 
     15 ; ---------------------------------------------------------------------------- ;
     16 ; Pattern a. 32-bit
     17 ; ---------------------------------------------------------------------------- ;
     18 
     ; Pattern a, baseline: keep the low %numlowbits bits of %val using the
     ; mask (1 << numlowbits) - 1. The autogenerated check lines below expect
     ; a single v_bfe_u32 for both llc invocations.
     19 define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
     20 ; GCN-LABEL: bzhi32_a0:
     21 ; GCN:       ; %bb.0:
     22 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
     23 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
     24 ; GCN-NEXT:    s_setpc_b64 s[30:31]
     25   %onebit = shl i32 1, %numlowbits ; onebit = 1 << numlowbits
     26   %mask = add nsw i32 %onebit, -1 ; mask = onebit - 1
     27   %masked = and i32 %mask, %val ; mask is the first operand of the and
     28   ret i32 %masked
     29 }
     30 
     ; Pattern a with a narrow bit count: %numlowbits is an i8 argument marked
     ; zeroext and is widened to i32 with zext before building the mask.
     ; The check lines below still expect a single v_bfe_u32.
     31 define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
     32 ; GCN-LABEL: bzhi32_a1_indexzext:
     33 ; GCN:       ; %bb.0:
     34 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
     35 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
     36 ; GCN-NEXT:    s_setpc_b64 s[30:31]
     37   %conv = zext i8 %numlowbits to i32 ; widen the i8 bit count to i32
     38   %onebit = shl i32 1, %conv ; onebit = 1 << conv
     39   %mask = add nsw i32 %onebit, -1 ; mask = onebit - 1
     40   %masked = and i32 %mask, %val ; mask is the first operand of the and
     41   ret i32 %masked
     42 }
     43 
     ; Pattern a with the operands of the final and swapped (%val first,
     ; %mask second). The check lines below expect the same single v_bfe_u32
     ; as the non-swapped form.
     44 define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
     45 ; GCN-LABEL: bzhi32_a4_commutative:
     46 ; GCN:       ; %bb.0:
     47 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
     48 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
     49 ; GCN-NEXT:    s_setpc_b64 s[30:31]
     50   %onebit = shl i32 1, %numlowbits ; onebit = 1 << numlowbits
     51   %mask = add nsw i32 %onebit, -1 ; mask = onebit - 1
     52   %masked = and i32 %val, %mask ; swapped order
     53   ret i32 %masked
     54 }
     55 
     56 ; ---------------------------------------------------------------------------- ;
     57 ; Pattern b. 32-bit
     58 ; ---------------------------------------------------------------------------- ;
     59 
     ; Pattern b, baseline: keep the low %numlowbits bits of %val using the
     ; mask ~(-1 << numlowbits). The check lines below expect a single
     ; v_bfe_u32 for both llc invocations.
     60 define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
     61 ; GCN-LABEL: bzhi32_b0:
     62 ; GCN:       ; %bb.0:
     63 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
     64 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
     65 ; GCN-NEXT:    s_setpc_b64 s[30:31]
     66   %notmask = shl i32 -1, %numlowbits ; notmask = -1 << numlowbits
     67   %mask = xor i32 %notmask, -1 ; mask = ~notmask (xor with all-ones)
     68   %masked = and i32 %mask, %val ; mask is the first operand of the and
     69   ret i32 %masked
     70 }
     71 
     ; Pattern b with a narrow bit count: %numlowbits is an i8 argument marked
     ; zeroext and is widened to i32 with zext before building the mask.
     ; The check lines below still expect a single v_bfe_u32.
     72 define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
     73 ; GCN-LABEL: bzhi32_b1_indexzext:
     74 ; GCN:       ; %bb.0:
     75 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
     76 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
     77 ; GCN-NEXT:    s_setpc_b64 s[30:31]
     78   %conv = zext i8 %numlowbits to i32 ; widen the i8 bit count to i32
     79   %notmask = shl i32 -1, %conv ; notmask = -1 << conv
     80   %mask = xor i32 %notmask, -1 ; mask = ~notmask (xor with all-ones)
     81   %masked = and i32 %mask, %val ; mask is the first operand of the and
     82   ret i32 %masked
     83 }
     84 
     ; Pattern b with the operands of the final and swapped (%val first,
     ; %mask second). The check lines below expect the same single v_bfe_u32
     ; as the non-swapped form.
     85 define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
     86 ; GCN-LABEL: bzhi32_b4_commutative:
     87 ; GCN:       ; %bb.0:
     88 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
     89 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
     90 ; GCN-NEXT:    s_setpc_b64 s[30:31]
     91   %notmask = shl i32 -1, %numlowbits ; notmask = -1 << numlowbits
     92   %mask = xor i32 %notmask, -1 ; mask = ~notmask (xor with all-ones)
     93   %masked = and i32 %val, %mask ; swapped order
     94   ret i32 %masked
     95 }
     96 
     97 ; ---------------------------------------------------------------------------- ;
     98 ; Pattern c. 32-bit
     99 ; ---------------------------------------------------------------------------- ;
    100 
    ; Pattern c, baseline: keep the low %numlowbits bits of %val using the
    ; mask -1 >> (32 - numlowbits), with a logical right shift. The check
    ; lines below expect a single v_bfe_u32 for both llc invocations.
    101 define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
    102 ; GCN-LABEL: bzhi32_c0:
    103 ; GCN:       ; %bb.0:
    104 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    105 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
    106 ; GCN-NEXT:    s_setpc_b64 s[30:31]
    107   %numhighbits = sub i32 32, %numlowbits ; numhighbits = 32 - numlowbits
    108   %mask = lshr i32 -1, %numhighbits ; mask = all-ones logically shifted right
    109   %masked = and i32 %mask, %val ; mask is the first operand of the and
    110   ret i32 %masked
    111 }
    112 
    ; Pattern c with a narrow bit count: the subtraction from 32 is performed
    ; in i8 and only the result is zero-extended to i32 for the shift.
    ; Unlike the i32 variant, the check lines below do not expect v_bfe_u32;
    ; they pin separate expanded sequences for the two llc invocations
    ; (the default one and the -mcpu=tonga one).
    113 define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
    114 ; SI-LABEL: bzhi32_c1_indexzext:
    115 ; SI:       ; %bb.0:
    116 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    117 ; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
    118 ; SI-NEXT:    v_and_b32_e32 v1, 0xff, v1
    119 ; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
    120 ; SI-NEXT:    v_and_b32_e32 v0, v1, v0
    121 ; SI-NEXT:    s_setpc_b64 s[30:31]
    122 ;
    123 ; VI-LABEL: bzhi32_c1_indexzext:
    124 ; VI:       ; %bb.0:
    125 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    126 ; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
    127 ; VI-NEXT:    v_mov_b32_e32 v2, -1
    128 ; VI-NEXT:    v_lshrrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
    129 ; VI-NEXT:    v_and_b32_e32 v0, v1, v0
    130 ; VI-NEXT:    s_setpc_b64 s[30:31]
    131   %numhighbits = sub i8 32, %numlowbits ; 8-bit subtraction: 32 - numlowbits
    132   %sh_prom = zext i8 %numhighbits to i32 ; promote the shift amount to i32
    133   %mask = lshr i32 -1, %sh_prom ; mask = all-ones logically shifted right
    134   %masked = and i32 %mask, %val ; mask is the first operand of the and
    135   ret i32 %masked
    136 }
    137 
    ; Pattern c with the operands of the final and swapped (%val first,
    ; %mask second). The check lines below expect the same single v_bfe_u32
    ; as the non-swapped form.
    138 define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
    139 ; GCN-LABEL: bzhi32_c4_commutative:
    140 ; GCN:       ; %bb.0:
    141 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    142 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
    143 ; GCN-NEXT:    s_setpc_b64 s[30:31]
    144   %numhighbits = sub i32 32, %numlowbits ; numhighbits = 32 - numlowbits
    145   %mask = lshr i32 -1, %numhighbits ; mask = all-ones logically shifted right
    146   %masked = and i32 %val, %mask ; swapped order
    147   ret i32 %masked
    148 }
    149 
    150 ; ---------------------------------------------------------------------------- ;
    151 ; Pattern d. 32-bit
    152 ; ---------------------------------------------------------------------------- ;
    153 
    ; Pattern d, baseline: no explicit mask. %val is shifted left by
    ; (32 - numlowbits) and then logically shifted right by the same amount.
    ; The check lines below expect a single v_bfe_u32 for both llc invocations.
    154 define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
    155 ; GCN-LABEL: bzhi32_d0:
    156 ; GCN:       ; %bb.0:
    157 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    158 ; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
    159 ; GCN-NEXT:    s_setpc_b64 s[30:31]
    160   %numhighbits = sub i32 32, %numlowbits ; numhighbits = 32 - numlowbits
    161   %highbitscleared = shl i32 %val, %numhighbits ; shift the high bits out
    162   %masked = lshr i32 %highbitscleared, %numhighbits ; logical shift back by the same amount
    163   ret i32 %masked
    164 }
    165 
    ; Pattern d with a narrow bit count: the subtraction from 32 is performed
    ; in i8 and only the result is zero-extended to i32; both shifts use that
    ; promoted amount. Unlike the i32 variant, the check lines below do not
    ; expect v_bfe_u32; they pin separate sub/and/shift-left/shift-right
    ; sequences for the two llc invocations.
    166 define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
    167 ; SI-LABEL: bzhi32_d1_indexzext:
    168 ; SI:       ; %bb.0:
    169 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    170 ; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
    171 ; SI-NEXT:    v_and_b32_e32 v1, 0xff, v1
    172 ; SI-NEXT:    v_lshl_b32_e32 v0, v0, v1
    173 ; SI-NEXT:    v_lshr_b32_e32 v0, v0, v1
    174 ; SI-NEXT:    s_setpc_b64 s[30:31]
    175 ;
    176 ; VI-LABEL: bzhi32_d1_indexzext:
    177 ; VI:       ; %bb.0:
    178 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
    179 ; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
    180 ; VI-NEXT:    v_and_b32_e32 v1, 0xff, v1
    181 ; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
    182 ; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
    183 ; VI-NEXT:    s_setpc_b64 s[30:31]
    184   %numhighbits = sub i8 32, %numlowbits ; 8-bit subtraction: 32 - numlowbits
    185   %sh_prom = zext i8 %numhighbits to i32 ; promote the shift amount to i32
    186   %highbitscleared = shl i32 %val, %sh_prom ; shift the high bits out
    187   %masked = lshr i32 %highbitscleared, %sh_prom ; logical shift back by the same amount
    188   ret i32 %masked
    189 }
    190