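; Codegen tests for AMDGPU v_sad_u32 selection: each kernel below spells an unsigned absolute difference plus an addend in IR, and the FileCheck lines state which instructions llc is expected to emit for it.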
      1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 
      3 ; GCN-LABEL: {{^}}v_sad_u32_pat1:
      4 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
      5 define amdgpu_kernel void @v_sad_u32_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 1: the absolute difference is written as unsigned max minus
          ; unsigned min, each built from an icmp/select pair, then %c is added.
          ; The FileCheck line before this function expects one v_sad_u32.
          ; %t0 = (a >u b) ? a : b, i.e. the unsigned maximum.
      6   %icmp0 = icmp ugt i32 %a, %b
      7   %t0 = select i1 %icmp0, i32 %a, i32 %b
      8 
          ; %t1 = (a <=u b) ? a : b, i.e. the unsigned minimum.
      9   %icmp1 = icmp ule i32 %a, %b
     10   %t1 = select i1 %icmp1, i32 %a, i32 %b
     11 
          ; max - min, plus the addend.
     12   %ret0 = sub i32 %t0, %t1
     13   %ret = add i32 %ret0, %c
     14 
     15   store i32 %ret, i32 addrspace(1)* %out
     16   ret void
     17 }
     18 
     19 ; GCN-LABEL: {{^}}v_sad_u32_constant_pat1:
     20 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 20
     21 define amdgpu_kernel void @v_sad_u32_constant_pat1(i32 addrspace(1)* %out, i32 %a) {
          ; Pattern 1 with the second operand replaced by the constant 90 and the
          ; addend replaced by the constant 20. The FileCheck line before this
          ; function expects v_sad_u32 with the literal 20 as its last operand.
          ; %t0 = unsigned max(%a, 90).
     22   %icmp0 = icmp ugt i32 %a, 90
     23   %t0 = select i1 %icmp0, i32 %a, i32 90
     24 
          ; %t1 = unsigned min(%a, 90).
     25   %icmp1 = icmp ule i32 %a, 90
     26   %t1 = select i1 %icmp1, i32 %a, i32 90
     27 
     28   %ret0 = sub i32 %t0, %t1
     29   %ret = add i32 %ret0, 20
     30 
     31   store i32 %ret, i32 addrspace(1)* %out
     32   ret void
     33 }
     34 
     35 ; GCN-LABEL: {{^}}v_sad_u32_pat2:
     36 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     37 define amdgpu_kernel void @v_sad_u32_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 2: both differences (a - b and b - a) are computed and one is
          ; chosen by an unsigned greater-than compare, then %c is added.
          ; The FileCheck line before this function expects one v_sad_u32.
     38   %icmp0 = icmp ugt i32 %a, %b
     39   %sub0 = sub i32 %a, %b
     40   %sub1 = sub i32 %b, %a
          ; Picks a - b when a >u b, otherwise b - a.
     41   %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
     42 
     43   %ret = add i32 %ret0, %c
     44 
     45   store i32 %ret, i32 addrspace(1)* %out
     46   ret void
     47 }
     48 
     49 ; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat1:
     50 ; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
     51 ; GCN: s_min_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
     52 ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
     53 ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
     54 define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 1 where the intermediate difference %ret0 has a second use.
          ; The FileCheck lines before this function expect separate s_max_u32,
          ; s_min_u32, s_sub_i32 and s_add_i32 instructions.
          ; NOTE(review): the checks do not explicitly assert the absence of
          ; v_sad_u32 - confirm whether that is intended.
     55   %icmp0 = icmp ugt i32 %a, %b
     56   %t0 = select i1 %icmp0, i32 %a, i32 %b
     57 
     58   %icmp1 = icmp ule i32 %a, %b
     59   %t1 = select i1 %icmp1, i32 %a, i32 %b
     60 
     61   %ret0 = sub i32 %t0, %t1
          ; Extra use of the difference: a volatile store through an undef
          ; addrspace(5) pointer, so the sub result must exist on its own.
     62   store volatile i32 %ret0, i32  addrspace(5)*undef
     63   %ret = add i32 %ret0, %c
     64 
     65   store i32 %ret, i32 addrspace(1)* %out
     66   ret void
     67 }
     68 
     69 ; GCN-LABEL: {{^}}v_sad_u32_multi_use_add_pat1:
     70 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     71 define amdgpu_kernel void @v_sad_u32_multi_use_add_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 1 where only the final sum %ret has more than one use.
          ; The FileCheck line before this function still expects v_sad_u32.
     72   %icmp0 = icmp ugt i32 %a, %b
     73   %t0 = select i1 %icmp0, i32 %a, i32 %b
     74 
     75   %icmp1 = icmp ule i32 %a, %b
     76   %t1 = select i1 %icmp1, i32 %a, i32 %b
     77 
     78   %ret0 = sub i32 %t0, %t1
     79   %ret = add i32 %ret0, %c
          ; The sum is stored twice: once volatile through an undef addrspace(5)
          ; pointer, once to %out.
     80   store volatile i32 %ret, i32  addrspace(5)*undef
     81   store i32 %ret, i32 addrspace(1)* %out
     82   ret void
     83 }
     84 
     85 ; GCN-LABEL: {{^}}v_sad_u32_multi_use_max_pat1:
     86 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     87 define amdgpu_kernel void @v_sad_u32_multi_use_max_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 1 where the unsigned maximum %t0 has a second use.
          ; The FileCheck line before this function still expects v_sad_u32.
     88   %icmp0 = icmp ugt i32 %a, %b
     89   %t0 = select i1 %icmp0, i32 %a, i32 %b
          ; Extra use of the max: volatile store through an undef addrspace(5)
          ; pointer.
     90   store volatile i32 %t0, i32  addrspace(5)*undef
     91 
     92   %icmp1 = icmp ule i32 %a, %b
     93   %t1 = select i1 %icmp1, i32 %a, i32 %b
     94 
     95   %ret0 = sub i32 %t0, %t1
     96   %ret = add i32 %ret0, %c
     97 
     98   store i32 %ret, i32 addrspace(1)* %out
     99   ret void
    100 }
    101 
    102 ; GCN-LABEL: {{^}}v_sad_u32_multi_use_min_pat1:
    103 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    104 define amdgpu_kernel void @v_sad_u32_multi_use_min_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 1 where the unsigned minimum %t1 has a second use.
          ; The FileCheck line before this function still expects v_sad_u32.
    105   %icmp0 = icmp ugt i32 %a, %b
    106   %t0 = select i1 %icmp0, i32 %a, i32 %b
    107 
    108   %icmp1 = icmp ule i32 %a, %b
    109   %t1 = select i1 %icmp1, i32 %a, i32 %b
    110 
          ; Extra use of the min: volatile store through an undef addrspace(5)
          ; pointer.
    111   store volatile i32 %t1, i32  addrspace(5)*undef
    112 
    113   %ret0 = sub i32 %t0, %t1
    114   %ret = add i32 %ret0, %c
    115 
    116   store i32 %ret, i32 addrspace(1)* %out
    117   ret void
    118 }
    119 
    120 ; GCN-LABEL: {{^}}v_sad_u32_multi_use_sub_pat2:
    121 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    122 define amdgpu_kernel void @v_sad_u32_multi_use_sub_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 2 where one of the two differences (%sub0 = a - b) has a
          ; second use. The FileCheck line before this function still expects
          ; v_sad_u32.
    123   %icmp0 = icmp ugt i32 %a, %b
    124   %sub0 = sub i32 %a, %b
          ; Extra use of a - b: volatile store through an undef addrspace(5)
          ; pointer.
    125   store volatile i32 %sub0, i32  addrspace(5)*undef
    126   %sub1 = sub i32 %b, %a
    127   %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
    128 
    129   %ret = add i32 %ret0, %c
    130 
    131   store i32 %ret, i32 addrspace(1)* %out
    132   ret void
    133 }
    134 
    135 ; GCN-LABEL: {{^}}v_sad_u32_multi_use_select_pat2:
    136 ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
    137 ; GCN-DAG: v_cmp_gt_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
    138 ; GCN-DAG: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
    139 define amdgpu_kernel void @v_sad_u32_multi_use_select_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
          ; Pattern 2 where the selected absolute difference %ret0 has a second
          ; use. The FileCheck lines before this function expect two s_sub_i32
          ; instructions and a v_cmp_gt_u32_e32.
          ; NOTE(review): the checks do not explicitly assert the absence of
          ; v_sad_u32 - confirm whether that is intended.
    140   %icmp0 = icmp ugt i32 %a, %b
    141   %sub0 = sub i32 %a, %b
    142   %sub1 = sub i32 %b, %a
    143   %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
          ; Extra use of the select result: volatile store through an undef
          ; addrspace(5) pointer.
    144   store volatile i32 %ret0, i32  addrspace(5)*undef
    145 
    146   %ret = add i32 %ret0, %c
    147 
    148   store i32 %ret, i32 addrspace(1)* %out
    149   ret void
    150 }
    151 
    152 ; GCN-LABEL: {{^}}v_sad_u32_vector_pat1:
    153 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    154 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    155 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    156 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    157 define amdgpu_kernel void @v_sad_u32_vector_pat1(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
          ; Pattern 1 (unsigned max minus unsigned min, plus addend) applied
          ; element-wise to <4 x i32> values. The FileCheck lines before this
          ; function expect four v_sad_u32 instructions.
    158   %icmp0 = icmp ugt <4 x i32> %a, %b
    159   %t0 = select <4 x i1> %icmp0, <4 x i32> %a, <4 x i32> %b
    160 
    161   %icmp1 = icmp ule <4 x i32> %a, %b
    162   %t1 = select <4 x i1> %icmp1, <4 x i32> %a, <4 x i32> %b
    163 
    164   %ret0 = sub <4 x i32> %t0, %t1
    165   %ret = add <4 x i32> %ret0, %c
    166 
    167   store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
    168   ret void
    169 }
    170 
    171 ; GCN-LABEL: {{^}}v_sad_u32_vector_pat2:
    172 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    173 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    174 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    175 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    176 define amdgpu_kernel void @v_sad_u32_vector_pat2(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
          ; Pattern 2 (select between a - b and b - a, plus addend) applied
          ; element-wise to <4 x i32> values. The FileCheck lines before this
          ; function expect four v_sad_u32 instructions.
    177   %icmp0 = icmp ugt <4 x i32> %a, %b
    178   %sub0 = sub <4 x i32> %a, %b
    179   %sub1 = sub <4 x i32> %b, %a
    180   %ret0 = select <4 x i1> %icmp0, <4 x i32> %sub0, <4 x i32> %sub1
    181 
    182   %ret = add <4 x i32> %ret0, %c
    183 
    184   store <4 x i32> %ret, <4 x i32> addrspace(1)* %out
    185   ret void
    186 }
    187 
    188 ; GCN-LABEL: {{^}}v_sad_u32_i16_pat1:
    189 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    190 define amdgpu_kernel void @v_sad_u32_i16_pat1(i16 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
          ; Pattern 1 on i16 kernel arguments. The FileCheck line before this
          ; function expects v_sad_u32 whose first source is an SGPR.
    191 
    192   %icmp0 = icmp ugt i16 %a, %b
    193   %t0 = select i1 %icmp0, i16 %a, i16 %b
    194 
    195   %icmp1 = icmp ule i16 %a, %b
    196   %t1 = select i1 %icmp1, i16 %a, i16 %b
    197 
    198   %ret0 = sub i16 %t0, %t1
    199   %ret = add i16 %ret0, %c
    200 
    201   store i16 %ret, i16 addrspace(1)* %out
    202   ret void
    203 }
    204 
    205 ; GCN-LABEL: {{^}}v_sad_u32_i16_pat2:
    206 ; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    207 define amdgpu_kernel void @v_sad_u32_i16_pat2(i16 addrspace(1)* %out) {
          ; Pattern 2 on i16 values. Unlike the i16 pattern-1 test, the operands
          ; are not kernel arguments: they come from volatile loads through an
          ; undef addrspace(1) pointer. The FileCheck line before this function
          ; expects v_sad_u32 with all operands in VGPRs.
    208   %a = load volatile i16, i16 addrspace(1)* undef
    209   %b = load volatile i16, i16 addrspace(1)* undef
    210   %c = load volatile i16, i16 addrspace(1)* undef
    211   %icmp0 = icmp ugt i16 %a, %b
    212   %sub0 = sub i16 %a, %b
    213   %sub1 = sub i16 %b, %a
    214   %ret0 = select i1 %icmp0, i16 %sub0, i16 %sub1
    215 
    216   %ret = add i16 %ret0, %c
    217 
    218   store i16 %ret, i16 addrspace(1)* %out
    219   ret void
    220 }
    221 
    222 ; GCN-LABEL: {{^}}v_sad_u32_i8_pat1:
    223 ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    224 define amdgpu_kernel void @v_sad_u32_i8_pat1(i8 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
          ; Pattern 1 on i8 kernel arguments. The FileCheck line before this
          ; function expects v_sad_u32 whose first source is an SGPR.
    225   %icmp0 = icmp ugt i8 %a, %b
    226   %t0 = select i1 %icmp0, i8 %a, i8 %b
    227 
    228   %icmp1 = icmp ule i8 %a, %b
    229   %t1 = select i1 %icmp1, i8 %a, i8 %b
    230 
    231   %ret0 = sub i8 %t0, %t1
    232   %ret = add i8 %ret0, %c
    233 
    234   store i8 %ret, i8 addrspace(1)* %out
    235   ret void
    236 }
    237 
    238 ; GCN-LABEL: {{^}}v_sad_u32_i8_pat2:
    239 ; GCN: v_sad_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    240 define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out) {
          ; Pattern 2 on i8 values obtained from volatile loads through an undef
          ; addrspace(1) pointer rather than from kernel arguments. The FileCheck
          ; line before this function expects v_sad_u32 with all operands in
          ; VGPRs.
    241   %a = load volatile i8, i8 addrspace(1)* undef
    242   %b = load volatile i8, i8 addrspace(1)* undef
    243   %c = load volatile i8, i8 addrspace(1)* undef
    244   %icmp0 = icmp ugt i8 %a, %b
    245   %sub0 = sub i8 %a, %b
    246   %sub1 = sub i8 %b, %a
    247   %ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1
    248 
    249   %ret = add i8 %ret0, %c
    250 
    251   store i8 %ret, i8 addrspace(1)* %out
    252   ret void
    253 }
    254 
    255 ; GCN-LABEL: {{^}}s_sad_u32_i8_pat2:
    256 ; GCN: s_load_dword
    257 ; GCN: s_bfe_u32
    258 ; GCN: s_sub_i32
    259 ; GCN: s_and_b32
    260 ; GCN: s_sub_i32
    261 ; GCN: s_lshr_b32
    262 ; GCN: v_add_i32_e32
    263 define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
          ; Pattern 2 on i8 kernel arguments marked zeroext. The FileCheck lines
          ; before this function expect a mostly scalar sequence (s_load_dword,
          ; s_bfe_u32, s_sub_i32, s_and_b32, s_sub_i32, s_lshr_b32) ending in
          ; v_add_i32_e32.
          ; NOTE(review): the checks do not explicitly assert the absence of
          ; v_sad_u32 - confirm whether that is intended.
    264   %icmp0 = icmp ugt i8 %a, %b
    265   %sub0 = sub i8 %a, %b
    266   %sub1 = sub i8 %b, %a
    267   %ret0 = select i1 %icmp0, i8 %sub0, i8 %sub1
    268 
    269   %ret = add i8 %ret0, %c
    270 
    271   store i8 %ret, i8 addrspace(1)* %out
    272   ret void
    273 }
    274 
    275 ; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat1:
    276 ; GCN: v_cmp_le_u32_e32 vcc, s{{[0-9]+}}, v{{[0-9]+}}
    277 ; GCN: s_max_u32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
    278 ; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
    279 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
    280 define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat1(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
          ; Near-miss of pattern 1: the second select falls back to %d instead of
          ; %b, so %t1 is not the unsigned minimum of %a and %b. The FileCheck
          ; lines before this function expect v_cmp_le_u32_e32, s_max_u32,
          ; v_sub_i32_e32 and v_add_i32_e32.
    281   %icmp0 = icmp ugt i32 %a, %b
    282   %t0 = select i1 %icmp0, i32 %a, i32 %b
    283 
    284   %icmp1 = icmp ule i32 %a, %b
          ; Deliberate mismatch: false operand is %d, not %b.
    285   %t1 = select i1 %icmp1, i32 %a, i32 %d
    286 
    287   %ret0 = sub i32 %t0, %t1
    288   %ret = add i32 %ret0, %c
    289 
    290   store i32 %ret, i32 addrspace(1)* %out
    291   ret void
    292 }
    293 
    294 ; GCN-LABEL: {{^}}v_sad_u32_mismatched_operands_pat2:
    295 ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
    296 ; GCN: s_sub_i32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
    297 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
    298 define amdgpu_kernel void @v_sad_u32_mismatched_operands_pat2(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
          ; Near-miss of pattern 2: the first difference subtracts %d instead of
          ; %b, so the two selected values are not a - b and b - a. The FileCheck
          ; lines before this function expect two s_sub_i32 instructions and a
          ; v_add_i32_e32.
    299   %icmp0 = icmp ugt i32 %a, %b
          ; Deliberate mismatch: a - d rather than a - b.
    300   %sub0 = sub i32 %a, %d
    301   %sub1 = sub i32 %b, %a
    302   %ret0 = select i1 %icmp0, i32 %sub0, i32 %sub1
    303 
    304   %ret = add i32 %ret0, %c
    305 
    306   store i32 %ret, i32 addrspace(1)* %out
    307   ret void
    308 }
    309 
    310