Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
      4 
      5 declare i32 @llvm.amdgcn.workitem.id.x() #0
      6 
      7 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i32:
      8 ; GCN: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
        ; Per-work-item clamp of a loaded i32, written as two icmp+select pairs:
        ; umin(umax(a, 12), 17). The check above expects a single v_med3_u32 that
        ; takes the loaded value plus the two constants 12 and 17 as operands.
      9 define amdgpu_kernel void @v_test_umed3_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
     10   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     11   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
     12   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
     13   %a = load i32, i32 addrspace(1)* %gep0
     14 
          ; %i0 = umax(%a, 12)
     15   %icmp0 = icmp ugt i32 %a, 12
     16   %i0 = select i1 %icmp0, i32 %a, i32 12
     17 
          ; %i1 = umin(%i0, 17)
     18   %icmp1 = icmp ult i32 %i0, 17
     19   %i1 = select i1 %icmp1, i32 %i0, i32 17
     20 
     21   store i32 %i1, i32 addrspace(1)* %outgep
     22   ret void
     23 }
     24 
     25 ; GCN-LABEL: {{^}}v_test_umed3_multi_use_r_i_i_i32:
     26 ; GCN: v_max_u32
     27 ; GCN: v_min_u32
        ; Same umin(umax(a, 12), 17) clamp as v_test_umed3_r_i_i_i32, except that the
        ; intermediate max %i0 is also stored. The checks expect a separate
        ; v_max_u32 followed by a v_min_u32.
     28 define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
     29   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     30   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
     31   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
     32   %a = load i32, i32 addrspace(1)* %gep0
     33 
          ; %i0 = umax(%a, 12)
     34   %icmp0 = icmp ugt i32 %a, 12
     35   %i0 = select i1 %icmp0, i32 %a, i32 12
     36 
          ; %i1 = umin(%i0, 17)
     37   %icmp1 = icmp ult i32 %i0, 17
     38   %i1 = select i1 %icmp1, i32 %i0, i32 17
     39 
          ; Storing %i0 gives the max a second use besides the min; both stores are
          ; volatile and target the same address.
     40   store volatile i32 %i0, i32 addrspace(1)* %outgep
     41   store volatile i32 %i1, i32 addrspace(1)* %outgep
     42   ret void
     43 }
     44 
     45 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_constant_order_i32:
     46 ; GCN: v_max_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
     47 ; GCN: v_min_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
        ; Max/min pair whose constants are in the opposite order to
        ; v_test_umed3_r_i_i_i32: umin(umax(a, 17), 12), i.e. the max bound is
        ; larger than the min bound. The checks expect a v_max_u32 with 17 and a
        ; v_min_u32 with 12 rather than a med3.
     48 define amdgpu_kernel void @v_test_umed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
     49   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     50   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
     51   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
     52   %a = load i32, i32 addrspace(1)* %gep0
     53 
          ; %i0 = umax(%a, 17)
     54   %icmp0 = icmp ugt i32 %a, 17
     55   %i0 = select i1 %icmp0, i32 %a, i32 17
     56 
          ; %i1 = umin(%i0, 12)
     57   %icmp1 = icmp ult i32 %i0, 12
     58   %i1 = select i1 %icmp1, i32 %i0, i32 12
     59 
     60   store i32 %i1, i32 addrspace(1)* %outgep
     61   ret void
     62 }
     63 
     64 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32:
     65 ; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
     66 ; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
        ; Signed max followed by unsigned min: the first compare uses sgt, the second
        ; ult. The checks expect a signed v_max_i32 and an unsigned v_min_u32
        ; rather than a med3.
     67 define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
     68   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     69   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
     70   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
     71   %a = load i32, i32 addrspace(1)* %gep0
     72 
          ; %i0 = smax(%a, 12) -- signed predicate
     73   %icmp0 = icmp sgt i32 %a, 12
     74   %i0 = select i1 %icmp0, i32 %a, i32 12
     75 
          ; %i1 = umin(%i0, 17) -- unsigned predicate
     76   %icmp1 = icmp ult i32 %i0, 17
     77   %i1 = select i1 %icmp1, i32 %i0, i32 17
     78 
     79   store i32 %i1, i32 addrspace(1)* %outgep
     80   ret void
     81 }
     82 
     83 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i64:
     84 ; GCN: v_cmp_lt_u64
     85 ; GCN: v_cmp_gt_u64
        ; 64-bit version of the umin(umax(a, 12), 17) clamp. The checks expect
        ; unsigned 64-bit compares (v_cmp_lt_u64 then v_cmp_gt_u64), not a med3.
     86 define amdgpu_kernel void @v_test_umed3_r_i_i_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #1 {
     87   %tid = call i32 @llvm.amdgcn.workitem.id.x()
     88   %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
     89   %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
     90   %a = load i64, i64 addrspace(1)* %gep0
     91 
          ; %i0 = umax(%a, 12)
     92   %icmp0 = icmp ugt i64 %a, 12
     93   %i0 = select i1 %icmp0, i64 %a, i64 12
     94 
          ; %i1 = umin(%i0, 17)
     95   %icmp1 = icmp ult i64 %i0, 17
     96   %i1 = select i1 %icmp1, i64 %i0, i64 17
     97 
     98   store i64 %i1, i64 addrspace(1)* %outgep
     99   ret void
    100 }
    101 
    102 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i16:
    103 ; SICIVI: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
    104 ; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
        ; 16-bit version of the umin(umax(a, 12), 17) clamp. The default and tonga
        ; runs (SICIVI prefix) expect the 32-bit v_med3_u32; the gfx900 run expects
        ; v_med3_u16.
    105 define amdgpu_kernel void @v_test_umed3_r_i_i_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) #1 {
    106   %tid = call i32 @llvm.amdgcn.workitem.id.x()
    107   %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
    108   %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
    109   %a = load i16, i16 addrspace(1)* %gep0
    110 
          ; %i0 = umax(%a, 12)
    111   %icmp0 = icmp ugt i16 %a, 12
    112   %i0 = select i1 %icmp0, i16 %a, i16 12
    113 
          ; %i1 = umin(%i0, 17)
    114   %icmp1 = icmp ult i16 %i0, 17
    115   %i1 = select i1 %icmp1, i16 %i0, i16 17
    116 
    117   store i16 %i1, i16 addrspace(1)* %outgep
    118   ret void
    119 }
    120 
    121 define internal i32 @umin(i32 %x, i32 %y) #2 {
          ; Unsigned 32-bit minimum as an icmp ult + select pair: returns %x when
          ; %x < %y (unsigned), otherwise %y.
    122   %cmp = icmp ult i32 %x, %y
    123   %sel = select i1 %cmp, i32 %x, i32 %y
    124   ret i32 %sel
    125 }
    126 
    127 define internal i32 @umax(i32 %x, i32 %y) #2 {
          ; Unsigned 32-bit maximum as an icmp ugt + select pair: returns %x when
          ; %x > %y (unsigned), otherwise %y.
    128   %cmp = icmp ugt i32 %x, %y
    129   %sel = select i1 %cmp, i32 %x, i32 %y
    130   ret i32 %sel
    131 }
    132 
    133 define internal i16 @umin16(i16 %x, i16 %y) #2 {
          ; Unsigned 16-bit minimum: returns %x when %x < %y (unsigned), otherwise %y.
    134   %cmp = icmp ult i16 %x, %y
    135   %sel = select i1 %cmp, i16 %x, i16 %y
    136   ret i16 %sel
    137 }
    138 
    139 define internal i16 @umax16(i16 %x, i16 %y) #2 {
          ; Unsigned 16-bit maximum: returns %x when %x > %y (unsigned), otherwise %y.
    140   %cmp = icmp ugt i16 %x, %y
    141   %sel = select i1 %cmp, i16 %x, i16 %y
    142   ret i16 %sel
    143 }
    144 
    145 define internal i8 @umin8(i8 %x, i8 %y) #2 {
          ; Unsigned 8-bit minimum: returns %x when %x < %y (unsigned), otherwise %y.
    146   %cmp = icmp ult i8 %x, %y
    147   %sel = select i1 %cmp, i8 %x, i8 %y
    148   ret i8 %sel
    149 }
    150 
    151 define internal i8 @umax8(i8 %x, i8 %y) #2 {
          ; Unsigned 8-bit maximum: returns %x when %x > %y (unsigned), otherwise %y.
    152   %cmp = icmp ugt i8 %x, %y
    153   %sel = select i1 %cmp, i8 %x, i8 %y
    154   ret i8 %sel
    155 }
    156 
    157 ; 16 combinations
    158 
    159 ; 0: max(min(x, y), min(max(x, y), z))
    160 ; 1: max(min(x, y), min(max(y, x), z))
    161 ; 2: max(min(x, y), min(z, max(x, y)))
    162 ; 3: max(min(x, y), min(z, max(y, x)))
    163 ; 4: max(min(y, x), min(max(x, y), z))
    164 ; 5: max(min(y, x), min(max(y, x), z))
    165 ; 6: max(min(y, x), min(z, max(x, y)))
    166 ; 7: max(min(y, x), min(z, max(y, x)))
    167 ;
    168 ; 8-15: patterns 0-7 again, with the two operands of the outermost max swapped
    169 
    170 
    171 ; FIXME: In these cases we probably should have used scalar operations
    172 ; instead.
    173 
    174 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0:
    175 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 0 on kernel arguments: max(min(x, y), min(max(x, y), z)).
        ; The check expects one v_med3_u32 whose first source is an s register and
        ; whose other two sources are v registers.
    176 define amdgpu_kernel void @s_test_umed3_i32_pat_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    177 bb:
    178   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    179   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    180   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    181   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    182   store i32 %tmp3, i32 addrspace(1)* %arg
    183   ret void
    184 }
    185 
    186 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_1:
    187 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 1 on kernel arguments: max(min(x, y), min(max(y, x), z)).
        ; Differs from pattern 0 only in the operand order of the inner max.
    188 define amdgpu_kernel void @s_test_umed3_i32_pat_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    189 bb:
    190   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    191   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    192   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    193   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    194   store i32 %tmp3, i32 addrspace(1)* %arg
    195   ret void
    196 }
    197 
    198 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_2:
    199 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 2 on kernel arguments: max(min(x, y), min(z, max(x, y))).
        ; Differs from pattern 0 only in the operand order of the second min.
    200 define amdgpu_kernel void @s_test_umed3_i32_pat_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    201 bb:
    202   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    203   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    204   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    205   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    206   store i32 %tmp3, i32 addrspace(1)* %arg
    207   ret void
    208 }
    209 
    210 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_3:
    211 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 3 on kernel arguments: max(min(x, y), min(z, max(y, x))).
        ; Both the inner max and the second min have their operands swapped
        ; relative to pattern 0.
    212 define amdgpu_kernel void @s_test_umed3_i32_pat_3(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    213 bb:
    214   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    215   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    216   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    217   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    218   store i32 %tmp3, i32 addrspace(1)* %arg
    219   ret void
    220 }
    221 
    222 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_4:
    223 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 4 on kernel arguments: max(min(y, x), min(max(x, y), z)).
        ; Differs from pattern 0 only in the operand order of the first min.
    224 define amdgpu_kernel void @s_test_umed3_i32_pat_4(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    225 bb:
    226   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    227   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    228   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    229   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    230   store i32 %tmp3, i32 addrspace(1)* %arg
    231   ret void
    232 }
    233 
    234 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_5:
    235 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 5 on kernel arguments: max(min(y, x), min(max(y, x), z)).
        ; Both the first min and the inner max take (y, x) instead of (x, y).
    236 define amdgpu_kernel void @s_test_umed3_i32_pat_5(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    237 bb:
    238   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    239   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    240   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    241   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    242   store i32 %tmp3, i32 addrspace(1)* %arg
    243   ret void
    244 }
    245 
    246 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_6:
    247 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 6 on kernel arguments: max(min(y, x), min(z, max(x, y))).
        ; The first min and the second min have their operands swapped relative to
        ; pattern 0.
    248 define amdgpu_kernel void @s_test_umed3_i32_pat_6(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    249 bb:
    250   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    251   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    252   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    253   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    254   store i32 %tmp3, i32 addrspace(1)* %arg
    255   ret void
    256 }
    257 
    258 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_7:
    259 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 7 on kernel arguments: max(min(y, x), min(z, max(y, x))).
        ; All three inner operations have their operands swapped relative to
        ; pattern 0.
    260 define amdgpu_kernel void @s_test_umed3_i32_pat_7(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    261 bb:
    262   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    263   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    264   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    265   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    266   store i32 %tmp3, i32 addrspace(1)* %arg
    267   ret void
    268 }
    269 
    270 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_8:
    271 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 8 on kernel arguments: max(min(max(x, y), z), min(x, y)).
        ; This is pattern 0 with the operands of the outermost max swapped.
    272 define amdgpu_kernel void @s_test_umed3_i32_pat_8(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    273 bb:
    274   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    275   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    276   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    277   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    278   store i32 %tmp3, i32 addrspace(1)* %arg
    279   ret void
    280 }
    281 
    282 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_9:
    283 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 9 on kernel arguments: max(min(max(y, x), z), min(x, y)).
        ; This is pattern 1 with the operands of the outermost max swapped.
    284 define amdgpu_kernel void @s_test_umed3_i32_pat_9(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    285 bb:
    286   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    287   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    288   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    289   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    290   store i32 %tmp3, i32 addrspace(1)* %arg
    291   ret void
    292 }
    293 
    294 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_10:
    295 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 10 on kernel arguments: max(min(z, max(x, y)), min(x, y)).
        ; This is pattern 2 with the operands of the outermost max swapped.
    296 define amdgpu_kernel void @s_test_umed3_i32_pat_10(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    297 bb:
    298   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    299   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    300   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    301   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    302   store i32 %tmp3, i32 addrspace(1)* %arg
    303   ret void
    304 }
    305 
    306 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_11:
    307 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 11 on kernel arguments: max(min(z, max(y, x)), min(x, y)).
        ; This is pattern 3 with the operands of the outermost max swapped.
    308 define amdgpu_kernel void @s_test_umed3_i32_pat_11(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    309 bb:
    310   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    311   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    312   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    313   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    314   store i32 %tmp3, i32 addrspace(1)* %arg
    315   ret void
    316 }
    317 
    318 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_12:
    319 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 12 on kernel arguments: max(min(max(x, y), z), min(y, x)).
        ; This is pattern 4 with the operands of the outermost max swapped.
    320 define amdgpu_kernel void @s_test_umed3_i32_pat_12(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    321 bb:
    322   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    323   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    324   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    325   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    326   store i32 %tmp3, i32 addrspace(1)* %arg
    327   ret void
    328 }
    329 
    330 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_13:
    331 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 13 on kernel arguments: max(min(max(y, x), z), min(y, x)).
        ; This is pattern 5 with the operands of the outermost max swapped.
    332 define amdgpu_kernel void @s_test_umed3_i32_pat_13(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    333 bb:
    334   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    335   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    336   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    337   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    338   store i32 %tmp3, i32 addrspace(1)* %arg
    339   ret void
    340 }
    341 
    342 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_14:
    343 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 14 on kernel arguments: max(min(z, max(x, y)), min(y, x)).
        ; This is pattern 6 with the operands of the outermost max swapped.
    344 define amdgpu_kernel void @s_test_umed3_i32_pat_14(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    345 bb:
    346   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    347   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    348   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    349   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    350   store i32 %tmp3, i32 addrspace(1)* %arg
    351   ret void
    352 }
    353 
    354 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_15:
    355 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 15 on kernel arguments: max(min(z, max(y, x)), min(y, x)).
        ; This is pattern 7 with the operands of the outermost max swapped.
    356 define amdgpu_kernel void @s_test_umed3_i32_pat_15(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    357 bb:
    358   %tmp0 = call i32 @umin(i32 %y, i32 %x)
    359   %tmp1 = call i32 @umax(i32 %y, i32 %x)
    360   %tmp2 = call i32 @umin(i32 %z, i32 %tmp1)
    361   %tmp3 = call i32 @umax(i32 %tmp2, i32 %tmp0)
    362   store i32 %tmp3, i32 addrspace(1)* %arg
    363   ret void
    364 }
    365 
    366 ; GCN-LABEL: {{^}}s_test_umed3_i16_pat_0:
    367 ; GCN: s_and_b32
    368 ; GCN: s_and_b32
    369 ; GCN: s_and_b32
    370 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 0, max(min(x, y), min(max(x, y), z)), on i16 kernel arguments using
        ; the 16-bit helpers. The checks expect three s_and_b32 (presumably the
        ; zero-extension of the three i16 arguments -- confirm) followed by a
        ; 32-bit v_med3_u32. An unnamed [8 x i32] parameter precedes each of the
        ; i16 arguments.
    371 define amdgpu_kernel void @s_test_umed3_i16_pat_0(i16 addrspace(1)* %arg, [8 x i32], i16 %x, [8 x i32], i16 %y, [8 x i32], i16 %z) #1 {
    372 bb:
    373   %tmp0 = call i16 @umin16(i16 %x, i16 %y)
    374   %tmp1 = call i16 @umax16(i16 %x, i16 %y)
    375   %tmp2 = call i16 @umin16(i16 %tmp1, i16 %z)
    376   %tmp3 = call i16 @umax16(i16 %tmp0, i16 %tmp2)
    377   store i16 %tmp3, i16 addrspace(1)* %arg
    378   ret void
    379 }
    380 
    381 ; GCN-LABEL: {{^}}s_test_umed3_i8_pat_0:
    382 ; GCN: s_and_b32
    383 ; GCN: s_and_b32
    384 ; GCN: s_and_b32
    385 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 0, max(min(x, y), min(max(x, y), z)), on i8 kernel arguments using
        ; the 8-bit helpers. The checks expect three s_and_b32 (presumably the
        ; zero-extension of the three i8 arguments -- confirm) followed by a
        ; 32-bit v_med3_u32. An unnamed [8 x i32] parameter precedes each of the
        ; i8 arguments.
    386 define amdgpu_kernel void @s_test_umed3_i8_pat_0(i8 addrspace(1)* %arg, [8 x i32], i8 %x, [8 x i32], i8 %y, [8 x i32], i8 %z) #1 {
    387 bb:
    388   %tmp0 = call i8 @umin8(i8 %x, i8 %y)
    389   %tmp1 = call i8 @umax8(i8 %x, i8 %y)
    390   %tmp2 = call i8 @umin8(i8 %tmp1, i8 %z)
    391   %tmp3 = call i8 @umax8(i8 %tmp0, i8 %tmp2)
    392   store i8 %tmp3, i8 addrspace(1)* %arg
    393   ret void
    394 }
    395 
    396 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_0:
    397 ; GCN-NOT: v_med3_u32
        ; Pattern 0 where the first min (%tmp0) is also stored, so it has a use
        ; outside the pattern. The check requires that no v_med3_u32 is emitted.
    398 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    399 bb:
    400   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    401   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    402   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    403   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
          ; Extra use of %tmp0; both stores are volatile and go to the same address.
    404   store volatile i32 %tmp0, i32 addrspace(1)* %arg
    405   store volatile i32 %tmp3, i32 addrspace(1)* %arg
    406   ret void
    407 }
    408 
    409 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_1:
    410 ; GCN-NOT: v_med3_u32
        ; Pattern 0 where the inner max (%tmp1) is also stored, so it has a use
        ; outside the pattern. The check requires that no v_med3_u32 is emitted.
    411 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    412 bb:
    413   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    414   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    415   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    416   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
          ; Extra use of %tmp1; both stores are volatile and go to the same address.
    417   store volatile i32 %tmp1, i32 addrspace(1)* %arg
    418   store volatile i32 %tmp3, i32 addrspace(1)* %arg
    419   ret void
    420 }
    421 
    422 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_2:
    423 ; GCN-NOT: v_med3_u32
        ; Pattern 0 where the second min (%tmp2) is also stored, so it has a use
        ; outside the pattern. The check requires that no v_med3_u32 is emitted.
    424 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    425 bb:
    426   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    427   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    428   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    429   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
          ; Extra use of %tmp2; both stores are volatile and go to the same address.
    430   store volatile i32 %tmp2, i32 addrspace(1)* %arg
    431   store volatile i32 %tmp3, i32 addrspace(1)* %arg
    432   ret void
    433 }
    434 
    435 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_multi_use_result:
    436 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 0 where only the final result (%tmp3) has more than one use: it is
        ; stored twice. The intermediates are used only inside the pattern, and
        ; the check still expects a v_med3_u32.
    437 define amdgpu_kernel void @s_test_umed3_i32_pat_0_multi_use_result(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    438 bb:
    439   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    440   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    441   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    442   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    443   store volatile i32 %tmp3, i32 addrspace(1)* %arg
    444   store volatile i32 %tmp3, i32 addrspace(1)* %arg
    445   ret void
    446 }
    447 
    448 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src0:
    449 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
        ; Pattern 0 with the constant 1 in place of x: max(min(1, y), min(max(1, y), z)).
        ; The %x argument is unused. The check expects the constant 1 to appear as
        ; the middle source operand of v_med3_u32.
    450 define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src0(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    451 bb:
    452   %tmp0 = call i32 @umin(i32 1, i32 %y)
    453   %tmp1 = call i32 @umax(i32 1, i32 %y)
    454   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    455   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    456   store i32 %tmp3, i32 addrspace(1)* %arg
    457   ret void
    458 }
    459 
    460 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src1:
    461 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
        ; Pattern 0 with the constant 2 in place of y: max(min(x, 2), min(max(x, 2), z)).
        ; The %y argument is unused. The check expects the constant 2 to appear as
        ; the middle source operand of v_med3_u32.
    462 define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src1(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    463 bb:
    464   %tmp0 = call i32 @umin(i32 %x, i32 2)
    465   %tmp1 = call i32 @umax(i32 %x, i32 2)
    466   %tmp2 = call i32 @umin(i32 %tmp1, i32 %z)
    467   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    468   store i32 %tmp3, i32 addrspace(1)* %arg
    469   ret void
    470 }
    471 
    472 ; GCN-LABEL: {{^}}s_test_umed3_i32_pat_0_imm_src2:
    473 ; GCN: v_med3_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, 9
        ; Pattern 0 with the constant 9 in place of z: max(min(x, y), min(max(x, y), 9)).
        ; The %z argument is unused. The check expects the constant 9 to appear as
        ; the last source operand of v_med3_u32.
    474 define amdgpu_kernel void @s_test_umed3_i32_pat_0_imm_src2(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) #1 {
    475 bb:
    476   %tmp0 = call i32 @umin(i32 %x, i32 %y)
    477   %tmp1 = call i32 @umax(i32 %x, i32 %y)
    478   %tmp2 = call i32 @umin(i32 %tmp1, i32 9)
    479   %tmp3 = call i32 @umax(i32 %tmp0, i32 %tmp2)
    480   store i32 %tmp3, i32 addrspace(1)* %arg
    481   ret void
    482 }
    483 
    484 ; GCN-LABEL: {{^}}v_test_umed3_i16_pat_0:
    485 ; SI: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    486 
    487 ; FIXME: VI not matching med3
    488 ; VI: v_min_u16
    489 ; VI: v_max_u16
    490 ; VI: v_min_u16
    491 ; VI: v_max_u16
    492 
    493 ; GFX9: v_med3_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
        ; Pattern 0, max(min(x, y), min(max(x, y), z)), on three i16 values loaded per
        ; work item. Expected output differs per run line: the default run expects
        ; the 32-bit v_med3_u32, the tonga run currently expects four separate
        ; 16-bit min/max instructions (see the FIXME above), and the gfx900 run
        ; expects v_med3_u16. The %arg parameter is unused.
    494 define amdgpu_kernel void @v_test_umed3_i16_pat_0(i16 addrspace(1)* %arg, i16 addrspace(1)* %out, i16 addrspace(1)* %a.ptr) #1 {
    495 bb:
    496   %tid = call i32 @llvm.amdgcn.workitem.id.x()
          ; x, y and z are read at element offsets 0, 3 and 8 from %a.ptr + %tid.
    497   %gep0 = getelementptr inbounds i16, i16 addrspace(1)* %a.ptr, i32 %tid
    498   %gep1 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 3
    499   %gep2 = getelementptr inbounds i16, i16 addrspace(1)* %gep0, i32 8
    500   %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
    501   %x = load i16, i16 addrspace(1)* %gep0
    502   %y = load i16, i16 addrspace(1)* %gep1
    503   %z = load i16, i16 addrspace(1)* %gep2
    504 
    505   %tmp0 = call i16 @umin16(i16 %x, i16 %y)
    506   %tmp1 = call i16 @umax16(i16 %x, i16 %y)
    507   %tmp2 = call i16 @umin16(i16 %tmp1, i16 %z)
    508   %tmp3 = call i16 @umax16(i16 %tmp0, i16 %tmp2)
    509   store i16 %tmp3, i16 addrspace(1)* %out.gep
    510   ret void
    511 }
    512 
    513 attributes #0 = { nounwind readnone } ; used by the @llvm.amdgcn.workitem.id.x declaration
    514 attributes #1 = { nounwind } ; used by the amdgpu_kernel test functions
    515 attributes #2 = { nounwind readnone alwaysinline } ; used by the internal umin/umax helpers
    516