; Codegen tests for the llvm.amdgcn.div.scale.f32 and llvm.amdgcn.div.scale.f64 intrinsics (amdgcn target).
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
      2 
      3 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
        ; ^ Called by the tests below to compute an index into their %in pointer.
      4 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone
      5 declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone
        ; ^ Intrinsics under test. Each returns a { value, i1 } pair; the tests in this
        ;   file only extract and store element 0. Per the expected output in the tests,
        ;   an i1 argument of false yields operands (second, second, first) and true
        ;   yields (first, second, first) on the v_div_scale instruction.
      6 declare float @llvm.fabs.f32(float) nounwind readnone
        ; ^ Used by the fabs_num / fabs_den tests to wrap one operand in fabs.
      7 
      8 ; SI-LABEL: {{^}}test_div_scale_f32_1:
      9 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
     10 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     11 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
     12 ; SI: buffer_store_dword [[RESULT0]]
     13 ; SI: s_endpgm
     14 define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
          ; f32 div.scale with both operands loaded from memory and the i1 flag false.
          ; The FileCheck lines above expect v_div_scale_f32 source operands B, B, A.
     15   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
          ; %gep.0 = &in[tid]; %gep.1 is the following float (matched as offset 4 above).
     16   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
     17   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     18 
          ; Volatile loads -- presumably so both loads stay as separate instructions; TODO confirm.
     19   %a = load volatile float, float addrspace(1)* %gep.0, align 4
     20   %b = load volatile float, float addrspace(1)* %gep.1, align 4
     21 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
     22   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
     23   %result0 = extractvalue { float, i1 } %result, 0
     24   store float %result0, float addrspace(1)* %out, align 4
     25   ret void
     26 }
     27 
     28 ; SI-LABEL: {{^}}test_div_scale_f32_2:
     29 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
     30 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
     31 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
     32 ; SI: buffer_store_dword [[RESULT0]]
     33 ; SI: s_endpgm
     34 define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
          ; Same as test_div_scale_f32_1 but with the i1 flag true.
          ; The FileCheck lines above expect v_div_scale_f32 source operands A, B, A.
     35   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
          ; %gep.0 = &in[tid]; %gep.1 is the following float (matched as offset 4 above).
     36   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
     37   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
     38 
          ; Volatile loads -- presumably so both loads stay as separate instructions; TODO confirm.
     39   %a = load volatile float, float addrspace(1)* %gep.0, align 4
     40   %b = load volatile float, float addrspace(1)* %gep.1, align 4
     41 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
     42   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
     43   %result0 = extractvalue { float, i1 } %result, 0
     44   store float %result0, float addrspace(1)* %out, align 4
     45   ret void
     46 }
     47 
     48 ; SI-LABEL: {{^}}test_div_scale_f64_1:
     49 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
     50 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
     51 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
     52 ; SI: buffer_store_dwordx2 [[RESULT0]]
     53 ; SI: s_endpgm
     54 define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
          ; f64 div.scale with both operands loaded from memory and the i1 flag false.
          ; The FileCheck lines above expect v_div_scale_f64 source operands B, B, A.
          ; NOTE(review): %aptr is never referenced in this body.
     55   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
          ; %gep.0 = &in[tid]; %gep.1 is the following double (matched as offset 8 above).
     56   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
     57   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
     58 
          ; Volatile loads -- presumably so both loads stay as separate instructions; TODO confirm.
     59   %a = load volatile double, double addrspace(1)* %gep.0, align 8
     60   %b = load volatile double, double addrspace(1)* %gep.1, align 8
     61 
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
     62   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
     63   %result0 = extractvalue { double, i1 } %result, 0
     64   store double %result0, double addrspace(1)* %out, align 8
     65   ret void
     66 }
     67 
     68 ; SI-LABEL: {{^}}test_div_scale_f64_2:
     69 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
     70 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
     71 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
     72 ; SI: buffer_store_dwordx2 [[RESULT0]]
     73 ; SI: s_endpgm
     74 define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
          ; Same as test_div_scale_f64_1 but with the i1 flag true.
          ; The FileCheck lines above expect v_div_scale_f64 source operands A, B, A.
          ; NOTE(review): %aptr is never referenced in this body.
     75   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
          ; %gep.0 = &in[tid]; %gep.1 is the following double (matched as offset 8 above).
     76   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
     77   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
     78 
          ; Volatile loads -- presumably so both loads stay as separate instructions; TODO confirm.
     79   %a = load volatile double, double addrspace(1)* %gep.0, align 8
     80   %b = load volatile double, double addrspace(1)* %gep.1, align 8
     81 
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
     82   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
     83   %result0 = extractvalue { double, i1 } %result, 0
     84   store double %result0, double addrspace(1)* %out, align 8
     85   ret void
     86 }
     87 
     88 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
     89 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
     90 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
     91 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
     92 ; SI: buffer_store_dword [[RESULT0]]
     93 ; SI: s_endpgm
     94 define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
          ; First operand %a is a function argument (matched above as an s_load_dword
          ; into an s register); second operand %b is loaded from in[tid]. Flag is false.
          ; The FileCheck lines above expect v_div_scale_f32 source operands B, B, A.
     95   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
     96   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
     97 
          ; Plain (non-volatile) load of the second operand.
     98   %b = load float, float addrspace(1)* %gep, align 4
     99 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    100   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
    101   %result0 = extractvalue { float, i1 } %result, 0
    102   store float %result0, float addrspace(1)* %out, align 4
    103   ret void
    104 }
    105 
    106 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
    107 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
    108 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
    109 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
    110 ; SI: buffer_store_dword [[RESULT0]]
    111 ; SI: s_endpgm
    112 define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
          ; First operand %a is a function argument (matched above as an s_load_dword
          ; into an s register); second operand %b is loaded from in[tid]. Flag is true.
          ; The FileCheck lines above expect v_div_scale_f32 source operands A, B, A.
    113   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    114   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
    115 
          ; Plain (non-volatile) load of the second operand.
    116   %b = load float, float addrspace(1)* %gep, align 4
    117 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    118   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
    119   %result0 = extractvalue { float, i1 } %result, 0
    120   store float %result0, float addrspace(1)* %out, align 4
    121   ret void
    122 }
    123 
    124 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
    125 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
    126 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
    127 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
    128 ; SI: buffer_store_dword [[RESULT0]]
    129 ; SI: s_endpgm
    130 define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
          ; Second operand %b is a function argument (matched above as an s_load_dword
          ; into an s register); first operand %a is loaded from in[tid]. Flag is false.
          ; The FileCheck lines above expect v_div_scale_f32 source operands B, B, A.
    131   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    132   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
    133 
          ; Plain (non-volatile) load of the first operand.
    134   %a = load float, float addrspace(1)* %gep, align 4
    135 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    136   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
    137   %result0 = extractvalue { float, i1 } %result, 0
    138   store float %result0, float addrspace(1)* %out, align 4
    139   ret void
    140 }
    141 
    142 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
    143 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
    144 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
    145 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
    146 ; SI: buffer_store_dword [[RESULT0]]
    147 ; SI: s_endpgm
    148 define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
          ; Second operand %b is a function argument (matched above as an s_load_dword
          ; into an s register); first operand %a is loaded from in[tid]. Flag is true.
          ; The FileCheck lines above expect v_div_scale_f32 source operands A, B, A.
    149   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    150   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
    151 
          ; Plain (non-volatile) load of the first operand.
    152   %a = load float, float addrspace(1)* %gep, align 4
    153 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    154   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
    155   %result0 = extractvalue { float, i1 } %result, 0
    156   store float %result0, float addrspace(1)* %out, align 4
    157   ret void
    158 }
    159 
    160 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
    161 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
    162 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
    163 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
    164 ; SI: buffer_store_dwordx2 [[RESULT0]]
    165 ; SI: s_endpgm
    166 define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
          ; First operand %a is a function argument (matched above as an s_load_dwordx2
          ; at offset 0xd); second operand %b is loaded from in[tid]. Flag is false.
          ; The FileCheck lines above expect v_div_scale_f64 source operands B, B, A.
    167   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    168   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
    169 
          ; Plain (non-volatile) load of the second operand.
    170   %b = load double, double addrspace(1)* %gep, align 8
    171 
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
    172   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
    173   %result0 = extractvalue { double, i1 } %result, 0
    174   store double %result0, double addrspace(1)* %out, align 8
    175   ret void
    176 }
    177 
    178 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
    179 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
    180 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
    181 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
    182 ; SI: buffer_store_dwordx2 [[RESULT0]]
    183 ; SI: s_endpgm
    184 define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
          ; First operand %a is a function argument (matched above as an s_load_dwordx2
          ; at offset 0xd); second operand %b is loaded from in[tid]. Flag is true.
          ; The FileCheck lines above expect v_div_scale_f64 source operands A, B, A.
    185   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    186   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
    187 
          ; Plain (non-volatile) load of the second operand.
    188   %b = load double, double addrspace(1)* %gep, align 8
    189 
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
    190   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
    191   %result0 = extractvalue { double, i1 } %result, 0
    192   store double %result0, double addrspace(1)* %out, align 8
    193   ret void
    194 }
    195 
    196 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
    197 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
    198 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
    199 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
    200 ; SI: buffer_store_dwordx2 [[RESULT0]]
    201 ; SI: s_endpgm
    202 define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
          ; Second operand %b is a function argument (matched above as an s_load_dwordx2
          ; at offset 0xd); first operand %a is loaded from in[tid]. Flag is false.
          ; The FileCheck lines above expect v_div_scale_f64 source operands B, B, A.
    203   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    204   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
    205 
          ; Plain (non-volatile) load of the first operand.
    206   %a = load double, double addrspace(1)* %gep, align 8
    207 
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
    208   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
    209   %result0 = extractvalue { double, i1 } %result, 0
    210   store double %result0, double addrspace(1)* %out, align 8
    211   ret void
    212 }
    213 
    214 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
    215 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
    216 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
    217 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
    218 ; SI: buffer_store_dwordx2 [[RESULT0]]
    219 ; SI: s_endpgm
    220 define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
          ; Second operand %b is a function argument (matched above as an s_load_dwordx2
          ; at offset 0xd); first operand %a is loaded from in[tid]. Flag is true.
          ; The FileCheck lines above expect v_div_scale_f64 source operands A, B, A.
    221   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    222   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
    223 
          ; Plain (non-volatile) load of the first operand.
    224   %a = load double, double addrspace(1)* %gep, align 8
    225 
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
    226   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
    227   %result0 = extractvalue { double, i1 } %result, 0
    228   store double %result0, double addrspace(1)* %out, align 8
    229   ret void
    230 }
    231 
    232 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
    233 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
    234 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
    235 ; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
    236 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
    237 ; SI: buffer_store_dword [[RESULT0]]
    238 ; SI: s_endpgm
    239 define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
          ; Both operands are function arguments (matched above as s_load_dword at
          ; offsets 0xb and 0xc); no memory loads in the body. Flag is false.
          ; The FileCheck lines above expect A to be copied into a v register with
          ; v_mov_b32 and the v_div_scale_f32 source operands to be B, B, VA.
    240   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    241   %result0 = extractvalue { float, i1 } %result, 0
    242   store float %result0, float addrspace(1)* %out, align 4
    243   ret void
    244 }
    245 
    246 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
    247 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
    248 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
    249 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
    250 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
    251 ; SI: buffer_store_dword [[RESULT0]]
    252 ; SI: s_endpgm
    253 define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
          ; Both operands are function arguments (matched above as s_load_dword at
          ; offsets 0xb and 0xc); no memory loads in the body. Flag is true.
          ; The FileCheck lines above expect B to be copied into a v register with
          ; v_mov_b32 and the v_div_scale_f32 source operands to be A, VB, A.
    254   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    255   %result0 = extractvalue { float, i1 } %result, 0
    256   store float %result0, float addrspace(1)* %out, align 4
    257   ret void
    258 }
    259 
    260 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
    261 ; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
    262 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
    263 ; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
    264 ; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
    265 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
    266 ; SI: buffer_store_dwordx2 [[RESULT0]]
    267 ; SI: s_endpgm
    268 define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
          ; Both operands are function arguments (matched above as s_load_dwordx2 at
          ; offsets 0xb and 0xd); no memory loads in the body. Flag is false.
          ; The FileCheck lines above expect both halves of A to be copied into
          ; v registers with v_mov_b32 and the v_div_scale_f64 source operands to be
          ; B, B, v[VA_LO:VA_HI].
    269   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
    270   %result0 = extractvalue { double, i1 } %result, 0
    271   store double %result0, double addrspace(1)* %out, align 8
    272   ret void
    273 }
    274 
    275 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
    276 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
    277 ; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
    278 ; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
    279 ; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
    280 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
    281 ; SI: buffer_store_dwordx2 [[RESULT0]]
    282 ; SI: s_endpgm
    283 define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
          ; Both operands are function arguments (matched above as s_load_dwordx2 at
          ; offsets 0xb and 0xd); no memory loads in the body. Flag is true.
          ; The FileCheck lines above expect both halves of B to be copied into
          ; v registers with v_mov_b32 and the v_div_scale_f64 source operands to be
          ; A, v[VB_LO:VB_HI], A.
    284   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
          ; Only struct element 0 (the double) is stored to %out; the i1 element is unused.
    285   %result0 = extractvalue { double, i1 } %result, 0
    286   store double %result0, double addrspace(1)* %out, align 8
    287   ret void
    288 }
    289 
    290 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
    291 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    292 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
    293 ; SI: buffer_store_dword [[RESULT0]]
    294 ; SI: s_endpgm
    295 define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
          ; First operand is the constant 1.0; second operand %a is loaded from in[tid].
          ; Flag is false. The FileCheck lines above expect the constant to appear
          ; directly in the instruction: v_div_scale_f32 source operands A, A, 1.0.
    296   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    297   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
    298   %a = load float, float addrspace(1)* %gep.0, align 4
    299 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    300   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
    301   %result0 = extractvalue { float, i1 } %result, 0
    302   store float %result0, float addrspace(1)* %out, align 4
    303   ret void
    304 }
    305 
    306 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
    307 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
    308 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
    309 ; SI: buffer_store_dword [[RESULT0]]
    310 ; SI: s_endpgm
    311 define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
          ; Second operand is the constant 2.0; first operand %a is loaded from in[tid].
          ; Flag is false. The FileCheck lines above expect the constant to appear
          ; directly in the instruction: v_div_scale_f32 source operands 2.0, 2.0, A.
    312   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
    313   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
    314   %a = load float, float addrspace(1)* %gep.0, align 4
    315 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    316   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
    317   %result0 = extractvalue { float, i1 } %result, 0
    318   store float %result0, float addrspace(1)* %out, align 4
    319   ret void
    320 }
    321 
    322 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
    323 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
    324 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    325 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
    326 ; SI: buffer_store_dword [[RESULT0]]
    327 ; SI: s_endpgm
    328 define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
          ; Passes fabs(%a) as the first operand and %b as the second, with the flag false.
          ; The FileCheck lines above expect the fabs to appear as an operand modifier:
          ; v_div_scale_f32 source operands B, B, |A|.
    329   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
          ; %gep.0 = &in[tid]; %gep.1 is the following float (matched as offset 4 above).
    330   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
    331   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    332 
          ; Volatile loads -- presumably so both loads stay as separate instructions; TODO confirm.
    333   %a = load volatile float, float addrspace(1)* %gep.0, align 4
    334   %b = load volatile float, float addrspace(1)* %gep.1, align 4
    335 
    336   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
    337 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    338   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
    339   %result0 = extractvalue { float, i1 } %result, 0
    340   store float %result0, float addrspace(1)* %out, align 4
    341   ret void
    342 }
    343 
    344 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
    345 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
    346 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
    347 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
    348 ; SI: buffer_store_dword [[RESULT0]]
    349 ; SI: s_endpgm
    350 define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
          ; Passes %a as the first operand and fabs(%b) as the second, with the flag false.
          ; The FileCheck lines above expect the fabs to appear as an operand modifier on
          ; both uses of B: v_div_scale_f32 source operands |B|, |B|, A.
    351   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
          ; %gep.0 = &in[tid]; %gep.1 is the following float (matched as offset 4 above).
    352   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
    353   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
    354 
          ; Volatile loads -- presumably so both loads stay as separate instructions; TODO confirm.
    355   %a = load volatile float, float addrspace(1)* %gep.0, align 4
    356   %b = load volatile float, float addrspace(1)* %gep.1, align 4
    357 
    358   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
    359 
          ; Only struct element 0 (the float) is stored to %out; the i1 element is unused.
    360   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
    361   %result0 = extractvalue { float, i1 } %result, 0
    362   store float %result0, float addrspace(1)* %out, align 4
    363   ret void
    364 }
    365