Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      3 
      4 ; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
        ; Computes select(c == 0, fabs(x), fabs(y)) + z.
        ; Expected code: the select works on the raw loaded X and Y, and a
        ; single |...| operand on the add stands in for both fabs calls.
      5 ; GCN: buffer_load_dword [[X:v[0-9]+]]
      6 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
      7 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
      8 
      9 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
     10 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
     11 define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
     12   %x = load volatile float, float addrspace(1)* undef
     13   %y = load volatile float, float addrspace(1)* undef
     14   %z = load volatile float, float addrspace(1)* undef
     15   %cmp = icmp eq i32 %c, 0
     16   %fabs.x = call float @llvm.fabs.f32(float %x)
     17   %fabs.y = call float @llvm.fabs.f32(float %y)
     18   %select = select i1 %cmp, float %fabs.x, float %fabs.y
     19   %add = fadd float %select, %z
        ; Volatile, like the result stores of the other kernels in this file,
        ; so the store to the undef address is not a candidate for removal.
     20   store volatile float %add, float addrspace(1)* undef
     21   ret void
     22 }
     23 
     24 ; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
        ; Like select(fabs(x), fabs(y)) + z, but fabs(x) has a second user
        ; (fabs(x) + w). Expected code still selects between raw X and Y and
        ; uses |...| operands on both adds (|SELECT| and |X|).
     25 ; GCN: buffer_load_dword [[X:v[0-9]+]]
     26 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
     27 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
     28 ; GCN: buffer_load_dword [[W:v[0-9]+]]
     29 
     30 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
     31 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
     32 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
     33 define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
     34   %x = load volatile float, float addrspace(1)* undef
     35   %y = load volatile float, float addrspace(1)* undef
     36   %z = load volatile float, float addrspace(1)* undef
     37   %w = load volatile float, float addrspace(1)* undef
     38   %cmp = icmp eq i32 %c, 0
     39   %fabs.x = call float @llvm.fabs.f32(float %x)
     40   %fabs.y = call float @llvm.fabs.f32(float %y)
     41   %select = select i1 %cmp, float %fabs.x, float %fabs.y
     42   %add0 = fadd float %select, %z
        ; Second use of %fabs.x, outside the select.
     43   %add1 = fadd float %fabs.x, %w
     44   store volatile float %add0, float addrspace(1)* undef
     45   store volatile float %add1, float addrspace(1)* undef
     46   ret void
     47 }
     48 
     49 ; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
        ; Like select(fabs(x), fabs(y)) + z, but fabs(x) is also stored
        ; directly. Expected code selects between raw X and Y with |SELECT| on
        ; the add, and separately forms fabs(x) with v_and 0x7fffffff for the
        ; second store.
     50 ; GCN: buffer_load_dword [[X:v[0-9]+]]
     51 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
     52 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
     53 
     54 ; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
     55 ; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
     56 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
     57 
     58 ; GCN: buffer_store_dword [[ADD]]
     59 ; GCN: buffer_store_dword [[X_ABS]]
     60 define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
     61   %x = load volatile float, float addrspace(1)* undef
     62   %y = load volatile float, float addrspace(1)* undef
     63   %z = load volatile float, float addrspace(1)* undef
     64   %cmp = icmp eq i32 %c, 0
     65   %fabs.x = call float @llvm.fabs.f32(float %x)
     66   %fabs.y = call float @llvm.fabs.f32(float %y)
     67   %select = select i1 %cmp, float %fabs.x, float %fabs.y
     68   %add0 = fadd float %select, %z
     69   store volatile float %add0, float addrspace(1)* undef
        ; Second use of %fabs.x: stored as a value, so it must exist in a register.
     70   store volatile float %fabs.x, float addrspace(1)* undef
     71   ret void
     72 }
     73 
     74 ; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
        ; Same as the multi-use lhs test, except the extra user is on the
        ; false operand: fabs(y) + w. Expected code selects between raw X and
        ; Y and uses |SELECT| and |Y| operands on the two adds.
     75 ; GCN: buffer_load_dword [[X:v[0-9]+]]
     76 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
     77 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
     78 ; GCN: buffer_load_dword [[W:v[0-9]+]]
     79 
     80 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
     81 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
     82 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
     83 define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
     84   %x = load volatile float, float addrspace(1)* undef
     85   %y = load volatile float, float addrspace(1)* undef
     86   %z = load volatile float, float addrspace(1)* undef
     87   %w = load volatile float, float addrspace(1)* undef
     88   %cmp = icmp eq i32 %c, 0
     89   %fabs.x = call float @llvm.fabs.f32(float %x)
     90   %fabs.y = call float @llvm.fabs.f32(float %y)
     91   %select = select i1 %cmp, float %fabs.x, float %fabs.y
     92   %add0 = fadd float %select, %z
        ; Second use of %fabs.y, outside the select.
     93   %add1 = fadd float %fabs.y, %w
     94   store volatile float %add0, float addrspace(1)* undef
     95   store volatile float %add1, float addrspace(1)* undef
     96   ret void
     97 }
     98 
     99 ; GCN-LABEL: {{^}}add_select_fabs_var_f32:
        ; Computes select(c == 0, fabs(x), y) + z: only one select operand is
        ; a fabs. Expected code keeps the fabs on X (v_and 0x7fffffff) ahead
        ; of the select, and the add has no operand modifiers.
    100 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    101 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    102 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    103 
    104 ; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
    105 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc
    106 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
    107 define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
    108   %x = load volatile float, float addrspace(1)* undef
    109   %y = load volatile float, float addrspace(1)* undef
    110   %z = load volatile float, float addrspace(1)* undef
    111   %cmp = icmp eq i32 %c, 0
    112   %fabs.x = call float @llvm.fabs.f32(float %x)
    113   %select = select i1 %cmp, float %fabs.x, float %y
    114   %add = fadd float %select, %z
    115   store volatile float %add, float addrspace(1)* undef
    116   ret void
    117 }
    118 
    119 ; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
        ; Computes select(c == 0, fabs(x), -1.0) + y. Expected code keeps the
        ; fabs on X (v_and 0x7fffffff), selects against the inline constant
        ; -1.0, and uses an add without operand modifiers.
    120 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    121 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    122 
    123 ; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
    124 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
    125 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
    126 define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
    127   %x = load volatile float, float addrspace(1)* undef
    128   %y = load volatile float, float addrspace(1)* undef
    129   %cmp = icmp eq i32 %c, 0
    130   %fabs = call float @llvm.fabs.f32(float %x)
    131   %select = select i1 %cmp, float %fabs, float -1.0
    132   %add = fadd float %select, %y
    133   store volatile float %add, float addrspace(1)* undef
    134   ret void
    135 }
    136 
    137 ; FIXME: fabs should fold away
        ; Computes fabs(select(c == 0, -2.0, -1.0)) + x: the fabs is applied
        ; to a select of two negative constants. The checks record the current
        ; output, where the select still produces -1.0 / -2.0 and the fabs
        ; appears as a |...| operand on the add.
    138 ; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
    139 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    140 
    141 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
    142 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
    143 define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
    144   %x = load volatile float, float addrspace(1)* undef
    145   %cmp = icmp eq i32 %c, 0
    146   %select = select i1 %cmp, float -2.0, float -1.0
    147   %fabs = call float @llvm.fabs.f32(float %select)
    148   %add = fadd float %fabs, %x
    149   store volatile float %add, float addrspace(1)* undef
    150   ret void
    151 }
    152 
    153 ; GCN-LABEL: {{^}}add_select_posk_posk_f32:
        ; Computes select(c == 0, 2.0, 1.0) + x with no fabs or fneg in the
        ; IR. Expected code is a select of the two inline constants followed
        ; by an add without operand modifiers.
    154 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    155 
    156 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
    157 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
    158 define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
    159   %x = load volatile float, float addrspace(1)* undef
    160   %cmp = icmp eq i32 %c, 0
    161   %select = select i1 %cmp, float 2.0, float 1.0
    162   %add = fadd float %select, %x
    163   store volatile float %add, float addrspace(1)* undef
    164   ret void
    165 }
    166 
    167 ; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
        ; Computes select(c == 0, -1.0, fabs(x)) + y, i.e. the constant is the
        ; true operand. Expected code keeps the fabs on X (v_and 0x7fffffff)
        ; and uses v_cmp_ne (the IR compare is eq) with -1.0 as the first
        ; cndmask source; the add has no operand modifiers.
    168 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    169 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    170 
    171 ; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
    172 ; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
    173 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
    174 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
    175 define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
    176   %x = load volatile float, float addrspace(1)* undef
    177   %y = load volatile float, float addrspace(1)* undef
    178   %cmp = icmp eq i32 %c, 0
    179   %fabs = call float @llvm.fabs.f32(float %x)
    180   %select = select i1 %cmp, float -1.0, float %fabs
    181   %add = fadd float %select, %y
    182   store volatile float %add, float addrspace(1)* undef
    183   ret void
    184 }
    185 
    186 ; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
        ; Computes select(c == 0, -1024.0, fabs(x)) + y. Same shape as the
        ; -1.0 variant, but the constant is expected in a register: v_mov of
        ; 0xc4800000 (the f32 bit pattern of -1024.0) feeds the cndmask.
    187 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    188 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    189 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
    190 
    191 ; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
    192 ; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
    193 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc
    194 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
    195 define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
    196   %x = load volatile float, float addrspace(1)* undef
    197   %y = load volatile float, float addrspace(1)* undef
    198   %cmp = icmp eq i32 %c, 0
    199   %fabs = call float @llvm.fabs.f32(float %x)
    200   %select = select i1 %cmp, float -1024.0, float %fabs
    201   %add = fadd float %select, %y
    202   store volatile float %add, float addrspace(1)* undef
    203   ret void
    204 }
    205 
    206 ; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
        ; Computes select(c == 0, fabs(x), 1.0) + y. With a positive constant
        ; on the other side, the expected code selects between raw X and 1.0
        ; and applies the fabs as a |...| operand on the add.
    207 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    208 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    209 
    210 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
    211 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
    212 define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
    213   %x = load volatile float, float addrspace(1)* undef
    214   %y = load volatile float, float addrspace(1)* undef
    215 
    216   %cmp = icmp eq i32 %c, 0
    217   %fabs = call float @llvm.fabs.f32(float %x)
    218   %select = select i1 %cmp, float %fabs, float 1.0
    219   %add = fadd float %select, %y
    220   store volatile float %add, float addrspace(1)* undef
    221   ret void
    222 }
    223 
    224 ; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
        ; Computes select(c == 0, 1.0, fabs(x)) + y, the operand-swapped form
        ; of the fabs/posk test. Expected code uses v_cmp_ne (the IR compare
        ; is eq), selects between 1.0 and raw X, and applies the fabs as a
        ; |...| operand on the add.
    225 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    226 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    227 
    228 ; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
    229 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
    230 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
    231 define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
    232   %x = load volatile float, float addrspace(1)* undef
    233   %y = load volatile float, float addrspace(1)* undef
    234   %cmp = icmp eq i32 %c, 0
    235   %fabs = call float @llvm.fabs.f32(float %x)
    236   %select = select i1 %cmp, float 1.0, float %fabs
    237   %add = fadd float %select, %y
    238   store volatile float %add, float addrspace(1)* undef
    239   ret void
    240 }
    241 
    242 ; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
        ; Computes select(c == 0, -x, -y) + z, with each negation written as
        ; fsub -0.0, v. Expected code selects between raw X and Y and turns
        ; the add into a subtract: Z - SELECT.
    243 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    244 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    245 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    246 
    247 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
    248 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
    249 define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
    250   %x = load volatile float, float addrspace(1)* undef
    251   %y = load volatile float, float addrspace(1)* undef
    252   %z = load volatile float, float addrspace(1)* undef
    253   %cmp = icmp eq i32 %c, 0
    254   %fneg.x = fsub float -0.0, %x
    255   %fneg.y = fsub float -0.0, %y
    256   %select = select i1 %cmp, float %fneg.x, float %fneg.y
    257   %add = fadd float %select, %z
    258   store volatile float %add, float addrspace(1)* undef
    259   ret void
    260 }
    261 
    262 ; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
        ; Like select(-x, -y) + z, but -x has a second user (-x + w).
        ; Expected code selects between raw X and Y and emits two subtracts:
        ; Z - SELECT and W - X.
    263 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    264 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    265 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    266 ; GCN: buffer_load_dword [[W:v[0-9]+]]
    267 
    268 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
    269 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
    270 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
    271 define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
    272   %x = load volatile float, float addrspace(1)* undef
    273   %y = load volatile float, float addrspace(1)* undef
    274   %z = load volatile float, float addrspace(1)* undef
    275   %w = load volatile float, float addrspace(1)* undef
    276   %cmp = icmp eq i32 %c, 0
    277   %fneg.x = fsub float -0.0, %x
    278   %fneg.y = fsub float -0.0, %y
    279   %select = select i1 %cmp, float %fneg.x, float %fneg.y
    280   %add0 = fadd float %select, %z
        ; Second use of %fneg.x, outside the select.
    281   %add1 = fadd float %fneg.x, %w
    282   store volatile float %add0, float addrspace(1)* undef
    283   store volatile float %add1, float addrspace(1)* undef
    284   ret void
    285 }
    286 
    287 ; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
        ; Like select(-x, -y) + z, but -x is also stored directly. Expected
        ; code selects between raw X and Y, subtracts (Z - SELECT), and
        ; separately forms -x with v_xor 0x80000000 for the second store.
    288 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    289 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    290 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    291 
    292 ; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
    293 ; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
    294 ; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]
    295 
    296 ; GCN: buffer_store_dword [[ADD]]
    297 ; GCN: buffer_store_dword [[NEG_X]]
    298 define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
    299   %x = load volatile float, float addrspace(1)* undef
    300   %y = load volatile float, float addrspace(1)* undef
    301   %z = load volatile float, float addrspace(1)* undef
    302   %cmp = icmp eq i32 %c, 0
    303   %fneg.x = fsub float -0.0, %x
    304   %fneg.y = fsub float -0.0, %y
    305   %select = select i1 %cmp, float %fneg.x, float %fneg.y
    306   %add0 = fadd float %select, %z
    307   store volatile float %add0, float addrspace(1)* undef
        ; Second use of %fneg.x: stored as a value, so it must exist in a register.
    308   store volatile float %fneg.x, float addrspace(1)* undef
    309   ret void
    310 }
    311 
    312 ; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
        ; Same as the multi-use lhs fneg test, except the extra user is on the
        ; false operand: -y + w. Expected code selects between raw X and Y
        ; and emits two subtracts: Z - SELECT and W - Y.
    313 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    314 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    315 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    316 ; GCN: buffer_load_dword [[W:v[0-9]+]]
    317 
    318 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
    319 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
    320 ; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
    321 define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
    322   %x = load volatile float, float addrspace(1)* undef
    323   %y = load volatile float, float addrspace(1)* undef
    324   %z = load volatile float, float addrspace(1)* undef
    325   %w = load volatile float, float addrspace(1)* undef
    326   %cmp = icmp eq i32 %c, 0
    327   %fneg.x = fsub float -0.0, %x
    328   %fneg.y = fsub float -0.0, %y
    329   %select = select i1 %cmp, float %fneg.x, float %fneg.y
    330   %add0 = fadd float %select, %z
        ; Second use of %fneg.y, outside the select.
    331   %add1 = fadd float %fneg.y, %w
    332   store volatile float %add0, float addrspace(1)* undef
    333   store volatile float %add1, float addrspace(1)* undef
    334   ret void
    335 }
    336 
    337 ; GCN-LABEL: {{^}}add_select_fneg_var_f32:
        ; Computes select(c == 0, -x, y) + z: only one select operand is
        ; negated. Expected code keeps the negation on X (v_xor 0x80000000)
        ; ahead of the select, and the final op stays a plain add.
    338 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    339 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    340 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    341 
    342 ; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
    343 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc
    344 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
    345 define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
    346   %x = load volatile float, float addrspace(1)* undef
    347   %y = load volatile float, float addrspace(1)* undef
    348   %z = load volatile float, float addrspace(1)* undef
    349   %cmp = icmp eq i32 %c, 0
    350   %fneg.x = fsub float -0.0, %x
    351   %select = select i1 %cmp, float %fneg.x, float %y
    352   %add = fadd float %select, %z
    353   store volatile float %add, float addrspace(1)* undef
    354   ret void
    355 }
    356 
    357 ; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
        ; Computes select(c == 0, -x, -1.0) + y. Expected code selects between
        ; raw X and +1.0 (the IR constant with its sign flipped) and turns the
        ; add into a subtract: Y - SELECT.
    358 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    359 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    360 
    361 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
    362 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
    363 define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
    364   %x = load volatile float, float addrspace(1)* undef
    365   %y = load volatile float, float addrspace(1)* undef
    366   %cmp = icmp eq i32 %c, 0
    367   %fneg.x = fsub float -0.0, %x
    368   %select = select i1 %cmp, float %fneg.x, float -1.0
    369   %add = fadd float %select, %y
    370   store volatile float %add, float addrspace(1)* undef
    371   ret void
    372 }
    373 
    374 ; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
        ; Computes select(c == 0, -x, K) + y, where K is the positive constant
        ; 0x3FC45F3060000000 (f32 bits 0x3e22f983). Expected code selects
        ; between raw X and a v_mov of 0xbe22f983, i.e. K with the sign bit
        ; set, then subtracts: Y - SELECT.
    375 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    376 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    377 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
    378 
    379 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
    380 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
    381 define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
    382   %x = load volatile float, float addrspace(1)* undef
    383   %y = load volatile float, float addrspace(1)* undef
    384   %cmp = icmp eq i32 %c, 0
    385   %fneg.x = fsub float -0.0, %x
    386   %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
    387   %add = fadd float %select, %y
    388   store volatile float %add, float addrspace(1)* undef
    389   ret void
    390 }
    391 
    392 ; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
        ; Computes select(c == 0, -x, K) + y, where K is the negative constant
        ; 0xBFC45F3060000000. Expected code selects between raw X and the
        ; sign-flipped (positive) constant, then subtracts: Y - SELECT.
        ; The two runs differ in how the constant is supplied: the tahiti run
        ; expects a v_mov of 0x3e22f983, the fiji run expects the operand
        ; 0.15915494 directly on the cndmask.
    393 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    394 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    395 ; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
    396 
    397 ; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
    398 ; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
    399 
    400 ; GCN: v_sub_f32_e32 v{{[0-9]+}},  [[Y]], [[SELECT]]
    401 define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
    402   %x = load volatile float, float addrspace(1)* undef
    403   %y = load volatile float, float addrspace(1)* undef
    404   %cmp = icmp eq i32 %c, 0
    405   %fneg.x = fsub float -0.0, %x
    406   %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
    407   %add = fadd float %select, %y
    408   store volatile float %add, float addrspace(1)* undef
    409   ret void
    410 }
    411 
    412 ; GCN-LABEL: {{^}}add_select_negk_negk_f32:
        ; Computes select(c == 0, -2.0, -1.0) + x with no fabs or fneg in the
        ; IR. Expected code keeps both constants negative in the select and
        ; uses a plain add (no conversion to a subtract).
    413 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    414 
    415 ; GCN: v_cmp_eq_u32_e64
    416 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
    417 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
    418 define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
    419   %x = load volatile float, float addrspace(1)* undef
    420   %cmp = icmp eq i32 %c, 0
    421   %select = select i1 %cmp, float -2.0, float -1.0
    422   %add = fadd float %select, %x
    423   store volatile float %add, float addrspace(1)* undef
    424   ret void
    425 }
    426 
    427 ; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
        ; Computes select(c == 0, -2048.0, -4096.0) + x. Both constants are
        ; expected in registers via v_mov: 0xc5000000 is the f32 bit pattern
        ; of -2048.0 and 0xc5800000 that of -4096.0. The constants stay
        ; negative and the final op is a plain add.
    428 ; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
    429 ; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
    430 ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
    431 
    432 ; GCN: v_cmp_eq_u32_e64
    433 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
    434 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
    435 define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
    436   %x = load volatile float, float addrspace(1)* undef
    437   %cmp = icmp eq i32 %c, 0
    438   %select = select i1 %cmp, float -2048.0, float -4096.0
    439   %add = fadd float %select, %x
    440   store volatile float %add, float addrspace(1)* undef
    441   ret void
    442 }
    443 
    444 ; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
        ; Computes -(select(c == 0, -2.0, -1.0)) + x: the negation is applied
        ; to a select of two constants. Expected code leaves the constants
        ; negative in the select and expresses the negation as a subtract:
        ; X - SELECT.
    445 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    446 
    447 ; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
    448 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
    449 define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
    450   %x = load volatile float, float addrspace(1)* undef
    451   %cmp = icmp eq i32 %c, 0
    452   %select = select i1 %cmp, float -2.0, float -1.0
        ; Despite the name, %fneg.x negates %select, not %x.
    453   %fneg.x = fsub float -0.0, %select
    454   %add = fadd float %fneg.x, %x
    455   store volatile float %add, float addrspace(1)* undef
    456   ret void
    457 }
    458 
    459 ; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
        ; Computes select(c == 0, -1.0, -x) + y, the operand-swapped form of
        ; the fneg/negk test. Expected code uses v_cmp_ne (the IR compare is
        ; eq), selects between +1.0 and raw X, and subtracts: Y - SELECT.
    460 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    461 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    462 
    463 ; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
    464 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
    465 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
    466 define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
    467   %x = load volatile float, float addrspace(1)* undef
    468   %y = load volatile float, float addrspace(1)* undef
    469   %cmp = icmp eq i32 %c, 0
    470   %fneg.x = fsub float -0.0, %x
    471   %select = select i1 %cmp, float -1.0, float %fneg.x
    472   %add = fadd float %select, %y
    473   store volatile float %add, float addrspace(1)* undef
    474   ret void
    475 }
    476 
    477 ; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
        ; Computes select(c == 0, -x, 1.0) + y. Expected code selects between
        ; raw X and -1.0 (the IR constant with its sign flipped) and
        ; subtracts: Y - SELECT.
    478 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    479 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    480 
    481 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
    482 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
    483 define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
    484   %x = load volatile float, float addrspace(1)* undef
    485   %y = load volatile float, float addrspace(1)* undef
    486   %cmp = icmp eq i32 %c, 0
    487   %fneg.x = fsub float -0.0, %x
    488   %select = select i1 %cmp, float %fneg.x, float 1.0
    489   %add = fadd float %select, %y
    490   store volatile float %add, float addrspace(1)* undef
    491   ret void
    492 }
    493 
    494 ; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
        ; Computes select(c == 0, 1.0, -x) + y, the operand-swapped form of
        ; the fneg/posk test. Expected code uses v_cmp_ne (the IR compare is
        ; eq), selects between -1.0 and raw X, and subtracts: Y - SELECT.
    495 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    496 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    497 
    498 ; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
    499 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
    500 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
    501 define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
    502   %x = load volatile float, float addrspace(1)* undef
    503   %y = load volatile float, float addrspace(1)* undef
    504   %cmp = icmp eq i32 %c, 0
    505   %fneg.x = fsub float -0.0, %x
    506   %select = select i1 %cmp, float 1.0, float %fneg.x
    507   %add = fadd float %select, %y
    508   store volatile float %add, float addrspace(1)* undef
    509   ret void
    510 }
    511 
    512 ; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
        ; Computes select(c == 0, -fabs(x), fabs(y)) + z: the two operands
        ; carry different modifiers. Expected code applies each one before the
        ; select (v_or 0x80000000 on X, v_and 0x7fffffff on Y) and uses an add
        ; without operand modifiers.
    513 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    514 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    515 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    516 
    517 ; GCN-DAG: v_or_b32_e32 [[X_NEG_ABS:v[0-9]+]], 0x80000000, [[X]]
    518 ; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
    519 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc
    520 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
    521 define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
    522   %x = load volatile float, float addrspace(1)* undef
    523   %y = load volatile float, float addrspace(1)* undef
    524   %z = load volatile float, float addrspace(1)* undef
    525   %cmp = icmp eq i32 %c, 0
    526   %fabs.x = call float @llvm.fabs.f32(float %x)
    527   %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
    528   %fabs.y = call float @llvm.fabs.f32(float %y)
    529   %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
    530   %add = fadd float %select, %z
    531   store volatile float %add, float addrspace(1)* undef
    532   ret void
    533 }
    534 
    535 ; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
        ; Computes select(c == 0, fabs(x), -fabs(y)) + z, the mirror of the
        ; negfabs/fabs test. Expected code applies each modifier before the
        ; select (v_and 0x7fffffff on X, v_or 0x80000000 on Y) and uses an add
        ; without operand modifiers.
    536 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    537 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    538 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    539 
    540 ; GCN-DAG: v_or_b32_e32 [[Y_NEG_ABS:v[0-9]+]], 0x80000000, [[Y]]
    541 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
    542 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc
    543 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
    544 define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
    545   %x = load volatile float, float addrspace(1)* undef
    546   %y = load volatile float, float addrspace(1)* undef
    547   %z = load volatile float, float addrspace(1)* undef
    548   %cmp = icmp eq i32 %c, 0
    549   %fabs.x = call float @llvm.fabs.f32(float %x)
    550   %fabs.y = call float @llvm.fabs.f32(float %y)
    551   %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
    552   %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
    553   %add = fadd float %select, %z
    554   store volatile float %add, float addrspace(1)* undef
    555   ret void
    556 }
    557 
    558 ; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
        ; Computes select(c == 0, -x, fabs(y)) + z: one operand is negated,
        ; the other is a fabs. Expected code applies each before the select
        ; (v_xor 0x80000000 on X, v_and 0x7fffffff on Y) and uses an add
        ; without operand modifiers.
    559 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    560 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    561 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    562 
    563 ; GCN-DAG: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
    564 ; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
    565 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc
    566 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
    567 define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
    568   %x = load volatile float, float addrspace(1)* undef
    569   %y = load volatile float, float addrspace(1)* undef
    570   %z = load volatile float, float addrspace(1)* undef
    571   %cmp = icmp eq i32 %c, 0
    572   %fneg.x = fsub float -0.000000e+00, %x
    573   %fabs.y = call float @llvm.fabs.f32(float %y)
    574   %select = select i1 %cmp, float %fneg.x, float %fabs.y
    575   %add = fadd float %select, %z
    576   store volatile float %add, float addrspace(1)* undef
    577   ret void
    578 }
    579 
    580 ; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
        ; Computes select(c == 0, fabs(x), -y) + z, the mirror of the
        ; neg/fabs test. Expected code applies each modifier before the select
        ; (v_and 0x7fffffff on X, v_xor 0x80000000 on Y) and uses an add
        ; without operand modifiers.
    581 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    582 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    583 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    584 
    585 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
    586 ; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]]
    587 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc
    588 ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
    589 define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
    590   %x = load volatile float, float addrspace(1)* undef
    591   %y = load volatile float, float addrspace(1)* undef
    592   %z = load volatile float, float addrspace(1)* undef
    593   %cmp = icmp eq i32 %c, 0
    594   %fabs.x = call float @llvm.fabs.f32(float %x)
    595   %fneg.y = fsub float -0.000000e+00, %y
    596   %select = select i1 %cmp, float %fabs.x, float %fneg.y
    597   %add = fadd float %select, %z
    598   store volatile float %add, float addrspace(1)* undef
    599   ret void
    600 }
    601 
    602 ; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
        ; Computes select(c == 0, -x, -fabs(y)) + z: both operands are
        ; negated, one of them also has a fabs. Expected code keeps only the
        ; fabs on Y (v_and 0x7fffffff) before the select, selects against raw
        ; X, and expresses the shared negation as a subtract: Z - SELECT.
    603 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    604 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    605 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    606 
    607 ; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
    608 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc
    609 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
    610 define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
    611   %x = load volatile float, float addrspace(1)* undef
    612   %y = load volatile float, float addrspace(1)* undef
    613   %z = load volatile float, float addrspace(1)* undef
    614   %cmp = icmp eq i32 %c, 0
    615   %fneg.x = fsub float -0.000000e+00, %x
    616   %fabs.y = call float @llvm.fabs.f32(float %y)
    617   %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
    618   %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
    619   %add = fadd float %select, %z
    620   store volatile float %add, float addrspace(1)* undef
    621   ret void
    622 }
    623 
    624 ; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
        ; Computes select(c == 0, -y, -fabs(x)) + z. Note that the true
        ; operand is -y and the false operand is -fabs(x). Expected code keeps
        ; only the fabs on X (v_and 0x7fffffff) before the select, selects
        ; against raw Y, and expresses the shared negation as a subtract:
        ; Z - SELECT.
    625 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    626 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    627 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    628 
    629 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
    630 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc
    631 ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
    632 define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
    633   %x = load volatile float, float addrspace(1)* undef
    634   %y = load volatile float, float addrspace(1)* undef
    635   %z = load volatile float, float addrspace(1)* undef
    636   %cmp = icmp eq i32 %c, 0
    637   %fabs.x = call float @llvm.fabs.f32(float %x)
    638   %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
    639   %fneg.y = fsub float -0.000000e+00, %y
    640   %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
    641   %add = fadd float %select, %z
    642   store volatile float %add, float addrspace(1)* undef
    643   ret void
    644 }
    645 
    645 ; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
        ; Computes select(c == 0, -fabs(x), 4.0) * y. Expected code keeps the
        ; fabs on X (v_and 0x7fffffff), selects against -4.0 (the IR constant
        ; with its sign flipped), and applies the negation as a -SELECT
        ; operand on the multiply.
    646 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    647 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    648 
    649 ; GCN-DAG: v_cmp_eq_u32_e64 vcc,
    650 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
    651 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
    652 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
    653 define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
    654   %x = load volatile float, float addrspace(1)* undef
    655   %y = load volatile float, float addrspace(1)* undef
    656   %cmp = icmp eq i32 %c, 0
    657   %fabs.x = call float @llvm.fabs.f32(float %x)
    658   %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
    659   %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
    660   %mul = fmul float %select, %y
    661   store volatile float %mul, float addrspace(1)* undef
    662   ret void
    663 }
    665 
    666 ; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
    667 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    668 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    669 
    670 ; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
    671 ; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
    672 
    673 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
    674 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
    675 define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {   ; (c == 0 ? 4.0 : -|x|) * y
    676   %ld.x = load volatile float, float addrspace(1)* undef   ; 1st load
    677   %ld.y = load volatile float, float addrspace(1)* undef   ; 2nd load, the other multiplicand
    678   %is.zero = icmp eq i32 %c, 0
    679   %abs.x = call float @llvm.fabs.f32(float %ld.x)
    680   %neg.abs.x = fsub float -0.0, %abs.x   ; -|x|
    681   %sel = select i1 %is.zero, float 4.0, float %neg.abs.x   ; positive constant on the true arm
    682   %prod = fmul float %sel, %ld.y
    683   store volatile float %prod, float addrspace(1)* undef
    684   ret void
    685 }
    686 
    687 ; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
    688 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    689 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    690 
    691 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
    692 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
    693 define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {   ; (c == 0 ? -|x| : -4.0) * y
    694   %ld.x = load volatile float, float addrspace(1)* undef   ; 1st load
    695   %ld.y = load volatile float, float addrspace(1)* undef   ; 2nd load, the other multiplicand
    696   %is.zero = icmp eq i32 %c, 0
    697   %abs.x = call float @llvm.fabs.f32(float %ld.x)
    698   %neg.abs.x = fsub float -0.0, %abs.x   ; -|x|
    699   %sel = select i1 %is.zero, float %neg.abs.x, float -4.0   ; negative constant on the false arm
    700   %prod = fmul float %sel, %ld.y
    701   store volatile float %prod, float addrspace(1)* undef
    702   ret void
    703 }
    704 
    705 ; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
    706 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    707 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    708 
    709 ; GCN: v_cmp_ne_u32_e64 vcc
    710 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
    711 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
    712 define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {   ; (c == 0 ? -4.0 : -|x|) * y
    713   %ld.x = load volatile float, float addrspace(1)* undef   ; 1st load
    714   %ld.y = load volatile float, float addrspace(1)* undef   ; 2nd load, the other multiplicand
    715   %is.zero = icmp eq i32 %c, 0
    716   %abs.x = call float @llvm.fabs.f32(float %ld.x)
    717   %neg.abs.x = fsub float -0.0, %abs.x   ; -|x|
    718   %sel = select i1 %is.zero, float -4.0, float %neg.abs.x   ; negative constant on the true arm
    719   %prod = fmul float %sel, %ld.y
    720   store volatile float %prod, float addrspace(1)* undef
    721   ret void
    722 }
    723 
    724 ; --------------------------------------------------------------------------------
    725 ; Don't fold if fneg can fold into the source
    726 ; --------------------------------------------------------------------------------
    727 
    728 ; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
    729 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    730 ; GCN: buffer_load_dword [[Y:v[0-9]+]]
    731 
    732 ; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
    733 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
    734 ; GCN-NEXT: buffer_store_dword [[SELECT]]
    735 define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {   ; c == 0 ? -(x + 4.0) : 2.0
    736   %ld.x = load volatile float, float addrspace(1)* undef   ; 1st load
    737   %ld.y = load volatile float, float addrspace(1)* undef   ; 2nd load; its value is never read below
    738   %is.zero = icmp eq i32 %c, 0
    739   %sum = fadd float %ld.x, 4.0
    740   %neg.sum = fsub float -0.0, %sum   ; negation applied to the fadd result
    741   %sel = select i1 %is.zero, float %neg.sum, float 2.0
    742   store volatile float %sel, float addrspace(1)* undef
    743   ret void
    744 }
    745 
    746 ; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
    747 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    748 
    749 ; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
    750 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
    751 ; GCN-NEXT: buffer_store_dword [[SELECT]]
    752 define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {   ; c == 0 ? -(x - 4.0) : 2.0
    753   %ld.x = load volatile float, float addrspace(1)* undef   ; the only load
    754   %is.zero = icmp eq i32 %c, 0
    755   %diff = fsub float %ld.x, 4.0
    756   %neg.diff = fsub float -0.0, %diff   ; negation applied to the fsub result
    757   %sel = select i1 %is.zero, float %neg.diff, float 2.0
    758   store volatile float %sel, float addrspace(1)* undef
    759   ret void
    760 }
    761 
    762 ; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
    763 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    764 
    765 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
    766 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
    767 ; GCN-NEXT: buffer_store_dword [[SELECT]]
    768 define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {   ; c == 0 ? -(x * 4.0) : 2.0
    769   %ld.x = load volatile float, float addrspace(1)* undef   ; the only load
    770   %is.zero = icmp eq i32 %c, 0
    771   %prod = fmul float %ld.x, 4.0
    772   %neg.prod = fsub float -0.0, %prod   ; negation applied to the fmul result
    773   %sel = select i1 %is.zero, float %neg.prod, float 2.0
    774   store volatile float %sel, float addrspace(1)* undef
    775   ret void
    776 }
    777 
    778 ; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
    779 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    780 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    781 
    782 ; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
    783 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
    784 ; GCN-NEXT: buffer_store_dword [[SELECT]]
    785 define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {   ; c == 0 ? -fma(x, 4.0, z) : 2.0
    786   %ld.x = load volatile float, float addrspace(1)* undef   ; 1st load, fma multiplicand
    787   %ld.z = load volatile float, float addrspace(1)* undef   ; 2nd load, fma addend
    788   %is.zero = icmp eq i32 %c, 0
    789   %fused = call float @llvm.fma.f32(float %ld.x, float 4.0, float %ld.z)
    790   %neg.fused = fsub float -0.0, %fused   ; negation applied to the fma result
    791   %sel = select i1 %is.zero, float %neg.fused, float 2.0
    792   store volatile float %sel, float addrspace(1)* undef
    793   ret void
    794 }
    795 
    796 ; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
    797 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    798 ; GCN: buffer_load_dword [[Z:v[0-9]+]]
    799 ; GCN: v_mad_f32 [[MAD:v[0-9]+]], [[X]], -4.0, -[[Z]]
    800 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MAD]], vcc
    801 ; GCN-NEXT: buffer_store_dword [[SELECT]]
    802 define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {   ; c == 0 ? -fmuladd(x, 4.0, z) : 2.0
    803   %x = load volatile float, float addrspace(1)* undef   ; 1st load, multiplicand
    804   %z = load volatile float, float addrspace(1)* undef   ; 2nd load, addend
    805   %cmp = icmp eq i32 %c, 0
    806   %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
    807   %fneg = fsub float -0.0, %fmad   ; the checks expect this negation to be absorbed by the multiply-add operands
    808   %select = select i1 %cmp, float %fneg, float 2.0   ; so the select must consume the multiply-add result, not the raw load
    809   store volatile float %select, float addrspace(1)* undef
    810   ret void
    811 }
    812 
    813 ; FIXME: This one should fold to rcp
    814 ; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
    815 ; GCN: buffer_load_dword [[X:v[0-9]+]]
    816 
    817 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
    818 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
    819 ; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
    820 ; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
    821 define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {   ; c == 0 ? -rcp(x) : 2.0
    822   %ld.x = load volatile float, float addrspace(1)* undef   ; 1st load, rcp operand
    823   %ld.y = load volatile float, float addrspace(1)* undef   ; 2nd load; its value is never read below
    824   %is.zero = icmp eq i32 %c, 0
    825   %recip = call float @llvm.amdgcn.rcp.f32(float %ld.x)
    826   %neg.recip = fsub float -0.0, %recip   ; negation applied to the rcp result
    827   %sel = select i1 %is.zero, float %neg.recip, float 2.0
    828   store volatile float %sel, float addrspace(1)* undef
    829   ret void
    830 }
    831 
    832 declare float @llvm.fabs.f32(float) #1   ; absolute value, used by the negfabs tests
    833 declare float @llvm.fma.f32(float, float, float) #1   ; used by select_fneg_posk_src_fma_f32
    834 declare float @llvm.fmuladd.f32(float, float, float) #1   ; used by select_fneg_posk_src_fmad_f32
    835 declare float @llvm.amdgcn.rcp.f32(float) #1   ; used by select_fneg_posk_src_rcp_f32
    836 declare float @llvm.amdgcn.rcp.legacy(float) #1   ; NOTE(review): no call visible in this part of the file - confirm it is used elsewhere
    837 declare float @llvm.amdgcn.fmul.legacy(float, float) #1   ; NOTE(review): no call visible in this part of the file - confirm it is used elsewhere
    838 
    839 attributes #0 = { nounwind }   ; attached to the kernel definitions
    840 attributes #1 = { nounwind readnone }   ; attached to the intrinsic declarations above
    841