Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 
      3 declare float @llvm.fabs.f32(float) #0
      4 declare float @llvm.canonicalize.f32(float) #0
      5 declare double @llvm.fabs.f64(double) #0
      6 declare double @llvm.canonicalize.f64(double) #0
      7 declare half @llvm.canonicalize.f16(half) #0
      8 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
      9 declare i32 @llvm.amdgcn.workitem.id.x() #0
     10 declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0
     11 
     12 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f32:
     13 ; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
     14 ; GCN: buffer_store_dword [[REG]]
        ; Canonicalize a float loaded from %out and store it back in place.
        ; The checks above expect a VGPR multiply by 1.0 feeding the store.
     15 define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
     16   %x = load float, float addrspace(1)* %out
     17   %canon = call float @llvm.canonicalize.f32(float %x)
     18   store float %canon, float addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; GCN-LABEL: {{^}}s_test_canonicalize_var_f32:
     23 ; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
     24 ; GCN: buffer_store_dword [[REG]]
        ; Canonicalize the kernel argument %val directly (no load).
        ; The checks above expect the multiply by 1.0 to read an SGPR source.
     25 define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
     26   %canon = call float @llvm.canonicalize.f32(float %val)
     27   store float %canon, float addrspace(1)* %out
     28   ret void
     29 }
     30 
     31 ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f32:
     32 ; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
     33 ; GCN: buffer_store_dword [[REG]]
        ; canonicalize(fabs(x)): the checks above expect the fabs to appear as
        ; a |...| source modifier on the multiply by 1.0.
     34 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
     35   %x = load float, float addrspace(1)* %out
     36   %abs = call float @llvm.fabs.f32(float %x)
     37   %canon = call float @llvm.canonicalize.f32(float %abs)
     38   store float %canon, float addrspace(1)* %out
     39   ret void
     40 }
     41 
     42 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f32:
     43 ; GCN: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
     44 ; GCN: buffer_store_dword [[REG]]
        ; canonicalize(-fabs(x)), with the negation written as fsub -0.0.
        ; The checks above expect a multiply of |x| by the constant -1.0.
     45 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
     46   %x = load float, float addrspace(1)* %out
     47   %abs = call float @llvm.fabs.f32(float %x)
     48   %neg.abs = fsub float -0.0, %abs
     49   %canon = call float @llvm.canonicalize.f32(float %neg.abs)
     50   store float %canon, float addrspace(1)* %out
     51   ret void
     52 }
     53 
     54 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f32:
     55 ; GCN: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
     56 ; GCN: buffer_store_dword [[REG]]
        ; canonicalize(-x), with the negation written as fsub -0.0.
        ; The checks above expect a single multiply by -1.0.
     57 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
     58   %x = load float, float addrspace(1)* %out
     59   %neg = fsub float -0.0, %x
     60   %canon = call float @llvm.canonicalize.f32(float %neg)
     61   store float %canon, float addrspace(1)* %out
     62   ret void
     63 }
     64 
     65 ; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
     66 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
     67 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold: canonicalize(undef) is expected to become the
        ; immediate 0x7fc00000 moved straight into the stored register.
     68 define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
     69   %folded = call float @llvm.canonicalize.f32(float undef)
     70   store float %folded, float addrspace(1)* %out
     71   ret void
     72 }
     73 
     74 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
     75 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
     76 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold: canonicalize(+0.0) is expected to become a move of 0.
     77 define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
     78   %folded = call float @llvm.canonicalize.f32(float 0.0)
     79   store float %folded, float addrspace(1)* %out
     80   ret void
     81 }
     82 
     83 ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
     84 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
     85 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold: canonicalize(-0.0). The checks above expect the sign-bit
        ; pattern to be materialized with a bit-reverse of the constant 1.
     86 define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
     87   %folded = call float @llvm.canonicalize.f32(float -0.0)
     88   store float %folded, float addrspace(1)* %out
     89   ret void
     90 }
     91 
     92 ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
     93 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
     94 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold: canonicalize(1.0) is expected to become a move of 1.0.
     95 define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
     96   %folded = call float @llvm.canonicalize.f32(float 1.0)
     97   store float %folded, float addrspace(1)* %out
     98   ret void
     99 }
    100 
    101 ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
    102 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
    103 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold: canonicalize(-1.0) is expected to become a move of -1.0.
    104 define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
    105   %folded = call float @llvm.canonicalize.f32(float -1.0)
    106   store float %folded, float addrspace(1)* %out
    107   ret void
    108 }
    109 
    110 ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
    111 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
    112 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold: canonicalize(16.0) is expected to become a move of the
        ; 32-bit literal 0x41800000.
    113 define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
    114   %folded = call float @llvm.canonicalize.f32(float 16.0)
    115   store float %folded, float addrspace(1)* %out
    116   ret void
    117 }
    118 
    119 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
    120 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
    121 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 8388607 = 0x007fffff (largest positive f32 denormal).
        ; Under attribute group #1 the checks above expect it to fold to 0.
    122 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
    123   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
    124   store float %folded, float addrspace(1)* %out
    125   ret void
    126 }
    127 
    128 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
    129 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
    130 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 8388607 = 0x007fffff (largest positive f32 denormal).
        ; Under attribute group #3 (denormal features enabled) the checks above
        ; expect the bit pattern to be kept unchanged.
    131 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
    132   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
    133   store float %folded, float addrspace(1)* %out
    134   ret void
    135 }
    136 
    137 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
    138 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
    139 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 2155872255 = 0x807fffff (negative f32 denormal).
        ; Under attribute group #1 the checks above expect it to fold to 0.
    140 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
    141   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
    142   store float %folded, float addrspace(1)* %out
    143   ret void
    144 }
    145 
    146 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
    147 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
    148 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 2155872255 = 0x807fffff (negative f32 denormal).
        ; Under attribute group #3 (denormal features enabled) the checks above
        ; expect the bit pattern to be kept unchanged.
    149 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
    150   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
    151   store float %folded, float addrspace(1)* %out
    152   ret void
    153 }
    154 
    155 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
    156 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    157 ; GCN: buffer_store_dword [[REG]]
        ; Constant fold of a quiet NaN (written in IR's 64-bit hex float form).
        ; The checks above expect the f32 pattern 0x7fc00000.
    158 define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
    159   %folded = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
    160   store float %folded, float addrspace(1)* %out
    161   ret void
    162 }
    163 
    164 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
    165 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    166 ; GCN: buffer_store_dword [[REG]]
        ; Input bits i32 -1 (0xffffffff), a NaN with sign and full payload set.
        ; The checks above expect it to fold to 0x7fc00000.
    167 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
    168   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
    169   store float %folded, float addrspace(1)* %out
    170   ret void
    171 }
    172 
    173 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
    174 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    175 ; GCN: buffer_store_dword [[REG]]
        ; Input bits i32 -2 (0xfffffffe), another quiet-NaN encoding.
        ; The checks above expect it to fold to 0x7fc00000.
    176 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
    177   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
    178   store float %folded, float addrspace(1)* %out
    179   ret void
    180 }
    181 
    182 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
    183 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    184 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 2139095041 = 0x7f800001 (positive signaling NaN, smallest
        ; payload). The checks above expect it to fold to 0x7fc00000.
    185 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
    186   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
    187   store float %folded, float addrspace(1)* %out
    188   ret void
    189 }
    190 
    191 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
    192 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    193 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 2143289343 = 0x7fbfffff (positive signaling NaN, largest
        ; payload). The checks above expect it to fold to 0x7fc00000.
    194 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
    195   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
    196   store float %folded, float addrspace(1)* %out
    197   ret void
    198 }
    199 
    200 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
    201 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    202 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 4286578689 = 0xff800001 (negative signaling NaN, smallest
        ; payload). The checks above expect it to fold to 0x7fc00000.
    203 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
    204   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
    205   store float %folded, float addrspace(1)* %out
    206   ret void
    207 }
    208 
    209 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
    210 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
    211 ; GCN: buffer_store_dword [[REG]]
        ; Input bits 4290772991 = 0xffbfffff (negative signaling NaN, largest
        ; payload). The checks above expect it to fold to 0x7fc00000.
    212 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
    213   %folded = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
    214   store float %folded, float addrspace(1)* %out
    215   ret void
    216 }
    217 
    218 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
    219 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
    220 ; GCN: buffer_store_dwordx2 [[REG]]
        ; Canonicalize a double loaded from %out and store it back in place.
        ; The checks above expect a v_max_f64 on VGPR pairs feeding the store.
    221 define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
    222   %x = load double, double addrspace(1)* %out
    223   %canon = call double @llvm.canonicalize.f64(double %x)
    224   store double %canon, double addrspace(1)* %out
    225   ret void
    226 }
    227 
    228 ; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
    229 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
    230 ; GCN: buffer_store_dwordx2 [[REG]]
        ; Canonicalize the kernel argument %val directly (no load).
        ; The checks above expect v_max_f64 to read SGPR pairs for both sources.
    231 define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
    232   %canon = call double @llvm.canonicalize.f64(double %val)
    233   store double %canon, double addrspace(1)* %out
    234   ret void
    235 }
    236 
    237 ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
    238 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
    239 ; GCN: buffer_store_dwordx2 [[REG]]
        ; canonicalize(fabs(x)): the checks above expect the fabs to appear as
        ; |...| source modifiers on both v_max_f64 operands.
    240 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
    241   %x = load double, double addrspace(1)* %out
    242   %abs = call double @llvm.fabs.f64(double %x)
    243   %canon = call double @llvm.canonicalize.f64(double %abs)
    244   store double %canon, double addrspace(1)* %out
    245   ret void
    246 }
    247 
    248 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
    249 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
    250 ; GCN: buffer_store_dwordx2 [[REG]]
        ; canonicalize(-fabs(x)), with the negation written as fsub -0.0.
        ; The checks above expect -|...| source modifiers on both v_max_f64
        ; operands. The destination capture accepts multi-digit register indices
        ; on both ends of the pair, matching the other f64 tests in this file.
    251 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
    252   %val = load double, double addrspace(1)* %out
    253   %val.fabs = call double @llvm.fabs.f64(double %val)
    254   %val.fabs.fneg = fsub double -0.0, %val.fabs
    255   %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
    256   store double %canonicalized, double addrspace(1)* %out
    257   ret void
    258 }
    259 
    260 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
    261 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
    262 ; GCN: buffer_store_dwordx2 [[REG]]
        ; canonicalize(-x), with the negation written as fsub -0.0.
        ; The checks above expect neg modifiers on both v_max_f64 operands.
    263 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
    264   %x = load double, double addrspace(1)* %out
    265   %neg = fsub double -0.0, %x
    266   %canon = call double @llvm.canonicalize.f64(double %neg)
    267   store double %canon, double addrspace(1)* %out
    268   ret void
    269 }
    270 
    271 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f64:
    272 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    273 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
    274 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Constant fold: canonicalize(+0.0). The checks above expect the low
        ; half set to 0 and the high half copied from the low register.
    275 define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
    276   %folded = call double @llvm.canonicalize.f64(double 0.0)
    277   store double %folded, double addrspace(1)* %out
    278   ret void
    279 }
    280 
    281 ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f64:
    282 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    283 ; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
    284 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Constant fold: canonicalize(-0.0). The checks above expect a zero low
        ; half and a high half built with a bit-reverse of the constant 1.
    285 define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
    286   %folded = call double @llvm.canonicalize.f64(double -0.0)
    287   store double %folded, double addrspace(1)* %out
    288   ret void
    289 }
    290 
    291 ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f64:
    292 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    293 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
    294 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Constant fold: canonicalize(1.0). The checks above expect the pair
        ; LO = 0, HI = 0x3ff00000.
    295 define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
    296   %folded = call double @llvm.canonicalize.f64(double 1.0)
    297   store double %folded, double addrspace(1)* %out
    298   ret void
    299 }
    300 
    301 ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f64:
    302 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    303 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
    304 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Constant fold: canonicalize(-1.0). The checks above expect the pair
        ; LO = 0, HI = 0xbff00000.
    305 define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
    306   %folded = call double @llvm.canonicalize.f64(double -1.0)
    307   store double %folded, double addrspace(1)* %out
    308   ret void
    309 }
    310 
    311 ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f64:
    312 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    313 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
    314 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Constant fold: canonicalize(16.0). The checks above expect the pair
        ; LO = 0, HI = 0x40300000.
    315 define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
    316   %folded = call double @llvm.canonicalize.f64(double 16.0)
    317   store double %folded, double addrspace(1)* %out
    318   ret void
    319 }
    320 
    321 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f64:
    322 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    323 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
    324 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 4503599627370495 = 0x000fffffffffffff (largest positive f64
        ; denormal). Under attribute group #2 (denormal features disabled) the
        ; checks above expect both halves to be 0.
    325 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
    326   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
    327   store double %folded, double addrspace(1)* %out
    328   ret void
    329 }
    330 
    331 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f64:
    332 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
    333 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
    334 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 4503599627370495 = 0x000fffffffffffff (largest positive f64
        ; denormal). Under attribute group #3 (denormal features enabled) the
        ; checks above expect the bit pattern to be kept: LO = -1, HI = 0xfffff.
    335 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
    336   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
    337   store double %folded, double addrspace(1)* %out
    338   ret void
    339 }
    340 
    341 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
    342 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    343 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
    344 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 9227875636482146303 = 0x800fffffffffffff (negative f64
        ; denormal). Under attribute group #2 (denormal features disabled) the
        ; checks above expect both halves to be 0.
    345 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
    346   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
    347   store double %folded, double addrspace(1)* %out
    348   ret void
    349 }
    350 
    351 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f64:
    352 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
    353 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
    354 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 9227875636482146303 = 0x800fffffffffffff (negative f64
        ; denormal). Under attribute group #3 (denormal features enabled) the
        ; checks above expect the bit pattern kept: LO = -1, HI = 0x800fffff.
    355 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
    356   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
    357   store double %folded, double addrspace(1)* %out
    358   ret void
    359 }
    360 
    361 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f64:
    362 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    363 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    364 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Constant fold of the quiet NaN 0x7FF8000000000000. The checks above
        ; expect the same pattern back: HI = 0x7ff80000, LO = 0.
    365 define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
    366   %folded = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
    367   store double %folded, double addrspace(1)* %out
    368   ret void
    369 }
    370 
    371 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f64:
    372 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    373 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    374 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits i64 -1 (all ones), a NaN with sign and full payload set.
        ; The checks above expect it to fold to HI = 0x7ff80000, LO = 0.
    375 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
    376   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
    377   store double %folded, double addrspace(1)* %out
    378   ret void
    379 }
    380 
    381 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f64:
    382 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    383 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    384 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits i64 -2 (0xfffffffffffffffe), another quiet-NaN encoding.
        ; The checks above expect it to fold to HI = 0x7ff80000, LO = 0.
    385 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
    386   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
    387   store double %folded, double addrspace(1)* %out
    388   ret void
    389 }
    390 
    391 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f64:
    392 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    393 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    394 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 9218868437227405313 = 0x7ff0000000000001 (positive
        ; signaling NaN, smallest payload). The checks above expect it to fold
        ; to HI = 0x7ff80000, LO = 0.
    395 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
    396   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
    397   store double %folded, double addrspace(1)* %out
    398   ret void
    399 }
    400 
    401 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f64:
    402 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    403 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    404 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 9221120237041090559 = 0x7ff7ffffffffffff: positive
        ; signaling NaN with the largest payload (quiet bit clear), the f64
        ; counterpart of the f32 pattern 0x7fbfffff. The previous constant,
        ; 0x7fffffffffffffff, had the quiet bit set and so was a quiet NaN.
        ; The checks above expect the fold to HI = 0x7ff80000, LO = 0.
    405 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
    406   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9221120237041090559 to double))
    407   store double %canonicalized, double addrspace(1)* %out
    408   ret void
    409 }
    410 
    411 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f64:
    412 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    413 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    414 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 18442240474082181121 = 0xfff0000000000001 (negative
        ; signaling NaN, smallest payload). The checks above expect it to fold
        ; to HI = 0x7ff80000, LO = 0.
    415 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
    416   %folded = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
    417   store double %folded, double addrspace(1)* %out
    418   ret void
    419 }
    420 
    421 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f64:
    422 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
    423 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
    424 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
        ; Input bits 18444492273895866367 = 0xfff7ffffffffffff: negative
        ; signaling NaN with the largest payload (quiet bit clear), the f64
        ; counterpart of the f32 pattern 0xffbfffff. The previous constant was
        ; all ones, which is a quiet NaN and the same input as the i64 -1 test.
        ; The checks above expect the fold to HI = 0x7ff80000, LO = 0.
    425 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
    426   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18444492273895866367 to double))
    427   store double %canonicalized, double addrspace(1)* %out
    428   ret void
    429 }
    430 
    431 ; GCN-LABEL:  {{^}}test_canonicalize_value_f64_flush:
    432 ; GCN: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
        ; Per-workitem load / canonicalize / store of a double under attribute
        ; group #4 (denormal features disabled, tonga). The check above expects
        ; a v_mul_f64 by 1.0.
    433 define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
    434   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    435   %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
    436   %x = load double, double addrspace(1)* %src, align 8
    437   %canon = tail call double @llvm.canonicalize.f64(double %x)
    438   %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
    439   store double %canon, double addrspace(1)* %dst, align 8
    440   ret void
    441 }
    442 
    443 ; GCN-LABEL:  {{^}}test_canonicalize_value_f32_flush:
    444 ; GCN: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
        ; Per-workitem load / canonicalize / store of a float under attribute
        ; group #4 (denormal features disabled, tonga). The check above expects
        ; a v_mul_f32 by 1.0.
    445 define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
    446   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    447   %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
    448   %x = load float, float addrspace(1)* %src, align 4
    449   %canon = tail call float @llvm.canonicalize.f32(float %x)
    450   %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
    451   store float %canon, float addrspace(1)* %dst, align 4
    452   ret void
    453 }
    454 
    455 ; GCN-LABEL:  {{^}}test_canonicalize_value_f16_flush:
    456 ; GCN: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
        ; Per-workitem load / canonicalize / store of a half under attribute
        ; group #4 (denormal features disabled, tonga). The check above expects
        ; a v_mul_f16 by 1.0.
    457 define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
    458   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    459   %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
    460   %x = load half, half addrspace(1)* %src, align 2
    461   %canon = tail call half @llvm.canonicalize.f16(half %x)
    462   %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
    463   store half %canon, half addrspace(1)* %dst, align 2
    464   ret void
    465 }
    466 
    467 ; GCN-LABEL:  {{^}}test_canonicalize_value_v2f16_flush_gfx8:
    468 ; GCN:     v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
    469 ; GCN-DAG: v_mul_f16_sdwa v{{[0-9]+}}, [[ONE]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
    470 ; GCN-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
        ; Per-workitem canonicalize of a <2 x half> under attribute group #4
        ; (denormal features disabled, tonga). The checks above expect two f16
        ; multiplies: an SDWA form reading WORD_1 against a register holding
        ; 0x3c00 (f16 1.0), and a plain multiply by 1.0.
    471 define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx8(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
    472   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    473   %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
    474   %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
    475   %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
    476   %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
    477   store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
    478   ret void
    479 }
    480 
    481 ; GCN-LABEL:  {{^}}test_canonicalize_value_v2f16_flush_gfx9:
    482 ; GCN-DAG: v_pk_mul_f16 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
        ; Per-workitem canonicalize of a <2 x half> under attribute group #6
        ; (denormal features disabled, gfx900). The check above expects a single
        ; packed multiply by 1.0.
    483 define amdgpu_kernel void @test_canonicalize_value_v2f16_flush_gfx9(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #6 {
    484   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    485   %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
    486   %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
    487   %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
    488   %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
    489   store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
    490   ret void
    491 }
    492 
    493 ; GCN-LABEL:  {{^}}test_canonicalize_value_f64_denorm:
    494 ; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
        ; Per-workitem load / canonicalize / store of a double under attribute
        ; group #5 (denormal features enabled, gfx900). The check above expects
        ; a v_max_f64 rather than a multiply.
    495 define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #5 {
    496   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    497   %src = getelementptr inbounds double, double addrspace(1)* %arg, i32 %tid
    498   %x = load double, double addrspace(1)* %src, align 8
    499   %canon = tail call double @llvm.canonicalize.f64(double %x)
    500   %dst = getelementptr inbounds double, double addrspace(1)* %out, i32 %tid
    501   store double %canon, double addrspace(1)* %dst, align 8
    502   ret void
    503 }
    504 
    505 ; GCN-LABEL:  {{^}}test_canonicalize_value_f32_denorm:
    506 ; GCN: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
        ; Per-workitem load / canonicalize / store of a float under attribute
        ; group #5 (denormal features enabled, gfx900). The check above expects
        ; a v_max_f32 rather than a multiply.
    507 define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #5 {
    508   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    509   %src = getelementptr inbounds float, float addrspace(1)* %arg, i32 %tid
    510   %x = load float, float addrspace(1)* %src, align 4
    511   %canon = tail call float @llvm.canonicalize.f32(float %x)
    512   %dst = getelementptr inbounds float, float addrspace(1)* %out, i32 %tid
    513   store float %canon, float addrspace(1)* %dst, align 4
    514   ret void
    515 }
    516 
    517 ; GCN-LABEL:  {{^}}test_canonicalize_value_f16_denorm:
    518 ; GCN: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
        ; Per-workitem load / canonicalize / store of a half under attribute
        ; group #5 (denormal features enabled, gfx900). The check above expects
        ; a v_max_f16 rather than a multiply.
    519 define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #5 {
    520   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    521   %src = getelementptr inbounds half, half addrspace(1)* %arg, i32 %tid
    522   %x = load half, half addrspace(1)* %src, align 2
    523   %canon = tail call half @llvm.canonicalize.f16(half %x)
    524   %dst = getelementptr inbounds half, half addrspace(1)* %out, i32 %tid
    525   store half %canon, half addrspace(1)* %dst, align 2
    526   ret void
    527 }
    528 
    529 ; GCN-LABEL:  {{^}}test_canonicalize_value_v2f16_denorm:
    530 ; GCN: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
        ; Per-workitem canonicalize of a <2 x half> under attribute group #5
        ; (denormal features enabled, gfx900). The check above expects a single
        ; packed max rather than a multiply.
    531 define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #5 {
    532   %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
    533   %src = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %tid
    534   %x = load <2 x half>, <2 x half> addrspace(1)* %src, align 4
    535   %canon = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
    536   %dst = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
    537   store <2 x half> %canon, <2 x half> addrspace(1)* %dst, align 2
    538   ret void
    539 }
    540 
    541 ; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f64:
    542 ; GCN: v_max_f64
    543 ; GCN: v_max_f64
        ; Canonicalize a <2 x double> loaded at the workitem's index from %out.
        ; The checks above expect two v_max_f64 instructions. Note the result is
        ; stored to the base pointer %out, not to the indexed address.
    544 define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace(1)* %out) #1 {
    545   %lane = call i32 @llvm.amdgcn.workitem.id.x()
    546   %src = getelementptr <2 x double>, <2 x double> addrspace(1)* %out, i32 %lane
    547   %vec = load <2 x double>, <2 x double> addrspace(1)* %src
    548   %canon = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %vec)
    549   store <2 x double> %canon, <2 x double> addrspace(1)* %out
    550   ret void
    551 }
    552 
    553 attributes #0 = { nounwind readnone } ; used by the intrinsic declarations at the top of the file
    554 attributes #1 = { nounwind } ; no explicit denormal features or target CPU
    555 attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" } ; both denormal features disabled
    556 attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" } ; both denormal features enabled
    557 attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" } ; denormal features disabled, tonga
    558 attributes #5 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" "target-cpu"="gfx900" } ; denormal features enabled, gfx900
    559 attributes #6 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="gfx900" } ; denormal features disabled, gfx900
    560