Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      4 
      5 ; This test just checks that the compiler doesn't crash.
      6 
      7 ; FUNC-LABEL: {{^}}v32i8_to_v8i32:
      8 define amdgpu_ps float @v32i8_to_v8i32(<32 x i8> addrspace(4)* inreg) #0 {
        ; Loads a <32 x i8> through the unnamed inreg pointer argument (%0), views the
        ; same 256 bits as <8 x i32>, and returns 0.0 when lane 1 is non-zero, else 1.0.
        ; NOTE(review): attribute group #0 is not defined in the visible part of the file.
      9 entry:
     10   %bytes = load <32 x i8>, <32 x i8> addrspace(4)* %0
     11   %words = bitcast <32 x i8> %bytes to <8 x i32>
     12   %word1 = extractelement <8 x i32> %words, i32 1
     13   %word1.nonzero = icmp ne i32 %word1, 0
     14   %result = select i1 %word1.nonzero, float 0.0, float 1.0
     15   ret float %result
     16 }
     17 
     18 ; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
     19 ; SI: s_endpgm
     20 define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
        ; Reinterprets the i8 pointer %in as a pointer to <16 x i8>, then copies one
        ; 16-byte vector from it to %out.
     21 entry:
     22   %in.vec = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
     23   %payload = load <16 x i8>, <16 x i8> addrspace(1)* %in.vec
     24   store <16 x i8> %payload, <16 x i8> addrspace(1)* %out
     25   ret void
     26 }
     27 
     28 define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
        ; FUNC-LABEL: {{^}}f32_to_v2i16:
        ; The label directive above anchors FileCheck to this kernel, so checks
        ; written for neighbouring kernels cannot match inside its output.
        ; Loads a float, adds 1.0, views the 32 bits as <2 x i16>, adds 2 to each
        ; lane, and stores the result to %out.
     29   %src = load float, float addrspace(1)* %in, align 4
     30   %src.plus1 = fadd float %src, 1.0
     31   %as.v2i16 = bitcast float %src.plus1 to <2 x i16>
     32   %lanes.plus2 = add <2 x i16> %as.v2i16, <i16 2, i16 2>
     33   store <2 x i16> %lanes.plus2, <2 x i16> addrspace(1)* %out
     34   ret void
     35 }
     36 
     37 define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
        ; FUNC-LABEL: {{^}}v2i16_to_f32:
        ; The label directive above anchors FileCheck to this kernel, so checks
        ; written for neighbouring kernels cannot match inside its output.
        ; Loads <2 x i16>, adds 2 to each lane, views the 32 bits as a float,
        ; adds 1.0, and stores the result to %out.
     38   %src = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
     39   %lanes.plus2 = add <2 x i16> %src, <i16 2, i16 2>
     40   %as.f32 = bitcast <2 x i16> %lanes.plus2 to float
     41   %f32.plus1 = fadd float %as.f32, 1.0
     42   store float %f32.plus1, float addrspace(1)* %out
     43   ret void
     44 }
     45 
     46 define amdgpu_kernel void @f32_to_v2f16(<2 x half> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
        ; FUNC-LABEL: {{^}}f32_to_v2f16:
        ; The label directive above anchors FileCheck to this kernel, so checks
        ; written for neighbouring kernels cannot match inside its output.
        ; Loads a float, adds 1.0, views the 32 bits as <2 x half>, adds 2.0 to
        ; each lane, and stores the result to %out.
     47   %src = load float, float addrspace(1)* %in, align 4
     48   %src.plus1 = fadd float %src, 1.0
     49   %as.v2f16 = bitcast float %src.plus1 to <2 x half>
     50   %lanes.plus2 = fadd <2 x half> %as.v2f16, <half 2.0, half 2.0>
     51   store <2 x half> %lanes.plus2, <2 x half> addrspace(1)* %out
     52   ret void
     53 }
     54 
     55 define amdgpu_kernel void @v2f16_to_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %in) nounwind {
        ; FUNC-LABEL: {{^}}v2f16_to_f32:
        ; The label directive above anchors FileCheck to this kernel, so checks
        ; written for neighbouring kernels cannot match inside its output.
        ; Loads <2 x half>, adds 2.0 to each lane, views the 32 bits as a float,
        ; adds 1.0, and stores the result to %out.
     56   %src = load <2 x half>, <2 x half> addrspace(1)* %in, align 4
     57   %lanes.plus2 = fadd <2 x half> %src, <half 2.0, half 2.0>
     58   %as.f32 = bitcast <2 x half> %lanes.plus2 to float
     59   %f32.plus1 = fadd float %as.f32, 1.0
     60   store float %f32.plus1, float addrspace(1)* %out
     61   ret void
     62 }
     63 
     64 define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
        ; FUNC-LABEL: {{^}}v4i8_to_i32:
        ; The label directive above anchors FileCheck to this kernel, so checks
        ; written for neighbouring kernels cannot match inside its output.
        ; Loads <4 x i8>, views the 32 bits as a single i32, and stores it to %out.
     65   %src = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
     66   %as.i32 = bitcast <4 x i8> %src to i32
     67   store i32 %as.i32, i32 addrspace(1)* %out, align 4
     68   ret void
     69 }
     70 
     71 define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
        ; FUNC-LABEL: {{^}}i32_to_v4i8:
        ; The label directive above anchors FileCheck to this kernel, so checks
        ; written for neighbouring kernels cannot match inside its output.
        ; Loads an i32, views the 32 bits as <4 x i8>, and stores it to %out.
     72   %src = load i32, i32 addrspace(1)* %in, align 4
     73   %as.v4i8 = bitcast i32 %src to <4 x i8>
     74   store <4 x i8> %as.v4i8, <4 x i8> addrspace(1)* %out, align 4
     75   ret void
     76 }
     77 
     78 ; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
     79 ; SI: s_endpgm
     80 define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
        ; Loads <2 x i32>, adds <4, 9> lane-wise, views the 64 bits as a double,
        ; adds 1.0, and stores the result to %out.
     81   %src = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
     82   %src.biased = add <2 x i32> %src, <i32 4, i32 9>
     83   %as.f64 = bitcast <2 x i32> %src.biased to double
     84   %f64.plus1 = fadd double %as.f64, 1.0
     85   store double %f64.plus1, double addrspace(1)* %out, align 8
     86   ret void
     87 }
     88 
     89 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
     90 ; SI: s_endpgm
     91 define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
        ; Loads a double, adds 4.0, views the 64 bits as <2 x i32>, and stores
        ; the result to %out.
     92   %src = load double, double addrspace(1)* %in, align 8
     93   %src.plus4 = fadd double %src, 4.0
     94   %as.v2i32 = bitcast double %src.plus4 to <2 x i32>
     95   store <2 x i32> %as.v2i32, <2 x i32> addrspace(1)* %out, align 8
     96   ret void
     97 }
     98 
     99 ; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
    100 define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
        ; Stores to %out either %value viewed as <2 x double> (when %cond is 0)
        ; or an all-zero vector (otherwise); the bitcast result feeds a phi.
    101 entry:
    102   %cond.is.zero = icmp eq i32 %cond, 0
    103   br i1 %cond.is.zero, label %do.cast, label %join
    104 
    105 do.cast:
    106   %as.v2f64 = bitcast <2 x i64> %value to <2 x double>
    107   br label %join
    108 
    109 join:
    110   %merged = phi <2 x double> [zeroinitializer, %entry], [%as.v2f64, %do.cast]
    111   store <2 x double> %merged, <2 x double> addrspace(1)* %out
    112   ret void
    113 }
    114 
    115 ; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
    116 define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
        ; Stores to %out either %value viewed as <2 x i64> (when %cond is 0)
        ; or an all-zero vector (otherwise); the bitcast result feeds a phi.
    117 entry:
    118   %cond.is.zero = icmp eq i32 %cond, 0
    119   br i1 %cond.is.zero, label %do.cast, label %join
    120 
    121 do.cast:
    122   %as.v2i64 = bitcast <2 x double> %value to <2 x i64>
    123   br label %join
    124 
    125 join:
    126   %merged = phi <2 x i64> [zeroinitializer, %entry], [%as.v2i64, %do.cast]
    127   store <2 x i64> %merged, <2 x i64> addrspace(1)* %out
    128   ret void
    129 }
    130 
    131 ; FUNC-LABEL: {{^}}v4i16_to_f64:
    132 define amdgpu_kernel void @v4i16_to_f64(double addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
        ; Loads <4 x i16> (align 4), adds 4 to each lane, views the 64 bits as a
        ; double, adds 1.0, and stores the result to %out.
    133   %src = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
    134   %lanes.plus4 = add <4 x i16> %src, <i16 4, i16 4, i16 4, i16 4>
    135   %as.f64 = bitcast <4 x i16> %lanes.plus4 to double
    136   %f64.plus1 = fadd double %as.f64, 1.0
    137   store double %f64.plus1, double addrspace(1)* %out
    138   ret void
    139 }
    140 
    141 ; FUNC-LABEL: {{^}}v4f16_to_f64:
    142 define amdgpu_kernel void @v4f16_to_f64(double addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
        ; Loads <4 x half> (align 4), adds 4.0 to each lane, views the 64 bits
        ; as a double, adds 1.0, and stores the result to %out.
    143   %src = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
    144   %lanes.plus4 = fadd <4 x half> %src, <half 4.0, half 4.0, half 4.0, half 4.0>
    145   %as.f64 = bitcast <4 x half> %lanes.plus4 to double
    146   %f64.plus1 = fadd double %as.f64, 1.0
    147   store double %f64.plus1, double addrspace(1)* %out
    148   ret void
    149 }
    150 
    151 ; FUNC-LABEL: {{^}}f64_to_v4f16:
    152 define amdgpu_kernel void @f64_to_v4f16(<4 x half> addrspace(1)* %out, double addrspace(1)* %in) nounwind {
        ; Loads a double (with only 4-byte alignment), adds 1.0, views the 64
        ; bits as <4 x half>, adds 2.0 to each lane, and stores the result to %out.
    153   %src = load double, double addrspace(1)* %in, align 4
    154   %src.plus1 = fadd double %src, 1.0
    155   %as.v4f16 = bitcast double %src.plus1 to <4 x half>
    156   %lanes.plus2 = fadd <4 x half> %as.v4f16, <half 2.0, half 2.0, half 2.0, half 2.0>
    157   store <4 x half> %lanes.plus2, <4 x half> addrspace(1)* %out
    158   ret void
    159 }
    160 
    161 ; FUNC-LABEL: {{^}}f64_to_v4i16:
    162 define amdgpu_kernel void @f64_to_v4i16(<4 x i16> addrspace(1)* %out, double addrspace(1)* %in) nounwind {
        ; Loads a double (with only 4-byte alignment), adds 1.0, views the 64
        ; bits as <4 x i16>, adds 2 to each lane, and stores the result to %out.
    163   %src = load double, double addrspace(1)* %in, align 4
    164   %src.plus1 = fadd double %src, 1.0
    165   %as.v4i16 = bitcast double %src.plus1 to <4 x i16>
    166   %lanes.plus2 = add <4 x i16> %as.v4i16, <i16 2, i16 2, i16 2, i16 2>
    167   store <4 x i16> %lanes.plus2, <4 x i16> addrspace(1)* %out
    168   ret void
    169 }
    170 
    171 ; FUNC-LABEL: {{^}}v4i16_to_i64:
    172 define amdgpu_kernel void @v4i16_to_i64(i64 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
        ; Loads <4 x i16> (align 4), adds 4 to each lane, views the 64 bits as an
        ; i64, adds 1, and stores the result to %out.
    173   %src = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
    174   %lanes.plus4 = add <4 x i16> %src, <i16 4, i16 4, i16 4, i16 4>
    175   %as.i64 = bitcast <4 x i16> %lanes.plus4 to i64
    176   %i64.plus1 = add i64 %as.i64, 1
    177   store i64 %i64.plus1, i64 addrspace(1)* %out
    178   ret void
    179 }
    180 
    181 ; FUNC-LABEL: {{^}}v4f16_to_i64:
    182 define amdgpu_kernel void @v4f16_to_i64(i64 addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
        ; Loads <4 x half> (align 4), adds 4.0 to each lane, views the 64 bits
        ; as an i64, adds 1, and stores the result to %out.
    183   %src = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
    184   %lanes.plus4 = fadd <4 x half> %src, <half 4.0, half 4.0, half 4.0, half 4.0>
    185   %as.i64 = bitcast <4 x half> %lanes.plus4 to i64
    186   %i64.plus1 = add i64 %as.i64, 1
    187   store i64 %i64.plus1, i64 addrspace(1)* %out
    188   ret void
    189 }
    190 
    191 ; FUNC-LABEL: {{^}}bitcast_i64_to_v4i16:
    192 define amdgpu_kernel void @bitcast_i64_to_v4i16(<4 x i16> addrspace(1)* %out, i64 addrspace(1)* %in) {
        ; Loads an i64, adds 4, views the 64 bits as <4 x i16>, adds <1, 2, 3, 4>
        ; lane-wise, and stores the result to %out.
    193   %src = load i64, i64 addrspace(1)* %in, align 8
    194   %src.plus4 = add i64 %src, 4
    195   %as.v4i16 = bitcast i64 %src.plus4 to <4 x i16>
    196   %lanes.biased = add <4 x i16> %as.v4i16, <i16 1, i16 2, i16 3, i16 4>
    197   store <4 x i16> %lanes.biased, <4 x i16> addrspace(1)* %out, align 8
    198   ret void
    199 }
    200 
    201 ; FUNC-LABEL: {{^}}bitcast_i64_to_v4f16:
    202 define amdgpu_kernel void @bitcast_i64_to_v4f16(<4 x half> addrspace(1)* %out, i64 addrspace(1)* %in) {
        ; Loads an i64, adds 4, views the 64 bits as <4 x half>, adds
        ; <1.0, 2.0, 4.0, 8.0> lane-wise, and stores the result to %out.
    203   %src = load i64, i64 addrspace(1)* %in, align 8
    204   %src.plus4 = add i64 %src, 4
    205   %as.v4f16 = bitcast i64 %src.plus4 to <4 x half>
    206   %lanes.biased = fadd <4 x half> %as.v4f16, <half 1.0, half 2.0, half 4.0, half 8.0>
    207   store <4 x half> %lanes.biased, <4 x half> addrspace(1)* %out, align 8
    208   ret void
    209 }
    210 
    211 ; FUNC-LABEL: {{^}}v4i16_to_v2f32:
    212 define amdgpu_kernel void @v4i16_to_v2f32(<2 x float> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
        ; Loads <4 x i16> (align 4), adds 4 to each lane, views the 64 bits as
        ; <2 x float>, adds 1.0 to each lane, and stores the result to %out.
    213   %src = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
    214   %lanes.plus4 = add <4 x i16> %src, <i16 4, i16 4, i16 4, i16 4>
    215   %as.v2f32 = bitcast <4 x i16> %lanes.plus4 to <2 x float>
    216   %f32.plus1 = fadd <2 x float> %as.v2f32, <float 1.0, float 1.0>
    217   store <2 x float> %f32.plus1, <2 x float> addrspace(1)* %out
    218   ret void
    219 }
    220 
    221 ; FUNC-LABEL: {{^}}v4f16_to_v2f32:
    222 define amdgpu_kernel void @v4f16_to_v2f32(<2 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
        ; Loads <4 x half> (align 4), adds 4.0 to each lane, views the 64 bits
        ; as <2 x float>, adds 1.0 to each lane, and stores the result to %out.
    223   %src = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
    224   %lanes.plus4 = fadd <4 x half> %src, <half 4.0, half 4.0, half 4.0, half 4.0>
    225   %as.v2f32 = bitcast <4 x half> %lanes.plus4 to <2 x float>
    226   %f32.plus1 = fadd <2 x float> %as.v2f32, <float 1.0, float 1.0>
    227   store <2 x float> %f32.plus1, <2 x float> addrspace(1)* %out
    228   ret void
    229 }
    230 
    231 ; FUNC-LABEL: {{^}}v2f32_to_v4i16:
    232 define amdgpu_kernel void @v2f32_to_v4i16(<4 x i16> addrspace(1)* %out, <2 x float> addrspace(1)* %in) nounwind {
        ; Loads <2 x float> (align 4), adds <2.0, 4.0> lane-wise, views the 64
        ; bits as <4 x i16>, adds <1, 2, 3, 4> lane-wise, and stores to %out.
    233   %src = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
    234   %src.biased = fadd <2 x float> %src, <float 2.0, float 4.0>
    235   %as.v4i16 = bitcast <2 x float> %src.biased to <4 x i16>
    236   %lanes.biased = add <4 x i16> %as.v4i16, <i16 1, i16 2, i16 3, i16 4>
    237   store <4 x i16> %lanes.biased, <4 x i16> addrspace(1)* %out
    238   ret void
    239 }
    240 
    241 ; FUNC-LABEL: {{^}}v2f32_to_v4f16:
    242 define amdgpu_kernel void @v2f32_to_v4f16(<4 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) nounwind {
        ; Loads <2 x float> (align 4), adds <2.0, 4.0> lane-wise, views the 64
        ; bits as <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise, and stores to %out.
    243   %src = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
    244   %src.biased = fadd <2 x float> %src, <float 2.0, float 4.0>
    245   %as.v4f16 = bitcast <2 x float> %src.biased to <4 x half>
    246   %lanes.biased = fadd <4 x half> %as.v4f16, <half 1.0, half 2.0, half 4.0, half 8.0>
    247   store <4 x half> %lanes.biased, <4 x half> addrspace(1)* %out
    248   ret void
    249 }
    250 
    251 ; FUNC-LABEL: {{^}}v4i16_to_v2i32:
    252 define amdgpu_kernel void @v4i16_to_v2i32(<2 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
        ; Loads <4 x i16> (align 4), adds 4 to each lane, views the 64 bits as
        ; <2 x i32>, adds 1 to each lane, and stores the result to %out.
    253   %src = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 4
    254   %lanes.plus4 = add <4 x i16> %src, <i16 4, i16 4, i16 4, i16 4>
    255   %as.v2i32 = bitcast <4 x i16> %lanes.plus4 to <2 x i32>
    256   %i32.plus1 = add <2 x i32> %as.v2i32, <i32 1, i32 1>
    257   store <2 x i32> %i32.plus1, <2 x i32> addrspace(1)* %out
    258   ret void
    259 }
    260 
    261 ; FUNC-LABEL: {{^}}v4f16_to_v2i32:
    262 define amdgpu_kernel void @v4f16_to_v2i32(<2 x i32> addrspace(1)* %out, <4 x half> addrspace(1)* %in) nounwind {
        ; Loads <4 x half> (align 4), adds 4.0 to each lane, views the 64 bits
        ; as <2 x i32>, adds 1 to each lane, and stores the result to %out.
    263   %src = load <4 x half>, <4 x half> addrspace(1)* %in, align 4
    264   %lanes.plus4 = fadd <4 x half> %src, <half 4.0, half 4.0, half 4.0, half 4.0>
    265   %as.v2i32 = bitcast <4 x half> %lanes.plus4 to <2 x i32>
    266   %i32.plus1 = add <2 x i32> %as.v2i32, <i32 1, i32 1>
    267   store <2 x i32> %i32.plus1, <2 x i32> addrspace(1)* %out
    268   ret void
    269 }
    270 
    271 ; FUNC-LABEL: {{^}}v2i32_to_v4i16:
    272 define amdgpu_kernel void @v2i32_to_v4i16(<4 x i16> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
        ; Loads <2 x i32> (align 4), adds <2, 4> lane-wise, views the 64 bits as
        ; <4 x i16>, adds <1, 2, 3, 4> lane-wise, and stores the result to %out.
    273   %src = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 4
    274   %src.biased = add <2 x i32> %src, <i32 2, i32 4>
    275   %as.v4i16 = bitcast <2 x i32> %src.biased to <4 x i16>
    276   %lanes.biased = add <4 x i16> %as.v4i16, <i16 1, i16 2, i16 3, i16 4>
    277   store <4 x i16> %lanes.biased, <4 x i16> addrspace(1)* %out
    278   ret void
    279 }
    280 
    281 ; FUNC-LABEL: {{^}}v2i32_to_v4f16:
    282 define amdgpu_kernel void @v2i32_to_v4f16(<4 x half> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
        ; Loads <2 x i32> (align 4), adds <2, 4> lane-wise, views the 64 bits as
        ; <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise, and stores to %out.
    283   %src = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 4
    284   %src.biased = add <2 x i32> %src, <i32 2, i32 4>
    285   %as.v4f16 = bitcast <2 x i32> %src.biased to <4 x half>
    286   %lanes.biased = fadd <4 x half> %as.v4f16, <half 1.0, half 2.0, half 4.0, half 8.0>
    287   store <4 x half> %lanes.biased, <4 x half> addrspace(1)* %out
    288   ret void
    289 }
    290