Home | History | Annotate | Download | only in AMDGPU
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      4 
      5 ; mul24 and mad24 are affected
      6 
      7 ; FUNC-LABEL: {{^}}test_mul_v2i32:
      8 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
      9 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     10 
     11 ; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     12 ; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     13 
     14 define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
     15   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
     16   %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
     17   %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
     18   %result = mul <2 x i32> %a, %b
     19   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
     20   ret void
     21 }
     22 
     23 ; FUNC-LABEL: {{^}}v_mul_v4i32:
     24 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     25 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     26 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     27 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
     28 
     29 ; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     30 ; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     31 ; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     32 ; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
     33 
     34 define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
     35   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
     36   %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
     37   %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
     38   %result = mul <4 x i32> %a, %b
     39   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}s_trunc_i64_mul_to_i32:
     44 ; SI: s_load_dword
     45 ; SI: s_load_dword
     46 ; SI: s_mul_i32
     47 ; SI: buffer_store_dword
     48 define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
     49   %mul = mul i64 %b, %a
     50   %trunc = trunc i64 %mul to i32
     51   store i32 %trunc, i32 addrspace(1)* %out, align 8
     52   ret void
     53 }
     54 
     55 ; FUNC-LABEL: {{^}}v_trunc_i64_mul_to_i32:
     56 ; SI: s_load_dword
     57 ; SI: s_load_dword
     58 ; SI: v_mul_lo_i32
     59 ; SI: buffer_store_dword
     60 define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
     61   %a = load i64, i64 addrspace(1)* %aptr, align 8
     62   %b = load i64, i64 addrspace(1)* %bptr, align 8
     63   %mul = mul i64 %b, %a
     64   %trunc = trunc i64 %mul to i32
     65   store i32 %trunc, i32 addrspace(1)* %out, align 8
     66   ret void
     67 }
     68 
     69 ; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top
     70 ; 32-bits of both arguments are sign bits.
     71 ; FUNC-LABEL: {{^}}mul64_sext_c:
     72 ; EG-DAG: MULLO_INT
     73 ; EG-DAG: MULHI_INT
     74 ; SI-DAG: s_mul_i32
     75 ; SI-DAG: v_mul_hi_i32
     76 define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
     77 entry:
     78   %0 = sext i32 %in to i64
     79   %1 = mul i64 %0, 80
     80   store i64 %1, i64 addrspace(1)* %out
     81   ret void
     82 }
     83 
     84 ; FUNC-LABEL: {{^}}v_mul64_sext_c:
     85 ; EG-DAG: MULLO_INT
     86 ; EG-DAG: MULHI_INT
     87 ; SI-DAG: v_mul_lo_i32
     88 ; SI-DAG: v_mul_hi_i32
     89 ; SI: s_endpgm
     90 define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
     91   %val = load i32, i32 addrspace(1)* %in, align 4
     92   %ext = sext i32 %val to i64
     93   %mul = mul i64 %ext, 80
     94   store i64 %mul, i64 addrspace(1)* %out, align 8
     95   ret void
     96 }
     97 
     98 ; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
     99 ; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
    100 ; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
    101 ; SI: s_endpgm
    102 define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
    103   %val = load i32, i32 addrspace(1)* %in, align 4
    104   %ext = sext i32 %val to i64
    105   %mul = mul i64 %ext, 9
    106   store i64 %mul, i64 addrspace(1)* %out, align 8
    107   ret void
    108 }
    109 
    110 ; FUNC-LABEL: {{^}}s_mul_i32:
    111 ; SI: s_load_dword [[SRC0:s[0-9]+]],
    112 ; SI: s_load_dword [[SRC1:s[0-9]+]],
    113 ; SI: s_mul_i32 [[SRESULT:s[0-9]+]], [[SRC0]], [[SRC1]]
    114 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
    115 ; SI: buffer_store_dword [[VRESULT]],
    116 ; SI: s_endpgm
    117 define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
    118   %mul = mul i32 %a, %b
    119   store i32 %mul, i32 addrspace(1)* %out, align 4
    120   ret void
    121 }
    122 
    123 ; FUNC-LABEL: {{^}}v_mul_i32:
    124 ; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
    125 define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    126   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
    127   %a = load i32, i32 addrspace(1)* %in
    128   %b = load i32, i32 addrspace(1)* %b_ptr
    129   %result = mul i32 %a, %b
    130   store i32 %result, i32 addrspace(1)* %out
    131   ret void
    132 }
    133 
    134 ; A standard 64-bit multiply.  The expansion should be around 6 instructions.
    135 ; It would be difficult to match the expansion correctly without writing
    136 ; a really complicated list of FileCheck expressions.  I don't want
    137 ; to confuse people who may 'break' this test with a correct optimization,
    138 ; so this test just uses FUNC-LABEL to make sure the compiler does not
    139 ; crash with a 'failed to select' error.
    140 
    141 ; FUNC-LABEL: {{^}}s_mul_i64:
    142 define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
    143   %mul = mul i64 %a, %b
    144   store i64 %mul, i64 addrspace(1)* %out, align 8
    145   ret void
    146 }
    147 
    148 ; FUNC-LABEL: {{^}}v_mul_i64:
    149 ; SI: v_mul_lo_i32
    150 define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
    151   %a = load i64, i64 addrspace(1)* %aptr, align 8
    152   %b = load i64, i64 addrspace(1)* %bptr, align 8
    153   %mul = mul i64 %a, %b
    154   store i64 %mul, i64 addrspace(1)* %out, align 8
    155   ret void
    156 }
    157 
    158 ; FUNC-LABEL: {{^}}mul32_in_branch:
    159 ; SI: s_mul_i32
    160 define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
    161 entry:
    162   %0 = icmp eq i32 %a, 0
    163   br i1 %0, label %if, label %else
    164 
    165 if:
    166   %1 = load i32, i32 addrspace(1)* %in
    167   br label %endif
    168 
    169 else:
    170   %2 = mul i32 %a, %b
    171   br label %endif
    172 
    173 endif:
    174   %3 = phi i32 [%1, %if], [%2, %else]
    175   store i32 %3, i32 addrspace(1)* %out
    176   ret void
    177 }
    178 
    179 ; FUNC-LABEL: {{^}}mul64_in_branch:
    180 ; SI-DAG: s_mul_i32
    181 ; SI-DAG: v_mul_hi_u32
    182 ; SI: s_endpgm
    183 define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
    184 entry:
    185   %0 = icmp eq i64 %a, 0
    186   br i1 %0, label %if, label %else
    187 
    188 if:
    189   %1 = load i64, i64 addrspace(1)* %in
    190   br label %endif
    191 
    192 else:
    193   %2 = mul i64 %a, %b
    194   br label %endif
    195 
    196 endif:
    197   %3 = phi i64 [%1, %if], [%2, %else]
    198   store i64 %3, i64 addrspace(1)* %out
    199   ret void
    200 }
    201