Home | History | Annotate | Download | only in AMDGPU
      1 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
      2 
      3 ; CHECK: {{^}}main1:
      4 ; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
      ; main1: builds a <4 x float> result one component at a time. For each
      ; component i (0..3) it reads component i of three addrspace(8) vectors:
      ; the one at null, and elements 1 and 2 of a [1024 x <4 x float>] array
      ; based at null. The first value is compared against 0.0 and the result
      ; selects between the other two. Each selected value is passed through
      ; llvm.AMDGPU.clamp.f32 with 0.0 and 1.0, and the assembled vector is
      ; handed to llvm.R600.store.swizzle.
      ; The CHECK lines for this function expect a MOV whose source is a KC0
      ; operand. NOTE(review): presumably because each select draws its two
      ; operands from different array elements (1 and 2), not all of them can
      ; be used directly as constant operands -- confirm against the R600
      ; constant-read restrictions.
      5 define void @main1() {
      6 main_body:
          ; Component 0: condition from the vector at null, operands from
          ; array elements 1 and 2.
      7   %0 = load <4 x float>, <4 x float> addrspace(8)* null
      8   %1 = extractelement <4 x float> %0, i32 0
      9   %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     10   %3 = extractelement <4 x float> %2, i32 0
     11   %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     12   %5 = extractelement <4 x float> %4, i32 0
     13   %6 = fcmp ogt float %1, 0.000000e+00
     14   %7 = select i1 %6, float %3, float %5
          ; Component 1: same pattern, extracting index 1 from each vector.
     15   %8 = load <4 x float>, <4 x float> addrspace(8)* null
     16   %9 = extractelement <4 x float> %8, i32 1
     17   %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     18   %11 = extractelement <4 x float> %10, i32 1
     19   %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     20   %13 = extractelement <4 x float> %12, i32 1
     21   %14 = fcmp ogt float %9, 0.000000e+00
     22   %15 = select i1 %14, float %11, float %13
          ; Component 2: same pattern, extracting index 2 from each vector.
     23   %16 = load <4 x float>, <4 x float> addrspace(8)* null
     24   %17 = extractelement <4 x float> %16, i32 2
     25   %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     26   %19 = extractelement <4 x float> %18, i32 2
     27   %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     28   %21 = extractelement <4 x float> %20, i32 2
     29   %22 = fcmp ogt float %17, 0.000000e+00
     30   %23 = select i1 %22, float %19, float %21
          ; Component 3: same pattern, extracting index 3 from each vector.
     31   %24 = load <4 x float>, <4 x float> addrspace(8)* null
     32   %25 = extractelement <4 x float> %24, i32 3
     33   %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     34   %27 = extractelement <4 x float> %26, i32 3
     35   %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     36   %29 = extractelement <4 x float> %28, i32 3
     37   %30 = fcmp ogt float %25, 0.000000e+00
     38   %31 = select i1 %30, float %27, float %29
          ; Pass each selected value through the clamp intrinsic with the
          ; constants 0.0 and 1.0.
     39   %32 = call float @llvm.AMDGPU.clamp.f32(float %7, float 0.000000e+00, float 1.000000e+00)
     40   %33 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00)
     41   %34 = call float @llvm.AMDGPU.clamp.f32(float %23, float 0.000000e+00, float 1.000000e+00)
     42   %35 = call float @llvm.AMDGPU.clamp.f32(float %31, float 0.000000e+00, float 1.000000e+00)
          ; Reassemble the four clamped scalars into one vector, in component
          ; order, and hand it to the store intrinsic.
     43   %36 = insertelement <4 x float> undef, float %32, i32 0
     44   %37 = insertelement <4 x float> %36, float %33, i32 1
     45   %38 = insertelement <4 x float> %37, float %34, i32 2
     46   %39 = insertelement <4 x float> %38, float %35, i32 3
     47   call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
     48   ret void
     49 }
     50 
     51 ; CHECK: {{^}}main2:
     52 ; CHECK-NOT: MOV
     ; main2: same overall shape as a compare/select/clamp/store pipeline over
     ; four components, but here both operands of every select are read from
     ; the SAME element of the [1024 x <4 x float>] addrspace(8) array (two
     ; different components of element 1, or of element 2). The condition for
     ; component i is still component i of the vector at null, compared
     ; against 0.0.
     ; The CHECK-NOT line for this function expects no MOV in the output.
     ; NOTE(review): presumably because operands taken from a single array
     ; element can all be used directly as constant operands -- confirm
     ; against the R600 constant-read restrictions.
     53 define void @main2() {
     54 main_body:
          ; Result component 0: choose between components 0 and 1 of array
          ; element 1.
     55   %0 = load <4 x float>, <4 x float> addrspace(8)* null
     56   %1 = extractelement <4 x float> %0, i32 0
     57   %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     58   %3 = extractelement <4 x float> %2, i32 0
     59   %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     60   %5 = extractelement <4 x float> %4, i32 1
     61   %6 = fcmp ogt float %1, 0.000000e+00
     62   %7 = select i1 %6, float %3, float %5
          ; Result component 1: choose between components 0 and 1 of array
          ; element 2.
     63   %8 = load <4 x float>, <4 x float> addrspace(8)* null
     64   %9 = extractelement <4 x float> %8, i32 1
     65   %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     66   %11 = extractelement <4 x float> %10, i32 0
     67   %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     68   %13 = extractelement <4 x float> %12, i32 1
     69   %14 = fcmp ogt float %9, 0.000000e+00
     70   %15 = select i1 %14, float %11, float %13
          ; Result component 2: choose between components 3 and 2 of array
          ; element 1.
     71   %16 = load <4 x float>, <4 x float> addrspace(8)* null
     72   %17 = extractelement <4 x float> %16, i32 2
     73   %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     74   %19 = extractelement <4 x float> %18, i32 3
     75   %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     76   %21 = extractelement <4 x float> %20, i32 2
     77   %22 = fcmp ogt float %17, 0.000000e+00
     78   %23 = select i1 %22, float %19, float %21
          ; Result component 3: choose between components 3 and 2 of array
          ; element 2.
     79   %24 = load <4 x float>, <4 x float> addrspace(8)* null
     80   %25 = extractelement <4 x float> %24, i32 3
     81   %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     82   %27 = extractelement <4 x float> %26, i32 3
     83   %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     84   %29 = extractelement <4 x float> %28, i32 2
     85   %30 = fcmp ogt float %25, 0.000000e+00
     86   %31 = select i1 %30, float %27, float %29
          ; Pass each selected value through the clamp intrinsic with the
          ; constants 0.0 and 1.0.
     87   %32 = call float @llvm.AMDGPU.clamp.f32(float %7, float 0.000000e+00, float 1.000000e+00)
     88   %33 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00)
     89   %34 = call float @llvm.AMDGPU.clamp.f32(float %23, float 0.000000e+00, float 1.000000e+00)
     90   %35 = call float @llvm.AMDGPU.clamp.f32(float %31, float 0.000000e+00, float 1.000000e+00)
          ; Reassemble the four clamped scalars into one vector, in component
          ; order, and hand it to the store intrinsic.
     91   %36 = insertelement <4 x float> undef, float %32, i32 0
     92   %37 = insertelement <4 x float> %36, float %33, i32 1
     93   %38 = insertelement <4 x float> %37, float %34, i32 2
     94   %39 = insertelement <4 x float> %38, float %35, i32 3
     95   call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
     96   ret void
     97 }
     98 
     ; Target intrinsics used by both functions in this file. The clamp
     ; intrinsic is always called here with a value followed by the constants
     ; 0.0 and 1.0; the store intrinsic is always called with the assembled
     ; <4 x float> and two zero i32 arguments.
     ; NOTE(review): the meaning of the two i32 arguments of
     ; llvm.R600.store.swizzle is not visible in this file -- confirm against
     ; the backend's intrinsic definitions.
     99 declare float @llvm.AMDGPU.clamp.f32(float, float, float) readnone
    100 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
    101