Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc < %s -march=r600 -mcpu=rv710 | FileCheck %s
      2 ; RUN: llc < %s -march=r600 -mcpu=rv730 | FileCheck %s
      3 ; RUN: llc < %s -march=r600 -mcpu=rv770 | FileCheck %s
      4 ; RUN: llc < %s -march=r600 -mcpu=cedar | FileCheck %s
      5 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
      6 ; RUN: llc < %s -march=r600 -mcpu=sumo | FileCheck %s
      7 ; RUN: llc < %s -march=r600 -mcpu=juniper | FileCheck %s
      8 ; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck %s
      9 ; RUN: llc < %s -march=r600 -mcpu=barts | FileCheck %s
     10 ; RUN: llc < %s -march=r600 -mcpu=turks | FileCheck %s
     11 ; RUN: llc < %s -march=r600 -mcpu=caicos | FileCheck %s
     12 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
     13 
     14 ; r700+ supports 16 fetches in a clause
     15 ; CHECK: {{^}}fetch_limits_r700:
     16 ; CHECK: Fetch clause
     17 ; CHECK: Fetch clause
     18 
     19 define amdgpu_ps void @fetch_limits_r700() {
     20 entry:
     21   %0 = load <4 x float>, <4 x float> addrspace(8)* null
     22   %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
     23   %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
     24   %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
     25   %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
     26   %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
     27   %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
     28   %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
     29   %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
     30   %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
     31   %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
     32   %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
     33   %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
     34   %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
     35   %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
     36   %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
     37   %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
     38   %17 = shufflevector <4 x float> %0, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     39   %18 = call <4 x float> @llvm.r600.tex(<4 x float> %17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     40   %19 = shufflevector <4 x float> %1, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     41   %20 = call <4 x float> @llvm.r600.tex(<4 x float> %19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     42   %21 = shufflevector <4 x float> %2, <4 x float> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     43   %22 = call <4 x float> @llvm.r600.tex(<4 x float> %21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     44   %23 = shufflevector <4 x float> %3, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     45   %24 = call <4 x float> @llvm.r600.tex(<4 x float> %23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     46   %25 = shufflevector <4 x float> %4, <4 x float> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     47   %26 = call <4 x float> @llvm.r600.tex(<4 x float> %25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     48   %27 = shufflevector <4 x float> %5, <4 x float> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     49   %28 = call <4 x float> @llvm.r600.tex(<4 x float> %27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     50   %29 = shufflevector <4 x float> %6, <4 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     51   %30 = call <4 x float> @llvm.r600.tex(<4 x float> %29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     52   %31 = shufflevector <4 x float> %7, <4 x float> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     53   %32 = call <4 x float> @llvm.r600.tex(<4 x float> %31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     54   %33 = shufflevector <4 x float> %8, <4 x float> %8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     55   %34 = call <4 x float> @llvm.r600.tex(<4 x float> %33, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     56   %35 = shufflevector <4 x float> %9, <4 x float> %9, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     57   %36 = call <4 x float> @llvm.r600.tex(<4 x float> %35, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     58   %37 = shufflevector <4 x float> %10, <4 x float> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     59   %38 = call <4 x float> @llvm.r600.tex(<4 x float> %37, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     60   %39 = shufflevector <4 x float> %11, <4 x float> %11, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     61   %40 = call <4 x float> @llvm.r600.tex(<4 x float> %39, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     62   %41 = shufflevector <4 x float> %12, <4 x float> %12, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     63   %42 = call <4 x float> @llvm.r600.tex(<4 x float> %41, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     64   %43 = shufflevector <4 x float> %13, <4 x float> %13, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     65   %44 = call <4 x float> @llvm.r600.tex(<4 x float> %43, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     66   %45 = shufflevector <4 x float> %14, <4 x float> %14, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     67   %46 = call <4 x float> @llvm.r600.tex(<4 x float> %45, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     68   %47 = shufflevector <4 x float> %15, <4 x float> %15, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     69   %48 = call <4 x float> @llvm.r600.tex(<4 x float> %47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     70   %49 = shufflevector <4 x float> %16, <4 x float> %16, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     71   %50 = call <4 x float> @llvm.r600.tex(<4 x float> %49, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
     72   %a = fadd <4 x float> %18, %20
     73   %b = fadd <4 x float> %22, %24
     74   %c = fadd <4 x float> %26, %28
     75   %d = fadd <4 x float> %30, %32
     76   %e = fadd <4 x float> %34, %36
     77   %f = fadd <4 x float> %38, %40
     78   %g = fadd <4 x float> %42, %44
     79   %h = fadd <4 x float> %46, %48
     80   %i = fadd <4 x float> %50, %a
     81   %bc = fadd <4 x float> %b, %c
     82   %de = fadd <4 x float> %d, %e
     83   %fg = fadd <4 x float> %f, %g
     84   %hi = fadd <4 x float> %h, %i
     85   %bcde = fadd <4 x float> %bc, %de
     86   %fghi = fadd <4 x float> %fg, %hi
     87   %bcdefghi = fadd <4 x float> %bcde, %fghi
     88   call void @llvm.R600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
     89   ret void
     90 }
     91 
     92 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
     93 
     94 ; Function Attrs: readnone
     95 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
     96 
     97 attributes #0 = { readnone }
     98