; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Codegen test for the llvm.bswap intrinsic on amdgcn.
;
; Both invocations above share the same instruction-check prefix, so the
; expected sequences below must hold for -mcpu=SI and -mcpu=tonga alike.
;
; Every kernel is anchored with a label pattern of the form {{^}}name: so that
; the match is made against the emitted symbol at the start of a line rather
; than against an assembler comment, and so that the checks of one kernel
; cannot be satisfied by instructions belonging to a later one.

declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone

; Scalar i32 byte swap. The expected lowering feeds the loaded value twice
; into v_alignbit_b32 with shift amounts 8 and 24, then merges the two
; results with v_bfi_b32 under the mask 0xff00ff. Register names are captured
; so the data flow from load to store is verified, not just the opcodes.

; FUNC-LABEL: {{^}}test_bswap_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %val = load i32, i32 addrspace(1)* %in, align 4
  %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
  store i32 %bswap, i32 addrspace(1)* %out, align 4
  ret void
}

; Vector i32 variants. Each element is expected to produce the same
; two-alignbit-plus-bfi group as the scalar case; only the instruction counts
; are checked (in any order), not the registers.

; FUNC-LABEL: {{^}}test_bswap_v2i32:
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
  %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
  store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_bswap_v4i32:
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
  %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
  store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; FUNC-LABEL: {{^}}test_bswap_v8i32:
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_alignbit_b32
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
  %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
  store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; 64-bit variants. The exact instruction sequence is deliberately left
; unchecked; these only verify that each kernel is emitted under its own
; label and runs through to s_endpgm (i.e. selection does not fail). The
; labels also bound the checks of the preceding kernel.

; FUNC-LABEL: {{^}}test_bswap_i64:
; SI: s_endpgm
define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
  %val = load i64, i64 addrspace(1)* %in, align 8
  %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
  store i64 %bswap, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_bswap_v2i64:
; SI: s_endpgm
define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
  %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
  store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
  ret void
}

; FUNC-LABEL: {{^}}test_bswap_v4i64:
; SI: s_endpgm
define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
  %bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
  store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
  ret void
}