; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s

; Each function below swaps the contents of %x and %y through memory.
; Because the loads and stores alias, the scheduler must not move any
; store ahead of the loads; the CHECK lines pin every load before the
; first store.

; SI-LABEL: @no_reorder_v2f64_global_load_store
; SI: BUFFER_LOAD_DWORDX2
; SI: BUFFER_LOAD_DWORDX2
; SI: BUFFER_LOAD_DWORDX2
; SI: BUFFER_LOAD_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: BUFFER_STORE_DWORDX2
; SI: S_ENDPGM
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
  %tmp1 = load <2 x double> addrspace(1)* %x, align 16
  %tmp4 = load <2 x double> addrspace(1)* %y, align 16
  store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
  store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
  ret void
}

; Same swap through local (LDS, addrspace(3)) memory: both DS_READ_B64
; operations must come before either DS_WRITE_B64.
; SI-LABEL: @no_reorder_scalarized_v2f64_local_load_store
; SI: DS_READ_B64
; SI: DS_READ_B64
; SI: DS_WRITE_B64
; SI: DS_WRITE_B64
; SI: S_ENDPGM
define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
  %tmp1 = load <2 x double> addrspace(3)* %x, align 16
  %tmp4 = load <2 x double> addrspace(3)* %y, align 16
  store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
  store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
  ret void
}

; <8 x i32> case: the vector is split, so the swap is sixteen dword loads
; (grouped four per vector half below) followed by sixteen dword stores.
; SI-LABEL: @no_reorder_split_v8i32_global_load_store
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD

; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD

; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD

; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD
; SI: BUFFER_LOAD_DWORD


; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD

; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD

; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD

; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: BUFFER_STORE_DWORD
; SI: S_ENDPGM
define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
  %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
  %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
  store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
  store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
  ret void
}

; Swap with an extend/add/truncate in between.  The SI-NOT line verifies
; that no DS_READ is scheduled between the two DS_WRITE_B64 operations,
; i.e. both reads still precede the writes.
; SI-LABEL: @no_reorder_extload_64
; SI: DS_READ_B64
; SI: DS_READ_B64
; SI: DS_WRITE_B64
; SI-NOT: DS_READ
; SI: DS_WRITE_B64
; SI: S_ENDPGM
define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
  %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
  %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
  %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
  %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
  %tmp9 = add <2 x i64> %tmp4ext, <i64 1, i64 1>
  %trunctmp9 = trunc <2 x i64> %tmp9 to <2 x i32>
  %trunctmp7 = trunc <2 x i64> %tmp7 to <2 x i32>
  store <2 x i32> %trunctmp9, <2 x i32> addrspace(3)* %x, align 8
  store <2 x i32> %trunctmp7, <2 x i32> addrspace(3)* %y, align 8
  ret void
}