1 ; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s 2 3 ; The main purpose of this test is to find out whether copyPhysReg can deal with 4 ; the memmove-like situation arising in tuples, where an early copy can clobber 5 ; the value needed by a later one if the tuples overlap. 6 7 ; We use dummy inline asm to force LLVM to generate a COPY between the registers 8 ; we want by clobbering all the others. 9 10 define void @test_D1D2_from_D0D1(i8* %addr) #0 { 11 ; CHECK-LABEL: test_D1D2_from_D0D1: 12 ; CHECK: mov.8b v2, v1 13 ; CHECK: mov.8b v1, v0 14 entry: 15 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 16 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 17 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 18 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 19 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 20 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 21 22 tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 23 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 24 ret void 25 } 26 27 define void @test_D0D1_from_D1D2(i8* %addr) #0 { 28 ; CHECK-LABEL: test_D0D1_from_D1D2: 29 ; CHECK: mov.8b v0, v1 30 ; CHECK: mov.8b v1, v2 31 entry: 32 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 33 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 34 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 35 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 36 tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 37 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 38 39 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 40 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 41 ret void 42 } 43 44 define void @test_D0D1_from_D31D0(i8* %addr) #0 { 45 ; CHECK-LABEL: test_D0D1_from_D31D0: 46 ; CHECK: mov.8b v1, v0 47 ; CHECK: mov.8b v0, v31 48 entry: 49 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 50 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 51 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 52 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 53 tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() 54 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 55 56 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 57 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 58 ret void 59 } 60 61 define void @test_D31D0_from_D0D1(i8* %addr) #0 { 62 ; CHECK-LABEL: test_D31D0_from_D0D1: 63 ; CHECK: mov.8b v31, v0 64 ; CHECK: mov.8b v0, v1 65 entry: 66 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 67 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 68 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 69 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 70 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 71 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 72 73 tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() 74 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 75 ret void 76 } 77 78 define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 { 79 ; CHECK-LABEL: test_D2D3D4_from_D0D1D2: 80 ; CHECK: mov.8b v4, v2 81 ; CHECK: mov.8b v3, v1 82 ; CHECK: mov.8b v2, v0 83 entry: 84 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 85 %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 86 %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0 87 %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1 88 %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2 89 90 tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 91 tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) 92 93 tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 94 tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) 95 ret void 96 } 97 98 define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 { 99 ; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3: 100 ; CHECK: mov.16b v0, v1 101 ; CHECK: mov.16b v1, v2 102 ; CHECK: mov.16b v2, v3 103 entry: 104 %addr_v16i8 = bitcast i8* %addr to <16 x i8>* 105 %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) 106 %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 107 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 108 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 109 tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 110 tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) 111 112 tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 113 tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) 114 ret void 115 } 116 117 define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 { 118 ; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1: 119 ; CHECK: mov.16b v4, v1 120 ; CHECK: mov.16b v3, v0 121 ; CHECK: mov.16b v2, v31 122 ; CHECK: mov.16b v1, v30 123 %addr_v16i8 = bitcast i8* %addr to <16 x i8>* 124 %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) 125 %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 126 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 127 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 128 %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3 129 130 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"() 131 tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) 132 133 tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 134 tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) 135 ret void 136 } 137 138 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) 139 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) 140 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) 141 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) 142 143 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) 144 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) 145 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) 146 declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) 147