; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE

; Transpose-style shuffles on AArch64.
;
; Each function below performs one two-source shufflevector whose mask picks
; either the even-numbered lanes (the "trn1" functions) or the odd-numbered
; lanes (the "trn2" functions) of both inputs, interleaved. For every function
; the directives verify two things:
;   * the cost model reports an estimated cost of 1 for the shuffle, and
;   * llc emits the expected instruction: trn1/trn2 for vectors with more than
;     two elements, zip1/zip2 for the two-element vectors.
;
; NOTE(review): the opt invocation uses the legacy "-cost-model -analyze"
; spelling; confirm it matches the LLVM version this test is run against.

;; <8 x i8>

; COST-LABEL: trn1.v8i8
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; CODE-LABEL: trn1.v8i8
; CODE: trn1 v0.8b, v0.8b, v1.8b
define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i8> %tmp0
}

; COST-LABEL: trn2.v8i8
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; CODE-LABEL: trn2.v8i8
; CODE: trn2 v0.8b, v0.8b, v1.8b
define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i8> %tmp0
}

;; <16 x i8>

; COST-LABEL: trn1.v16i8
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
; CODE-LABEL: trn1.v16i8
; CODE: trn1 v0.16b, v0.16b, v1.16b
define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %tmp0
}

; COST-LABEL: trn2.v16i8
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
; CODE-LABEL: trn2.v16i8
; CODE: trn2 v0.16b, v0.16b, v1.16b
define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <16 x i8> %tmp0
}

;; <4 x i16>

; COST-LABEL: trn1.v4i16
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4i16
; CODE: trn1 v0.4h, v0.4h, v1.4h
define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i16> %tmp0
}

; COST-LABEL: trn2.v4i16
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4i16
; CODE: trn2 v0.4h, v0.4h, v1.4h
define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i16> %tmp0
}

;; <8 x i16>

; COST-LABEL: trn1.v8i16
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; CODE-LABEL: trn1.v8i16
; CODE: trn1 v0.8h, v0.8h, v1.8h
define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %tmp0
}

; COST-LABEL: trn2.v8i16
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; CODE-LABEL: trn2.v8i16
; CODE: trn2 v0.8h, v0.8h, v1.8h
define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i16> %tmp0
}

;; <2 x i32> -- two-element case, expected to be emitted as zip1/zip2.

; COST-LABEL: trn1.v2i32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2i32
; CODE: zip1 v0.2s, v0.2s, v1.2s
define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x i32> %tmp0
}

; COST-LABEL: trn2.v2i32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2i32
; CODE: zip2 v0.2s, v0.2s, v1.2s
define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x i32> %tmp0
}

;; <4 x i32>

; COST-LABEL: trn1.v4i32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4i32
; CODE: trn1 v0.4s, v0.4s, v1.4s
define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %tmp0
}

; COST-LABEL: trn2.v4i32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4i32
; CODE: trn2 v0.4s, v0.4s, v1.4s
define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i32> %tmp0
}

;; <2 x i64> -- two-element case, expected to be emitted as zip1/zip2.

; COST-LABEL: trn1.v2i64
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2i64
; CODE: zip1 v0.2d, v0.2d, v1.2d
define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %tmp0
}

; COST-LABEL: trn2.v2i64
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2i64
; CODE: zip2 v0.2d, v0.2d, v1.2d
define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %tmp0
}

;; <2 x float> -- two-element case, expected to be emitted as zip1/zip2.

; COST-LABEL: trn1.v2f32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2f32
; CODE: zip1 v0.2s, v0.2s, v1.2s
define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x float> %tmp0
}

; COST-LABEL: trn2.v2f32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2f32
; CODE: zip2 v0.2s, v0.2s, v1.2s
define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %tmp0
}

;; <4 x float>

; COST-LABEL: trn1.v4f32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4f32
; CODE: trn1 v0.4s, v0.4s, v1.4s
define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x float> %tmp0
}

; COST-LABEL: trn2.v4f32
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4f32
; CODE: trn2 v0.4s, v0.4s, v1.4s
define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x float> %tmp0
}

;; <2 x double> -- two-element case, expected to be emitted as zip1/zip2.

; COST-LABEL: trn1.v2f64
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2f64
; CODE: zip1 v0.2d, v0.2d, v1.2d
define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %tmp0
}

; COST-LABEL: trn2.v2f64
; COST: Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2f64
; CODE: zip2 v0.2d, v0.2d, v1.2d
define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %tmp0
}