; Test various representations of pack-like operations.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s

; One way of writing a <4 x i32> -> <8 x i16> pack.
;
; Every i32 element of both inputs is extracted and reinterpreted as a
; <2 x i16>.  The shuffles keep element 1 of each such pair and then
; concatenate the eight kept halfwords in input order.  The checks below
; expect a vpkf of %v24 and %v26 into %v24 ahead of the return.
define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) {
; CHECK-LABEL: f1:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
  ; Split both inputs into their individual 32-bit elements.
  %elem0 = extractelement <4 x i32> %val0, i32 0
  %elem1 = extractelement <4 x i32> %val0, i32 1
  %elem2 = extractelement <4 x i32> %val0, i32 2
  %elem3 = extractelement <4 x i32> %val0, i32 3
  %elem4 = extractelement <4 x i32> %val1, i32 0
  %elem5 = extractelement <4 x i32> %val1, i32 1
  %elem6 = extractelement <4 x i32> %val1, i32 2
  %elem7 = extractelement <4 x i32> %val1, i32 3
  ; View each 32-bit element as a pair of halfwords.
  %hboth0 = bitcast i32 %elem0 to <2 x i16>
  %hboth1 = bitcast i32 %elem1 to <2 x i16>
  %hboth2 = bitcast i32 %elem2 to <2 x i16>
  %hboth3 = bitcast i32 %elem3 to <2 x i16>
  %hboth4 = bitcast i32 %elem4 to <2 x i16>
  %hboth5 = bitcast i32 %elem5 to <2 x i16>
  %hboth6 = bitcast i32 %elem6 to <2 x i16>
  %hboth7 = bitcast i32 %elem7 to <2 x i16>
  ; Mask <1, 3> takes element 1 of the first operand and element 1 of the
  ; second operand, i.e. one halfword from each original i32.
  %hlow0 = shufflevector <2 x i16> %hboth0, <2 x i16> %hboth1,
                         <2 x i32> <i32 1, i32 3>
  %hlow1 = shufflevector <2 x i16> %hboth2, <2 x i16> %hboth3,
                         <2 x i32> <i32 1, i32 3>
  %hlow2 = shufflevector <2 x i16> %hboth4, <2 x i16> %hboth5,
                         <2 x i32> <i32 1, i32 3>
  %hlow3 = shufflevector <2 x i16> %hboth6, <2 x i16> %hboth7,
                         <2 x i32> <i32 1, i32 3>
  ; Identity-order masks: these shuffles only concatenate their operands.
  %join0 = shufflevector <2 x i16> %hlow0, <2 x i16> %hlow1,
                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %join1 = shufflevector <2 x i16> %hlow2, <2 x i16> %hlow3,
                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ret = shufflevector <4 x i16> %join0, <4 x i16> %join1,
                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                  i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %ret
}

; A different way of writing a <4 x i32> -> <8 x i16> pack.
; Unlike f1, every intermediate value here is a full-width 128-bit vector:
; each extracted i32 is inserted into element 0 of an otherwise undef
; <4 x i32>, bitcast to <8 x i16>, and combined with shuffles whose unused
; result elements are undef.  The expected output is the same vpkf as in f1.
define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) {
; CHECK-LABEL: f2:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
  ; Split both inputs into their individual 32-bit elements.
  %elem0 = extractelement <4 x i32> %val0, i32 0
  %elem1 = extractelement <4 x i32> %val0, i32 1
  %elem2 = extractelement <4 x i32> %val0, i32 2
  %elem3 = extractelement <4 x i32> %val0, i32 3
  %elem4 = extractelement <4 x i32> %val1, i32 0
  %elem5 = extractelement <4 x i32> %val1, i32 1
  %elem6 = extractelement <4 x i32> %val1, i32 2
  %elem7 = extractelement <4 x i32> %val1, i32 3
  ; Put each scalar in element 0 of its own vector; elements 1-3 stay undef.
  %wvec0 = insertelement <4 x i32> undef, i32 %elem0, i32 0
  %wvec1 = insertelement <4 x i32> undef, i32 %elem1, i32 0
  %wvec2 = insertelement <4 x i32> undef, i32 %elem2, i32 0
  %wvec3 = insertelement <4 x i32> undef, i32 %elem3, i32 0
  %wvec4 = insertelement <4 x i32> undef, i32 %elem4, i32 0
  %wvec5 = insertelement <4 x i32> undef, i32 %elem5, i32 0
  %wvec6 = insertelement <4 x i32> undef, i32 %elem6, i32 0
  %wvec7 = insertelement <4 x i32> undef, i32 %elem7, i32 0
  ; Reinterpret each vector as eight halfwords.
  %hvec0 = bitcast <4 x i32> %wvec0 to <8 x i16>
  %hvec1 = bitcast <4 x i32> %wvec1 to <8 x i16>
  %hvec2 = bitcast <4 x i32> %wvec2 to <8 x i16>
  %hvec3 = bitcast <4 x i32> %wvec3 to <8 x i16>
  %hvec4 = bitcast <4 x i32> %wvec4 to <8 x i16>
  %hvec5 = bitcast <4 x i32> %wvec5 to <8 x i16>
  %hvec6 = bitcast <4 x i32> %wvec6 to <8 x i16>
  %hvec7 = bitcast <4 x i32> %wvec7 to <8 x i16>
  ; Indices 1 and 9 select halfword 1 of the first and second operand
  ; respectively; the remaining six result elements are undef.
  %hlow0 = shufflevector <8 x i16> %hvec0, <8 x i16> %hvec1,
                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                                    i32 undef, i32 undef, i32 undef, i32 undef>
  %hlow1 = shufflevector <8 x i16> %hvec2, <8 x i16> %hvec3,
                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                                    i32 undef, i32 undef, i32 undef, i32 undef>
  %hlow2 = shufflevector <8 x i16> %hvec4, <8 x i16> %hvec5,
                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                                    i32 undef, i32 undef, i32 undef, i32 undef>
  %hlow3 = shufflevector <8 x i16> %hvec6, <8 x i16> %hvec7,
                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
                                    i32 undef, i32 undef, i32 undef, i32 undef>
  ; Indices 0, 1, 8, 9 take the two defined halfwords of each operand.
  %join0 = shufflevector <8 x i16> %hlow0, <8 x i16> %hlow1,
                         <8 x i32> <i32 0, i32 1, i32 8, i32 9,
                                    i32 undef, i32 undef, i32 undef, i32 undef>
  %join1 = shufflevector <8 x i16> %hlow2, <8 x i16> %hlow3,
                         <8 x i32> <i32 0, i32 1, i32 8, i32 9,
                                    i32 undef, i32 undef, i32 undef, i32 undef>
  ; Indices 0-3 and 8-11 take the four defined halfwords of each operand.
  %ret = shufflevector <8 x i16> %join0, <8 x i16> %join1,
                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                  i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %ret
}

; A direct pack operation.
;
; Both inputs are bitcast to <8 x i16> and a single shuffle selects the
; odd-numbered halfwords (1, 3, ..., 15) of the concatenated operands.
define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) {
; CHECK-LABEL: f3:
; CHECK: vpkf %v24, %v24, %v26
; CHECK: br %r14
  %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
  %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
  %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
                       <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                  i32 9, i32 11, i32 13, i32 15>
  ret <8 x i16> %ret
}

; One way of writing a <4 x i32> -> <16 x i8> pack.  It doesn't matter
; whether the first pack is VPKF or VPKH since the even bytes of the
; result are discarded.
; The pack is written as two stages of odd-element shuffles: first on
; halfwords (pairing %val0 with %val1 and %val2 with %val3), then on the
; bytes of the two intermediate results.  The first-stage instructions may
; appear in either order and may be either vpkh or vpkf; the final
; instruction must be a vpkh of the two intermediate registers.
define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1,
                     <4 x i32> %val2, <4 x i32> %val3) {
; CHECK-LABEL: f4:
; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
; CHECK: vpkh %v24, [[REG1]], [[REG2]]
; CHECK: br %r14
  %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
  %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
  %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
  %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
  ; First stage: keep the odd-numbered halfwords of each input pair.
  %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                    i32 9, i32 11, i32 13, i32 15>
  %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                    i32 9, i32 11, i32 13, i32 15>
  %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
  %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
  ; Second stage: keep the odd-numbered bytes of the two intermediates.
  %ret = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
                                   i32 9, i32 11, i32 13, i32 15,
                                   i32 17, i32 19, i32 21, i32 23,
                                   i32 25, i32 27, i32 29, i32 31>
  ret <16 x i8> %ret
}

; Check the same operation, but with elements being extracted from the result.
; The two-stage pack from f4 is built again, but only four bytes of the
; packed vector (elements 2, 7, 8 and 13) are used, each stored to one of
; four consecutive bytes at %base.  The checks expect each store to be a
; vsteb taken directly from one of the argument registers (%v24, %v26,
; %v28, %v30), in any order.
define void @f5(<4 x i32> %val0, <4 x i32> %val1,
                <4 x i32> %val2, <4 x i32> %val3,
                i8 *%base) {
; CHECK-LABEL: f5:
; CHECK-DAG: vsteb %v24, 0(%r2), 11
; CHECK-DAG: vsteb %v26, 1(%r2), 15
; CHECK-DAG: vsteb %v28, 2(%r2), 3
; CHECK-DAG: vsteb %v30, 3(%r2), 7
; CHECK: br %r14
  %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
  %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
  %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
  %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
  ; First stage: keep the odd-numbered halfwords of each input pair.
  %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                    i32 9, i32 11, i32 13, i32 15>
  %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                    i32 9, i32 11, i32 13, i32 15>
  %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
  %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
  ; Second stage: keep the odd-numbered bytes of the two intermediates.
  %vec = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
                                   i32 9, i32 11, i32 13, i32 15,
                                   i32 17, i32 19, i32 21, i32 23,
                                   i32 25, i32 27, i32 29, i32 31>

  ; Destination addresses: %base + 0 .. %base + 3.
  %ptr0 = getelementptr i8, i8 *%base, i64 0
  %ptr1 = getelementptr i8, i8 *%base, i64 1
  %ptr2 = getelementptr i8, i8 *%base, i64 2
  %ptr3 = getelementptr i8, i8 *%base, i64 3

  ; Only these four bytes of the packed vector are used.
  %byte0 = extractelement <16 x i8> %vec, i32 2
  %byte1 = extractelement <16 x i8> %vec, i32 7
  %byte2 = extractelement <16 x i8> %vec, i32 8
  %byte3 = extractelement <16 x i8> %vec, i32 13

  store i8 %byte0, i8 *%ptr0
  store i8 %byte1, i8 *%ptr1
  store i8 %byte2, i8 *%ptr2
  store i8 %byte3, i8 *%ptr3

  ret void
}

; A different way of writing a <4 x i32> -> <16 x i8> pack.
; Here the pack is built from scalars: all sixteen i32 elements are
; extracted, narrowed to halfwords and then to bytes using short vectors
; (<2 x i16>, <4 x i8>), and the resulting bytes are concatenated.  The
; expected instruction sequence is the same as for f4.
define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1,
                     <4 x i32> %val2, <4 x i32> %val3) {
; CHECK-LABEL: f6:
; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
; CHECK: vpkh %v24, [[REG1]], [[REG2]]
; CHECK: br %r14
  ; Split all four inputs into their individual 32-bit elements.
  %elem0 = extractelement <4 x i32> %val0, i32 0
  %elem1 = extractelement <4 x i32> %val0, i32 1
  %elem2 = extractelement <4 x i32> %val0, i32 2
  %elem3 = extractelement <4 x i32> %val0, i32 3
  %elem4 = extractelement <4 x i32> %val1, i32 0
  %elem5 = extractelement <4 x i32> %val1, i32 1
  %elem6 = extractelement <4 x i32> %val1, i32 2
  %elem7 = extractelement <4 x i32> %val1, i32 3
  %elem8 = extractelement <4 x i32> %val2, i32 0
  %elem9 = extractelement <4 x i32> %val2, i32 1
  %elem10 = extractelement <4 x i32> %val2, i32 2
  %elem11 = extractelement <4 x i32> %val2, i32 3
  %elem12 = extractelement <4 x i32> %val3, i32 0
  %elem13 = extractelement <4 x i32> %val3, i32 1
  %elem14 = extractelement <4 x i32> %val3, i32 2
  %elem15 = extractelement <4 x i32> %val3, i32 3
  ; View each 32-bit element as a pair of halfwords.
  %bitcast0 = bitcast i32 %elem0 to <2 x i16>
  %bitcast1 = bitcast i32 %elem1 to <2 x i16>
  %bitcast2 = bitcast i32 %elem2 to <2 x i16>
  %bitcast3 = bitcast i32 %elem3 to <2 x i16>
  %bitcast4 = bitcast i32 %elem4 to <2 x i16>
  %bitcast5 = bitcast i32 %elem5 to <2 x i16>
  %bitcast6 = bitcast i32 %elem6 to <2 x i16>
  %bitcast7 = bitcast i32 %elem7 to <2 x i16>
  %bitcast8 = bitcast i32 %elem8 to <2 x i16>
  %bitcast9 = bitcast i32 %elem9 to <2 x i16>
  %bitcast10 = bitcast i32 %elem10 to <2 x i16>
  %bitcast11 = bitcast i32 %elem11 to <2 x i16>
  %bitcast12 = bitcast i32 %elem12 to <2 x i16>
  %bitcast13 = bitcast i32 %elem13 to <2 x i16>
  %bitcast14 = bitcast i32 %elem14 to <2 x i16>
  %bitcast15 = bitcast i32 %elem15 to <2 x i16>
  ; Mask <1, 3> keeps element 1 of each operand, one halfword per i32.
  %low0 = shufflevector <2 x i16> %bitcast0, <2 x i16> %bitcast1,
                        <2 x i32> <i32 1, i32 3>
  %low1 = shufflevector <2 x i16> %bitcast2, <2 x i16> %bitcast3,
                        <2 x i32> <i32 1, i32 3>
  %low2 = shufflevector <2 x i16> %bitcast4, <2 x i16> %bitcast5,
                        <2 x i32> <i32 1, i32 3>
  %low3 = shufflevector <2 x i16> %bitcast6, <2 x i16> %bitcast7,
                        <2 x i32> <i32 1, i32 3>
  %low4 = shufflevector <2 x i16> %bitcast8, <2 x i16> %bitcast9,
                        <2 x i32> <i32 1, i32 3>
  %low5 = shufflevector <2 x i16> %bitcast10, <2 x i16> %bitcast11,
                        <2 x i32> <i32 1, i32 3>
  %low6 = shufflevector <2 x i16> %bitcast12, <2 x i16> %bitcast13,
                        <2 x i32> <i32 1, i32 3>
  %low7 = shufflevector <2 x i16> %bitcast14, <2 x i16> %bitcast15,
                        <2 x i32> <i32 1, i32 3>
  ; View each halfword pair as four bytes.
  %bytes0 = bitcast <2 x i16> %low0 to <4 x i8>
  %bytes1 = bitcast <2 x i16> %low1 to <4 x i8>
  %bytes2 = bitcast <2 x i16> %low2 to <4 x i8>
  %bytes3 = bitcast <2 x i16> %low3 to <4 x i8>
  %bytes4 = bitcast <2 x i16> %low4 to <4 x i8>
  %bytes5 = bitcast <2 x i16> %low5 to <4 x i8>
  %bytes6 = bitcast <2 x i16> %low6 to <4 x i8>
  %bytes7 = bitcast <2 x i16> %low7 to <4 x i8>
  ; Mask <1, 3, 5, 7> keeps the odd-numbered bytes of the two operands.
  %blow0 = shufflevector <4 x i8> %bytes0, <4 x i8> %bytes1,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %blow1 = shufflevector <4 x i8> %bytes2, <4 x i8> %bytes3,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %blow2 = shufflevector <4 x i8> %bytes4, <4 x i8> %bytes5,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %blow3 = shufflevector <4 x i8> %bytes6, <4 x i8> %bytes7,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; Identity-order masks: these shuffles only concatenate their operands.
  %join0 = shufflevector <4 x i8> %blow0, <4 x i8> %blow1,
                         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7>
  %join1 = shufflevector <4 x i8> %blow2, <4 x i8> %blow3,
                         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7>
  %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
                       <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                                   i32 4, i32 5, i32 6, i32 7,
                                   i32 8, i32 9, i32 10, i32 11,
                                   i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %ret
}

; One way of writing a <2 x i64> -> <16 x i8> pack.
;
; Eight <2 x i64> inputs are narrowed in three steps (i64 -> i32 -> i16 ->
; i8), each step keeping the odd-numbered sub-elements.  The checks expect
; a tree of seven pack instructions: four on the argument registers (vpkh,
; vpkf or vpkg accepted), two on those results (vpkh or vpkf accepted), and
; a final vpkh into %v24.  All but the final vpkh may appear in any order.
define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1,
                     <2 x i64> %val2, <2 x i64> %val3,
                     <2 x i64> %val4, <2 x i64> %val5,
                     <2 x i64> %val6, <2 x i64> %val7) {
; CHECK-LABEL: f7:
; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30
; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27
; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31
; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]]
; CHECK: vpkh %v24, [[REG5]], [[REG6]]
; CHECK: br %r14
  ; Split all eight inputs into their individual 64-bit elements.
  %elem0 = extractelement <2 x i64> %val0, i32 0
  %elem1 = extractelement <2 x i64> %val0, i32 1
  %elem2 = extractelement <2 x i64> %val1, i32 0
  %elem3 = extractelement <2 x i64> %val1, i32 1
  %elem4 = extractelement <2 x i64> %val2, i32 0
  %elem5 = extractelement <2 x i64> %val2, i32 1
  %elem6 = extractelement <2 x i64> %val3, i32 0
  %elem7 = extractelement <2 x i64> %val3, i32 1
  %elem8 = extractelement <2 x i64> %val4, i32 0
  %elem9 = extractelement <2 x i64> %val4, i32 1
  %elem10 = extractelement <2 x i64> %val5, i32 0
  %elem11 = extractelement <2 x i64> %val5, i32 1
  %elem12 = extractelement <2 x i64> %val6, i32 0
  %elem13 = extractelement <2 x i64> %val6, i32 1
  %elem14 = extractelement <2 x i64> %val7, i32 0
  %elem15 = extractelement <2 x i64> %val7, i32 1
  ; View each 64-bit element as a pair of words.
  %bitcast0 = bitcast i64 %elem0 to <2 x i32>
  %bitcast1 = bitcast i64 %elem1 to <2 x i32>
  %bitcast2 = bitcast i64 %elem2 to <2 x i32>
  %bitcast3 = bitcast i64 %elem3 to <2 x i32>
  %bitcast4 = bitcast i64 %elem4 to <2 x i32>
  %bitcast5 = bitcast i64 %elem5 to <2 x i32>
  %bitcast6 = bitcast i64 %elem6 to <2 x i32>
  %bitcast7 = bitcast i64 %elem7 to <2 x i32>
  %bitcast8 = bitcast i64 %elem8 to <2 x i32>
  %bitcast9 = bitcast i64 %elem9 to <2 x i32>
  %bitcast10 = bitcast i64 %elem10 to <2 x i32>
  %bitcast11 = bitcast i64 %elem11 to <2 x i32>
  %bitcast12 = bitcast i64 %elem12 to <2 x i32>
  %bitcast13 = bitcast i64 %elem13 to <2 x i32>
  %bitcast14 = bitcast i64 %elem14 to <2 x i32>
  %bitcast15 = bitcast i64 %elem15 to <2 x i32>
  ; Step 1: mask <1, 3> keeps element 1 of each operand, one word per i64.
  %low0 = shufflevector <2 x i32> %bitcast0, <2 x i32> %bitcast1,
                        <2 x i32> <i32 1, i32 3>
  %low1 = shufflevector <2 x i32> %bitcast2, <2 x i32> %bitcast3,
                        <2 x i32> <i32 1, i32 3>
  %low2 = shufflevector <2 x i32> %bitcast4, <2 x i32> %bitcast5,
                        <2 x i32> <i32 1, i32 3>
  %low3 = shufflevector <2 x i32> %bitcast6, <2 x i32> %bitcast7,
                        <2 x i32> <i32 1, i32 3>
  %low4 = shufflevector <2 x i32> %bitcast8, <2 x i32> %bitcast9,
                        <2 x i32> <i32 1, i32 3>
  %low5 = shufflevector <2 x i32> %bitcast10, <2 x i32> %bitcast11,
                        <2 x i32> <i32 1, i32 3>
  %low6 = shufflevector <2 x i32> %bitcast12, <2 x i32> %bitcast13,
                        <2 x i32> <i32 1, i32 3>
  %low7 = shufflevector <2 x i32> %bitcast14, <2 x i32> %bitcast15,
                        <2 x i32> <i32 1, i32 3>
  ; View each word pair as four halfwords.
  %half0 = bitcast <2 x i32> %low0 to <4 x i16>
  %half1 = bitcast <2 x i32> %low1 to <4 x i16>
  %half2 = bitcast <2 x i32> %low2 to <4 x i16>
  %half3 = bitcast <2 x i32> %low3 to <4 x i16>
  %half4 = bitcast <2 x i32> %low4 to <4 x i16>
  %half5 = bitcast <2 x i32> %low5 to <4 x i16>
  %half6 = bitcast <2 x i32> %low6 to <4 x i16>
  %half7 = bitcast <2 x i32> %low7 to <4 x i16>
  ; Step 2: keep the odd-numbered halfwords of the two operands.
  %hlow0 = shufflevector <4 x i16> %half0, <4 x i16> %half1,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hlow1 = shufflevector <4 x i16> %half2, <4 x i16> %half3,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hlow2 = shufflevector <4 x i16> %half4, <4 x i16> %half5,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hlow3 = shufflevector <4 x i16> %half6, <4 x i16> %half7,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; View each group of four halfwords as eight bytes.
  %bytes0 = bitcast <4 x i16> %hlow0 to <8 x i8>
  %bytes1 = bitcast <4 x i16> %hlow1 to <8 x i8>
  %bytes2 = bitcast <4 x i16> %hlow2 to <8 x i8>
  %bytes3 = bitcast <4 x i16> %hlow3 to <8 x i8>
  ; Step 3: keep the odd-numbered bytes of the two operands.
  %join0 = shufflevector <8 x i8> %bytes0, <8 x i8> %bytes1,
                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                    i32 9, i32 11, i32 13, i32 15>
  %join1 = shufflevector <8 x i8> %bytes2, <8 x i8> %bytes3,
                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                    i32 9, i32 11, i32 13, i32 15>
  ; Identity-order mask: concatenate the two 8-byte halves.
  %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
                       <16 x i32> <i32 0, i32 1, i32 2, i32 3,
                                   i32 4, i32 5, i32 6, i32 7,
                                   i32 8, i32 9, i32 10, i32 11,
                                   i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %ret
}

; Test a <2 x i64> -> <4 x f32> pack in which only individual elements are
; needed.
;
; Four i64 scalars are assembled into two <2 x i64> vectors, reinterpreted
; as <4 x float> and packed with an odd-element shuffle.  Every element of
; the pack is then extracted and combined as (elt0 + elt2) * (elt1 + elt3).
; The checks require that no vperm, vpk or vmrh appears before the first
; aebr, and that the result is produced by two aebr and a meebr into %f0.
define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) {
; CHECK-LABEL: f8:
; CHECK-NOT: vperm
; CHECK-NOT: vpk
; CHECK-NOT: vmrh
; CHECK: aebr {{%f[0-7]}},
; CHECK: aebr {{%f[0-7]}},
; CHECK: meebr %f0,
; CHECK: br %r14
  ; Put each scalar in element 0 of its own vector; element 1 stays undef.
  %vec0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0
  %vec1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0
  %vec2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0
  %vec3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0
  ; Mask <0, 2> takes element 0 of each operand, pairing up the scalars.
  %join0 = shufflevector <2 x i64> %vec0, <2 x i64> %vec1,
                         <2 x i32> <i32 0, i32 2>
  %join1 = shufflevector <2 x i64> %vec2, <2 x i64> %vec3,
                         <2 x i32> <i32 0, i32 2>
  %bitcast0 = bitcast <2 x i64> %join0 to <4 x float>
  %bitcast1 = bitcast <2 x i64> %join1 to <4 x float>
  ; The pack itself: keep the odd-numbered floats of the two operands.
  %pack = shufflevector <4 x float> %bitcast0, <4 x float> %bitcast1,
                        <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %elt0 = extractelement <4 x float> %pack, i32 0
  %elt1 = extractelement <4 x float> %pack, i32 1
  %elt2 = extractelement <4 x float> %pack, i32 2
  %elt3 = extractelement <4 x float> %pack, i32 3
  %add0 = fadd float %elt0, %elt2
  %add1 = fadd float %elt1, %elt3
  %ret = fmul float %add0, %add1
  ret float %ret
}

; Test a <2 x f64> -> <4 x i32> pack in which only individual elements are
; needed.
;
; This mirrors f8 with the types swapped: four double scalars are assembled
; into two <2 x double> vectors, reinterpreted as <4 x i32> and packed with
; an odd-element shuffle.  The extracted elements are combined as
; (elt0 + elt2) | (elt1 + elt3).  The checks require that no vperm, vpk or
; vmrh appears before the first ar, and that the result is produced by two
; ar and an or into %r2.
define i32 @f9(double %scalar0, double %scalar1, double %scalar2,
               double %scalar3) {
; CHECK-LABEL: f9:
; CHECK-NOT: vperm
; CHECK-NOT: vpk
; CHECK-NOT: vmrh
; CHECK: ar {{%r[0-5]}},
; CHECK: ar {{%r[0-5]}},
; CHECK: or %r2,
; CHECK: br %r14
  ; Put each scalar in element 0 of its own vector; element 1 stays undef.
  %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
  %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0
  %vec2 = insertelement <2 x double> undef, double %scalar2, i32 0
  %vec3 = insertelement <2 x double> undef, double %scalar3, i32 0
  ; Mask <0, 2> takes element 0 of each operand, pairing up the scalars.
  %join0 = shufflevector <2 x double> %vec0, <2 x double> %vec1,
                         <2 x i32> <i32 0, i32 2>
  %join1 = shufflevector <2 x double> %vec2, <2 x double> %vec3,
                         <2 x i32> <i32 0, i32 2>
  %bitcast0 = bitcast <2 x double> %join0 to <4 x i32>
  %bitcast1 = bitcast <2 x double> %join1 to <4 x i32>
  ; The pack itself: keep the odd-numbered words of the two operands.
  %pack = shufflevector <4 x i32> %bitcast0, <4 x i32> %bitcast1,
                        <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %elt0 = extractelement <4 x i32> %pack, i32 0
  %elt1 = extractelement <4 x i32> %pack, i32 1
  %elt2 = extractelement <4 x i32> %pack, i32 2
  %elt3 = extractelement <4 x i32> %pack, i32 3
  %add0 = add i32 %elt0, %elt2
  %add1 = add i32 %elt1, %elt3
  %ret = or i32 %add0, %add1
  ret i32 %ret
}