Home | History | Annotate | Download | only in SystemZ
      1 ; Test various representations of pack-like operations.
      2 ;
      3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
      4 
      5 ; One way of writing a <4 x i32> -> <8 x i16> pack.
        ; Every i32 element of both inputs is extracted and reinterpreted as
        ; <2 x i16>; element 1 of each pair is kept and the pieces are concatenated
        ; back into a single <8 x i16>.  The FileCheck directives below expect the
        ; whole sequence to be emitted as one vpkf.
      6 define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) {
      7 ; CHECK-LABEL: f1:
      8 ; CHECK: vpkf %v24, %v24, %v26
      9 ; CHECK: br %r14
          ; Scalarize both inputs: elements 0-3 come from %val0, 4-7 from %val1.
     10   %elem0 = extractelement <4 x i32> %val0, i32 0
     11   %elem1 = extractelement <4 x i32> %val0, i32 1
     12   %elem2 = extractelement <4 x i32> %val0, i32 2
     13   %elem3 = extractelement <4 x i32> %val0, i32 3
     14   %elem4 = extractelement <4 x i32> %val1, i32 0
     15   %elem5 = extractelement <4 x i32> %val1, i32 1
     16   %elem6 = extractelement <4 x i32> %val1, i32 2
     17   %elem7 = extractelement <4 x i32> %val1, i32 3
          ; View each 32-bit scalar as a pair of 16-bit lanes.
     18   %hboth0 = bitcast i32 %elem0 to <2 x i16>
     19   %hboth1 = bitcast i32 %elem1 to <2 x i16>
     20   %hboth2 = bitcast i32 %elem2 to <2 x i16>
     21   %hboth3 = bitcast i32 %elem3 to <2 x i16>
     22   %hboth4 = bitcast i32 %elem4 to <2 x i16>
     23   %hboth5 = bitcast i32 %elem5 to <2 x i16>
     24   %hboth6 = bitcast i32 %elem6 to <2 x i16>
     25   %hboth7 = bitcast i32 %elem7 to <2 x i16>
          ; Mask <1, 3> keeps lane 1 of the first operand and lane 1 of the second
          ; operand, i.e. one halfword from each of two neighbouring words.
     26   %hlow0 = shufflevector <2 x i16> %hboth0, <2 x i16> %hboth1,
     27                          <2 x i32> <i32 1, i32 3>
     28   %hlow1 = shufflevector <2 x i16> %hboth2, <2 x i16> %hboth3,
     29                          <2 x i32> <i32 1, i32 3>
     30   %hlow2 = shufflevector <2 x i16> %hboth4, <2 x i16> %hboth5,
     31                          <2 x i32> <i32 1, i32 3>
     32   %hlow3 = shufflevector <2 x i16> %hboth6, <2 x i16> %hboth7,
     33                          <2 x i32> <i32 1, i32 3>
          ; Identity masks: plain concatenation, 2+2 -> 4 lanes, then 4+4 -> 8 lanes.
     34   %join0 = shufflevector <2 x i16> %hlow0, <2 x i16> %hlow1,
     35                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     36   %join1 = shufflevector <2 x i16> %hlow2, <2 x i16> %hlow3,
     37                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     38   %ret = shufflevector <4 x i16> %join0, <4 x i16> %join1,
     39                        <8 x i32> <i32 0, i32 1, i32 2, i32 3,
     40                                   i32 4, i32 5, i32 6, i32 7>
     41   ret <8 x i16> %ret
     42 }
     43 
     44 ; A different way of writing a <4 x i32> -> <8 x i16> pack.
        ; Here every extracted i32 is re-inserted into lane 0 of an otherwise-undef
        ; <4 x i32>, so all shuffles operate on full-width <8 x i16> vectors with
        ; mostly-undef masks.  The expected output is the same single vpkf as in f1.
     45 define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) {
     46 ; CHECK-LABEL: f2:
     47 ; CHECK: vpkf %v24, %v24, %v26
     48 ; CHECK: br %r14
          ; Scalarize both inputs: elements 0-3 come from %val0, 4-7 from %val1.
     49   %elem0 = extractelement <4 x i32> %val0, i32 0
     50   %elem1 = extractelement <4 x i32> %val0, i32 1
     51   %elem2 = extractelement <4 x i32> %val0, i32 2
     52   %elem3 = extractelement <4 x i32> %val0, i32 3
     53   %elem4 = extractelement <4 x i32> %val1, i32 0
     54   %elem5 = extractelement <4 x i32> %val1, i32 1
     55   %elem6 = extractelement <4 x i32> %val1, i32 2
     56   %elem7 = extractelement <4 x i32> %val1, i32 3
          ; Put each scalar in lane 0 of its own vector; lanes 1-3 stay undef.
     57   %wvec0 = insertelement <4 x i32> undef, i32 %elem0, i32 0
     58   %wvec1 = insertelement <4 x i32> undef, i32 %elem1, i32 0
     59   %wvec2 = insertelement <4 x i32> undef, i32 %elem2, i32 0
     60   %wvec3 = insertelement <4 x i32> undef, i32 %elem3, i32 0
     61   %wvec4 = insertelement <4 x i32> undef, i32 %elem4, i32 0
     62   %wvec5 = insertelement <4 x i32> undef, i32 %elem5, i32 0
     63   %wvec6 = insertelement <4 x i32> undef, i32 %elem6, i32 0
     64   %wvec7 = insertelement <4 x i32> undef, i32 %elem7, i32 0
          ; Reinterpret each word vector as eight halfword lanes.
     65   %hvec0 = bitcast <4 x i32> %wvec0 to <8 x i16>
     66   %hvec1 = bitcast <4 x i32> %wvec1 to <8 x i16>
     67   %hvec2 = bitcast <4 x i32> %wvec2 to <8 x i16>
     68   %hvec3 = bitcast <4 x i32> %wvec3 to <8 x i16>
     69   %hvec4 = bitcast <4 x i32> %wvec4 to <8 x i16>
     70   %hvec5 = bitcast <4 x i32> %wvec5 to <8 x i16>
     71   %hvec6 = bitcast <4 x i32> %wvec6 to <8 x i16>
     72   %hvec7 = bitcast <4 x i32> %wvec7 to <8 x i16>
          ; Index 1 is halfword lane 1 of the first operand and index 9 is halfword
          ; lane 1 of the second operand; the remaining six result lanes are undef.
     73   %hlow0 = shufflevector <8 x i16> %hvec0, <8 x i16> %hvec1,
     74                          <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
     75                                     i32 undef, i32 undef, i32 undef, i32 undef>
     76   %hlow1 = shufflevector <8 x i16> %hvec2, <8 x i16> %hvec3,
     77                          <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
     78                                     i32 undef, i32 undef, i32 undef, i32 undef>
     79   %hlow2 = shufflevector <8 x i16> %hvec4, <8 x i16> %hvec5,
     80                          <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
     81                                     i32 undef, i32 undef, i32 undef, i32 undef>
     82   %hlow3 = shufflevector <8 x i16> %hvec6, <8 x i16> %hvec7,
     83                          <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
     84                                     i32 undef, i32 undef, i32 undef, i32 undef>
          ; Gather the two defined lanes of each operand (0, 1 and 8, 9) into
          ; lanes 0-3; lanes 4-7 stay undef.
     85   %join0 = shufflevector <8 x i16> %hlow0, <8 x i16> %hlow1,
     86                          <8 x i32> <i32 0, i32 1, i32 8, i32 9,
     87                                     i32 undef, i32 undef, i32 undef, i32 undef>
     88   %join1 = shufflevector <8 x i16> %hlow2, <8 x i16> %hlow3,
     89                          <8 x i32> <i32 0, i32 1, i32 8, i32 9,
     90                                     i32 undef, i32 undef, i32 undef, i32 undef>
          ; Final result: lanes 0-3 of %join0 followed by lanes 0-3 of %join1
          ; (indices 8-11 address the second operand).
     91   %ret = shufflevector <8 x i16> %join0, <8 x i16> %join1,
     92                        <8 x i32> <i32 0, i32 1, i32 2, i32 3,
     93                                   i32 8, i32 9, i32 10, i32 11>
     94   ret <8 x i16> %ret
     95 }
     96 
     97 ; A direct pack operation.
        ; Both inputs are viewed as <8 x i16> and a single shuffle selects every
        ; odd-numbered halfword lane of the two operands.  The FileCheck directives
        ; expect one vpkf.
     98 define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) {
     99 ; CHECK-LABEL: f3:
    100 ; CHECK: vpkf %v24, %v24, %v26
    101 ; CHECK: br %r14
    102   %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
    103   %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
          ; Indices 1-7 (odd) address %bitcast0, indices 9-15 (odd) address %bitcast1.
    104   %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
    105                        <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    106                                   i32 9, i32 11, i32 13, i32 15>
    107   ret <8 x i16> %ret
    108 }
    109 
    110 ; One way of writing a <4 x i32> -> <16 x i8> pack.  It doesn't matter
    111 ; whether the first pack is VPKF or VPKH since the even bytes of the
    112 ; result are discarded.
        ; The pack is written as two shuffle levels: odd halfword lanes first, then
        ; odd byte lanes.  The directives accept vpkh or vpkf for the two first-level
        ; packs (in either order) and require vpkh for the final one.
    113 define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1,
    114                      <4 x i32> %val2, <4 x i32> %val3) {
    115 ; CHECK-LABEL: f4:
    116 ; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
    117 ; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
    118 ; CHECK: vpkh %v24, [[REG1]], [[REG2]]
    119 ; CHECK: br %r14
    120   %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
    121   %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
    122   %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
    123   %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
          ; Level 1: keep the odd-numbered halfword lanes of each input pair.
    124   %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
    125                          <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    126                                     i32 9, i32 11, i32 13, i32 15>
    127   %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
    128                          <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    129                                     i32 9, i32 11, i32 13, i32 15>
    130   %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
    131   %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
          ; Level 2: keep the odd-numbered byte lanes of the two intermediate results.
    132   %ret = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
    133                        <16 x i32> <i32 1, i32 3, i32 5, i32 7,
    134                                    i32 9, i32 11, i32 13, i32 15,
    135                                    i32 17, i32 19, i32 21, i32 23,
    136                                    i32 25, i32 27, i32 29, i32 31>
    137   ret <16 x i8> %ret
    138 }
    139 
    139 ; Check the same operation, but with elements being extracted from the result.
        ; Only four bytes of the packed vector are used, each stored to consecutive
        ; addresses starting at %base.  The FileCheck directives expect every byte to
        ; be stored with vsteb taking %v24/%v26/%v28/%v30 as the source operand
        ; (element indices 11, 15, 3 and 7); the four stores may appear in any order.
    140 define void @f5(<4 x i32> %val0, <4 x i32> %val1,
    141                 <4 x i32> %val2, <4 x i32> %val3,
    142                 i8 *%base) {
    143 ; CHECK-LABEL: f5:
    144 ; CHECK-DAG: vsteb %v24, 0(%r2), 11
    145 ; CHECK-DAG: vsteb %v26, 1(%r2), 15
    146 ; CHECK-DAG: vsteb %v28, 2(%r2), 3
    147 ; CHECK-DAG: vsteb %v30, 3(%r2), 7
    148 ; CHECK: br %r14
    149   %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
    150   %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
    151   %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
    152   %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
          ; Level 1: keep the odd-numbered halfword lanes of each input pair.
    153   %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
    154                          <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    155                                     i32 9, i32 11, i32 13, i32 15>
    156   %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
    157                          <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    158                                     i32 9, i32 11, i32 13, i32 15>
    159   %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
    160   %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
          ; Level 2: keep the odd-numbered byte lanes of the two intermediate results.
    161   %vec = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
    162                        <16 x i32> <i32 1, i32 3, i32 5, i32 7,
    163                                    i32 9, i32 11, i32 13, i32 15,
    164                                    i32 17, i32 19, i32 21, i32 23,
    165                                    i32 25, i32 27, i32 29, i32 31>
    166 
          ; Destination addresses %base+0 .. %base+3.
    167   %ptr0 = getelementptr i8, i8 *%base, i64 0
    168   %ptr1 = getelementptr i8, i8 *%base, i64 1
    169   %ptr2 = getelementptr i8, i8 *%base, i64 2
    170   %ptr3 = getelementptr i8, i8 *%base, i64 3
    171 
          ; Lanes 2, 7, 8 and 13 of the packed result; per the masks above each one
          ; originates from a different input vector (%val0, %val1, %val2, %val3).
    172   %byte0 = extractelement <16 x i8> %vec, i32 2
    173   %byte1 = extractelement <16 x i8> %vec, i32 7
    174   %byte2 = extractelement <16 x i8> %vec, i32 8
    175   %byte3 = extractelement <16 x i8> %vec, i32 13
    176 
    177   store i8 %byte0, i8 *%ptr0
    178   store i8 %byte1, i8 *%ptr1
    179   store i8 %byte2, i8 *%ptr2
    180   store i8 %byte3, i8 *%ptr3
    181 
    182   ret void
    183 }
    185 
    185 ; A different way of writing a <4 x i32> -> <16 x i8> pack.
        ; Fully scalarized form: all sixteen i32 elements are extracted, narrowed to
        ; halfwords via <2 x i16> shuffles, narrowed again to bytes via <4 x i8>
        ; shuffles, and finally concatenated.  The expected instructions are the same
        ; as for f4: two vpkh/vpkf packs feeding a final vpkh.
    186 define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1,
    187                      <4 x i32> %val2, <4 x i32> %val3) {
    188 ; CHECK-LABEL: f6:
    189 ; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
    190 ; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
    191 ; CHECK: vpkh %v24, [[REG1]], [[REG2]]
    192 ; CHECK: br %r14
          ; Scalarize the four inputs into sixteen i32 elements, in argument order.
    193   %elem0 = extractelement <4 x i32> %val0, i32 0
    194   %elem1 = extractelement <4 x i32> %val0, i32 1
    195   %elem2 = extractelement <4 x i32> %val0, i32 2
    196   %elem3 = extractelement <4 x i32> %val0, i32 3
    197   %elem4 = extractelement <4 x i32> %val1, i32 0
    198   %elem5 = extractelement <4 x i32> %val1, i32 1
    199   %elem6 = extractelement <4 x i32> %val1, i32 2
    200   %elem7 = extractelement <4 x i32> %val1, i32 3
    201   %elem8 = extractelement <4 x i32> %val2, i32 0
    202   %elem9 = extractelement <4 x i32> %val2, i32 1
    203   %elem10 = extractelement <4 x i32> %val2, i32 2
    204   %elem11 = extractelement <4 x i32> %val2, i32 3
    205   %elem12 = extractelement <4 x i32> %val3, i32 0
    206   %elem13 = extractelement <4 x i32> %val3, i32 1
    207   %elem14 = extractelement <4 x i32> %val3, i32 2
    208   %elem15 = extractelement <4 x i32> %val3, i32 3
          ; View each 32-bit scalar as a pair of 16-bit lanes.
    209   %bitcast0 = bitcast i32 %elem0 to <2 x i16>
    210   %bitcast1 = bitcast i32 %elem1 to <2 x i16>
    211   %bitcast2 = bitcast i32 %elem2 to <2 x i16>
    212   %bitcast3 = bitcast i32 %elem3 to <2 x i16>
    213   %bitcast4 = bitcast i32 %elem4 to <2 x i16>
    214   %bitcast5 = bitcast i32 %elem5 to <2 x i16>
    215   %bitcast6 = bitcast i32 %elem6 to <2 x i16>
    216   %bitcast7 = bitcast i32 %elem7 to <2 x i16>
    217   %bitcast8 = bitcast i32 %elem8 to <2 x i16>
    218   %bitcast9 = bitcast i32 %elem9 to <2 x i16>
    219   %bitcast10 = bitcast i32 %elem10 to <2 x i16>
    220   %bitcast11 = bitcast i32 %elem11 to <2 x i16>
    221   %bitcast12 = bitcast i32 %elem12 to <2 x i16>
    222   %bitcast13 = bitcast i32 %elem13 to <2 x i16>
    223   %bitcast14 = bitcast i32 %elem14 to <2 x i16>
    224   %bitcast15 = bitcast i32 %elem15 to <2 x i16>
          ; Level 1: mask <1, 3> keeps halfword lane 1 of each of the two operands.
    225   %low0 = shufflevector <2 x i16> %bitcast0, <2 x i16> %bitcast1,
    226                         <2 x i32> <i32 1, i32 3>
    227   %low1 = shufflevector <2 x i16> %bitcast2, <2 x i16> %bitcast3,
    228                         <2 x i32> <i32 1, i32 3>
    229   %low2 = shufflevector <2 x i16> %bitcast4, <2 x i16> %bitcast5,
    230                         <2 x i32> <i32 1, i32 3>
    231   %low3 = shufflevector <2 x i16> %bitcast6, <2 x i16> %bitcast7,
    232                         <2 x i32> <i32 1, i32 3>
    233   %low4 = shufflevector <2 x i16> %bitcast8, <2 x i16> %bitcast9,
    234                         <2 x i32> <i32 1, i32 3>
    235   %low5 = shufflevector <2 x i16> %bitcast10, <2 x i16> %bitcast11,
    236                         <2 x i32> <i32 1, i32 3>
    237   %low6 = shufflevector <2 x i16> %bitcast12, <2 x i16> %bitcast13,
    238                         <2 x i32> <i32 1, i32 3>
    239   %low7 = shufflevector <2 x i16> %bitcast14, <2 x i16> %bitcast15,
    240                         <2 x i32> <i32 1, i32 3>
          ; View each halfword pair as four byte lanes.
    241   %bytes0 = bitcast <2 x i16> %low0 to <4 x i8>
    242   %bytes1 = bitcast <2 x i16> %low1 to <4 x i8>
    243   %bytes2 = bitcast <2 x i16> %low2 to <4 x i8>
    244   %bytes3 = bitcast <2 x i16> %low3 to <4 x i8>
    245   %bytes4 = bitcast <2 x i16> %low4 to <4 x i8>
    246   %bytes5 = bitcast <2 x i16> %low5 to <4 x i8>
    247   %bytes6 = bitcast <2 x i16> %low6 to <4 x i8>
    248   %bytes7 = bitcast <2 x i16> %low7 to <4 x i8>
          ; Level 2: keep the odd-numbered byte lanes of each operand pair.
    249   %blow0 = shufflevector <4 x i8> %bytes0, <4 x i8> %bytes1,
    250                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    251   %blow1 = shufflevector <4 x i8> %bytes2, <4 x i8> %bytes3,
    252                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    253   %blow2 = shufflevector <4 x i8> %bytes4, <4 x i8> %bytes5,
    254                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    255   %blow3 = shufflevector <4 x i8> %bytes6, <4 x i8> %bytes7,
    256                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
          ; Identity masks: plain concatenation, 4+4 -> 8 lanes, then 8+8 -> 16 lanes.
    257   %join0 = shufflevector <4 x i8> %blow0, <4 x i8> %blow1,
    258                          <8 x i32> <i32 0, i32 1, i32 2, i32 3,
    259                                     i32 4, i32 5, i32 6, i32 7>
    260   %join1 = shufflevector <4 x i8> %blow2, <4 x i8> %blow3,
    261                          <8 x i32> <i32 0, i32 1, i32 2, i32 3,
    262                                     i32 4, i32 5, i32 6, i32 7>
    263   %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
    264                        <16 x i32> <i32 0, i32 1, i32 2, i32 3,
    265                                    i32 4, i32 5, i32 6, i32 7,
    266                                    i32 8, i32 9, i32 10, i32 11,
    267                                    i32 12, i32 13, i32 14, i32 15>
    268   ret <16 x i8> %ret
    269 }
    271 
    271 ; One way of writing a <2 x i64> -> <16 x i8> pack.
        ; Three narrowing levels are spelled out on scalarized data: i64 -> i32,
        ; i32 -> i16, i16 -> i8.  The FileCheck directives expect a tree of seven
        ; packs: four first-level packs (vpkh, vpkf or vpkg accepted), two
        ; second-level packs (vpkh or vpkf accepted) and a final vpkh into %v24.
    272 define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1,
    273                      <2 x i64> %val2, <2 x i64> %val3,
    274                      <2 x i64> %val4, <2 x i64> %val5,
    275                      <2 x i64> %val6, <2 x i64> %val7) {
    276 ; CHECK-LABEL: f7:
    277 ; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26
    278 ; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30
    279 ; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27
    280 ; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31
    281 ; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]]
    282 ; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]]
    283 ; CHECK: vpkh %v24, [[REG5]], [[REG6]]
    284 ; CHECK: br %r14
          ; Scalarize the eight inputs into sixteen i64 elements, in argument order.
    285   %elem0 = extractelement <2 x i64> %val0, i32 0
    286   %elem1 = extractelement <2 x i64> %val0, i32 1
    287   %elem2 = extractelement <2 x i64> %val1, i32 0
    288   %elem3 = extractelement <2 x i64> %val1, i32 1
    289   %elem4 = extractelement <2 x i64> %val2, i32 0
    290   %elem5 = extractelement <2 x i64> %val2, i32 1
    291   %elem6 = extractelement <2 x i64> %val3, i32 0
    292   %elem7 = extractelement <2 x i64> %val3, i32 1
    293   %elem8 = extractelement <2 x i64> %val4, i32 0
    294   %elem9 = extractelement <2 x i64> %val4, i32 1
    295   %elem10 = extractelement <2 x i64> %val5, i32 0
    296   %elem11 = extractelement <2 x i64> %val5, i32 1
    297   %elem12 = extractelement <2 x i64> %val6, i32 0
    298   %elem13 = extractelement <2 x i64> %val6, i32 1
    299   %elem14 = extractelement <2 x i64> %val7, i32 0
    300   %elem15 = extractelement <2 x i64> %val7, i32 1
          ; View each 64-bit scalar as a pair of 32-bit lanes.
    301   %bitcast0 = bitcast i64 %elem0 to <2 x i32>
    302   %bitcast1 = bitcast i64 %elem1 to <2 x i32>
    303   %bitcast2 = bitcast i64 %elem2 to <2 x i32>
    304   %bitcast3 = bitcast i64 %elem3 to <2 x i32>
    305   %bitcast4 = bitcast i64 %elem4 to <2 x i32>
    306   %bitcast5 = bitcast i64 %elem5 to <2 x i32>
    307   %bitcast6 = bitcast i64 %elem6 to <2 x i32>
    308   %bitcast7 = bitcast i64 %elem7 to <2 x i32>
    309   %bitcast8 = bitcast i64 %elem8 to <2 x i32>
    310   %bitcast9 = bitcast i64 %elem9 to <2 x i32>
    311   %bitcast10 = bitcast i64 %elem10 to <2 x i32>
    312   %bitcast11 = bitcast i64 %elem11 to <2 x i32>
    313   %bitcast12 = bitcast i64 %elem12 to <2 x i32>
    314   %bitcast13 = bitcast i64 %elem13 to <2 x i32>
    315   %bitcast14 = bitcast i64 %elem14 to <2 x i32>
    316   %bitcast15 = bitcast i64 %elem15 to <2 x i32>
          ; Level 1: mask <1, 3> keeps word lane 1 of each of the two operands.
    317   %low0 = shufflevector <2 x i32> %bitcast0, <2 x i32> %bitcast1,
    318                         <2 x i32> <i32 1, i32 3>
    319   %low1 = shufflevector <2 x i32> %bitcast2, <2 x i32> %bitcast3,
    320                         <2 x i32> <i32 1, i32 3>
    321   %low2 = shufflevector <2 x i32> %bitcast4, <2 x i32> %bitcast5,
    322                         <2 x i32> <i32 1, i32 3>
    323   %low3 = shufflevector <2 x i32> %bitcast6, <2 x i32> %bitcast7,
    324                         <2 x i32> <i32 1, i32 3>
    325   %low4 = shufflevector <2 x i32> %bitcast8, <2 x i32> %bitcast9,
    326                         <2 x i32> <i32 1, i32 3>
    327   %low5 = shufflevector <2 x i32> %bitcast10, <2 x i32> %bitcast11,
    328                         <2 x i32> <i32 1, i32 3>
    329   %low6 = shufflevector <2 x i32> %bitcast12, <2 x i32> %bitcast13,
    330                         <2 x i32> <i32 1, i32 3>
    331   %low7 = shufflevector <2 x i32> %bitcast14, <2 x i32> %bitcast15,
    332                         <2 x i32> <i32 1, i32 3>
          ; View each word pair as four halfword lanes.
    333   %half0 = bitcast <2 x i32> %low0 to <4 x i16>
    334   %half1 = bitcast <2 x i32> %low1 to <4 x i16>
    335   %half2 = bitcast <2 x i32> %low2 to <4 x i16>
    336   %half3 = bitcast <2 x i32> %low3 to <4 x i16>
    337   %half4 = bitcast <2 x i32> %low4 to <4 x i16>
    338   %half5 = bitcast <2 x i32> %low5 to <4 x i16>
    339   %half6 = bitcast <2 x i32> %low6 to <4 x i16>
    340   %half7 = bitcast <2 x i32> %low7 to <4 x i16>
          ; Level 2: keep the odd-numbered halfword lanes of each operand pair.
    341   %hlow0 = shufflevector <4 x i16> %half0, <4 x i16> %half1,
    342                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    343   %hlow1 = shufflevector <4 x i16> %half2, <4 x i16> %half3,
    344                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    345   %hlow2 = shufflevector <4 x i16> %half4, <4 x i16> %half5,
    346                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    347   %hlow3 = shufflevector <4 x i16> %half6, <4 x i16> %half7,
    348                          <4 x i32> <i32 1, i32 3, i32 5, i32 7>
          ; View each halfword group as eight byte lanes.
    349   %bytes0 = bitcast <4 x i16> %hlow0 to <8 x i8>
    350   %bytes1 = bitcast <4 x i16> %hlow1 to <8 x i8>
    351   %bytes2 = bitcast <4 x i16> %hlow2 to <8 x i8>
    352   %bytes3 = bitcast <4 x i16> %hlow3 to <8 x i8>
          ; Level 3: keep the odd-numbered byte lanes of each operand pair.
    353   %join0 = shufflevector <8 x i8> %bytes0, <8 x i8> %bytes1,
    354                          <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    355                                     i32 9, i32 11, i32 13, i32 15>
    356   %join1 = shufflevector <8 x i8> %bytes2, <8 x i8> %bytes3,
    357                          <8 x i32> <i32 1, i32 3, i32 5, i32 7,
    358                                     i32 9, i32 11, i32 13, i32 15>
          ; Identity mask: concatenate the two 8-byte halves into the 16-byte result.
    359   %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
    360                        <16 x i32> <i32 0, i32 1, i32 2, i32 3,
    361                                    i32 4, i32 5, i32 6, i32 7,
    362                                    i32 8, i32 9, i32 10, i32 11,
    363                                    i32 12, i32 13, i32 14, i32 15>
    364   ret <16 x i8> %ret
    365 }
    367 
    368 ; Test a <2 x i64> -> <4 x f32> pack in which only individual elements are
    369 ; needed.
        ; The vectors are built from four scalar i64 arguments, and every lane of
        ; the packed result is consumed by scalar float arithmetic.  The FileCheck
        ; directives require that no vperm, vpk or vmrh appears ahead of the two
        ; aebr instructions, which are followed by a meebr into %f0.
    370 define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) {
    371 ; CHECK-LABEL: f8:
    372 ; CHECK-NOT: vperm
    373 ; CHECK-NOT: vpk
    374 ; CHECK-NOT: vmrh
    375 ; CHECK: aebr {{%f[0-7]}},
    376 ; CHECK: aebr {{%f[0-7]}},
    377 ; CHECK: meebr %f0,
    378 ; CHECK: br %r14
          ; Put each scalar in lane 0 of its own <2 x i64>; lane 1 stays undef.
    379   %vec0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0
    380   %vec1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0
    381   %vec2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0
    382   %vec3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0
          ; Mask <0, 2> takes lane 0 of each operand, giving {scalar0, scalar1} and
          ; {scalar2, scalar3}.
    383   %join0 = shufflevector <2 x i64> %vec0, <2 x i64> %vec1,
    384                          <2 x i32> <i32 0, i32 2>
    385   %join1 = shufflevector <2 x i64> %vec2, <2 x i64> %vec3,
    386                          <2 x i32> <i32 0, i32 2>
    387   %bitcast0 = bitcast <2 x i64> %join0 to <4 x float>
    388   %bitcast1 = bitcast <2 x i64> %join1 to <4 x float>
          ; The pack itself: keep the odd-numbered float lanes of both operands.
    389   %pack = shufflevector <4 x float> %bitcast0, <4 x float> %bitcast1,
    390                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    391   %elt0 = extractelement <4 x float> %pack, i32 0
    392   %elt1 = extractelement <4 x float> %pack, i32 1
    393   %elt2 = extractelement <4 x float> %pack, i32 2
    394   %elt3 = extractelement <4 x float> %pack, i32 3
          ; Result is (elt0 + elt2) * (elt1 + elt3), so every packed lane is used.
    395   %add0 = fadd float %elt0, %elt2
    396   %add1 = fadd float %elt1, %elt3
    397   %ret = fmul float %add0, %add1
    398   ret float %ret
    399 }
    400 
    401 ; Test a <2 x f64> -> <4 x i32> pack in which only individual elements are
    402 ; needed.
        ; Mirror image of f8: the vectors are built from four scalar double
        ; arguments and every lane of the packed result is consumed by scalar
        ; integer arithmetic.  The FileCheck directives require that no vperm, vpk or
        ; vmrh appears ahead of the two ar instructions, which are followed by an
        ; or into %r2.
    403 define i32 @f9(double %scalar0, double %scalar1, double %scalar2,
    404                double %scalar3) {
    405 ; CHECK-LABEL: f9:
    406 ; CHECK-NOT: vperm
    407 ; CHECK-NOT: vpk
    408 ; CHECK-NOT: vmrh
    409 ; CHECK: ar {{%r[0-5]}},
    410 ; CHECK: ar {{%r[0-5]}},
    411 ; CHECK: or %r2,
    412 ; CHECK: br %r14
          ; Put each scalar in lane 0 of its own <2 x double>; lane 1 stays undef.
    413   %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
    414   %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0
    415   %vec2 = insertelement <2 x double> undef, double %scalar2, i32 0
    416   %vec3 = insertelement <2 x double> undef, double %scalar3, i32 0
          ; Mask <0, 2> takes lane 0 of each operand, giving {scalar0, scalar1} and
          ; {scalar2, scalar3}.
    417   %join0 = shufflevector <2 x double> %vec0, <2 x double> %vec1,
    418                          <2 x i32> <i32 0, i32 2>
    419   %join1 = shufflevector <2 x double> %vec2, <2 x double> %vec3,
    420                          <2 x i32> <i32 0, i32 2>
    421   %bitcast0 = bitcast <2 x double> %join0 to <4 x i32>
    422   %bitcast1 = bitcast <2 x double> %join1 to <4 x i32>
          ; The pack itself: keep the odd-numbered i32 lanes of both operands.
    423   %pack = shufflevector <4 x i32> %bitcast0, <4 x i32> %bitcast1,
    424                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    425   %elt0 = extractelement <4 x i32> %pack, i32 0
    426   %elt1 = extractelement <4 x i32> %pack, i32 1
    427   %elt2 = extractelement <4 x i32> %pack, i32 2
    428   %elt3 = extractelement <4 x i32> %pack, i32 3
          ; Result is (elt0 + elt2) | (elt1 + elt3), so every packed lane is used.
    429   %add0 = add i32 %elt0, %elt2
    430   %add1 = add i32 %elt1, %elt3
    431   %ret = or i32 %add0, %add1
    432   ret i32 %ret
    433 }
    434