; Vector-shuffle lowering test for 32-bit PowerPC with -mcpu=g5.
;
; Every function below loads one or two vectors, rebuilds a result vector one
; element at a time with extractelement/insertelement, and stores it back
; through %A. The element orders are fixed patterns; the commands below check
; which instruction mnemonics appear in the generated assembly:
;   * first command: after instcombine, the llc output must not contain
;     "vperm" at all;
;   * remaining commands: llc is run directly on this file into %t, and the
;     number of lines matching each mnemonic prefix is checked.
; NOTE(review): the mapping "function -> instruction" given in the per-function
; comments is inferred from the function names and the expected counts; the
; file itself only checks totals.

; RUN: opt < %s -instcombine | \
; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
; RUN: grep vsldoi %t | count 2
; RUN: grep vmrgh %t | count 7
; RUN: grep vmrgl %t | count 6
; RUN: grep vpkuhum %t | count 1
; RUN: grep vpkuwum %t | count 1

; Result bytes 0..10 are bytes 5..15 of the vector loaded from %A, and result
; bytes 11..15 are bytes 0..4 of the vector loaded from %B.
; Presumably one of the two expected vsldoi matches (two distinct inputs).
define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
  %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1]
  ; View both inputs as 16 bytes so individual bytes can be picked.
  %tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
  %tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
  ; Bytes 5..15 of the first input.
  %tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5 ; <i8> [#uses=1]
  %tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13 ; <i8> [#uses=1]
  %tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15 ; <i8> [#uses=1]
  ; Bytes 0..4 of the second input.
  %tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4 ; <i8> [#uses=1]
  ; Reassemble the picked bytes, in order, into result lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
  %tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
  ret void
}

; Same byte pattern as @VSLDOI_xy, but both inputs are loaded from %A (%B is
; not used): bytes 5..15 followed by bytes 0..4 of the same vector.
; Presumably the second expected vsldoi match (single-input form).
define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
  %tmp2 = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
  %tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
  %tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
  ; Bytes 5..15 of the first load.
  %tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5 ; <i8> [#uses=1]
  %tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13 ; <i8> [#uses=1]
  %tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15 ; <i8> [#uses=1]
  ; Bytes 0..4 of the second load (same address as the first).
  %tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4 ; <i8> [#uses=1]
  ; Reassemble the picked bytes, in order, into result lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
  %tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
  ret void
}

; Calls the vperm intrinsic directly with a constant control vector whose 16
; bytes are all 14. The first command at the top of the file requires that no
; "vperm" text survives the instcombine + llc pipeline, so this call is
; presumably expected to be rewritten into a cheaper shuffle.
define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
  %tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
  %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1]
  %tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
  %tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > ) ; <<4 x i32>> [#uses=1]
  %tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16> ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
  ret void
}

; AltiVec permute intrinsic used by @VPERM_promote: two <4 x i32> data
; operands and a <16 x i8> control vector.
declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)

; Interleaves bytes 8..15 of the vectors loaded from %A and %B:
; A8, B8, A9, B9, ..., A15, B15.
; Presumably one of the expected vmrgl matches.
define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=8]
  %tmp2 = load <16 x i8>* %B ; <<16 x i8>> [#uses=8]
  ; Alternate A/B extracts for element indices 8..15.
  %tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
  %tmp3 = extractelement <16 x i8> %tmp2, i32 8 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp2, i32 9 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp2, i32 10 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp2, i32 11 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1]
  %tmp11 = extractelement <16 x i8> %tmp2, i32 12 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1]
  %tmp13 = extractelement <16 x i8> %tmp2, i32 13 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp2, i32 14 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp2, i32 15 ; <i8> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}

; Interleaves halfwords 4..7 of the vectors loaded from %A and %B:
; A4, B4, A5, B5, A6, B6, A7, B7.
; Presumably one of the expected vmrgl matches.
define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=4]
  %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=4]
  ; Alternate A/B extracts for element indices 4..7.
  %tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
  %tmp3 = extractelement <8 x i16> %tmp2, i32 4 ; <i16> [#uses=1]
  %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
  %tmp5 = extractelement <8 x i16> %tmp2, i32 5 ; <i16> [#uses=1]
  %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
  %tmp7 = extractelement <8 x i16> %tmp2, i32 6 ; <i16> [#uses=1]
  %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
  %tmp9 = extractelement <8 x i16> %tmp2, i32 7 ; <i16> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0 ; <<8 x i16>> [#uses=1]
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}

; Interleaves words 2..3 of the vectors loaded from %A and %B:
; A2, B2, A3, B3.
; Presumably one of the expected vmrgl matches.
define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
  %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2]
  %tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
  %tmp3 = extractelement <4 x i32> %tmp2, i32 2 ; <i32> [#uses=1]
  %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
  %tmp5 = extractelement <4 x i32> %tmp2, i32 3 ; <i32> [#uses=1]
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}

; Interleaves bytes 0..7 of the vectors loaded from %A and %B:
; A0, B0, A1, B1, ..., A7, B7.
; Presumably one of the expected vmrgh matches.
define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=8]
  %tmp2 = load <16 x i8>* %B ; <<16 x i8>> [#uses=8]
  ; Alternate A/B extracts for element indices 0..7.
  %tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
  %tmp3 = extractelement <16 x i8> %tmp2, i32 0 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp2, i32 1 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp2, i32 2 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp2, i32 3 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
  %tmp11 = extractelement <16 x i8> %tmp2, i32 4 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
  %tmp13 = extractelement <16 x i8> %tmp2, i32 5 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp2, i32 6 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp2, i32 7 ; <i8> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}

; Interleaves halfwords 0..3 of the vectors loaded from %A and %B:
; A0, B0, A1, B1, A2, B2, A3, B3.
; Presumably one of the expected vmrgh matches.
define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=4]
  %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=4]
  ; Alternate A/B extracts for element indices 0..3.
  %tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
  %tmp3 = extractelement <8 x i16> %tmp2, i32 0 ; <i16> [#uses=1]
  %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
  %tmp5 = extractelement <8 x i16> %tmp2, i32 1 ; <i16> [#uses=1]
  %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
  %tmp7 = extractelement <8 x i16> %tmp2, i32 2 ; <i16> [#uses=1]
  %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
  %tmp9 = extractelement <8 x i16> %tmp2, i32 3 ; <i16> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0 ; <<8 x i16>> [#uses=1]
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}

; Interleaves words 0..1 of the two inputs with the %B vector first:
; B0, A0, B1, A1. Compare @tw_h_flop, which uses the opposite operand order.
; Presumably one of the expected vmrgh matches.
define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
  %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2]
  ; Note the order: the %B element comes before the %A element in each pair.
  %tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1]
  %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
  %tmp4 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1]
  %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}

; Same as @tw_h but with the operands swapped: A0, B0, A1, B1.
; Presumably one of the expected vmrgh matches.
define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
  %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
  %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2]
  %tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
  %tmp3 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1]
  %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
  %tmp5 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1]
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}

; Single-input variant of @tb_l: each of bytes 8..15 of the vector loaded from
; %A is duplicated (A8, A8, A9, A9, ..., A15, A15). %B is not used.
; Presumably one of the expected vmrgl matches.
define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16]
  ; Each index 8..15 is extracted twice from the same vector.
  %tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
  %tmp3 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1]
  %tmp11 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1]
  %tmp13 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}

; Single-input variant of @th_l: each of halfwords 4..7 of the vector loaded
; from %A is duplicated (A4, A4, A5, A5, A6, A6, A7, A7). %B is not used.
; Presumably one of the expected vmrgl matches.
define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8]
  ; Each index 4..7 is extracted twice from the same vector.
  %tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
  %tmp3 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
  %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
  %tmp5 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
  %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
  %tmp7 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
  %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
  %tmp9 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0 ; <<8 x i16>> [#uses=1]
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}

; Single-input variant of @tw_l: words 2 and 3 of the vector loaded from %A
; are each duplicated (A2, A2, A3, A3). %B is not used.
; Presumably one of the expected vmrgl matches.
define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4]
  %tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
  %tmp3 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
  %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
  %tmp5 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}

; Single-input variant of @tb_h: each of bytes 0..7 of the vector loaded from
; %A is duplicated (A0, A0, A1, A1, ..., A7, A7). %B is not used.
; Presumably one of the expected vmrgh matches.
define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16]
  ; Each index 0..7 is extracted twice from the same vector.
  %tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
  %tmp3 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
  %tmp11 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
  %tmp13 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}

; Single-input variant of @th_h: each of halfwords 0..3 of the vector loaded
; from %A is duplicated (A0, A0, A1, A1, A2, A2, A3, A3). %B is not used.
; Presumably one of the expected vmrgh matches.
define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8]
  ; Each index 0..3 is extracted twice from the same vector.
  %tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
  %tmp3 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
  %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
  %tmp5 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
  %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
  %tmp7 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
  %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
  %tmp9 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0 ; <<8 x i16>> [#uses=1]
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}

; Single-input variant of @tw_h: words 0 and 1 of the vector loaded from %A
; are each duplicated (A0, A0, A1, A1). %B is not used.
; Presumably one of the expected vmrgh matches.
define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4]
  %tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
  %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
  %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
  %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0 ; <<4 x i32>> [#uses=1]
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}

; Views the <8 x i16> loaded from %A as 16 bytes and keeps only the
; odd-indexed bytes (1, 3, ..., 15), writing that 8-byte sequence into both
; halves of the result. %B is not used.
; Presumably the single expected vpkuhum match.
define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=2]
  ; Two separate bitcasts of the same loaded value stand in for the two
  ; operands of the shuffle.
  %tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
  %tmp3 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
  ; Odd-indexed bytes via the first bitcast.
  %tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1 ; <i8> [#uses=1]
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3 ; <i8> [#uses=1]
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5 ; <i8> [#uses=1]
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7 ; <i8> [#uses=1]
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9 ; <i8> [#uses=1]
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11 ; <i8> [#uses=1]
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13 ; <i8> [#uses=1]
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15 ; <i8> [#uses=1]
  ; The same odd-indexed bytes via the second bitcast.
  %tmp11 = extractelement <16 x i8> %tmp3, i32 1 ; <i8> [#uses=1]
  %tmp12 = extractelement <16 x i8> %tmp3, i32 3 ; <i8> [#uses=1]
  %tmp13 = extractelement <16 x i8> %tmp3, i32 5 ; <i8> [#uses=1]
  %tmp14 = extractelement <16 x i8> %tmp3, i32 7 ; <i8> [#uses=1]
  %tmp15 = extractelement <16 x i8> %tmp3, i32 9 ; <i8> [#uses=1]
  %tmp16 = extractelement <16 x i8> %tmp3, i32 11 ; <i8> [#uses=1]
  %tmp17 = extractelement <16 x i8> %tmp3, i32 13 ; <i8> [#uses=1]
  %tmp18 = extractelement <16 x i8> %tmp3, i32 15 ; <i8> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..15.
  %tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0 ; <<16 x i8>> [#uses=1]
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1 ; <<16 x i8>> [#uses=1]
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2 ; <<16 x i8>> [#uses=1]
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3 ; <<16 x i8>> [#uses=1]
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4 ; <<16 x i8>> [#uses=1]
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5 ; <<16 x i8>> [#uses=1]
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6 ; <<16 x i8>> [#uses=1]
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7 ; <<16 x i8>> [#uses=1]
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8 ; <<16 x i8>> [#uses=1]
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9 ; <<16 x i8>> [#uses=1]
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10 ; <<16 x i8>> [#uses=1]
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11 ; <<16 x i8>> [#uses=1]
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12 ; <<16 x i8>> [#uses=1]
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13 ; <<16 x i8>> [#uses=1]
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14 ; <<16 x i8>> [#uses=1]
  %tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15 ; <<16 x i8>> [#uses=1]
  %tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16> ; <<8 x i16>> [#uses=1]
  store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
  ret void
}

; Views the <4 x i32> loaded from %A as 8 halfwords and keeps only the
; odd-indexed halfwords (1, 3, 5, 7), writing that 4-element sequence into
; both halves of the result. %B is not used.
; Presumably the single expected vpkuwum match.
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
  ; Two separate bitcasts of the same loaded value stand in for the two
  ; operands of the shuffle.
  %tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
  %tmp3 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
  ; Odd-indexed halfwords via the first bitcast.
  %tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1 ; <i16> [#uses=1]
  %tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3 ; <i16> [#uses=1]
  %tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5 ; <i16> [#uses=1]
  %tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7 ; <i16> [#uses=1]
  ; The same odd-indexed halfwords via the second bitcast.
  %tmp7 = extractelement <8 x i16> %tmp3, i32 1 ; <i16> [#uses=1]
  %tmp8 = extractelement <8 x i16> %tmp3, i32 3 ; <i16> [#uses=1]
  %tmp9 = extractelement <8 x i16> %tmp3, i32 5 ; <i16> [#uses=1]
  %tmp10 = extractelement <8 x i16> %tmp3, i32 7 ; <i16> [#uses=1]
  ; Place the extracted values, in order, into result lanes 0..7.
  %tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0 ; <<8 x i16>> [#uses=1]
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1 ; <<8 x i16>> [#uses=1]
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2 ; <<8 x i16>> [#uses=1]
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3 ; <<8 x i16>> [#uses=1]
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4 ; <<8 x i16>> [#uses=1]
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5 ; <<8 x i16>> [#uses=1]
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6 ; <<8 x i16>> [#uses=1]
  %tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7 ; <<8 x i16>> [#uses=1]
  %tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32> ; <<4 x i32>> [#uses=1]
  store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A
  ret void
}