Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
      2 
        ; 64-bit (D register) zip of two <8 x i8> vectors. %tmp3 interleaves
        ; elements 0-3 of both inputs and %tmp4 interleaves elements 4-7; the add
        ; consumes both halves. The CHECK lines expect a single vzip.8 followed by
        ; vadd.i8.
      3 define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      4 ; CHECK-LABEL: vzipi8:
      5 ; CHECK:       @ BB#0:
      6 ; CHECK-NEXT:    vldr d16, [r1]
      7 ; CHECK-NEXT:    vldr d17, [r0]
      8 ; CHECK-NEXT:    vzip.8 d17, d16
      9 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
     10 ; CHECK-NEXT:    vmov r0, r1, d16
     11 ; CHECK-NEXT:    mov pc, lr
     12 	%tmp1 = load <8 x i8>, <8 x i8>* %A
     13 	%tmp2 = load <8 x i8>, <8 x i8>* %B
     14 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
     15 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
     16         %tmp5 = add <8 x i8> %tmp3, %tmp4
     17 	ret <8 x i8> %tmp5
     18 }
     19 
        ; Double-width result: a single shuffle produces the full 16-element
        ; interleave of two <8 x i8> inputs. The CHECK lines expect one vzip.8 on
        ; the two loaded D registers, with both outputs moved to r0-r3 and no add.
        ; Register numbers are captured in FileCheck variables (LDR0, LDR1) instead
        ; of being hard-coded.
     20 define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
     21 ; CHECK-LABEL: vzipi8_Qres:
     22 ; CHECK:       @ BB#0:
     23 ; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
     24 ; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
     25 ; CHECK-NEXT:    vzip.8 [[LDR0]], [[LDR1]]
     26 ; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
     27 ; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
     28 ; CHECK-NEXT:    mov pc, lr
     29 	%tmp1 = load <8 x i8>, <8 x i8>* %A
     30 	%tmp2 = load <8 x i8>, <8 x i8>* %B
     31 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
     32 	ret <16 x i8> %tmp3
     33 }
     34 
        ; 64-bit (D register) zip of two <4 x i16> vectors. %tmp3 interleaves
        ; elements 0-1 of both inputs and %tmp4 interleaves elements 2-3; the add
        ; consumes both halves. The CHECK lines expect a single vzip.16 followed by
        ; vadd.i16.
     35 define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     36 ; CHECK-LABEL: vzipi16:
     37 ; CHECK:       @ BB#0:
     38 ; CHECK-NEXT:    vldr d16, [r1]
     39 ; CHECK-NEXT:    vldr d17, [r0]
     40 ; CHECK-NEXT:    vzip.16 d17, d16
     41 ; CHECK-NEXT:    vadd.i16 d16, d17, d16
     42 ; CHECK-NEXT:    vmov r0, r1, d16
     43 ; CHECK-NEXT:    mov pc, lr
     44 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     45 	%tmp2 = load <4 x i16>, <4 x i16>* %B
     46 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
     47 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
     48         %tmp5 = add <4 x i16> %tmp3, %tmp4
     49 	ret <4 x i16> %tmp5
     50 }
     51 
        ; Double-width result: a single shuffle produces the full 8-element
        ; interleave of two <4 x i16> inputs. The CHECK lines expect one vzip.16 on
        ; the two loaded D registers, with both outputs moved to r0-r3 and no add.
     52 define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     53 ; CHECK-LABEL: vzipi16_Qres:
     54 ; CHECK:       @ BB#0:
     55 ; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
     56 ; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
     57 ; CHECK-NEXT:    vzip.16 [[LDR0]], [[LDR1]]
     58 ; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
     59 ; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
     60 ; CHECK-NEXT:    mov pc, lr
     61 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     62 	%tmp2 = load <4 x i16>, <4 x i16>* %B
     63 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
     64 	ret <8 x i16> %tmp3
     65 }
     66 
     67 ; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
     68 
        ; 128-bit (Q register) zip of two <16 x i8> vectors. %tmp3 interleaves
        ; elements 0-7 of both inputs and %tmp4 interleaves elements 8-15; the add
        ; consumes both halves. The CHECK lines expect a single vzip.8 on Q
        ; registers followed by vadd.i8.
     69 define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     70 ; CHECK-LABEL: vzipQi8:
     71 ; CHECK:       @ BB#0:
     72 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
     73 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
     74 ; CHECK-NEXT:    vzip.8 q9, q8
     75 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
     76 ; CHECK-NEXT:    vmov r0, r1, d16
     77 ; CHECK-NEXT:    vmov r2, r3, d17
     78 ; CHECK-NEXT:    mov pc, lr
     79 	%tmp1 = load <16 x i8>, <16 x i8>* %A
     80 	%tmp2 = load <16 x i8>, <16 x i8>* %B
     81 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
     82 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
     83         %tmp5 = add <16 x i8> %tmp3, %tmp4
     84 	ret <16 x i8> %tmp5
     85 }
     86 
        ; Double-width result: a single shuffle produces the full 32-element
        ; interleave of two <16 x i8> inputs. The CHECK lines expect the inputs to
        ; be loaded through r1 and r2, one vzip.8 on Q registers, and both outputs
        ; stored through r0 (post-incremented after the first store).
        ; NOTE(review): presumably the 256-bit result is returned through a
        ; caller-provided buffer addressed by r0 - confirm against the ABI.
     87 define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     88 ; CHECK-LABEL: vzipQi8_QQres:
     89 ; CHECK:       @ BB#0:
     90 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
     91 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
     92 ; CHECK-NEXT:    vzip.8 q9, q8
     93 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
     94 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
     95 ; CHECK-NEXT:    mov pc, lr
     96 	%tmp1 = load <16 x i8>, <16 x i8>* %A
     97 	%tmp2 = load <16 x i8>, <16 x i8>* %B
     98 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
     99 	ret <32 x i8> %tmp3
    100 }
    101 
        ; 128-bit (Q register) zip of two <8 x i16> vectors. %tmp3 interleaves
        ; elements 0-3 of both inputs and %tmp4 interleaves elements 4-7; the add
        ; consumes both halves. The CHECK lines expect a single vzip.16 on Q
        ; registers followed by vadd.i16.
    102 define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    103 ; CHECK-LABEL: vzipQi16:
    104 ; CHECK:       @ BB#0:
    105 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    106 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    107 ; CHECK-NEXT:    vzip.16 q9, q8
    108 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
    109 ; CHECK-NEXT:    vmov r0, r1, d16
    110 ; CHECK-NEXT:    vmov r2, r3, d17
    111 ; CHECK-NEXT:    mov pc, lr
    112 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    113 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    114 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    115 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    116         %tmp5 = add <8 x i16> %tmp3, %tmp4
    117 	ret <8 x i16> %tmp5
    118 }
    119 
        ; Double-width result: a single shuffle produces the full 16-element
        ; interleave of two <8 x i16> inputs. The CHECK lines expect the inputs to
        ; be loaded through r1 and r2, one vzip.16 on Q registers, and both outputs
        ; stored through r0 (post-incremented after the first store).
        ; NOTE(review): presumably the 256-bit result is returned through a
        ; caller-provided buffer addressed by r0 - confirm against the ABI.
    120 define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    121 ; CHECK-LABEL: vzipQi16_QQres:
    122 ; CHECK:       @ BB#0:
    123 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    124 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    125 ; CHECK-NEXT:    vzip.16 q9, q8
    126 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
    127 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    128 ; CHECK-NEXT:    mov pc, lr
    129 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    130 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    131 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    132 	ret <16 x i16> %tmp3
    133 }
    134 
        ; 128-bit (Q register) zip of two <4 x i32> vectors. %tmp3 interleaves
        ; elements 0-1 of both inputs and %tmp4 interleaves elements 2-3; the add
        ; consumes both halves. The CHECK lines expect a single vzip.32 on Q
        ; registers followed by vadd.i32.
    135 define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    136 ; CHECK-LABEL: vzipQi32:
    137 ; CHECK:       @ BB#0:
    138 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    139 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    140 ; CHECK-NEXT:    vzip.32 q9, q8
    141 ; CHECK-NEXT:    vadd.i32 q8, q9, q8
    142 ; CHECK-NEXT:    vmov r0, r1, d16
    143 ; CHECK-NEXT:    vmov r2, r3, d17
    144 ; CHECK-NEXT:    mov pc, lr
    145 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    146 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    147 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    148 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    149         %tmp5 = add <4 x i32> %tmp3, %tmp4
    150 	ret <4 x i32> %tmp5
    151 }
    152 
        ; Double-width result: a single shuffle produces the full 8-element
        ; interleave of two <4 x i32> inputs. The CHECK lines expect the inputs to
        ; be loaded through r1 and r2, one vzip.32 on Q registers, and both outputs
        ; stored through r0 (post-incremented after the first store).
        ; NOTE(review): presumably the 256-bit result is returned through a
        ; caller-provided buffer addressed by r0 - confirm against the ABI.
    153 define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    154 ; CHECK-LABEL: vzipQi32_QQres:
    155 ; CHECK:       @ BB#0:
    156 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    157 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    158 ; CHECK-NEXT:    vzip.32 q9, q8
    159 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
    160 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    161 ; CHECK-NEXT:    mov pc, lr
    162 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    163 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    164 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
    165 	ret <8 x i32> %tmp3
    166 }
    167 
        ; Floating-point variant of the 128-bit zip: two <4 x float> vectors are
        ; interleaved (elements 0-1 in %tmp3, elements 2-3 in %tmp4) and the halves
        ; are combined with fadd. The CHECK lines expect vzip.32 on Q registers
        ; followed by vadd.f32.
    168 define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
    169 ; CHECK-LABEL: vzipQf:
    170 ; CHECK:       @ BB#0:
    171 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    172 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    173 ; CHECK-NEXT:    vzip.32 q9, q8
    174 ; CHECK-NEXT:    vadd.f32 q8, q9, q8
    175 ; CHECK-NEXT:    vmov r0, r1, d16
    176 ; CHECK-NEXT:    vmov r2, r3, d17
    177 ; CHECK-NEXT:    mov pc, lr
    178 	%tmp1 = load <4 x float>, <4 x float>* %A
    179 	%tmp2 = load <4 x float>, <4 x float>* %B
    180 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    181 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    182         %tmp5 = fadd <4 x float> %tmp3, %tmp4
    183 	ret <4 x float> %tmp5
    184 }
    185 
        ; Double-width floating-point result: a single shuffle produces the full
        ; 8-element interleave of two <4 x float> inputs. The CHECK lines expect
        ; the inputs to be loaded through r1 and r2, one vzip.32 on Q registers,
        ; and both outputs stored through r0 (post-incremented after the first
        ; store).
        ; NOTE(review): presumably the 256-bit result is returned through a
        ; caller-provided buffer addressed by r0 - confirm against the ABI.
    186 define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
    187 ; CHECK-LABEL: vzipQf_QQres:
    188 ; CHECK:       @ BB#0:
    189 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    190 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    191 ; CHECK-NEXT:    vzip.32 q9, q8
    192 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
    193 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    194 ; CHECK-NEXT:    mov pc, lr
    195 	%tmp1 = load <4 x float>, <4 x float>* %A
    196 	%tmp2 = load <4 x float>, <4 x float>* %B
    197 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
    198 	ret <8 x float> %tmp3
    199 }
    200 
    201 ; Undef shuffle indices should not prevent matching to VZIP:
    202 
        ; Same shape as the plain <8 x i8> zip-and-add, but several mask entries in
        ; both shuffles are undef. The CHECK lines expect the identical sequence
        ; (one vzip.8 on D registers, then vadd.i8), i.e. the undef entries must
        ; not block VZIP selection.
    203 define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    204 ; CHECK-LABEL: vzipi8_undef:
    205 ; CHECK:       @ BB#0:
    206 ; CHECK-NEXT:    vldr d16, [r1]
    207 ; CHECK-NEXT:    vldr d17, [r0]
    208 ; CHECK-NEXT:    vzip.8 d17, d16
    209 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
    210 ; CHECK-NEXT:    vmov r0, r1, d16
    211 ; CHECK-NEXT:    mov pc, lr
    212 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    213 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    214 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
    215 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
    216         %tmp5 = add <8 x i8> %tmp3, %tmp4
    217 	ret <8 x i8> %tmp5
    218 }
    219 
        ; Double-width <16 x i8> interleave of two <8 x i8> inputs with several
        ; undef mask entries. The CHECK lines still expect one vzip.8 on the two
        ; loaded D registers, with both outputs moved to r0-r3.
    220 define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    221 ; CHECK-LABEL: vzipi8_undef_Qres:
    222 ; CHECK:       @ BB#0:
    223 ; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
    224 ; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
    225 ; CHECK-NEXT:    vzip.8 [[LDR0]], [[LDR1]]
    226 ; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
    227 ; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
    228 ; CHECK-NEXT:    mov pc, lr
    229 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    230 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    231 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
    232 	ret <16 x i8> %tmp3
    233 }
    234 
        ; 128-bit <16 x i8> zip-and-add where several mask entries in both shuffles
        ; are undef. The CHECK lines expect one vzip.8 on Q registers followed by
        ; vadd.i8, i.e. the undef entries must not block VZIP selection.
    235 define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    236 ; CHECK-LABEL: vzipQi8_undef:
    237 ; CHECK:       @ BB#0:
    238 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    239 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    240 ; CHECK-NEXT:    vzip.8 q9, q8
    241 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
    242 ; CHECK-NEXT:    vmov r0, r1, d16
    243 ; CHECK-NEXT:    vmov r2, r3, d17
    244 ; CHECK-NEXT:    mov pc, lr
    245 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    246 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    247 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    248 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
    249         %tmp5 = add <16 x i8> %tmp3, %tmp4
    250 	ret <16 x i8> %tmp5
    251 }
    252 
        ; Double-width <32 x i8> interleave of two <16 x i8> inputs with several
        ; undef mask entries. The CHECK lines expect the inputs to be loaded
        ; through r1 and r2, one vzip.8 on Q registers, and both outputs stored
        ; through r0 (post-incremented after the first store).
        ; NOTE(review): presumably the 256-bit result is returned through a
        ; caller-provided buffer addressed by r0 - confirm against the ABI.
    253 define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
    254 ; CHECK-LABEL: vzipQi8_undef_QQres:
    255 ; CHECK:       @ BB#0:
    256 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    257 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    258 ; CHECK-NEXT:    vzip.8 q9, q8
    259 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
    260 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    261 ; CHECK-NEXT:    mov pc, lr
    262 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    263 	%tmp2 = load <16 x i8>, <16 x i8>* %B
    264 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
    265 	ret <32 x i8> %tmp3
    266 }
    267 
    268 define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
    269 entry:
    270   ; CHECK-LABEL: vzip_lower_shufflemask_undef
    271   ; CHECK: vzip
    272 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    273 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    274   %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
    275   ret <8 x i16> %0
    276 }
    277 
    278 define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
    279 entry:
    280   ; CHECK-LABEL: vzip_lower_shufflemask_zeroed
    281   ; CHECK-NOT: vtrn
    282   ; CHECK: vzip
    283   %tmp1 = load <2 x i32>, <2 x i32>* %A
    284   %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
    285   ret <4 x i32> %0
    286 }
    287 
    288 define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) {
    289 entry:
    290   ; CHECK-LABEL: vzip_lower_shufflemask_vuzp
    291   ; CHECK-NOT: vuzp
    292   ; CHECK: vzip
    293   %tmp1 = load <2 x i32>, <2 x i32>* %A
    294   %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
    295   ret <4 x i32> %0
    296 }
    297 
    298 define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
    299 entry:
    300   ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn
    301   ; CHECK-NOT: vtrn
    302   ; CHECK: vzip
    303   %tmp1 = load <2 x i32>, <2 x i32>* %A
    304   %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
    305   store <4 x i32> %0, <4 x i32>* %B
    306   ret void
    307 }
    308 
    309 define void @vzip_vext_factor(<8 x i16>* %A, <4 x i16>* %B) {
    310 entry:
    311   ; CHECK-LABEL: vzip_vext_factor
    312   ; CHECK: vext.16 d16, d16, d17, #3
    313   ; CHECK: vzip
    314   %tmp1 = load <8 x i16>, <8 x i16>* %A
    315   %0 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 4, i32 5, i32 3>
    316   store <4 x i16> %0, <4 x i16>* %B
    317   ret void
    318 }
    319