; ARM NEON VUZP (vector de-interleave) shuffle-lowering tests.
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

      3 define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      4 ; CHECK-LABEL: vuzpi8:
      5 ; CHECK:       @ BB#0:
      6 ; CHECK-NEXT:    vldr d16, [r1]
      7 ; CHECK-NEXT:    vldr d17, [r0]
      8 ; CHECK-NEXT:    vuzp.8 d17, d16
      9 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
     10 ; CHECK-NEXT:    vmov r0, r1, d16
     11 ; CHECK-NEXT:    mov pc, lr
     12 	%tmp1 = load <8 x i8>, <8 x i8>* %A
     13 	%tmp2 = load <8 x i8>, <8 x i8>* %B
     14 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
     15 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
     16         %tmp5 = add <8 x i8> %tmp3, %tmp4
     17 	ret <8 x i8> %tmp5
     18 }
     19 
     20 define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
     21 ; CHECK-LABEL: vuzpi8_Qres:
     22 ; CHECK:       @ BB#0:
     23 ; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
     24 ; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
     25 ; CHECK-NEXT:    vuzp.8 [[LDR0]], [[LDR1]]
     26 ; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
     27 ; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
     28 ; CHECK-NEXT:    mov pc, lr
     29 	%tmp1 = load <8 x i8>, <8 x i8>* %A
     30 	%tmp2 = load <8 x i8>, <8 x i8>* %B
     31 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
     32 	ret <16 x i8> %tmp3
     33 }
     34 
     35 define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     36 ; CHECK-LABEL: vuzpi16:
     37 ; CHECK:       @ BB#0:
     38 ; CHECK-NEXT:    vldr d16, [r1]
     39 ; CHECK-NEXT:    vldr d17, [r0]
     40 ; CHECK-NEXT:    vuzp.16 d17, d16
     41 ; CHECK-NEXT:    vadd.i16 d16, d17, d16
     42 ; CHECK-NEXT:    vmov r0, r1, d16
     43 ; CHECK-NEXT:    mov pc, lr
     44 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     45 	%tmp2 = load <4 x i16>, <4 x i16>* %B
     46 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     47 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     48         %tmp5 = add <4 x i16> %tmp3, %tmp4
     49 	ret <4 x i16> %tmp5
     50 }
     51 
     52 define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     53 ; CHECK-LABEL: vuzpi16_Qres:
     54 ; CHECK:       @ BB#0:
     55 ; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
     56 ; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
     57 ; CHECK-NEXT:    vuzp.16 [[LDR0]], [[LDR1]]
     58 ; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
     59 ; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
     60 ; CHECK-NEXT:    mov pc, lr
     61 	%tmp1 = load <4 x i16>, <4 x i16>* %A
     62 	%tmp2 = load <4 x i16>, <4 x i16>* %B
     63 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
     64 	ret <8 x i16> %tmp3
     65 }
     66 
; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.

     69 define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     70 ; CHECK-LABEL: vuzpQi8:
     71 ; CHECK:       @ BB#0:
     72 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
     73 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
     74 ; CHECK-NEXT:    vuzp.8 q9, q8
     75 ; CHECK-NEXT:    vadd.i8 q8, q9, q8
     76 ; CHECK-NEXT:    vmov r0, r1, d16
     77 ; CHECK-NEXT:    vmov r2, r3, d17
     78 ; CHECK-NEXT:    mov pc, lr
     79 	%tmp1 = load <16 x i8>, <16 x i8>* %A
     80 	%tmp2 = load <16 x i8>, <16 x i8>* %B
     81 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
     82 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
     83         %tmp5 = add <16 x i8> %tmp3, %tmp4
     84 	ret <16 x i8> %tmp5
     85 }
     86 
     87 define <32 x i8> @vuzpQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
     88 ; CHECK-LABEL: vuzpQi8_QQres:
     89 ; CHECK:       @ BB#0:
     90 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
     91 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
     92 ; CHECK-NEXT:    vuzp.8 q9, q8
     93 ; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
     94 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
     95 ; CHECK-NEXT:    mov pc, lr
     96 	%tmp1 = load <16 x i8>, <16 x i8>* %A
     97 	%tmp2 = load <16 x i8>, <16 x i8>* %B
     98 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
     99 	ret <32 x i8> %tmp3
    100 }
    101 
    102 define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    103 ; CHECK-LABEL: vuzpQi16:
    104 ; CHECK:       @ BB#0:
    105 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    106 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    107 ; CHECK-NEXT:    vuzp.16 q9, q8
    108 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
    109 ; CHECK-NEXT:    vmov r0, r1, d16
    110 ; CHECK-NEXT:    vmov r2, r3, d17
    111 ; CHECK-NEXT:    mov pc, lr
    112 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    113 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    114 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    115 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    116         %tmp5 = add <8 x i16> %tmp3, %tmp4
    117 	ret <8 x i16> %tmp5
    118 }
    119 
    120 define <16 x i16> @vuzpQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    121 ; CHECK-LABEL: vuzpQi16_QQres:
    122 ; CHECK:       @ BB#0:
    123 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    124 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    125 ; CHECK-NEXT:    vuzp.16 q9, q8
    126 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
    127 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    128 ; CHECK-NEXT:    mov pc, lr
    129 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    130 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    131 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    132 	ret <16 x i16> %tmp3
    133 }
    134 
    135 define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    136 ; CHECK-LABEL: vuzpQi32:
    137 ; CHECK:       @ BB#0:
    138 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    139 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    140 ; CHECK-NEXT:    vuzp.32 q9, q8
    141 ; CHECK-NEXT:    vadd.i32 q8, q9, q8
    142 ; CHECK-NEXT:    vmov r0, r1, d16
    143 ; CHECK-NEXT:    vmov r2, r3, d17
    144 ; CHECK-NEXT:    mov pc, lr
    145 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    146 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    147 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    148 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    149         %tmp5 = add <4 x i32> %tmp3, %tmp4
    150 	ret <4 x i32> %tmp5
    151 }
    152 
    153 define <8 x i32> @vuzpQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
    154 ; CHECK-LABEL: vuzpQi32_QQres:
    155 ; CHECK:       @ BB#0:
    156 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    157 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    158 ; CHECK-NEXT:    vuzp.32 q9, q8
    159 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
    160 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    161 ; CHECK-NEXT:    mov pc, lr
    162 	%tmp1 = load <4 x i32>, <4 x i32>* %A
    163 	%tmp2 = load <4 x i32>, <4 x i32>* %B
    164 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
    165 	ret <8 x i32> %tmp3
    166 }
    167 
    168 define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
    169 ; CHECK-LABEL: vuzpQf:
    170 ; CHECK:       @ BB#0:
    171 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    172 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    173 ; CHECK-NEXT:    vuzp.32 q9, q8
    174 ; CHECK-NEXT:    vadd.f32 q8, q9, q8
    175 ; CHECK-NEXT:    vmov r0, r1, d16
    176 ; CHECK-NEXT:    vmov r2, r3, d17
    177 ; CHECK-NEXT:    mov pc, lr
    178 	%tmp1 = load <4 x float>, <4 x float>* %A
    179 	%tmp2 = load <4 x float>, <4 x float>* %B
    180 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    181 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    182         %tmp5 = fadd <4 x float> %tmp3, %tmp4
    183 	ret <4 x float> %tmp5
    184 }
    185 
    186 define <8 x float> @vuzpQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
    187 ; CHECK-LABEL: vuzpQf_QQres:
    188 ; CHECK:       @ BB#0:
    189 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    190 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    191 ; CHECK-NEXT:    vuzp.32 q9, q8
    192 ; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
    193 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    194 ; CHECK-NEXT:    mov pc, lr
    195 	%tmp1 = load <4 x float>, <4 x float>* %A
    196 	%tmp2 = load <4 x float>, <4 x float>* %B
    197 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
    198 	ret <8 x float> %tmp3
    199 }
    200 
; Undef shuffle indices should not prevent matching to VUZP:

    203 define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    204 ; CHECK-LABEL: vuzpi8_undef:
    205 ; CHECK:       @ BB#0:
    206 ; CHECK-NEXT:    vldr d16, [r1]
    207 ; CHECK-NEXT:    vldr d17, [r0]
    208 ; CHECK-NEXT:    vuzp.8 d17, d16
    209 ; CHECK-NEXT:    vadd.i8 d16, d17, d16
    210 ; CHECK-NEXT:    vmov r0, r1, d16
    211 ; CHECK-NEXT:    mov pc, lr
    212 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    213 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    214 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
    215 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
    216         %tmp5 = add <8 x i8> %tmp3, %tmp4
    217 	ret <8 x i8> %tmp5
    218 }
    219 
    220 define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
    221 ; CHECK-LABEL: vuzpi8_undef_Qres:
    222 ; CHECK:       @ BB#0:
    223 ; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
    224 ; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
    225 ; CHECK-NEXT:    vuzp.8 [[LDR0]], [[LDR1]]
    226 ; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
    227 ; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
    228 ; CHECK-NEXT:    mov pc, lr
    229 	%tmp1 = load <8 x i8>, <8 x i8>* %A
    230 	%tmp2 = load <8 x i8>, <8 x i8>* %B
    231 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
    232 	ret <16 x i8> %tmp3
    233 }
    234 
    235 define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    236 ; CHECK-LABEL: vuzpQi16_undef:
    237 ; CHECK:       @ BB#0:
    238 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
    239 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
    240 ; CHECK-NEXT:    vuzp.16 q9, q8
    241 ; CHECK-NEXT:    vadd.i16 q8, q9, q8
    242 ; CHECK-NEXT:    vmov r0, r1, d16
    243 ; CHECK-NEXT:    vmov r2, r3, d17
    244 ; CHECK-NEXT:    mov pc, lr
    245 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    246 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    247 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
    248 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
    249         %tmp5 = add <8 x i16> %tmp3, %tmp4
    250 	ret <8 x i16> %tmp5
    251 }
    252 
    253 define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
    254 ; CHECK-LABEL: vuzpQi16_undef_QQres:
    255 ; CHECK:       @ BB#0:
    256 ; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
    257 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
    258 ; CHECK-NEXT:    vuzp.16 q9, q8
    259 ; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
    260 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
    261 ; CHECK-NEXT:    mov pc, lr
    262 	%tmp1 = load <8 x i16>, <8 x i16>* %A
    263 	%tmp2 = load <8 x i16>, <8 x i16>* %B
    264 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
    265 	ret <16 x i16> %tmp3
    266 }
    267 
    268 define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
    269 entry:
    270   ; CHECK-LABEL: vuzp_lower_shufflemask_undef
    271   ; CHECK: vuzp
    272 	%tmp1 = load <4 x i16>, <4 x i16>* %A
    273 	%tmp2 = load <4 x i16>, <4 x i16>* %B
    274   %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7>
    275   ret <8 x i16> %0
    276 }
    277 
    278 define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) {
    279 entry:
    280   ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed
    281   ; CHECK-NOT: vtrn
    282   ; CHECK: vuzp
    283   %tmp1 = load <2 x i32>, <2 x i32>* %A
    284 	%tmp2 = load <2 x i32>, <2 x i32>* %B
    285   %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3>
    286   ret <4 x i32> %0
    287 }
    288 
    289 define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) {
    290 entry:
    291   ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn
    292   ; CHECK-NOT: vtrn
    293   ; CHECK: vuzp
    294   %tmp1 = load <2 x i32>, <2 x i32>* %A
    295   %tmp2 = load <2 x i32>, <2 x i32>* %B
    296   %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
    297   store <4 x i32> %0, <4 x i32>* %C
    298   ret void
    299 }
    300 
    301 define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
    302 ; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
    303 ; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
    304 ; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.
    305 ; CHECK-LABEL: vuzp_trunc
    306 ; CHECK: vmovn.i32
    307 ; CHECK: vmovn.i32
    308 ; CHECK: vuzp
    309 ; CHECK: vbsl
    310   %c = icmp ult <8 x i32> %cmp0, %cmp1
    311   %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
    312   ret <8 x i8> %res
    313 }
    314 
    315 ; Shuffle the result from the compare with a <4 x i8>.
    316 ; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able
    317 ; to perform the vuzp and get the vbsl mask.
    318 define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
    319                          <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
    320 ; CHECK-LABEL: vuzp_trunc_and_shuffle
    321 ; CHECK: vmovl
    322 ; CHECK: vuzp
    323 ; CHECK: vbsl
    324   %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
    325   %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
    326   %c0 = icmp ult <4 x i32> %cmp0, %cmp1
    327   %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    328   %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
    329   ret <8 x i8> %rv
    330 }
    331 
    332 ; Use an undef value for the <4 x i8> that is being shuffled with the compare result.
    333 ; This produces a build_vector with some of the operands undefs.
    334 define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
    335                          <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
    336 ; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right
    337 ; CHECK: vuzp
    338 ; CHECK: vbsl
    339   %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
    340   %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
    341   %c0 = icmp ult <4 x i32> %cmp0, %cmp1
    342   %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    343   %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
    344   ret <8 x i8> %rv
    345 }
    346 
    347 define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
    348                          <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
    349 ; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left
    350 ; CHECK: vuzp
    351 ; CHECK: vbsl
    352   %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
    353   %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
    354   %c0 = icmp ult <4 x i32> %cmp0, %cmp1
    355   %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    356   %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
    357   ret <8 x i8> %rv
    358 }
    359 
    360 ; We're using large data types here, and we have to fill with undef values until we
    361 ; get some vector size that we can represent.
    362 define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
    363                             <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8> *%cmp2_ptr) {
    364 ; CHECK-LABEL: vuzp_wide_type
    365 ; CHECK: vbsl
    366   %cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4
    367   %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1>
    368   %c0 = icmp ult <5 x i32> %cmp0, %cmp1
    369   %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
    370   %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
    371   ret <10 x i8> %rv
    372 }
    373