Home | History | Annotate | Download | only in arch
      1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
      2 target triple = "armv7-none-linux-gnueabi"
      3 
      4 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
      5 ;;;;;;;;;               INTRINSICS               ;;;;;;;;;;
      6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
      7 
      8 declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
      9 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
     10 declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
     11 declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
     12 declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
     13 declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
     14 
     15 declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
     16 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
     17 declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
     18 declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
     19 declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
     20 declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
     21 
     22 declare <8 x i8>  @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
     23 declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
     24 declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
     25 
     26 declare <8 x i8>  @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
     27 declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
     28 declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
     29 
     30 declare <8 x i8>  @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
     31 declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
     32 declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
     33 
     34 declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
     35 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
     36 
     37 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
     38 declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
     39 
     40 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     41 ;;;;;;;;;                HELPERS                 ;;;;;;;;;;
     42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     43 
     44 define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
     45   %1 = insertelement <4 x float> undef, float %in, i32 0
     46   %2 = insertelement <4 x float> %1, float %in, i32 1
     47   %3 = insertelement <4 x float> %2, float %in, i32 2
     48   %4 = insertelement <4 x float> %3, float %in, i32 3
     49   ret <4 x float> %4
     50 }
     51 
     52 define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
     53   %1 = insertelement <2 x float> undef, float %in, i32 0
     54   %2 = insertelement <2 x float> %1, float %in, i32 1
     55   ret <2 x float> %2
     56 }
     57 
     58 define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
     59   %1 = insertelement <4 x i32> undef, i32 %in, i32 0
     60   %2 = insertelement <4 x i32> %1, i32 %in, i32 1
     61   %3 = insertelement <4 x i32> %2, i32 %in, i32 2
     62   %4 = insertelement <4 x i32> %3, i32 %in, i32 3
     63   ret <4 x i32> %4
     64 }
     65 
     66 
     67 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     68 ;;;;;;;;;                 CLAMP                  ;;;;;;;;;;
     69 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     70 
     71 define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readonly {
     72   %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
     73   %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
     74   ret <4 x float> %2
     75 }
     76 
     77 define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
     78   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
     79   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
     80   %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readonly
     81   ret <4 x float> %out
     82 }
     83 
     84 define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readonly {
     85   %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     86   %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     87   %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     88   %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
     89   %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
     90   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
     91   ret <3 x float> %c
     92 }
     93 
     94 define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
     95   %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     96   %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
     97   %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
     98   %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
     99   %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
    100   %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    101   ret <3 x float> %c
    102 }
    103 
    104 define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readonly {
    105   %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
    106   %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
    107   ret <2 x float> %2
    108 }
    109 
    110 define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
    111   %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
    112   %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
    113   %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
    114   %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
    115   ret <2 x float> %b
    116 }
    117 
    118 define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
    119   %1 = fcmp olt float %value, %high
    120   %2 = select i1 %1, float %value, float %high
    121   %3 = fcmp ogt float %2, %low
    122   %4 = select i1 %3, float %2, float %low
    123   ret float %4
    124 }
    125 
    126 
    127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    128 ;;;;;;;;;                  FMAX                  ;;;;;;;;;;
    129 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    130 
    131 define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
    132   %1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
    133   ret <4 x float> %1
    134 }
    135 
    136 define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
    137   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
    138   %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
    139   ret <4 x float> %2
    140 }
    141 
    142 define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
    143   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    144   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    145   %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
    146   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    147   ret <3 x float> %4
    148 }
    149 
    150 define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
    151   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    152   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
    153   %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
    154   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    155   ret <3 x float> %c
    156 }
    157 
    158 define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
    159   %1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
    160   ret <2 x float> %1
    161 }
    162 
    163 define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
    164   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
    165   %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
    166   ret <2 x float> %2
    167 }
    168 
    169 define float @_Z4fmaxff(float %v1, float %v2) nounwind readonly {
    170   %1 = fcmp ogt float %v1, %v2
    171   %2 = select i1 %1, float %v1, float %v2
    172   ret float %2
    173 }
    174 
    175 
    176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    177 ;;;;;;;;;                  FMIN                  ;;;;;;;;;;
    178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    179 
    180 define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
    181   %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
    182   ret <4 x float> %1
    183 }
    184 
    185 define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
    186   %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
    187   %2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
    188   ret <4 x float> %2
    189 }
    190 
    191 define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
    192   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    193   %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    194   %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
    195   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    196   ret <3 x float> %4
    197 }
    198 
    199 define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
    200   %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    201   %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
    202   %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
    203   %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    204   ret <3 x float> %c
    205 }
    206 
    207 define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
    208   %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
    209   ret <2 x float> %1
    210 }
    211 
    212 define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
    213   %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
    214   %2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
    215   ret <2 x float> %2
    216 }
    217 
    218 define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
    219   %1 = fcmp olt float %v1, %v2
    220   %2 = select i1 %1, float %v1, float %v2
    221   ret float %2
    222 }
    223 
    224 
    225 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    226 ;;;;;;;;;                  MAX                   ;;;;;;;;;;
    227 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    228 
    229 define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
    230   %1 = icmp sgt i8 %v1, %v2
    231   %2 = select i1 %1, i8 %v1, i8 %v2
    232   ret i8 %2
    233 }
    234 
    235 define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
    236   %1 = sext <2 x i8> %v1 to <2 x i32>
    237   %2 = sext <2 x i8> %v2 to <2 x i32>
    238   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    239   %4 = trunc <2 x i32> %3 to <2 x i8>
    240   ret <2 x i8> %4
    241 }
    242 
    243 define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
    244   %1 = sext <3 x i8> %v1 to <3 x i32>
    245   %2 = sext <3 x i8> %v2 to <3 x i32>
    246   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    247   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    248   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    249   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    250   %7 = trunc <3 x i32> %6 to <3 x i8>
    251   ret <3 x i8> %7
    252 }
    253 
    254 define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
    255   %1 = sext <4 x i8> %v1 to <4 x i32>
    256   %2 = sext <4 x i8> %v2 to <4 x i32>
    257   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    258   %4 = trunc <4 x i32> %3 to <4 x i8>
    259   ret <4 x i8> %4
    260 }
    261 
    262 define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
    263   %1 = icmp sgt i16 %v1, %v2
    264   %2 = select i1 %1, i16 %v1, i16 %v2
    265   ret i16 %2
    266 }
    267 
    268 define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
    269   %1 = sext <2 x i16> %v1 to <2 x i32>
    270   %2 = sext <2 x i16> %v2 to <2 x i32>
    271   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    272   %4 = trunc <2 x i32> %3 to <2 x i16>
    273   ret <2 x i16> %4
    274 }
    275 
    276 define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
    277   %1 = sext <3 x i16> %v1 to <3 x i32>
    278   %2 = sext <3 x i16> %v2 to <3 x i32>
    279   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    280   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    281   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    282   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    283   %7 = trunc <3 x i32> %6 to <3 x i16>
    284   ret <3 x i16> %7
    285 }
    286 
    287 define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
    288   %1 = sext <4 x i16> %v1 to <4 x i32>
    289   %2 = sext <4 x i16> %v2 to <4 x i32>
    290   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    291   %4 = trunc <4 x i32> %3 to <4 x i16>
    292   ret <4 x i16> %4
    293 }
    294 
    295 define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
    296   %1 = icmp sgt i32 %v1, %v2
    297   %2 = select i1 %1, i32 %v1, i32 %v2
    298   ret i32 %2
    299 }
    300 
    301 define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
    302   %1 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
    303   ret <2 x i32> %1
    304 }
    305 
    306 define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
    307   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    308   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    309   %3 = tail call <4 x i32   > @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    310   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    311   ret <3 x i32> %4
    312 }
    313 
    314 define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
    315   %1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
    316   ret <4 x i32> %1
    317 }
    318 
    319 define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
    320   %1 = icmp sgt i64 %v1, %v2
    321   %2 = select i1 %1, i64 %v1, i64 %v2
    322   ret i64 %2
    323 }
    324 
    325 ; TODO:  long vector types
    326 
    327 define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
    328   %1 = icmp ugt i8 %v1, %v2
    329   %2 = select i1 %1, i8 %v1, i8 %v2
    330   ret i8 %2
    331 }
    332 
    333 define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
    334   %1 = zext <2 x i8> %v1 to <2 x i32>
    335   %2 = zext <2 x i8> %v2 to <2 x i32>
    336   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    337   %4 = trunc <2 x i32> %3 to <2 x i8>
    338   ret <2 x i8> %4
    339 }
    340 
    341 define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
    342   %1 = zext <3 x i8> %v1 to <3 x i32>
    343   %2 = zext <3 x i8> %v2 to <3 x i32>
    344   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    345   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    346   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    347   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    348   %7 = trunc <3 x i32> %6 to <3 x i8>
    349   ret <3 x i8> %7
    350 }
    351 
    352 define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
    353   %1 = zext <4 x i8> %v1 to <4 x i32>
    354   %2 = zext <4 x i8> %v2 to <4 x i32>
    355   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    356   %4 = trunc <4 x i32> %3 to <4 x i8>
    357   ret <4 x i8> %4
    358 }
    359 
    360 define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
    361   %1 = icmp ugt i16 %v1, %v2
    362   %2 = select i1 %1, i16 %v1, i16 %v2
    363   ret i16 %2
    364 }
    365 
    366 define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
    367   %1 = zext <2 x i16> %v1 to <2 x i32>
    368   %2 = zext <2 x i16> %v2 to <2 x i32>
    369   %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    370   %4 = trunc <2 x i32> %3 to <2 x i16>
    371   ret <2 x i16> %4
    372 }
    373 
    374 define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
    375   %1 = zext <3 x i16> %v1 to <3 x i32>
    376   %2 = zext <3 x i16> %v2 to <3 x i32>
    377   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    378   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    379   %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    380   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    381   %7 = trunc <3 x i32> %6 to <3 x i16>
    382   ret <3 x i16> %7
    383 }
    384 
    385 define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
    386   %1 = zext <4 x i16> %v1 to <4 x i32>
    387   %2 = zext <4 x i16> %v2 to <4 x i32>
    388   %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    389   %4 = trunc <4 x i32> %3 to <4 x i16>
    390   ret <4 x i16> %4
    391 }
    392 
    393 define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
    394   %1 = icmp ugt i32 %v1, %v2
    395   %2 = select i1 %1, i32 %v1, i32 %v2
    396   ret i32 %2
    397 }
    398 
    399 define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
    400   %1 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
    401   ret <2 x i32> %1
    402 }
    403 
    404 define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
    405   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    406   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    407   %3 = tail call <4 x i32   > @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    408   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    409   ret <3 x i32> %4
    410 }
    411 
    412 define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
    413   %1 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
    414   ret <4 x i32> %1
    415 }
    416 
    417 define i64 @_Z3maxyy(i64 %v1, i64 %v2) nounwind readnone {
    418   %1 = icmp ugt i64 %v1, %v2
    419   %2 = select i1 %1, i64 %v1, i64 %v2
    420   ret i64 %2
    421 }
    422 
    423 ; TODO:  long vector types
    424 
    425 define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
    426   %1 = tail call float @_Z4fmaxff(float %v1, float %v2)
    427   ret float %1
    428 }
    429 
    430 define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
    431   %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
    432   ret <2 x float> %1
    433 }
    434 
    435 define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
    436   %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
    437   ret <2 x float> %1
    438 }
    439 
    440 define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
    441   %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
    442   ret <3 x float> %1
    443 }
    444 
    445 define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
    446   %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
    447   ret <3 x float> %1
    448 }
    449 
    450 define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
    451   %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
    452   ret <4 x float> %1
    453 }
    454 
    455 define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
    456   %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
    457   ret <4 x float> %1
    458 }
    459 
    460 
    461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    462 ;;;;;;;;;                  MIN                   ;;;;;;;;;;
    463 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    464 
    465 define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
    466   %1 = icmp slt i8 %v1, %v2
    467   %2 = select i1 %1, i8 %v1, i8 %v2
    468   ret i8 %2
    469 }
    470 
    471 define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
    472   %1 = sext <2 x i8> %v1 to <2 x i32>
    473   %2 = sext <2 x i8> %v2 to <2 x i32>
    474   %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    475   %4 = trunc <2 x i32> %3 to <2 x i8>
    476   ret <2 x i8> %4
    477 }
    478 
    479 define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
    480   %1 = sext <3 x i8> %v1 to <3 x i32>
    481   %2 = sext <3 x i8> %v2 to <3 x i32>
    482   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    483   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    484   %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    485   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    486   %7 = trunc <3 x i32> %6 to <3 x i8>
    487   ret <3 x i8> %7
    488 }
    489 
    490 define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
    491   %1 = sext <4 x i8> %v1 to <4 x i32>
    492   %2 = sext <4 x i8> %v2 to <4 x i32>
    493   %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    494   %4 = trunc <4 x i32> %3 to <4 x i8>
    495   ret <4 x i8> %4
    496 }
    497 
    498 define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
    499   %1 = icmp slt i16 %v1, %v2
    500   %2 = select i1 %1, i16 %v1, i16 %v2
    501   ret i16 %2
    502 }
    503 
    504 define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
    505   %1 = sext <2 x i16> %v1 to <2 x i32>
    506   %2 = sext <2 x i16> %v2 to <2 x i32>
    507   %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    508   %4 = trunc <2 x i32> %3 to <2 x i16>
    509   ret <2 x i16> %4
    510 }
    511 
    512 define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
    513   %1 = sext <3 x i16> %v1 to <3 x i32>
    514   %2 = sext <3 x i16> %v2 to <3 x i32>
    515   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    516   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    517   %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    518   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    519   %7 = trunc <3 x i32> %6 to <3 x i16>
    520   ret <3 x i16> %7
    521 }
    522 
    523 define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
    524   %1 = sext <4 x i16> %v1 to <4 x i32>
    525   %2 = sext <4 x i16> %v2 to <4 x i32>
    526   %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    527   %4 = trunc <4 x i32> %3 to <4 x i16>
    528   ret <4 x i16> %4
    529 }
    530 
    531 define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
    532   %1 = icmp slt i32 %v1, %v2
    533   %2 = select i1 %1, i32 %v1, i32 %v2
    534   ret i32 %2
    535 }
    536 
    537 define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
    538   %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
    539   ret <2 x i32> %1
    540 }
    541 
    542 define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
    543   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    544   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    545   %3 = tail call <4 x i32   > @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    546   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    547   ret <3 x i32> %4
    548 }
    549 
    550 define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
    551   %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
    552   ret <4 x i32> %1
    553 }
    554 
    555 define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
    556   %1 = icmp slt i64 %v1, %v2
    557   %2 = select i1 %1, i64 %v1, i64 %v2
    558   ret i64 %2
    559 }
    560 
    561 ; TODO:  long vector types
    562 
    563 define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
    564   %1 = icmp ult i8 %v1, %v2
    565   %2 = select i1 %1, i8 %v1, i8 %v2
    566   ret i8 %2
    567 }
    568 
    569 define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
    570   %1 = zext <2 x i8> %v1 to <2 x i32>
    571   %2 = zext <2 x i8> %v2 to <2 x i32>
    572   %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    573   %4 = trunc <2 x i32> %3 to <2 x i8>
    574   ret <2 x i8> %4
    575 }
    576 
    577 define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
    578   %1 = zext <3 x i8> %v1 to <3 x i32>
    579   %2 = zext <3 x i8> %v2 to <3 x i32>
    580   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    581   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    582   %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    583   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    584   %7 = trunc <3 x i32> %6 to <3 x i8>
    585   ret <3 x i8> %7
    586 }
    587 
    588 define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
    589   %1 = zext <4 x i8> %v1 to <4 x i32>
    590   %2 = zext <4 x i8> %v2 to <4 x i32>
    591   %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    592   %4 = trunc <4 x i32> %3 to <4 x i8>
    593   ret <4 x i8> %4
    594 }
    595 
    596 define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
    597   %1 = icmp ult i16 %v1, %v2
    598   %2 = select i1 %1, i16 %v1, i16 %v2
    599   ret i16 %2
    600 }
    601 
    602 define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
    603   %1 = zext <2 x i16> %v1 to <2 x i32>
    604   %2 = zext <2 x i16> %v2 to <2 x i32>
    605   %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
    606   %4 = trunc <2 x i32> %3 to <2 x i16>
    607   ret <2 x i16> %4
    608 }
    609 
    610 define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
    611   %1 = zext <3 x i16> %v1 to <3 x i32>
    612   %2 = zext <3 x i16> %v2 to <3 x i32>
    613   %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    614   %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    615   %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
    616   %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    617   %7 = trunc <3 x i32> %6 to <3 x i16>
    618   ret <3 x i16> %7
    619 }
    620 
    621 define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
    622   %1 = zext <4 x i16> %v1 to <4 x i32>
    623   %2 = zext <4 x i16> %v2 to <4 x i32>
    624   %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    625   %4 = trunc <4 x i32> %3 to <4 x i16>
    626   ret <4 x i16> %4
    627 }
    628 
    629 define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone {
    630   %1 = icmp ult i32 %v1, %v2
    631   %2 = select i1 %1, i32 %v1, i32 %v2
    632   ret i32 %2
    633 }
    634 
    635 define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
    636   %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
    637   ret <2 x i32> %1
    638 }
    639 
    640 define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
    641   %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    642   %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    643   %3 = tail call <4 x i32   > @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
    644   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    645   ret <3 x i32> %4
    646 }
    647 
    648 define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
    649   %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
    650   ret <4 x i32> %1
    651 }
    652 
    653 define i64 @_Z3minyy(i64 %v1, i64 %v2) nounwind readnone {
    654   %1 = icmp ult i64 %v1, %v2
    655   %2 = select i1 %1, i64 %v1, i64 %v2
    656   ret i64 %2
    657 }
    658 
    659 ; TODO:  long vector types
    660 
    661 define float @_Z3minff(float %v1, float %v2) nounwind readnone {
    662   %1 = tail call float @_Z4fminff(float %v1, float %v2)
    663   ret float %1
    664 }
    665 
    666 define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
    667   %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
    668   ret <2 x float> %1
    669 }
    670 
    671 define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
    672   %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
    673   ret <2 x float> %1
    674 }
    675 
    676 define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
    677   %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
    678   ret <3 x float> %1
    679 }
    680 
    681 define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
    682   %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
    683   ret <3 x float> %1
    684 }
    685 
    686 define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
    687   %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
    688   ret <4 x float> %1
    689 }
    690 
    691 define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
    692   %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
    693   ret <4 x float> %1
    694 }
    695 
    696 
    697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    698 ;;;;;;;;;                  YUV                   ;;;;;;;;;;
    699 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    700 
    701 @yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
    702 @yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
    703 @yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
    704 @yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16
    705 
    706 
    707 define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
    708   %_sy = zext i8 %pY to i32
    709   %_su = zext i8 %pU to i32
    710   %_sv = zext i8 %pV to i32
    711 
    712   %_sy2 = add i32 -16, %_sy
    713   %_sy3 = mul i32 298, %_sy2
    714   %_su2 = add i32 -128, %_su
    715   %_sv2 = add i32 -128, %_sv
    716   %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone
    717   %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
    718   %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone
    719 
    720   %mu = load <4 x i32>* @yuv_U, align 8
    721   %mv = load <4 x i32>* @yuv_V, align 8
    722   %_u2 = mul <4 x i32> %_u, %mu
    723   %_v2 = mul <4 x i32> %_v, %mv
    724   %_y2 = add <4 x i32> %_y, %_u2
    725   %_y3 = add <4 x i32> %_y2, %_v2
    726 
    727  ; %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone
    728 ;  %r2 = trunc <4 x i16> %r1 to <4 x i8>
    729 ;  ret <4 x i8> %r2
    730 
    731   %c0 = load <4 x i32>* @yuv_0, align 8
    732   %c255 = load <4 x i32>* @yuv_255, align 8
    733   %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone
    734   %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone
    735   %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
    736   %r4 = trunc <4 x i32> %r3 to <4 x i8>
    737   ret <4 x i8> %r4
    738 }
    739 
    740 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    741 ;;;;;;;;;              half_RECIP              ;;;;;;;;;;
    742 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    743 
    744 define float @_Z10half_recipf(float %v) {
    745   %1 = insertelement <2 x float> undef, float %v, i32 0
    746   %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
    747   %3 = extractelement <2 x float> %2, i32 0
    748   ret float %3
    749 }
    750 
    751 define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
    752   %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
    753   ret <2 x float> %1
    754 }
    755 
    756 define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
    757   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    758   %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
    759   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    760   ret <3 x float> %3
    761 }
    762 
    763 define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
    764   %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
    765   ret <4 x float> %1
    766 }
    767 
    768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    769 ;;;;;;;;;              half_SQRT               ;;;;;;;;;;
    770 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    771 
    772 define float @_Z9half_sqrtf(float %v) {
    773   %1 = insertelement <2 x float> undef, float %v, i32 0
    774   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
    775   %3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
    776   %4 = extractelement <2 x float> %3, i32 0
    777   ret float %4
    778 }
    779 
    780 define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
    781   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
    782   %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
    783   ret <2 x float> %2
    784 }
    785 
    786 define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
    787   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    788   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
    789   %3 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %2) nounwind readnone
    790   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    791   ret <3 x float> %4
    792 }
    793 
    794 define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
    795   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
    796   %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
    797   ret <4 x float> %2
    798 }
    799 
    800 
    801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    802 ;;;;;;;;;              half_RSQRT              ;;;;;;;;;;
    803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    804 
    805 define float @_Z10half_rsqrtf(float %v) {
    806   %1 = insertelement <2 x float> undef, float %v, i32 0
    807   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
    808   %3 = extractelement <2 x float> %2, i32 0
    809   ret float %3
    810 }
    811 
    812 define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
    813   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
    814   ret <2 x float> %1
    815 }
    816 
    817 define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
    818   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    819   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
    820   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    821   ret <3 x float> %3
    822 }
    823 
    824 define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
    825   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
    826   ret <4 x float> %1
    827 }
    828 
    829 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    830 ;;;;;;;;;              matrix                    ;;;;;;;;;;
    831 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    832 
    833 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
    834 
    835 %struct.rs_matrix4x4 = type { [16 x float] }
    836 %struct.rs_matrix3x3 = type { [9 x float] }
    837 %struct.rs_matrix2x2 = type { [4 x float] }
    838 
    839 define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
    840   %1 = insertelement <4 x float> undef, float %in, i32 0
    841   %2 = insertelement <4 x float> %1, float %in, i32 1
    842   %3 = insertelement <4 x float> %2, float %in, i32 2
    843   %4 = insertelement <4 x float> %3, float %in, i32 3
    844   ret <4 x float> %4
    845 }
    846 
    847 
    848 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
    849   %x0 = extractelement <3 x float> %in, i32 0
    850   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    851   %y0 = extractelement <3 x float> %in, i32 1
    852   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    853   %z0 = extractelement <3 x float> %in, i32 2
    854   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
    855 
    856   %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
    857   %px2 = bitcast float* %px to i8*
    858   %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind
    859 
    860   %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
    861   %py2 = bitcast float* %py to i8*
    862   %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind
    863 
    864   %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5
    865   %pz2 = bitcast float* %pz to i8*
    866   %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
    867   %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
    868 
    869   %a1 = fmul <4 x float> %x, %xm
    870   %a2 = fmul <4 x float> %y, %ym
    871   %a3 = fadd <4 x float> %a1, %a2
    872   %a4 = fmul <4 x float> %z, %zm
    873   %a5 = fadd <4 x float> %a4, %a3
    874   %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    875   ret <3 x float> %a6
    876 }
    877 
    878 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
    879   %x0 = extractelement <2 x float> %in, i32 0
    880   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    881   %y0 = extractelement <2 x float> %in, i32 1
    882   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    883 
    884   %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
    885   %px2 = bitcast float* %px to <4 x float>*
    886   %xm = load <4 x float>* %px2, align 4
    887   %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
    888   %py2 = bitcast float* %py to <4 x float>*
    889   %ym = load <4 x float>* %py2, align 4
    890 
    891   %a1 = fmul <4 x float> %x, %xm
    892   %a2 = fmul <4 x float> %y, %ym
    893   %a3 = fadd <4 x float> %a1, %a2
    894   %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    895   ret <3 x float> %a4
    896 }
    897 
    898 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
    899   %x0 = extractelement <4 x float> %in, i32 0
    900   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    901   %y0 = extractelement <4 x float> %in, i32 1
    902   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    903   %z0 = extractelement <4 x float> %in, i32 2
    904   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
    905   %w0 = extractelement <4 x float> %in, i32 3
    906   %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
    907 
    908   %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
    909   %px2 = bitcast float* %px to <4 x float>*
    910   %xm = load <4 x float>* %px2, align 4
    911   %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
    912   %py2 = bitcast float* %py to <4 x float>*
    913   %ym = load <4 x float>* %py2, align 4
    914   %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
    915   %pz2 = bitcast float* %pz to <4 x float>*
    916   %zm = load <4 x float>* %pz2, align 4
    917   %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
    918   %pw2 = bitcast float* %pw to <4 x float>*
    919   %wm = load <4 x float>* %pw2, align 4
    920 
    921   %a1 = fmul <4 x float> %x, %xm
    922   %a2 = fmul <4 x float> %y, %ym
    923   %a3 = fadd <4 x float> %a1, %a2
    924   %a4 = fmul <4 x float> %z, %zm
    925   %a5 = fadd <4 x float> %a3, %a4
    926   %a6 = fmul <4 x float> %w, %wm
    927   %a7 = fadd <4 x float> %a5, %a6
    928   ret <4 x float> %a7
    929 }
    930 
    931 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
    932   %x0 = extractelement <3 x float> %in, i32 0
    933   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    934   %y0 = extractelement <3 x float> %in, i32 1
    935   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    936   %z0 = extractelement <3 x float> %in, i32 2
    937   %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
    938 
    939   %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
    940   %px2 = bitcast float* %px to <4 x float>*
    941   %xm = load <4 x float>* %px2, align 4
    942   %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
    943   %py2 = bitcast float* %py to <4 x float>*
    944   %ym = load <4 x float>* %py2, align 4
    945   %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
    946   %pz2 = bitcast float* %pz to <4 x float>*
    947   %zm = load <4 x float>* %pz2, align 4
    948   %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
    949   %pw2 = bitcast float* %pw to <4 x float>*
    950   %wm = load <4 x float>* %pw2, align 4
    951 
    952   %a1 = fmul <4 x float> %x, %xm
    953   %a2 = fadd <4 x float> %wm, %a1
    954   %a3 = fmul <4 x float> %y, %ym
    955   %a4 = fadd <4 x float> %a2, %a3
    956   %a5 = fmul <4 x float> %z, %zm
    957   %a6 = fadd <4 x float> %a4, %a5
    958   ret <4 x float> %a6
    959 }
    960 
    961 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
    962   %x0 = extractelement <2 x float> %in, i32 0
    963   %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
    964   %y0 = extractelement <2 x float> %in, i32 1
    965   %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
    966 
    967   %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
    968   %px2 = bitcast float* %px to <4 x float>*
    969   %xm = load <4 x float>* %px2, align 4
    970   %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
    971   %py2 = bitcast float* %py to <4 x float>*
    972   %ym = load <4 x float>* %py2, align 4
    973   %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
    974   %pw2 = bitcast float* %pw to <4 x float>*
    975   %wm = load <4 x float>* %pw2, align 4
    976 
    977   %a1 = fmul <4 x float> %x, %xm
    978   %a2 = fadd <4 x float> %wm, %a1
    979   %a3 = fmul <4 x float> %y, %ym
    980   %a4 = fadd <4 x float> %a2, %a3
    981   ret <4 x float> %a4
    982 }
    983 
    984 
    985 
    986 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    987 ;;;;;;;;;              pixel ops                 ;;;;;;;;;;
    988 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    989 
    990 
    991 @fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16
    992 @fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
    993 @fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
    994 
    995 declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
    996 declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
    997 
    998 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
    999 define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
   1000     %f255 = load <4 x float>* @fc_255.0, align 16
   1001     %f05 = load <4 x float>* @fc_0.5, align 16
   1002     %f0 = load <4 x float>* @fc_0, align 16
   1003     %v1 = fmul <4 x float> %f255, %color
   1004     %v2 = fadd <4 x float> %f05, %v1
   1005     %v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255) nounwind readnone
   1006     %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone
   1007     ret <4 x i8> %v4
   1008 }
   1009 
   1010 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
   1011 define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
   1012     %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1013     %2 = insertelement <4 x float> %1, float 1.0, i32 3
   1014     %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
   1015     ret <4 x i8> %3
   1016 }
   1017 
   1018 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
   1019 define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
   1020     %1 = insertelement <4 x float> undef, float %r, i32 0
   1021     %2 = insertelement <4 x float> %1, float %g, i32 1
   1022     %3 = insertelement <4 x float> %2, float %b, i32 2
   1023     %4 = insertelement <4 x float> %3, float 1.0, i32 3
   1024     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
   1025     ret <4 x i8> %5
   1026 }
   1027 
   1028 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
   1029 define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone {
   1030     %1 = insertelement <4 x float> undef, float %r, i32 0
   1031     %2 = insertelement <4 x float> %1, float %g, i32 1
   1032     %3 = insertelement <4 x float> %2, float %b, i32 2
   1033     %4 = insertelement <4 x float> %3, float %a, i32 3
   1034     %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
   1035     ret <4 x i8> %5
   1036 }
   1037 
   1038