; (source-browser navigation header, kept as a comment so the file stays valid IR)
; Home | History | Annotate | Download | only in ll32
      1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
      2 target triple = "armv7-none-linux-gnueabi"
      3 
      4 declare i8* @rsOffset([1 x i32] %a.coerce, i32 %sizeOf, i32 %x, i32 %y, i32 %z)
      5 declare i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z)
      6 
      7 ; The loads and stores in this file are annotated with RenderScript-specific
      8 ; information for the type based alias analysis, such that the TBAA analysis
      9 ; understands that loads and stores from two allocations with different types
     10 ; can never access the same memory element. This is different from C, where
     11 ; a char or uchar load/store is special as it can alias with about everything.
     12 ;
; The TBAA tree in this file has the node "RenderScript Distinct TBAA" as
     14 ; its root.
     15 ; This means all loads/stores that share this common root can be proven to not
     16 ; alias. However, the alias analysis still has to assume MayAlias between
     17 ; memory accesses in this file and memory accesses annotated with the C/C++
     18 ; TBAA metadata.
     19 ; A node named "RenderScript TBAA" wraps our distinct TBAA root node.
     20 ; If we can ensure that all accesses to elements loaded from RenderScript
     21 ; allocations are either annotated with the RenderScript TBAA information or
     22 ; not annotated at all, but never annotated with the C/C++ metadata, we
     23 ; can add the "RenderScript TBAA" tree under the C/C++ TBAA tree. This enables
     24 ; TBAA to prove that an access to data from the RenderScript allocation
     25 ; does not alias with a load/store accessing something not part of a RenderScript
     26 ; allocation.
     27 ; We do this by swapping the second operand of "RenderScript TBAA" with the node
     28 ; for "Simple C/C++ TBAA", thus connecting these TBAA groups. The other root
     29 ; node (with no children) can then safely be dropped from the analysis.
     30 
     31 !13 = !{!"RenderScript Distinct TBAA"}
     32 !14 = !{!"RenderScript TBAA", !13}
     33 !15 = !{!"allocation", !14}
     34 
     35 !21 = !{!"char", !15}
     36 define void @rsSetElementAtImpl_char([1 x i32] %a.coerce, i8 signext %val, i32 %x, i32 %y, i32 %z) #1 {
     37   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #2
     38   store i8 %val, i8* %1, align 1, !tbaa !21
     39   ret void
     40 }
     41 
     42 define signext i8 @rsGetElementAtImpl_char([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
     43   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #2
     44   %2 = load i8, i8* %1, align 1, !tbaa !21
     45   ret i8 %2
     46 }
     47 
     48 !22 = !{!"char2", !15}
     49 define void @rsSetElementAtImpl_char2([1 x i32] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
     50   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
     51   %2 = bitcast i8* %1 to <2 x i8>*
     52   store <2 x i8> %val, <2 x i8>* %2, align 2, !tbaa !22
     53   ret void
     54 }
     55 
     56 define <2 x i8> @rsGetElementAtImpl_char2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
     57   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
     58   %2 = bitcast i8* %1 to <2 x i8>*
     59   %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !22
     60   ret <2 x i8> %3
     61 }
     62 
     63 !23 = !{!"char3", !15}
     64 define void @rsSetElementAtImpl_char3([1 x i32] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
     65   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
     66   %2 = shufflevector <3 x i8> %val, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
     67   %3 = bitcast i8* %1 to <4 x i8>*
     68   store <4 x i8> %2, <4 x i8>* %3, align 4, !tbaa !23
     69   ret void
     70 }
     71 
     72 define <3 x i8> @rsGetElementAtImpl_char3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
     73   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
     74   %2 = bitcast i8* %1 to <4 x i8>*
     75   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !23
     76   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
     77   ret <3 x i8> %4
     78 }
     79 
     80 !24 = !{!"char4", !15}
     81 define void @rsSetElementAtImpl_char4([1 x i32] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
     82   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
     83   %2 = bitcast i8* %1 to <4 x i8>*
     84   store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !24
     85   ret void
     86 }
     87 
     88 define <4 x i8> @rsGetElementAtImpl_char4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
     89   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
     90   %2 = bitcast i8* %1 to <4 x i8>*
     91   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !24
     92   ret <4 x i8> %3
     93 }
     94 
     95 !25 = !{!"uchar", !15}
     96 define void @rsSetElementAtImpl_uchar([1 x i32] %a.coerce, i8 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
     97   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #2
     98   store i8 %val, i8* %1, align 1, !tbaa !25
     99   ret void
    100 }
    101 
    102 define zeroext i8 @rsGetElementAtImpl_uchar([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    103   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 1, i32 %x, i32 %y, i32 %z) #2
    104   %2 = load i8, i8* %1, align 1, !tbaa !25
    105   ret i8 %2
    106 }
    107 
    108 !26 = !{!"uchar2", !15}
    109 define void @rsSetElementAtImpl_uchar2([1 x i32] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
    110   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    111   %2 = bitcast i8* %1 to <2 x i8>*
    112   store <2 x i8> %val, <2 x i8>* %2, align 2, !tbaa !26
    113   ret void
    114 }
    115 
    116 define <2 x i8> @rsGetElementAtImpl_uchar2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    117   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    118   %2 = bitcast i8* %1 to <2 x i8>*
    119   %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !26
    120   ret <2 x i8> %3
    121 }
    122 
    123 !27 = !{!"uchar3", !15}
    124 define void @rsSetElementAtImpl_uchar3([1 x i32] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
    125   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    126   %2 = shufflevector <3 x i8> %val, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    127   %3 = bitcast i8* %1 to <4 x i8>*
    128   store <4 x i8> %2, <4 x i8>* %3, align 4, !tbaa !27
    129   ret void
    130 }
    131 
    132 define <3 x i8> @rsGetElementAtImpl_uchar3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    133   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    134   %2 = bitcast i8* %1 to <4 x i8>*
    135   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !27
    136   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
    137   ret <3 x i8> %4
    138 }
    139 
    140 !28 = !{!"uchar4", !15}
    141 define void @rsSetElementAtImpl_uchar4([1 x i32] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
    142   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    143   %2 = bitcast i8* %1 to <4 x i8>*
    144   store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !28
    145   ret void
    146 }
    147 
    148 define <4 x i8> @rsGetElementAtImpl_uchar4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    149   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    150   %2 = bitcast i8* %1 to <4 x i8>*
    151   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !28
    152   ret <4 x i8> %3
    153 }
    154 
    155 !29 = !{!"short", !15}
    156 define void @rsSetElementAtImpl_short([1 x i32] %a.coerce, i16 signext %val, i32 %x, i32 %y, i32 %z) #1 {
    157   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    158   %2 = bitcast i8* %1 to i16*
    159   store i16 %val, i16* %2, align 2, !tbaa !29
    160   ret void
    161 }
    162 
    163 define signext i16 @rsGetElementAtImpl_short([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    164   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    165   %2 = bitcast i8* %1 to i16*
    166   %3 = load i16, i16* %2, align 2, !tbaa !29
    167   ret i16 %3
    168 }
    169 
    170 !30 = !{!"short2", !15}
    171 define void @rsSetElementAtImpl_short2([1 x i32] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    172   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    173   %2 = bitcast i8* %1 to <2 x i16>*
    174   store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !30
    175   ret void
    176 }
    177 
    178 define <2 x i16> @rsGetElementAtImpl_short2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    179   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    180   %2 = bitcast i8* %1 to <2 x i16>*
    181   %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !30
    182   ret <2 x i16> %3
    183 }
    184 
    185 !31 = !{!"short3", !15}
    186 define void @rsSetElementAtImpl_short3([1 x i32] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    187   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    188   %2 = shufflevector <3 x i16> %val, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    189   %3 = bitcast i8* %1 to <4 x i16>*
    190   store <4 x i16> %2, <4 x i16>* %3, align 8, !tbaa !31
    191   ret void
    192 }
    193 
    194 define <3 x i16> @rsGetElementAtImpl_short3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    195   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    196   %2 = bitcast i8* %1 to <4 x i16>*
    197   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !31
    198   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    199   ret <3 x i16> %4
    200 }
    201 
    202 !32 = !{!"short4", !15}
    203 define void @rsSetElementAtImpl_short4([1 x i32] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    204   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    205   %2 = bitcast i8* %1 to <4 x i16>*
    206   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !32
    207   ret void
    208 }
    209 
    210 define <4 x i16> @rsGetElementAtImpl_short4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    211   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    212   %2 = bitcast i8* %1 to <4 x i16>*
    213   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !32
    214   ret <4 x i16> %3
    215 }
    216 
    217 !33 = !{!"ushort", !15}
    218 define void @rsSetElementAtImpl_ushort([1 x i32] %a.coerce, i16 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
    219   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    220   %2 = bitcast i8* %1 to i16*
    221   store i16 %val, i16* %2, align 2, !tbaa !33
    222   ret void
    223 }
    224 
    225 define zeroext i16 @rsGetElementAtImpl_ushort([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    226   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    227   %2 = bitcast i8* %1 to i16*
    228   %3 = load i16, i16* %2, align 2, !tbaa !33
    229   ret i16 %3
    230 }
    231 
    232 !34 = !{!"ushort2", !15}
    233 define void @rsSetElementAtImpl_ushort2([1 x i32] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    234   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    235   %2 = bitcast i8* %1 to <2 x i16>*
    236   store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !34
    237   ret void
    238 }
    239 
    240 define <2 x i16> @rsGetElementAtImpl_ushort2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    241   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    242   %2 = bitcast i8* %1 to <2 x i16>*
    243   %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !34
    244   ret <2 x i16> %3
    245 }
    246 
    247 !35 = !{!"ushort3", !15}
    248 define void @rsSetElementAtImpl_ushort3([1 x i32] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    249   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    250   %2 = shufflevector <3 x i16> %val, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    251   %3 = bitcast i8* %1 to <4 x i16>*
    252   store <4 x i16> %2, <4 x i16>* %3, align 8, !tbaa !35
    253   ret void
    254 }
    255 
    256 define <3 x i16> @rsGetElementAtImpl_ushort3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    257   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    258   %2 = bitcast i8* %1 to <4 x i16>*
    259   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !35
    260   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    261   ret <3 x i16> %4
    262 }
    263 
    264 !36 = !{!"ushort4", !15}
    265 define void @rsSetElementAtImpl_ushort4([1 x i32] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    266   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    267   %2 = bitcast i8* %1 to <4 x i16>*
    268   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !36
    269   ret void
    270 }
    271 
    272 define <4 x i16> @rsGetElementAtImpl_ushort4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    273   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    274   %2 = bitcast i8* %1 to <4 x i16>*
    275   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !36
    276   ret <4 x i16> %3
    277 }
    278 
    279 !37 = !{!"int", !15}
    280 define void @rsSetElementAtImpl_int([1 x i32] %a.coerce, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    281   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    282   %2 = bitcast i8* %1 to i32*
    283   store i32 %val, i32* %2, align 4, !tbaa !37
    284   ret void
    285 }
    286 
    287 define i32 @rsGetElementAtImpl_int([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    288   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    289   %2 = bitcast i8* %1 to i32*
    290   %3 = load i32, i32* %2, align 4, !tbaa !37
    291   ret i32 %3
    292 }
    293 
    294 !38 = !{!"int2", !15}
    295 define void @rsSetElementAtImpl_int2([1 x i32] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    296   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    297   %2 = bitcast i8* %1 to <2 x i32>*
    298   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !38
    299   ret void
    300 }
    301 
    302 define <2 x i32> @rsGetElementAtImpl_int2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    303   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    304   %2 = bitcast i8* %1 to <2 x i32>*
    305   %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !38
    306   ret <2 x i32> %3
    307 }
    308 
    309 !39 = !{!"int3", !15}
    310 define void @rsSetElementAtImpl_int3([1 x i32] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    311   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    312   %2 = shufflevector <3 x i32> %val, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    313   %3 = bitcast i8* %1 to <4 x i32>*
    314   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !39
    315   ret void
    316 }
    317 
    318 define <3 x i32> @rsGetElementAtImpl_int3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    319   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    320   %2 = bitcast i8* %1 to <4 x i32>*
    321   %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !39
    322   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    323   ret <3 x i32> %4
    324 }
    325 
    326 !40 = !{!"int4", !15}
    327 define void @rsSetElementAtImpl_int4([1 x i32] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    328   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    329   %2 = bitcast i8* %1 to <4 x i32>*
    330   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !40
    331   ret void
    332 }
    333 
    334 define <4 x i32> @rsGetElementAtImpl_int4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    335   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    336   %2 = bitcast i8* %1 to <4 x i32>*
    337   %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !40
    338   ret <4 x i32> %3
    339 }
    340 
    341 !41 = !{!"uint", !15}
    342 define void @rsSetElementAtImpl_uint([1 x i32] %a.coerce, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    343   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    344   %2 = bitcast i8* %1 to i32*
    345   store i32 %val, i32* %2, align 4, !tbaa !41
    346   ret void
    347 }
    348 
    349 define i32 @rsGetElementAtImpl_uint([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    350   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    351   %2 = bitcast i8* %1 to i32*
    352   %3 = load i32, i32* %2, align 4, !tbaa !41
    353   ret i32 %3
    354 }
    355 
    356 !42 = !{!"uint2", !15}
    357 define void @rsSetElementAtImpl_uint2([1 x i32] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    358   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    359   %2 = bitcast i8* %1 to <2 x i32>*
    360   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !42
    361   ret void
    362 }
    363 
    364 define <2 x i32> @rsGetElementAtImpl_uint2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    365   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    366   %2 = bitcast i8* %1 to <2 x i32>*
    367   %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !42
    368   ret <2 x i32> %3
    369 }
    370 
    371 !43 = !{!"uint3", !15}
    372 define void @rsSetElementAtImpl_uint3([1 x i32] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    373   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    374   %2 = shufflevector <3 x i32> %val, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    375   %3 = bitcast i8* %1 to <4 x i32>*
    376   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !43
    377   ret void
    378 }
    379 
    380 define <3 x i32> @rsGetElementAtImpl_uint3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    381   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    382   %2 = bitcast i8* %1 to <4 x i32>*
    383   %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !43
    384   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    385   ret <3 x i32> %4
    386 }
    387 
    388 !44 = !{!"uint4", !15}
    389 define void @rsSetElementAtImpl_uint4([1 x i32] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    390   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    391   %2 = bitcast i8* %1 to <4 x i32>*
    392   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !44
    393   ret void
    394 }
    395 
    396 define <4 x i32> @rsGetElementAtImpl_uint4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    397   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    398   %2 = bitcast i8* %1 to <4 x i32>*
    399   %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !44
    400   ret <4 x i32> %3
    401 }
    402 
    403 !45 = !{!"long", !15}
    404 define void @rsSetElementAtImpl_long([1 x i32] %a.coerce, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    405   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    406   %2 = bitcast i8* %1 to i64*
    407   store i64 %val, i64* %2, align 8, !tbaa !45
    408   ret void
    409 }
    410 
    411 define i64 @rsGetElementAtImpl_long([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    412   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    413   %2 = bitcast i8* %1 to i64*
    414   %3 = load i64, i64* %2, align 8, !tbaa !45
    415   ret i64 %3
    416 }
    417 
    418 !46 = !{!"long2", !15}
    419 define void @rsSetElementAtImpl_long2([1 x i32] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    420   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    421   %2 = bitcast i8* %1 to <2 x i64>*
    422   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !46
    423   ret void
    424 }
    425 
    426 define <2 x i64> @rsGetElementAtImpl_long2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    427   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    428   %2 = bitcast i8* %1 to <2 x i64>*
    429   %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !46
    430   ret <2 x i64> %3
    431 }
    432 
    433 !47 = !{!"long3", !15}
    434 define void @rsSetElementAtImpl_long3([1 x i32] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    435   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    436   %2 = shufflevector <3 x i64> %val, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    437   %3 = bitcast i8* %1 to <4 x i64>*
    438   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !47
    439   ret void
    440 }
    441 
    442 define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
    443   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    444   %2 = bitcast i8* %1 to <4 x i64>*
    445   %3 = load <4 x i64>, <4 x i64>* %2, align 32
    446   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
    447   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
    448   ret void
    449 }
    450 
    451 !48 = !{!"long4", !15}
    452 define void @rsSetElementAtImpl_long4([1 x i32] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    453   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    454   %2 = bitcast i8* %1 to <4 x i64>*
    455   store <4 x i64> %val, <4 x i64>* %2, align 32, !tbaa !48
    456   ret void
    457 }
    458 
    459 define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
    460   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    461   %2 = bitcast i8* %1 to <4 x i64>*
    462   %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15
    463   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
    464   ret void
    465 }
    466 
    467 !49 = !{!"ulong", !15}
    468 define void @rsSetElementAtImpl_ulong([1 x i32] %a.coerce, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    469   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    470   %2 = bitcast i8* %1 to i64*
    471   store i64 %val, i64* %2, align 8, !tbaa !49
    472   ret void
    473 }
    474 
    475 define i64 @rsGetElementAtImpl_ulong([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    476   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    477   %2 = bitcast i8* %1 to i64*
    478   %3 = load i64, i64* %2, align 8, !tbaa !49
    479   ret i64 %3
    480 }
    481 
    482 !50 = !{!"ulong2", !15}
    483 define void @rsSetElementAtImpl_ulong2([1 x i32] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    484   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    485   %2 = bitcast i8* %1 to <2 x i64>*
    486   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !50
    487   ret void
    488 }
    489 
    490 define <2 x i64> @rsGetElementAtImpl_ulong2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    491   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    492   %2 = bitcast i8* %1 to <2 x i64>*
    493   %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !50
    494   ret <2 x i64> %3
    495 }
    496 
    497 !51 = !{!"ulong3", !15}
    498 define void @rsSetElementAtImpl_ulong3([1 x i32] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    499   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    500   %2 = shufflevector <3 x i64> %val, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    501   %3 = bitcast i8* %1 to <4 x i64>*
    502   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !51
    503   ret void
    504 }
    505 
    506 define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
    507   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    508   %2 = bitcast i8* %1 to <4 x i64>*
    509   %3 = load <4 x i64>, <4 x i64>* %2, align 32
    510   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
    511   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
    512   ret void
    513 }
    514 
    515 !52 = !{!"ulong4", !15}
    516 define void @rsSetElementAtImpl_ulong4([1 x i32] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    517   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    518   %2 = bitcast i8* %1 to <4 x i64>*
    519   store <4 x i64> %val, <4 x i64>* %2, align 32, !tbaa !52
    520   ret void
    521 }
    522 
    523 define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
    524   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    525   %2 = bitcast i8* %1 to <4 x i64>*
    526   %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15
    527   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
    528   ret void
    529 }
    530 
    531 !53 = !{!"float", !15}
    532 define void @rsSetElementAtImpl_float([1 x i32] %a.coerce, float %val, i32 %x, i32 %y, i32 %z) #1 {
    533   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    534   %2 = bitcast i8* %1 to float*
    535   store float %val, float* %2, align 4, !tbaa !53
    536   ret void
    537 }
    538 
    539 define float @rsGetElementAtImpl_float([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    540   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    541   %2 = bitcast i8* %1 to float*
    542   %3 = load float, float* %2, align 4, !tbaa !53
    543   ret float %3
    544 }
    545 
    546 !54 = !{!"float2", !15}
    547 define void @rsSetElementAtImpl_float2([1 x i32] %a.coerce, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    548   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    549   %2 = bitcast i8* %1 to <2 x float>*
    550   store <2 x float> %val, <2 x float>* %2, align 8, !tbaa !54
    551   ret void
    552 }
    553 
    554 define <2 x float> @rsGetElementAtImpl_float2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    555   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    556   %2 = bitcast i8* %1 to <2 x float>*
    557   %3 = load <2 x float>, <2 x float>* %2, align 8, !tbaa !54
    558   ret <2 x float> %3
    559 }
    560 
    561 !55 = !{!"float3", !15}
    562 define void @rsSetElementAtImpl_float3([1 x i32] %a.coerce, <3 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    563   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    564   %2 = shufflevector <3 x float> %val, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    565   %3 = bitcast i8* %1 to <4 x float>*
    566   store <4 x float> %2, <4 x float>* %3, align 16, !tbaa !55
    567   ret void
    568 }
    569 
    570 define <3 x float> @rsGetElementAtImpl_float3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    571   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    572   %2 = bitcast i8* %1 to <4 x float>*
    573   %3 = load <4 x float>, <4 x float>* %2, align 8, !tbaa !55
    574   %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    575   ret <3 x float> %4
    576 }
    577 
    578 !56 = !{!"float4", !15}
    579 define void @rsSetElementAtImpl_float4([1 x i32] %a.coerce, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    580   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    581   %2 = bitcast i8* %1 to <4 x float>*
    582   store <4 x float> %val, <4 x float>* %2, align 16, !tbaa !56
    583   ret void
    584 }
    585 
    586 define <4 x float> @rsGetElementAtImpl_float4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    587   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
    588   %2 = bitcast i8* %1 to <4 x float>*
    589   %3 = load <4 x float>, <4 x float>* %2, align 16, !tbaa !56
    590   ret <4 x float> %3
    591 }
    592 
; TBAA type nodes for the double element types.  Each is a scalar-format
; node {name, parent}; !15 is the common parent defined earlier in the file
; (see the TBAA discussion at the top of the file).
!57 = !{!"double", !15}
; double setter: element stride 8 bytes, naturally aligned scalar store.
define void @rsSetElementAtImpl_double([1 x i32] %a.coerce, double %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to double*
  store double %val, double* %2, align 8, !tbaa !57
  ret void
}

; double getter: mirrors the setter (stride 8, aligned load, same TBAA node).
define double @rsGetElementAtImpl_double([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to double*
  %3 = load double, double* %2, align 8, !tbaa !57
  ret double %3
}

!58 = !{!"double2", !15}
; double2 setter: element stride 16 bytes, aligned vector store.
define void @rsSetElementAtImpl_double2([1 x i32] %a.coerce, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x double>*
  store <2 x double> %val, <2 x double>* %2, align 16, !tbaa !58
  ret void
}

; double2 getter: mirrors the setter.
define <2 x double> @rsGetElementAtImpl_double2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 16, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x double>*
  %3 = load <2 x double>, <2 x double>* %2, align 16, !tbaa !58
  ret <2 x double> %3
}

!59 = !{!"double3", !15}
; double3 setter: element stride 32 bytes (a vec3 occupies a vec4 slot).
; The shufflevector widens <3 x double> to <4 x double> with an undef 4th
; lane, and the full padded vector is stored.
define void @rsSetElementAtImpl_double3([1 x i32] %a.coerce, <3 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
  %2 = shufflevector <3 x double> %val, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %3 = bitcast i8* %1 to <4 x double>*
  store <4 x double> %2, <4 x double>* %3, align 32, !tbaa !59
  ret void
}
    631 
    632 
; double3 getter: returns via sret.  A full <4 x double> (32 bytes; the 4th
; lane is the vec3 slot's padding) is loaded and stored through the result
; pointer recast to <4 x double>*, so the caller's sret buffer must have
; room for four doubles.
; NOTE(review): unlike the sibling getters, the load below carries no !tbaa
; annotation -- presumably an omission; confirm before relying on TBAA here.
define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x double>*
  %3 = load <4 x double>, <4 x double>* %2, align 32
  %4 = bitcast <3 x double>* %agg.result to <4 x double>*
  store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59
  ret void
}
    641 
; TBAA type node for double4 elements (parent !15, defined earlier).
!60 = !{!"double4", !15}
; double4 setter: element stride 32 bytes, aligned vector store.
define void @rsSetElementAtImpl_double4([1 x i32] %a.coerce, <4 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x double>*
  store <4 x double> %val, <4 x double>* %2, align 32, !tbaa !60
  ret void
}
    649 define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
    650   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    651   %2 = bitcast i8* %1 to <4 x double>*
    652   %3 = load <4 x double>, <4 x double>* %2, align 32, !tbaa !15
    653   store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60
    654   ret void
    655 }
    656 
; TBAA type node for half (fp16) elements (parent !15, defined earlier).
!61 = !{!"half", !15}
; half setter: element stride 2 bytes, aligned scalar store.
define void @rsSetElementAtImpl_half([1 x i32] %a.coerce, half %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to half*
  store half %val, half* %2, align 2, !tbaa !61
  ret void
}

; half getter: mirrors the setter (stride 2, aligned load, same TBAA node).
define half @rsGetElementAtImpl_half([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to half*
  %3 = load half, half* %2, align 2, !tbaa !61
  ret half %3
}

!62 = !{!"half2", !15}
; half2 setter: element stride 4 bytes, aligned vector store.
define void @rsSetElementAtImpl_half2([1 x i32] %a.coerce, <2 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x half>*
  store <2 x half> %val, <2 x half>* %2, align 4, !tbaa !62
  ret void
}

; half2 getter: mirrors the setter.
define <2 x half> @rsGetElementAtImpl_half2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x half>*
  %3 = load <2 x half>, <2 x half>* %2, align 4, !tbaa !62
  ret <2 x half> %3
}

!63 = !{!"half3", !15}
; half3 setter: element stride 8 bytes (a vec3 occupies a vec4 slot).  The
; shufflevector widens <3 x half> to <4 x half> with an undef 4th lane, and
; the full padded vector is stored.
define void @rsSetElementAtImpl_half3([1 x i32] %a.coerce, <3 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
  %2 = shufflevector <3 x half> %val, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %3 = bitcast i8* %1 to <4 x half>*
  store <4 x half> %2, <4 x half>* %3, align 8, !tbaa !63
  ret void
}
    695 
    696 define void @rsGetElementAtImpl_half3(<3 x half>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
    697   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 32, i32 %x, i32 %y, i32 %z) #2
    698   %2 = bitcast i8* %1 to <4 x half>*
    699   %3 = load <4 x half>, <4 x half>* %2, align 8
    700   %4 = bitcast <3 x half>* %agg.result to <4 x half>*
    701   store <4 x half> %3, <4 x half>* %4, align 8, !tbaa !63
    702   ret void
    703 }
    704 
    705 !64 = !{!"half4", !15}
    706 define void @rsSetElementAtImpl_half4([1 x i32] %a.coerce, <4 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
    707   %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    708   %2 = bitcast i8* %1 to <4 x half>*
    709   store <4 x half> %val, <4 x half>* %2, align 8, !tbaa !64
    710   ret void
    711 }
    712 
; half4 getter: element stride 8 bytes, aligned vector load.
define <4 x half> @rsGetElementAtImpl_half4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffset([1 x i32] %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x half>*
  %3 = load <4 x half>, <4 x half>* %2, align 8, !tbaa !64
  ret <4 x half> %3
}
    719 
; __rsAllocationVLoadXImpl_* (long/ulong): vector loads from an address
; computed by rsOffsetNs, which takes no element-size argument.  The loads
; use element alignment (align 8 for i64) rather than full vector
; alignment.  The vec3 variants load a full 4-element vector and write all
; four lanes through the sret pointer recast to a vec4 type (4th lane is
; padding).
; NOTE(review): the !tbaa IDs on the sret stores (!52, !47, !48, !51) are
; defined earlier in the file and not visible here -- confirm they are the
; intended long4/long3/ulong4/ulong3 nodes.
define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i64>*
  %3 = load <4 x i64>, <4 x i64>* %2, align 8
  store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
  ret void
}
define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i64>*
  %3 = load <4 x i64>, <4 x i64>* %2, align 8
  %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
  store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
  ret void
}
define <2 x i64> @__rsAllocationVLoadXImpl_long2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i64>*
  %3 = load <2 x i64>, <2 x i64>* %2, align 8
  ret <2 x i64> %3
}

; Unsigned variants: identical code to the long* loads above (i64 carries
; no signedness in IR); only the exported names and TBAA IDs differ.
define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i64>*
  %3 = load <4 x i64>, <4 x i64>* %2, align 8
  store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
  ret void
}
define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i64>*
  %3 = load <4 x i64>, <4 x i64>* %2, align 8
  %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
  store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
  ret void
}
define <2 x i64> @__rsAllocationVLoadXImpl_ulong2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i64>*
  %3 = load <2 x i64>, <2 x i64>* %2, align 8
  ret <2 x i64> %3
}
    763 
; __rsAllocationVLoadXImpl_* (int/uint): vector loads at element alignment
; (align 4) from the rsOffsetNs address; returned directly by value, no
; TBAA annotation (conservatively MayAlias).  Signed and unsigned variants
; are identical in IR apart from their exported names.
define <4 x i32> @__rsAllocationVLoadXImpl_int4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i32>*
  %3 = load <4 x i32>, <4 x i32>* %2, align 4
  ret <4 x i32> %3
}
define <3 x i32> @__rsAllocationVLoadXImpl_int3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i32>*
  %3 = load <3 x i32>, <3 x i32>* %2, align 4
  ret <3 x i32> %3
}
define <2 x i32> @__rsAllocationVLoadXImpl_int2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i32>*
  %3 = load <2 x i32>, <2 x i32>* %2, align 4
  ret <2 x i32> %3
}

define <4 x i32> @__rsAllocationVLoadXImpl_uint4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i32>*
  %3 = load <4 x i32>, <4 x i32>* %2, align 4
  ret <4 x i32> %3
}
define <3 x i32> @__rsAllocationVLoadXImpl_uint3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i32>*
  %3 = load <3 x i32>, <3 x i32>* %2, align 4
  ret <3 x i32> %3
}
define <2 x i32> @__rsAllocationVLoadXImpl_uint2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i32>*
  %3 = load <2 x i32>, <2 x i32>* %2, align 4
  ret <2 x i32> %3
}
    801 
; __rsAllocationVLoadXImpl_* (short/ushort): vector loads at element
; alignment (align 2) from the rsOffsetNs address; returned by value, no
; TBAA.  Signed and unsigned variants are identical in IR.
define <4 x i16> @__rsAllocationVLoadXImpl_short4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i16>*
  %3 = load <4 x i16>, <4 x i16>* %2, align 2
  ret <4 x i16> %3
}
define <3 x i16> @__rsAllocationVLoadXImpl_short3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i16>*
  %3 = load <3 x i16>, <3 x i16>* %2, align 2
  ret <3 x i16> %3
}
define <2 x i16> @__rsAllocationVLoadXImpl_short2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i16>*
  %3 = load <2 x i16>, <2 x i16>* %2, align 2
  ret <2 x i16> %3
}

define <4 x i16> @__rsAllocationVLoadXImpl_ushort4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i16>*
  %3 = load <4 x i16>, <4 x i16>* %2, align 2
  ret <4 x i16> %3
}
define <3 x i16> @__rsAllocationVLoadXImpl_ushort3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i16>*
  %3 = load <3 x i16>, <3 x i16>* %2, align 2
  ret <3 x i16> %3
}
define <2 x i16> @__rsAllocationVLoadXImpl_ushort2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i16>*
  %3 = load <2 x i16>, <2 x i16>* %2, align 2
  ret <2 x i16> %3
}
    839 
; __rsAllocationVLoadXImpl_* (char/uchar): byte-aligned vector loads from
; the rsOffsetNs address; returned by value, no TBAA.  Signed and unsigned
; variants are identical in IR.
define <4 x i8> @__rsAllocationVLoadXImpl_char4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i8>*
  %3 = load <4 x i8>, <4 x i8>* %2, align 1
  ret <4 x i8> %3
}
define <3 x i8> @__rsAllocationVLoadXImpl_char3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i8>*
  %3 = load <3 x i8>, <3 x i8>* %2, align 1
  ret <3 x i8> %3
}
define <2 x i8> @__rsAllocationVLoadXImpl_char2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i8>*
  %3 = load <2 x i8>, <2 x i8>* %2, align 1
  ret <2 x i8> %3
}

define <4 x i8> @__rsAllocationVLoadXImpl_uchar4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i8>*
  %3 = load <4 x i8>, <4 x i8>* %2, align 1
  ret <4 x i8> %3
}
define <3 x i8> @__rsAllocationVLoadXImpl_uchar3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i8>*
  %3 = load <3 x i8>, <3 x i8>* %2, align 1
  ret <3 x i8> %3
}
define <2 x i8> @__rsAllocationVLoadXImpl_uchar2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i8>*
  %3 = load <2 x i8>, <2 x i8>* %2, align 1
  ret <2 x i8> %3
}
    877 
; __rsAllocationVLoadXImpl_* (float): vector loads at element alignment
; (align 4) from the rsOffsetNs address; returned by value, no TBAA.
define <4 x float> @__rsAllocationVLoadXImpl_float4([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x float>*
  %3 = load <4 x float>, <4 x float>* %2, align 4
  ret <4 x float> %3
}
define <3 x float> @__rsAllocationVLoadXImpl_float3([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x float>*
  %3 = load <3 x float>, <3 x float>* %2, align 4
  ret <3 x float> %3
}
define <2 x float> @__rsAllocationVLoadXImpl_float2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x float>*
  %3 = load <2 x float>, <2 x float>* %2, align 4
  ret <2 x float> %3
}
    896 
; __rsAllocationVLoadXImpl_* (double): loads at element alignment (align 8).
; The vec4/vec3 variants return via sret; the vec3 variant loads a full
; <4 x double> and writes all four lanes through the sret pointer recast to
; <4 x double>* (4th lane is padding).  The sret stores reuse the !59/!60
; double3/double4 TBAA nodes defined earlier in the file.
define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x double>*
  %3 = load <4 x double>, <4 x double>* %2, align 8
  store <4 x double> %3, <4 x double>* %agg.result, align 32, !tbaa !60
  ret void
}
define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, [1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x double>*
  %3 = load <4 x double>, <4 x double>* %2, align 8
  %4 = bitcast <3 x double>* %agg.result to <4 x double>*
  store <4 x double> %3, <4 x double>* %4, align 32, !tbaa !59
  ret void
}
define <2 x double> @__rsAllocationVLoadXImpl_double2([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x double>*
  %3 = load <2 x double>, <2 x double>* %2, align 8
  ret <2 x double> %3
}
    918 
    919 
; __rsAllocationVStoreXImpl_* (long/ulong): vector stores to the rsOffsetNs
; address at element alignment (align 8), no TBAA.  Unlike the
; rsSetElementAtImpl_* vec3 setters, the vec3 variants here store a
; <3 x i64> directly (no widening to vec4).  Signed and unsigned variants
; are identical in IR.
define void @__rsAllocationVStoreXImpl_long4([1 x i32] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i64>*
  store <4 x i64> %val, <4 x i64>* %2, align 8
  ret void
}
define void @__rsAllocationVStoreXImpl_long3([1 x i32] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i64>*
  store <3 x i64> %val, <3 x i64>* %2, align 8
  ret void
}
define void @__rsAllocationVStoreXImpl_long2([1 x i32] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i64>*
  store <2 x i64> %val, <2 x i64>* %2, align 8
  ret void
}

define void @__rsAllocationVStoreXImpl_ulong4([1 x i32] %a.coerce, <4 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i64>*
  store <4 x i64> %val, <4 x i64>* %2, align 8
  ret void
}
define void @__rsAllocationVStoreXImpl_ulong3([1 x i32] %a.coerce, <3 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i64>*
  store <3 x i64> %val, <3 x i64>* %2, align 8
  ret void
}
define void @__rsAllocationVStoreXImpl_ulong2([1 x i32] %a.coerce, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i64>*
  store <2 x i64> %val, <2 x i64>* %2, align 8
  ret void
}
    957 
; __rsAllocationVStoreXImpl_* (int/uint): vector stores at element
; alignment (align 4) to the rsOffsetNs address, no TBAA.  Signed and
; unsigned variants are identical in IR.
define void @__rsAllocationVStoreXImpl_int4([1 x i32] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i32>*
  store <4 x i32> %val, <4 x i32>* %2, align 4
  ret void
}
define void @__rsAllocationVStoreXImpl_int3([1 x i32] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i32>*
  store <3 x i32> %val, <3 x i32>* %2, align 4
  ret void
}
define void @__rsAllocationVStoreXImpl_int2([1 x i32] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i32>*
  store <2 x i32> %val, <2 x i32>* %2, align 4
  ret void
}

define void @__rsAllocationVStoreXImpl_uint4([1 x i32] %a.coerce, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i32>*
  store <4 x i32> %val, <4 x i32>* %2, align 4
  ret void
}
define void @__rsAllocationVStoreXImpl_uint3([1 x i32] %a.coerce, <3 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i32>*
  store <3 x i32> %val, <3 x i32>* %2, align 4
  ret void
}
define void @__rsAllocationVStoreXImpl_uint2([1 x i32] %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i32>*
  store <2 x i32> %val, <2 x i32>* %2, align 4
  ret void
}
    995 
; __rsAllocationVStoreXImpl_* (short/ushort): vector stores at element
; alignment (align 2) to the rsOffsetNs address, no TBAA.  Signed and
; unsigned variants are identical in IR.
define void @__rsAllocationVStoreXImpl_short4([1 x i32] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i16>*
  store <4 x i16> %val, <4 x i16>* %2, align 2
  ret void
}
define void @__rsAllocationVStoreXImpl_short3([1 x i32] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i16>*
  store <3 x i16> %val, <3 x i16>* %2, align 2
  ret void
}
define void @__rsAllocationVStoreXImpl_short2([1 x i32] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i16>*
  store <2 x i16> %val, <2 x i16>* %2, align 2
  ret void
}

define void @__rsAllocationVStoreXImpl_ushort4([1 x i32] %a.coerce, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i16>*
  store <4 x i16> %val, <4 x i16>* %2, align 2
  ret void
}
define void @__rsAllocationVStoreXImpl_ushort3([1 x i32] %a.coerce, <3 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i16>*
  store <3 x i16> %val, <3 x i16>* %2, align 2
  ret void
}
define void @__rsAllocationVStoreXImpl_ushort2([1 x i32] %a.coerce, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i16>*
  store <2 x i16> %val, <2 x i16>* %2, align 2
  ret void
}
   1033 
; __rsAllocationVStoreXImpl_* (char/uchar): byte-aligned vector stores to
; the rsOffsetNs address, no TBAA.  Signed and unsigned variants are
; identical in IR.
define void @__rsAllocationVStoreXImpl_char4([1 x i32] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i8>*
  store <4 x i8> %val, <4 x i8>* %2, align 1
  ret void
}
define void @__rsAllocationVStoreXImpl_char3([1 x i32] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i8>*
  store <3 x i8> %val, <3 x i8>* %2, align 1
  ret void
}
define void @__rsAllocationVStoreXImpl_char2([1 x i32] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i8>*
  store <2 x i8> %val, <2 x i8>* %2, align 1
  ret void
}

define void @__rsAllocationVStoreXImpl_uchar4([1 x i32] %a.coerce, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x i8>*
  store <4 x i8> %val, <4 x i8>* %2, align 1
  ret void
}
define void @__rsAllocationVStoreXImpl_uchar3([1 x i32] %a.coerce, <3 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x i8>*
  store <3 x i8> %val, <3 x i8>* %2, align 1
  ret void
}
define void @__rsAllocationVStoreXImpl_uchar2([1 x i32] %a.coerce, <2 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x i8>*
  store <2 x i8> %val, <2 x i8>* %2, align 1
  ret void
}
   1071 
; __rsAllocationVStoreXImpl_* (float): vector stores at element alignment
; (align 4) to the rsOffsetNs address, no TBAA.
define void @__rsAllocationVStoreXImpl_float4([1 x i32] %a.coerce, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x float>*
  store <4 x float> %val, <4 x float>* %2, align 4
  ret void
}
define void @__rsAllocationVStoreXImpl_float3([1 x i32] %a.coerce, <3 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x float>*
  store <3 x float> %val, <3 x float>* %2, align 4
  ret void
}
define void @__rsAllocationVStoreXImpl_float2([1 x i32] %a.coerce, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x float>*
  store <2 x float> %val, <2 x float>* %2, align 4
  ret void
}
   1090 
; __rsAllocationVStoreXImpl_* (double): vector stores at element alignment
; (align 8) to the rsOffsetNs address, no TBAA.  The vec3 variant stores a
; <3 x double> directly (no widening to vec4, unlike rsSetElementAtImpl_double3).
define void @__rsAllocationVStoreXImpl_double4([1 x i32] %a.coerce, <4 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <4 x double>*
  store <4 x double> %val, <4 x double>* %2, align 8
  ret void
}
define void @__rsAllocationVStoreXImpl_double3([1 x i32] %a.coerce, <3 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <3 x double>*
  store <3 x double> %val, <3 x double>* %2, align 8
  ret void
}
define void @__rsAllocationVStoreXImpl_double2([1 x i32] %a.coerce, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
  %1 = tail call i8* @rsOffsetNs([1 x i32] %a.coerce, i32 %x, i32 %y, i32 %z) #2
  %2 = bitcast i8* %1 to <2 x double>*
  store <2 x double> %val, <2 x double>* %2, align 8
  ret void
}
   1109 
   1110 
; Function attribute groups:
;   #0 -- pure getters (nounwind readonly);
;   #1 -- setters and sret-returning getters (nounwind, may write memory);
;   #2 -- applied at the rsOffset/rsOffsetNs call sites: nobuiltin prevents
;         the calls from being recognized/transformed as builtins.
attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nobuiltin }
   1114 
   1115