Home | History | Annotate | Download | only in ll64
      1 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      2 target triple = "aarch64-linux-android"
      3 
      4 %struct.rs_allocation = type { i64*, i64*, i64*, i64* }
      5 
      6 declare i8* @rsOffset(%struct.rs_allocation* nocapture readonly %a, i32 %sizeOf, i32 %x, i32 %y, i32 %z)
      7 declare i8* @rsOffsetNs(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z)
      8 
      9 ; The loads and stores in this file are annotated with RenderScript-specific
     10 ; information for the type based alias analysis, such that the TBAA analysis
     11 ; understands that loads and stores from two allocations with different types
     12 ; can never access the same memory element. This is different from C, where
     13 ; a char or uchar load/store is special as it can alias with about everything.
     14 ;
     15 ; The TBAA tree in this file has the node "RenderScript Distinct TBAA" as
     16 ; its root.
     17 ; This means all loads/stores that share this common root can be proven to not
     18 ; alias. However, the alias analysis still has to assume MayAlias between
     19 ; memory accesses in this file and memory accesses annotated with the C/C++
     20 ; TBAA metadata.
     21 ; A node named "RenderScript TBAA" wraps our distinct TBAA root node.
     22 ; If we can ensure that all accesses to elements loaded from RenderScript
     23 ; allocations are either annotated with the RenderScript TBAA information or
     24 ; not annotated at all, but never annotated with the C/C++ metadata, we
     25 ; can add the "RenderScript TBAA" tree under the C/C++ TBAA tree. This enables
     26 ; TBAA to prove that an access to data from the RenderScript allocation
     27 ; does not alias with a load/store accessing something not part of a RenderScript
     28 ; allocation.
     29 ; We do this by swapping the second operand of "RenderScript TBAA" with the node
     30 ; for "Simple C/C++ TBAA", thus connecting these TBAA groups. The other root
     31 ; node (with no children) can then safely be dropped from the analysis.
     32 
     33 !13 = !{!"RenderScript Distinct TBAA"}
     34 !14 = !{!"RenderScript TBAA", !13}
     35 !15 = !{!"allocation", !14}
     36 
     37 !21 = !{!"char", !15}
     38 define void @rsSetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i8 signext %val, i32 %x, i32 %y, i32 %z) #1 {
     39   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
     40   store i8 %val, i8* %1, align 1, !tbaa !21
     41   ret void
     42 }
     43 
     44 define signext i8 @rsGetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     45   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
     46   %2 = load i8, i8* %1, align 1, !tbaa !21
     47   ret i8 %2
     48 }
     49 
     50 !22 = !{!"char2", !15}
     51 define void @rsSetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
     52   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
     53   %2 = bitcast i8* %1 to <2 x i8>*
     54   %3 = bitcast i16 %val to <2 x i8>
     55   store <2 x i8> %3, <2 x i8>* %2, align 2, !tbaa !22
     56   ret void
     57 }
     58 
     59 define <2 x i8> @rsGetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     60   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
     61   %2 = bitcast i8* %1 to <2 x i8>*
     62   %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !22
     63   ret <2 x i8> %3
     64 }
     65 
     66 !23 = !{!"char3", !15}
     67 define void @rsSetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
     68   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     69   %2 = bitcast i32 %val to <4 x i8>
     70   %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
     71   %4 = bitcast i8* %1 to <4 x i8>*
     72   store <4 x i8> %3, <4 x i8>* %4, align 4, !tbaa !23
     73   ret void
     74 }
     75 
     76 define <3 x i8> @rsGetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     77   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     78   %2 = bitcast i8* %1 to <4 x i8>*
     79   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !23
     80   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
     81   ret <3 x i8> %4
     82 }
     83 
     84 !24 = !{!"char4", !15}
     85 define void @rsSetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
     86   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     87   %2 = bitcast i8* %1 to <4 x i8>*
     88   %3 = bitcast i32 %val to <4 x i8>
     89   store <4 x i8> %3, <4 x i8>* %2, align 4, !tbaa !24
     90   ret void
     91 }
     92 
     93 define <4 x i8> @rsGetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     94   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     95   %2 = bitcast i8* %1 to <4 x i8>*
     96   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !24
     97   ret <4 x i8> %3
     98 }
     99 
    100 !25 = !{!"uchar", !15}
    101 define void @rsSetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i8 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
    102   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
    103   store i8 %val, i8* %1, align 1, !tbaa !25
    104   ret void
    105 }
    106 
    107 define zeroext i8 @rsGetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    108   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
    109   %2 = load i8, i8* %1, align 1, !tbaa !25
    110   ret i8 %2
    111 }
    112 
    113 !26 = !{!"uchar2", !15}
    114 define void @rsSetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
    115   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    116   %2 = bitcast i8* %1 to <2 x i8>*
    117   %3 = bitcast i16 %val to <2 x i8>
    118   store <2 x i8> %3, <2 x i8>* %2, align 2, !tbaa !26
    119   ret void
    120 }
    121 
    122 define <2 x i8> @rsGetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    123   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    124   %2 = bitcast i8* %1 to <2 x i8>*
    125   %3 = load <2 x i8>, <2 x i8>* %2, align 2, !tbaa !26
    126   ret <2 x i8> %3
    127 }
    128 
    129 !27 = !{!"uchar3", !15}
    130 define void @rsSetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    131   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    132   %2 = bitcast i32 %val to <4 x i8>
    133   %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    134   %4 = bitcast i8* %1 to <4 x i8>*
    135   store <4 x i8> %3, <4 x i8>* %4, align 4, !tbaa !27
    136   ret void
    137 }
    138 
    139 define <3 x i8> @rsGetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    140   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    141   %2 = bitcast i8* %1 to <4 x i8>*
    142   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !27
    143   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
    144   ret <3 x i8> %4
    145 }
    146 
    147 !28 = !{!"uchar4", !15}
    148 define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    149   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    150   %2 = bitcast i8* %1 to <4 x i8>*
    151   %3 = bitcast i32 %val to <4 x i8>
    152   store <4 x i8> %3, <4 x i8>* %2, align 4, !tbaa !28
    153   ret void
    154 }
    155 
    156 define <4 x i8> @rsGetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    157   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    158   %2 = bitcast i8* %1 to <4 x i8>*
    159   %3 = load <4 x i8>, <4 x i8>* %2, align 4, !tbaa !28
    160   ret <4 x i8> %3
    161 }
    162 
    163 !29 = !{!"short", !15}
    164 define void @rsSetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i16 signext %val, i32 %x, i32 %y, i32 %z) #1 {
    165   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    166   %2 = bitcast i8* %1 to i16*
    167   store i16 %val, i16* %2, align 2, !tbaa !29
    168   ret void
    169 }
    170 
    171 define signext i16 @rsGetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    172   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    173   %2 = bitcast i8* %1 to i16*
    174   %3 = load i16, i16* %2, align 2, !tbaa !29
    175   ret i16 %3
    176 }
    177 
    178 !30 = !{!"short2", !15}
    179 define void @rsSetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    180   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    181   %2 = bitcast i8* %1 to <2 x i16>*
    182   %3 = bitcast i32 %val to <2 x i16>
    183   store <2 x i16> %3, <2 x i16>* %2, align 4, !tbaa !30
    184   ret void
    185 }
    186 
    187 define <2 x i16> @rsGetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    188   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    189   %2 = bitcast i8* %1 to <2 x i16>*
    190   %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !30
    191   ret <2 x i16> %3
    192 }
    193 
    194 !31 = !{!"short3", !15}
    195 define void @rsSetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    196   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    197   %2 = bitcast <2 x i32> %val to <4 x i16>
    198   %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    199   %4 = bitcast i8* %1 to <4 x i16>*
    200   store <4 x i16> %3, <4 x i16>* %4, align 8, !tbaa !31
    201   ret void
    202 }
    203 
    204 define <3 x i16> @rsGetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    205   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    206   %2 = bitcast i8* %1 to <4 x i16>*
    207   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !31
    208   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    209   ret <3 x i16> %4
    210 }
    211 
    212 !32 = !{!"short4", !15}
    213 define void @rsSetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    214   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    215   %2 = bitcast i8* %1 to <4 x i16>*
    216   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !32
    217   ret void
    218 }
    219 
    220 define <4 x i16> @rsGetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    221   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    222   %2 = bitcast i8* %1 to <4 x i16>*
    223   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !32
    224   ret <4 x i16> %3
    225 }
    226 
    227 !33 = !{!"ushort", !15}
    228 define void @rsSetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i16 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
    229   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    230   %2 = bitcast i8* %1 to i16*
    231   store i16 %val, i16* %2, align 2, !tbaa !33
    232   ret void
    233 }
    234 
    235 define zeroext i16 @rsGetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    236   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    237   %2 = bitcast i8* %1 to i16*
    238   %3 = load i16, i16* %2, align 2, !tbaa !33
    239   ret i16 %3
    240 }
    241 
    242 !34 = !{!"ushort2", !15}
    243 define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    244   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    245   %2 = bitcast i8* %1 to <2 x i16>*
    246   %3 = bitcast i32 %val to <2 x i16>
    247   store <2 x i16> %3, <2 x i16>* %2, align 4, !tbaa !34
    248   ret void
    249 }
    250 
    251 define <2 x i16> @rsGetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    252   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    253   %2 = bitcast i8* %1 to <2 x i16>*
    254   %3 = load <2 x i16>, <2 x i16>* %2, align 4, !tbaa !34
    255   ret <2 x i16> %3
    256 }
    257 
    258 !35 = !{!"ushort3", !15}
    259 define void @rsSetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    260   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    261   %2 = bitcast <2 x i32> %val to <4 x i16>
    262   %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    263   %4 = bitcast i8* %1 to <4 x i16>*
    264   store <4 x i16> %3, <4 x i16>* %4, align 8, !tbaa !35
    265   ret void
    266 }
    267 
    268 define <3 x i16> @rsGetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    269   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    270   %2 = bitcast i8* %1 to <4 x i16>*
    271   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !35
    272   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    273   ret <3 x i16> %4
    274 }
    275 
    276 !36 = !{!"ushort4", !15}
    277 define void @rsSetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    278   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    279   %2 = bitcast i8* %1 to <4 x i16>*
    280   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !36
    281   ret void
    282 }
    283 
    284 define <4 x i16> @rsGetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    285   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    286   %2 = bitcast i8* %1 to <4 x i16>*
    287   %3 = load <4 x i16>, <4 x i16>* %2, align 8, !tbaa !36
    288   ret <4 x i16> %3
    289 }
    290 
    291 !37 = !{!"int", !15}
    292 define void @rsSetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    293   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    294   %2 = bitcast i8* %1 to i32*
    295   store i32 %val, i32* %2, align 4, !tbaa !37
    296   ret void
    297 }
    298 
    299 define i32 @rsGetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    300   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    301   %2 = bitcast i8* %1 to i32*
    302   %3 = load i32, i32* %2, align 4, !tbaa !37
    303   ret i32 %3
    304 }
    305 
    306 !38 = !{!"int2", !15}
    307 define void @rsSetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    308   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    309   %2 = bitcast i8* %1 to <2 x i32>*
    310   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !38
    311   ret void
    312 }
    313 
    314 define <2 x i32> @rsGetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    315   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    316   %2 = bitcast i8* %1 to <2 x i32>*
    317   %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !38
    318   ret <2 x i32> %3
    319 }
    320 
    321 !39 = !{!"int3", !15}
    322 define void @rsSetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    323   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    324   %2 = shufflevector <4 x i32> %val, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    325   %3 = bitcast i8* %1 to <4 x i32>*
    326   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !39
    327   ret void
    328 }
    329 
    330 define <3 x i32> @rsGetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    331   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    332   %2 = bitcast i8* %1 to <4 x i32>*
    333   %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !39
    334   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    335   ret <3 x i32> %4
    336 }
    337 
    338 !40 = !{!"int4", !15}
    339 define void @rsSetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    340   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    341   %2 = bitcast i8* %1 to <4 x i32>*
    342   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !40
    343   ret void
    344 }
    345 
    346 define <4 x i32> @rsGetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    347   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    348   %2 = bitcast i8* %1 to <4 x i32>*
    349   %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !40
    350   ret <4 x i32> %3
    351 }
    352 
    353 !41 = !{!"uint", !15}
    354 define void @rsSetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    355   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    356   %2 = bitcast i8* %1 to i32*
    357   store i32 %val, i32* %2, align 4, !tbaa !41
    358   ret void
    359 }
    360 
    361 define i32 @rsGetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    362   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    363   %2 = bitcast i8* %1 to i32*
    364   %3 = load i32, i32* %2, align 4, !tbaa !41
    365   ret i32 %3
    366 }
    367 
    368 !42 = !{!"uint2", !15}
    369 define void @rsSetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    370   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    371   %2 = bitcast i8* %1 to <2 x i32>*
    372   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !42
    373   ret void
    374 }
    375 
    376 define <2 x i32> @rsGetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    377   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    378   %2 = bitcast i8* %1 to <2 x i32>*
    379   %3 = load <2 x i32>, <2 x i32>* %2, align 8, !tbaa !42
    380   ret <2 x i32> %3
    381 }
    382 
    383 !43 = !{!"uint3", !15}
    384 define void @rsSetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    385   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    386   %2 = shufflevector <4 x i32> %val, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    387   %3 = bitcast i8* %1 to <4 x i32>*
    388   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !43
    389   ret void
    390 }
    391 
    392 define <3 x i32> @rsGetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    393   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    394   %2 = bitcast i8* %1 to <4 x i32>*
    395   %3 = load <4 x i32>, <4 x i32>* %2, align 8, !tbaa !43
    396   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    397   ret <3 x i32> %4
    398 }
    399 
    400 !44 = !{!"uint4", !15}
    401 define void @rsSetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    402   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    403   %2 = bitcast i8* %1 to <4 x i32>*
    404   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !44
    405   ret void
    406 }
    407 
    408 define <4 x i32> @rsGetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    409   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    410   %2 = bitcast i8* %1 to <4 x i32>*
    411   %3 = load <4 x i32>, <4 x i32>* %2, align 16, !tbaa !44
    412   ret <4 x i32> %3
    413 }
    414 
    415 !45 = !{!"long", !15}
    416 define void @rsSetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    417   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    418   %2 = bitcast i8* %1 to i64*
    419   store i64 %val, i64* %2, align 8, !tbaa !45
    420   ret void
    421 }
    422 
    423 define i64 @rsGetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    424   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    425   %2 = bitcast i8* %1 to i64*
    426   %3 = load i64, i64* %2, align 8, !tbaa !45
    427   ret i64 %3
    428 }
    429 
    430 !46 = !{!"long2", !15}
    431 define void @rsSetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    432   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    433   %2 = bitcast i8* %1 to <2 x i64>*
    434   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !46
    435   ret void
    436 }
    437 
    438 define <2 x i64> @rsGetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    439   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    440   %2 = bitcast i8* %1 to <2 x i64>*
    441   %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !46
    442   ret <2 x i64> %3
    443 }
    444 
    445 !47 = !{!"long3", !15}
    446 define void @rsSetElementAtImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    447   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    448   %2 = load <3 x i64>, <3 x i64>* %val
    449   %3 = shufflevector <3 x i64> %2, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    450   %4 = bitcast i8* %1 to <4 x i64>*
    451   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
    452   ret void
    453 }
    454 
    455 define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    456   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    457   %2 = bitcast i8* %1 to <4 x i64>*
    458   %3 = load <4 x i64>, <4 x i64>* %2, align 32
    459   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
    460   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
    461   ret void
    462 }
    463 
    464 !48 = !{!"long4", !15}
    465 define void @rsSetElementAtImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    466   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    467   %2 = load <4 x i64>, <4 x i64>* %val
    468   %3 = bitcast i8* %1 to <4 x i64>*
    469   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !48
    470   ret void
    471 }
    472 
    473 define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    474   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    475   %2 = bitcast i8* %1 to <4 x i64>*
    476   %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15
    477   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
    478   ret void
    479 }
    480 
    481 !49 = !{!"ulong", !15}
    482 define void @rsSetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    483   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    484   %2 = bitcast i8* %1 to i64*
    485   store i64 %val, i64* %2, align 8, !tbaa !49
    486   ret void
    487 }
    488 
    489 define i64 @rsGetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    490   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    491   %2 = bitcast i8* %1 to i64*
    492   %3 = load i64, i64* %2, align 8, !tbaa !49
    493   ret i64 %3
    494 }
    495 
    496 !50 = !{!"ulong2", !15}
    497 define void @rsSetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    498   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    499   %2 = bitcast i8* %1 to <2 x i64>*
    500   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !50
    501   ret void
    502 }
    503 
    504 define <2 x i64> @rsGetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    505   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    506   %2 = bitcast i8* %1 to <2 x i64>*
    507   %3 = load <2 x i64>, <2 x i64>* %2, align 16, !tbaa !50
    508   ret <2 x i64> %3
    509 }
    510 
    511 !51 = !{!"ulong3", !15}
    512 define void @rsSetElementAtImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    513   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    514   %2 = load <3 x i64>, <3 x i64>* %val
    515   %3 = shufflevector <3 x i64> %2, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    516   %4 = bitcast i8* %1 to <4 x i64>*
    517   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
    518   ret void
    519 }
    520 
    521 define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    522   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    523   %2 = bitcast i8* %1 to <4 x i64>*
    524   %3 = load <4 x i64>, <4 x i64>* %2, align 32
    525   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
    526   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
    527   ret void
    528 }
    529 
    530 !52 = !{!"ulong4", !15}
    531 define void @rsSetElementAtImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    532   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    533   %2 = load <4 x i64>, <4 x i64>* %val
    534   %3 = bitcast i8* %1 to <4 x i64>*
    535   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !52
    536   ret void
    537 }
    538 
    539 define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    540   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    541   %2 = bitcast i8* %1 to <4 x i64>*
    542   %3 = load <4 x i64>, <4 x i64>* %2, align 32, !tbaa !15
    543   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
    544   ret void
    545 }
    546 
    547 !53 = !{!"float", !15}
    548 define void @rsSetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, float %val, i32 %x, i32 %y, i32 %z) #1 {
    549   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    550   %2 = bitcast i8* %1 to float*
    551   store float %val, float* %2, align 4, !tbaa !53
    552   ret void
    553 }
    554 
    555 define float @rsGetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    556   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    557   %2 = bitcast i8* %1 to float*
    558   %3 = load float, float* %2, align 4, !tbaa !53
    559   ret float %3
    560 }
    561 
    562 !54 = !{!"float2", !15}
    563 define void @rsSetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    564   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    565   %2 = bitcast i8* %1 to <2 x float>*
    566   store <2 x float> %val, <2 x float>* %2, align 8, !tbaa !54
    567   ret void
    568 }
    569 
    570 define <2 x float> @rsGetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    571   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    572   %2 = bitcast i8* %1 to <2 x float>*
    573   %3 = load <2 x float>, <2 x float>* %2, align 8, !tbaa !54
    574   ret <2 x float> %3
    575 }
    576 
    577 !55 = !{!"float3", !15}
    578 define void @rsSetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    579   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    580   %2 = bitcast <4 x i32> %val to <4 x float>
    581   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    582   %4 = bitcast i8* %1 to <4 x float>*
    583   store <4 x float> %3, <4 x float>* %4, align 16, !tbaa !55
    584   ret void
    585 }
    586 
    587 define <3 x float> @rsGetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Loads a whole <4 x float> from the rsOffset address (sizeOf = 16) and returns lanes 0-2.
    588   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    589   %slot = bitcast i8* %addr to <4 x float>*
    590   %wide = load <4 x float>, <4 x float>* %slot, align 8, !tbaa !55
    591   %lanes = shufflevector <4 x float> %wide, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    592   ret <3 x float> %lanes
    593 }
    594 
    595 !56 = !{!"float4", !15}  ; TBAA type node "float4" (second operand !15); tags the float4 element accesses below
    596 define void @rsSetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val to the address rsOffset yields for (%x, %y, %z) with sizeOf = 16.
    597   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    598   %slot = bitcast i8* %addr to <4 x float>*
    599   store <4 x float> %val, <4 x float>* %slot, align 16, !tbaa !56
    600   ret void
    601 }
    602 
    603 define <4 x float> @rsGetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x float> loaded from the address rsOffset yields for (%x, %y, %z) with sizeOf = 16.
    604   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    605   %slot = bitcast i8* %addr to <4 x float>*
    606   %elem = load <4 x float>, <4 x float>* %slot, align 16, !tbaa !56
    607   ret <4 x float> %elem
    608 }
    609 
    610 !57 = !{!"double", !15}  ; TBAA type node "double" (second operand !15); tags the double element accesses below
    611 define void @rsSetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, double %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val to the address rsOffset yields for (%x, %y, %z) with sizeOf = 8.
    612   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    613   %slot = bitcast i8* %addr to double*
    614   store double %val, double* %slot, align 8, !tbaa !57
    615   ret void
    616 }
    617 
    618 define double @rsGetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the double loaded from the address rsOffset yields for (%x, %y, %z) with sizeOf = 8.
    619   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    620   %slot = bitcast i8* %addr to double*
    621   %elem = load double, double* %slot, align 8, !tbaa !57
    622   ret double %elem
    623 }
    624 
    625 !58 = !{!"double2", !15}  ; TBAA type node "double2" (second operand !15); tags the double2 element accesses below
    626 define void @rsSetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val to the address rsOffset yields for (%x, %y, %z) with sizeOf = 16.
    627   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    628   %slot = bitcast i8* %addr to <2 x double>*
    629   store <2 x double> %val, <2 x double>* %slot, align 16, !tbaa !58
    630   ret void
    631 }
    632 
    633 define <2 x double> @rsGetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x double> loaded from the address rsOffset yields for (%x, %y, %z) with sizeOf = 16.
    634   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    635   %slot = bitcast i8* %addr to <2 x double>*
    636   %elem = load <2 x double>, <2 x double>* %slot, align 16, !tbaa !58
    637   ret <2 x double> %elem
    638 }
    639 
    640 !59 = !{!"double3", !15}  ; TBAA type node "double3" (second operand !15); used as the !tbaa tag in the double3 accessors below
    641 define void @rsSetElementAtImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The value is passed by pointer: it is loaded from %val, widened to 4 lanes (lane 3 undef)
          ; and stored as <4 x double> to the rsOffset address (sizeOf = 32).
    642   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    643   %triple = load <3 x double>, <3 x double>* %val
    644   %padded = shufflevector <3 x double> %triple, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    645   %slot = bitcast i8* %addr to <4 x double>*
    646   store <4 x double> %padded, <4 x double>* %slot, align 32, !tbaa !59
    647   ret void
    648 }
    649 
    650 
    651 define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Copies one <4 x double> from the rsOffset address (sizeOf = 32) into the sret slot %agg.result,
          ; which is accessed through a <4 x double>* view.
          ; NOTE(review): the allocation load carries no !tbaa tag while the sret store is tagged !59 - confirm this is intended.
    652   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    653   %src = bitcast i8* %addr to <4 x double>*
    654   %wide = load <4 x double>, <4 x double>* %src, align 32
    655   %dst = bitcast <3 x double>* %agg.result to <4 x double>*
    656   store <4 x double> %wide, <4 x double>* %dst, align 32, !tbaa !59
    657   ret void
    658 }
    659 
    660 !60 = !{!"double4", !15}  ; TBAA type node "double4" (second operand !15); used as the !tbaa tag in the double4 accessors below
    661 define void @rsSetElementAtImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The value is passed by pointer: it is loaded from %val and stored to the rsOffset address (sizeOf = 32).
    662   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    663   %quad = load <4 x double>, <4 x double>* %val
    664   %slot = bitcast i8* %addr to <4 x double>*
    665   store <4 x double> %quad, <4 x double>* %slot, align 32, !tbaa !60
    666   ret void
    667 }
    668 define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Copies one <4 x double> from the rsOffset address (sizeOf = 32) into the sret slot %agg.result.
          ; NOTE(review): the allocation load is tagged !15 rather than !60, and the sret store carries !60 - confirm this is intended.
    669   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    670   %src = bitcast i8* %addr to <4 x double>*
    671   %quad = load <4 x double>, <4 x double>* %src, align 32, !tbaa !15
    672   store <4 x double> %quad, <4 x double>* %agg.result, align 32, !tbaa !60
    673   ret void
    674 }
    675 
    676 !61 = !{!"half", !15}  ; TBAA type node "half" (second operand !15); tags the half element accesses below
    677 define void @rsSetElementAtImpl_half(%struct.rs_allocation* nocapture readonly %a.coerce, half %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val to the address rsOffset yields for (%x, %y, %z) with sizeOf = 2.
    678   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    679   %slot = bitcast i8* %addr to half*
    680   store half %val, half* %slot, align 2, !tbaa !61
    681   ret void
    682 }
    683 
    684 define half @rsGetElementAtImpl_half(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the half loaded from the address rsOffset yields for (%x, %y, %z) with sizeOf = 2.
    685   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    686   %slot = bitcast i8* %addr to half*
    687   %elem = load half, half* %slot, align 2, !tbaa !61
    688   ret half %elem
    689 }
    690 
    691 !62 = !{!"half2", !15}  ; TBAA type node "half2" (second operand !15); tags the half2 element accesses below
    692 define void @rsSetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <2 x half> and stored to the rsOffset address (sizeOf = 4).
    693   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    694   %slot = bitcast i8* %addr to <2 x half>*
    695   %pair = bitcast i32 %val to <2 x half>
    696   store <2 x half> %pair, <2 x half>* %slot, align 4, !tbaa !62
    697   ret void
    698 }
    699 
    700 define <2 x half> @rsGetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x half> loaded from the address rsOffset yields for (%x, %y, %z) with sizeOf = 4.
    701   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    702   %slot = bitcast i8* %addr to <2 x half>*
    703   %elem = load <2 x half>, <2 x half>* %slot, align 4, !tbaa !62
    704   ret <2 x half> %elem
    705 }
    706 
    707 !63 = !{!"half3", !15}  ; TBAA type node "half3" (second operand !15); tags the half3 element accesses below
    708 define void @rsSetElementAtImpl_half3(%struct.rs_allocation* nocapture readonly %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as <2 x i32>; it is reinterpreted as <4 x half>, lane 3 is left undef,
          ; and the whole 4-lane vector is stored to the rsOffset address (sizeOf = 8).
    709   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    710   %ashalf = bitcast <2 x i32> %val to <4 x half>
    711   %padded = shufflevector <4 x half> %ashalf, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    712   %slot = bitcast i8* %addr to <4 x half>*
    713   store <4 x half> %padded, <4 x half>* %slot, align 8, !tbaa !63
    714   ret void
    715 }
    716 
    717 define <3 x half> @rsGetElementAtImpl_half3(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
          ; Loads a whole <4 x half> from the rsOffset address (sizeOf = 8) and returns lanes 0-2.
          ; Uses attribute group #0 (nounwind readonly) like the other by-value getters: the body only
          ; calls rsOffset and loads, it never stores.
    718   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    719   %slot = bitcast i8* %addr to <4 x half>*
    720   %wide = load <4 x half>, <4 x half>* %slot, align 8, !tbaa !63
    721   %lanes = shufflevector <4 x half> %wide, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
    722   ret <3 x half> %lanes
    723 }
    724 
    725 !64 = !{!"half4", !15}  ; TBAA type node "half4" (second operand !15); tags the half4 element accesses below
    726 define void @rsSetElementAtImpl_half4(%struct.rs_allocation* nocapture readonly %a.coerce, <4 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val to the address rsOffset yields for (%x, %y, %z) with sizeOf = 8.
    727   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    728   %slot = bitcast i8* %addr to <4 x half>*
    729   store <4 x half> %val, <4 x half>* %slot, align 8, !tbaa !64
    730   ret void
    731 }
    732 
    733 define <4 x half> @rsGetElementAtImpl_half4(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x half> loaded from the address rsOffset yields for (%x, %y, %z) with sizeOf = 8.
    734   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    735   %slot = bitcast i8* %addr to <4 x half>*
    736   %elem = load <4 x half>, <4 x half>* %slot, align 8, !tbaa !64
    737   ret <4 x half> %elem
    738 }
    739 
    740 
    741 define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads <4 x i64> (align 8) from the rsOffsetNs address for (%x, %y, %z) and writes it to %agg.result.
    742   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    743   %src = bitcast i8* %addr to <4 x i64>*
    744   %vec = load <4 x i64>, <4 x i64>* %src, align 8
    745   store <4 x i64> %vec, <4 x i64>* %agg.result
    746   ret void
    747 }
    748 define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads <3 x i64> (align 8) from the rsOffsetNs address for (%x, %y, %z) and writes it to %agg.result.
    749   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    750   %src = bitcast i8* %addr to <3 x i64>*
    751   %vec = load <3 x i64>, <3 x i64>* %src, align 8
    752   store <3 x i64> %vec, <3 x i64>* %agg.result
    753   ret void
    754 }
    755 define <2 x i64> @__rsAllocationVLoadXImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i64> loaded (align 8) from the rsOffsetNs address for (%x, %y, %z).
    756   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    757   %src = bitcast i8* %addr to <2 x i64>*
    758   %vec = load <2 x i64>, <2 x i64>* %src, align 8
    759   ret <2 x i64> %vec
    760 }
    761 
    762 define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads <4 x i64> (align 8) from the rsOffsetNs address for (%x, %y, %z) and writes it to %agg.result.
    763   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    764   %src = bitcast i8* %addr to <4 x i64>*
    765   %vec = load <4 x i64>, <4 x i64>* %src, align 8
    766   store <4 x i64> %vec, <4 x i64>* %agg.result
    767   ret void
    768 }
    769 define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads <3 x i64> (align 8) from the rsOffsetNs address for (%x, %y, %z) and writes it to %agg.result.
    770   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    771   %src = bitcast i8* %addr to <3 x i64>*
    772   %vec = load <3 x i64>, <3 x i64>* %src, align 8
    773   store <3 x i64> %vec, <3 x i64>* %agg.result
    774   ret void
    775 }
    776 define <2 x i64> @__rsAllocationVLoadXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i64> loaded (align 8) from the rsOffsetNs address for (%x, %y, %z).
    777   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    778   %src = bitcast i8* %addr to <2 x i64>*
    779   %vec = load <2 x i64>, <2 x i64>* %src, align 8
    780   ret <2 x i64> %vec
    781 }
    782 
    783 define <4 x i32> @__rsAllocationVLoadXImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x i32> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    784   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    785   %src = bitcast i8* %addr to <4 x i32>*
    786   %vec = load <4 x i32>, <4 x i32>* %src, align 4
    787   ret <4 x i32> %vec
    788 }
    789 define <3 x i32> @__rsAllocationVLoadXImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x i32> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    790   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    791   %src = bitcast i8* %addr to <3 x i32>*
    792   %vec = load <3 x i32>, <3 x i32>* %src, align 4
    793   ret <3 x i32> %vec
    794 }
    795 define <2 x i32> @__rsAllocationVLoadXImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i32> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    796   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    797   %src = bitcast i8* %addr to <2 x i32>*
    798   %vec = load <2 x i32>, <2 x i32>* %src, align 4
    799   ret <2 x i32> %vec
    800 }
    801 
    802 define <4 x i32> @__rsAllocationVLoadXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x i32> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    803   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    804   %src = bitcast i8* %addr to <4 x i32>*
    805   %vec = load <4 x i32>, <4 x i32>* %src, align 4
    806   ret <4 x i32> %vec
    807 }
    808 define <3 x i32> @__rsAllocationVLoadXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x i32> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    809   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    810   %src = bitcast i8* %addr to <3 x i32>*
    811   %vec = load <3 x i32>, <3 x i32>* %src, align 4
    812   ret <3 x i32> %vec
    813 }
    814 define <2 x i32> @__rsAllocationVLoadXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i32> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    815   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    816   %src = bitcast i8* %addr to <2 x i32>*
    817   %vec = load <2 x i32>, <2 x i32>* %src, align 4
    818   ret <2 x i32> %vec
    819 }
    820 
    821 define <4 x i16> @__rsAllocationVLoadXImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x i16> loaded (align 2) from the rsOffsetNs address for (%x, %y, %z).
    822   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    823   %src = bitcast i8* %addr to <4 x i16>*
    824   %vec = load <4 x i16>, <4 x i16>* %src, align 2
    825   ret <4 x i16> %vec
    826 }
    827 define <3 x i16> @__rsAllocationVLoadXImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x i16> loaded (align 2) from the rsOffsetNs address for (%x, %y, %z).
    828   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    829   %src = bitcast i8* %addr to <3 x i16>*
    830   %vec = load <3 x i16>, <3 x i16>* %src, align 2
    831   ret <3 x i16> %vec
    832 }
    833 define <2 x i16> @__rsAllocationVLoadXImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i16> loaded (align 2) from the rsOffsetNs address for (%x, %y, %z).
    834   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    835   %src = bitcast i8* %addr to <2 x i16>*
    836   %vec = load <2 x i16>, <2 x i16>* %src, align 2
    837   ret <2 x i16> %vec
    838 }
    839 
    840 define <4 x i16> @__rsAllocationVLoadXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x i16> loaded (align 2) from the rsOffsetNs address for (%x, %y, %z).
    841   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    842   %src = bitcast i8* %addr to <4 x i16>*
    843   %vec = load <4 x i16>, <4 x i16>* %src, align 2
    844   ret <4 x i16> %vec
    845 }
    846 define <3 x i16> @__rsAllocationVLoadXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x i16> loaded (align 2) from the rsOffsetNs address for (%x, %y, %z).
    847   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    848   %src = bitcast i8* %addr to <3 x i16>*
    849   %vec = load <3 x i16>, <3 x i16>* %src, align 2
    850   ret <3 x i16> %vec
    851 }
    852 define <2 x i16> @__rsAllocationVLoadXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i16> loaded (align 2) from the rsOffsetNs address for (%x, %y, %z).
    853   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    854   %src = bitcast i8* %addr to <2 x i16>*
    855   %vec = load <2 x i16>, <2 x i16>* %src, align 2
    856   ret <2 x i16> %vec
    857 }
    858 
    859 define <4 x i8> @__rsAllocationVLoadXImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x i8> loaded (align 1) from the rsOffsetNs address for (%x, %y, %z).
    860   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    861   %src = bitcast i8* %addr to <4 x i8>*
    862   %vec = load <4 x i8>, <4 x i8>* %src, align 1
    863   ret <4 x i8> %vec
    864 }
    865 define <3 x i8> @__rsAllocationVLoadXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x i8> loaded (align 1) from the rsOffsetNs address for (%x, %y, %z).
    866   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    867   %src = bitcast i8* %addr to <3 x i8>*
    868   %vec = load <3 x i8>, <3 x i8>* %src, align 1
    869   ret <3 x i8> %vec
    870 }
    871 define <2 x i8> @__rsAllocationVLoadXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i8> loaded (align 1) from the rsOffsetNs address for (%x, %y, %z).
    872   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    873   %src = bitcast i8* %addr to <2 x i8>*
    874   %vec = load <2 x i8>, <2 x i8>* %src, align 1
    875   ret <2 x i8> %vec
    876 }
    877 
    878 define <4 x i8> @__rsAllocationVLoadXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x i8> loaded (align 1) from the rsOffsetNs address for (%x, %y, %z).
    879   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    880   %src = bitcast i8* %addr to <4 x i8>*
    881   %vec = load <4 x i8>, <4 x i8>* %src, align 1
    882   ret <4 x i8> %vec
    883 }
    884 define <3 x i8> @__rsAllocationVLoadXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x i8> loaded (align 1) from the rsOffsetNs address for (%x, %y, %z).
    885   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    886   %src = bitcast i8* %addr to <3 x i8>*
    887   %vec = load <3 x i8>, <3 x i8>* %src, align 1
    888   ret <3 x i8> %vec
    889 }
    890 define <2 x i8> @__rsAllocationVLoadXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x i8> loaded (align 1) from the rsOffsetNs address for (%x, %y, %z).
    891   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    892   %src = bitcast i8* %addr to <2 x i8>*
    893   %vec = load <2 x i8>, <2 x i8>* %src, align 1
    894   ret <2 x i8> %vec
    895 }
    896 
    897 define <4 x float> @__rsAllocationVLoadXImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <4 x float> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    898   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    899   %src = bitcast i8* %addr to <4 x float>*
    900   %vec = load <4 x float>, <4 x float>* %src, align 4
    901   ret <4 x float> %vec
    902 }
    903 define <3 x float> @__rsAllocationVLoadXImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <3 x float> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    904   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    905   %src = bitcast i8* %addr to <3 x float>*
    906   %vec = load <3 x float>, <3 x float>* %src, align 4
    907   ret <3 x float> %vec
    908 }
    909 define <2 x float> @__rsAllocationVLoadXImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x float> loaded (align 4) from the rsOffsetNs address for (%x, %y, %z).
    910   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    911   %src = bitcast i8* %addr to <2 x float>*
    912   %vec = load <2 x float>, <2 x float>* %src, align 4
    913   ret <2 x float> %vec
    914 }
    915 
    916 define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads <4 x double> (align 8) from the rsOffsetNs address for (%x, %y, %z) and writes it to %agg.result.
    917   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    918   %src = bitcast i8* %addr to <4 x double>*
    919   %vec = load <4 x double>, <4 x double>* %src, align 8
    920   store <4 x double> %vec, <4 x double>* %agg.result
    921   ret void
    922 }
    923 define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads <3 x double> (align 8) from the rsOffsetNs address for (%x, %y, %z) and writes it to %agg.result.
    924   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    925   %src = bitcast i8* %addr to <3 x double>*
    926   %vec = load <3 x double>, <3 x double>* %src, align 8
    927   store <3 x double> %vec, <3 x double>* %agg.result
    928   ret void
    929 }
    930 define <2 x double> @__rsAllocationVLoadXImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Returns the <2 x double> loaded (align 8) from the rsOffsetNs address for (%x, %y, %z).
    931   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    932   %src = bitcast i8* %addr to <2 x double>*
    933   %vec = load <2 x double>, <2 x double>* %src, align 8
    934   ret <2 x double> %vec
    935 }
    936 
    937 
    938 define void @__rsAllocationVStoreXImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <4 x i64> pointed to by %val and stores it (align 8) to the rsOffsetNs address for (%x, %y, %z).
    939   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    940   %vec = load <4 x i64>, <4 x i64>* %val
    941   %dst = bitcast i8* %addr to <4 x i64>*
    942   store <4 x i64> %vec, <4 x i64>* %dst, align 8
    943   ret void
    944 }
    945 define void @__rsAllocationVStoreXImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <3 x i64> pointed to by %val and stores it (align 8) to the rsOffsetNs address for (%x, %y, %z).
    946   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    947   %vec = load <3 x i64>, <3 x i64>* %val
    948   %dst = bitcast i8* %addr to <3 x i64>*
    949   store <3 x i64> %vec, <3 x i64>* %dst, align 8
    950   ret void
    951 }
    952 define void @__rsAllocationVStoreXImpl_long2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 8) to the rsOffsetNs address for (%x, %y, %z).
    953   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    954   %dst = bitcast i8* %addr to <2 x i64>*
    955   store <2 x i64> %val, <2 x i64>* %dst, align 8
    956   ret void
    957 }
    958 
    959 define void @__rsAllocationVStoreXImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <4 x i64> pointed to by %val and stores it (align 8) to the rsOffsetNs address for (%x, %y, %z).
    960   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    961   %vec = load <4 x i64>, <4 x i64>* %val
    962   %dst = bitcast i8* %addr to <4 x i64>*
    963   store <4 x i64> %vec, <4 x i64>* %dst, align 8
    964   ret void
    965 }
    966 define void @__rsAllocationVStoreXImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <3 x i64> pointed to by %val and stores it (align 8) to the rsOffsetNs address for (%x, %y, %z).
    967   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    968   %vec = load <3 x i64>, <3 x i64>* %val
    969   %dst = bitcast i8* %addr to <3 x i64>*
    970   store <3 x i64> %vec, <3 x i64>* %dst, align 8
    971   ret void
    972 }
    973 define void @__rsAllocationVStoreXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 8) to the rsOffsetNs address for (%x, %y, %z).
    974   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    975   %dst = bitcast i8* %addr to <2 x i64>*
    976   store <2 x i64> %val, <2 x i64>* %dst, align 8
    977   ret void
    978 }
    979 
    980 define void @__rsAllocationVStoreXImpl_int4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 4) to the rsOffsetNs address for (%x, %y, %z).
    981   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    982   %dst = bitcast i8* %addr to <4 x i32>*
    983   store <4 x i32> %val, <4 x i32>* %dst, align 4
    984   ret void
    985 }
    986 define void @__rsAllocationVStoreXImpl_int3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as <4 x i32>; only lanes 0-2 are stored, as <3 x i32> (align 4), to the rsOffsetNs address.
    987   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    988   %dst = bitcast i8* %addr to <3 x i32>*
    989   %lanes = shufflevector <4 x i32> %val, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    990   store <3 x i32> %lanes, <3 x i32>* %dst, align 4
    991   ret void
    992 }
    993 define void @__rsAllocationVStoreXImpl_int2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 4) to the rsOffsetNs address for (%x, %y, %z).
    994   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    995   %dst = bitcast i8* %addr to <2 x i32>*
    996   store <2 x i32> %val, <2 x i32>* %dst, align 4
    997   ret void
    998 }
    999 
   1000 define void @__rsAllocationVStoreXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 4) to the rsOffsetNs address for (%x, %y, %z).
   1001   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1002   %dst = bitcast i8* %addr to <4 x i32>*
   1003   store <4 x i32> %val, <4 x i32>* %dst, align 4
   1004   ret void
   1005 }
   1006 define void @__rsAllocationVStoreXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as <4 x i32>; only lanes 0-2 are stored, as <3 x i32> (align 4), to the rsOffsetNs address.
   1007   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1008   %dst = bitcast i8* %addr to <3 x i32>*
   1009   %lanes = shufflevector <4 x i32> %val, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1010   store <3 x i32> %lanes, <3 x i32>* %dst, align 4
   1011   ret void
   1012 }
   1013 define void @__rsAllocationVStoreXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 4) to the rsOffsetNs address for (%x, %y, %z).
   1014   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1015   %dst = bitcast i8* %addr to <2 x i32>*
   1016   store <2 x i32> %val, <2 x i32>* %dst, align 4
   1017   ret void
   1018 }
   1019 
   1020 define void @__rsAllocationVStoreXImpl_short4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 2) to the rsOffsetNs address for (%x, %y, %z).
   1021   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1022   %dst = bitcast i8* %addr to <4 x i16>*
   1023   store <4 x i16> %val, <4 x i16>* %dst, align 2
   1024   ret void
   1025 }
   1026 define void @__rsAllocationVStoreXImpl_short3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as <2 x i32>; it is reinterpreted as <4 x i16> and lanes 0-2 are stored
          ; as <3 x i16> (align 2) to the rsOffsetNs address for (%x, %y, %z).
   1027   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1028   %dst = bitcast i8* %addr to <3 x i16>*
   1029   %wide = bitcast <2 x i32> %val to <4 x i16>
   1030   %lanes = shufflevector <4 x i16> %wide, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1031   store <3 x i16> %lanes, <3 x i16>* %dst, align 2
   1032   ret void
   1033 }
   1034 define void @__rsAllocationVStoreXImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <2 x i16> and stored (align 2) to the rsOffsetNs address.
   1035   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1036   %dst = bitcast i8* %addr to <2 x i16>*
   1037   %pair = bitcast i32 %val to <2 x i16>
   1038   store <2 x i16> %pair, <2 x i16>* %dst, align 2
   1039   ret void
   1040 }
   1041 
   1042 define void @__rsAllocationVStoreXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 2) to the rsOffsetNs address for (%x, %y, %z).
   1043   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1044   %dst = bitcast i8* %addr to <4 x i16>*
   1045   store <4 x i16> %val, <4 x i16>* %dst, align 2
   1046   ret void
   1047 }
   1048 define void @__rsAllocationVStoreXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as <2 x i32>; it is reinterpreted as <4 x i16> and lanes 0-2 are stored
          ; as <3 x i16> (align 2) to the rsOffsetNs address for (%x, %y, %z).
   1049   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1050   %dst = bitcast i8* %addr to <3 x i16>*
   1051   %wide = bitcast <2 x i32> %val to <4 x i16>
   1052   %lanes = shufflevector <4 x i16> %wide, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1053   store <3 x i16> %lanes, <3 x i16>* %dst, align 2
   1054   ret void
   1055 }
   1056 define void @__rsAllocationVStoreXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <2 x i16> and stored (align 2) to the rsOffsetNs address.
   1057   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1058   %dst = bitcast i8* %addr to <2 x i16>*
   1059   %pair = bitcast i32 %val to <2 x i16>
   1060   store <2 x i16> %pair, <2 x i16>* %dst, align 2
   1061   ret void
   1062 }
   1063 
   1064 define void @__rsAllocationVStoreXImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <4 x i8> and stored (align 1) to the rsOffsetNs address.
   1065   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1066   %dst = bitcast i8* %addr to <4 x i8>*
   1067   %bytes = bitcast i32 %val to <4 x i8>
   1068   store <4 x i8> %bytes, <4 x i8>* %dst, align 1
   1069   ret void
   1070 }
   1071 define void @__rsAllocationVStoreXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <4 x i8> and lanes 0-2 are stored
          ; as <3 x i8> (align 1) to the rsOffsetNs address for (%x, %y, %z).
   1072   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1073   %dst = bitcast i8* %addr to <3 x i8>*
   1074   %bytes = bitcast i32 %val to <4 x i8>
   1075   %lanes = shufflevector <4 x i8> %bytes, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1076   store <3 x i8> %lanes, <3 x i8>* %dst, align 1
   1077   ret void
   1078 }
   1079 define void @__rsAllocationVStoreXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i16; it is reinterpreted as <2 x i8> and stored to the rsOffsetNs address
          ; for (%x, %y, %z).
          ; The store claims only byte alignment (align 1), matching the char2 vload and the other
          ; char/uchar vstores; claiming more would be undefined behaviour on an address that is not so aligned.
   1080   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1081   %dst = bitcast i8* %addr to <2 x i8>*
   1082   %pair = bitcast i16 %val to <2 x i8>
   1083   store <2 x i8> %pair, <2 x i8>* %dst, align 1
   1084   ret void
   1085 }
   1086 
   1087 define void @__rsAllocationVStoreXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <4 x i8> and stored (align 1) to the rsOffsetNs address.
   1088   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1089   %dst = bitcast i8* %addr to <4 x i8>*
   1090   %bytes = bitcast i32 %val to <4 x i8>
   1091   store <4 x i8> %bytes, <4 x i8>* %dst, align 1
   1092   ret void
   1093 }
   1094 define void @__rsAllocationVStoreXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i32; it is reinterpreted as <4 x i8> and lanes 0-2 are stored
          ; as <3 x i8> (align 1) to the rsOffsetNs address for (%x, %y, %z).
   1095   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1096   %dst = bitcast i8* %addr to <3 x i8>*
   1097   %bytes = bitcast i32 %val to <4 x i8>
   1098   %lanes = shufflevector <4 x i8> %bytes, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1099   store <3 x i8> %lanes, <3 x i8>* %dst, align 1
   1100   ret void
   1101 }
   1102 define void @__rsAllocationVStoreXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as an i16; it is reinterpreted as <2 x i8> and stored to the rsOffsetNs address
          ; for (%x, %y, %z).
          ; The store claims only byte alignment (align 1), matching the uchar2 vload and the other
          ; char/uchar vstores; claiming more would be undefined behaviour on an address that is not so aligned.
   1103   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1104   %dst = bitcast i8* %addr to <2 x i8>*
   1105   %pair = bitcast i16 %val to <2 x i8>
   1106   store <2 x i8> %pair, <2 x i8>* %dst, align 1
   1107   ret void
   1108 }
   1109 
   1110 define void @__rsAllocationVStoreXImpl_float4(%struct.rs_allocation* nocapture readonly %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 4) to the rsOffsetNs address for (%x, %y, %z).
   1111   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1112   %dst = bitcast i8* %addr to <4 x float>*
   1113   store <4 x float> %val, <4 x float>* %dst, align 4
   1114   ret void
   1115 }
   1116 define void @__rsAllocationVStoreXImpl_float3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; %val arrives as <4 x i32>; it is reinterpreted as <4 x float> and lanes 0-2 are stored
          ; as <3 x float> (align 4) to the rsOffsetNs address for (%x, %y, %z).
   1117   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1118   %dst = bitcast i8* %addr to <3 x float>*
   1119   %asfloat = bitcast <4 x i32> %val to <4 x float>
   1120   %lanes = shufflevector <4 x float> %asfloat, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1121   store <3 x float> %lanes, <3 x float>* %dst, align 4
   1122   ret void
   1123 }
   1124 define void @__rsAllocationVStoreXImpl_float2(%struct.rs_allocation* nocapture readonly %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 4) to the rsOffsetNs address for (%x, %y, %z).
   1125   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1126   %dst = bitcast i8* %addr to <2 x float>*
   1127   store <2 x float> %val, <2 x float>* %dst, align 4
   1128   ret void
   1129 }
   1130 
   1131 define void @__rsAllocationVStoreXImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <4 x double> pointed to by %val and stores it (align 8) to the rsOffsetNs address for (%x, %y, %z).
   1132   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1133   %vec = load <4 x double>, <4 x double>* %val
   1134   %dst = bitcast i8* %addr to <4 x double>*
   1135   store <4 x double> %vec, <4 x double>* %dst, align 8
   1136   ret void
   1137 }
   1138 define void @__rsAllocationVStoreXImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <3 x double> pointed to by %val and stores it (align 8) to the rsOffsetNs address for (%x, %y, %z).
   1139   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1140   %vec = load <3 x double>, <3 x double>* %val
   1141   %dst = bitcast i8* %addr to <3 x double>*
   1142   store <3 x double> %vec, <3 x double>* %dst, align 8
   1143   ret void
   1144 }
   1145 define void @__rsAllocationVStoreXImpl_double2(%struct.rs_allocation* nocapture readonly %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (align 8) to the rsOffsetNs address for (%x, %y, %z).
   1146   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1147   %dst = bitcast i8* %addr to <2 x double>*
   1148   store <2 x double> %val, <2 x double>* %dst, align 8
   1149   ret void
   1150 }
   1151 
   1152 
   1153 attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }  ; nounwind + readonly; used on functions above that return the loaded value directly
   1154 attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }  ; same as #0 without readonly; used on functions above that store (setters, vstores, sret results)
   1155 attributes #2 = { nobuiltin }  ; attached to the rsOffset / rsOffsetNs call sites above
   1156 
   1157