Home | History | Annotate | Download | only in ll64
      1 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      2 target triple = "aarch64-linux-android"
      3 
      4 %struct.rs_allocation = type { i64*, i64*, i64*, i64* }
      5 
      6 declare i8* @rsOffset(%struct.rs_allocation* nocapture readonly %a, i32 %sizeOf, i32 %x, i32 %y, i32 %z)
      7 declare i8* @rsOffsetNs(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z)
      8 
      9 ; The loads and stores in this file are annotated with RenderScript-specific
     10 ; information for the type based alias analysis, such that the TBAA analysis
     11 ; understands that loads and stores from two allocations with different types
     12 ; can never access the same memory element. This is different from C, where
; a char or uchar load/store is special as it can alias with almost anything.
     14 ;
; The TBAA tree in this file has the node "RenderScript TBAA" as its root.
     16 ; This means all loads/stores that share this common root can be proven to not
     17 ; alias. However, the alias analysis still has to assume MayAlias between
     18 ; memory accesses in this file and memory accesses annotated with the C/C++
     19 ; TBAA metadata.
     20 ; If we can ensure that all accesses to elements loaded from RenderScript
     21 ; allocations are either annotated with the RenderScript TBAA information or
     22 ; not annotated at all, but never annotated with the C/C++ metadata, we
; can add the RenderScript TBAA tree under the C/C++ TBAA tree. This would then
; enable TBAA to prove that an access to data from a RenderScript allocation
; does not alias with a load/store accessing something that is not part of a
; RenderScript allocation.
     27 
     28 
     29 !14 = metadata !{metadata !"RenderScript TBAA"}
     30 !15 = metadata !{metadata !"allocation", metadata !14}
     31 
     32 !21 = metadata !{metadata !"char", metadata !15}
     33 define void @rsSetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i8 signext %val, i32 %x, i32 %y, i32 %z) #1 {
     34   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
     35   store i8 %val, i8* %1, align 1, !tbaa !21
     36   ret void
     37 }
     38 
     39 define signext i8 @rsGetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     40   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
     41   %2 = load i8* %1, align 1, !tbaa !21
     42   ret i8 %2
     43 }
     44 
     45 !22 = metadata !{metadata !"char2", metadata !15}
     46 define void @rsSetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
     47   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
     48   %2 = bitcast i8* %1 to <2 x i8>*
     49   %3 = bitcast i16 %val to <2 x i8>
     50   store <2 x i8> %3, <2 x i8>* %2, align 2, !tbaa !26
     51   ret void
     52 }
     53 
     54 define <2 x i8> @rsGetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     55   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
     56   %2 = bitcast i8* %1 to <2 x i8>*
     57   %3 = load <2 x i8>* %2, align 2, !tbaa !22
     58   ret <2 x i8> %3
     59 }
     60 
     61 !23 = metadata !{metadata !"char3", metadata !15}
     62 define void @rsSetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
     63   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     64   %2 = bitcast i32 %val to <4 x i8>
     65   %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
     66   %4 = bitcast i8* %1 to <4 x i8>*
     67   store <4 x i8> %3, <4 x i8>* %4, align 4, !tbaa !27
     68   ret void
     69 }
     70 
     71 define <3 x i8> @rsGetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     72   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     73   %2 = bitcast i8* %1 to <4 x i8>*
     74   %3 = load <4 x i8>* %2, align 4, !tbaa !23
     75   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
     76   ret <3 x i8> %4
     77 }
     78 
     79 !24 = metadata !{metadata !"char4", metadata !15}
     80 define void @rsSetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
     81   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     82   %2 = bitcast i8* %1 to <4 x i8>*
     83   store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !24
     84   ret void
     85 }
     86 
     87 define <4 x i8> @rsGetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     88   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
     89   %2 = bitcast i8* %1 to <4 x i8>*
     90   %3 = load <4 x i8>* %2, align 4, !tbaa !24
     91   ret <4 x i8> %3
     92 }
     93 
     94 !25 = metadata !{metadata !"uchar", metadata !15}
     95 define void @rsSetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i8 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
     96   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
     97   store i8 %val, i8* %1, align 1, !tbaa !25
     98   ret void
     99 }
    100 
    101 define zeroext i8 @rsGetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    102   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
    103   %2 = load i8* %1, align 1, !tbaa !25
    104   ret i8 %2
    105 }
    106 
    107 !26 = metadata !{metadata !"uchar2", metadata !15}
    108 define void @rsSetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
    109   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    110   %2 = bitcast i8* %1 to <2 x i8>*
    111   %3 = bitcast i16 %val to <2 x i8>
    112   store <2 x i8> %3, <2 x i8>* %2, align 2, !tbaa !26
    113   ret void
    114 }
    115 
    116 define <2 x i8> @rsGetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    117   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    118   %2 = bitcast i8* %1 to <2 x i8>*
    119   %3 = load <2 x i8>* %2, align 2, !tbaa !26
    120   ret <2 x i8> %3
    121 }
    122 
    123 !27 = metadata !{metadata !"uchar3", metadata !15}
    124 define void @rsSetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    125   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    126   %2 = bitcast i32 %val to <4 x i8>
    127   %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    128   %4 = bitcast i8* %1 to <4 x i8>*
    129   store <4 x i8> %3, <4 x i8>* %4, align 4, !tbaa !27
    130   ret void
    131 }
    132 
    133 define <3 x i8> @rsGetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    134   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    135   %2 = bitcast i8* %1 to <4 x i8>*
    136   %3 = load <4 x i8>* %2, align 4, !tbaa !27
    137   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
    138   ret <3 x i8> %4
    139 }
    140 
    141 !28 = metadata !{metadata !"uchar4", metadata !15}
    142 define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
    143   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    144   %2 = bitcast i8* %1 to <4 x i8>*
    145   store <4 x i8> %val, <4 x i8>* %2, align 4, !tbaa !28
    146   ret void
    147 }
    148 
    149 define <4 x i8> @rsGetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    150   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    151   %2 = bitcast i8* %1 to <4 x i8>*
    152   %3 = load <4 x i8>* %2, align 4, !tbaa !28
    153   ret <4 x i8> %3
    154 }
    155 
    156 !29 = metadata !{metadata !"short", metadata !15}
    157 define void @rsSetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i16 signext %val, i32 %x, i32 %y, i32 %z) #1 {
    158   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    159   %2 = bitcast i8* %1 to i16*
    160   store i16 %val, i16* %2, align 2, !tbaa !29
    161   ret void
    162 }
    163 
    164 define signext i16 @rsGetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    165   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    166   %2 = bitcast i8* %1 to i16*
    167   %3 = load i16* %2, align 2, !tbaa !29
    168   ret i16 %3
    169 }
    170 
    171 !30 = metadata !{metadata !"short2", metadata !15}
    172 define void @rsSetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    173   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    174   %2 = bitcast i8* %1 to <2 x i16>*
    175   store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !30
    176   ret void
    177 }
    178 
    179 define <2 x i16> @rsGetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    180   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    181   %2 = bitcast i8* %1 to <2 x i16>*
    182   %3 = load <2 x i16>* %2, align 4, !tbaa !30
    183   ret <2 x i16> %3
    184 }
    185 
    186 !31 = metadata !{metadata !"short3", metadata !15}
    187 define void @rsSetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    188   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    189   %2 = bitcast <2 x i32> %val to <4 x i16>
    190   %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    191   %4 = bitcast i8* %1 to <4 x i16>*
    192   store <4 x i16> %3, <4 x i16>* %4, align 8, !tbaa !31
    193   ret void
    194 }
    195 
    196 define <3 x i16> @rsGetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    197   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    198   %2 = bitcast i8* %1 to <4 x i16>*
    199   %3 = load <4 x i16>* %2, align 8, !tbaa !31
    200   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    201   ret <3 x i16> %4
    202 }
    203 
    204 !32 = metadata !{metadata !"short4", metadata !15}
    205 define void @rsSetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    206   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    207   %2 = bitcast i8* %1 to <4 x i16>*
    208   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !32
    209   ret void
    210 }
    211 
    212 define <4 x i16> @rsGetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    213   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    214   %2 = bitcast i8* %1 to <4 x i16>*
    215   %3 = load <4 x i16>* %2, align 8, !tbaa !32
    216   ret <4 x i16> %3
    217 }
    218 
    219 !33 = metadata !{metadata !"ushort", metadata !15}
    220 define void @rsSetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i16 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
    221   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    222   %2 = bitcast i8* %1 to i16*
    223   store i16 %val, i16* %2, align 2, !tbaa !33
    224   ret void
    225 }
    226 
    227 define zeroext i16 @rsGetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    228   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
    229   %2 = bitcast i8* %1 to i16*
    230   %3 = load i16* %2, align 2, !tbaa !33
    231   ret i16 %3
    232 }
    233 
    234 !34 = metadata !{metadata !"ushort2", metadata !15}
    235 define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    236   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    237   %2 = bitcast i8* %1 to <2 x i16>*
    238   store <2 x i16> %val, <2 x i16>* %2, align 4, !tbaa !34
    239   ret void
    240 }
    241 
    242 define <2 x i16> @rsGetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    243   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    244   %2 = bitcast i8* %1 to <2 x i16>*
    245   %3 = load <2 x i16>* %2, align 4, !tbaa !34
    246   ret <2 x i16> %3
    247 }
    248 
    249 !35 = metadata !{metadata !"ushort3", metadata !15}
    250 define void @rsSetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    251   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    252   %2 = bitcast <2 x i32> %val to <4 x i16>
    253   %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    254   %4 = bitcast i8* %1 to <4 x i16>*
    255   store <4 x i16> %3, <4 x i16>* %4, align 8, !tbaa !31
    256   ret void
    257 }
    258 
    259 define <3 x i16> @rsGetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    260   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    261   %2 = bitcast i8* %1 to <4 x i16>*
    262   %3 = load <4 x i16>* %2, align 8, !tbaa !35
    263   %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    264   ret <3 x i16> %4
    265 }
    266 
    267 !36 = metadata !{metadata !"ushort4", metadata !15}
    268 define void @rsSetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    269   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    270   %2 = bitcast i8* %1 to <4 x i16>*
    271   store <4 x i16> %val, <4 x i16>* %2, align 8, !tbaa !36
    272   ret void
    273 }
    274 
    275 define <4 x i16> @rsGetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    276   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    277   %2 = bitcast i8* %1 to <4 x i16>*
    278   %3 = load <4 x i16>* %2, align 8, !tbaa !36
    279   ret <4 x i16> %3
    280 }
    281 
    282 !37 = metadata !{metadata !"int", metadata !15}
    283 define void @rsSetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    284   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    285   %2 = bitcast i8* %1 to i32*
    286   store i32 %val, i32* %2, align 4, !tbaa !37
    287   ret void
    288 }
    289 
    290 define i32 @rsGetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    291   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    292   %2 = bitcast i8* %1 to i32*
    293   %3 = load i32* %2, align 4, !tbaa !37
    294   ret i32 %3
    295 }
    296 
    297 !38 = metadata !{metadata !"int2", metadata !15}
    298 define void @rsSetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    299   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    300   %2 = bitcast i8* %1 to <2 x i32>*
    301   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !38
    302   ret void
    303 }
    304 
    305 define <2 x i32> @rsGetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    306   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    307   %2 = bitcast i8* %1 to <2 x i32>*
    308   %3 = load <2 x i32>* %2, align 8, !tbaa !38
    309   ret <2 x i32> %3
    310 }
    311 
    312 !39 = metadata !{metadata !"int3", metadata !15}
    313 define void @rsSetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    314   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    315   %2 = shufflevector <4 x i32> %val, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    316   %3 = bitcast i8* %1 to <4 x i32>*
    317   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !39
    318   ret void
    319 }
    320 
    321 define <3 x i32> @rsGetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    322   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    323   %2 = bitcast i8* %1 to <4 x i32>*
    324   %3 = load <4 x i32>* %2, align 8, !tbaa !39
    325   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    326   ret <3 x i32> %4
    327 }
    328 
    329 !40 = metadata !{metadata !"int4", metadata !15}
    330 define void @rsSetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    331   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    332   %2 = bitcast i8* %1 to <4 x i32>*
    333   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !40
    334   ret void
    335 }
    336 
    337 define <4 x i32> @rsGetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    338   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    339   %2 = bitcast i8* %1 to <4 x i32>*
    340   %3 = load <4 x i32>* %2, align 16, !tbaa !40
    341   ret <4 x i32> %3
    342 }
    343 
    344 !41 = metadata !{metadata !"uint", metadata !15}
    345 define void @rsSetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    346   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    347   %2 = bitcast i8* %1 to i32*
    348   store i32 %val, i32* %2, align 4, !tbaa !41
    349   ret void
    350 }
    351 
    352 define i32 @rsGetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    353   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    354   %2 = bitcast i8* %1 to i32*
    355   %3 = load i32* %2, align 4, !tbaa !41
    356   ret i32 %3
    357 }
    358 
    359 !42 = metadata !{metadata !"uint2", metadata !15}
    360 define void @rsSetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    361   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    362   %2 = bitcast i8* %1 to <2 x i32>*
    363   store <2 x i32> %val, <2 x i32>* %2, align 8, !tbaa !42
    364   ret void
    365 }
    366 
    367 define <2 x i32> @rsGetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    368   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    369   %2 = bitcast i8* %1 to <2 x i32>*
    370   %3 = load <2 x i32>* %2, align 8, !tbaa !42
    371   ret <2 x i32> %3
    372 }
    373 
    374 !43 = metadata !{metadata !"uint3", metadata !15}
    375 define void @rsSetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    376   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    377   %2 = shufflevector <4 x i32> %val, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    378   %3 = bitcast i8* %1 to <4 x i32>*
    379   store <4 x i32> %2, <4 x i32>* %3, align 16, !tbaa !43
    380   ret void
    381 }
    382 
    383 define <3 x i32> @rsGetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    384   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    385   %2 = bitcast i8* %1 to <4 x i32>*
    386   %3 = load <4 x i32>* %2, align 8, !tbaa !43
    387   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    388   ret <3 x i32> %4
    389 }
    390 
    391 !44 = metadata !{metadata !"uint4", metadata !15}
    392 define void @rsSetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    393   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    394   %2 = bitcast i8* %1 to <4 x i32>*
    395   store <4 x i32> %val, <4 x i32>* %2, align 16, !tbaa !44
    396   ret void
    397 }
    398 
    399 define <4 x i32> @rsGetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    400   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    401   %2 = bitcast i8* %1 to <4 x i32>*
    402   %3 = load <4 x i32>* %2, align 16, !tbaa !44
    403   ret <4 x i32> %3
    404 }
    405 
    406 !45 = metadata !{metadata !"long", metadata !15}
    407 define void @rsSetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    408   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    409   %2 = bitcast i8* %1 to i64*
    410   store i64 %val, i64* %2, align 8, !tbaa !45
    411   ret void
    412 }
    413 
    414 define i64 @rsGetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    415   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    416   %2 = bitcast i8* %1 to i64*
    417   %3 = load i64* %2, align 8, !tbaa !45
    418   ret i64 %3
    419 }
    420 
    421 !46 = metadata !{metadata !"long2", metadata !15}
    422 define void @rsSetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    423   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    424   %2 = bitcast i8* %1 to <2 x i64>*
    425   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !46
    426   ret void
    427 }
    428 
    429 define <2 x i64> @rsGetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    430   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    431   %2 = bitcast i8* %1 to <2 x i64>*
    432   %3 = load <2 x i64>* %2, align 16, !tbaa !46
    433   ret <2 x i64> %3
    434 }
    435 
    436 !47 = metadata !{metadata !"long3", metadata !15}
    437 define void @rsSetElementAtImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    438   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    439   %2 = load <3 x i64>* %val
    440   %3 = shufflevector <3 x i64> %2, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    441   %4 = bitcast i8* %1 to <4 x i64>*
    442   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
    443   ret void
    444 }
    445 
    446 define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    447   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    448   %2 = bitcast i8* %1 to <4 x i64>*
    449   %3 = load <4 x i64>* %2, align 32
    450   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
    451   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
    452   ret void
    453 }
    454 
    455 !48 = metadata !{metadata !"long4", metadata !15}
    456 define void @rsSetElementAtImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    457   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    458   %2 = load <4 x i64>* %val
    459   %3 = bitcast i8* %1 to <4 x i64>*
    460   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !48
    461   ret void
    462 }
    463 
    464 define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    465   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    466   %2 = bitcast i8* %1 to <4 x i64>*
    467   %3 = load <4 x i64>* %2, align 32, !tbaa !15
    468   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !48
    469   ret void
    470 }
    471 
    472 !49 = metadata !{metadata !"ulong", metadata !15}
    473 define void @rsSetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    474   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    475   %2 = bitcast i8* %1 to i64*
    476   store i64 %val, i64* %2, align 8, !tbaa !49
    477   ret void
    478 }
    479 
    480 define i64 @rsGetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    481   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    482   %2 = bitcast i8* %1 to i64*
    483   %3 = load i64* %2, align 8, !tbaa !49
    484   ret i64 %3
    485 }
    486 
    487 !50 = metadata !{metadata !"ulong2", metadata !15}
    488 define void @rsSetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    489   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    490   %2 = bitcast i8* %1 to <2 x i64>*
    491   store <2 x i64> %val, <2 x i64>* %2, align 16, !tbaa !50
    492   ret void
    493 }
    494 
    495 define <2 x i64> @rsGetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    496   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    497   %2 = bitcast i8* %1 to <2 x i64>*
    498   %3 = load <2 x i64>* %2, align 16, !tbaa !50
    499   ret <2 x i64> %3
    500 }
    501 
    502 !51 = metadata !{metadata !"ulong3", metadata !15}
    503 define void @rsSetElementAtImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    504   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    505   %2 = load <3 x i64>* %val
    506   %3 = shufflevector <3 x i64> %2, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    507   %4 = bitcast i8* %1 to <4 x i64>*
    508   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !47
    509   ret void
    510 }
    511 
    512 define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    513   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    514   %2 = bitcast i8* %1 to <4 x i64>*
    515   %3 = load <4 x i64>* %2, align 32
    516   %4 = bitcast <3 x i64>* %agg.result to <4 x i64>*
    517   store <4 x i64> %3, <4 x i64>* %4, align 32, !tbaa !51
    518   ret void
    519 }
    520 
    521 !52 = metadata !{metadata !"ulong4", metadata !15}
    522 define void @rsSetElementAtImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    523   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    524   %2 = load <4 x i64>* %val
    525   %3 = bitcast i8* %1 to <4 x i64>*
    526   store <4 x i64> %2, <4 x i64>* %3, align 32, !tbaa !52
    527   ret void
    528 }
    529 
    530 define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    531   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    532   %2 = bitcast i8* %1 to <4 x i64>*
    533   %3 = load <4 x i64>* %2, align 32, !tbaa !15
    534   store <4 x i64> %3, <4 x i64>* %agg.result, align 32, !tbaa !52
    535   ret void
    536 }
    537 
    538 !53 = metadata !{metadata !"float", metadata !15}
    539 define void @rsSetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, float %val, i32 %x, i32 %y, i32 %z) #1 {
    540   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    541   %2 = bitcast i8* %1 to float*
    542   store float %val, float* %2, align 4, !tbaa !53
    543   ret void
    544 }
    545 
    546 define float @rsGetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    547   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
    548   %2 = bitcast i8* %1 to float*
    549   %3 = load float* %2, align 4, !tbaa !53
    550   ret float %3
    551 }
    552 
    553 !54 = metadata !{metadata !"float2", metadata !15}
    554 define void @rsSetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    555   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    556   %2 = bitcast i8* %1 to <2 x float>*
    557   store <2 x float> %val, <2 x float>* %2, align 8, !tbaa !54
    558   ret void
    559 }
    560 
    561 define <2 x float> @rsGetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    562   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    563   %2 = bitcast i8* %1 to <2 x float>*
    564   %3 = load <2 x float>* %2, align 8, !tbaa !54
    565   ret <2 x float> %3
    566 }
    567 
    568 !55 = metadata !{metadata !"float3", metadata !15}
    569 define void @rsSetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The float3 value arrives as <4 x i32>. It is bitcast to <4 x float>,
          ; lanes 0-2 are kept (lane 3 becomes undef) and the full 16-byte
          ; vector is stored at the rsOffset address (sizeOf = 16), tagged
          ; "float3" (!55).
    570   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    571   %asfloat = bitcast <4 x i32> %val to <4 x float>
    572   %padded = shufflevector <4 x float> %asfloat, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    573   %slot = bitcast i8* %raw to <4 x float>*
    574   store <4 x float> %padded, <4 x float>* %slot, align 16, !tbaa !55
    575   ret void
    576 }
    577 
    578 define <3 x float> @rsGetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Loads the whole 16-byte <4 x float> slot and returns lanes 0-2.
          ; NOTE(review): this load only claims align 8 although the setter
          ; above stores to the same kind of address with align 16 - confirm
          ; whether the weaker alignment is intentional.
    579   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    580   %slot = bitcast i8* %raw to <4 x float>*
    581   %wide = load <4 x float>* %slot, align 8, !tbaa !55
    582   %xyz = shufflevector <4 x float> %wide, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
    583   ret <3 x float> %xyz
    584 }
    585 
    586 !56 = metadata !{metadata !"float4", metadata !15}
    587 define void @rsSetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (16 bytes, align 16) at the rsOffset address for
          ; (%x, %y, %z) with sizeOf = 16; tagged "float4" (!56).
    588   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    589   %slot = bitcast i8* %raw to <4 x float>*
    590   store <4 x float> %val, <4 x float>* %slot, align 16, !tbaa !56
    591   ret void
    592 }
    593 
    594 define <4 x float> @rsGetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Mirror of the setter: 16-byte-aligned <4 x float> load tagged
          ; "float4" (!56) from the rsOffset address.
    595   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    596   %slot = bitcast i8* %raw to <4 x float>*
    597   %elem = load <4 x float>* %slot, align 16, !tbaa !56
    598   ret <4 x float> %elem
    599 }
    600 
    601 !57 = metadata !{metadata !"double", metadata !15}
    602 define void @rsSetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, double %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores the scalar %val at the rsOffset address for (%x, %y, %z)
          ; with sizeOf = 8; the store is tagged "double" (!57).
    603   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    604   %slot = bitcast i8* %raw to double*
    605   store double %val, double* %slot, align 8, !tbaa !57
    606   ret void
    607 }
    608 
    609 define double @rsGetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Mirror of the setter: 8-byte-aligned double load tagged "double"
          ; (!57) from the rsOffset address.
    610   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    611   %slot = bitcast i8* %raw to double*
    612   %elem = load double* %slot, align 8, !tbaa !57
    613   ret double %elem
    614 }
    615 
    616 !58 = metadata !{metadata !"double2", metadata !15}
    617 define void @rsSetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores %val (16 bytes, align 16) at the rsOffset address for
          ; (%x, %y, %z) with sizeOf = 16; tagged "double2" (!58).
    618   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    619   %slot = bitcast i8* %raw to <2 x double>*
    620   store <2 x double> %val, <2 x double>* %slot, align 16, !tbaa !58
    621   ret void
    622 }
    623 
    624 define <2 x double> @rsGetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Mirror of the setter: 16-byte-aligned <2 x double> load tagged
          ; "double2" (!58) from the rsOffset address.
    625   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    626   %slot = bitcast i8* %raw to <2 x double>*
    627   %elem = load <2 x double>* %slot, align 16, !tbaa !58
    628   ret <2 x double> %elem
    629 }
    630 
    631 !59 = metadata !{metadata !"double3", metadata !15}
    632 define void @rsSetElementAtImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Stores the double3 pointed to by %val into allocation %a at
          ; (%x, %y, %z). The value is loaded from %val, widened to
          ; <4 x double> (lane 3 undef) and written as one 32-byte, 32-aligned
          ; store at the rsOffset address (sizeOf = 32).
          ; The store must carry the "double3" node (!59) declared above so
          ; that TBAA classifies it with double3 accesses; any other node
          ; would let the optimizer treat it as a different element type.
    633   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    634   %src = load <3 x double>* %val
    635   %padded = shufflevector <3 x double> %src, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    636   %slot = bitcast i8* %raw to <4 x double>*
    637   store <4 x double> %padded, <4 x double>* %slot, align 32, !tbaa !59
    638   ret void
    639 }
    640 
    641 
    642 define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Copies the 32-byte slot at the rsOffset address (sizeOf = 32) into
          ; the sret buffer: the slot is loaded as <4 x double> and stored
          ; through %agg.result reinterpreted as <4 x double>*.
          ; NOTE(review): the load from the allocation carries no TBAA tag,
          ; while the store into the sret buffer is tagged !59. The by-value
          ; getters in this file tag the allocation load instead - confirm
          ; which placement is intended.
    643   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    644   %slot = bitcast i8* %raw to <4 x double>*
    645   %wide = load <4 x double>* %slot, align 32
    646   %out = bitcast <3 x double>* %agg.result to <4 x double>*
    647   store <4 x double> %wide, <4 x double>* %out, align 32, !tbaa !59
    648   ret void
    649 }
    650 
    651 !60 = metadata !{metadata !"double4", metadata !15}
    652 define void @rsSetElementAtImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <4 x double> pointed to by %val and stores it (32 bytes,
          ; align 32) at the rsOffset address for (%x, %y, %z) with
          ; sizeOf = 32; the store is tagged "double4" (!60).
    653   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    654   %src = load <4 x double>* %val
    655   %slot = bitcast i8* %raw to <4 x double>*
    656   store <4 x double> %src, <4 x double>* %slot, align 32, !tbaa !60
    657   ret void
    658 }
    659 define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Loads the <4 x double> slot from the allocation and writes it to
          ; the sret buffer %agg.result.
          ; NOTE(review): the allocation load is tagged with the parent
          ; "allocation" node (!15) rather than "double4" (!60), and the !60
          ; tag sits on the sret store - confirm this is intended.
    660   %raw = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    661   %slot = bitcast i8* %raw to <4 x double>*
    662   %elem = load <4 x double>* %slot, align 32, !tbaa !15
    663   store <4 x double> %elem, <4 x double>* %agg.result, align 32, !tbaa !60
    664   ret void
    665 }
    666 
    667 
    668 define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector load: reads <4 x i64> from the address rsOffsetNs returns
          ; for (%x, %y, %z), claiming only 8-byte alignment and no TBAA tag,
          ; and writes the result to the sret buffer %agg.result.
    669   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    670   %vecp = bitcast i8* %raw to <4 x i64>*
    671   %vec = load <4 x i64>* %vecp, align 8
    672   store <4 x i64> %vec, <4 x i64>* %agg.result
    673   ret void
    674 }
    675 define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Same as the long4 variant, but loads and stores a <3 x i64>.
    676   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    677   %vecp = bitcast i8* %raw to <3 x i64>*
    678   %vec = load <3 x i64>* %vecp, align 8
    679   store <3 x i64> %vec, <3 x i64>* %agg.result
    680   ret void
    681 }
    682 define <2 x i64> @__rsAllocationVLoadXImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant: the <2 x i64> is returned by value, no sret.
    683   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    684   %vecp = bitcast i8* %raw to <2 x i64>*
    685   %vec = load <2 x i64>* %vecp, align 8
    686   ret <2 x i64> %vec
    687 }
    688 
    689 define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector load: reads <4 x i64> (align 8, untagged) from the
          ; rsOffsetNs address for (%x, %y, %z) into the sret buffer. The IR
          ; is identical to the long4 variant apart from the symbol name.
    690   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    691   %vecp = bitcast i8* %raw to <4 x i64>*
    692   %vec = load <4 x i64>* %vecp, align 8
    693   store <4 x i64> %vec, <4 x i64>* %agg.result
    694   ret void
    695 }
    696 define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Three-lane variant: loads <3 x i64> and stores it to %agg.result.
    697   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    698   %vecp = bitcast i8* %raw to <3 x i64>*
    699   %vec = load <3 x i64>* %vecp, align 8
    700   store <3 x i64> %vec, <3 x i64>* %agg.result
    701   ret void
    702 }
    703 define <2 x i64> @__rsAllocationVLoadXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant: the <2 x i64> is returned by value.
    704   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    705   %vecp = bitcast i8* %raw to <2 x i64>*
    706   %vec = load <2 x i64>* %vecp, align 8
    707   ret <2 x i64> %vec
    708 }
    709 
    710 define <4 x i32> @__rsAllocationVLoadXImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x i32> found at the rsOffsetNs address
          ; for (%x, %y, %z). The load claims only 4-byte alignment and has no
          ; TBAA tag.
    711   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    712   %vecp = bitcast i8* %raw to <4 x i32>*
    713   %vec = load <4 x i32>* %vecp, align 4
    714   ret <4 x i32> %vec
    715 }
    716 define <3 x i32> @__rsAllocationVLoadXImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    717   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    718   %vecp = bitcast i8* %raw to <3 x i32>*
    719   %vec = load <3 x i32>* %vecp, align 4
    720   ret <3 x i32> %vec
    721 }
    722 define <2 x i32> @__rsAllocationVLoadXImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    723   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    724   %vecp = bitcast i8* %raw to <2 x i32>*
    725   %vec = load <2 x i32>* %vecp, align 4
    726   ret <2 x i32> %vec
    727 }
    728 
    729 define <4 x i32> @__rsAllocationVLoadXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x i32> at the rsOffsetNs address for
          ; (%x, %y, %z); align 4, untagged. The IR matches the int4 variant.
    730   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    731   %vecp = bitcast i8* %raw to <4 x i32>*
    732   %vec = load <4 x i32>* %vecp, align 4
    733   ret <4 x i32> %vec
    734 }
    735 define <3 x i32> @__rsAllocationVLoadXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    736   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    737   %vecp = bitcast i8* %raw to <3 x i32>*
    738   %vec = load <3 x i32>* %vecp, align 4
    739   ret <3 x i32> %vec
    740 }
    741 define <2 x i32> @__rsAllocationVLoadXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    742   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    743   %vecp = bitcast i8* %raw to <2 x i32>*
    744   %vec = load <2 x i32>* %vecp, align 4
    745   ret <2 x i32> %vec
    746 }
    747 
    748 define <4 x i16> @__rsAllocationVLoadXImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x i16> at the rsOffsetNs address for
          ; (%x, %y, %z). The load claims only 2-byte alignment, no TBAA tag.
    749   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    750   %vecp = bitcast i8* %raw to <4 x i16>*
    751   %vec = load <4 x i16>* %vecp, align 2
    752   ret <4 x i16> %vec
    753 }
    754 define <3 x i16> @__rsAllocationVLoadXImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    755   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    756   %vecp = bitcast i8* %raw to <3 x i16>*
    757   %vec = load <3 x i16>* %vecp, align 2
    758   ret <3 x i16> %vec
    759 }
    760 define <2 x i16> @__rsAllocationVLoadXImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    761   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    762   %vecp = bitcast i8* %raw to <2 x i16>*
    763   %vec = load <2 x i16>* %vecp, align 2
    764   ret <2 x i16> %vec
    765 }
    766 
    767 define <4 x i16> @__rsAllocationVLoadXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x i16> at the rsOffsetNs address for
          ; (%x, %y, %z); align 2, untagged. The IR matches the short4 variant.
    768   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    769   %vecp = bitcast i8* %raw to <4 x i16>*
    770   %vec = load <4 x i16>* %vecp, align 2
    771   ret <4 x i16> %vec
    772 }
    773 define <3 x i16> @__rsAllocationVLoadXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    774   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    775   %vecp = bitcast i8* %raw to <3 x i16>*
    776   %vec = load <3 x i16>* %vecp, align 2
    777   ret <3 x i16> %vec
    778 }
    779 define <2 x i16> @__rsAllocationVLoadXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    780   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    781   %vecp = bitcast i8* %raw to <2 x i16>*
    782   %vec = load <2 x i16>* %vecp, align 2
    783   ret <2 x i16> %vec
    784 }
    785 
    786 define <4 x i8> @__rsAllocationVLoadXImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x i8> at the rsOffsetNs address for
          ; (%x, %y, %z). The load claims only byte alignment, no TBAA tag.
    787   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    788   %vecp = bitcast i8* %raw to <4 x i8>*
    789   %vec = load <4 x i8>* %vecp, align 1
    790   ret <4 x i8> %vec
    791 }
    792 define <3 x i8> @__rsAllocationVLoadXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    793   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    794   %vecp = bitcast i8* %raw to <3 x i8>*
    795   %vec = load <3 x i8>* %vecp, align 1
    796   ret <3 x i8> %vec
    797 }
    798 define <2 x i8> @__rsAllocationVLoadXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    799   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    800   %vecp = bitcast i8* %raw to <2 x i8>*
    801   %vec = load <2 x i8>* %vecp, align 1
    802   ret <2 x i8> %vec
    803 }
    804 
    805 define <4 x i8> @__rsAllocationVLoadXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x i8> at the rsOffsetNs address for
          ; (%x, %y, %z); align 1, untagged. The IR matches the char4 variant.
    806   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    807   %vecp = bitcast i8* %raw to <4 x i8>*
    808   %vec = load <4 x i8>* %vecp, align 1
    809   ret <4 x i8> %vec
    810 }
    811 define <3 x i8> @__rsAllocationVLoadXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    812   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    813   %vecp = bitcast i8* %raw to <3 x i8>*
    814   %vec = load <3 x i8>* %vecp, align 1
    815   ret <3 x i8> %vec
    816 }
    817 define <2 x i8> @__rsAllocationVLoadXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    818   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    819   %vecp = bitcast i8* %raw to <2 x i8>*
    820   %vec = load <2 x i8>* %vecp, align 1
    821   ret <2 x i8> %vec
    822 }
    823 
    824 define <4 x float> @__rsAllocationVLoadXImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Vector load: returns the <4 x float> at the rsOffsetNs address for
          ; (%x, %y, %z). The load claims only 4-byte alignment, no TBAA tag.
    825   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    826   %vecp = bitcast i8* %raw to <4 x float>*
    827   %vec = load <4 x float>* %vecp, align 4
    828   ret <4 x float> %vec
    829 }
    830 define <3 x float> @__rsAllocationVLoadXImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Three-lane variant of the load above.
    831   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    832   %vecp = bitcast i8* %raw to <3 x float>*
    833   %vec = load <3 x float>* %vecp, align 4
    834   ret <3 x float> %vec
    835 }
    836 define <2 x float> @__rsAllocationVLoadXImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant of the load above.
    837   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    838   %vecp = bitcast i8* %raw to <2 x float>*
    839   %vec = load <2 x float>* %vecp, align 4
    840   ret <2 x float> %vec
    841 }
    842 
    843 define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector load: reads <4 x double> (align 8, untagged) from the
          ; rsOffsetNs address for (%x, %y, %z) and writes it to the sret
          ; buffer %agg.result.
    844   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    845   %vecp = bitcast i8* %raw to <4 x double>*
    846   %vec = load <4 x double>* %vecp, align 8
    847   store <4 x double> %vec, <4 x double>* %agg.result
    848   ret void
    849 }
    850 define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
          ; Three-lane variant: loads <3 x double> and stores it to %agg.result.
    851   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    852   %vecp = bitcast i8* %raw to <3 x double>*
    853   %vec = load <3 x double>* %vecp, align 8
    854   store <3 x double> %vec, <3 x double>* %agg.result
    855   ret void
    856 }
    857 define <2 x double> @__rsAllocationVLoadXImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
          ; Two-lane variant: the <2 x double> is returned by value.
    858   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    859   %vecp = bitcast i8* %raw to <2 x double>*
    860   %vec = load <2 x double>* %vecp, align 8
    861   ret <2 x double> %vec
    862 }
    863 
    864 
    865 define void @__rsAllocationVStoreXImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: loads the <4 x i64> pointed to by %val and stores it
          ; (align 8, no TBAA tag) at the rsOffsetNs address for (%x, %y, %z).
    866   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    867   %src = load <4 x i64>* %val
    868   %dst = bitcast i8* %raw to <4 x i64>*
    869   store <4 x i64> %src, <4 x i64>* %dst, align 8
    870   ret void
    871 }
    872 define void @__rsAllocationVStoreXImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Three-lane variant: the value is also passed by pointer.
    873   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    874   %src = load <3 x i64>* %val
    875   %dst = bitcast i8* %raw to <3 x i64>*
    876   store <3 x i64> %src, <3 x i64>* %dst, align 8
    877   ret void
    878 }
    879 define void @__rsAllocationVStoreXImpl_long2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is passed by value and stored directly.
    880   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    881   %dst = bitcast i8* %raw to <2 x i64>*
    882   store <2 x i64> %val, <2 x i64>* %dst, align 8
    883   ret void
    884 }
    885 
    886 define void @__rsAllocationVStoreXImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: loads the <4 x i64> pointed to by %val and stores it
          ; (align 8, untagged) at the rsOffsetNs address for (%x, %y, %z).
          ; The IR matches the long4 variant.
    887   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    888   %src = load <4 x i64>* %val
    889   %dst = bitcast i8* %raw to <4 x i64>*
    890   store <4 x i64> %src, <4 x i64>* %dst, align 8
    891   ret void
    892 }
    893 define void @__rsAllocationVStoreXImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Three-lane variant: the value is also passed by pointer.
    894   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    895   %src = load <3 x i64>* %val
    896   %dst = bitcast i8* %raw to <3 x i64>*
    897   store <3 x i64> %src, <3 x i64>* %dst, align 8
    898   ret void
    899 }
    900 define void @__rsAllocationVStoreXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is passed by value and stored directly.
    901   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    902   %dst = bitcast i8* %raw to <2 x i64>*
    903   store <2 x i64> %val, <2 x i64>* %dst, align 8
    904   ret void
    905 }
    906 
    907 define void @__rsAllocationVStoreXImpl_int4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 4, no TBAA tag) at the rsOffsetNs
          ; address for (%x, %y, %z).
    908   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    909   %dst = bitcast i8* %raw to <4 x i32>*
    910   store <4 x i32> %val, <4 x i32>* %dst, align 4
    911   ret void
    912 }
    913 define void @__rsAllocationVStoreXImpl_int3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The int3 value arrives widened to <4 x i32>; only lanes 0-2 are
          ; extracted and stored as a <3 x i32>.
    914   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    915   %dst = bitcast i8* %raw to <3 x i32>*
    916   %xyz = shufflevector <4 x i32> %val, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    917   store <3 x i32> %xyz, <3 x i32>* %dst, align 4
    918   ret void
    919 }
    920 define void @__rsAllocationVStoreXImpl_int2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is stored directly.
    921   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    922   %dst = bitcast i8* %raw to <2 x i32>*
    923   store <2 x i32> %val, <2 x i32>* %dst, align 4
    924   ret void
    925 }
    926 
    927 define void @__rsAllocationVStoreXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 4, untagged) at the rsOffsetNs
          ; address for (%x, %y, %z). The IR matches the int4 variant.
    928   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    929   %dst = bitcast i8* %raw to <4 x i32>*
    930   store <4 x i32> %val, <4 x i32>* %dst, align 4
    931   ret void
    932 }
    933 define void @__rsAllocationVStoreXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The uint3 value arrives widened to <4 x i32>; only lanes 0-2 are
          ; extracted and stored as a <3 x i32>.
    934   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    935   %dst = bitcast i8* %raw to <3 x i32>*
    936   %xyz = shufflevector <4 x i32> %val, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    937   store <3 x i32> %xyz, <3 x i32>* %dst, align 4
    938   ret void
    939 }
    940 define void @__rsAllocationVStoreXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is stored directly.
    941   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    942   %dst = bitcast i8* %raw to <2 x i32>*
    943   store <2 x i32> %val, <2 x i32>* %dst, align 4
    944   ret void
    945 }
    946 
    947 define void @__rsAllocationVStoreXImpl_short4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 2, no TBAA tag) at the rsOffsetNs
          ; address for (%x, %y, %z).
    948   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    949   %dst = bitcast i8* %raw to <4 x i16>*
    950   store <4 x i16> %val, <4 x i16>* %dst, align 2
    951   ret void
    952 }
    953 define void @__rsAllocationVStoreXImpl_short3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The short3 value arrives packed in a <2 x i32>. It is reinterpreted
          ; as <4 x i16> and lanes 0-2 are stored as a <3 x i16>.
    954   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    955   %dst = bitcast i8* %raw to <3 x i16>*
    956   %lanes = bitcast <2 x i32> %val to <4 x i16>
    957   %xyz = shufflevector <4 x i16> %lanes, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    958   store <3 x i16> %xyz, <3 x i16>* %dst, align 2
    959   ret void
    960 }
    961 define void @__rsAllocationVStoreXImpl_short2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is stored directly.
    962   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    963   %dst = bitcast i8* %raw to <2 x i16>*
    964   store <2 x i16> %val, <2 x i16>* %dst, align 2
    965   ret void
    966 }
    967 
    968 define void @__rsAllocationVStoreXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 2, untagged) at the rsOffsetNs
          ; address for (%x, %y, %z). The IR matches the short4 variant.
    969   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    970   %dst = bitcast i8* %raw to <4 x i16>*
    971   store <4 x i16> %val, <4 x i16>* %dst, align 2
    972   ret void
    973 }
    974 define void @__rsAllocationVStoreXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The ushort3 value arrives packed in a <2 x i32>. It is
          ; reinterpreted as <4 x i16> and lanes 0-2 are stored as <3 x i16>.
    975   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    976   %dst = bitcast i8* %raw to <3 x i16>*
    977   %lanes = bitcast <2 x i32> %val to <4 x i16>
    978   %xyz = shufflevector <4 x i16> %lanes, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
    979   store <3 x i16> %xyz, <3 x i16>* %dst, align 2
    980   ret void
    981 }
    982 define void @__rsAllocationVStoreXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is stored directly.
    983   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    984   %dst = bitcast i8* %raw to <2 x i16>*
    985   store <2 x i16> %val, <2 x i16>* %dst, align 2
    986   ret void
    987 }
    988 
    989 define void @__rsAllocationVStoreXImpl_char4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 1, no TBAA tag) at the rsOffsetNs
          ; address for (%x, %y, %z).
    990   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    991   %dst = bitcast i8* %raw to <4 x i8>*
    992   store <4 x i8> %val, <4 x i8>* %dst, align 1
    993   ret void
    994 }
    995 define void @__rsAllocationVStoreXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The char3 value arrives packed in an i32. It is reinterpreted as
          ; <4 x i8> and lanes 0-2 are stored as a <3 x i8>.
    996   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    997   %dst = bitcast i8* %raw to <3 x i8>*
    998   %lanes = bitcast i32 %val to <4 x i8>
    999   %xyz = shufflevector <4 x i8> %lanes, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1000   store <3 x i8> %xyz, <3 x i8>* %dst, align 1
   1001   ret void
   1002 }
   1003 define void @__rsAllocationVStoreXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store of a char2 that arrives packed in an i16: the value is
          ; reinterpreted as <2 x i8> and written at the rsOffsetNs address
          ; for (%x, %y, %z).
          ; The store claims byte alignment only. The matching char2 load and
          ; the char3/char4 stores treat this kind of address as align 1, and
          ; promising more alignment than the address has is undefined
          ; behaviour in LLVM IR.
   1004   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1005   %dst = bitcast i8* %raw to <2 x i8>*
   1006   %lanes = bitcast i16 %val to <2 x i8>
   1007   store <2 x i8> %lanes, <2 x i8>* %dst, align 1
   1008   ret void
   1009 }
   1010 
   1011 define void @__rsAllocationVStoreXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 1, untagged) at the rsOffsetNs
          ; address for (%x, %y, %z). The IR matches the char4 variant.
   1012   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1013   %dst = bitcast i8* %raw to <4 x i8>*
   1014   store <4 x i8> %val, <4 x i8>* %dst, align 1
   1015   ret void
   1016 }
   1017 define void @__rsAllocationVStoreXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The uchar3 value arrives packed in an i32. It is reinterpreted as
          ; <4 x i8> and lanes 0-2 are stored as a <3 x i8>.
   1018   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1019   %dst = bitcast i8* %raw to <3 x i8>*
   1020   %lanes = bitcast i32 %val to <4 x i8>
   1021   %xyz = shufflevector <4 x i8> %lanes, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1022   store <3 x i8> %xyz, <3 x i8>* %dst, align 1
   1023   ret void
   1024 }
   1025 define void @__rsAllocationVStoreXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store of a uchar2 that arrives packed in an i16: the value
          ; is reinterpreted as <2 x i8> and written at the rsOffsetNs address
          ; for (%x, %y, %z).
          ; The store claims byte alignment only. The matching uchar2 load and
          ; the uchar3/uchar4 stores treat this kind of address as align 1,
          ; and promising more alignment than the address has is undefined
          ; behaviour in LLVM IR.
   1026   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1027   %dst = bitcast i8* %raw to <2 x i8>*
   1028   %lanes = bitcast i16 %val to <2 x i8>
   1029   store <2 x i8> %lanes, <2 x i8>* %dst, align 1
   1030   ret void
   1031 }
   1032 
   1033 define void @__rsAllocationVStoreXImpl_float4(%struct.rs_allocation* nocapture readonly %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: writes %val (align 4, no TBAA tag) at the rsOffsetNs
          ; address for (%x, %y, %z).
   1034   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1035   %dst = bitcast i8* %raw to <4 x float>*
   1036   store <4 x float> %val, <4 x float>* %dst, align 4
   1037   ret void
   1038 }
   1039 define void @__rsAllocationVStoreXImpl_float3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; The float3 value arrives as <4 x i32>. It is reinterpreted as
          ; <4 x float> and lanes 0-2 are stored as a <3 x float>.
   1040   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1041   %dst = bitcast i8* %raw to <3 x float>*
   1042   %lanes = bitcast <4 x i32> %val to <4 x float>
   1043   %xyz = shufflevector <4 x float> %lanes, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1044   store <3 x float> %xyz, <3 x float>* %dst, align 4
   1045   ret void
   1046 }
   1047 define void @__rsAllocationVStoreXImpl_float2(%struct.rs_allocation* nocapture readonly %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is stored directly.
   1048   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1049   %dst = bitcast i8* %raw to <2 x float>*
   1050   store <2 x float> %val, <2 x float>* %dst, align 4
   1051   ret void
   1052 }
   1053 
   1054 define void @__rsAllocationVStoreXImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Vector store: loads the <4 x double> pointed to by %val and stores
          ; it (align 8, no TBAA tag) at the rsOffsetNs address for
          ; (%x, %y, %z).
   1055   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1056   %src = load <4 x double>* %val
   1057   %dst = bitcast i8* %raw to <4 x double>*
   1058   store <4 x double> %src, <4 x double>* %dst, align 8
   1059   ret void
   1060 }
   1061 define void @__rsAllocationVStoreXImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Three-lane variant: the value is also passed by pointer.
   1062   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1063   %src = load <3 x double>* %val
   1064   %dst = bitcast i8* %raw to <3 x double>*
   1065   store <3 x double> %src, <3 x double>* %dst, align 8
   1066   ret void
   1067 }
   1068 define void @__rsAllocationVStoreXImpl_double2(%struct.rs_allocation* nocapture readonly %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
          ; Two-lane variant: %val is passed by value and stored directly.
   1069   %raw = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1070   %dst = bitcast i8* %raw to <2 x double>*
   1071   store <2 x double> %val, <2 x double>* %dst, align 8
   1072   ret void
   1073 }
   1074 
   1075 
        ; Attribute groups referenced by the definitions above.
        ;   #0 - nounwind + readonly; used on the accessors that return
        ;        their result by value.
        ;   #1 - nounwind only; used on the functions that store to memory
        ;        (setters, vector stores and sret-returning loads).
        ;   #2 - nobuiltin; attached to every rsOffset / rsOffsetNs call site.
        ; #0 and #1 carry the same set of string attributes.
   1076 attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
   1077 attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
   1078 attributes #2 = { nobuiltin }
   1079 
   1080