Home | History | Annotate | Download | only in ll64
      1 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      2 target triple = "aarch64-linux-android"
      3 
      ; Handle type passed to every accessor in this file: four i64* fields.
      ; The visible functions never touch the fields themselves; they only
      ; forward the handle to @rsOffset.
      %struct.rs_allocation = type { i64*, i64*, i64*, i64* }

      ; External helper called by every visible accessor with (%a, element
      ; size, %x, %y, %z). Callers bitcast the returned i8* to the element
      ; type and load/store through it. Presumably it yields the address of
      ; the addressed cell -- the implementation is not part of this file.
      declare i8* @rsOffset(
          %struct.rs_allocation* nocapture readonly %a,
          i32 %sizeOf, i32 %x, i32 %y, i32 %z)

      ; Variant without the %sizeOf argument; no call to it is visible here.
      declare i8* @rsOffsetNs(
          %struct.rs_allocation* nocapture readonly %a,
          i32 %x, i32 %y, i32 %z)
      8 
      9 ; The loads and stores in this file are annotated with RenderScript-specific
     10 ; information for the type based alias analysis, such that the TBAA analysis
     11 ; understands that loads and stores from two allocations with different types
     12 ; can never access the same memory element. This is different from C, where
     13 ; a char or uchar load/store is special as it can alias with about everything.
     14 ;
     15 ; The TBAA tree in this file has the node "RenderScript Distinct TBAA" as
     16 ; its root.
     17 ; This means all loads/stores that share this common root can be proven to not
     18 ; alias. However, the alias analysis still has to assume MayAlias between
     19 ; memory accesses in this file and memory accesses annotated with the C/C++
     20 ; TBAA metadata.
     21 ; A node named "RenderScript TBAA" wraps our distinct TBAA root node.
     22 ; If we can ensure that all accesses to elements loaded from RenderScript
     23 ; allocations are either annotated with the RenderScript TBAA information or
     24 ; not annotated at all, but never annotated with the C/C++ metadata, we
     25 ; can add the "RenderScript TBAA" tree under the C/C++ TBAA tree. This enables
     26 ; TBAA to prove that an access to data from the RenderScript allocation
     27 ; does not alias with a load/store accessing something not part of a RenderScript
     28 ; allocation.
     29 ; We do this by swapping the second operand of "RenderScript TBAA" with the node
     30 ; for "Simple C/C++ TBAA", thus connecting these TBAA groups. The other root
     31 ; node (with no children) can then safely be dropped from the analysis.
     32 
     ; Root of the RenderScript TBAA tree (no parent operand).
     !13 = !{!"RenderScript Distinct TBAA"}
     ; Wrapper node whose second operand is the root; per the header comment
     ; this operand is the one swapped to hook the tree under C/C++ TBAA.
     !14 = !{!"RenderScript TBAA", !13}
     ; Common parent of all per-type element nodes defined below.
     !15 = !{!"allocation", !14}
     36 
     ; TBAA node for accesses to `char` allocation elements (parent: !15).
     !21 = !{!"char", !15}

     ; Stores %val into the cell of %a that @rsOffset returns for
     ; (%x, %y, %z), passing 1 as %sizeOf.
     define void @rsSetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i8 signext %val, i32 %x, i32 %y, i32 %z) #1 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
       store i8 %val, i8* %cell, align 1, !tbaa !21
       ret void
     }
     43 
     ; Returns the `char` loaded from the cell of %a that @rsOffset returns
     ; for (%x, %y, %z), passing 1 as %sizeOf.
     define signext i8 @rsGetElementAtImpl_char(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
       %elem = load i8, i8* %cell, align 1, !tbaa !21
       ret i8 %elem
     }
     49 
     ; TBAA node for accesses to `char2` allocation elements (parent: !15).
     !22 = !{!"char2", !15}

     ; Stores a char2 element. The value arrives packed in an i16 and is
     ; reinterpreted as <2 x i8> before being written to the cell that
     ; @rsOffset returns for (%x, %y, %z) with %sizeOf = 2.
     define void @rsSetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
     entry:
       %lanes = bitcast i16 %val to <2 x i8>
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
       %slot = bitcast i8* %cell to <2 x i8>*
       store <2 x i8> %lanes, <2 x i8>* %slot, align 2, !tbaa !22
       ret void
     }
     58 
     ; Returns the <2 x i8> loaded from the cell that @rsOffset returns for
     ; (%x, %y, %z) with %sizeOf = 2.
     define <2 x i8> @rsGetElementAtImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
       %slot = bitcast i8* %cell to <2 x i8>*
       %vec = load <2 x i8>, <2 x i8>* %slot, align 2, !tbaa !22
       ret <2 x i8> %vec
     }
     65 
     ; TBAA node for accesses to `char3` allocation elements (parent: !15).
     !23 = !{!"char3", !15}

     ; Stores a char3 element. The value arrives packed in an i32; it is
     ; viewed as <4 x i8>, lane 3 is replaced by undef, and all four lanes
     ; are written to the cell @rsOffset returns with %sizeOf = 4.
     define void @rsSetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
     entry:
       %lanes = bitcast i32 %val to <4 x i8>
       %padded = shufflevector <4 x i8> %lanes, <4 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
       %slot = bitcast i8* %cell to <4 x i8>*
       store <4 x i8> %padded, <4 x i8>* %slot, align 4, !tbaa !23
       ret void
     }
     75 
     ; Returns a char3 element: loads four i8 lanes from the cell @rsOffset
     ; returns (with %sizeOf = 4) and keeps lanes 0..2.
     define <3 x i8> @rsGetElementAtImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
       %slot = bitcast i8* %cell to <4 x i8>*
       %wide = load <4 x i8>, <4 x i8>* %slot, align 4, !tbaa !23
       %vec = shufflevector <4 x i8> %wide, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
       ret <3 x i8> %vec
     }
     83 
     ; TBAA node for accesses to `char4` allocation elements (parent: !15).
     !24 = !{!"char4", !15}

     ; Stores the <4 x i8> %val into the cell @rsOffset returns for
     ; (%x, %y, %z) with %sizeOf = 4.
     define void @rsSetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
       %slot = bitcast i8* %cell to <4 x i8>*
       store <4 x i8> %val, <4 x i8>* %slot, align 4, !tbaa !24
       ret void
     }
     91 
     ; Returns the <4 x i8> loaded from the cell @rsOffset returns for
     ; (%x, %y, %z) with %sizeOf = 4.
     define <4 x i8> @rsGetElementAtImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
       %slot = bitcast i8* %cell to <4 x i8>*
       %vec = load <4 x i8>, <4 x i8>* %slot, align 4, !tbaa !24
       ret <4 x i8> %vec
     }
     98 
     ; TBAA node for accesses to `uchar` allocation elements (parent: !15).
     !25 = !{!"uchar", !15}

     ; Stores %val into the cell of %a that @rsOffset returns for
     ; (%x, %y, %z), passing 1 as %sizeOf.
     define void @rsSetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i8 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
     entry:
       %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
       store i8 %val, i8* %cell, align 1, !tbaa !25
       ret void
     }
    105 
    ; Returns the `uchar` loaded from the cell of %a that @rsOffset returns
    ; for (%x, %y, %z), passing 1 as %sizeOf.
    define zeroext i8 @rsGetElementAtImpl_uchar(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 1, i32 %x, i32 %y, i32 %z) #2
      %elem = load i8, i8* %cell, align 1, !tbaa !25
      ret i8 %elem
    }
    111 
    ; TBAA node for accesses to `uchar2` allocation elements (parent: !15).
    !26 = !{!"uchar2", !15}

    ; Stores a uchar2 element. The value arrives packed in an i16 and is
    ; reinterpreted as <2 x i8> before being written to the cell that
    ; @rsOffset returns for (%x, %y, %z) with %sizeOf = 2.
    define void @rsSetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %lanes = bitcast i16 %val to <2 x i8>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i8>*
      store <2 x i8> %lanes, <2 x i8>* %slot, align 2, !tbaa !26
      ret void
    }
    120 
    ; Returns the <2 x i8> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 2.
    define <2 x i8> @rsGetElementAtImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i8>*
      %vec = load <2 x i8>, <2 x i8>* %slot, align 2, !tbaa !26
      ret <2 x i8> %vec
    }
    127 
    ; TBAA node for accesses to `uchar3` allocation elements (parent: !15).
    !27 = !{!"uchar3", !15}

    ; Stores a uchar3 element. The value arrives packed in an i32; it is
    ; viewed as <4 x i8>, lane 3 is replaced by undef, and all four lanes
    ; are written to the cell @rsOffset returns with %sizeOf = 4.
    define void @rsSetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %lanes = bitcast i32 %val to <4 x i8>
      %padded = shufflevector <4 x i8> %lanes, <4 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i8>*
      store <4 x i8> %padded, <4 x i8>* %slot, align 4, !tbaa !27
      ret void
    }
    137 
    ; Returns a uchar3 element: loads four i8 lanes from the cell @rsOffset
    ; returns (with %sizeOf = 4) and keeps lanes 0..2.
    define <3 x i8> @rsGetElementAtImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i8>*
      %wide = load <4 x i8>, <4 x i8>* %slot, align 4, !tbaa !27
      %vec = shufflevector <4 x i8> %wide, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
      ret <3 x i8> %vec
    }
    145 
    ; TBAA node for accesses to `uchar4` allocation elements (parent: !15).
    !28 = !{!"uchar4", !15}

    ; Stores the <4 x i8> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define void @rsSetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i8>*
      store <4 x i8> %val, <4 x i8>* %slot, align 4, !tbaa !28
      ret void
    }
    153 
    ; Returns the <4 x i8> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define <4 x i8> @rsGetElementAtImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i8>*
      %vec = load <4 x i8>, <4 x i8>* %slot, align 4, !tbaa !28
      ret <4 x i8> %vec
    }
    160 
    ; TBAA node for accesses to `short` allocation elements (parent: !15).
    !29 = !{!"short", !15}

    ; Stores the i16 %val into the cell @rsOffset returns for (%x, %y, %z)
    ; with %sizeOf = 2.
    define void @rsSetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i16 signext %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i16*
      store i16 %val, i16* %slot, align 2, !tbaa !29
      ret void
    }
    168 
    ; Returns the i16 loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 2.
    define signext i16 @rsGetElementAtImpl_short(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i16*
      %elem = load i16, i16* %slot, align 2, !tbaa !29
      ret i16 %elem
    }
    175 
    ; TBAA node for accesses to `short2` allocation elements (parent: !15).
    !30 = !{!"short2", !15}

    ; Stores the <2 x i16> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define void @rsSetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i16>*
      store <2 x i16> %val, <2 x i16>* %slot, align 4, !tbaa !30
      ret void
    }
    183 
    ; Returns the <2 x i16> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define <2 x i16> @rsGetElementAtImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i16>*
      %vec = load <2 x i16>, <2 x i16>* %slot, align 4, !tbaa !30
      ret <2 x i16> %vec
    }
    190 
    ; TBAA node for accesses to `short3` allocation elements (parent: !15).
    !31 = !{!"short3", !15}

    ; Stores a short3 element. The value arrives as <2 x i32>; it is viewed
    ; as <4 x i16>, lane 3 is replaced by undef, and all four lanes are
    ; written to the cell @rsOffset returns with %sizeOf = 8.
    define void @rsSetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %lanes = bitcast <2 x i32> %val to <4 x i16>
      %padded = shufflevector <4 x i16> %lanes, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      store <4 x i16> %padded, <4 x i16>* %slot, align 8, !tbaa !31
      ret void
    }
    200 
    ; Returns a short3 element: loads four i16 lanes from the cell @rsOffset
    ; returns (with %sizeOf = 8) and keeps lanes 0..2.
    define <3 x i16> @rsGetElementAtImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      %wide = load <4 x i16>, <4 x i16>* %slot, align 8, !tbaa !31
      %vec = shufflevector <4 x i16> %wide, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
      ret <3 x i16> %vec
    }
    208 
    ; TBAA node for accesses to `short4` allocation elements (parent: !15).
    !32 = !{!"short4", !15}

    ; Stores the <4 x i16> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define void @rsSetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      store <4 x i16> %val, <4 x i16>* %slot, align 8, !tbaa !32
      ret void
    }
    216 
    ; Returns the <4 x i16> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define <4 x i16> @rsGetElementAtImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      %vec = load <4 x i16>, <4 x i16>* %slot, align 8, !tbaa !32
      ret <4 x i16> %vec
    }
    223 
    ; TBAA node for accesses to `ushort` allocation elements (parent: !15).
    !33 = !{!"ushort", !15}

    ; Stores the i16 %val into the cell @rsOffset returns for (%x, %y, %z)
    ; with %sizeOf = 2.
    define void @rsSetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i16 zeroext %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i16*
      store i16 %val, i16* %slot, align 2, !tbaa !33
      ret void
    }
    231 
    ; Returns the i16 loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 2.
    define zeroext i16 @rsGetElementAtImpl_ushort(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 2, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i16*
      %elem = load i16, i16* %slot, align 2, !tbaa !33
      ret i16 %elem
    }
    238 
    ; TBAA node for accesses to `ushort2` allocation elements (parent: !15).
    !34 = !{!"ushort2", !15}

    ; Stores the <2 x i16> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define void @rsSetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i16>*
      store <2 x i16> %val, <2 x i16>* %slot, align 4, !tbaa !34
      ret void
    }
    246 
    ; Returns the <2 x i16> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define <2 x i16> @rsGetElementAtImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i16>*
      %vec = load <2 x i16>, <2 x i16>* %slot, align 4, !tbaa !34
      ret <2 x i16> %vec
    }
    253 
    ; TBAA node for accesses to `ushort3` allocation elements (parent: !15).
    !35 = !{!"ushort3", !15}

    ; Stores a ushort3 element. The value arrives as <2 x i32>; it is viewed
    ; as <4 x i16>, lane 3 is replaced by undef, and all four lanes are
    ; written to the cell @rsOffset returns with %sizeOf = 8.
    define void @rsSetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %lanes = bitcast <2 x i32> %val to <4 x i16>
      %padded = shufflevector <4 x i16> %lanes, <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      store <4 x i16> %padded, <4 x i16>* %slot, align 8, !tbaa !35
      ret void
    }
    263 
    ; Returns a ushort3 element: loads four i16 lanes from the cell
    ; @rsOffset returns (with %sizeOf = 8) and keeps lanes 0..2.
    define <3 x i16> @rsGetElementAtImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      %wide = load <4 x i16>, <4 x i16>* %slot, align 8, !tbaa !35
      %vec = shufflevector <4 x i16> %wide, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
      ret <3 x i16> %vec
    }
    271 
    ; TBAA node for accesses to `ushort4` allocation elements (parent: !15).
    !36 = !{!"ushort4", !15}

    ; Stores the <4 x i16> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define void @rsSetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      store <4 x i16> %val, <4 x i16>* %slot, align 8, !tbaa !36
      ret void
    }
    279 
    ; Returns the <4 x i16> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define <4 x i16> @rsGetElementAtImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i16>*
      %vec = load <4 x i16>, <4 x i16>* %slot, align 8, !tbaa !36
      ret <4 x i16> %vec
    }
    286 
    ; TBAA node for accesses to `int` allocation elements (parent: !15).
    !37 = !{!"int", !15}

    ; Stores the i32 %val into the cell @rsOffset returns for (%x, %y, %z)
    ; with %sizeOf = 4.
    define void @rsSetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i32*
      store i32 %val, i32* %slot, align 4, !tbaa !37
      ret void
    }
    294 
    ; Returns the i32 loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define i32 @rsGetElementAtImpl_int(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i32*
      %elem = load i32, i32* %slot, align 4, !tbaa !37
      ret i32 %elem
    }
    301 
    ; TBAA node for accesses to `int2` allocation elements (parent: !15).
    !38 = !{!"int2", !15}

    ; Stores the <2 x i32> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define void @rsSetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i32>*
      store <2 x i32> %val, <2 x i32>* %slot, align 8, !tbaa !38
      ret void
    }
    309 
    ; Returns the <2 x i32> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define <2 x i32> @rsGetElementAtImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i32>*
      %vec = load <2 x i32>, <2 x i32>* %slot, align 8, !tbaa !38
      ret <2 x i32> %vec
    }
    316 
    ; TBAA node for accesses to `int3` allocation elements (parent: !15).
    !39 = !{!"int3", !15}

    ; Stores an int3 element. The value arrives as <4 x i32>; lane 3 is
    ; replaced by undef and all four lanes are written to the cell
    ; @rsOffset returns with %sizeOf = 16.
    define void @rsSetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %padded = shufflevector <4 x i32> %val, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      store <4 x i32> %padded, <4 x i32>* %slot, align 16, !tbaa !39
      ret void
    }
    325 
    ; Returns an int3 element: loads four i32 lanes from the cell @rsOffset
    ; returns (with %sizeOf = 16) and keeps lanes 0..2.
    ; NOTE(review): this load is marked align 8 while the matching store in
    ; rsSetElementAtImpl_int3 uses align 16 -- confirm whether the weaker
    ; alignment is intentional.
    define <3 x i32> @rsGetElementAtImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      %wide = load <4 x i32>, <4 x i32>* %slot, align 8, !tbaa !39
      %vec = shufflevector <4 x i32> %wide, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
      ret <3 x i32> %vec
    }
    333 
    ; TBAA node for accesses to `int4` allocation elements (parent: !15).
    !40 = !{!"int4", !15}

    ; Stores the <4 x i32> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define void @rsSetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      store <4 x i32> %val, <4 x i32>* %slot, align 16, !tbaa !40
      ret void
    }
    341 
    ; Returns the <4 x i32> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define <4 x i32> @rsGetElementAtImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      %vec = load <4 x i32>, <4 x i32>* %slot, align 16, !tbaa !40
      ret <4 x i32> %vec
    }
    348 
    ; TBAA node for accesses to `uint` allocation elements (parent: !15).
    !41 = !{!"uint", !15}

    ; Stores the i32 %val into the cell @rsOffset returns for (%x, %y, %z)
    ; with %sizeOf = 4.
    define void @rsSetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i32*
      store i32 %val, i32* %slot, align 4, !tbaa !41
      ret void
    }
    356 
    ; Returns the i32 loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define i32 @rsGetElementAtImpl_uint(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i32*
      %elem = load i32, i32* %slot, align 4, !tbaa !41
      ret i32 %elem
    }
    363 
    ; TBAA node for accesses to `uint2` allocation elements (parent: !15).
    !42 = !{!"uint2", !15}

    ; Stores the <2 x i32> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define void @rsSetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i32>*
      store <2 x i32> %val, <2 x i32>* %slot, align 8, !tbaa !42
      ret void
    }
    371 
    ; Returns the <2 x i32> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define <2 x i32> @rsGetElementAtImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i32>*
      %vec = load <2 x i32>, <2 x i32>* %slot, align 8, !tbaa !42
      ret <2 x i32> %vec
    }
    378 
    ; TBAA node for accesses to `uint3` allocation elements (parent: !15).
    !43 = !{!"uint3", !15}

    ; Stores a uint3 element. The value arrives as <4 x i32>; lane 3 is
    ; replaced by undef and all four lanes are written to the cell
    ; @rsOffset returns with %sizeOf = 16.
    define void @rsSetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %padded = shufflevector <4 x i32> %val, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      store <4 x i32> %padded, <4 x i32>* %slot, align 16, !tbaa !43
      ret void
    }
    387 
    ; Returns a uint3 element: loads four i32 lanes from the cell @rsOffset
    ; returns (with %sizeOf = 16) and keeps lanes 0..2.
    ; NOTE(review): this load is marked align 8 while the matching store in
    ; rsSetElementAtImpl_uint3 uses align 16 -- confirm whether the weaker
    ; alignment is intentional.
    define <3 x i32> @rsGetElementAtImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      %wide = load <4 x i32>, <4 x i32>* %slot, align 8, !tbaa !43
      %vec = shufflevector <4 x i32> %wide, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
      ret <3 x i32> %vec
    }
    395 
    ; TBAA node for accesses to `uint4` allocation elements (parent: !15).
    !44 = !{!"uint4", !15}

    ; Stores the <4 x i32> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define void @rsSetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      store <4 x i32> %val, <4 x i32>* %slot, align 16, !tbaa !44
      ret void
    }
    403 
    ; Returns the <4 x i32> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define <4 x i32> @rsGetElementAtImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i32>*
      %vec = load <4 x i32>, <4 x i32>* %slot, align 16, !tbaa !44
      ret <4 x i32> %vec
    }
    410 
    ; TBAA node for accesses to `long` allocation elements (parent: !15).
    !45 = !{!"long", !15}

    ; Stores the i64 %val into the cell @rsOffset returns for (%x, %y, %z)
    ; with %sizeOf = 8.
    define void @rsSetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i64*
      store i64 %val, i64* %slot, align 8, !tbaa !45
      ret void
    }
    418 
    ; Returns the i64 loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define i64 @rsGetElementAtImpl_long(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i64*
      %elem = load i64, i64* %slot, align 8, !tbaa !45
      ret i64 %elem
    }
    425 
    ; TBAA node for accesses to `long2` allocation elements (parent: !15).
    !46 = !{!"long2", !15}

    ; Stores the <2 x i64> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define void @rsSetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i64>*
      store <2 x i64> %val, <2 x i64>* %slot, align 16, !tbaa !46
      ret void
    }
    433 
    ; Returns the <2 x i64> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define <2 x i64> @rsGetElementAtImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i64>*
      %vec = load <2 x i64>, <2 x i64>* %slot, align 16, !tbaa !46
      ret <2 x i64> %vec
    }
    440 
    ; TBAA node for accesses to `long3` allocation elements (parent: !15).
    !47 = !{!"long3", !15}

    ; Stores a long3 element. The value is passed indirectly through %val;
    ; it is loaded as <3 x i64>, widened to four lanes (lane 3 undef) and
    ; written to the cell @rsOffset returns with %sizeOf = 32.
    ; The load from %val is kept after the call, as in the original order.
    define void @rsSetElementAtImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %src = load <3 x i64>, <3 x i64>* %val
      %padded = shufflevector <3 x i64> %src, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %slot = bitcast i8* %cell to <4 x i64>*
      store <4 x i64> %padded, <4 x i64>* %slot, align 32, !tbaa !47
      ret void
    }
    450 
    ; Reads a long3 element and writes it to the caller's sret buffer.
    ;   %agg.result - destination buffer; written as a full <4 x i64>.
    ;   %a, %x, %y, %z - forwarded to @rsOffset with %sizeOf = 32.
    ; The TBAA tag !47 ("long3") is attached to the load from the allocation
    ; cell, matching the tagged store in rsSetElementAtImpl_long3. The store
    ; into %agg.result is deliberately left untagged: that buffer belongs to
    ; the caller and is not allocation memory, so tagging it with an
    ; allocation element type would misdescribe it to alias analysis.
    define void @rsGetElementAtImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i64>*
      %wide = load <4 x i64>, <4 x i64>* %slot, align 32, !tbaa !47
      %out = bitcast <3 x i64>* %agg.result to <4 x i64>*
      store <4 x i64> %wide, <4 x i64>* %out, align 32
      ret void
    }
    459 
    ; TBAA node for accesses to `long4` allocation elements (parent: !15).
    !48 = !{!"long4", !15}

    ; Stores a long4 element. The value is passed indirectly through %val,
    ; loaded as <4 x i64>, and written to the cell @rsOffset returns with
    ; %sizeOf = 32. The load from %val is kept after the call, as in the
    ; original order.
    define void @rsSetElementAtImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %src = load <4 x i64>, <4 x i64>* %val
      %slot = bitcast i8* %cell to <4 x i64>*
      store <4 x i64> %src, <4 x i64>* %slot, align 32, !tbaa !48
      ret void
    }
    468 
    ; Reads a long4 element and writes it to the caller's sret buffer.
    ;   %agg.result - destination buffer for the <4 x i64> value.
    ;   %a, %x, %y, %z - forwarded to @rsOffset with %sizeOf = 32.
    ; The load from the allocation cell carries the specific "long4" tag
    ; (!48), matching the tagged store in rsSetElementAtImpl_long4, instead
    ; of the generic "allocation" parent node. The store into %agg.result is
    ; left untagged because that buffer belongs to the caller and is not
    ; allocation memory.
    define void @rsGetElementAtImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i64>*
      %vec = load <4 x i64>, <4 x i64>* %slot, align 32, !tbaa !48
      store <4 x i64> %vec, <4 x i64>* %agg.result, align 32
      ret void
    }
    476 
    ; TBAA node for accesses to `ulong` allocation elements (parent: !15).
    !49 = !{!"ulong", !15}

    ; Stores the i64 %val into the cell @rsOffset returns for (%x, %y, %z)
    ; with %sizeOf = 8.
    define void @rsSetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i64 %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i64*
      store i64 %val, i64* %slot, align 8, !tbaa !49
      ret void
    }
    484 
    ; Returns the i64 loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define i64 @rsGetElementAtImpl_ulong(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to i64*
      %elem = load i64, i64* %slot, align 8, !tbaa !49
      ret i64 %elem
    }
    491 
    ; TBAA node for accesses to `ulong2` allocation elements (parent: !15).
    !50 = !{!"ulong2", !15}

    ; Stores the <2 x i64> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define void @rsSetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i64>*
      store <2 x i64> %val, <2 x i64>* %slot, align 16, !tbaa !50
      ret void
    }
    499 
    ; Returns the <2 x i64> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 16.
    define <2 x i64> @rsGetElementAtImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x i64>*
      %vec = load <2 x i64>, <2 x i64>* %slot, align 16, !tbaa !50
      ret <2 x i64> %vec
    }
    506 
    ; TBAA node for accesses to `ulong3` allocation elements (parent: !15).
    !51 = !{!"ulong3", !15}

    ; Stores a ulong3 element. The value is passed indirectly through %val;
    ; it is loaded as <3 x i64>, widened to four lanes (lane 3 undef) and
    ; written to the cell @rsOffset returns with %sizeOf = 32.
    ; The load from %val is kept after the call, as in the original order.
    define void @rsSetElementAtImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %src = load <3 x i64>, <3 x i64>* %val
      %padded = shufflevector <3 x i64> %src, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %slot = bitcast i8* %cell to <4 x i64>*
      store <4 x i64> %padded, <4 x i64>* %slot, align 32, !tbaa !51
      ret void
    }
    516 
    ; Reads a ulong3 element and writes it to the caller's sret buffer.
    ;   %agg.result - destination buffer; written as a full <4 x i64>.
    ;   %a, %x, %y, %z - forwarded to @rsOffset with %sizeOf = 32.
    ; The TBAA tag !51 ("ulong3") is attached to the load from the
    ; allocation cell, matching the tagged store in
    ; rsSetElementAtImpl_ulong3. The store into %agg.result is deliberately
    ; left untagged: that buffer belongs to the caller and is not allocation
    ; memory, so tagging it with an allocation element type would
    ; misdescribe it to alias analysis.
    define void @rsGetElementAtImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i64>*
      %wide = load <4 x i64>, <4 x i64>* %slot, align 32, !tbaa !51
      %out = bitcast <3 x i64>* %agg.result to <4 x i64>*
      store <4 x i64> %wide, <4 x i64>* %out, align 32
      ret void
    }
    525 
    ; TBAA node for accesses to `ulong4` allocation elements (parent: !15).
    !52 = !{!"ulong4", !15}

    ; Stores a ulong4 element. The value is passed indirectly through %val,
    ; loaded as <4 x i64>, and written to the cell @rsOffset returns with
    ; %sizeOf = 32. The load from %val is kept after the call, as in the
    ; original order.
    define void @rsSetElementAtImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %src = load <4 x i64>, <4 x i64>* %val
      %slot = bitcast i8* %cell to <4 x i64>*
      store <4 x i64> %src, <4 x i64>* %slot, align 32, !tbaa !52
      ret void
    }
    534 
    ; Reads a ulong4 element and writes it to the caller's sret buffer.
    ;   %agg.result - destination buffer for the <4 x i64> value.
    ;   %a, %x, %y, %z - forwarded to @rsOffset with %sizeOf = 32.
    ; The load from the allocation cell carries the specific "ulong4" tag
    ; (!52), matching the tagged store in rsSetElementAtImpl_ulong4, instead
    ; of the generic "allocation" parent node. The store into %agg.result is
    ; left untagged because that buffer belongs to the caller and is not
    ; allocation memory.
    define void @rsGetElementAtImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x i64>*
      %vec = load <4 x i64>, <4 x i64>* %slot, align 32, !tbaa !52
      store <4 x i64> %vec, <4 x i64>* %agg.result, align 32
      ret void
    }
    542 
    ; TBAA node for accesses to `float` allocation elements (parent: !15).
    !53 = !{!"float", !15}

    ; Stores the float %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define void @rsSetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, float %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to float*
      store float %val, float* %slot, align 4, !tbaa !53
      ret void
    }
    550 
    ; Returns the float loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 4.
    define float @rsGetElementAtImpl_float(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 4, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to float*
      %elem = load float, float* %slot, align 4, !tbaa !53
      ret float %elem
    }
    557 
    ; TBAA node for accesses to `float2` allocation elements (parent: !15).
    !54 = !{!"float2", !15}

    ; Stores the <2 x float> %val into the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define void @rsSetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x float>*
      store <2 x float> %val, <2 x float>* %slot, align 8, !tbaa !54
      ret void
    }
    565 
    ; Returns the <2 x float> loaded from the cell @rsOffset returns for
    ; (%x, %y, %z) with %sizeOf = 8.
    define <2 x float> @rsGetElementAtImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <2 x float>*
      %vec = load <2 x float>, <2 x float>* %slot, align 8, !tbaa !54
      ret <2 x float> %vec
    }
    572 
    ; TBAA node for accesses to `float3` allocation elements (parent: !15).
    !55 = !{!"float3", !15}

    ; Stores a float3 element. The value arrives as <4 x i32>; it is viewed
    ; as <4 x float>, lane 3 is replaced by undef, and all four lanes are
    ; written to the cell @rsOffset returns with %sizeOf = 16.
    define void @rsSetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    entry:
      %lanes = bitcast <4 x i32> %val to <4 x float>
      %padded = shufflevector <4 x float> %lanes, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x float>*
      store <4 x float> %padded, <4 x float>* %slot, align 16, !tbaa !55
      ret void
    }
    582 
    ; Returns a float3 element: loads four float lanes from the cell
    ; @rsOffset returns (with %sizeOf = 16) and keeps lanes 0..2.
    ; NOTE(review): this load is marked align 8 while the matching store in
    ; rsSetElementAtImpl_float3 uses align 16 -- confirm whether the weaker
    ; alignment is intentional.
    define <3 x float> @rsGetElementAtImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    entry:
      %cell = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
      %slot = bitcast i8* %cell to <4 x float>*
      %wide = load <4 x float>, <4 x float>* %slot, align 8, !tbaa !55
      %vec = shufflevector <4 x float> %wide, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
      ret <3 x float> %vec
    }
    590 
    591 !56 = !{!"float4", !15}
    ; !56 is the TBAA node used by the float4 accessors (parent node !15).
    ; Stores %val (align 16, TBAA !56) at the pointer returned by
    ; rsOffset(%a, sizeOf = 16, %x, %y, %z).
    592 define void @rsSetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
    593   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    594   %dst = bitcast i8* %addr to <4 x float>*
    595   store <4 x float> %val, <4 x float>* %dst, align 16, !tbaa !56
    596   ret void
    597 }
    598 
    ; Loads a <4 x float> (align 16, TBAA !56) from the pointer returned by
    ; rsOffset(%a, sizeOf = 16, %x, %y, %z) and returns it.
    599 define <4 x float> @rsGetElementAtImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    600   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    601   %src = bitcast i8* %addr to <4 x float>*
    602   %elem = load <4 x float>, <4 x float>* %src, align 16, !tbaa !56
    603   ret <4 x float> %elem
    604 }
    605 
    606 !57 = !{!"double", !15}
    ; !57 is the TBAA node used by the double accessors (parent node !15).
    ; Stores %val (align 8, TBAA !57) at the pointer returned by
    ; rsOffset(%a, sizeOf = 8, %x, %y, %z).
    607 define void @rsSetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, double %val, i32 %x, i32 %y, i32 %z) #1 {
    608   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    609   %dst = bitcast i8* %addr to double*
    610   store double %val, double* %dst, align 8, !tbaa !57
    611   ret void
    612 }
    613 
    ; Loads a double (align 8, TBAA !57) from the pointer returned by
    ; rsOffset(%a, sizeOf = 8, %x, %y, %z) and returns it.
    614 define double @rsGetElementAtImpl_double(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    615   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 8, i32 %x, i32 %y, i32 %z) #2
    616   %src = bitcast i8* %addr to double*
    617   %elem = load double, double* %src, align 8, !tbaa !57
    618   ret double %elem
    619 }
    620 
    621 !58 = !{!"double2", !15}
    ; !58 is the TBAA node used by the double2 accessors (parent node !15).
    ; Stores %val (align 16, TBAA !58) at the pointer returned by
    ; rsOffset(%a, sizeOf = 16, %x, %y, %z).
    622 define void @rsSetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
    623   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    624   %dst = bitcast i8* %addr to <2 x double>*
    625   store <2 x double> %val, <2 x double>* %dst, align 16, !tbaa !58
    626   ret void
    627 }
    628 
    ; Loads a <2 x double> (align 16, TBAA !58) from the pointer returned by
    ; rsOffset(%a, sizeOf = 16, %x, %y, %z) and returns it.
    629 define <2 x double> @rsGetElementAtImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    630   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 16, i32 %x, i32 %y, i32 %z) #2
    631   %src = bitcast i8* %addr to <2 x double>*
    632   %elem = load <2 x double>, <2 x double>* %src, align 16, !tbaa !58
    633   ret <2 x double> %elem
    634 }
    635 
    636 !59 = !{!"double3", !15}
    ; !59 is the TBAA node used by the double3 accessors (parent node !15).
    ; %val is passed by pointer. The <3 x double> it points to is widened to
    ; <4 x double> (lane 3 undef) and stored (align 32, TBAA !59) at the pointer
    ; returned by rsOffset(%a, sizeOf = 32, %x, %y, %z).
    637 define void @rsSetElementAtImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
    638   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    639   %vec3 = load <3 x double>, <3 x double>* %val
    640   %dst = bitcast i8* %addr to <4 x double>*
    641   %padded = shufflevector <3 x double> %vec3, <3 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    642   store <4 x double> %padded, <4 x double>* %dst, align 32, !tbaa !59
    643   ret void
    644 }
    645 
    646 
    ; Copies the 32-byte <4 x double> slot at the pointer returned by
    ; rsOffset(%a, sizeOf = 32, %x, %y, %z) into the sret result %agg.result
    ; (written through a <4 x double>* view of it).
    647 define void @rsGetElementAtImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    648   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    649   %2 = bitcast i8* %1 to <4 x double>*
          ; The read of the allocation element carries the double3 TBAA tag, like
          ; the matching store in rsSetElementAtImpl_double3.
    650   %3 = load <4 x double>, <4 x double>* %2, align 32, !tbaa !59
    651   %4 = bitcast <3 x double>* %agg.result to <4 x double>*
          ; %agg.result is the caller's return slot, not allocation memory, so the
          ; store is deliberately left without a RenderScript TBAA tag.
    652   store <4 x double> %3, <4 x double>* %4, align 32
    653   ret void
    654 }
    655 
    656 !60 = !{!"double4", !15}
    ; !60 is the TBAA node used by the double4 accessors (parent node !15).
    ; %val is passed by pointer. The <4 x double> it points to is loaded and
    ; stored (align 32, TBAA !60) at the pointer returned by
    ; rsOffset(%a, sizeOf = 32, %x, %y, %z).
    657 define void @rsSetElementAtImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
    658   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    659   %vec = load <4 x double>, <4 x double>* %val
    660   %dst = bitcast i8* %addr to <4 x double>*
    661   store <4 x double> %vec, <4 x double>* %dst, align 32, !tbaa !60
    662   ret void
    663 }
    ; Copies the <4 x double> at the pointer returned by
    ; rsOffset(%a, sizeOf = 32, %x, %y, %z) into the sret result %agg.result.
    664 define void @rsGetElementAtImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    665   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a, i32 32, i32 %x, i32 %y, i32 %z) #2
    666   %2 = bitcast i8* %1 to <4 x double>*
          ; Tag the allocation read with the double4 node (!60), matching the store
          ; in rsSetElementAtImpl_double4, instead of the generic parent node !15.
    667   %3 = load <4 x double>, <4 x double>* %2, align 32, !tbaa !60
          ; %agg.result is the caller's return slot, not allocation memory, so the
          ; store is deliberately left without a RenderScript TBAA tag.
    668   store <4 x double> %3, <4 x double>* %agg.result, align 32
    669   ret void
    670 }
    671 
    672 !61 = !{!"half", !15}
    ; !61 is the TBAA node used by the half accessors (parent node !15).
    ; Stores %val (align 2, TBAA !61) at the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 2, %x, %y, %z).
    673 define void @rsSetElementAtImpl_half(%struct.rs_allocation* nocapture readonly %a.coerce, half %val, i32 %x, i32 %y, i32 %z) #1 {
    674   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    675   %dst = bitcast i8* %addr to half*
    676   store half %val, half* %dst, align 2, !tbaa !61
    677   ret void
    678 }
    679 
    ; Loads a half (align 2, TBAA !61) from the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 2, %x, %y, %z) and returns it.
    680 define half @rsGetElementAtImpl_half(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    681   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 2, i32 %x, i32 %y, i32 %z) #2
    682   %src = bitcast i8* %addr to half*
    683   %elem = load half, half* %src, align 2, !tbaa !61
    684   ret half %elem
    685 }
    686 
    687 !62 = !{!"half2", !15}
    ; !62 is the TBAA node used by the half2 accessors (parent node !15).
    ; Stores %val (align 4, TBAA !62) at the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 4, %x, %y, %z).
    688 define void @rsSetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, <2 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
    689   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    690   %dst = bitcast i8* %addr to <2 x half>*
    691   store <2 x half> %val, <2 x half>* %dst, align 4, !tbaa !62
    692   ret void
    693 }
    694 
    ; Loads a <2 x half> (align 4, TBAA !62) from the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 4, %x, %y, %z) and returns it.
    695 define <2 x half> @rsGetElementAtImpl_half2(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    696   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 4, i32 %x, i32 %y, i32 %z) #2
    697   %src = bitcast i8* %addr to <2 x half>*
    698   %elem = load <2 x half>, <2 x half>* %src, align 4, !tbaa !62
    699   ret <2 x half> %elem
    700 }
    701 
    702 !63 = !{!"half3", !15}
    ; !63 is the TBAA node used by the half3 accessors (parent node !15).
    ; %val arrives as <2 x i32>; it is reinterpreted as <4 x half>, lane 3 is
    ; replaced by undef, and the 8-byte vector is stored (align 8, TBAA !63)
    ; at the pointer returned by rsOffset(%a.coerce, sizeOf = 8, %x, %y, %z).
    703 define void @rsSetElementAtImpl_half3(%struct.rs_allocation* nocapture readonly %a.coerce, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    704   %as.half = bitcast <2 x i32> %val to <4 x half>
    705   %padded = shufflevector <4 x half> %as.half, <4 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
    706   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    707   %dst = bitcast i8* %addr to <4 x half>*
    708   store <4 x half> %padded, <4 x half>* %dst, align 8, !tbaa !63
    709   ret void
    710 }
    711 
    ; Loads the 8-byte <4 x half> slot at the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 8, %x, %y, %z) and returns its first three lanes.
    ; Uses attribute group #0 (nounwind readonly) like the other value-returning
    ; getters in this file: the body only calls rsOffset and loads. It was #1.
    712 define <3 x half> @rsGetElementAtImpl_half3(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    713   %1 = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    714   %2 = bitcast i8* %1 to <4 x half>*
    715   %3 = load <4 x half>, <4 x half>* %2, align 8, !tbaa !63
          ; Drop the padding lane.
    716   %4 = shufflevector <4 x half> %3, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
    717   ret <3 x half> %4
    718 }
    719 
    720 !64 = !{!"half4", !15}
    ; !64 is the TBAA node used by the half4 accessors (parent node !15).
    ; Stores %val (align 8, TBAA !64) at the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 8, %x, %y, %z).
    721 define void @rsSetElementAtImpl_half4(%struct.rs_allocation* nocapture readonly %a.coerce, <4 x half> %val, i32 %x, i32 %y, i32 %z) #1 {
    722   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    723   %dst = bitcast i8* %addr to <4 x half>*
    724   store <4 x half> %val, <4 x half>* %dst, align 8, !tbaa !64
    725   ret void
    726 }
    727 
    ; Loads a <4 x half> (align 8, TBAA !64) from the pointer returned by
    ; rsOffset(%a.coerce, sizeOf = 8, %x, %y, %z) and returns it.
    728 define <4 x half> @rsGetElementAtImpl_half4(%struct.rs_allocation* nocapture readonly %a.coerce, i32 %x, i32 %y, i32 %z) #0 {
    729   %addr = tail call i8* @rsOffset(%struct.rs_allocation* %a.coerce, i32 8, i32 %x, i32 %y, i32 %z) #2
    730   %src = bitcast i8* %addr to <4 x half>*
    731   %elem = load <4 x half>, <4 x half>* %src, align 8, !tbaa !64
    732   ret <4 x half> %elem
    733 }
    734 
    735 
    ; Loads a <4 x i64> (align 8, no TBAA tag) from the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z) and writes it to the sret slot %agg.result.
    736 define void @__rsAllocationVLoadXImpl_long4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    737   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    738   %src = bitcast i8* %addr to <4 x i64>*
    739   %vec = load <4 x i64>, <4 x i64>* %src, align 8
    740   store <4 x i64> %vec, <4 x i64>* %agg.result
    741   ret void
    742 }
    ; Loads a <3 x i64> (align 8, no TBAA tag) from the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z) and writes it to the sret slot %agg.result.
    743 define void @__rsAllocationVLoadXImpl_long3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    744   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    745   %src = bitcast i8* %addr to <3 x i64>*
    746   %vec = load <3 x i64>, <3 x i64>* %src, align 8
    747   store <3 x i64> %vec, <3 x i64>* %agg.result
    748   ret void
    749 }
    ; Returns the <2 x i64> loaded (align 8, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    750 define <2 x i64> @__rsAllocationVLoadXImpl_long2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    751   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    752   %src = bitcast i8* %addr to <2 x i64>*
    753   %vec = load <2 x i64>, <2 x i64>* %src, align 8
    754   ret <2 x i64> %vec
    755 }
    756 
    ; Loads a <4 x i64> (align 8, no TBAA tag) from the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z) and writes it to the sret slot %agg.result.
    757 define void @__rsAllocationVLoadXImpl_ulong4(<4 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    758   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    759   %src = bitcast i8* %addr to <4 x i64>*
    760   %vec = load <4 x i64>, <4 x i64>* %src, align 8
    761   store <4 x i64> %vec, <4 x i64>* %agg.result
    762   ret void
    763 }
    ; Loads a <3 x i64> (align 8, no TBAA tag) from the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z) and writes it to the sret slot %agg.result.
    764 define void @__rsAllocationVLoadXImpl_ulong3(<3 x i64>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    765   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    766   %src = bitcast i8* %addr to <3 x i64>*
    767   %vec = load <3 x i64>, <3 x i64>* %src, align 8
    768   store <3 x i64> %vec, <3 x i64>* %agg.result
    769   ret void
    770 }
    ; Returns the <2 x i64> loaded (align 8, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    771 define <2 x i64> @__rsAllocationVLoadXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    772   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    773   %src = bitcast i8* %addr to <2 x i64>*
    774   %vec = load <2 x i64>, <2 x i64>* %src, align 8
    775   ret <2 x i64> %vec
    776 }
    777 
    ; Returns the <4 x i32> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    778 define <4 x i32> @__rsAllocationVLoadXImpl_int4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    779   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    780   %src = bitcast i8* %addr to <4 x i32>*
    781   %vec = load <4 x i32>, <4 x i32>* %src, align 4
    782   ret <4 x i32> %vec
    783 }
    ; Returns the <3 x i32> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    784 define <3 x i32> @__rsAllocationVLoadXImpl_int3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    785   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    786   %src = bitcast i8* %addr to <3 x i32>*
    787   %vec = load <3 x i32>, <3 x i32>* %src, align 4
    788   ret <3 x i32> %vec
    789 }
    ; Returns the <2 x i32> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    790 define <2 x i32> @__rsAllocationVLoadXImpl_int2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    791   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    792   %src = bitcast i8* %addr to <2 x i32>*
    793   %vec = load <2 x i32>, <2 x i32>* %src, align 4
    794   ret <2 x i32> %vec
    795 }
    796 
    ; Returns the <4 x i32> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    797 define <4 x i32> @__rsAllocationVLoadXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    798   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    799   %src = bitcast i8* %addr to <4 x i32>*
    800   %vec = load <4 x i32>, <4 x i32>* %src, align 4
    801   ret <4 x i32> %vec
    802 }
    ; Returns the <3 x i32> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    803 define <3 x i32> @__rsAllocationVLoadXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    804   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    805   %src = bitcast i8* %addr to <3 x i32>*
    806   %vec = load <3 x i32>, <3 x i32>* %src, align 4
    807   ret <3 x i32> %vec
    808 }
    ; Returns the <2 x i32> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    809 define <2 x i32> @__rsAllocationVLoadXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    810   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    811   %src = bitcast i8* %addr to <2 x i32>*
    812   %vec = load <2 x i32>, <2 x i32>* %src, align 4
    813   ret <2 x i32> %vec
    814 }
    815 
    ; Returns the <4 x i16> loaded (align 2, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    816 define <4 x i16> @__rsAllocationVLoadXImpl_short4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    817   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    818   %src = bitcast i8* %addr to <4 x i16>*
    819   %vec = load <4 x i16>, <4 x i16>* %src, align 2
    820   ret <4 x i16> %vec
    821 }
    ; Returns the <3 x i16> loaded (align 2, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    822 define <3 x i16> @__rsAllocationVLoadXImpl_short3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    823   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    824   %src = bitcast i8* %addr to <3 x i16>*
    825   %vec = load <3 x i16>, <3 x i16>* %src, align 2
    826   ret <3 x i16> %vec
    827 }
    ; Returns the <2 x i16> loaded (align 2, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    828 define <2 x i16> @__rsAllocationVLoadXImpl_short2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    829   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    830   %src = bitcast i8* %addr to <2 x i16>*
    831   %vec = load <2 x i16>, <2 x i16>* %src, align 2
    832   ret <2 x i16> %vec
    833 }
    834 
    ; Returns the <4 x i16> loaded (align 2, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    835 define <4 x i16> @__rsAllocationVLoadXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    836   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    837   %src = bitcast i8* %addr to <4 x i16>*
    838   %vec = load <4 x i16>, <4 x i16>* %src, align 2
    839   ret <4 x i16> %vec
    840 }
    ; Returns the <3 x i16> loaded (align 2, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    841 define <3 x i16> @__rsAllocationVLoadXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    842   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    843   %src = bitcast i8* %addr to <3 x i16>*
    844   %vec = load <3 x i16>, <3 x i16>* %src, align 2
    845   ret <3 x i16> %vec
    846 }
    ; Returns the <2 x i16> loaded (align 2, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    847 define <2 x i16> @__rsAllocationVLoadXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    848   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    849   %src = bitcast i8* %addr to <2 x i16>*
    850   %vec = load <2 x i16>, <2 x i16>* %src, align 2
    851   ret <2 x i16> %vec
    852 }
    853 
    ; Returns the <4 x i8> loaded (align 1, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    854 define <4 x i8> @__rsAllocationVLoadXImpl_char4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    855   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    856   %src = bitcast i8* %addr to <4 x i8>*
    857   %vec = load <4 x i8>, <4 x i8>* %src, align 1
    858   ret <4 x i8> %vec
    859 }
    ; Returns the <3 x i8> loaded (align 1, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    860 define <3 x i8> @__rsAllocationVLoadXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    861   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    862   %src = bitcast i8* %addr to <3 x i8>*
    863   %vec = load <3 x i8>, <3 x i8>* %src, align 1
    864   ret <3 x i8> %vec
    865 }
    ; Returns the <2 x i8> loaded (align 1, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    866 define <2 x i8> @__rsAllocationVLoadXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    867   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    868   %src = bitcast i8* %addr to <2 x i8>*
    869   %vec = load <2 x i8>, <2 x i8>* %src, align 1
    870   ret <2 x i8> %vec
    871 }
    872 
    ; Returns the <4 x i8> loaded (align 1, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    873 define <4 x i8> @__rsAllocationVLoadXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    874   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    875   %src = bitcast i8* %addr to <4 x i8>*
    876   %vec = load <4 x i8>, <4 x i8>* %src, align 1
    877   ret <4 x i8> %vec
    878 }
    ; Returns the <3 x i8> loaded (align 1, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    879 define <3 x i8> @__rsAllocationVLoadXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    880   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    881   %src = bitcast i8* %addr to <3 x i8>*
    882   %vec = load <3 x i8>, <3 x i8>* %src, align 1
    883   ret <3 x i8> %vec
    884 }
    ; Returns the <2 x i8> loaded (align 1, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    885 define <2 x i8> @__rsAllocationVLoadXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    886   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    887   %src = bitcast i8* %addr to <2 x i8>*
    888   %vec = load <2 x i8>, <2 x i8>* %src, align 1
    889   ret <2 x i8> %vec
    890 }
    891 
    ; Returns the <4 x float> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    892 define <4 x float> @__rsAllocationVLoadXImpl_float4(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    893   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    894   %src = bitcast i8* %addr to <4 x float>*
    895   %vec = load <4 x float>, <4 x float>* %src, align 4
    896   ret <4 x float> %vec
    897 }
    ; Returns the <3 x float> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    898 define <3 x float> @__rsAllocationVLoadXImpl_float3(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    899   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    900   %src = bitcast i8* %addr to <3 x float>*
    901   %vec = load <3 x float>, <3 x float>* %src, align 4
    902   ret <3 x float> %vec
    903 }
    ; Returns the <2 x float> loaded (align 4, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    904 define <2 x float> @__rsAllocationVLoadXImpl_float2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    905   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    906   %src = bitcast i8* %addr to <2 x float>*
    907   %vec = load <2 x float>, <2 x float>* %src, align 4
    908   ret <2 x float> %vec
    909 }
    910 
    ; Loads a <4 x double> (align 8, no TBAA tag) from the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z) and writes it to the sret slot %agg.result.
    911 define void @__rsAllocationVLoadXImpl_double4(<4 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    912   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    913   %src = bitcast i8* %addr to <4 x double>*
    914   %vec = load <4 x double>, <4 x double>* %src, align 8
    915   store <4 x double> %vec, <4 x double>* %agg.result
    916   ret void
    917 }
    ; Loads a <3 x double> (align 8, no TBAA tag) from the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z) and writes it to the sret slot %agg.result.
    918 define void @__rsAllocationVLoadXImpl_double3(<3 x double>* noalias nocapture sret %agg.result, %struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #1 {
    919   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    920   %src = bitcast i8* %addr to <3 x double>*
    921   %vec = load <3 x double>, <3 x double>* %src, align 8
    922   store <3 x double> %vec, <3 x double>* %agg.result
    923   ret void
    924 }
    ; Returns the <2 x double> loaded (align 8, no TBAA tag) from the pointer
    ; returned by rsOffsetNs(%a, %x, %y, %z).
    925 define <2 x double> @__rsAllocationVLoadXImpl_double2(%struct.rs_allocation* nocapture readonly %a, i32 %x, i32 %y, i32 %z) #0 {
    926   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    927   %src = bitcast i8* %addr to <2 x double>*
    928   %vec = load <2 x double>, <2 x double>* %src, align 8
    929   ret <2 x double> %vec
    930 }
    931 
    932 
    ; Reads a <4 x i64> through %val and stores it (align 8, no TBAA tag) at the
    ; pointer returned by rsOffsetNs(%a, %x, %y, %z).
    933 define void @__rsAllocationVStoreXImpl_long4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    934   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    935   %vec = load <4 x i64>, <4 x i64>* %val
    936   %dst = bitcast i8* %addr to <4 x i64>*
    937   store <4 x i64> %vec, <4 x i64>* %dst, align 8
    938   ret void
    939 }
    ; Reads a <3 x i64> through %val and stores it (align 8, no TBAA tag) at the
    ; pointer returned by rsOffsetNs(%a, %x, %y, %z).
    940 define void @__rsAllocationVStoreXImpl_long3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    941   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    942   %vec = load <3 x i64>, <3 x i64>* %val
    943   %dst = bitcast i8* %addr to <3 x i64>*
    944   store <3 x i64> %vec, <3 x i64>* %dst, align 8
    945   ret void
    946 }
    ; Stores %val (align 8, no TBAA tag) at the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z).
    947 define void @__rsAllocationVStoreXImpl_long2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    948   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    949   %dst = bitcast i8* %addr to <2 x i64>*
    950   store <2 x i64> %val, <2 x i64>* %dst, align 8
    951   ret void
    952 }
    953 
    ; Reads a <4 x i64> through %val and stores it (align 8, no TBAA tag) at the
    ; pointer returned by rsOffsetNs(%a, %x, %y, %z).
    954 define void @__rsAllocationVStoreXImpl_ulong4(%struct.rs_allocation* nocapture readonly %a, <4 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    955   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    956   %vec = load <4 x i64>, <4 x i64>* %val
    957   %dst = bitcast i8* %addr to <4 x i64>*
    958   store <4 x i64> %vec, <4 x i64>* %dst, align 8
    959   ret void
    960 }
    ; Reads a <3 x i64> through %val and stores it (align 8, no TBAA tag) at the
    ; pointer returned by rsOffsetNs(%a, %x, %y, %z).
    961 define void @__rsAllocationVStoreXImpl_ulong3(%struct.rs_allocation* nocapture readonly %a, <3 x i64>* %val, i32 %x, i32 %y, i32 %z) #1 {
    962   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    963   %vec = load <3 x i64>, <3 x i64>* %val
    964   %dst = bitcast i8* %addr to <3 x i64>*
    965   store <3 x i64> %vec, <3 x i64>* %dst, align 8
    966   ret void
    967 }
    ; Stores %val (align 8, no TBAA tag) at the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z).
    968 define void @__rsAllocationVStoreXImpl_ulong2(%struct.rs_allocation* nocapture readonly %a, <2 x i64> %val, i32 %x, i32 %y, i32 %z) #1 {
    969   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    970   %dst = bitcast i8* %addr to <2 x i64>*
    971   store <2 x i64> %val, <2 x i64>* %dst, align 8
    972   ret void
    973 }
    974 
    ; Stores %val (align 4, no TBAA tag) at the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z).
    975 define void @__rsAllocationVStoreXImpl_int4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    976   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    977   %dst = bitcast i8* %addr to <4 x i32>*
    978   store <4 x i32> %val, <4 x i32>* %dst, align 4
    979   ret void
    980 }
    ; %val arrives as <4 x i32>; only lanes 0-2 are kept and stored as a
    ; <3 x i32> (align 4, no TBAA tag) at the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z).
    981 define void @__rsAllocationVStoreXImpl_int3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    982   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    983   %lanes = shufflevector <4 x i32> %val, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
    984   %dst = bitcast i8* %addr to <3 x i32>*
    985   store <3 x i32> %lanes, <3 x i32>* %dst, align 4
    986   ret void
    987 }
    ; Stores %val (align 4, no TBAA tag) at the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z).
    988 define void @__rsAllocationVStoreXImpl_int2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    989   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    990   %dst = bitcast i8* %addr to <2 x i32>*
    991   store <2 x i32> %val, <2 x i32>* %dst, align 4
    992   ret void
    993 }
    994 
    ; Stores %val (align 4, no TBAA tag) at the pointer returned by
    ; rsOffsetNs(%a, %x, %y, %z).
    995 define void @__rsAllocationVStoreXImpl_uint4(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
    996   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
    997   %dst = bitcast i8* %addr to <4 x i32>*
    998   store <4 x i32> %val, <4 x i32>* %dst, align 4
    999   ret void
   1000 }
   ; %val arrives as <4 x i32>; only lanes 0-2 are kept and stored as a
   ; <3 x i32> (align 4, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1001 define void @__rsAllocationVStoreXImpl_uint3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
   1002   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1003   %lanes = shufflevector <4 x i32> %val, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1004   %dst = bitcast i8* %addr to <3 x i32>*
   1005   store <3 x i32> %lanes, <3 x i32>* %dst, align 4
   1006   ret void
   1007 }
   ; Stores %val (align 4, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1008 define void @__rsAllocationVStoreXImpl_uint2(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
   1009   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1010   %dst = bitcast i8* %addr to <2 x i32>*
   1011   store <2 x i32> %val, <2 x i32>* %dst, align 4
   1012   ret void
   1013 }
   1014 
   ; Stores %val (align 2, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1015 define void @__rsAllocationVStoreXImpl_short4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
   1016   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1017   %dst = bitcast i8* %addr to <4 x i16>*
   1018   store <4 x i16> %val, <4 x i16>* %dst, align 2
   1019   ret void
   1020 }
   ; %val arrives as <2 x i32>; it is reinterpreted as <4 x i16>, lanes 0-2 are
   ; kept, and the <3 x i16> is stored (align 2, no TBAA tag) at the pointer
   ; returned by rsOffsetNs(%a, %x, %y, %z).
   1021 define void @__rsAllocationVStoreXImpl_short3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
   1022   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1023   %as.i16 = bitcast <2 x i32> %val to <4 x i16>
   1024   %lanes = shufflevector <4 x i16> %as.i16, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1025   %dst = bitcast i8* %addr to <3 x i16>*
   1026   store <3 x i16> %lanes, <3 x i16>* %dst, align 2
   1027   ret void
   1028 }
   ; Stores %val (align 2, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1029 define void @__rsAllocationVStoreXImpl_short2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
   1030   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1031   %dst = bitcast i8* %addr to <2 x i16>*
   1032   store <2 x i16> %val, <2 x i16>* %dst, align 2
   1033   ret void
   1034 }
   1035 
   ; Stores %val (align 2, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1036 define void @__rsAllocationVStoreXImpl_ushort4(%struct.rs_allocation* nocapture readonly %a, <4 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
   1037   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1038   %dst = bitcast i8* %addr to <4 x i16>*
   1039   store <4 x i16> %val, <4 x i16>* %dst, align 2
   1040   ret void
   1041 }
   ; %val arrives as <2 x i32>; it is reinterpreted as <4 x i16>, lanes 0-2 are
   ; kept, and the <3 x i16> is stored (align 2, no TBAA tag) at the pointer
   ; returned by rsOffsetNs(%a, %x, %y, %z).
   1042 define void @__rsAllocationVStoreXImpl_ushort3(%struct.rs_allocation* nocapture readonly %a, <2 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
   1043   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1044   %as.i16 = bitcast <2 x i32> %val to <4 x i16>
   1045   %lanes = shufflevector <4 x i16> %as.i16, <4 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1046   %dst = bitcast i8* %addr to <3 x i16>*
   1047   store <3 x i16> %lanes, <3 x i16>* %dst, align 2
   1048   ret void
   1049 }
   ; Stores %val (align 2, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1050 define void @__rsAllocationVStoreXImpl_ushort2(%struct.rs_allocation* nocapture readonly %a, <2 x i16> %val, i32 %x, i32 %y, i32 %z) #1 {
   1051   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1052   %dst = bitcast i8* %addr to <2 x i16>*
   1053   store <2 x i16> %val, <2 x i16>* %dst, align 2
   1054   ret void
   1055 }
   1056 
   ; Stores %val (align 1, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1057 define void @__rsAllocationVStoreXImpl_char4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
   1058   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1059   %dst = bitcast i8* %addr to <4 x i8>*
   1060   store <4 x i8> %val, <4 x i8>* %dst, align 1
   1061   ret void
   1062 }
   ; %val arrives as an i32; it is reinterpreted as <4 x i8>, lanes 0-2 are
   ; kept, and the <3 x i8> is stored (align 1, no TBAA tag) at the pointer
   ; returned by rsOffsetNs(%a, %x, %y, %z).
   1063 define void @__rsAllocationVStoreXImpl_char3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
   1064   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1065   %as.i8 = bitcast i32 %val to <4 x i8>
   1066   %lanes = shufflevector <4 x i8> %as.i8, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1067   %dst = bitcast i8* %addr to <3 x i8>*
   1068   store <3 x i8> %lanes, <3 x i8>* %dst, align 1
   1069   ret void
   1070 }
   ; %val arrives as an i16; it is reinterpreted as <2 x i8> and stored (no TBAA
   ; tag) at the pointer returned by rsOffsetNs(%a, %x, %y, %z).
   1071 define void @__rsAllocationVStoreXImpl_char2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
   1072   %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1073   %2 = bitcast i8* %1 to <2 x i8>*
   1074   %3 = bitcast i16 %val to <2 x i8>
          ; align 1 (was 8): a 2-byte store cannot assume 8-byte alignment; the
          ; char4/char3 siblings and the char2 load all use align 1.
   1075   store <2 x i8> %3, <2 x i8>* %2, align 1
   1076   ret void
   1077 }
   1078 
   ; Stores %val (align 1, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1079 define void @__rsAllocationVStoreXImpl_uchar4(%struct.rs_allocation* nocapture readonly %a, <4 x i8> %val, i32 %x, i32 %y, i32 %z) #1 {
   1080   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1081   %dst = bitcast i8* %addr to <4 x i8>*
   1082   store <4 x i8> %val, <4 x i8>* %dst, align 1
   1083   ret void
   1084 }
   ; %val arrives as an i32; it is reinterpreted as <4 x i8>, lanes 0-2 are
   ; kept, and the <3 x i8> is stored (align 1, no TBAA tag) at the pointer
   ; returned by rsOffsetNs(%a, %x, %y, %z).
   1085 define void @__rsAllocationVStoreXImpl_uchar3(%struct.rs_allocation* nocapture readonly %a, i32 %val, i32 %x, i32 %y, i32 %z) #1 {
   1086   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1087   %as.i8 = bitcast i32 %val to <4 x i8>
   1088   %lanes = shufflevector <4 x i8> %as.i8, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1089   %dst = bitcast i8* %addr to <3 x i8>*
   1090   store <3 x i8> %lanes, <3 x i8>* %dst, align 1
   1091   ret void
   1092 }
   ; %val arrives as an i16; it is reinterpreted as <2 x i8> and stored (no TBAA
   ; tag) at the pointer returned by rsOffsetNs(%a, %x, %y, %z).
   1093 define void @__rsAllocationVStoreXImpl_uchar2(%struct.rs_allocation* nocapture readonly %a, i16 %val, i32 %x, i32 %y, i32 %z) #1 {
   1094   %1 = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1095   %2 = bitcast i8* %1 to <2 x i8>*
   1096   %3 = bitcast i16 %val to <2 x i8>
          ; align 1 (was 8): a 2-byte store cannot assume 8-byte alignment; the
          ; uchar4/uchar3 siblings and the uchar2 load all use align 1.
   1097   store <2 x i8> %3, <2 x i8>* %2, align 1
   1098   ret void
   1099 }
   1100 
   ; Stores %val (align 4, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1101 define void @__rsAllocationVStoreXImpl_float4(%struct.rs_allocation* nocapture readonly %a, <4 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
   1102   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1103   %dst = bitcast i8* %addr to <4 x float>*
   1104   store <4 x float> %val, <4 x float>* %dst, align 4
   1105   ret void
   1106 }
   ; %val arrives as <4 x i32>; it is reinterpreted as <4 x float>, lanes 0-2
   ; are kept, and the <3 x float> is stored (align 4, no TBAA tag) at the
   ; pointer returned by rsOffsetNs(%a, %x, %y, %z).
   1107 define void @__rsAllocationVStoreXImpl_float3(%struct.rs_allocation* nocapture readonly %a, <4 x i32> %val, i32 %x, i32 %y, i32 %z) #1 {
   1108   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1109   %as.float = bitcast <4 x i32> %val to <4 x float>
   1110   %lanes = shufflevector <4 x float> %as.float, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   1111   %dst = bitcast i8* %addr to <3 x float>*
   1112   store <3 x float> %lanes, <3 x float>* %dst, align 4
   1113   ret void
   1114 }
   ; Stores %val (align 4, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1115 define void @__rsAllocationVStoreXImpl_float2(%struct.rs_allocation* nocapture readonly %a, <2 x float> %val, i32 %x, i32 %y, i32 %z) #1 {
   1116   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1117   %dst = bitcast i8* %addr to <2 x float>*
   1118   store <2 x float> %val, <2 x float>* %dst, align 4
   1119   ret void
   1120 }
   1121 
   ; Reads a <4 x double> through %val and stores it (align 8, no TBAA tag) at
   ; the pointer returned by rsOffsetNs(%a, %x, %y, %z).
   1122 define void @__rsAllocationVStoreXImpl_double4(%struct.rs_allocation* nocapture readonly %a, <4 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
   1123   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1124   %vec = load <4 x double>, <4 x double>* %val
   1125   %dst = bitcast i8* %addr to <4 x double>*
   1126   store <4 x double> %vec, <4 x double>* %dst, align 8
   1127   ret void
   1128 }
   ; Reads a <3 x double> through %val and stores it (align 8, no TBAA tag) at
   ; the pointer returned by rsOffsetNs(%a, %x, %y, %z).
   1129 define void @__rsAllocationVStoreXImpl_double3(%struct.rs_allocation* nocapture readonly %a, <3 x double>* %val, i32 %x, i32 %y, i32 %z) #1 {
   1130   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1131   %vec = load <3 x double>, <3 x double>* %val
   1132   %dst = bitcast i8* %addr to <3 x double>*
   1133   store <3 x double> %vec, <3 x double>* %dst, align 8
   1134   ret void
   1135 }
   ; Stores %val (align 8, no TBAA tag) at the pointer returned by
   ; rsOffsetNs(%a, %x, %y, %z).
   1136 define void @__rsAllocationVStoreXImpl_double2(%struct.rs_allocation* nocapture readonly %a, <2 x double> %val, i32 %x, i32 %y, i32 %z) #1 {
   1137   %addr = tail call i8* @rsOffsetNs(%struct.rs_allocation* %a, i32 %x, i32 %y, i32 %z) #2
   1138   %dst = bitcast i8* %addr to <2 x double>*
   1139   store <2 x double> %val, <2 x double>* %dst, align 8
   1140   ret void
   1141 }
   1142 
   1143 
   ; Attribute groups referenced by the definitions above.
   ; #0: nounwind + readonly plus the FP/frame-pointer string attributes; used
   ;     by functions that return the loaded value directly.
   1144 attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
   ; #1: same as #0 without readonly; used by every function here that writes
   ;     memory (setters, vector stores, and getters returning through sret).
   1145 attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
   ; #2: nobuiltin; attached to the rsOffset / rsOffsetNs call sites.
   1146 attributes #2 = { nobuiltin }
   1147 
   1148