; Target description: little-endian, 32-bit pointers, ARMv7 Linux (gnueabi).
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

; Matrix types: each one wraps a single flat array of N*N floats.
%struct.rs_matrix4x4 = type { [16 x float] }
%struct.rs_matrix3x3 = type { [9 x float] }
%struct.rs_matrix2x2 = type { [4 x float] }

; Broadcast helper: returns a <4 x float> whose four lanes all hold %in.
define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %lane01 = insertelement <4 x float> %lane0, float %in, i32 1
  %lane012 = insertelement <4 x float> %lane01, float %in, i32 2
  %splat = insertelement <4 x float> %lane012, float %in, i32 3
  ret <4 x float> %splat
}

; rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) -> float3
;
; Lane j (j = 0..2) of the result is
;   in[0] * m[j] + in[1] * m[3 + j] + in[2] * m[6 + j]
; where m[] is the flat 9-float array inside the matrix struct.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
  ; The first two groups are read 4-wide starting at elements 0 and 3; the
  ; extra fourth lane belongs to the next group and is dropped at the end.
  %grp0.addr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %grp0.vptr = bitcast float* %grp0.addr to <4 x float>*
  %grp0 = load <4 x float>* %grp0.vptr, align 4
  %grp1.addr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %grp1.vptr = bitcast float* %grp1.addr to <4 x float>*
  %grp1 = load <4 x float>* %grp1.vptr, align 4

  ; The last group starts at element 6, so it is read 3-wide (elements 6..8)
  ; and then widened; mask index 3 picks a lane of the undef operand.
  %grp2.addr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 6
  %grp2.vptr = bitcast float* %grp2.addr to <3 x float>*
  %grp2.v3 = load <3 x float>* %grp2.vptr, align 4
  %grp2 = shufflevector <3 x float> %grp2.v3, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

  ; Splat each input component across a full vector.
  %in.x = extractelement <3 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <3 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %in.z = extractelement <3 x float> %in, i32 2
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone

  ; Scale each group by its component and accumulate.
  %term.x = fmul <4 x float> %xxxx, %grp0
  %term.y = fmul <4 x float> %yyyy, %grp1
  %sum.xy = fadd <4 x float> %term.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %grp2
  %sum.xyz = fadd <4 x float> %term.z, %sum.xy

  ; Keep only the three meaningful lanes.
  %result = shufflevector <4 x float> %sum.xyz, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}

; rsMatrixMultiply(rs_matrix3x3 *m, float3 in): non-const overload, forwards
; to the const-pointer variant.
define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
  %result = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind
  ret <3 x float> %result
}

; rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) -> float3
;
; Lane j (j = 0..2) of the result is
;   in[0] * m[j] + in[1] * m[3 + j]
; The third group of the matrix (elements 6..8) does not contribute.
define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
  ; 4-wide reads starting at elements 0 and 3; lane 3 of each is discarded by
  ; the final shuffle.
  %grp0.addr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
  %grp0.vptr = bitcast float* %grp0.addr to <4 x float>*
  %grp0 = load <4 x float>* %grp0.vptr, align 4
  %grp1.addr = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
  %grp1.vptr = bitcast float* %grp1.addr to <4 x float>*
  %grp1 = load <4 x float>* %grp1.vptr, align 4

  ; Splat both input components.
  %in.x = extractelement <2 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <2 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone

  %term.x = fmul <4 x float> %xxxx, %grp0
  %term.y = fmul <4 x float> %yyyy, %grp1
  %sum.xy = fadd <4 x float> %term.x, %term.y

  ; Narrow to the three result lanes.
  %result = shufflevector <4 x float> %sum.xy, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %result
}

; rsMatrixMultiply(rs_matrix3x3 *m, float2 in): non-const overload, forwards
; to the const-pointer variant.
define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
  %result = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind
  ret <3 x float> %result
}

; rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) -> float4
;
; Lane j (j = 0..3) of the result is
;   in[0] * m[j] + in[1] * m[4 + j] + in[2] * m[8 + j] + in[3] * m[12 + j]
; where m[] is the flat 16-float array inside the matrix struct.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
  ; Read the four consecutive 4-float groups of the matrix.
  %grp0.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %grp0.vptr = bitcast float* %grp0.addr to <4 x float>*
  %grp0 = load <4 x float>* %grp0.vptr, align 4
  %grp1.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %grp1.vptr = bitcast float* %grp1.addr to <4 x float>*
  %grp1 = load <4 x float>* %grp1.vptr, align 4
  %grp2.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %grp2.vptr = bitcast float* %grp2.addr to <4 x float>*
  %grp2 = load <4 x float>* %grp2.vptr, align 4
  %grp3.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %grp3.vptr = bitcast float* %grp3.addr to <4 x float>*
  %grp3 = load <4 x float>* %grp3.vptr, align 4

  ; Splat every input component.
  %in.x = extractelement <4 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <4 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %in.z = extractelement <4 x float> %in, i32 2
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone
  %in.w = extractelement <4 x float> %in, i32 3
  %wwww = tail call <4 x float> @smear_f(float %in.w) nounwind readnone

  ; Multiply-accumulate in x, y, z, w order.
  %term.x = fmul <4 x float> %xxxx, %grp0
  %term.y = fmul <4 x float> %yyyy, %grp1
  %sum.xy = fadd <4 x float> %term.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %grp2
  %sum.xyz = fadd <4 x float> %sum.xy, %term.z
  %term.w = fmul <4 x float> %wwww, %grp3
  %sum.xyzw = fadd <4 x float> %sum.xyz, %term.w
  ret <4 x float> %sum.xyzw
}

; rsMatrixMultiply(rs_matrix4x4 *m, float4 in): non-const overload, forwards
; to the const-pointer variant.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
  %result = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind
  ret <4 x float> %result
}

; rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) -> float4
;
; Lane j (j = 0..3) of the result is
;   m[12 + j] + in[0] * m[j] + in[1] * m[4 + j] + in[2] * m[8 + j]
; i.e. the fourth group of the matrix is added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
  ; Read all four 4-float groups of the matrix.
  %grp0.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %grp0.vptr = bitcast float* %grp0.addr to <4 x float>*
  %grp0 = load <4 x float>* %grp0.vptr, align 4
  %grp1.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %grp1.vptr = bitcast float* %grp1.addr to <4 x float>*
  %grp1 = load <4 x float>* %grp1.vptr, align 4
  %grp2.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
  %grp2.vptr = bitcast float* %grp2.addr to <4 x float>*
  %grp2 = load <4 x float>* %grp2.vptr, align 4
  %grp3.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %grp3.vptr = bitcast float* %grp3.addr to <4 x float>*
  %grp3 = load <4 x float>* %grp3.vptr, align 4

  ; Splat the three input components.
  %in.x = extractelement <3 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <3 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone
  %in.z = extractelement <3 x float> %in, i32 2
  %zzzz = tail call <4 x float> @smear_f(float %in.z) nounwind readnone

  ; Start from the unscaled fourth group, then accumulate x, y, z terms.
  %term.x = fmul <4 x float> %xxxx, %grp0
  %acc.x = fadd <4 x float> %grp3, %term.x
  %term.y = fmul <4 x float> %yyyy, %grp1
  %acc.xy = fadd <4 x float> %acc.x, %term.y
  %term.z = fmul <4 x float> %zzzz, %grp2
  %acc.xyz = fadd <4 x float> %acc.xy, %term.z
  ret <4 x float> %acc.xyz
}

; rsMatrixMultiply(rs_matrix4x4 *m, float3 in): non-const overload, forwards
; to the const-pointer variant.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
  %result = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind
  ret <4 x float> %result
}

; rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) -> float4
;
; Lane j (j = 0..3) of the result is
;   m[12 + j] + in[0] * m[j] + in[1] * m[4 + j]
; The third group of the matrix (elements 8..11) is not read; the fourth
; group is added unscaled.
define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
  ; Read groups 0, 1 and 3 of the matrix.
  %grp0.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
  %grp0.vptr = bitcast float* %grp0.addr to <4 x float>*
  %grp0 = load <4 x float>* %grp0.vptr, align 4
  %grp1.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
  %grp1.vptr = bitcast float* %grp1.addr to <4 x float>*
  %grp1 = load <4 x float>* %grp1.vptr, align 4
  %grp3.addr = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
  %grp3.vptr = bitcast float* %grp3.addr to <4 x float>*
  %grp3 = load <4 x float>* %grp3.vptr, align 4

  ; Splat both input components.
  %in.x = extractelement <2 x float> %in, i32 0
  %xxxx = tail call <4 x float> @smear_f(float %in.x) nounwind readnone
  %in.y = extractelement <2 x float> %in, i32 1
  %yyyy = tail call <4 x float> @smear_f(float %in.y) nounwind readnone

  ; Start from the unscaled fourth group, then accumulate x and y terms.
  %term.x = fmul <4 x float> %xxxx, %grp0
  %acc.x = fadd <4 x float> %grp3, %term.x
  %term.y = fmul <4 x float> %yyyy, %grp1
  %acc.xy = fadd <4 x float> %acc.x, %term.y
  ret <4 x float> %acc.xy
}

; rsMatrixMultiply(rs_matrix4x4 *m, float2 in): non-const overload, forwards
; to the const-pointer variant.
define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
  %result = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind
  ret <4 x float> %result
}