; ARM NEON implementations of the clamp / fmax / fmin / max built-ins.
;
; Exported symbols use Itanium C++ mangling: Dv<N>_<t> is an N-element vector
; of <t> (f = float, c/h = signed/unsigned char, s/t = signed/unsigned short,
; i/j = signed/unsigned int, x = long long); S_ repeats the first parameter
; type.  Every function here is pure arithmetic, hence `nounwind readnone`.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                INTRINSICS              ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Lane-wise maximum (s = signed / float, u = unsigned).
declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone

; Lane-wise minimum (s = signed / float, u = unsigned).
declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone

; Saturating narrowing shifts (not referenced by the functions in this part of the file).
declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

; Reciprocal / reciprocal-square-root estimate and refinement steps
; (not referenced by the functions in this part of the file).
declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                HELPERS                 ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; smear_<N><t>: broadcast the scalar %in into every lane of an N-wide vector.
; Written as the canonical splat: put the scalar in lane 0, then shuffle with
; an all-zero mask so every result lane reads lane 0.

define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x float> undef, float %in, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}

define internal <4 x i32> @smear_4i(i32 %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

define internal <4 x i16> @smear_4s(i16 %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x i16> undef, i16 %in, i32 0
  %splat = shufflevector <4 x i16> %lane0, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}

define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <2 x float> undef, float %in, i32 0
  %splat = shufflevector <2 x float> %lane0, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}

define internal <2 x i32> @smear_2i(i32 %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <2 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <2 x i32> %lane0, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}

define internal <2 x i16> @smear_2s(i16 %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <2 x i16> undef, i16 %in, i32 0
  %splat = shufflevector <2 x i16> %lane0, <2 x i16> undef, <2 x i32> zeroinitializer
  ret <2 x i16> %splat
}

; NOTE(review): same behavior as smear_4i; kept as a separate symbol because
; users outside this part of the file may reference it.
define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
  %lane0 = insertelement <4 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <4 x i32> %lane0, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                  CLAMP                 ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Every clamp computes max(min(value, high), low) per lane.
;
; 3-wide variants widen their operands to 4 lanes so the 128-bit intrinsic can
; be used: shuffle index 3 selects element 0 of the second (undef) operand, so
; the extra lane is undef and is dropped again by the final 3-lane shuffle.

; clamp(float4, float4, float4)
define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
  ret <4 x float> %2
}

; clamp(float4, float, float): scalar bounds are broadcast, then the vector form is reused.
define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readnone {
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readnone
  ret <4 x float> %out
}

; clamp(float3, float3, float3)
define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readnone {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(float3, float, float)
define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readnone {
  %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
  %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
  %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
  %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
  %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; clamp(float2, float2, float2)
define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
  ret <2 x float> %2
}

; clamp(float2, float, float)
define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readnone {
  %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
  %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
  %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
  %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
  ret <2 x float> %b
}

; clamp(float, float, float): ordered compares, so an unordered (NaN)
; comparison falls through to the second select operand.
define float @_Z5clampfff(float %value, float %low, float %high) nounwind readnone {
  %1 = fcmp olt float %value, %high
  %2 = select i1 %1, float %value, float %high
  %3 = fcmp ogt float %2, %low
  %4 = select i1 %3, float %2, float %low
  ret float %4
}

; clamp(int4, int4, int4) -- signed
define <4 x i32> @_Z5clampDv4_iS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(int4, int, int)
define <4 x i32> @_Z5clampDv4_iii(<4 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(int3, int3, int3)
define <3 x i32> @_Z5clampDv3_iS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readnone {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(int3, int, int)
define <3 x i32> @_Z5clampDv3_iii(<3 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %a = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(int2, int2, int2)
define <2 x i32> @_Z5clampDv2_iS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %2 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %2
}

; clamp(int2, int, int)
define <2 x i32> @_Z5clampDv2_iii(<2 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %a = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind readnone
  %b = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readnone
  ret <2 x i32> %b
}

; clamp(uint4, uint4, uint4) -- unsigned
define <4 x i32> @_Z5clampDv4_jS_S_(<4 x i32> %value, <4 x i32> %low, <4 x i32> %high) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(uint4, uint, uint)
define <4 x i32> @_Z5clampDv4_jjj(<4 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %value, <4 x i32> %_high) nounwind readnone
  %2 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %_low) nounwind readnone
  ret <4 x i32> %2
}

; clamp(uint3, uint3, uint3)
define <3 x i32> @_Z5clampDv3_jS_S_(<3 x i32> %value, <3 x i32> %low, <3 x i32> %high) nounwind readnone {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_low = shufflevector <3 x i32> %low, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = shufflevector <3 x i32> %high, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(uint3, uint, uint)
define <3 x i32> @_Z5clampDv3_jjj(<3 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %_value = shufflevector <3 x i32> %value, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %_high = tail call <4 x i32> @smear_4i(i32 %high) nounwind readnone
  %_low = tail call <4 x i32> @smear_4i(i32 %low) nounwind readnone
  %a = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %_value, <4 x i32> %_high) nounwind readnone
  %b = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %_low) nounwind readnone
  %c = shufflevector <4 x i32> %b, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %c
}

; clamp(uint2, uint2, uint2)
define <2 x i32> @_Z5clampDv2_jS_S_(<2 x i32> %value, <2 x i32> %low, <2 x i32> %high) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %high) nounwind readnone
  %2 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %low) nounwind readnone
  ret <2 x i32> %2
}

; clamp(uint2, uint, uint)
define <2 x i32> @_Z5clampDv2_jjj(<2 x i32> %value, i32 %low, i32 %high) nounwind readnone {
  %_high = tail call <2 x i32> @smear_2i(i32 %high) nounwind readnone
  %_low = tail call <2 x i32> @smear_2i(i32 %low) nounwind readnone
  %a = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %value, <2 x i32> %_high) nounwind readnone
  %b = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %_low) nounwind readnone
  ret <2 x i32> %b
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                  FMAX                  ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmax(float4, float4)
define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmax(float4, float)
define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmax(float3, float3): widened to 4 lanes (lane 3 undef), narrowed afterwards.
define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmax(float3, float)
define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmax(float2, float2)
define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmax(float2, float)
define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmax(float, float): returns %v1 only when %v1 > %v2 (ordered), else %v2.
define float @_Z4fmaxff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp ogt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                  FMIN                  ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; fmin(float4, float4)
define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

; fmin(float4, float)
define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
  ret <4 x float> %2
}

; fmin(float3, float3): widened to 4 lanes (lane 3 undef), narrowed afterwards.
define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %4
}

; fmin(float3, float)
define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
  %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
  %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %c
}

; fmin(float2, float2)
define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

; fmin(float2, float)
define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
  %2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
  ret <2 x float> %2
}

; fmin(float, float): returns %v1 only when %v1 < %v2 (ordered), else %v2.
define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
  %1 = fcmp olt float %v1, %v2
  %2 = select i1 %1, float %v1, float %v2
  ret float %2
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                  MAX                   ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Signed integer max.  8-bit vectors and 2/3-wide 16-bit vectors are
; sign-extended to i32 lanes, compared with the 32-bit intrinsic and truncated
; back; extension preserves ordering, so the truncated result is the max.

; max(char, char)
define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %1 = icmp sgt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

; max(char2, char2)
define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = sext <2 x i8> %v1 to <2 x i32>
  %2 = sext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

; max(char3, char3)
define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = sext <3 x i8> %v1 to <3 x i32>
  %2 = sext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

; max(char4, char4)
define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = sext <4 x i8> %v1 to <4 x i32>
  %2 = sext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; max(short, short)
define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %1 = icmp sgt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

; max(short2, short2)
define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = sext <2 x i16> %v1 to <2 x i32>
  %2 = sext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

; max(short3, short3)
define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = sext <3 x i16> %v1 to <3 x i32>
  %2 = sext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

; max(short4, short4): <4 x i16> fills one 64-bit NEON register, so the
; 16-bit intrinsic is used directly instead of widening to i32 and truncating.
define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = tail call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %1
}

; max(int, int)
define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp sgt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

; max(int2, int2)
define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

; max(int3, int3)
define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

; max(int4, int4)
define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; max(long long, long long)
define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
  %1 = icmp sgt i64 %v1, %v2
  %2 = select i1 %1, i64 %v1, i64 %v2
  ret i64 %2
}

; TODO: long vector types

; Unsigned integer max.  Same structure as the signed forms, using zero
; extension and the unsigned intrinsics.

; max(uchar, uchar)
define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %1 = icmp ugt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

; max(uchar2, uchar2)
define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = zext <2 x i8> %v1 to <2 x i32>
  %2 = zext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

; max(uchar3, uchar3)
define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = zext <3 x i8> %v1 to <3 x i32>
  %2 = zext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

; max(uchar4, uchar4)
define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = zext <4 x i8> %v1 to <4 x i32>
  %2 = zext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; max(ushort, ushort)
define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %1 = icmp ugt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

; max(ushort2, ushort2)
define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = zext <2 x i16> %v1 to <2 x i32>
  %2 = zext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

; max(ushort3, ushort3)
define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = zext <3 x i16> %v1 to <3 x i32>
  %2 = zext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

; max(ushort4, ushort4): direct 16-bit unsigned intrinsic, no widening needed.
define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = tail call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %1
}

; max(uint, uint)
define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp ugt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

; max(uint2, uint2)
define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

; max(uint3, uint3)
define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

; max(uint4, uint4)
define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}


; TODO: long vector types

; Float max(): thin forwarders to the matching fmax() overloads.

define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
  %1 = tail call float @_Z4fmaxff(float %v1, float %v2) nounwind readnone
  ret float %1
}

define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
  %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone
  ret <2 x float> %1
}

define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone
  ret <2 x float> %1
}

define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
  %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone
  ret <3 x float> %1
}

define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone
  ret <3 x float> %1
}

define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
  %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone
  ret <4 x float> %1
}

define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
  %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone
  ret <4 x float> %1
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;                  MIN                   ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Signed integer min.  8-bit vectors and 2/3-wide 16-bit vectors are
; sign-extended to i32 lanes, compared with the 32-bit intrinsic and truncated
; back; extension preserves ordering, so the truncated result is the min.
; 3-wide vectors are padded to 4 lanes for the 128-bit intrinsic: shuffle index
; 3 selects element 0 of the undef operand, and that lane is dropped afterwards.

; min(char, char)
define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
  %1 = icmp slt i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

; min(char2, char2)
define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = sext <2 x i8> %v1 to <2 x i32>
  %2 = sext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

; min(char3, char3)
define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = sext <3 x i8> %v1 to <3 x i32>
  %2 = sext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

; min(char4, char4)
define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = sext <4 x i8> %v1 to <4 x i32>
  %2 = sext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

; min(short, short)
define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
  %1 = icmp slt i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 %v2
  ret i16 %2
}

; min(short2, short2)
define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
  %1 = sext <2 x i16> %v1 to <2 x i32>
  %2 = sext <2 x i16> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i16>
  ret <2 x i16> %4
}

; min(short3, short3)
define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
  %1 = sext <3 x i16> %v1 to <3 x i32>
  %2 = sext <3 x i16> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i16>
  ret <3 x i16> %7
}

; min(short4, short4): <4 x i16> fills one 64-bit NEON register, so the
; 16-bit intrinsic is used directly instead of widening to i32 and truncating.
define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
  %1 = tail call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone
  ret <4 x i16> %1
}

; min(int, int)
define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
  %1 = icmp slt i32 %v1, %v2
  %2 = select i1 %1, i32 %v1, i32 %v2
  ret i32 %2
}

; min(int2, int2)
define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
  %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
  ret <2 x i32> %1
}

; min(int3, int3)
define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
  %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %4
}

; min(int4, int4)
define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
  %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
  ret <4 x i32> %1
}

; min(long long, long long)
define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
  %1 = icmp slt i64 %v1, %v2
  %2 = select i1 %1, i64 %v1, i64 %v2
  ret i64 %2
}

; TODO: long vector types

; Unsigned integer min.  Same structure as the signed forms, using zero
; extension and the unsigned intrinsics.

; min(uchar, uchar)
define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
  %1 = icmp ult i8 %v1, %v2
  %2 = select i1 %1, i8 %v1, i8 %v2
  ret i8 %2
}

; min(uchar2, uchar2)
define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
  %1 = zext <2 x i8> %v1 to <2 x i32>
  %2 = zext <2 x i8> %v2 to <2 x i32>
  %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
  %4 = trunc <2 x i32> %3 to <2 x i8>
  ret <2 x i8> %4
}

; min(uchar3, uchar3)
define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
  %1 = zext <3 x i8> %v1 to <3 x i32>
  %2 = zext <3 x i8> %v2 to <3 x i32>
  %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %7 = trunc <3 x i32> %6 to <3 x i8>
  ret <3 x i8> %7
}

; min(uchar4, uchar4)
define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
  %1 = zext <4 x i8> %v1 to <4 x i32>
  %2 = zext <4 x i8> %v2 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
  %4 = trunc <4 x i32> %3 to <4 x i8>
  ret <4 x i8> %4
}

define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
  %1 = icmp ult i16 %v1, %v2
  %2 = select i1 %1, i16 %v1, i16 
%v2 735 ret i16 %2 736 } 737 738 define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { 739 %1 = zext <2 x i16> %v1 to <2 x i32> 740 %2 = zext <2 x i16> %v2 to <2 x i32> 741 %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone 742 %4 = trunc <2 x i32> %3 to <2 x i16> 743 ret <2 x i16> %4 744 } 745 746 define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone { 747 %1 = zext <3 x i16> %v1 to <3 x i32> 748 %2 = zext <3 x i16> %v2 to <3 x i32> 749 %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 750 %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 751 %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone 752 %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 753 %7 = trunc <3 x i32> %6 to <3 x i16> 754 ret <3 x i16> %7 755 } 756 757 define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { 758 %1 = zext <4 x i16> %v1 to <4 x i32> 759 %2 = zext <4 x i16> %v2 to <4 x i32> 760 %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 761 %4 = trunc <4 x i32> %3 to <4 x i16> 762 ret <4 x i16> %4 763 } 764 765 define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone { 766 %1 = icmp ult i32 %v1, %v2 767 %2 = select i1 %1, i32 %v1, i32 %v2 768 ret i32 %2 769 } 770 771 define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { 772 %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone 773 ret <2 x i32> %1 774 } 775 776 define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone { 777 %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 778 %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 779 %3 = tail call 
<4 x i32 > @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone 780 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2> 781 ret <3 x i32> %4 782 } 783 784 define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone { 785 %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone 786 ret <4 x i32> %1 787 } 788 789 790 ; TODO: long vector types 791 792 define float @_Z3minff(float %v1, float %v2) nounwind readnone { 793 %1 = tail call float @_Z4fminff(float %v1, float %v2) 794 ret float %1 795 } 796 797 define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone { 798 %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) 799 ret <2 x float> %1 800 } 801 802 define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone { 803 %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) 804 ret <2 x float> %1 805 } 806 807 define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone { 808 %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) 809 ret <3 x float> %1 810 } 811 812 define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone { 813 %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) 814 ret <3 x float> %1 815 } 816 817 define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone { 818 %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) 819 ret <4 x float> %1 820 } 821 822 define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone { 823 %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) 824 ret <4 x float> %1 825 } 826 827 828 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 829 ;;;;;;;;; YUV ;;;;;;;;;; 830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 831 832 @yuv_U = internal 
constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16 833 @yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16 834 @yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16 835 @yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16 836 837 838 define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline { 839 %_sy = zext i8 %pY to i32 840 %_su = zext i8 %pU to i32 841 %_sv = zext i8 %pV to i32 842 843 %_sy2 = add i32 -16, %_sy 844 %_sy3 = mul i32 298, %_sy2 845 %_su2 = add i32 -128, %_su 846 %_sv2 = add i32 -128, %_sv 847 %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone 848 %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone 849 %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone 850 851 %mu = load <4 x i32>, <4 x i32>* @yuv_U, align 8 852 %mv = load <4 x i32>, <4 x i32>* @yuv_V, align 8 853 %_u2 = mul <4 x i32> %_u, %mu 854 %_v2 = mul <4 x i32> %_v, %mv 855 %_y2 = add <4 x i32> %_y, %_u2 856 %_y3 = add <4 x i32> %_y2, %_v2 857 858 ; %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone 859 ; %r2 = trunc <4 x i16> %r1 to <4 x i8> 860 ; ret <4 x i8> %r2 861 862 %c0 = load <4 x i32>, <4 x i32>* @yuv_0, align 8 863 %c255 = load <4 x i32>, <4 x i32>* @yuv_255, align 8 864 %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone 865 %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone 866 %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8> 867 %r4 = trunc <4 x i32> %r3 to <4 x i8> 868 ret <4 x i8> %r4 869 } 870 871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 872 ;;;;;;;;; half_RECIP ;;;;;;;;;; 873 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 874 875 define <2 x float> @_Z10half_recipDv2_f(<2 
x float> %v) nounwind readnone { 876 %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone 877 %2 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %1, <2 x float> %v) nounwind readnone 878 %3 = fmul <2 x float> %1, %2 879 %4 = tail call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %3, <2 x float> %v) nounwind readnone 880 %5 = fmul <2 x float> %4, %3 881 ret <2 x float> %5 882 } 883 884 define <4 x float> @_Z10half_recipDv4_f(<4 x float> %v) nounwind readnone { 885 %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone 886 %2 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %1, <4 x float> %v) nounwind readnone 887 %3 = fmul <4 x float> %1, %2 888 %4 = tail call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %3, <4 x float> %v) nounwind readnone 889 %5 = fmul <4 x float> %4, %3 890 ret <4 x float> %5 891 } 892 893 define <3 x float> @_Z10half_recipDv3_f(<3 x float> %v) nounwind readnone { 894 %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 895 %2 = tail call <4 x float> @_Z10half_recipDv4_f(<4 x float> %1) nounwind readnone 896 %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 897 ret <3 x float> %3 898 } 899 900 901 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 902 ;;;;;;;;; half_RSQRT ;;;;;;;;;; 903 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 904 905 define float @_Z10half_rsqrtf(float %v) { 906 %1 = insertelement <2 x float> undef, float %v, i32 0 907 %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone 908 %3 = extractelement <2 x float> %2, i32 0 909 ret float %3 910 } 911 912 define <2 x float> @_Z10half_rsqrtDv2_f(<2 x float> %v) nounwind readnone { 913 %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone 914 ret <2 x float> %1 915 } 916 917 define <3 x float> 
@_Z10half_rsqrtDv3_f(<3 x float> %v) nounwind readnone { 918 %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 919 %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone 920 %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 921 ret <3 x float> %3 922 } 923 924 define <4 x float> @_Z10half_rsqrtDv4_f(<4 x float> %v) nounwind readnone { 925 %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone 926 ret <4 x float> %1 927 } 928 929 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 930 ;;;;;;;;; matrix ;;;;;;;;;; 931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 932 933 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly 934 935 %struct.rs_matrix4x4 = type { [16 x float] } 936 %struct.rs_matrix3x3 = type { [9 x float] } 937 %struct.rs_matrix2x2 = type { [4 x float] } 938 939 define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline { 940 %1 = insertelement <4 x float> undef, float %in, i32 0 941 %2 = insertelement <4 x float> %1, float %in, i32 1 942 %3 = insertelement <4 x float> %2, float %in, i32 2 943 %4 = insertelement <4 x float> %3, float %in, i32 3 944 ret <4 x float> %4 945 } 946 947 948 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly { 949 %x0 = extractelement <3 x float> %in, i32 0 950 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 951 %y0 = extractelement <3 x float> %in, i32 1 952 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 953 %z0 = extractelement <3 x float> %in, i32 2 954 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone 955 956 %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 957 %px2 = bitcast float* %px to i8* 958 %xm = call <4 x float> 
@llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind 959 960 %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 961 %py2 = bitcast float* %py to i8* 962 %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind 963 964 %pz = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5 965 %pz2 = bitcast float* %pz to i8* 966 %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind 967 %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 968 969 %a1 = fmul <4 x float> %x, %xm 970 %a2 = fmul <4 x float> %y, %ym 971 %a3 = fadd <4 x float> %a1, %a2 972 %a4 = fmul <4 x float> %z, %zm 973 %a5 = fadd <4 x float> %a4, %a3 974 %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 975 ret <3 x float> %a6 976 } 977 978 define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly { 979 %x0 = extractelement <2 x float> %in, i32 0 980 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 981 %y0 = extractelement <2 x float> %in, i32 1 982 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 983 984 %px = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0 985 %px2 = bitcast float* %px to <4 x float>* 986 %xm = load <4 x float>, <4 x float>* %px2, align 4 987 %py = getelementptr inbounds %struct.rs_matrix3x3, %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3 988 %py2 = bitcast float* %py to <4 x float>* 989 %ym = load <4 x float>, <4 x float>* %py2, align 4 990 991 %a1 = fmul <4 x float> %x, %xm 992 %a2 = fmul <4 x float> %y, %ym 993 %a3 = fadd <4 x float> %a1, %a2 994 %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 995 ret <3 x float> %a4 996 } 997 998 define <4 x float> 
@_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly { 999 %x0 = extractelement <4 x float> %in, i32 0 1000 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 1001 %y0 = extractelement <4 x float> %in, i32 1 1002 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 1003 %z0 = extractelement <4 x float> %in, i32 2 1004 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone 1005 %w0 = extractelement <4 x float> %in, i32 3 1006 %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone 1007 1008 %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 1009 %px2 = bitcast float* %px to <4 x float>* 1010 %xm = load <4 x float>, <4 x float>* %px2, align 4 1011 %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 1012 %py2 = bitcast float* %py to <4 x float>* 1013 %ym = load <4 x float>, <4 x float>* %py2, align 4 1014 %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 1015 %pz2 = bitcast float* %pz to <4 x float>* 1016 %zm = load <4 x float>, <4 x float>* %pz2, align 4 1017 %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 1018 %pw2 = bitcast float* %pw to <4 x float>* 1019 %wm = load <4 x float>, <4 x float>* %pw2, align 4 1020 1021 %a1 = fmul <4 x float> %x, %xm 1022 %a2 = fmul <4 x float> %y, %ym 1023 %a3 = fadd <4 x float> %a1, %a2 1024 %a4 = fmul <4 x float> %z, %zm 1025 %a5 = fadd <4 x float> %a3, %a4 1026 %a6 = fmul <4 x float> %w, %wm 1027 %a7 = fadd <4 x float> %a5, %a6 1028 ret <4 x float> %a7 1029 } 1030 1031 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly { 1032 %x0 = extractelement <3 x float> %in, i32 0 1033 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 1034 %y0 = extractelement 
<3 x float> %in, i32 1 1035 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 1036 %z0 = extractelement <3 x float> %in, i32 2 1037 %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone 1038 1039 %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 1040 %px2 = bitcast float* %px to <4 x float>* 1041 %xm = load <4 x float>, <4 x float>* %px2, align 4 1042 %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 1043 %py2 = bitcast float* %py to <4 x float>* 1044 %ym = load <4 x float>, <4 x float>* %py2, align 4 1045 %pz = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8 1046 %pz2 = bitcast float* %pz to <4 x float>* 1047 %zm = load <4 x float>, <4 x float>* %pz2, align 4 1048 %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 1049 %pw2 = bitcast float* %pw to <4 x float>* 1050 %wm = load <4 x float>, <4 x float>* %pw2, align 4 1051 1052 %a1 = fmul <4 x float> %x, %xm 1053 %a2 = fadd <4 x float> %wm, %a1 1054 %a3 = fmul <4 x float> %y, %ym 1055 %a4 = fadd <4 x float> %a2, %a3 1056 %a5 = fmul <4 x float> %z, %zm 1057 %a6 = fadd <4 x float> %a4, %a5 1058 ret <4 x float> %a6 1059 } 1060 1061 define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly { 1062 %x0 = extractelement <2 x float> %in, i32 0 1063 %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone 1064 %y0 = extractelement <2 x float> %in, i32 1 1065 %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone 1066 1067 %px = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0 1068 %px2 = bitcast float* %px to <4 x float>* 1069 %xm = load <4 x float>, <4 x float>* %px2, align 4 1070 %py = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4 1071 %py2 
= bitcast float* %py to <4 x float>* 1072 %ym = load <4 x float>, <4 x float>* %py2, align 4 1073 %pw = getelementptr inbounds %struct.rs_matrix4x4, %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12 1074 %pw2 = bitcast float* %pw to <4 x float>* 1075 %wm = load <4 x float>, <4 x float>* %pw2, align 4 1076 1077 %a1 = fmul <4 x float> %x, %xm 1078 %a2 = fadd <4 x float> %wm, %a1 1079 %a3 = fmul <4 x float> %y, %ym 1080 %a4 = fadd <4 x float> %a2, %a3 1081 ret <4 x float> %a4 1082 } 1083 1084 1085 1086 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1087 ;;;;;;;;; pixel ops ;;;;;;;;;; 1088 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1089 1090 1091 @fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16 1092 @fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16 1093 @fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16 1094 1095 declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone 1096 declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone 1097 1098 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color) 1099 define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone { 1100 %f255 = load <4 x float>, <4 x float>* @fc_255.0, align 16 1101 %f05 = load <4 x float>, <4 x float>* @fc_0.5, align 16 1102 %f0 = load <4 x float>, <4 x float>* @fc_0, align 16 1103 %v1 = fmul <4 x float> %f255, %color 1104 %v2 = fadd <4 x float> %f05, %v1 1105 %v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255) nounwind readnone 1106 %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone 1107 ret <4 x i8> %v4 1108 } 1109 1110 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color) 1111 define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone { 1112 
%1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1113 %2 = insertelement <4 x float> %1, float 1.0, i32 3 1114 %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone 1115 ret <4 x i8> %3 1116 } 1117 1118 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b) 1119 define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone { 1120 %1 = insertelement <4 x float> undef, float %r, i32 0 1121 %2 = insertelement <4 x float> %1, float %g, i32 1 1122 %3 = insertelement <4 x float> %2, float %b, i32 2 1123 %4 = insertelement <4 x float> %3, float 1.0, i32 3 1124 %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone 1125 ret <4 x i8> %5 1126 } 1127 1128 ; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a) 1129 define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone { 1130 %1 = insertelement <4 x float> undef, float %r, i32 0 1131 %2 = insertelement <4 x float> %1, float %g, i32 1 1132 %3 = insertelement <4 x float> %2, float %b, i32 2 1133 %4 = insertelement <4 x float> %3, float %a, i32 3 1134 %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone 1135 ret <4 x i8> %5 1136 } 1137 1138