/*
 * Copyright (C) 2011 University of Szeged
 * Copyright (C) 2011 Zoltan Herczeg
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "FELightingNEON.h"

#if CPU(ARM_NEON) && COMPILER(GCC)

#include <wtf/Vector.h>

namespace WebCore {

// These constants are copied to the following SIMD registers:
//   ALPHAX_Q ALPHAY_Q REMAPX_D REMAPY_D
//
// neonDrawLighting reads 24 shorts (48 bytes) through the pointer stored at
// DRAWING_CONSTANTS_OFFSET: READ4_RANGE (d22-d25, i.e. ALPHAX_Q and ALPHAY_Q)
// receives the 16 alpha coefficients and READ5_RANGE (d26-d27, i.e. REMAPX_D
// and REMAPY_D) receives the 8 remapping entries.
// NOTE(review): presumably the caller stores feLightingConstantsForNeon() in
// that field - confirm against the caller, which is not part of this file.

WTF_ALIGNED(short, s_FELightingConstantsForNeon[], 16) = {
    // Alpha coefficients.
    // One 16-bit weight per remapped alpha lane: the first row ends up in
    // ALPHAX_Q (X kernel), the second row in ALPHAY_Q (Y kernel).
    -2, 1, 0, -1, 2, 1, 0, -1,
    0, -1, -2, -1, 0, 1, 2, 1,
    // Remapping indices.
    // Each entry packs two byte indices for vtbl.8 (low byte, then low + 1),
    // so it selects one 16-bit lane of ALL_ROWS_D: bytes 0x00-0x07 address
    // TOP_ROW_D, 0x08-0x0f MIDDLE_ROW_D and 0x10-0x17 BOTTOM_ROW_D.
    0x0f0e, 0x0302, 0x0504, 0x0706,
    0x0b0a, 0x1312, 0x1514, 0x1716,
};

// Returns a pointer to the first element of s_FELightingConstantsForNeon.
short* feLightingConstantsForNeon()
{
    return s_FELightingConstantsForNeon;
}

// ASSTRING stringifies its argument as written; TOSTRING macro-expands the
// argument first and then stringifies it, so the result can be pasted into
// the assembly text below.
#define ASSTRING(str) #str
#define TOSTRING(value) ASSTRING(value)

// Byte offsets of the fields that neonDrawLighting loads from the data block
// whose address it receives in r0.
// NOTE(review): these must match the layout of the structure declared
// elsewhere (presumably in FELightingNEON.h) - verify when changing either.
#define PIXELS_OFFSET TOSTRING(0)
#define WIDTH_OFFSET TOSTRING(4)
#define HEIGHT_OFFSET TOSTRING(8)
#define FLAGS_OFFSET TOSTRING(12)
#define SPECULAR_EXPONENT_OFFSET TOSTRING(16)
#define CONE_EXPONENT_OFFSET TOSTRING(20)
#define FLOAT_ARGUMENTS_OFFSET TOSTRING(24)
#define DRAWING_CONSTANTS_OFFSET TOSTRING(28)
// Line terminator appended to every assembly statement.
#define NL "\n"

// Register allocation
// PAINTING_DATA_R holds the data block address only while its fields are
// being loaded; the same register (r11) is then reused as RESET_WIDTH_R to
// keep the original width for the start of every scanline.
#define PAINTING_DATA_R "r11"
#define RESET_WIDTH_R PAINTING_DATA_R
#define PIXELS_R "r4"
#define WIDTH_R "r5"
#define HEIGHT_R "r6"
#define FLAGS_R "r7"
#define SPECULAR_EXPONENT_R "r8"
#define CONE_EXPONENT_R "r10"
#define SCANLINE_R "r12"

// Three scratch quad registers. Each one is also named through its two
// double-word halves (_D0, _D1) and its four single-precision lanes
// (_S0 .. _S3), which alias the same storage.
#define TMP1_Q "q0"
#define TMP1_D0 "d0"
#define TMP1_S0 "s0"
#define TMP1_S1 "s1"
#define TMP1_D1 "d1"
#define TMP1_S2 "s2"
#define TMP1_S3 "s3"
#define TMP2_Q "q1"
#define TMP2_D0 "d2"
#define TMP2_S0 "s4"
#define TMP2_S1 "s5"
#define TMP2_D1 "d3"
#define TMP2_S2 "s6"
#define TMP2_S3 "s7"
#define TMP3_Q "q2"
#define TMP3_D0 "d4"
#define TMP3_S0 "s8"
#define TMP3_S1 "s9"
#define TMP3_D1 "d5"
#define TMP3_S2 "s10"
#define TMP3_S3 "s11"

// Spot light scratch values, the two work registers of the POWF macro and
// the copy of the light color that is taken before the main loop starts.
#define COSINE_OF_ANGLE "s12"
#define POWF_INT_S "s13"
#define POWF_FRAC_S "s14"
#define SPOT_COLOR_Q "q4"

// Because of VMIN and VMAX CONST_ZERO_S and CONST_ONE_S
// must be placed on the same side of the double vector
// (both are the upper lane of their D register, like TMP2_S1 and TMP3_S1,
// the values that are clamped against them).

// Current pixel position
// Lane 3 of POSITION_Q (s23) doubles as the constant zero.
#define POSITION_Q "q5"
#define POSITION_X_S "s20"
#define POSITION_Y_S "s21"
#define POSITION_Z_S "s22"
#define CONST_ZERO_HI_D "d11"
#define CONST_ZERO_S "s23"

// -------------------------------
// Variable arguments
// Misc arguments
// The three ranges are filled, in this order, from the buffer addressed by
// the FLOAT_ARGUMENTS_OFFSET field: 8 + 8 + 4 single-precision values.
#define READ1_RANGE "d12-d15"
#define READ2_RANGE "d16-d19"
#define READ3_RANGE "d20-d21"

// Lanes of READ1_RANGE (s24 - s31); s27 is loaded but has no name here.
#define SCALE_S "s24"
#define SCALE_DIV4_S "s25"
#define DIFFUSE_CONST_S "s26"

// Spot light cone parameters and the constant one (also part of READ1_RANGE).
// NOTE(review): CONST_ONE_S is loaded from the float arguments, so the caller
// is expected to store 1.0 there - confirm against the caller.
#define CONE_CUT_OFF_S "s28"
#define CONE_FULL_LIGHT_S "s29"
#define CONE_CUT_OFF_RANGE_S "s30"
#define CONST_ONE_HI_D "d15"
#define CONST_ONE_S "s31"

// READ2_RANGE holds LIGHT_Q and DIRECTION_Q, READ3_RANGE holds COLOR_Q.
// LIGHT_Q is used as the light position for point and spot lights (the pixel
// position is subtracted from it) and as the light vector itself for distant
// lights.
#define LIGHT_Q "q8"
#define DIRECTION_Q "q9"
#define COLOR_Q "q10"
// -------------------------------
// Constant coefficients
// Filled from the buffer addressed by the DRAWING_CONSTANTS_OFFSET field.
#define READ4_RANGE "d22-d25"
#define READ5_RANGE "d26-d27"

#define ALPHAX_Q "q11"
#define ALPHAY_Q "q12"
#define REMAPX_D "d26"
#define REMAPY_D "d27"
// -------------------------------

// Sliding window over the alpha values of three neighbouring rows; ALL_ROWS_D
// is the same three registers written as a vtbl.8 table list.
#define ALL_ROWS_D "{d28,d29,d30}"
#define TOP_ROW_D "d28"
#define MIDDLE_ROW_D "d29"
#define BOTTOM_ROW_D "d30"

// GET_LENGTH(source, temp)
// Stores sqrt(S0 * S0 + S1 * S1 + S2 * S2) of source##_Q in source##_S3.
// Lanes 0-2 of the source are left untouched; temp##_Q is overwritten with
// the lane-wise squares of the source.
#define GET_LENGTH(source, temp) \
    "vmul.f32 " temp##_Q ", " source##_Q ", " source##_Q NL \
    "vadd.f32 " source##_S3 ", " temp##_S0 ", " temp##_S1 NL \
    "vadd.f32 " source##_S3 ", " source##_S3 ", " temp##_S2 NL \
    "vsqrt.f32 " source##_S3 ", " source##_S3 NL

// destination##_S3 can contain the product of the two lengths (the lane-wise
// multiplication also multiplies source1##_S3 by source2##_S3).
// DOT_PRODUCT(destination, source1, source2)
// Multiplies the two source quad registers lane by lane into destination##_Q
// and then accumulates lanes 0, 1 and 2 in destination##_S0. Lanes 1 and 2
// keep their individual products and lane 3 keeps source1##_S3 * source2##_S3.
#define DOT_PRODUCT(destination, source1, source2) \
    "vmul.f32 " destination##_Q ", " source1##_Q ", " source2##_Q NL \
    "vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S1 NL \
    "vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S2 NL

// MULTIPLY_BY_DIFFUSE_CONST(normalVectorLength, dotProductLength)
// Writes the light strength to TMP2_S1:
//   FLAG_DIFFUSE_CONST_IS_1 clear: DIFFUSE_CONST_S * first argument / second argument
//   FLAG_DIFFUSE_CONST_IS_1 set:   first argument / second argument
// Both arguments are single-precision register names. The macro overwrites
// the condition flags (tst) and relies on conditionally executed VFP
// instructions.
// NOTE(review): the FLAG_* constants are defined outside this file; TOSTRING
// requires them to expand to something the assembler accepts as an immediate.
#define MULTIPLY_BY_DIFFUSE_CONST(normalVectorLength, dotProductLength) \
    "tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL \
    "vmuleq.f32 " TMP2_S1 ", " DIFFUSE_CONST_S ", " normalVectorLength NL \
    "vdiveq.f32 " TMP2_S1 ", " TMP2_S1 ", " dotProductLength NL \
    "vdivne.f32 " TMP2_S1 ", " normalVectorLength ", " dotProductLength NL

// POWF_SQR(value, exponent, current, remaining)
// One step of the integer part of POWF: when any bit of the `current` mask is
// set in the exponent register, value is multiplied by POWF_INT_S; when any
// bit of the `remaining` mask is set, POWF_INT_S is squared for the next step.
#define POWF_SQR(value, exponent, current, remaining) \
    "tst " exponent ", #" ASSTRING(current) NL \
    "vmulne.f32 " value ", " value ", " POWF_INT_S NL \
    "tst " exponent ", #" ASSTRING(remaining) NL \
    "vmulne.f32 " POWF_INT_S ", " POWF_INT_S ", " POWF_INT_S NL

// POWF_SQRT(value, exponent, current, remaining)
// One step of the fractional part of POWF: when any bit of the `remaining`
// mask is set in the exponent register, POWF_FRAC_S is replaced by its square
// root; when the `current` bit is set, value is multiplied by POWF_FRAC_S.
#define POWF_SQRT(value, exponent, current, remaining) \
    "tst " exponent ", #" ASSTRING(remaining) NL \
    "vsqrtne.f32 " POWF_FRAC_S ", " POWF_FRAC_S NL \
    "tst " exponent ", #" ASSTRING(current) NL \
    "vmulne.f32 " value ", " value ", " POWF_FRAC_S NL

// This simplified powf function is sufficiently accurate.
// POWF(value, exponent)
// Replaces the single-precision register `value` by value raised to the
// power held in the core register `exponent`. The macro tests bits 0-11 of
// the exponent as a fixed-point number with six fractional bits:
//   bits 6-11 (0xfc0): integer part, handled by repeated squaring of
//                      POWF_INT_S (0x040 -> base^1 ... 0x800 -> base^32)
//   bits 0-5  (0x03f): fractional part, handled by repeated square roots of
//                      POWF_FRAC_S (0x20 -> base^(1/2) ... 0x01 -> base^(1/64))
// Overwrites POWF_INT_S, POWF_FRAC_S and the condition flags; reads
// CONST_ONE_S as the initial value of the product.
#define POWF(value, exponent) \
    "tst " exponent ", #0xfc0" NL \
    "vmovne.f32 " POWF_INT_S ", " value NL \
    "tst " exponent ", #0x03f" NL \
    "vmovne.f32 " POWF_FRAC_S ", " value NL \
    "vmov.f32 " value ", " CONST_ONE_S NL \
    \
    POWF_SQR(value, exponent, 0x040, 0xf80) \
    POWF_SQR(value, exponent, 0x080, 0xf00) \
    POWF_SQR(value, exponent, 0x100, 0xe00) \
    POWF_SQR(value, exponent, 0x200, 0xc00) \
    POWF_SQR(value, exponent, 0x400, 0x800) \
    "tst " exponent ", #0x800" NL \
    "vmulne.f32 " value ", " value ", " POWF_INT_S NL \
    \
    POWF_SQRT(value, exponent, 0x20, 0x3f) \
    POWF_SQRT(value, exponent, 0x10, 0x1f) \
    POWF_SQRT(value, exponent, 0x08, 0x0f) \
    POWF_SQRT(value, exponent, 0x04, 0x07) \
    POWF_SQRT(value, exponent, 0x02, 0x03) \
    POWF_SQRT(value, exponent, 0x01, 0x01)

// The following algorithm is an ARM-NEON optimized version of
// the main loop found in FELighting.cpp. Since the whole code
// is redesigned to be as effective as possible (ARM specific
// thinking), it is four times faster than its C++ counterpart.
//
// neonDrawLighting
//
// Entry: r0 holds the address of the painting data block; its fields are
// read at the *_OFFSET byte offsets.
// NOTE(review): the C prototype is not visible in this file (presumably it
// is declared in FELightingNEON.h).
//
// Register usage visible below:
//   - r4-r8, r10, r11, lr and d8-d15 are pushed on entry and popped on
//     return (pc is loaded from the saved lr slot).
//   - r0-r3 and r12 are used as scratch without being saved.
//
// Pixel buffer addressing:
//   - SCANLINE_R = width * 4 + 8 bytes, i.e. (width + 2) four-byte pixels.
//   - PIXELS_R starts at pixels + SCANLINE_R + 3, which is byte 3 (read as
//     the alpha value) of the first pixel of the second row.
//   - Every .scanline iteration reads that byte from the row above, the
//     current row and the row below, then advances PIXELS_R by one pixel.
//     The buffer therefore has to contain one extra row above and below
//     and one extra pixel on both sides of the width x height area.
//   - The result is written to PIXELS_R - 11 .. PIXELS_R - 9: bytes 0-2 of
//     the pixel in the middle of the three-pixel-wide window.
//   - The width and height counters are decremented before they are tested,
//     so both must be non-zero on entry.

asm ( // NOLINT
    ".globl " TOSTRING(neonDrawLighting) NL
    TOSTRING(neonDrawLighting) ":" NL
    // Because of the clever register allocation, nothing is stored on the stack
    // except the saved registers.
    // Stack must be aligned to 8 bytes.
    "stmdb sp!, {r4-r8, r10, r11, lr}" NL
    "vstmdb sp!, {d8-d15}" NL
    "mov " PAINTING_DATA_R ", r0" NL

    // The following two arguments are loaded to SIMD registers.
    "ldr r0, [" PAINTING_DATA_R ", #" FLOAT_ARGUMENTS_OFFSET "]" NL
    "ldr r1, [" PAINTING_DATA_R ", #" DRAWING_CONSTANTS_OFFSET "]" NL
    "ldr " PIXELS_R ", [" PAINTING_DATA_R ", #" PIXELS_OFFSET "]" NL
    "ldr " WIDTH_R ", [" PAINTING_DATA_R ", #" WIDTH_OFFSET "]" NL
    "ldr " HEIGHT_R ", [" PAINTING_DATA_R ", #" HEIGHT_OFFSET "]" NL
    "ldr " FLAGS_R ", [" PAINTING_DATA_R ", #" FLAGS_OFFSET "]" NL
    "ldr " SPECULAR_EXPONENT_R ", [" PAINTING_DATA_R ", #" SPECULAR_EXPONENT_OFFSET "]" NL
    "ldr " CONE_EXPONENT_R ", [" PAINTING_DATA_R ", #" CONE_EXPONENT_OFFSET "]" NL

    // Load all data to the SIMD registers with the least number of instructions.
    // r0 walks over 20 floats (READ1-READ3), r1 over 24 shorts (READ4-READ5).
    "vld1.f32 { " READ1_RANGE " }, [r0]!" NL
    "vld1.f32 { " READ2_RANGE " }, [r0]!" NL
    "vld1.f32 { " READ3_RANGE " }, [r0]!" NL
    "vld1.s16 {" READ4_RANGE "}, [r1]!" NL
    "vld1.s16 {" READ5_RANGE "}, [r1]!" NL

    // Initializing local variables.
    "mov " SCANLINE_R ", " WIDTH_R ", lsl #2" NL
    "add " SCANLINE_R ", " SCANLINE_R ", #8" NL
    "add " PIXELS_R ", " PIXELS_R ", " SCANLINE_R NL
    "add " PIXELS_R ", " PIXELS_R ", #3" NL
    "mov r0, #0" NL
    "vmov.f32 " CONST_ZERO_S ", r0" NL
    "vmov.f32 " POSITION_Y_S ", " CONST_ONE_S NL
    // NOTE(review): the flags set by this tst are not consumed by any
    // conditional instruction before the next "subs" overwrites them; the
    // copy to SPOT_COLOR_Q below is unconditional.
    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
    // Keep the unmodified light color: COLOR_Q is rewritten for every pixel
    // of a spot light.
    "vmov.f32 " SPOT_COLOR_Q ", " COLOR_Q NL
    // All fields are loaded, so r11 is free to hold the width from now on.
    "mov " RESET_WIDTH_R ", " WIDTH_R NL

    ".mainloop:" NL
    // r3 counts the columns to load before the next pixel can be processed:
    // three at the start of a scanline, one afterwards.
    "mov r3, #3" NL
    "vmov.f32 " POSITION_X_S ", " CONST_ONE_S NL

    ".scanline:" NL
    // The ROW registers are storing the alpha channel of the last three pixels.
    // The alpha channel is stored as signed short (sint16) values. The fourth value
    // is garbage. The following instructions are shifting out the unnecessary alpha
    // values and load the next ones.
    // After the rotation lane 1 receives the newest value, lane 2 holds the
    // previous one, lane 3 the oldest one and lane 0 is the unused lane.
    "ldrb r0, [" PIXELS_R ", -" SCANLINE_R "]" NL
    "ldrb r1, [" PIXELS_R ", +" SCANLINE_R "]" NL
    "ldrb r2, [" PIXELS_R "], #4" NL
    "vext.s16 " TOP_ROW_D ", " TOP_ROW_D ", " TOP_ROW_D ", #3" NL
    "vext.s16 " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", #3" NL
    "vext.s16 " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", #3" NL
    "vmov.s16 " TOP_ROW_D "[1], r0" NL
    "vmov.s16 " MIDDLE_ROW_D "[1], r2" NL
    "vmov.s16 " BOTTOM_ROW_D "[1], r1" NL

    // The two border pixels (rightmost and leftmost) are skipped when
    // the next scanline is reached. It also jumps, when the algorithm
    // is started, and the first three alpha values are loaded to each row.
    "subs r3, r3, #1" NL
    "bne .scanline" NL

    // The light vector goes to TMP1_Q. It is constant in case of distant light.
    // The fourth value contains the length of the light vector.
    "tst " FLAGS_R ", #" TOSTRING(FLAG_POINT_LIGHT | FLAG_SPOT_LIGHT) NL
    "beq .distantLight" NL

    // Point and spot lights: Z position = alpha of the middle pixel * SCALE_S,
    // light vector = light position - pixel position.
    "vmov.s16 r3, " MIDDLE_ROW_D "[2]" NL
    "vmov.f32 " POSITION_Z_S ", r3" NL
    "vcvt.f32.s32 " POSITION_Z_S ", " POSITION_Z_S NL
    "vmul.f32 " POSITION_Z_S ", " POSITION_Z_S ", " SCALE_S NL

    "vsub.f32 " TMP1_Q ", " LIGHT_Q ", " POSITION_Q NL
    GET_LENGTH(TMP1, TMP2)

    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
    "bne .cosineOfAngle" NL
    ".visiblePixel:" NL

    //     | -1  0  1 |      | -1 -2 -1 |
    // X = | -2  0  2 |  Y = |  0  0  0 |
    //     | -1  0  1 |      |  1  2  1 |

    // Multiply the alpha values by the X and Y matrices.

    // Moving the 8 alpha values to TMP3.
    "vtbl.8 " TMP3_D0 ", " ALL_ROWS_D ", " REMAPX_D NL
    "vtbl.8 " TMP3_D1 ", " ALL_ROWS_D ", " REMAPY_D NL

    // Weighted sum for X: the three pairwise additions fold the eight
    // products into lane 0, which is moved to r0 with sign extension.
    "vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAX_Q NL
    "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL
    "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
    "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
    "vmov.s16 r0, " TMP2_D0 "[0]" NL

    // The same for Y, result in r1.
    "vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAY_Q NL
    "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL
    "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
    "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
    "vmov.s16 r1, " TMP2_D0 "[0]" NL

    // r0 and r1 contain the X and Y coordinates of the
    // normal vector, respectively.

    // Calculating the spot light strength.
    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
    "beq .endLight" NL

    // TMP3_S1 = -COSINE_OF_ANGLE, raised to the cone exponent unless
    // FLAG_CONE_EXPONENT_IS_1 is set.
    "vneg.f32 " TMP3_S1 ", " COSINE_OF_ANGLE NL
    "tst " FLAGS_R ", #" TOSTRING(FLAG_CONE_EXPONENT_IS_1) NL
    "beq .coneExpPowf" NL
    ".coneExpPowfFinished:" NL

    // Smoothing the cone edge if necessary.
    "vcmp.f32 " COSINE_OF_ANGLE ", " CONE_FULL_LIGHT_S NL
    "fmstat" NL
    "bhi .cutOff" NL
    ".cutOffFinished:" NL

    // Limit the strength (TMP3_S1) to CONST_ONE_S and scale the saved light
    // color by it.
    "vmin.f32 " TMP3_D0 ", " TMP3_D0 ", " CONST_ONE_HI_D NL
    "vmul.f32 " COLOR_Q ", " SPOT_COLOR_Q ", " TMP3_D0 "[1]" NL

    ".endLight:" NL
    // Summarize:
    // r0 and r1 contain the normalVector.
    // TMP1_Q contains the light vector and its length.
    // COLOR_Q contains the color of the light vector.

    // Test whether both r0 and r1 are zero (Normal vector is (0, 0, 1)).
    "orrs r2, r0, r1" NL
    "bne .normalVectorIsNonZero" NL

    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL
    "bne .specularLight1" NL

    // Calculate diffuse light strength.
    // With the normal (0, 0, 1) this is light.z / |light|.
    MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3)
    "b .lightStrengthCalculated" NL

    ".specularLight1:" NL
    // Calculating specular light strength.
    // The length of the light vector is added to its Z component and the
    // length is recomputed.
    // NOTE(review): presumably this forms the halfway vector for an eye
    // vector of (0, 0, 1) - confirm against FELighting.cpp.
    "vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL
    GET_LENGTH(TMP1, TMP2)

    // When the exponent is 1, we don't need to call an expensive powf function.
    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL
    "vdiveq.f32 " TMP2_S1 ", " TMP1_S2 ", " TMP1_S3 NL
    "beq .specularExpPowf" NL

    MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3)
    "b .lightStrengthCalculated" NL

    ".normalVectorIsNonZero:" NL
    // Normal vector goes to TMP2, and its length is calculated as well.
    // The vector is (r0 * SCALE_DIV4_S, r1 * SCALE_DIV4_S, CONST_ONE_S).
    "vmov.s32 " TMP2_S0 ", r0" NL
    "vcvt.f32.s32 " TMP2_S0 ", " TMP2_S0 NL
    "vmul.f32 " TMP2_S0 ", " TMP2_S0 ", " SCALE_DIV4_S NL
    "vmov.s32 " TMP2_S1 ", r1" NL
    "vcvt.f32.s32 " TMP2_S1 ", " TMP2_S1 NL
    "vmul.f32 " TMP2_S1 ", " TMP2_S1 ", " SCALE_DIV4_S NL
    "vmov.f32 " TMP2_S2 ", " CONST_ONE_S NL
    GET_LENGTH(TMP2, TMP3)

    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL
    "bne .specularLight2" NL

    // Calculating diffuse light strength.
    // TMP3_S0 = normal . light, TMP3_S3 = |normal| * |light|.
    DOT_PRODUCT(TMP3, TMP2, TMP1)
    MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3)
    "b .lightStrengthCalculated" NL

    ".specularLight2:" NL
    // Calculating specular light strength.
    // Same adjustment of the light vector as in .specularLight1, followed by
    // the dot product with the normal vector.
    "vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL
    GET_LENGTH(TMP1, TMP3)
    DOT_PRODUCT(TMP3, TMP2, TMP1)

    // When the exponent is 1, we don't need to call an expensive powf function.
    "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL
    "vdiveq.f32 " TMP2_S1 ", " TMP3_S0 ", " TMP3_S3 NL
    "beq .specularExpPowf" NL
    MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3)

    ".lightStrengthCalculated:" NL
    // TMP2_S1 contains the light strength. Clamp it to [0, 1]
    "vmax.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ZERO_HI_D NL
    "vmin.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ONE_HI_D NL
    // Scale the light color by the strength and convert it to integers.
    "vmul.f32 " TMP3_Q ", " COLOR_Q ", " TMP2_D0 "[1]" NL
    "vcvt.u32.f32 " TMP3_Q ", " TMP3_Q NL
    "vmov.u32 r2, r3, " TMP3_S0 ", " TMP3_S1 NL
    // The color values are stored in-place.
    // Only the low byte of each converted lane is written (strb).
    "strb r2, [" PIXELS_R ", #-11]" NL
    "strb r3, [" PIXELS_R ", #-10]" NL
    "vmov.u32 r2, " TMP3_S2 NL
    "strb r2, [" PIXELS_R ", #-9]" NL

    // Continue to the next pixel.
    ".blackPixel:" NL
    "vadd.f32 " POSITION_X_S ", " CONST_ONE_S NL
    // Only one new column has to be loaded for the next pixel.
    "mov r3, #1" NL
    "subs " WIDTH_R ", " WIDTH_R ", #1" NL
    "bne .scanline" NL

    // If the end of the scanline is reached, we continue
    // to the next scanline.
    "vadd.f32 " POSITION_Y_S ", " CONST_ONE_S NL
    "mov " WIDTH_R ", " RESET_WIDTH_R NL
    "subs " HEIGHT_R ", " HEIGHT_R ", #1" NL
    "bne .mainloop" NL

    // Return.
    "vldmia sp!, {d8-d15}" NL
    "ldmia sp!, {r4-r8, r10, r11, pc}" NL

    ".distantLight:" NL
    // In case of distant light, the light vector is constant,
    // we simply copy it.
    // NOTE(review): lane 3 of LIGHT_Q is used as the vector length afterwards,
    // so the caller presumably stores the length there - confirm.
    "vmov.f32 " TMP1_Q ", " LIGHT_Q NL
    "b .visiblePixel" NL

    ".cosineOfAngle:" NL
    // If the pixel is outside of the cone angle, it is simply a black pixel.
    // COSINE_OF_ANGLE = (light . direction) / |light|; the pixel is kept when
    // this value is lower than or equal to CONE_CUT_OFF_S, otherwise zero is
    // written to its three color bytes.
    DOT_PRODUCT(TMP3, TMP1, DIRECTION)
    "vdiv.f32 " COSINE_OF_ANGLE ", " TMP3_S0 ", " TMP1_S3 NL
    "vcmp.f32 " COSINE_OF_ANGLE ", " CONE_CUT_OFF_S NL
    "fmstat" NL
    "bls .visiblePixel" NL
    "mov r0, #0" NL
    "strh r0, [" PIXELS_R ", #-11]" NL
    "strb r0, [" PIXELS_R ", #-9]" NL
    "b .blackPixel" NL

    ".cutOff:" NL
    // Smoothing the light strength on the cone edge.
    // strength *= (CONE_CUT_OFF_S - COSINE_OF_ANGLE) / CONE_CUT_OFF_RANGE_S
    "vsub.f32 " TMP3_S0 ", " CONE_CUT_OFF_S ", " COSINE_OF_ANGLE NL
    "vdiv.f32 " TMP3_S0 ", " TMP3_S0 ", " CONE_CUT_OFF_RANGE_S NL
    "vmul.f32 " TMP3_S1 ", " TMP3_S1 ", " TMP3_S0 NL
    "b .cutOffFinished" NL

    ".coneExpPowf:" NL
    POWF(TMP3_S1, CONE_EXPONENT_R)
    "b .coneExpPowfFinished" NL

    ".specularExpPowf:" NL
    // TMP2_S1 was set by the vdiveq before the branch; raise it to the
    // specular exponent and apply the diffuse constant unless
    // FLAG_DIFFUSE_CONST_IS_1 is set.
    POWF(TMP2_S1, SPECULAR_EXPONENT_R)
    "tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL
    "vmuleq.f32 " TMP2_S1 ", " TMP2_S1 ", " DIFFUSE_CONST_S NL
    "b .lightStrengthCalculated" NL
); // NOLINT

} // namespace WebCore

#endif // CPU(ARM_NEON) && COMPILER(GCC)