Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2014 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifdef CRBUG_399842_FIXED
      9 
     10 #if defined(__clang__) || (defined(__GNUC__) && !defined(SK_BUILD_FOR_MAC))
     11 
     12 #define EXTRACT_ALPHA(var1, var2)       /* Reads var1, xmm5, xmm6; overwrites var2, xmm3, xmm4 */\
     13     movdqa      %var1, %var2;           /* Clone source pixels to extract alpha */\
     14     psrlw       $8, %var2;              /* Shift every 16-bit word right by 8: discards red and blue, leaving alpha and green */\
     15     pshufhw     $0xF5, %var2, %var2;    /* Repeat alpha into both words of each pixel (upper four words) */\
     16     movdqa      %xmm6, %xmm4;           /* xmm4 = copy of xmm6 (loaded from .LInverseAlphaCalc at function entry) */\
     17     pshuflw     $0xF5, %var2, %var2;    /* Repeat alpha into both words of each pixel (lower four words) */\
     18     movdqa      %xmm5, %xmm3;           /* xmm3 = copy of xmm5 (call sites load destination pixels there first) */\
     19     psubw       %var2, %xmm4            /* xmm4 = xmm6 - alpha, per 16-bit word: the scale factor used afterwards */
     20 
     21 #define SCALE_PIXELS                    /* Scales xmm5 and xmm3 in place by the 16-bit factors in xmm4 */\
     22     psllw       $8, %xmm5;              /* Move the low byte of each word (red and blue) into the high byte */\
     23     pmulhuw     %xmm4, %xmm5;           /* Scale red and blue: keep the high 16 bits of each unsigned product */\
     24     psrlw       $8, %xmm3;              /* Move the high byte of each word (alpha and green) into the low byte */\
     25     pmullw      %xmm4, %xmm3            /* Scale alpha and green: keep the low 16 bits of each product */
     26 
     27 
     28 /*
     29  * void S32A_Opaque_BlitRow32_SSE4(SkPMColor* SK_RESTRICT dst,
     30  *                                 const SkPMColor* SK_RESTRICT src,
     31  *                                 int count, U8CPU alpha)
     32  *
     33  * This function is divided into six blocks: initialization, blit 4-15 pixels,
     34  * blit 0-3 pixels, align destination for 16+ pixel blits,
     35  * blit 16+ pixels with source unaligned, blit 16+ pixels with source aligned.
     36  * There is some code reuse between the blocks.
     37  *
     38  * The primary optimization comes from checking the source pixels' alpha value.
     39  * If the alpha is zero, the pixel can be skipped entirely.
     40  * If the alpha is fully opaque, the pixel can be copied directly to the destination.
     41  * According to collected statistics, these two cases are the most common.
     42  * The main loops use pre-loading and unrolling in an attempt to reduce the
     43  * worst-case memory latency.
     44  */
     45 
     46 #ifdef __clang__
     47     .text                               // clang: emit into the default text section
     48 #else
     49     .section .text.sse4.2,"ax",@progbits    // Other compilers: dedicated allocatable, executable section
     50     .type S32A_Opaque_BlitRow32_SSE4_asm, @function
     51 #endif
     52     .p2align 4                          // Align the entry point to 16 bytes
     53 #if defined(SK_BUILD_FOR_MAC)
     54     .global _S32A_Opaque_BlitRow32_SSE4_asm
     55     .private_extern _S32A_Opaque_BlitRow32_SSE4_asm
     56 _S32A_Opaque_BlitRow32_SSE4_asm:        // Mac builds use the underscore-prefixed symbol name
     57 #else
     58     .global S32A_Opaque_BlitRow32_SSE4_asm
     59     .hidden S32A_Opaque_BlitRow32_SSE4_asm
     60 S32A_Opaque_BlitRow32_SSE4_asm:
     61 #endif
     62     .cfi_startproc
            // Arguments are read from %rdi (dst), %rsi (src) and %edx (count).
            // No other argument register is read; %rcx is overwritten with the count below.
            // Register roles after this prologue: %rax = source pointer, %rdx = destination
            // pointer, %ecx = pixel counter, %rdi = byte offset (zeroed by each block before use).
     63     prefetcht0  (%rsi)                  // Prefetch the start of the source row
     64     movl        %edx, %ecx              // Pixel count
     65     movq        %rdi, %rdx              // Destination pointer
     66     movq        %rsi, %rax              // Source pointer
     67 
     68     // Setup SSE constants (these three registers are never written again in this function)
     69     movdqa      .LAlphaCheckMask(%rip), %xmm7  // 0xFF000000 mask to check alpha
     70     movdqa      .LInverseAlphaCalc(%rip), %xmm6// 16-bit 256 to calculate inv. alpha
     71     movdqa      .LResultMergeMask(%rip), %xmm0 // 0x00FF00FF mask (Must be in xmm0 because of pblendvb)
     72 
     73     subl        $4, %ecx                // %ecx = count - 4; a negative result means only 0-3 pixels
     74     js          .LReallySmall
     75     cmpl        $11, %ecx               // count - 4 above 11 (unsigned) means 16+ pixels: enough for the main loop
     76     ja          .LBigBlit               // Otherwise fall through to the 4-15 pixel path
     77 
     78     // Handle small blits (4-15 pixels)
     79     ////////////////////////////////////////////////////////////////////////////////
            // On entry %ecx = count - 4. Each pass handles the four pixels at byte offset
            // %rdi, then subtracts four from %ecx; the loop ends once %ecx goes negative.
     80     xorq        %rdi, %rdi              // Reset offset to zero
     81 
     82 .LSmallLoop:
     83     lddqu       (%rax, %rdi), %xmm1     // Load four source pixels
     84     ptest       %xmm7, %xmm1            // ZF = all four alphas are zero, CF = all four alphas are opaque
     85     ja          .LSmallAlphaNotOpaqueOrZero     // Neither flag set: mixed alphas, must blend
     86     jz          .LSmallAlphaZero        // All transparent: leave the destination untouched
     87     movdqu      %xmm1, (%rdx, %rdi)     // All opaque: store the four source pixels as the destination pixels
     88 .LSmallAlphaZero:
     89     addq        $16, %rdi               // Advance the offset by four pixels (16 bytes)
     90     subl        $4, %ecx                // Check if there are four additional pixels, at least
     91     jns         .LSmallLoop
     92     jmp         .LSmallRemaining        // Fewer than four pixels left
     93 
     94     // Handle mixed alphas (calculate and scale)
     95     .p2align 4
     96 .LSmallAlphaNotOpaqueOrZero:
     97     lddqu       (%rdx, %rdi), %xmm5     // Load four destination pixels
     98     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
     99     SCALE_PIXELS                        // Scale pixels using alpha
    100 
    101     addq        $16, %rdi               // Advance the offset; the store below compensates with -16
    102     subl        $4, %ecx                // Check if there are four additional pixels, at least
    103     pblendvb    %xmm5, %xmm3            // Take the 0x00FF00FF bytes from %xmm5, the rest from %xmm3 (mask in %xmm0, implicitly)
    104     paddb       %xmm3, %xmm1            // Add source and destination pixels together
    105     movdqu      %xmm1, -16(%rdx, %rdi)  // Store four destination pixels
    106     jns         .LSmallLoop             // Uses the flags of the subl above; the SSE instructions in between do not modify them
    107 
    108     // Handle the last 0-3 pixels (also used by the main loops)
            // On entry %ecx is in -4..-1 and equals (pixels left) - 4; %rdi is the byte
            // offset of the first unprocessed pixel.
    109 .LSmallRemaining:
    110     cmpl        $-4, %ecx               // Check if we are done
    111     je          .LSmallExit
    112     sall        $2, %ecx                // Calculate offset for last pixels (pixels -> bytes: -12, -8 or -4)
    113     movslq      %ecx, %rcx              // Sign-extend the negative byte offset to 64 bits
    114     addq        %rcx, %rdi              // Step back so that a 16-byte access ends exactly at the last pixel
    115 
    116     lddqu       (%rax, %rdi), %xmm1     // Load last four source pixels (overlapping)
    117     ptest       %xmm7, %xmm1            // Check if all alphas are zero or opaque
    118     jc          .LSmallRemainingStoreAll// If all alphas are opaque, just store (overlapping)
    119     jz          .LSmallExit             // If all alphas are zero, skip the pixels completely
    120 
    121     // Handle mixed alphas (calculate and scale)
            // Same arithmetic as SCALE_PIXELS, but done in %xmm3 and %xmm2 so that %xmm5
            // keeps the unmodified destination pixels for the partial merge below.
    122     lddqu       (%rdx, %rdi), %xmm5     // Load last four destination pixels (overlapping)
    123     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    124 
    125     psllw       $8, %xmm3               // Filter out red and blue components
    126     pmulhuw     %xmm4, %xmm3            // Scale red and blue
    127     movdqa      %xmm5, %xmm2            // Second working copy of the destination pixels
    128     psrlw       $8, %xmm2               // Filter out alpha and green components
    129     pmullw      %xmm4, %xmm2            // Scale alpha and green
    130 
    131     cmpl        $-8, %ecx               // Check how many pixels should be written (%ecx is -12, -8 or -4 here)
    132     pblendvb    %xmm3, %xmm2            // Combine results (mask in %xmm0, implicitly)
    133     paddb       %xmm2, %xmm1            // Add source and destination pixels together
    134     jb          .LSmallPixelsLeft1      // Unsigned below -8, i.e. -12: one pixel left
    135     ja          .LSmallPixelsLeft3      // To avoid double-blending the overlapping pixels...
    136     pblendw     $0xF0, %xmm1, %xmm5     // Merge only the final two pixels to the destination
    137     movdqu      %xmm5, (%rdx, %rdi)     // Store last two destination pixels
    138 .LSmallExit:
    139     ret
    140 
    141 .LSmallPixelsLeft1:
    142     pblendw     $0xC0, %xmm1, %xmm5     // Merge only the final pixel to the destination
    143     movdqu      %xmm5, (%rdx, %rdi)     // Store last destination pixel
    144     ret
    145 
    146 .LSmallPixelsLeft3:
    147     pblendw     $0xFC, %xmm1, %xmm5     // Merge only the final three pixels to the destination
    148     movdqu      %xmm5, (%rdx, %rdi)     // Store last three destination pixels
    149     ret
    150 
    151 .LSmallRemainingStoreAll:
    152     movdqu      %xmm1, (%rdx, %rdi)     // Store last destination pixels (overwrite)
    153     ret
    154 
    155     // Handle really small blits (0-3 pixels)
    156     ////////////////////////////////////////////////////////////////////////////////
            // Pixels are moved one 32-bit lane at a time, so nothing outside the
            // requested pixels is read or written on this path.
    157 .LReallySmall:
    158     addl        $4, %ecx                // Undo the earlier subtraction: %ecx is the pixel count again
    159     jle         .LReallySmallExit       // Count is zero or negative: nothing to do
    160     pcmpeqd     %xmm1, %xmm1            // All ones, so lanes that are not loaded below test as opaque
    161     cmpl        $2, %ecx                // Check how many pixels should be read
    162     pinsrd      $0x0, (%rax), %xmm1     // Load one source pixel
    163     pinsrd      $0x0, (%rdx), %xmm5     // Load one destination pixel
    164     jb          .LReallySmallCalc       // One pixel (flags still come from the cmpl above)
    165     pinsrd      $0x1, 4(%rax), %xmm1    // Load second source pixel
    166     pinsrd      $0x1, 4(%rdx), %xmm5    // Load second destination pixel
    167     je          .LReallySmallCalc       // Two pixels
    168     pinsrd      $0x2, 8(%rax), %xmm1    // Load third source pixel
    169     pinsrd      $0x2, 8(%rdx), %xmm5    // Load third destination pixel
    170 
    171 .LReallySmallCalc:
    172     ptest       %xmm7, %xmm1            // Check if all alphas are opaque
    173     jc          .LReallySmallStore      // If all alphas are opaque, just store
    174 
    175     // Handle mixed alphas (calculate and scale)
            // There is no all-transparent shortcut on this path; zero alphas are blended like any other.
    176     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    177 
    178     pand        %xmm0, %xmm5            // Filter out red and blue components
    179     pmullw      %xmm4, %xmm5            // Scale red and blue
    180     psrlw       $8, %xmm3               // Filter out alpha and green components
    181     pmullw      %xmm4, %xmm3            // Scale alpha and green
    182 
    183     psrlw       $8, %xmm5               // Combine results: bring scaled red and blue back down to the low byte
    184     pblendvb    %xmm5, %xmm3            // Mask in %xmm0, implicitly
    185     paddb       %xmm3, %xmm1            // Add source and destination pixels together
    186 
    187 .LReallySmallStore:
    188     cmpl        $2, %ecx                // Check how many pixels should be written
    189     pextrd      $0x0, %xmm1, (%rdx)     // Store one destination pixel
    190     jb          .LReallySmallExit       // One pixel (flags still come from the cmpl above)
    191     pextrd      $0x1, %xmm1, 4(%rdx)    // Store second destination pixel
    192     je          .LReallySmallExit       // Two pixels
    193     pextrd      $0x2, %xmm1, 8(%rdx)    // Store third destination pixel
    194 .LReallySmallExit:
    195     ret
    196 
    197     // Handle bigger blit operations (16+ pixels)
    198     ////////////////////////////////////////////////////////////////////////////////
    199     .p2align 4
    200 .LBigBlit:
    201     // Align destination?
    202     testl       $0xF, %edx              // ZF set when the destination is already 16-byte aligned
    203     lddqu       (%rax), %xmm1           // Pre-load four source pixels
    204     jz          .LAligned               // Uses the flags of the testl above
    205 
    206     movq        %rdx, %rdi              // Calculate alignment of destination pointer
    207     negq        %rdi
    208     andl        $0xF, %edi              // %rdi = bytes from dst up to the next 16-byte boundary
    209 
    210     // Handle 1-3 pixels to align destination
            // A full group of four pixels is loaded and blended, but only the leading
            // pixels below the alignment boundary are merged into the stored result.
    211     ptest       %xmm7, %xmm1            // Check if all alphas are zero or opaque
    212     jz          .LAlignDone             // If all alphas are zero, just skip
    213     lddqu       (%rdx), %xmm5           // Load four destination pixels
    214     jc          .LAlignStore            // If all alphas are opaque, just store
    215 
    216     // Handle mixed alphas (calculate and scale)
            // Done in %xmm3 and %xmm2 so that %xmm5 keeps the unmodified destination pixels.
    217     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    218 
    219     psllw       $8, %xmm3               // Filter out red and blue components
    220     pmulhuw     %xmm4, %xmm3            // Scale red and blue
    221     movdqa      %xmm5, %xmm2            // Second working copy of the destination pixels
    222     psrlw       $8, %xmm2               // Filter out alpha and green components
    223     pmullw      %xmm4, %xmm2            // Scale alpha and green
    224 
    225     pblendvb    %xmm3, %xmm2            // Combine results (mask in %xmm0, implicitly)
    226     paddb       %xmm2, %xmm1            // Add source and destination pixels together
    227 
    228 .LAlignStore:
    229     cmpl        $8, %edi                // Check how many pixels should be written (%edi is a byte count; 8 = two pixels)
    230     jb          .LAlignPixelsLeft1      // Fewer than 8 bytes: one pixel, assuming dst is pixel-aligned
    231     ja          .LAlignPixelsLeft3      // More than 8 bytes: three pixels
    232     pblendw     $0x0F, %xmm1, %xmm5     // Blend two pixels
    233     jmp .LAlignStorePixels
    234 
    235 .LAlignPixelsLeft1:
    236     pblendw     $0x03, %xmm1, %xmm5     // Blend one pixel
    237     jmp .LAlignStorePixels
    238 
    239 .LAlignPixelsLeft3:
    240     pblendw     $0x3F, %xmm1, %xmm5     // Blend three pixels
    241 
    242 .LAlignStorePixels:
    243     movdqu      %xmm5, (%rdx)           // Store destination pixels
    244 
    245 .LAlignDone:
    246     addq        %rdi, %rax              // Adjust pointers and pixel count
    247     addq        %rdi, %rdx
    248     shrq        $2, %rdi                // Bytes -> pixels
    249     lddqu       (%rax), %xmm1           // Pre-load new source pixels (after alignment)
    250     subl        %edi, %ecx              // Remove the pixels consumed for alignment from the counter
    251 
    252 .LAligned:                              // Destination is guaranteed to be 16 byte aligned
    253     xorq        %rdi, %rdi              // Reset offset to zero
    254     subl        $8, %ecx                // Decrease counter (Reserve four pixels for the cleanup)
    255     testl       $0xF, %eax              // Check alignment of source pointer
    256     jz          .LAlignedLoop           // Source aligned too; otherwise fall through to the unaligned loop
    257 
    258     // Source not aligned to destination
    259     ////////////////////////////////////////////////////////////////////////////////
            // Loop state: %xmm1 already holds the four source pixels at offset %rdi
            // (pre-loaded). Source loads use lddqu (unaligned); destination accesses use
            // movdqa because the destination was aligned before entering the loop.
    260     .p2align 4
    261 .LUnalignedLoop:                        // Main loop for unaligned, handles eight pixels per iteration
    262     ptest       %xmm7, %xmm1            // ZF = all four alphas are zero, CF = all four alphas are opaque
    263     ja          .LAlphaNotOpaqueOrZero00        // Mixed alphas: blend the first four pixels
    264     lddqu       16(%rax, %rdi), %xmm2   // Pre-load four source pixels
    265     jz          .LAlphaZero00           // Uses the ptest flags; the load in between does not modify them
    266     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    267 
    268 .LAlphaZero00:
    269     ptest       %xmm7, %xmm2            // Check if all alphas are zero or opaque
    270     ja          .LAlphaNotOpaqueOrZero01        // Mixed alphas: blend the second four pixels
    271     lddqu       32(%rax, %rdi), %xmm1   // Pre-load four source pixels
    272     jz          .LAlphaZero01
    273     movdqa      %xmm2, 16(%rdx, %rdi)   // Store four destination pixels
    274 
    275 .LAlphaZero01:
    276     addq        $32, %rdi               // Adjust offset and pixel count
    277     subl        $8, %ecx
    278     jae         .LUnalignedLoop         // No borrow: enough pixels remain for another iteration
    279     addl        $8, %ecx                // Adjust pixel count
    280     jmp         .LLoopCleanup0
    281 
    282     .p2align 4
    283 .LAlphaNotOpaqueOrZero00:
    284     movdqa      (%rdx, %rdi), %xmm5     // Load four destination pixels
    285     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    286     SCALE_PIXELS                        // Scale pixels using alpha
    287 
    288     lddqu       16(%rax, %rdi), %xmm2   // Pre-load four source pixels
    289     pblendvb    %xmm5, %xmm3            // Combine results (mask in %xmm0, implicitly)
    290     paddb       %xmm3, %xmm1            // Add source and destination pixels together
    291     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    292 
    293     // Handle next four pixels
    294     ptest       %xmm7, %xmm2            // Check if all alphas are zero or opaque
    295     ja          .LAlphaNotOpaqueOrZero01
    296     lddqu       32(%rax, %rdi), %xmm1   // Pre-load four source pixels
    297     jz          .LAlphaZero02
    298     movdqa      %xmm2, 16(%rdx, %rdi)   // Store four destination pixels
    299 .LAlphaZero02:
    300     addq        $32, %rdi               // Adjust offset and pixel count
    301     subl        $8, %ecx
    302     jae         .LUnalignedLoop
    303     addl        $8, %ecx                // Adjust pixel count
    304     jmp         .LLoopCleanup0
    305 
    306     .p2align 4
    307 .LAlphaNotOpaqueOrZero01:
    308     movdqa      16(%rdx, %rdi), %xmm5   // Load four destination pixels
    309     EXTRACT_ALPHA(xmm2, xmm1)           // Extract and clone alpha value (uses %xmm1 as scratch; it is reloaded below)
    310     SCALE_PIXELS                        // Scale pixels using alpha
    311 
    312     lddqu       32(%rax, %rdi), %xmm1   // Pre-load four source pixels
    313     addq        $32, %rdi               // Advance the offset; the store below compensates with -16
    314     pblendvb    %xmm5, %xmm3            // Combine results (mask in %xmm0, implicitly)
    315     paddb       %xmm3, %xmm2            // Add source and destination pixels together
    316     subl        $8, %ecx
    317     movdqa      %xmm2, -16(%rdx, %rdi)  // Store four destination pixels
    318     jae         .LUnalignedLoop         // Uses the flags of the subl above; the store does not modify them
    319     addl        $8, %ecx                // Adjust pixel count
    320 
    321     // Cleanup - handle pending pixels from loop
            // %xmm1 still holds four pre-loaded source pixels that have not been written yet.
    322 .LLoopCleanup0:
    323     ptest       %xmm7, %xmm1            // Check if all alphas are zero or opaque
    324     ja          .LAlphaNotOpaqueOrZero02
    325     jz          .LAlphaZero03
    326     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    327 .LAlphaZero03:
    328     addq        $16, %rdi
    329     subl        $4, %ecx
    330     js          .LSmallRemaining        // Reuse code from small loop
    331 
    332 .LRemain0:                              // At least four more whole pixels remain
    333     lddqu       (%rax, %rdi), %xmm1     // Load four source pixels
    334     ptest       %xmm7, %xmm1            // Check if all alphas are zero or opaque
    335     ja          .LAlphaNotOpaqueOrZero02
    336     jz          .LAlphaZero04
    337     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    338 .LAlphaZero04:
    339     addq        $16, %rdi
    340     subl        $4, %ecx
    341     jmp         .LSmallRemaining        // Reuse code from small loop
    342 
    343 .LAlphaNotOpaqueOrZero02:
    344     movdqa      (%rdx, %rdi), %xmm5     // Load four destination pixels
    345     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    346     SCALE_PIXELS                        // Scale pixels using alpha
    347 
    348     addq        $16, %rdi               // Advance the offset; the store below compensates with -16
    349     subl        $4, %ecx
    350     pblendvb    %xmm5, %xmm3            // Combine results (mask in %xmm0, implicitly)
    351     paddb       %xmm3, %xmm1            // Add source and destination pixels together
    352     movdqa      %xmm1, -16(%rdx, %rdi)  // Store four destination pixels
    353     js          .LSmallRemaining        // Reuse code from small loop
    354     jmp         .LRemain0
    355 
    356     // Source aligned to destination
    357     ////////////////////////////////////////////////////////////////////////////////
            // Same structure as the unaligned loop, except that source pixels are read
            // with movdqa (aligned load) instead of lddqu. %xmm1 holds the pre-loaded
            // source pixels for offset %rdi on entry to every iteration.
    358     .p2align 4
    359 .LAlignedLoop:                          // Main loop for aligned, handles eight pixels per iteration
    360     ptest       %xmm7, %xmm1            // ZF = all four alphas are zero, CF = all four alphas are opaque
    361     ja          .LAlphaNotOpaqueOrZero10        // Mixed alphas: blend the first four pixels
    362     movdqa      16(%rax, %rdi), %xmm2   // Pre-load four source pixels
    363     jz          .LAlphaZero10           // Uses the ptest flags; the load in between does not modify them
    364     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    365 
    366 .LAlphaZero10:
    367     ptest       %xmm7, %xmm2            // Check if all alphas are zero or opaque
    368     ja          .LAlphaNotOpaqueOrZero11        // Mixed alphas: blend the second four pixels
    369     movdqa      32(%rax, %rdi), %xmm1   // Pre-load four source pixels
    370     jz          .LAlphaZero11
    371     movdqa      %xmm2, 16(%rdx, %rdi)   // Store four destination pixels
    372 
    373 .LAlphaZero11:
    374     addq        $32, %rdi               // Adjust offset and pixel count
    375     subl        $8, %ecx
    376     jae         .LAlignedLoop           // No borrow: enough pixels remain for another iteration
    377     addl        $8, %ecx                // Adjust pixel count
    378     jmp         .LLoopCleanup1
    379 
    380     .p2align 4
    381 .LAlphaNotOpaqueOrZero10:
    382     movdqa      (%rdx, %rdi), %xmm5     // Load four destination pixels
    383     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    384     SCALE_PIXELS                        // Scale pixels using alpha
    385 
    386     movdqa      16(%rax, %rdi), %xmm2   // Pre-load four source pixels
    387     pblendvb    %xmm5, %xmm3            // Combine results (mask in %xmm0, implicitly)
    388     paddb       %xmm3, %xmm1            // Add source and destination pixels together
    389     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    390 
    391     // Handle next four pixels
    392     ptest       %xmm7, %xmm2            // Check if all alphas are zero or opaque
    393     ja          .LAlphaNotOpaqueOrZero11
    394     movdqa      32(%rax, %rdi), %xmm1   // Pre-load four source pixels
    395     jz          .LAlphaZero12
    396     movdqa      %xmm2, 16(%rdx, %rdi)   // Store four destination pixels
    397 .LAlphaZero12:
    398     addq        $32, %rdi               // Adjust offset and pixel count
    399     subl        $8, %ecx
    400     jae         .LAlignedLoop
    401     addl        $8, %ecx                // Adjust pixel count
    402     jmp         .LLoopCleanup1
    403 
    404     .p2align 4
    405 .LAlphaNotOpaqueOrZero11:
    406     movdqa      16(%rdx, %rdi), %xmm5   // Load four destination pixels
    407     EXTRACT_ALPHA(xmm2, xmm1)           // Extract and clone alpha value (uses %xmm1 as scratch; it is reloaded below)
    408     SCALE_PIXELS                        // Scale pixels using alpha
    409     movdqa      32(%rax, %rdi), %xmm1   // Pre-load four source pixels
    410 
    411     addq        $32, %rdi               // Advance the offset; the store below compensates with -16
    412     pblendvb    %xmm5, %xmm3            // Combine results (mask in %xmm0, implicitly)
    413     paddb       %xmm3, %xmm2            // Add source and destination pixels together
    414     subl        $8, %ecx
    415     movdqa      %xmm2, -16(%rdx, %rdi)  // Store four destination pixels
    416     jae         .LAlignedLoop           // Uses the flags of the subl above; the store does not modify them
    417     addl        $8, %ecx                // Adjust pixel count
    418 
    419     // Cleanup - handle four pending pixels from loop
            // %xmm1 still holds four pre-loaded source pixels that have not been written yet.
    420 .LLoopCleanup1:
    421     ptest       %xmm7, %xmm1            // Check if all alphas are zero or opaque
    422     ja          .LAlphaNotOpaqueOrZero12
    423     jz          .LAlphaZero13
    424     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    425 .LAlphaZero13:
    426     addq        $16, %rdi
    427     subl        $4, %ecx
    428     js          .LSmallRemaining        // Reuse code from small loop
    429 
    430 .LRemain1:                              // At least four more whole pixels remain
    431     movdqa      (%rax, %rdi), %xmm1     // Load four source pixels
    432     ptest       %xmm7, %xmm1            // Check if all alphas are zero or opaque
    433     ja          .LAlphaNotOpaqueOrZero12
    434     jz          .LAlphaZero14
    435     movdqa      %xmm1, (%rdx, %rdi)     // Store four destination pixels
    436 .LAlphaZero14:
    437     addq        $16, %rdi
    438     subl        $4, %ecx
    439     jmp         .LSmallRemaining        // Reuse code from small loop
    440 
    441 .LAlphaNotOpaqueOrZero12:
    442     movdqa      (%rdx, %rdi), %xmm5     // Load four destination pixels
    443     EXTRACT_ALPHA(xmm1, xmm2)           // Extract and clone alpha value
    444     SCALE_PIXELS                        // Scale pixels using alpha
    445 
    446     addq        $16, %rdi               // Advance the offset; the store below compensates with -16
    447     subl        $4, %ecx
    448     pblendvb    %xmm5, %xmm3            // Combine results (mask in %xmm0, implicitly)
    449     paddb       %xmm3, %xmm1            // Add source and destination pixels together
    450     movdqa      %xmm1, -16(%rdx, %rdi)  // Store four destination pixels
    451     js          .LSmallRemaining        // Reuse code from small loop
    452     jmp         .LRemain1
    453 
    454     .cfi_endproc
    455 #ifndef __clang__
    456     .size S32A_Opaque_BlitRow32_SSE4_asm, .-S32A_Opaque_BlitRow32_SSE4_asm
    457 #endif
    458 
    459     // Constants for SSE code
    460 #ifndef __clang__
    461     .section .rodata                    // Non-clang builds place the constants in .rodata; clang builds keep the current section
    462 #endif
    463     .p2align 4                          // 16-byte alignment: the constants are loaded with movdqa
    464 .LAlphaCheckMask:                       // Loaded into %xmm7: selects the alpha byte of each of the four pixels
    465     .long   0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000
    466 .LInverseAlphaCalc:                     // Loaded into %xmm6: EXTRACT_ALPHA computes 256 - alpha from it, per 16-bit word
    467     .word   256, 256, 256, 256, 256, 256, 256, 256
    468 .LResultMergeMask:                      // Loaded into %xmm0: implicit byte-select mask for pblendvb
    469     .long   0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF
            // NOTE(review): this file emits no .note.GNU-stack section; confirm whether ELF builds need one.
    470 #endif
    471 
    472 #endif // CRBUG_399842_FIXED
    473