Home | History | Annotate | Download | only in libpixelflinger
      1 /* libs/pixelflinger/t32cb16blend.S
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 
     19 	.text
     20 	.align
     21 
     22 	.global scanline_t32cb16blend_arm
     23 
     24 
     25 /*
     26  * .macro pixel
     27  *
     28  * \DREG is a 32-bit register containing *two* original destination RGB565
     29  *       pixels, with the even one in the low-16 bits, and the odd one in the
     30  *       high 16 bits.
     31  *
     32  * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
     33  *
     34  * \FB is a target register that will contain the blended pixel values.
     35  *
     36  * \ODD is either 0 or 1 and indicates if we're blending the lower or
     37  *      upper 16-bit pixels in DREG into FB
     38  *
     39  *
     40  * clobbered: r6, r7, lr
     41  *
     42  */
     43 
     44 .macro pixel,   DREG, SRC, FB, ODD
     45 
     46     // SRC = 0xAABBGGRR
     47     mov     r7, \SRC, lsr #24           // sA
     48     add     r7, r7, r7, lsr #7          // sA + (sA >> 7)
     49     rsb     r7, r7, #0x100              // sA = 0x100 - (sA+(sA>>7))
     50 
     51 1:
     52 
     53 .if \ODD
     54 
     55     // red
     56     mov     lr, \DREG, lsr #(16 + 11)
     57     smulbb  lr, r7, lr
     58     mov     r6, \SRC, lsr #3
     59     and     r6, r6, #0x1F
     60     add     lr, r6, lr, lsr #8
     61     cmp     lr, #0x1F
     62     orrhs   \FB, \FB, #(0x1F<<(16 + 11))
     63     orrlo   \FB, \FB, lr, lsl #(16 + 11)
     64 
     65         // green
     66         and     r6, \DREG, #(0x3F<<(16 + 5))
     67         smulbt  r6, r7, r6
     68         mov     lr, \SRC, lsr #(8+2)
     69         and     lr, lr, #0x3F
     70         add     r6, lr, r6, lsr #(5+8)
     71         cmp     r6, #0x3F
     72         orrhs   \FB, \FB, #(0x3F<<(16 + 5))
     73         orrlo   \FB, \FB, r6, lsl #(16 + 5)
     74 
     75             // blue
     76             and     lr, \DREG, #(0x1F << 16)
     77             smulbt  lr, r7, lr
     78             mov     r6, \SRC, lsr #(8+8+3)
     79             and     r6, r6, #0x1F
     80             add     lr, r6, lr, lsr #8
     81             cmp     lr, #0x1F
     82             orrhs   \FB, \FB, #(0x1F << 16)
     83             orrlo   \FB, \FB, lr, lsl #16
     84 
     85 .else
     86 
     87     // red
     88     mov     lr, \DREG, lsr #11
     89     and     lr, lr, #0x1F
     90     smulbb  lr, r7, lr
     91     mov     r6, \SRC, lsr #3
     92     and     r6, r6, #0x1F
     93     add     lr, r6, lr, lsr #8
     94     cmp     lr, #0x1F
     95     movhs   \FB, #(0x1F<<11)
     96     movlo   \FB, lr, lsl #11
     97 
     98 
     99         // green
    100         and     r6, \DREG, #(0x3F<<5)
    101         smulbb  r6, r7, r6
    102         mov     lr, \SRC, lsr #(8+2)
    103         and     lr, lr, #0x3F
    104         add     r6, lr, r6, lsr #(5+8)
    105         cmp     r6, #0x3F
    106         orrhs   \FB, \FB, #(0x3F<<5)
    107         orrlo   \FB, \FB, r6, lsl #5
    108 
    109             // blue
    110             and     lr, \DREG, #0x1F
    111             smulbb  lr, r7, lr
    112             mov     r6, \SRC, lsr #(8+8+3)
    113             and     r6, r6, #0x1F
    114             add     lr, r6, lr, lsr #8
    115             cmp     lr, #0x1F
    116             orrhs   \FB, \FB, #0x1F
    117             orrlo   \FB, \FB, lr
    118 
    119 .endif
    120 
    121     .endm
    122 
    123 
    124 // r0:  dst ptr
    125 // r1:  src ptr
    126 // r2:  count
    127 // r3:  d
    128 // r4:  s0
    129 // r5:  s1
    130 // r6:  pixel
    131 // r7:  pixel
    132 // r8:  free
    133 // r9:  free
    134 // r10: free
    135 // r11: free
    136 // r12: scratch
    137 // r14: pixel
    138 
    139 scanline_t32cb16blend_arm:
    140     stmfd	sp!, {r4-r7, lr}
    141 
    142     pld     [r0]
    143     pld     [r1]
    144 
    145     // align DST to 32 bits
    146     tst     r0, #0x3
    147     beq     aligned
    148     subs    r2, r2, #1
    149     ldmlofd	sp!, {r4-r7, lr}        // return
    150     bxlo    lr
    151 
    152 last:
    153     ldr     r4, [r1], #4
    154     ldrh    r3, [r0]
    155     pixel   r3, r4, r12, 0
    156     strh    r12, [r0], #2
    157 
    158 aligned:
    159     subs    r2, r2, #2
    160     blo     9f
    161 
    162     // The main loop is unrolled twice and processes 4 pixels
    163 8:  ldmia   r1!, {r4, r5}
    164     // stream the source
    165     pld     [r1, #32]
    166     add     r0, r0, #4
    167     // it's all zero, skip this pixel
    168     orrs    r3, r4, r5
    169     beq     7f
    170 
    171     // load the destination
    172     ldr     r3, [r0, #-4]
    173     // stream the destination
    174     pld     [r0, #32]
    175     pixel   r3, r4, r12, 0
    176     pixel   r3, r5, r12, 1
    177     // effectively, we're getting write-combining by virtue of the
    178     // cpu's write-back cache.
    179     str     r12, [r0, #-4]
    180 
    181     // 2nd iterration of the loop, don't stream anything
    182     subs    r2, r2, #2
    183     movlt   r4, r5
    184     blt     9f
    185     ldmia   r1!, {r4, r5}
    186     add     r0, r0, #4
    187     orrs    r3, r4, r5
    188     beq     7f
    189     ldr     r3, [r0, #-4]
    190     pixel   r3, r4, r12, 0
    191     pixel   r3, r5, r12, 16
    192     str     r12, [r0, #-4]
    193 
    194 
    195 7:  subs    r2, r2, #2
    196     bhs     8b
    197     mov     r4, r5
    198 
    199 9:  adds    r2, r2, #1
    200     ldmlofd sp!, {r4-r7, lr}        // return
    201     bxlo    lr
    202     b       last
    203