Home | History | Annotate | Download | only in arch-arm64
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28     .text
     29     .balign 0
     30 
     31     .global scanline_t32cb16blend_arm64
     32 
     33 /*
     34  * .macro pixel
     35  *
     36  *  This macro alpha blends RGB565 original pixel located in either
     37  *  top or bottom 16 bits of DREG register with SRC 32 bit pixel value
     38  *  and writes the result to FB register
     39  *
     40  * \DREG is a 32-bit register containing *two* original destination RGB565
     41  *       pixels, with the even one in the low-16 bits, and the odd one in the
     42  *       high 16 bits.
     43  *
     44  * \SRC is a 32-bit 0xAABBGGRR pixel value, with pre-multiplied colors.
     45  *
     46  * \FB is a target register that will contain the blended pixel values.
     47  *
     48  * \ODD is either 0 or 1 and indicates if we're blending the lower or
     49  *      upper 16-bit pixels in DREG into FB
     50  *
     51  *
     52  * clobbered: w6, w7, w16, w17, w18
     53  *
     54  */
     55 
     56 .macro pixel,   DREG, SRC, FB, ODD
     57 
     58     // SRC = 0xAABBGGRR
     59     lsr     w7, \SRC, #24               // sA
     60     add     w7, w7, w7, lsr #7          // sA + (sA >> 7)
     61     mov     w6, #0x100
     62     sub     w7, w6, w7                  // sA = 0x100 - (sA+(sA>>7))
     63 
     64 1:
     65 
     66 .if \ODD //Blending odd pixel present in top 16 bits of DREG register
     67 
     68     // red
     69     lsr     w16, \DREG, #(16 + 11)
     70     mul     w16, w7, w16
     71     lsr     w6, \SRC, #3
     72     and     w6, w6, #0x1F
     73     add     w16, w6, w16, lsr #8
     74     cmp     w16, #0x1F
     75     orr     w17, \FB, #(0x1F<<(16 + 11))
     76     orr     w18, \FB, w16, lsl #(16 + 11)
     77     csel    \FB, w17, w18, hi
     78         // green
     79         and     w6, \DREG, #(0x3F<<(16 + 5))
     80         lsr     w17,w6,#(16+5)
     81         mul     w6, w7, w17
     82         lsr     w16, \SRC, #(8+2)
     83         and     w16, w16, #0x3F
     84         add     w6, w16, w6, lsr #8
     85         cmp     w6, #0x3F
     86         orr     w17, \FB, #(0x3F<<(16 + 5))
     87         orr     w18, \FB, w6, lsl #(16 + 5)
     88         csel    \FB, w17, w18, hi
     89             // blue
     90             and     w16, \DREG, #(0x1F << 16)
     91             lsr     w17,w16,#16
     92             mul     w16, w7, w17
     93             lsr     w6, \SRC, #(8+8+3)
     94             and     w6, w6, #0x1F
     95             add     w16, w6, w16, lsr #8
     96             cmp     w16, #0x1F
     97             orr     w17, \FB, #(0x1F << 16)
     98             orr     w18, \FB, w16, lsl #16
     99             csel    \FB, w17, w18, hi
    100 
    101 .else //Blending even pixel present in bottom 16 bits of DREG register
    102 
    103     // red
    104     lsr     w16, \DREG, #11
    105     and     w16, w16, #0x1F
    106     mul     w16, w7, w16
    107     lsr     w6, \SRC, #3
    108     and     w6, w6, #0x1F
    109     add     w16, w6, w16, lsr #8
    110     cmp     w16, #0x1F
    111     mov     w17, #(0x1F<<11)
    112     lsl     w18, w16, #11
    113     csel    \FB, w17, w18, hi
    114 
    115 
    116         // green
    117         and     w6, \DREG, #(0x3F<<5)
    118         mul     w6, w7, w6
    119         lsr     w16, \SRC, #(8+2)
    120         and     w16, w16, #0x3F
    121         add     w6, w16, w6, lsr #(5+8)
    122         cmp     w6, #0x3F
    123         orr     w17, \FB, #(0x3F<<5)
    124         orr     w18, \FB, w6, lsl #5
    125         csel    \FB, w17, w18, hi
    126 
    127             // blue
    128             and     w16, \DREG, #0x1F
    129             mul     w16, w7, w16
    130             lsr     w6, \SRC, #(8+8+3)
    131             and     w6, w6, #0x1F
    132             add     w16, w6, w16, lsr #8
    133             cmp     w16, #0x1F
    134             orr     w17, \FB, #0x1F
    135             orr     w18, \FB, w16
    136             csel    \FB, w17, w18, hi
    137 
    138 .endif // End of blending even pixel
    139 
    140 .endm // End of pixel macro
    141 
    142 
    143 // x0:  dst ptr
    144 // x1:  src ptr
    145 // w2:  count
    146 // w3:  d
    147 // w4:  s0
    148 // w5:  s1
    149 // w6:  pixel
    150 // w7:  pixel
    151 // w8:  free
    152 // w9:  free
    153 // w10: free
    154 // w11: free
    155 // w12: scratch
    156 // w14: pixel
    157 
    158 scanline_t32cb16blend_arm64:
    159 
    160     // align DST to 32 bits
    161     tst     x0, #0x3
    162     b.eq    aligned
    163     subs    w2, w2, #1
    164     b.lo    return
    165 
    166 last:
    167     ldr     w4, [x1], #4
    168     ldrh    w3, [x0]
    169     pixel   w3, w4, w12, 0
    170     strh    w12, [x0], #2
    171 
    172 aligned:
    173     subs    w2, w2, #2
    174     b.lo    9f
    175 
    176     // The main loop is unrolled twice and processes 4 pixels
    177 8:
    178     ldp   w4,w5, [x1], #8
    179     add     x0, x0, #4
    180     // it's all zero, skip this pixel
    181     orr     w3, w4, w5
    182     cbz     w3, 7f
    183 
    184     // load the destination
    185     ldr     w3, [x0, #-4]
    186     // stream the destination
    187     pixel   w3, w4, w12, 0
    188     pixel   w3, w5, w12, 1
    189     str     w12, [x0, #-4]
    190 
    191     // 2nd iteration of the loop, don't stream anything
    192     subs    w2, w2, #2
    193     csel    w4, w5, w4, lt
    194     blt     9f
    195     ldp     w4,w5, [x1], #8
    196     add     x0, x0, #4
    197     orr     w3, w4, w5
    198     cbz     w3, 7f
    199     ldr     w3, [x0, #-4]
    200     pixel   w3, w4, w12, 0
    201     pixel   w3, w5, w12, 1
    202     str     w12, [x0, #-4]
    203 
    204 7:  subs    w2, w2, #2
    205     bhs     8b
    206     mov     w4, w5
    207 
    208 9:  adds    w2, w2, #1
    209     b.lo    return
    210     b       last
    211 
    212 return:
    213     ret
    214