Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2014 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBlitRow.h"
      9 #include "SkBlitMask.h"
     10 #include "SkColorPriv.h"
     11 #include "SkDither.h"
     12 #include "SkMathPriv.h"
     13 
        /*
         * Blends 32-bit source pixels into a 16-bit 565 destination using one global
         * alpha. Pixels are processed two at a time by a MIPS DSP inline-assembly
         * loop; a single leftover pixel is handled by the scalar code at the end.
         *
         * dst   - destination pixels, read and rewritten in place.
         * src   - source pixels; the scalar tail asserts that their alpha is 255.
         * count - number of pixels to process.
         * alpha - global blend factor; it is incremented by one before being used
         *         as the blend scale.
         * The trailing x and y parameters are unnamed and unused.
         */
     14 static void S32_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
     15                                     const SkPMColor* SK_RESTRICT src, int count,
     16                                     U8CPU alpha, int /*x*/, int /*y*/) {
     17     register uint32_t t0, t1, t2, t3, t4, t5, t6;
     18     register uint32_t s0, s1, s2, s4, s5, s6;
     19 
            // Blend scale is alpha + 1.
            // NOTE(review): the asm keeps the scale in single bytes of s4, so this
            // presumably relies on callers passing alpha < 255 - confirm.
     20     alpha += 1;
     21     if (count >= 2) {
     22         __asm__ volatile (
     23            ".set             push                          \n\t"
     24            ".set             noreorder                     \n\t"
                    // s4 = (alpha << 8) | alpha: the scale in the two low bytes, used
                    // as the byte multipliers of muleu_s.ph.qbr below.
     25             "sll             %[s4],    %[alpha], 8         \n\t"
     26             "or              %[s4],    %[s4],    %[alpha]  \n\t"
                    // s5 / s6: 0x1f and 0x3f replicated into both halfwords (field masks).
     27             "repl.ph         %[s5],    0x1f                \n\t"
     28             "repl.ph         %[s6],    0x3f                \n\t"
     29         "1:                                                \n\t"
                    // Load two source pixels (s2 = first, s1 = second) and two
                    // destination pixels as one word through the lwr/lwl pair.
     30             "lw              %[s2],    0(%[src])           \n\t"
     31             "lw              %[s1],    4(%[src])           \n\t"
     32             "lwr             %[s0],    0(%[dst])           \n\t"
     33             "lwl             %[s0],    3(%[dst])           \n\t"
                    // Split each destination halfword into its fields:
                    // t1 = bits 0..4, t2 = bits 5..10, t3 = bits 11..15.
     34             "and             %[t1],    %[s0],    %[s5]     \n\t"
     35             "shra.ph         %[t0],    %[s0],    5         \n\t"
     36             "and             %[t2],    %[t0],    %[s6]     \n\t"
     37 #ifdef SK_MIPS_HAS_DSPR2
     38             "shrl.ph         %[t3],    %[s0],    11        \n\t"
     39 #else
     40             "shra.ph         %[t0],    %[s0],    11        \n\t"
     41             "and             %[t3],    %[t0],    %[s5]     \n\t"
     42 #endif
                    // Reduce the source bytes to matching widths for both pixels:
                    // t4 = byte 2 >> 3, t6 = byte 0 >> 3, t5 = byte 1 >> 2.
     43             "precrq.ph.w     %[t0],    %[s1],    %[s2]     \n\t"
     44             "shrl.qb         %[t5],    %[t0],    3         \n\t"
     45             "and             %[t4],    %[t5],    %[s5]     \n\t"
     46             "ins             %[s2],    %[s1],    16, 16    \n\t"
     47             "preceu.ph.qbra  %[t0],    %[s2]               \n\t"
     48             "shrl.qb         %[t6],    %[t0],    3         \n\t"
     49 #ifdef SK_MIPS_HAS_DSPR2
     50             "shrl.ph         %[t5],    %[s2],    10        \n\t"
     51 #else
     52             "shra.ph         %[t0],    %[s2],    10        \n\t"
     53             "and             %[t5],    %[t0],    %[s6]     \n\t"
     54 #endif
                    // Per field: ((src - dst) * scale >> 8) + dst. The loop counter
                    // and src pointer are advanced in between the arithmetic steps.
     55             "subu.qb         %[t4],    %[t4],    %[t1]     \n\t"
     56             "subu.qb         %[t5],    %[t5],    %[t2]     \n\t"
     57             "subu.qb         %[t6],    %[t6],    %[t3]     \n\t"
     58             "muleu_s.ph.qbr  %[t4],    %[s4],    %[t4]     \n\t"
     59             "muleu_s.ph.qbr  %[t5],    %[s4],    %[t5]     \n\t"
     60             "muleu_s.ph.qbr  %[t6],    %[s4],    %[t6]     \n\t"
     61             "addiu           %[count], %[count], -2        \n\t"
     62             "addiu           %[src],   %[src],   8         \n\t"
     63             "shra.ph         %[t4],    %[t4],    8         \n\t"
     64             "shra.ph         %[t5],    %[t5],    8         \n\t"
     65             "shra.ph         %[t6],    %[t6],    8         \n\t"
     66             "addu.qb         %[t4],    %[t4],    %[t1]     \n\t"
     67             "addu.qb         %[t5],    %[t5],    %[t2]     \n\t"
     68             "addu.qb         %[t6],    %[t6],    %[t3]     \n\t"
                    // Repack the low halfwords of t4/t5/t6 and store to dst[0].
     69             "andi            %[s0],    %[t4],    0xffff    \n\t"
     70             "andi            %[t0],    %[t5],    0xffff    \n\t"
     71             "sll             %[t0],    %[t0],    0x5       \n\t"
     72             "or              %[s0],    %[s0],    %[t0]     \n\t"
     73             "sll             %[t0],    %[t6],    0xb       \n\t"
     74             "or              %[t0],    %[t0],    %[s0]     \n\t"
     75             "sh              %[t0],    0(%[dst])           \n\t"
                    // Repack the high halfwords and store to dst[1].
     76             "srl             %[s1],    %[t4],    16        \n\t"
     77             "srl             %[t0],    %[t5],    16        \n\t"
     78             "sll             %[t5],    %[t0],    5         \n\t"
     79             "or              %[t0],    %[t5],    %[s1]     \n\t"
     80             "srl             %[s0],    %[t6],    16        \n\t"
     81             "sll             %[s2],    %[s0],    0xb       \n\t"
     82             "or              %[s1],    %[s2],    %[t0]     \n\t"
     83             "sh              %[s1],    2(%[dst])           \n\t"
                    // Loop while at least two pixels remain; dst advances in the
                    // branch delay slot.
     84             "bge             %[count], 2,        1b        \n\t"
     85             " addiu          %[dst],   %[dst],   4         \n\t"
     86             ".set            pop                           \n\t"
     87             : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
     88               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
     89               [s1]"=&r"(s1), [s2]"=&r"(s2), [s4]"=&r"(s4), [s5]"=&r"(s5),
     90               [s6]"=&r"(s6), [count]"+r"(count), [dst]"+r"(dst),
     91               [src]"+r"(src)
     92             : [alpha]"r"(alpha)
     93             : "memory", "hi", "lo"
     94         );
     95     }
     96 
            // Scalar path for the one pixel left over when count was odd (or 1).
            // Each channel goes through SkAlphaBlend with the same scale as above.
     97     if (count == 1) {
     98         SkPMColor c = *src++;
     99         SkPMColorAssert(c);
    100         SkASSERT(SkGetPackedA32(c) == 255);
    101         uint16_t d = *dst;
    102         *dst++ = SkPackRGB16(SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), alpha),
    103                              SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), alpha),
    104                              SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), alpha));
    105     }
    106 }
    107 
        /*
         * Combines 32-bit source pixels that carry their own alpha with a 16-bit 565
         * destination, applying an ordered dither taken from gDitherMatrix_3Bit_16.
         * Pixels are processed two at a time by a MIPS DSP inline-assembly loop; a
         * single leftover pixel is handled by the scalar code at the end.
         *
         * dst   - destination pixels, read and rewritten in place.
         * src   - source pixels; a source word of zero leaves its destination
         *         pixel untouched.
         * count - number of pixels to process.
         * alpha - not referenced in this function.
         * x, y  - pixel position; y & 3 selects the dither row and x & 3 selects
         *         the 4-bit entry within it. x advances once per pixel.
         */
    108 static void S32A_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
    109                                              const SkPMColor* __restrict__ src,
    110                                              int count, U8CPU alpha, int x, int y) {
            // Issue prefetch hints for src and dst at byte offsets 0 and 32.
    111     __asm__ volatile (
    112         "pref  0,   0(%[src])     \n\t"
    113         "pref  1,   0(%[dst])     \n\t"
    114         "pref  0,   32(%[src])    \n\t"
    115         "pref  1,   32(%[dst])    \n\t"
    116         :
    117         : [src]"r"(src), [dst]"r"(dst)
    118         : "memory"
    119     );
    120 
    121     register int32_t t0, t1, t2, t3, t4, t5, t6;
    122     register int32_t t7, t8, t9, s0, s1, s2, s3;
            // Four 4-bit dither entries for this row, packed into 16 bits.
    123     const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
    124 
    125     if (count >= 2) {
    126         __asm__ volatile (
    127             ".set            push                                \n\t"
    128             ".set            noreorder                           \n\t"
                    // s1 = 0x0101 in each halfword (used to form 256 - alpha);
                    // s2 = -2017 = ~0x7e0, the mask that drops bits 5..10.
    129             "li              %[s1],    0x01010101                \n\t"
    130             "li              %[s2],    -2017                     \n\t"
    131         "1:                                                      \n\t"
                    // NOTE(review): s3 is an output-only operand that is read here
                    // before any write, and the li in the delay slot runs on every
                    // pass, so after the first iteration s3 looks to be 1 at this
                    // branch and the 64-byte prefetch is skipped - confirm intent.
    132             "bnez            %[s3],    4f                        \n\t"
    133             " li             %[s3],    2                         \n\t"
    134             "pref            0,        64(%[src])                \n\t"
    135             "pref            1,        64(%[dst])                \n\t"
    136         "4:                                                      \n\t"
    137             "addiu           %[s3],    %[s3],    -1              \n\t"
                    // t1 / t2 = first / second source pixel. t3 collects the two
                    // dither entries (first in bits 0..3, second in bits 8..11);
                    // x is incremented once per pixel.
    138             "lw              %[t1],    0(%[src])                 \n\t"
    139             "andi            %[t3],    %[x],     0x3             \n\t"
    140             "addiu           %[x],     %[x],     1               \n\t"
    141             "sll             %[t4],    %[t3],    2               \n\t"
    142             "srav            %[t5],    %[dither_scan], %[t4]     \n\t"
    143             "andi            %[t3],    %[t5],    0xf             \n\t"
    144             "lw              %[t2],    4(%[src])                 \n\t"
    145             "andi            %[t4],    %[x],     0x3             \n\t"
    146             "sll             %[t5],    %[t4],    2               \n\t"
    147             "srav            %[t6],    %[dither_scan], %[t5]     \n\t"
    148             "addiu           %[x],     %[x],     1               \n\t"
    149             "ins             %[t3],    %[t6],    8,    4         \n\t"
                    // t0 = (alpha + 1) of each pixel, one per halfword; then
                    // t3 = (dither * (alpha + 1)) >> 8 per pixel.
    150             "srl             %[t4],    %[t1],    24              \n\t"
    151             "addiu           %[t0],    %[t4],    1               \n\t"
    152             "srl             %[t4],    %[t2],    24              \n\t"
    153             "addiu           %[t5],    %[t4],    1               \n\t"
    154             "ins             %[t0],    %[t5],    16,   16        \n\t"
    155             "muleu_s.ph.qbr  %[t4],    %[t3],    %[t0]           \n\t"
    156             "preceu.ph.qbla  %[t3],    %[t4]                     \n\t"
                    // Source byte 0 of both pixels: c + d - (c >> 5)  -> t5.
    157             "andi            %[t4],    %[t1],    0xff            \n\t"
    158             "ins             %[t4],    %[t2],    16,   8         \n\t"
    159             "shrl.qb         %[t5],    %[t4],    5               \n\t"
    160             "subu.qb         %[t6],    %[t3],    %[t5]           \n\t"
    161             "addq.ph         %[t5],    %[t6],    %[t4]           \n\t"
                    // Source byte 1 of both pixels: c + (d >> 1) - (c >> 6)  -> t6.
    162             "ext             %[t4],    %[t1],    8,    8         \n\t"
    163             "srl             %[t6],    %[t2],    8               \n\t"
    164             "ins             %[t4],    %[t6],    16,   8         \n\t"
    165             "shrl.qb         %[t6],    %[t4],    6               \n\t"
    166             "shrl.qb         %[t7],    %[t3],    1               \n\t"
    167             "subu.qb         %[t8],    %[t7],    %[t6]           \n\t"
    168             "addq.ph         %[t6],    %[t8],    %[t4]           \n\t"
                    // Source byte 2 of both pixels: c + d - (c >> 5)  -> t7.
    169             "ext             %[t4],    %[t1],    16,   8         \n\t"
    170             "srl             %[t7],    %[t2],    16              \n\t"
    171             "ins             %[t4],    %[t7],    16,   8         \n\t"
    172             "shrl.qb         %[t7],    %[t4],    5               \n\t"
    173             "subu.qb         %[t8],    %[t3],    %[t7]           \n\t"
    174             "addq.ph         %[t7],    %[t8],    %[t4]           \n\t"
                    // Separate the per-pixel halves of the three dithered channels
                    // (byte-2 channel pre-shifted left by 2).
    175             "shll.ph         %[t4],    %[t7],    2               \n\t"
    176             "andi            %[t9],    %[t4],    0xffff          \n\t"
    177             "srl             %[s0],    %[t4],    16              \n\t"
    178             "andi            %[t3],    %[t6],    0xffff          \n\t"
    179             "srl             %[t4],    %[t6],    16              \n\t"
    180             "andi            %[t6],    %[t5],    0xffff          \n\t"
    181             "srl             %[t7],    %[t5],    16              \n\t"
                    // t0 = (0x0101 - (alpha + 1)) per halfword, shifted right by 3:
                    // the destination scale for each pixel.
    182             "subq.ph         %[t5],    %[s1],    %[t0]           \n\t"
    183             "srl             %[t0],    %[t5],    3               \n\t"
                    // First pixel: skipped entirely when the source word is zero.
                    // The dst load sits in the branch delay slot.
    184             "beqz            %[t1],    3f                        \n\t"
    185             " lhu            %[t5],    0(%[dst])                 \n\t"
                    // Expanded source = (byte1 << 24) | (byte0 << 13) | (byte2 << 2).
    186             "sll             %[t1],    %[t6],    13              \n\t"
    187             "or              %[t8],    %[t9],    %[t1]           \n\t"
    188             "sll             %[t1],    %[t3],    24              \n\t"
    189             "or              %[t9],    %[t1],    %[t8]           \n\t"
                    // Expand dst (bits 5..10 moved up by 16), scale it, add the
                    // source, shift right by 5 and fold back into 16 bits.
    190             "andi            %[t3],    %[t5],    0x7e0           \n\t"
    191             "sll             %[t6],    %[t3],    0x10            \n\t"
    192             "and             %[t8],    %[s2],    %[t5]           \n\t"
    193             "or              %[t5],    %[t6],    %[t8]           \n\t"
    194             "andi            %[t6],    %[t0],    0xff            \n\t"
    195             "mul             %[t1],    %[t6],    %[t5]           \n\t"
    196             "addu            %[t5],    %[t1],    %[t9]           \n\t"
    197             "srl             %[t6],    %[t5],    5               \n\t"
    198             "and             %[t5],    %[s2],    %[t6]           \n\t"
    199             "srl             %[t8],    %[t6],    16              \n\t"
    200             "andi            %[t6],    %[t8],    0x7e0           \n\t"
    201             "or              %[t1],    %[t5],    %[t6]           \n\t"
    202             "sh              %[t1],    0(%[dst])                 \n\t"
    203         "3:                                                      \n\t"
                    // Second pixel: same sequence, using the high-halfword values.
    204             "beqz            %[t2],    2f                        \n\t"
    205             " lhu            %[t5],    2(%[dst])                 \n\t"
    206             "sll             %[t1],    %[t7],    13              \n\t"
    207             "or              %[t8],    %[s0],    %[t1]           \n\t"
    208             "sll             %[t1],    %[t4],    24              \n\t"
    209             "or              %[t9],    %[t1],    %[t8]           \n\t"
    210             "andi            %[t3],    %[t5],    0x7e0           \n\t"
    211             "sll             %[t6],    %[t3],    0x10            \n\t"
    212             "and             %[t8],    %[s2],    %[t5]           \n\t"
    213             "or              %[t5],    %[t6],    %[t8]           \n\t"
    214             "srl             %[t6],    %[t0],    16              \n\t"
    215             "mul             %[t1],    %[t6],    %[t5]           \n\t"
    216             "addu            %[t5],    %[t1],    %[t9]           \n\t"
    217             "srl             %[t6],    %[t5],    5               \n\t"
    218             "and             %[t5],    %[s2],    %[t6]           \n\t"
    219             "srl             %[t8],    %[t6],    16              \n\t"
    220             "andi            %[t6],    %[t8],    0x7e0           \n\t"
    221             "or              %[t1],    %[t5],    %[t6]           \n\t"
    222             "sh              %[t1],    2(%[dst])                 \n\t"
    223         "2:                                                      \n\t"
                    // Advance by two pixels and loop while count - 1 > 0; dst
                    // advances in the branch delay slot.
    224             "addiu           %[count], %[count], -2              \n\t"
    225             "addiu           %[src],   %[src],   8               \n\t"
    226             "addiu           %[t1],    %[count], -1              \n\t"
    227             "bgtz            %[t1],    1b                        \n\t"
    228             " addiu          %[dst],  %[dst],    4               \n\t"
    229             ".set            pop                                 \n\t"
    230             : [src]"+r"(src), [count]"+r"(count), [dst]"+r"(dst), [x]"+r"(x),
    231               [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
    232               [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7),
    233               [t8]"=&r"(t8),  [t9]"=&r"(t9), [s0]"=&r"(s0), [s1]"=&r"(s1),
    234               [s2]"=&r"(s2), [s3]"=&r"(s3)
    235             : [dither_scan]"r"(dither_scan)
    236             : "memory", "hi", "lo"
    237         );
    238     }
    239 
            // Scalar path for the one pixel left over when count was odd (or 1).
            // x was advanced by the asm loop, so DITHER_VALUE(x) is evaluated for
            // this pixel's column.
    240     if (count == 1) {
    241         SkPMColor c = *src++;
    242         SkPMColorAssert(c);
    243         if (c) {
    244             unsigned a = SkGetPackedA32(c);
    245             int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
    246 
    247             unsigned sr = SkGetPackedR32(c);
    248             unsigned sg = SkGetPackedG32(c);
    249             unsigned sb = SkGetPackedB32(c);
    250             sr = SkDITHER_R32_FOR_565(sr, d);
    251             sg = SkDITHER_G32_FOR_565(sg, d);
    252             sb = SkDITHER_B32_FOR_565(sb, d);
    253 
    254             uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
    255             uint32_t dst_expanded = SkExpand_rgb_16(*dst);
    256             dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
    257             // now src and dst expanded are in g:11 r:10 x:1 b:10
    258             *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
    259         }
    260         dst += 1;
    261         DITHER_INC_X(x);
    262     }
    263 }
    264 
    265 static void S32_D565_Opaque_Dither_mips_dsp(uint16_t* __restrict__ dst,
    266                                             const SkPMColor* __restrict__ src,
    267                                             int count, U8CPU alpha, int x, int y) {
    268     uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
    269     register uint32_t t0, t1, t2, t3, t4, t5;
    270     register uint32_t t6, t7, t8, t9, s0;
    271     int dither[4];
    272     int i;
    273 
    274     for (i = 0; i < 4; i++, x++) {
    275         dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
    276     }
    277 
    278     __asm__ volatile (
    279         ".set            push                          \n\t"
    280         ".set            noreorder                     \n\t"
    281         "li              %[s0],    1                   \n\t"
    282     "2:                                                \n\t"
    283         "beqz            %[count], 1f                  \n\t"
    284         " nop                                          \n\t"
    285         "addiu           %[t0],    %[count], -1        \n\t"
    286         "beqz            %[t0],    1f                  \n\t"
    287         " nop                                          \n\t"
    288         "beqz            %[s0],    3f                  \n\t"
    289         " nop                                          \n\t"
    290         "lw              %[t0],    0(%[dither])        \n\t"
    291         "lw              %[t1],    4(%[dither])        \n\t"
    292         "li              %[s0],    0                   \n\t"
    293         "b               4f                            \n\t"
    294         " nop                                          \n\t"
    295     "3:                                                \n\t"
    296         "lw              %[t0],    8(%[dither])        \n\t"
    297         "lw              %[t1],    12(%[dither])       \n\t"
    298         "li              %[s0],    1                   \n\t"
    299     "4:                                                \n\t"
    300         "sll             %[t2],    %[t0],    16        \n\t"
    301         "or              %[t1],    %[t2],    %[t1]     \n\t"
    302         "lw              %[t0],    0(%[src])           \n\t"
    303         "lw              %[t2],    4(%[src])           \n\t"
    304         "precrq.ph.w     %[t3],    %[t0],    %[t2]     \n\t"
    305         "preceu.ph.qbra  %[t9],    %[t3]               \n\t"
    306 #ifdef SK_MIPS_HAS_DSPR2
    307         "append          %[t0],    %[t2],    16        \n\t"
    308         "preceu.ph.qbra  %[t4],    %[t0]               \n\t"
    309         "preceu.ph.qbla  %[t5],    %[t0]               \n\t"
    310 #else
    311         "sll             %[t6],    %[t0],    16        \n\t"
    312         "sll             %[t7],    %[t2],    16        \n\t"
    313         "precrq.ph.w     %[t8],    %[t6],    %[t7]     \n\t"
    314         "preceu.ph.qbra  %[t4],    %[t8]               \n\t"
    315         "preceu.ph.qbla  %[t5],    %[t8]               \n\t"
    316 #endif
    317         "addu.qb         %[t0],    %[t4],    %[t1]     \n\t"
    318         "shra.ph         %[t2],    %[t4],    5         \n\t"
    319         "subu.qb         %[t3],    %[t0],    %[t2]     \n\t"
    320         "shra.ph         %[t6],    %[t3],    3         \n\t"
    321         "addu.qb         %[t0],    %[t9],    %[t1]     \n\t"
    322         "shra.ph         %[t2],    %[t9],    5         \n\t"
    323         "subu.qb         %[t3],    %[t0],    %[t2]     \n\t"
    324         "shra.ph         %[t7],    %[t3],    3         \n\t"
    325         "shra.ph         %[t0],    %[t1],    1         \n\t"
    326         "shra.ph         %[t2],    %[t5],    6         \n\t"
    327         "addu.qb         %[t3],    %[t5],    %[t0]     \n\t"
    328         "subu.qb         %[t4],    %[t3],    %[t2]     \n\t"
    329         "shra.ph         %[t8],    %[t4],    2         \n\t"
    330         "precrq.ph.w     %[t0],    %[t6],    %[t7]     \n\t"
    331 #ifdef SK_MIPS_HAS_DSPR2
    332         "append          %[t6],    %[t7],    16        \n\t"
    333 #else
    334         "sll             %[t6],    %[t6],    16        \n\t"
    335         "sll             %[t2],    %[t7],    16        \n\t"
    336         "precrq.ph.w     %[t6],    %[t6],    %[t2]     \n\t"
    337 #endif
    338         "sra             %[t4],    %[t8],    16        \n\t"
    339         "andi            %[t5],    %[t8],    0xFF      \n\t"
    340         "sll             %[t7],    %[t4],    5         \n\t"
    341         "sra             %[t8],    %[t0],    5         \n\t"
    342         "or              %[t9],    %[t7],    %[t8]     \n\t"
    343         "or              %[t3],    %[t9],    %[t0]     \n\t"
    344         "andi            %[t4],    %[t3],    0xFFFF    \n\t"
    345         "sll             %[t7],    %[t5],    5         \n\t"
    346         "sra             %[t8],    %[t6],    5         \n\t"
    347         "or              %[t9],    %[t7],    %[t8]     \n\t"
    348         "or              %[t3],    %[t9],    %[t6]     \n\t"
    349         "and             %[t7],    %[t3],    0xFFFF    \n\t"
    350         "sh              %[t4],    0(%[dst])           \n\t"
    351         "sh              %[t7],    2(%[dst])           \n\t"
    352         "addiu           %[count], %[count], -2        \n\t"
    353         "addiu           %[src],   %[src],   8         \n\t"
    354         "b               2b                            \n\t"
    355         " addiu          %[dst],   %[dst],   4         \n\t"
    356     "1:                                                \n\t"
    357         ".set            pop                           \n\t"
    358         : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
    359           [x]"+r"(x), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
    360           [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
    361           [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9), [s0]"=&r"(s0)
    362         : [dither] "r" (dither)
    363         : "memory"
    364     );
    365 
    366     if (count == 1) {
    367         SkPMColor c = *src++;
    368         SkPMColorAssert(c); // only if DEBUG is turned on
    369         SkASSERT(SkGetPackedA32(c) == 255);
    370         unsigned dither = DITHER_VALUE(x);
    371         *dst++ = SkDitherRGB32To565(c, dither);
    372     }
    373 }
    374 
    375 static void S32_D565_Blend_Dither_mips_dsp(uint16_t* dst,
    376                                            const SkPMColor* src,
    377                                            int count, U8CPU alpha, int x, int y) {
    378     register int32_t t0, t1, t2, t3, t4, t5, t6;
    379     register int32_t s0, s1, s2, s3;
    380     register int x1 = 0;
    381     register uint32_t sc_mul;
    382     register uint32_t sc_add;
    383 #ifdef ENABLE_DITHER_MATRIX_4X4
    384     const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
    385 #else // ENABLE_DITHER_MATRIX_4X4
    386     const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
    387 #endif // ENABLE_DITHER_MATRIX_4X4
    388     int dither[4];
    389 
    390     for (int i = 0; i < 4; i++) {
    391         dither[i] = (dither_scan >> ((x & 3) << 2)) & 0xF;
    392         x += 1;
    393     }
    394     alpha += 1;
    395     __asm__ volatile (
    396         ".set            push                              \n\t"
    397         ".set            noreorder                         \n\t"
    398         "li              %[t0],     0x100                  \n\t"
    399         "subu            %[t0],     %[t0],     %[alpha]    \n\t"
    400         "replv.ph        %[sc_mul], %[alpha]               \n\t"
    401         "beqz            %[alpha],  1f                     \n\t"
    402         " nop                                              \n\t"
    403         "replv.qb        %[sc_add], %[t0]                  \n\t"
    404         "b               2f                                \n\t"
    405         " nop                                              \n\t"
    406     "1:                                                    \n\t"
    407         "replv.qb        %[sc_add], %[alpha]               \n\t"
    408     "2:                                                    \n\t"
    409         "addiu           %[t2],     %[count],  -1          \n\t"
    410         "blez            %[t2],     3f                     \n\t"
    411         " nop                                              \n\t"
    412         "lw              %[s0],     0(%[src])              \n\t"
    413         "lw              %[s1],     4(%[src])              \n\t"
    414         "bnez            %[x1],     4f                     \n\t"
    415         " nop                                              \n\t"
    416         "lw              %[t0],     0(%[dither])           \n\t"
    417         "lw              %[t1],     4(%[dither])           \n\t"
    418         "li              %[x1],     1                      \n\t"
    419         "b               5f                                \n\t"
    420         " nop                                              \n\t"
    421     "4:                                                    \n\t"
    422         "lw              %[t0],     8(%[dither])           \n\t"
    423         "lw              %[t1],     12(%[dither])          \n\t"
    424         "li              %[x1],     0                      \n\t"
    425     "5:                                                    \n\t"
    426         "sll             %[t3],     %[t0],     7           \n\t"
    427         "sll             %[t4],     %[t1],     7           \n\t"
    428 #ifdef SK_MIPS_HAS_DSPR2
    429         "append          %[t0],     %[t1],     16          \n\t"
    430 #else
    431         "sll             %[t0],     %[t0],     8           \n\t"
    432         "sll             %[t2],     %[t1],     8           \n\t"
    433         "precrq.qb.ph    %[t0],     %[t0],     %[t2]       \n\t"
    434 #endif
    435         "precrq.qb.ph    %[t1],     %[t3],     %[t4]       \n\t"
    436         "sll             %[t5],     %[s0],     8           \n\t"
    437         "sll             %[t6],     %[s1],     8           \n\t"
    438         "precrq.qb.ph    %[t4],     %[t5],     %[t6]       \n\t"
    439         "precrq.qb.ph    %[t6],     %[s0],     %[s1]       \n\t"
    440         "preceu.ph.qbla  %[t5],     %[t4]                  \n\t"
    441         "preceu.ph.qbra  %[t4],     %[t4]                  \n\t"
    442         "preceu.ph.qbra  %[t6],     %[t6]                  \n\t"
    443         "lh              %[t2],     0(%[dst])              \n\t"
    444         "lh              %[s1],     2(%[dst])              \n\t"
    445 #ifdef SK_MIPS_HAS_DSPR2
    446         "append          %[t2],     %[s1],     16          \n\t"
    447 #else
    448         "sll             %[s1],     %[s1],     16          \n\t"
    449         "packrl.ph       %[t2],     %[t2],     %[s1]       \n\t"
    450 #endif
    451         "shra.ph         %[s1],     %[t2],     11          \n\t"
    452         "and             %[s1],     %[s1],     0x1F001F    \n\t"
    453         "shra.ph         %[s2],     %[t2],     5           \n\t"
    454         "and             %[s2],     %[s2],     0x3F003F    \n\t"
    455         "and             %[s3],     %[t2],     0x1F001F    \n\t"
    456         "shrl.qb         %[t3],     %[t4],     5           \n\t"
    457         "addu.qb         %[t4],     %[t4],     %[t0]       \n\t"
    458         "subu.qb         %[t4],     %[t4],     %[t3]       \n\t"
    459         "shrl.qb         %[t4],     %[t4],     3           \n\t"
    460         "shrl.qb         %[t3],     %[t5],     5           \n\t"
    461         "addu.qb         %[t5],     %[t5],     %[t0]       \n\t"
    462         "subu.qb         %[t5],     %[t5],     %[t3]       \n\t"
    463         "shrl.qb         %[t5],     %[t5],     3           \n\t"
    464         "shrl.qb         %[t3],     %[t6],     6           \n\t"
    465         "addu.qb         %[t6],     %[t6],     %[t1]       \n\t"
    466         "subu.qb         %[t6],     %[t6],     %[t3]       \n\t"
    467         "shrl.qb         %[t6],     %[t6],     2           \n\t"
    468         "cmpu.lt.qb      %[t4],     %[s1]                  \n\t"
    469         "pick.qb         %[s0],     %[sc_add], $0          \n\t"
    470         "addu.qb         %[s0],     %[s0],     %[s1]       \n\t"
    471         "subu.qb         %[t4],     %[t4],     %[s1]       \n\t"
    472         "muleu_s.ph.qbl  %[t0],     %[t4],     %[sc_mul]   \n\t"
    473         "muleu_s.ph.qbr  %[t1],     %[t4],     %[sc_mul]   \n\t"
    474         "precrq.qb.ph    %[t4],     %[t0],     %[t1]       \n\t"
    475         "addu.qb         %[t4],     %[t4],     %[s0]       \n\t"
    476         "cmpu.lt.qb      %[t5],     %[s3]                  \n\t"
    477         "pick.qb         %[s0],     %[sc_add], $0          \n\t"
    478         "addu.qb         %[s0],     %[s0],     %[s3]       \n\t"
    479         "subu.qb         %[t5],     %[t5],     %[s3]       \n\t"
    480         "muleu_s.ph.qbl  %[t0],     %[t5],     %[sc_mul]   \n\t"
    481         "muleu_s.ph.qbr  %[t1],     %[t5],     %[sc_mul]   \n\t"
    482         "precrq.qb.ph    %[t5],     %[t0],     %[t1]       \n\t"
    483         "addu.qb         %[t5],     %[t5],     %[s0]       \n\t"
    484         "cmpu.lt.qb      %[t6],     %[s2]                  \n\t"
    485         "pick.qb         %[s0],     %[sc_add], $0          \n\t"
    486         "addu.qb         %[s0],     %[s0],     %[s2]       \n\t"
    487         "subu.qb         %[t6],     %[t6],     %[s2]       \n\t"
    488         "muleu_s.ph.qbl  %[t0],     %[t6],     %[sc_mul]   \n\t"
    489         "muleu_s.ph.qbr  %[t1],     %[t6],     %[sc_mul]   \n\t"
    490         "precrq.qb.ph    %[t6],     %[t0],     %[t1]       \n\t"
    491         "addu.qb         %[t6],     %[t6],     %[s0]       \n\t"
    492         "shll.ph         %[s1],     %[t4],     11          \n\t"
    493         "shll.ph         %[t0],     %[t6],     5           \n\t"
    494         "or              %[s0],     %[s1],     %[t0]       \n\t"
    495         "or              %[s1],     %[s0],     %[t5]       \n\t"
    496         "srl             %[t2],     %[s1],     16          \n\t"
    497         "and             %[t3],     %[s1],     0xFFFF      \n\t"
    498         "sh              %[t2],     0(%[dst])              \n\t"
    499         "sh              %[t3],     2(%[dst])              \n\t"
    500         "addiu           %[src],    %[src],    8           \n\t"
    501         "addi            %[count],  %[count],  -2          \n\t"
    502         "b               2b                                \n\t"
    503         " addu           %[dst],    %[dst],    4           \n\t"
    504     "3:                                                    \n\t"
    505         ".set            pop                               \n\t"
    506         : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
    507           [x1]"+r"(x1), [sc_mul]"=&r"(sc_mul), [sc_add]"=&r"(sc_add),
    508           [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
    509           [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [s0]"=&r"(s0),
    510           [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
    511         : [dither]"r"(dither), [alpha]"r"(alpha)
    512         : "memory", "hi", "lo"
    513     );
    514 
    515     if(count == 1) {
    516         SkPMColor c = *src++;
    517         SkPMColorAssert(c);
    518         SkASSERT(SkGetPackedA32(c) == 255);
    519         DITHER_565_SCAN(y);
    520         int dither = DITHER_VALUE(x);
    521         int sr = SkGetPackedR32(c);
    522         int sg = SkGetPackedG32(c);
    523         int sb = SkGetPackedB32(c);
    524         sr = SkDITHER_R32To565(sr, dither);
    525         sg = SkDITHER_G32To565(sg, dither);
    526         sb = SkDITHER_B32To565(sb, dither);
    527 
    528         uint16_t d = *dst;
    529         *dst++ = SkPackRGB16(SkAlphaBlend(sr, SkGetPackedR16(d), alpha),
    530                              SkAlphaBlend(sg, SkGetPackedG16(d), alpha),
    531                              SkAlphaBlend(sb, SkGetPackedB16(d), alpha));
    532         DITHER_INC_X(x);
    533     }
    534 }
    535 
    536 static void S32A_D565_Opaque_mips_dsp(uint16_t* __restrict__ dst,
    537                                       const SkPMColor* __restrict__ src,
    538                                       int count, U8CPU alpha, int x, int y) {
            // Blends `count` 32-bit source pixels over the 16-bit 565 pixels at
            // `dst`. The assembly loop consumes two pixels per iteration; a final
            // odd pixel is handled in C through SkSrcOver32To16(). `alpha`, `x`
            // and `y` are not read by this routine.
    539 
            // Prefetch hints at offsets 0 and 32 of both buffers (pref hint 0 for
            // src, hint 1 for dst).
    540     __asm__ volatile (
    541         "pref  0,  0(%[src])     \n\t"
    542         "pref  1,  0(%[dst])     \n\t"
    543         "pref  0,  32(%[src])    \n\t"
    544         "pref  1,  32(%[dst])    \n\t"
    545         :
    546         : [src]"r"(src), [dst]"r"(dst)
    547         : "memory"
    548     );
    549 
    550     register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
            // Countdown to the next in-loop prefetch. The loop tests it (bnez)
            // before ever writing it, so it must start from a defined value and be
            // passed to the asm as a read-write operand.
    551     register uint32_t t16 = 0;
            // Biases added to the 5-bit (add_x10) and 6-bit (add_x20) channel
            // products before they are shifted back down.
    552     register uint32_t add_x10 = 0x100010;
    553     register uint32_t add_x20 = 0x200020;
            // 255 in each halfword; the source alpha is subtracted from it.
    554     register uint32_t sa = 0xff00ff;
    555 
    556     __asm__ volatile (
    557         ".set           push                            \n\t"
    558         ".set           noreorder                       \n\t"
    559         "blez           %[count], 1f                    \n\t"
    560         " nop                                           \n\t"
    561     "2:                                                 \n\t"
                // Leave the loop once fewer than two pixels remain.
    562         "beqz           %[count], 1f                    \n\t"
    563         " nop                                           \n\t"
    564         "addiu          %[t0],    %[count], -1          \n\t"
    565         "beqz           %[t0],    1f                    \n\t"
    566         " nop                                           \n\t"
                // When the countdown is zero, re-arm it and prefetch 64 bytes ahead;
                // starting from 0 this happens on every second iteration.
    567         "bnez           %[t16],   3f                    \n\t"
    568         " nop                                           \n\t"
    569         "li             %[t16],   2                     \n\t"
    570         "pref           0,        64(%[src])            \n\t"
    571         "pref           1,        64(%[dst])            \n\t"
    572     "3:                                                 \n\t"
    573         "addiu          %[t16],   %[t16],   -1          \n\t"
                // Load two source pixels and regroup their bytes into halfword pairs.
    574         "lw             %[t0],    0(%[src])             \n\t"
    575         "lw             %[t1],    4(%[src])             \n\t"
    576         "precrq.ph.w    %[t2],    %[t0],    %[t1]       \n\t"
    577         "preceu.ph.qbra %[t8],    %[t2]                 \n\t"
    578 #ifdef SK_MIPS_HAS_DSPR2
    579         "append         %[t0],    %[t1],    16          \n\t"
    580 #else
    581         "sll            %[t0],    %[t0],    16          \n\t"
    582         "sll            %[t6],    %[t1],    16          \n\t"
    583         "precrq.ph.w    %[t0],    %[t0],    %[t6]       \n\t"
    584 #endif
    585         "preceu.ph.qbra %[t3],    %[t0]                 \n\t"
    586         "preceu.ph.qbla %[t4],    %[t0]                 \n\t"
    587         "preceu.ph.qbla %[t0],    %[t2]                 \n\t"
                // t1 = sa - (top byte of each source pixel), i.e. the per-pixel
                // destination scale.
    588         "subq.ph        %[t1],    %[sa],    %[t0]       \n\t"
    589         "sra            %[t2],    %[t1],    8           \n\t"
    590         "or             %[t5],    %[t2],    %[t1]       \n\t"
    591         "replv.ph       %[t2],    %[t5]                 \n\t"
                // Load two destination pixels; lh sign-extends, so the second one is
                // masked back to 16 bits before both are packed into one word.
    592         "lh             %[t0],    0(%[dst])             \n\t"
    593         "lh             %[t1],    2(%[dst])             \n\t"
    594         "and            %[t1],    %[t1],    0xffff      \n\t"
    595 #ifdef SK_MIPS_HAS_DSPR2
    596         "append         %[t0],    %[t1],    16          \n\t"
    597 #else
    598         "sll            %[t5],    %[t0],    16          \n\t"
    599         "or             %[t0],    %[t5],    %[t1]       \n\t"
    600 #endif
                // Split the 565 pixels into their 5/6/5-bit fields.
    601         "and            %[t1],    %[t0],    0x1f001f    \n\t"
    602         "shra.ph        %[t6],    %[t0],    11          \n\t"
    603         "and            %[t6],    %[t6],    0x1f001f    \n\t"
    604         "and            %[t7],    %[t0],    0x7e007e0   \n\t"
    605         "shra.ph        %[t5],    %[t7],    5           \n\t"
                // Scale each destination field, add the matching source channel and
                // reduce back to field width (three near-identical sequences).
    606         "muleu_s.ph.qbl %[t0],    %[t2],    %[t6]       \n\t"
    607         "addq.ph        %[t7],    %[t0],    %[add_x10]  \n\t"
    608         "shra.ph        %[t6],    %[t7],    5           \n\t"
    609         "addq.ph        %[t6],    %[t7],    %[t6]       \n\t"
    610         "shra.ph        %[t0],    %[t6],    5           \n\t"
    611         "addq.ph        %[t7],    %[t0],    %[t3]       \n\t"
    612         "shra.ph        %[t6],    %[t7],    3           \n\t"
    613         "muleu_s.ph.qbl %[t0],    %[t2],    %[t1]       \n\t"
    614         "addq.ph        %[t7],    %[t0],    %[add_x10]  \n\t"
    615         "shra.ph        %[t0],    %[t7],    5           \n\t"
    616         "addq.ph        %[t7],    %[t7],    %[t0]       \n\t"
    617         "shra.ph        %[t0],    %[t7],    5           \n\t"
    618         "addq.ph        %[t7],    %[t0],    %[t8]       \n\t"
    619         "shra.ph        %[t3],    %[t7],    3           \n\t"
    620         "muleu_s.ph.qbl %[t0],    %[t2],    %[t5]       \n\t"
    621         "addq.ph        %[t7],    %[t0],    %[add_x20]  \n\t"
    622         "shra.ph        %[t0],    %[t7],    6           \n\t"
    623         "addq.ph        %[t8],    %[t7],    %[t0]       \n\t"
    624         "shra.ph        %[t0],    %[t8],    6           \n\t"
    625         "addq.ph        %[t7],    %[t0],    %[t4]       \n\t"
    626         "shra.ph        %[t8],    %[t7],    2           \n\t"
                // Reassemble both 565 pixels and store them as two halfwords.
    627         "shll.ph        %[t0],    %[t8],    5           \n\t"
    628         "shll.ph        %[t1],    %[t6],    11          \n\t"
    629         "or             %[t2],    %[t0],    %[t1]       \n\t"
    630         "or             %[t3],    %[t2],    %[t3]       \n\t"
    631         "sra            %[t4],    %[t3],    16          \n\t"
    632         "sh             %[t4],    0(%[dst])             \n\t"
    633         "sh             %[t3],    2(%[dst])             \n\t"
    634         "addiu          %[count], %[count], -2          \n\t"
    635         "addiu          %[src],   %[src],   8           \n\t"
    636         "b              2b                              \n\t"
    637         " addiu         %[dst],   %[dst],   4           \n\t"
    638     "1:                                                 \n\t"
    639         ".set           pop                             \n\t"
    640         : [dst]"+r"(dst), [src]"+r"(src), [count]"+r"(count),
    641           [t16]"+r"(t16), [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2),
    642           [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6),
    643           [t7]"=&r"(t7), [t8]"=&r"(t8)
    644         : [add_x10]"r"(add_x10), [add_x20]"r"(add_x20), [sa]"r"(sa)
    645         : "memory", "hi", "lo"
    646     );
    647 
            // Odd trailing pixel: an all-zero source pixel leaves dst untouched.
    648     if (count == 1) {
    649         SkPMColor c = *src++;
    650         SkPMColorAssert(c);
    651         if (c) {
    652             *dst = SkSrcOver32To16(c, *dst);
    653         }
    654         dst += 1;
    655     }
    656 }
    657 
    658 static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst,
    659                                      const SkPMColor* SK_RESTRICT src, int count,
    660                                      U8CPU alpha, int /*x*/, int /*y*/) {
            // Blends `count` 32-bit source pixels, additionally scaled by the global
            // `alpha`, into the 16-bit 565 pixels at `dst`. The assembly loop handles
            // two pixels per iteration and leaves at most one pixel for the C tail
            // below. The asm appears to mirror the tail's arithmetic
            // (dst_scale = 255 - round(srcA * alpha / 255), then a rounded /255 of
            // src * alpha + dst * dst_scale per channel) -- NOTE(review): not
            // verified instruction by instruction.
    661     register uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    662     register uint32_t  s0, s1, s2, s3;
    663     register unsigned dst_scale = 0;
    664 
    665     __asm__ volatile (
    666         ".set            push                                       \n\t"
    667         ".set            noreorder                                  \n\t"
                // t0 = low byte of alpha replicated into all four bytes;
                // t6 = 0x80 and t7 = 0xFF replicated into both halfwords.
    668         "replv.qb        %[t0],        %[alpha]                     \n\t"
    669         "repl.ph         %[t6],        0x80                         \n\t"
    670         "repl.ph         %[t7],        0xFF                         \n\t"
    671     "1:                                                             \n\t"
                // Exit when count - 1 <= 0, i.e. fewer than two pixels remain.
    672         "addiu           %[t8],        %[count],     -1             \n\t"
    673         "blez            %[t8],        2f                           \n\t"
    674         " nop                                                       \n\t"
                // Load two source words and two destination halfwords.
    675         "lw              %[t8],        0(%[src])                    \n\t"
    676         "lw              %[t9],        4(%[src])                    \n\t"
    677         "lh              %[t4],        0(%[dst])                    \n\t"
    678         "lh              %[t5],        2(%[dst])                    \n\t"
    679         "sll             %[t5],        %[t5],        16             \n\t"
    680         "sll             %[t2],        %[t8],        8              \n\t"
    681         "sll             %[t3],        %[t9],        8              \n\t"
    682         "precrq.qb.ph    %[t1],        %[t2],        %[t3]          \n\t"
    683         "precrq.qb.ph    %[t3],        %[t8],        %[t9]          \n\t"
    684         "preceu.ph.qbla  %[t8],        %[t3]                        \n\t"
    685         "muleu_s.ph.qbr  %[s3],        %[t0],        %[t8]          \n\t"
    686         "preceu.ph.qbla  %[t2],        %[t1]                        \n\t"
    687         "preceu.ph.qbra  %[t1],        %[t1]                        \n\t"
    688         "preceu.ph.qbra  %[t3],        %[t3]                        \n\t"
                // t9 = both destination pixels packed in one word, then split into
                // 5-bit (s0, s2) and 6-bit (s1) fields.
    689         "packrl.ph       %[t9],        %[t4],        %[t5]          \n\t"
    690         "shra.ph         %[s0],        %[t9],        11             \n\t"
    691         "and             %[s0],        %[s0],        0x1F001F       \n\t"
    692         "shra.ph         %[s1],        %[t9],        5              \n\t"
    693         "and             %[s1],        %[s1],        0x3F003F       \n\t"
    694         "and             %[s2],        %[t9],        0x1F001F       \n\t"
                // dst_scale = 0xFF - ((s3 + 0x80 + ((s3 + 0x80) >> 8)) >> 8) per
                // halfword, then narrowed to bytes for the multiplies below.
    695         "addq.ph         %[s3],        %[s3],        %[t6]          \n\t"
    696         "shra.ph         %[t5],        %[s3],        8              \n\t"
    697         "and             %[t5],        %[t5],        0xFF00FF       \n\t"
    698         "addq.ph         %[dst_scale], %[s3],        %[t5]          \n\t"
    699         "shra.ph         %[dst_scale], %[dst_scale], 8              \n\t"
    700         "subq_s.ph       %[dst_scale], %[t7],        %[dst_scale]   \n\t"
    701         "sll             %[dst_scale], %[dst_scale], 8              \n\t"
    702         "precrq.qb.ph    %[dst_scale], %[dst_scale], %[dst_scale]   \n\t"
                // Reduce the source channels to 5/5/6 bits, scale them by alpha (t0),
                // scale the destination fields by dst_scale, and sum.
    703         "shrl.qb         %[t1],        %[t1],        3              \n\t"
    704         "shrl.qb         %[t2],        %[t2],        3              \n\t"
    705         "shrl.qb         %[t3],        %[t3],        2              \n\t"
    706         "muleu_s.ph.qbl  %[t1],        %[t0],        %[t1]          \n\t"
    707         "muleu_s.ph.qbl  %[t2],        %[t0],        %[t2]          \n\t"
    708         "muleu_s.ph.qbl  %[t3],        %[t0],        %[t3]          \n\t"
    709         "muleu_s.ph.qbl  %[t8],        %[dst_scale], %[s0]          \n\t"
    710         "muleu_s.ph.qbl  %[t9],        %[dst_scale], %[s2]          \n\t"
    711         "muleu_s.ph.qbl  %[t4],        %[dst_scale], %[s1]          \n\t"
    712         "addq.ph         %[t1],        %[t1],        %[t8]          \n\t"
    713         "addq.ph         %[t2],        %[t2],        %[t9]          \n\t"
    714         "addq.ph         %[t3],        %[t3],        %[t4]          \n\t"
                // Add the 0x80 bias, fold in the value shifted right by 8, and keep
                // the high byte of each halfword.
    715         "addq.ph         %[t8],        %[t1],        %[t6]          \n\t"
    716         "addq.ph         %[t9],        %[t2],        %[t6]          \n\t"
    717         "addq.ph         %[t4],        %[t3],        %[t6]          \n\t"
    718         "shra.ph         %[t1],        %[t8],        8              \n\t"
    719         "addq.ph         %[t1],        %[t1],        %[t8]          \n\t"
    720         "preceu.ph.qbla  %[t1],        %[t1]                        \n\t"
    721         "shra.ph         %[t2],        %[t9],        8              \n\t"
    722         "addq.ph         %[t2],        %[t2],        %[t9]          \n\t"
    723         "preceu.ph.qbla  %[t2],        %[t2]                        \n\t"
    724         "shra.ph         %[t3],        %[t4],        8              \n\t"
    725         "addq.ph         %[t3],        %[t3],        %[t4]          \n\t"
    726         "preceu.ph.qbla  %[t3],        %[t3]                        \n\t"
                // Repack to 565 and store both pixels.
    727         "shll.ph         %[t8],        %[t1],        11             \n\t"
    728         "shll.ph         %[t9],        %[t3],        5              \n\t"
    729         "or              %[t8],        %[t8],        %[t9]          \n\t"
    730         "or              %[s0],        %[t8],        %[t2]          \n\t"
    731         "srl             %[t8],        %[s0],        16             \n\t"
    732         "and             %[t9],        %[s0],        0xFFFF         \n\t"
    733         "sh              %[t8],        0(%[dst])                    \n\t"
    734         "sh              %[t9],        2(%[dst])                    \n\t"
    735         "addiu           %[src],       %[src],       8              \n\t"
    736         "addiu           %[count],     %[count],     -2             \n\t"
    737         "b               1b                                         \n\t"
    738         " addiu          %[dst],       %[dst],       4              \n\t"
    739     "2:                                                             \n\t"
    740         ".set            pop                                        \n\t"
    741         : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
    742           [dst_scale]"+r"(dst_scale), [s0]"=&r"(s0), [s1]"=&r"(s1),
    743           [s2]"=&r"(s2), [s3]"=&r"(s3), [t0]"=&r"(t0), [t1]"=&r"(t1),
    744           [t2]"=&r"(t2), [t3]"=&r"(t3), [t4]"=&r"(t4), [t5]"=&r"(t5),
    745           [t6]"=&r"(t6), [t7]"=&r"(t7), [t8]"=&r"(t8), [t9]"=&r"(t9)
    746         : [alpha]"r"(alpha)
    747         : "memory", "hi", "lo"
    748     );
    749 
            // Odd trailing pixel, done in C. An all-zero source pixel leaves dst
            // untouched.
    750     if (count == 1) {
    751         SkPMColor sc = *src++;
    752         SkPMColorAssert(sc);
    753         if (sc) {
    754             uint16_t dc = *dst;
                    // NOTE: this local shadows the register variable dst_scale
                    // declared at the top of the function.
    755             unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha);
    756             unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) +
    757                           SkMulS16(SkGetPackedR16(dc), dst_scale);
    758             unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) +
    759                           SkMulS16(SkGetPackedG16(dc), dst_scale);
    760             unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) +
    761                           SkMulS16(SkGetPackedB16(dc), dst_scale);
    762             *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db));
    763         }
    764         dst += 1;
    765     }
    766 }
    767 
    768 static void S32_Blend_BlitRow32_mips_dsp(SkPMColor* SK_RESTRICT dst,
    769                                          const SkPMColor* SK_RESTRICT src,
    770                                          int count, U8CPU alpha) {
            // Blends `count` 32-bit source pixels into `dst` with a constant alpha,
            // one pixel per iteration: every byte becomes
            //     ((src * src_scale) >> 8) + ((dst * dst_scale) >> 8)
            // with src_scale = alpha + 1 and dst_scale = 256 - src_scale.
            // replv.qb keeps only the low 8 bits of each scale, so alpha == 255
            // would give a source scale of 0 -- presumably callers never select
            // this blend variant with alpha == 255; confirm against the caller.
    771     register int32_t t0, t1, t2, t3, t4, t5, t6, t7;
    772 
    773     __asm__ volatile (
    774         ".set            push                         \n\t"
    775         ".set            noreorder                    \n\t"
                // t7 = src_scale in every byte, t6 = dst_scale in every byte.
    776         "li              %[t2],    0x100              \n\t"
    777         "addiu           %[t0],    %[alpha], 1        \n\t"
    778         "subu            %[t1],    %[t2],    %[t0]    \n\t"
    779         "replv.qb        %[t7],    %[t0]              \n\t"
    780         "replv.qb        %[t6],    %[t1]              \n\t"
    781     "1:                                               \n\t"
    782         "blez            %[count], 2f                 \n\t"
                // Keep the load out of the branch delay slot: under noreorder it
                // would also execute on the exit path and read past the end of src.
                " nop                                         \n\t"
    783         "lw              %[t0],    0(%[src])          \n\t"
    784         "lw              %[t1],    0(%[dst])          \n\t"
                // Widen the four bytes of each pixel to halfwords, scale, then take
                // the high byte of each product.
    785         "preceu.ph.qbr   %[t2],    %[t0]              \n\t"
    786         "preceu.ph.qbl   %[t3],    %[t0]              \n\t"
    787         "preceu.ph.qbr   %[t4],    %[t1]              \n\t"
    788         "preceu.ph.qbl   %[t5],    %[t1]              \n\t"
    789         "muleu_s.ph.qbr  %[t2],    %[t7],    %[t2]    \n\t"
    790         "muleu_s.ph.qbr  %[t3],    %[t7],    %[t3]    \n\t"
    791         "muleu_s.ph.qbr  %[t4],    %[t6],    %[t4]    \n\t"
    792         "muleu_s.ph.qbr  %[t5],    %[t6],    %[t5]    \n\t"
    793         "addiu           %[src],   %[src],   4        \n\t"
    794         "addiu           %[count], %[count], -1       \n\t"
    795         "precrq.qb.ph    %[t0],    %[t3],    %[t2]    \n\t"
    796         "precrq.qb.ph    %[t2],    %[t5],    %[t4]    \n\t"
    797         "addu            %[t1],    %[t0],    %[t2]    \n\t"
    798         "sw              %[t1],    0(%[dst])          \n\t"
    799         "b               1b                           \n\t"
                // addiu rather than addi: pointer arithmetic must not raise the
                // signed-overflow trap.
    800         " addiu          %[dst],   %[dst],   4        \n\t"
    801     "2:                                               \n\t"
    802         ".set            pop                          \n\t"
    803         : [src]"+r"(src), [dst]"+r"(dst), [count]"+r"(count),
    804           [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
    805           [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
    806         : [alpha]"r"(alpha)
    807         : "memory", "hi", "lo"
    808     );
    809 }
    810 
    811 void blitmask_d565_opaque_mips(int width, int height, uint16_t* device,
    812                                unsigned deviceRB, const uint8_t* alpha,
    813                                uint32_t expanded32, unsigned maskRB) {
            // Blends the colour `expanded32` into a block of 565 pixels at `device`
            // using the 8-bit values at `alpha` as per-pixel coverage.
            //
            // Per pixel: the 565 value is duplicated into both halfwords and masked
            // with 0x07E0F81F, scale = (alpha + 1) >> 3, and the result is
            //     d + (((expanded32 - d) * scale) >> 5)
            // masked again and folded back to 16 bits (high half OR low half).
            //
            // Each row is processed four pixels at a time while at least four
            // remain, then one pixel at a time. After a row, `deviceRB` is added to
            // the device pointer (in bytes) and `maskRB` to the alpha pointer; as
            // both pointers have already advanced across the row, these presumably
            // hold the stride minus the row width -- confirm against the caller.
            //
            // The row loop is bottom-tested, so one row is processed even when
            // `height` <= 0. $t0-$t9 are used by name and listed as clobbers.
    814     register uint32_t s0, s1, s2, s3;
    815 
    816     __asm__ volatile (
    817         ".set            push                                    \n\t"
    818         ".set            noreorder                               \n\t"
    819         ".set            noat                                    \n\t"
                // $t9 = field mask for the expanded pixel layout.
    820         "li              $t9,       0x7E0F81F                    \n\t"
    821     "1:                                                          \n\t"
                // Start of a row: $t8 counts the pixels left in it.
    822         "move            $t8,       %[width]                     \n\t"
    823         "addiu           %[height], %[height],     -1            \n\t"
    824     "2:                                                          \n\t"
                // Row finished -> 4; fewer than four pixels left -> 3.
    825         "beqz            $t8,       4f                           \n\t"
    826         " addiu          $t0,       $t8,           -4            \n\t"
    827         "bltz            $t0,       3f                           \n\t"
    828         " nop                                                    \n\t"
                // Four-pixel path.
    829         "addiu           $t8,       $t8,           -4            \n\t"
    830         "lhu             $t0,       0(%[device])                 \n\t"
    831         "lhu             $t1,       2(%[device])                 \n\t"
    832         "lhu             $t2,       4(%[device])                 \n\t"
    833         "lhu             $t3,       6(%[device])                 \n\t"
    834         "lbu             $t4,       0(%[alpha])                  \n\t"
    835         "lbu             $t5,       1(%[alpha])                  \n\t"
    836         "lbu             $t6,       2(%[alpha])                  \n\t"
    837         "lbu             $t7,       3(%[alpha])                  \n\t"
    838         "replv.ph        $t0,       $t0                          \n\t"
    839         "replv.ph        $t1,       $t1                          \n\t"
    840         "replv.ph        $t2,       $t2                          \n\t"
    841         "replv.ph        $t3,       $t3                          \n\t"
                // s0..s3 = (alpha + 1) >> 3.
    842         "addiu           %[s0],     $t4,           1             \n\t"
    843         "addiu           %[s1],     $t5,           1             \n\t"
    844         "addiu           %[s2],     $t6,           1             \n\t"
    845         "addiu           %[s3],     $t7,           1             \n\t"
    846         "srl             %[s0],     %[s0],         3             \n\t"
    847         "srl             %[s1],     %[s1],         3             \n\t"
    848         "srl             %[s2],     %[s2],         3             \n\t"
    849         "srl             %[s3],     %[s3],         3             \n\t"
    850         "and             $t0,       $t0,           $t9           \n\t"
    851         "and             $t1,       $t1,           $t9           \n\t"
    852         "and             $t2,       $t2,           $t9           \n\t"
    853         "and             $t3,       $t3,           $t9           \n\t"
                // d + (((expanded32 - d) * scale) >> 5), masked.
    854         "subu            $t4,       %[expanded32], $t0           \n\t"
    855         "subu            $t5,       %[expanded32], $t1           \n\t"
    856         "subu            $t6,       %[expanded32], $t2           \n\t"
    857         "subu            $t7,       %[expanded32], $t3           \n\t"
    858         "mul             $t4,       $t4,           %[s0]         \n\t"
    859         "mul             $t5,       $t5,           %[s1]         \n\t"
    860         "mul             $t6,       $t6,           %[s2]         \n\t"
    861         "mul             $t7,       $t7,           %[s3]         \n\t"
    862         "addiu           %[alpha],  %[alpha],      4             \n\t"
    863         "srl             $t4,       $t4,           5             \n\t"
    864         "srl             $t5,       $t5,           5             \n\t"
    865         "srl             $t6,       $t6,           5             \n\t"
    866         "srl             $t7,       $t7,           5             \n\t"
    867         "addu            $t4,       $t0,           $t4           \n\t"
    868         "addu            $t5,       $t1,           $t5           \n\t"
    869         "addu            $t6,       $t2,           $t6           \n\t"
    870         "addu            $t7,       $t3,           $t7           \n\t"
    871         "and             $t4,       $t4,           $t9           \n\t"
    872         "and             $t5,       $t5,           $t9           \n\t"
    873         "and             $t6,       $t6,           $t9           \n\t"
    874         "and             $t7,       $t7,           $t9           \n\t"
                // Fold the high halfword back onto the low one and store 16 bits.
    875         "srl             $t0,       $t4,           16            \n\t"
    876         "srl             $t1,       $t5,           16            \n\t"
    877         "srl             $t2,       $t6,           16            \n\t"
    878         "srl             $t3,       $t7,           16            \n\t"
    879         "or              %[s0],     $t0,           $t4           \n\t"
    880         "or              %[s1],     $t1,           $t5           \n\t"
    881         "or              %[s2],     $t2,           $t6           \n\t"
    882         "or              %[s3],     $t3,           $t7           \n\t"
    883         "sh              %[s0],     0(%[device])                 \n\t"
    884         "sh              %[s1],     2(%[device])                 \n\t"
    885         "sh              %[s2],     4(%[device])                 \n\t"
    886         "sh              %[s3],     6(%[device])                 \n\t"
    887         "b               2b                                      \n\t"
    888         " addiu          %[device], %[device],     8             \n\t"
    889     "3:                                                          \n\t"
                // Single-pixel path: same arithmetic, repeated until $t8 is zero.
    890         "lhu             $t0,       0(%[device])                 \n\t"
    891         "lbu             $t1,       0(%[alpha])                  \n\t"
    892         "addiu           $t8,       $t8,           -1            \n\t"
    893         "replv.ph        $t2,       $t0                          \n\t"
    894         "and             $t2,       $t2,           $t9           \n\t"
    895         "addiu           $t0,       $t1,           1             \n\t"
    896         "srl             $t0,       $t0,           3             \n\t"
    897         "subu            $t3,       %[expanded32], $t2           \n\t"
    898         "mul             $t3,       $t3,           $t0           \n\t"
    899         "addiu           %[alpha],  %[alpha],      1             \n\t"
    900         "srl             $t3,       $t3,           5             \n\t"
    901         "addu            $t3,       $t2,           $t3           \n\t"
    902         "and             $t3,       $t3,           $t9           \n\t"
    903         "srl             $t4,       $t3,           16            \n\t"
    904         "or              %[s0],     $t4,           $t3           \n\t"
    905         "sh              %[s0],     0(%[device])                 \n\t"
    906         "bnez            $t8,       3b                           \n\t"
    907          "addiu          %[device], %[device],     2             \n\t"
    908     "4:                                                          \n\t"
                // End of row: step both pointers and loop while rows remain.
    909         "addu            %[device], %[device],     %[deviceRB]   \n\t"
    910         "bgtz            %[height], 1b                           \n\t"
    911         " addu           %[alpha],  %[alpha],      %[maskRB]     \n\t"
    912         ".set            pop                                     \n\t"
    913         : [height]"+r"(height), [alpha]"+r"(alpha), [device]"+r"(device),
    914           [deviceRB]"+r"(deviceRB), [maskRB]"+r"(maskRB), [s0]"=&r"(s0),
    915           [s1]"=&r"(s1), [s2]"=&r"(s2), [s3]"=&r"(s3)
    916         : [expanded32] "r" (expanded32), [width] "r" (width)
    917         : "memory", "hi", "lo", "t0", "t1", "t2", "t3",
    918           "t4", "t5", "t6", "t7", "t8", "t9"
    919     );
    920 }
    921 
    922 ///////////////////////////////////////////////////////////////////////////////////////////////////
    923 
    924 const SkBlitRow::Proc platform_565_procs_mips_dsp[] = {
            // MIPS DSP row procs for 32-bit source onto 565 destination, indexed
            // directly by the `flags` value passed to PlatformProcs565(). Within
            // each half the slots are, going by the routine names: opaque, blend,
            // per-pixel-alpha opaque, per-pixel-alpha blend. A NULL slot has no
            // MIPS DSP routine in this table (presumably the caller then uses its
            // portable default -- confirm in SkBlitRow).
    925     // no dither
    926     NULL,
    927     S32_D565_Blend_mips_dsp,
    928     S32A_D565_Opaque_mips_dsp,
    929     S32A_D565_Blend_mips_dsp,
    930 
    931     // dither
    932     S32_D565_Opaque_Dither_mips_dsp,
    933     S32_D565_Blend_Dither_mips_dsp,
    934     S32A_D565_Opaque_Dither_mips_dsp,
    935     NULL,
    936 };
    937 
    938 static const SkBlitRow::Proc32 platform_32_procs_mips_dsp[] = {
            // MIPS DSP row procs for 32-bit source onto 32-bit destination, indexed
            // directly by the `flags` value passed to PlatformProcs32(). Only the
            // S32_Blend slot has a routine here; the other slots are NULL.
    939     NULL,   // S32_Opaque,
    940     S32_Blend_BlitRow32_mips_dsp,   // S32_Blend,
    941     NULL,   // S32A_Opaque,
    942     NULL,   // S32A_Blend,
    943 };
    944 
    945 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
            // Returns the entry of platform_565_procs_mips_dsp at index `flags`,
            // which may be NULL for a slot without a MIPS DSP routine. Flags outside
            // the table also yield NULL rather than reading past its end.
            const unsigned procCount = sizeof(platform_565_procs_mips_dsp) /
                                       sizeof(platform_565_procs_mips_dsp[0]);
            if (flags >= procCount) {
                return NULL;
            }
    946     return platform_565_procs_mips_dsp[flags];
    947 }
    948 
    949 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
            // Returns the entry of platform_32_procs_mips_dsp at index `flags`,
            // which may be NULL for a slot without a MIPS DSP routine. Flags outside
            // the table also yield NULL rather than reading past its end.
            const unsigned procCount = sizeof(platform_32_procs_mips_dsp) /
                                       sizeof(platform_32_procs_mips_dsp[0]);
            if (flags >= procCount) {
                return NULL;
            }
    950     return platform_32_procs_mips_dsp[flags];
    951 }
    952 
    953 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
            // This file supplies no colour-rect routine; always hands back NULL.
            SkBlitRow::ColorRectProc noPlatformProc = NULL;
            return noPlatformProc;
    955 }
    956 
    957 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
            // This file supplies no colour routine; always hands back NULL.
            SkBlitRow::ColorProc noPlatformProc = NULL;
            return noPlatformProc;
    959 }
    960