Home | History | Annotate | Download | only in dsp
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transforms and color space conversion methods for lossless decoder.
     11 //
     12 // Authors: Vikas Arora (vikaas.arora (at) gmail.com)
     13 //          Jyrki Alakuijala (jyrki (at) google.com)
     14 //          Urvang Joshi (urvang (at) google.com)
     15 
     16 #include "./dsp.h"
     17 
     18 #include <math.h>
     19 #include <stdlib.h>
     20 #include "../dec/vp8li.h"
     21 #include "../utils/endian_inl.h"
     22 #include "./lossless.h"
     23 #include "./yuv.h"
     24 
     25 #define MAX_DIFF_COST (1e30f)
     26 
     27 // lookup table for small values of log2(int)
     28 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
     29   0.0000000000000000f, 0.0000000000000000f,
     30   1.0000000000000000f, 1.5849625007211560f,
     31   2.0000000000000000f, 2.3219280948873621f,
     32   2.5849625007211560f, 2.8073549220576041f,
     33   3.0000000000000000f, 3.1699250014423121f,
     34   3.3219280948873621f, 3.4594316186372973f,
     35   3.5849625007211560f, 3.7004397181410921f,
     36   3.8073549220576041f, 3.9068905956085187f,
     37   4.0000000000000000f, 4.0874628412503390f,
     38   4.1699250014423121f, 4.2479275134435852f,
     39   4.3219280948873626f, 4.3923174227787606f,
     40   4.4594316186372973f, 4.5235619560570130f,
     41   4.5849625007211560f, 4.6438561897747243f,
     42   4.7004397181410917f, 4.7548875021634682f,
     43   4.8073549220576037f, 4.8579809951275718f,
     44   4.9068905956085187f, 4.9541963103868749f,
     45   5.0000000000000000f, 5.0443941193584533f,
     46   5.0874628412503390f, 5.1292830169449663f,
     47   5.1699250014423121f, 5.2094533656289501f,
     48   5.2479275134435852f, 5.2854022188622487f,
     49   5.3219280948873626f, 5.3575520046180837f,
     50   5.3923174227787606f, 5.4262647547020979f,
     51   5.4594316186372973f, 5.4918530963296747f,
     52   5.5235619560570130f, 5.5545888516776376f,
     53   5.5849625007211560f, 5.6147098441152083f,
     54   5.6438561897747243f, 5.6724253419714951f,
     55   5.7004397181410917f, 5.7279204545631987f,
     56   5.7548875021634682f, 5.7813597135246599f,
     57   5.8073549220576037f, 5.8328900141647412f,
     58   5.8579809951275718f, 5.8826430493618415f,
     59   5.9068905956085187f, 5.9307373375628866f,
     60   5.9541963103868749f, 5.9772799234999167f,
     61   6.0000000000000000f, 6.0223678130284543f,
     62   6.0443941193584533f, 6.0660891904577720f,
     63   6.0874628412503390f, 6.1085244567781691f,
     64   6.1292830169449663f, 6.1497471195046822f,
     65   6.1699250014423121f, 6.1898245588800175f,
     66   6.2094533656289501f, 6.2288186904958804f,
     67   6.2479275134435852f, 6.2667865406949010f,
     68   6.2854022188622487f, 6.3037807481771030f,
     69   6.3219280948873626f, 6.3398500028846243f,
     70   6.3575520046180837f, 6.3750394313469245f,
     71   6.3923174227787606f, 6.4093909361377017f,
     72   6.4262647547020979f, 6.4429434958487279f,
     73   6.4594316186372973f, 6.4757334309663976f,
     74   6.4918530963296747f, 6.5077946401986963f,
     75   6.5235619560570130f, 6.5391588111080309f,
     76   6.5545888516776376f, 6.5698556083309478f,
     77   6.5849625007211560f, 6.5999128421871278f,
     78   6.6147098441152083f, 6.6293566200796094f,
     79   6.6438561897747243f, 6.6582114827517946f,
     80   6.6724253419714951f, 6.6865005271832185f,
     81   6.7004397181410917f, 6.7142455176661224f,
     82   6.7279204545631987f, 6.7414669864011464f,
     83   6.7548875021634682f, 6.7681843247769259f,
     84   6.7813597135246599f, 6.7944158663501061f,
     85   6.8073549220576037f, 6.8201789624151878f,
     86   6.8328900141647412f, 6.8454900509443747f,
     87   6.8579809951275718f, 6.8703647195834047f,
     88   6.8826430493618415f, 6.8948177633079437f,
     89   6.9068905956085187f, 6.9188632372745946f,
     90   6.9307373375628866f, 6.9425145053392398f,
     91   6.9541963103868749f, 6.9657842846620869f,
     92   6.9772799234999167f, 6.9886846867721654f,
     93   7.0000000000000000f, 7.0112272554232539f,
     94   7.0223678130284543f, 7.0334230015374501f,
     95   7.0443941193584533f, 7.0552824355011898f,
     96   7.0660891904577720f, 7.0768155970508308f,
     97   7.0874628412503390f, 7.0980320829605263f,
     98   7.1085244567781691f, 7.1189410727235076f,
     99   7.1292830169449663f, 7.1395513523987936f,
    100   7.1497471195046822f, 7.1598713367783890f,
    101   7.1699250014423121f, 7.1799090900149344f,
    102   7.1898245588800175f, 7.1996723448363644f,
    103   7.2094533656289501f, 7.2191685204621611f,
    104   7.2288186904958804f, 7.2384047393250785f,
    105   7.2479275134435852f, 7.2573878426926521f,
    106   7.2667865406949010f, 7.2761244052742375f,
    107   7.2854022188622487f, 7.2946207488916270f,
    108   7.3037807481771030f, 7.3128829552843557f,
    109   7.3219280948873626f, 7.3309168781146167f,
    110   7.3398500028846243f, 7.3487281542310771f,
    111   7.3575520046180837f, 7.3663222142458160f,
    112   7.3750394313469245f, 7.3837042924740519f,
    113   7.3923174227787606f, 7.4008794362821843f,
    114   7.4093909361377017f, 7.4178525148858982f,
    115   7.4262647547020979f, 7.4346282276367245f,
    116   7.4429434958487279f, 7.4512111118323289f,
    117   7.4594316186372973f, 7.4676055500829976f,
    118   7.4757334309663976f, 7.4838157772642563f,
    119   7.4918530963296747f, 7.4998458870832056f,
    120   7.5077946401986963f, 7.5156998382840427f,
    121   7.5235619560570130f, 7.5313814605163118f,
    122   7.5391588111080309f, 7.5468944598876364f,
    123   7.5545888516776376f, 7.5622424242210728f,
    124   7.5698556083309478f, 7.5774288280357486f,
    125   7.5849625007211560f, 7.5924570372680806f,
    126   7.5999128421871278f, 7.6073303137496104f,
    127   7.6147098441152083f, 7.6220518194563764f,
    128   7.6293566200796094f, 7.6366246205436487f,
    129   7.6438561897747243f, 7.6510516911789281f,
    130   7.6582114827517946f, 7.6653359171851764f,
    131   7.6724253419714951f, 7.6794800995054464f,
    132   7.6865005271832185f, 7.6934869574993252f,
    133   7.7004397181410917f, 7.7073591320808825f,
    134   7.7142455176661224f, 7.7210991887071855f,
    135   7.7279204545631987f, 7.7347096202258383f,
    136   7.7414669864011464f, 7.7481928495894605f,
    137   7.7548875021634682f, 7.7615512324444795f,
    138   7.7681843247769259f, 7.7747870596011736f,
    139   7.7813597135246599f, 7.7879025593914317f,
    140   7.7944158663501061f, 7.8008998999203047f,
    141   7.8073549220576037f, 7.8137811912170374f,
    142   7.8201789624151878f, 7.8265484872909150f,
    143   7.8328900141647412f, 7.8392037880969436f,
    144   7.8454900509443747f, 7.8517490414160571f,
    145   7.8579809951275718f, 7.8641861446542797f,
    146   7.8703647195834047f, 7.8765169465649993f,
    147   7.8826430493618415f, 7.8887432488982591f,
    148   7.8948177633079437f, 7.9008668079807486f,
    149   7.9068905956085187f, 7.9128893362299619f,
    150   7.9188632372745946f, 7.9248125036057812f,
    151   7.9307373375628866f, 7.9366379390025709f,
    152   7.9425145053392398f, 7.9483672315846778f,
    153   7.9541963103868749f, 7.9600019320680805f,
    154   7.9657842846620869f, 7.9715435539507719f,
    155   7.9772799234999167f, 7.9829935746943103f,
    156   7.9886846867721654f, 7.9943534368588577f
    157 };
    158 
    159 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
    160   0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
    161   8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
    162   24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
    163   43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
    164   64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
    165   86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
    166   110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
    167   134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
    168   160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
    169   186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
    170   212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
    171   240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
    172   268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
    173   296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
    174   325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
    175   354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
    176   384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
    177   413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
    178   444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
    179   474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
    180   505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
    181   536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
    182   568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
    183   600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
    184   632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
    185   664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
    186   696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
    187   729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
    188   762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
    189   795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
    190   828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
    191   862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
    192   896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
    193   929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
    194   963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
    195   998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
    196   1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
    197   1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
    198   1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
    199   1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
    200   1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
    201   1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
    202   1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
    203   1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
    204   1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
    205   1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
    206   1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
    207   1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
    208   1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
    209   1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
    210   1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
    211   1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
    212   1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
    213   1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
    214   1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
    215   1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
    216   1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
    217   1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
    218   1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
    219   1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
    220   1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
    221   1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
    222   1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
    223   2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
    224 };
    225 
    226 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
    227   { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
    228   { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
    229   { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
    230   { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
    231   { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
    232   {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
    233   {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
    234   {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
    235   {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    236   {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    237   {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    238   {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    239   {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    240   {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    241   {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    242   {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    243   {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    244   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    245   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    246   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    247   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    248   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    249   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    250   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    251   {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    252   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    253   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    254   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    255   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    256   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    257   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    258   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    259   {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    260   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    261   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    262   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    263   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    264   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    265   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    266   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    267   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    268   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    269   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    270   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    271   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    272   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    273   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    274   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    275   {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    276   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    277   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    278   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    279   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    280   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    281   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    282   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    283   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    284   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    285   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    286   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    287   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    288   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    289   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    290   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    291 };
    292 
    293 const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
    294    0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
    295    0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
    296    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    297    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    298    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    299   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    300    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    301   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    302    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    303   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    304   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    305   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    306    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    307   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    308   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    309   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    310    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    311   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    312   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    313   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    314   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    315   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    316   96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    317   112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
    318   127,
    319    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    320   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    321   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    322   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    323   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    324   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    325   96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    326   112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
    327 };
    328 
    329 // The threshold till approximate version of log_2 can be used.
    330 // Practically, we can get rid of the call to log() as the two values match to
    331 // very high degree (the ratio of these two is 0.99999x).
    332 // Keeping a high threshold for now.
    333 #define APPROX_LOG_WITH_CORRECTION_MAX  65536
    334 #define APPROX_LOG_MAX                   4096
    335 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
    336 static float FastSLog2Slow(uint32_t v) {
    337   assert(v >= LOG_LOOKUP_IDX_MAX);
    338   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    339     int log_cnt = 0;
    340     uint32_t y = 1;
    341     int correction = 0;
    342     const float v_f = (float)v;
    343     const uint32_t orig_v = v;
    344     do {
    345       ++log_cnt;
    346       v = v >> 1;
    347       y = y << 1;
    348     } while (v >= LOG_LOOKUP_IDX_MAX);
    349     // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
    350     // Xf = floor(Xf) * (1 + (v % y) / v)
    351     // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    352     // The correction factor: log(1 + d) ~ d; for very small d values, so
    353     // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
    354     // LOG_2_RECIPROCAL ~ 23/16
    355     correction = (23 * (orig_v & (y - 1))) >> 4;
    356     return v_f * (kLog2Table[v] + log_cnt) + correction;
    357   } else {
    358     return (float)(LOG_2_RECIPROCAL * v * log((double)v));
    359   }
    360 }
    361 
    362 static float FastLog2Slow(uint32_t v) {
    363   assert(v >= LOG_LOOKUP_IDX_MAX);
    364   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    365     int log_cnt = 0;
    366     uint32_t y = 1;
    367     const uint32_t orig_v = v;
    368     double log_2;
    369     do {
    370       ++log_cnt;
    371       v = v >> 1;
    372       y = y << 1;
    373     } while (v >= LOG_LOOKUP_IDX_MAX);
    374     log_2 = kLog2Table[v] + log_cnt;
    375     if (orig_v >= APPROX_LOG_MAX) {
    376       // Since the division is still expensive, add this correction factor only
    377       // for large values of 'v'.
    378       const int correction = (23 * (orig_v & (y - 1))) >> 4;
    379       log_2 += (double)correction / orig_v;
    380     }
    381     return (float)log_2;
    382   } else {
    383     return (float)(LOG_2_RECIPROCAL * log((double)v));
    384   }
    385 }
    386 
    387 //------------------------------------------------------------------------------
    388 // Image transforms.
    389 
    390 // Mostly used to reduce code size + readability
    391 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
    392 
    393 // In-place sum of each component with mod 256.
    394 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
    395   const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
    396   const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
    397   *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
    398 }
    399 
    400 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
    401   return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
    402 }
    403 
    404 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
    405   return Average2(Average2(a0, a2), a1);
    406 }
    407 
    408 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
    409                                      uint32_t a2, uint32_t a3) {
    410   return Average2(Average2(a0, a1), Average2(a2, a3));
    411 }
    412 
    413 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
    414   if (a < 256) {
    415     return a;
    416   }
    417   // return 0, when a is a negative integer.
    418   // return 255, when a is positive.
    419   return ~a >> 24;
    420 }
    421 
    422 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
    423   return Clip255(a + b - c);
    424 }
    425 
    426 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
    427                                                    uint32_t c2) {
    428   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
    429   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
    430                                          (c1 >> 16) & 0xff,
    431                                          (c2 >> 16) & 0xff);
    432   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
    433                                          (c1 >> 8) & 0xff,
    434                                          (c2 >> 8) & 0xff);
    435   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
    436   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
    437 }
    438 
    439 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
    440   return Clip255(a + (a - b) / 2);
    441 }
    442 
    443 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
    444                                                    uint32_t c2) {
    445   const uint32_t ave = Average2(c0, c1);
    446   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
    447   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
    448   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
    449   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
    450   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
    451 }
    452 
    453 static WEBP_INLINE int Sub3(int a, int b, int c) {
    454   const int pb = b - c;
    455   const int pa = a - c;
    456   return abs(pb) - abs(pa);
    457 }
    458 
    459 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
    460   const int pa_minus_pb =
    461       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
    462       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
    463       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
    464       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
    465   return (pa_minus_pb <= 0) ? a : b;
    466 }
    467 
    468 //------------------------------------------------------------------------------
    469 // Predictors
    470 
    471 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
    472   (void)top;
    473   (void)left;
    474   return ARGB_BLACK;
    475 }
    476 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
    477   (void)top;
    478   return left;
    479 }
    480 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
    481   (void)left;
    482   return top[0];
    483 }
    484 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
    485   (void)left;
    486   return top[1];
    487 }
    488 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
    489   (void)left;
    490   return top[-1];
    491 }
    492 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
    493   const uint32_t pred = Average3(left, top[0], top[1]);
    494   return pred;
    495 }
    496 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
    497   const uint32_t pred = Average2(left, top[-1]);
    498   return pred;
    499 }
    500 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
    501   const uint32_t pred = Average2(left, top[0]);
    502   return pred;
    503 }
    504 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
    505   const uint32_t pred = Average2(top[-1], top[0]);
    506   (void)left;
    507   return pred;
    508 }
    509 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
    510   const uint32_t pred = Average2(top[0], top[1]);
    511   (void)left;
    512   return pred;
    513 }
    514 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
    515   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
    516   return pred;
    517 }
    518 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
    519   const uint32_t pred = Select(top[0], left, top[-1]);
    520   return pred;
    521 }
    522 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
    523   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
    524   return pred;
    525 }
    526 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
    527   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
    528   return pred;
    529 }
    530 
    531 static const VP8LPredictorFunc kPredictorsC[16] = {
    532   Predictor0, Predictor1, Predictor2, Predictor3,
    533   Predictor4, Predictor5, Predictor6, Predictor7,
    534   Predictor8, Predictor9, Predictor10, Predictor11,
    535   Predictor12, Predictor13,
    536   Predictor0, Predictor0    // <- padding security sentinels
    537 };
    538 
    539 static float PredictionCostSpatial(const int counts[256], int weight_0,
    540                                    double exp_val) {
    541   const int significant_symbols = 256 >> 4;
    542   const double exp_decay_factor = 0.6;
    543   double bits = weight_0 * counts[0];
    544   int i;
    545   for (i = 1; i < significant_symbols; ++i) {
    546     bits += exp_val * (counts[i] + counts[256 - i]);
    547     exp_val *= exp_decay_factor;
    548   }
    549   return (float)(-0.1 * bits);
    550 }
    551 
    552 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
    553 static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
    554   int i;
    555   double retval = 0.;
    556   int sumX = 0, sumXY = 0;
    557   for (i = 0; i < 256; ++i) {
    558     const int x = X[i];
    559     const int xy = x + Y[i];
    560     if (x != 0) {
    561       sumX += x;
    562       retval -= VP8LFastSLog2(x);
    563       sumXY += xy;
    564       retval -= VP8LFastSLog2(xy);
    565     } else if (xy != 0) {
    566       sumXY += xy;
    567       retval -= VP8LFastSLog2(xy);
    568     }
    569   }
    570   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
    571   return (float)retval;
    572 }
    573 
    574 static float PredictionCostSpatialHistogram(const int accumulated[4][256],
    575                                             const int tile[4][256]) {
    576   int i;
    577   double retval = 0;
    578   for (i = 0; i < 4; ++i) {
    579     const double kExpValue = 0.94;
    580     retval += PredictionCostSpatial(tile[i], 1, kExpValue);
    581     retval += CombinedShannonEntropy(tile[i], accumulated[i]);
    582   }
    583   return (float)retval;
    584 }
    585 
    586 static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
    587   ++histo_argb[0][argb >> 24];
    588   ++histo_argb[1][(argb >> 16) & 0xff];
    589   ++histo_argb[2][(argb >> 8) & 0xff];
    590   ++histo_argb[3][argb & 0xff];
    591 }
    592 
    593 static int GetBestPredictorForTile(int width, int height,
    594                                    int tile_x, int tile_y, int bits,
    595                                    const int accumulated[4][256],
    596                                    const uint32_t* const argb_scratch) {
    597   const int kNumPredModes = 14;
    598   const int col_start = tile_x << bits;
    599   const int row_start = tile_y << bits;
    600   const int tile_size = 1 << bits;
    601   const int max_y = GetMin(tile_size, height - row_start);
    602   const int max_x = GetMin(tile_size, width - col_start);
    603   float best_diff = MAX_DIFF_COST;
    604   int best_mode = 0;
    605   int mode;
    606   for (mode = 0; mode < kNumPredModes; ++mode) {
    607     const uint32_t* current_row = argb_scratch;
    608     const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
    609     float cur_diff;
    610     int y;
    611     int histo_argb[4][256];
    612     memset(histo_argb, 0, sizeof(histo_argb));
    613     for (y = 0; y < max_y; ++y) {
    614       int x;
    615       const int row = row_start + y;
    616       const uint32_t* const upper_row = current_row;
    617       current_row = upper_row + width;
    618       for (x = 0; x < max_x; ++x) {
    619         const int col = col_start + x;
    620         uint32_t predict;
    621         if (row == 0) {
    622           predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
    623         } else if (col == 0) {
    624           predict = upper_row[col];  // Top.
    625         } else {
    626           predict = pred_func(current_row[col - 1], upper_row + col);
    627         }
    628         UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
    629       }
    630     }
    631     cur_diff = PredictionCostSpatialHistogram(
    632         accumulated, (const int (*)[256])histo_argb);
    633     if (cur_diff < best_diff) {
    634       best_diff = cur_diff;
    635       best_mode = mode;
    636     }
    637   }
    638 
    639   return best_mode;
    640 }
    641 
    642 static void CopyTileWithPrediction(int width, int height,
    643                                    int tile_x, int tile_y, int bits, int mode,
    644                                    const uint32_t* const argb_scratch,
    645                                    uint32_t* const argb) {
    646   const int col_start = tile_x << bits;
    647   const int row_start = tile_y << bits;
    648   const int tile_size = 1 << bits;
    649   const int max_y = GetMin(tile_size, height - row_start);
    650   const int max_x = GetMin(tile_size, width - col_start);
    651   const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
    652   const uint32_t* current_row = argb_scratch;
    653 
    654   int y;
    655   for (y = 0; y < max_y; ++y) {
    656     int x;
    657     const int row = row_start + y;
    658     const uint32_t* const upper_row = current_row;
    659     current_row = upper_row + width;
    660     for (x = 0; x < max_x; ++x) {
    661       const int col = col_start + x;
    662       const int pix = row * width + col;
    663       uint32_t predict;
    664       if (row == 0) {
    665         predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
    666       } else if (col == 0) {
    667         predict = upper_row[col];  // Top.
    668       } else {
    669         predict = pred_func(current_row[col - 1], upper_row + col);
    670       }
    671       argb[pix] = VP8LSubPixels(current_row[col], predict);
    672     }
    673   }
    674 }
    675 
    676 void VP8LResidualImage(int width, int height, int bits,
    677                        uint32_t* const argb, uint32_t* const argb_scratch,
    678                        uint32_t* const image) {
    679   const int max_tile_size = 1 << bits;
    680   const int tiles_per_row = VP8LSubSampleSize(width, bits);
    681   const int tiles_per_col = VP8LSubSampleSize(height, bits);
    682   uint32_t* const upper_row = argb_scratch;
    683   uint32_t* const current_tile_rows = argb_scratch + width;
    684   int tile_y;
    685   int histo[4][256];
    686   memset(histo, 0, sizeof(histo));
    687   for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
    688     const int tile_y_offset = tile_y * max_tile_size;
    689     const int this_tile_height =
    690         (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
    691     int tile_x;
    692     if (tile_y > 0) {
    693       memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
    694              width * sizeof(*upper_row));
    695     }
    696     memcpy(current_tile_rows, &argb[tile_y_offset * width],
    697            this_tile_height * width * sizeof(*current_tile_rows));
    698     for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
    699       int pred;
    700       int y;
    701       const int tile_x_offset = tile_x * max_tile_size;
    702       int all_x_max = tile_x_offset + max_tile_size;
    703       if (all_x_max > width) {
    704         all_x_max = width;
    705       }
    706       pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits,
    707                                      (const int (*)[256])histo,
    708                                      argb_scratch);
    709       image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
    710       CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
    711                              argb_scratch, argb);
    712       for (y = 0; y < max_tile_size; ++y) {
    713         int ix;
    714         int all_x;
    715         int all_y = tile_y_offset + y;
    716         if (all_y >= height) {
    717           break;
    718         }
    719         ix = all_y * width + tile_x_offset;
    720         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    721           UpdateHisto(histo, argb[ix]);
    722         }
    723       }
    724     }
    725   }
    726 }
    727 
    728 // Inverse prediction.
    729 static void PredictorInverseTransform(const VP8LTransform* const transform,
    730                                       int y_start, int y_end, uint32_t* data) {
    731   const int width = transform->xsize_;
    732   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
    733     int x;
    734     const uint32_t pred0 = Predictor0(data[-1], NULL);
    735     AddPixelsEq(data, pred0);
    736     for (x = 1; x < width; ++x) {
    737       const uint32_t pred1 = Predictor1(data[x - 1], NULL);
    738       AddPixelsEq(data + x, pred1);
    739     }
    740     data += width;
    741     ++y_start;
    742   }
    743 
    744   {
    745     int y = y_start;
    746     const int tile_width = 1 << transform->bits_;
    747     const int mask = tile_width - 1;
    748     const int safe_width = width & ~mask;
    749     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
    750     const uint32_t* pred_mode_base =
    751         transform->data_ + (y >> transform->bits_) * tiles_per_row;
    752 
    753     while (y < y_end) {
    754       const uint32_t pred2 = Predictor2(data[-1], data - width);
    755       const uint32_t* pred_mode_src = pred_mode_base;
    756       VP8LPredictorFunc pred_func;
    757       int x = 1;
    758       int t = 1;
    759       // First pixel follows the T (mode=2) mode.
    760       AddPixelsEq(data, pred2);
    761       // .. the rest:
    762       while (x < safe_width) {
    763         pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
    764         for (; t < tile_width; ++t, ++x) {
    765           const uint32_t pred = pred_func(data[x - 1], data + x - width);
    766           AddPixelsEq(data + x, pred);
    767         }
    768         t = 0;
    769       }
    770       if (x < width) {
    771         pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
    772         for (; x < width; ++x) {
    773           const uint32_t pred = pred_func(data[x - 1], data + x - width);
    774           AddPixelsEq(data + x, pred);
    775         }
    776       }
    777       data += width;
    778       ++y;
    779       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
    780         pred_mode_base += tiles_per_row;
    781       }
    782     }
    783   }
    784 }
    785 
    786 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
    787   int i;
    788   for (i = 0; i < num_pixels; ++i) {
    789     const uint32_t argb = argb_data[i];
    790     const uint32_t green = (argb >> 8) & 0xff;
    791     const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
    792     const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
    793     argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
    794   }
    795 }
    796 
    797 // Add green to blue and red channels (i.e. perform the inverse transform of
    798 // 'subtract green').
    799 void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
    800   int i;
    801   for (i = 0; i < num_pixels; ++i) {
    802     const uint32_t argb = data[i];
    803     const uint32_t green = ((argb >> 8) & 0xff);
    804     uint32_t red_blue = (argb & 0x00ff00ffu);
    805     red_blue += (green << 16) | green;
    806     red_blue &= 0x00ff00ffu;
    807     data[i] = (argb & 0xff00ff00u) | red_blue;
    808   }
    809 }
    810 
    811 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
    812   m->green_to_red_ = 0;
    813   m->green_to_blue_ = 0;
    814   m->red_to_blue_ = 0;
    815 }
    816 
    817 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
    818                                                 int8_t color) {
    819   return (uint32_t)((int)(color_pred) * color) >> 5;
    820 }
    821 
    822 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    823                                                VP8LMultipliers* const m) {
    824   m->green_to_red_  = (color_code >>  0) & 0xff;
    825   m->green_to_blue_ = (color_code >>  8) & 0xff;
    826   m->red_to_blue_   = (color_code >> 16) & 0xff;
    827 }
    828 
    829 static WEBP_INLINE uint32_t MultipliersToColorCode(
    830     const VP8LMultipliers* const m) {
    831   return 0xff000000u |
    832          ((uint32_t)(m->red_to_blue_) << 16) |
    833          ((uint32_t)(m->green_to_blue_) << 8) |
    834          m->green_to_red_;
    835 }
    836 
    837 void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
    838                           int num_pixels) {
    839   int i;
    840   for (i = 0; i < num_pixels; ++i) {
    841     const uint32_t argb = data[i];
    842     const uint32_t green = argb >> 8;
    843     const uint32_t red = argb >> 16;
    844     uint32_t new_red = red;
    845     uint32_t new_blue = argb;
    846     new_red -= ColorTransformDelta(m->green_to_red_, green);
    847     new_red &= 0xff;
    848     new_blue -= ColorTransformDelta(m->green_to_blue_, green);
    849     new_blue -= ColorTransformDelta(m->red_to_blue_, red);
    850     new_blue &= 0xff;
    851     data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    852   }
    853 }
    854 
    855 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
    856                                  int num_pixels) {
    857   int i;
    858   for (i = 0; i < num_pixels; ++i) {
    859     const uint32_t argb = data[i];
    860     const uint32_t green = argb >> 8;
    861     const uint32_t red = argb >> 16;
    862     uint32_t new_red = red;
    863     uint32_t new_blue = argb;
    864     new_red += ColorTransformDelta(m->green_to_red_, green);
    865     new_red &= 0xff;
    866     new_blue += ColorTransformDelta(m->green_to_blue_, green);
    867     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
    868     new_blue &= 0xff;
    869     data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    870   }
    871 }
    872 
    873 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
    874                                              uint32_t argb) {
    875   const uint32_t green = argb >> 8;
    876   uint32_t new_red = argb >> 16;
    877   new_red -= ColorTransformDelta(green_to_red, green);
    878   return (new_red & 0xff);
    879 }
    880 
    881 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
    882                                               uint8_t red_to_blue,
    883                                               uint32_t argb) {
    884   const uint32_t green = argb >> 8;
    885   const uint32_t red = argb >> 16;
    886   uint8_t new_blue = argb;
    887   new_blue -= ColorTransformDelta(green_to_blue, green);
    888   new_blue -= ColorTransformDelta(red_to_blue, red);
    889   return (new_blue & 0xff);
    890 }
    891 
    892 static float PredictionCostCrossColor(const int accumulated[256],
    893                                       const int counts[256]) {
    894   // Favor low entropy, locally and globally.
    895   // Favor small absolute values for PredictionCostSpatial
    896   static const double kExpValue = 2.4;
    897   return CombinedShannonEntropy(counts, accumulated) +
    898          PredictionCostSpatial(counts, 3, kExpValue);
    899 }
    900 
    901 static float GetPredictionCostCrossColorRed(
    902     int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
    903     int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
    904     const int accumulated_red_histo[256], const uint32_t* const argb) {
    905   int all_y;
    906   int histo[256] = { 0 };
    907   float cur_diff;
    908   for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
    909     int ix = all_y * xsize + tile_x_offset;
    910     int all_x;
    911     for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    912       ++histo[TransformColorRed(green_to_red, argb[ix])];  // red.
    913     }
    914   }
    915   cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
    916   if ((uint8_t)green_to_red == prev_x.green_to_red_) {
    917     cur_diff -= 3;  // favor keeping the areas locally similar
    918   }
    919   if ((uint8_t)green_to_red == prev_y.green_to_red_) {
    920     cur_diff -= 3;  // favor keeping the areas locally similar
    921   }
    922   if (green_to_red == 0) {
    923     cur_diff -= 3;
    924   }
    925   return cur_diff;
    926 }
    927 
    928 static void GetBestGreenToRed(
    929     int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
    930     int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
    931     const int accumulated_red_histo[256], const uint32_t* const argb,
    932     VP8LMultipliers* const best_tx) {
    933   int min_green_to_red = -64;
    934   int max_green_to_red = 64;
    935   int green_to_red = 0;
    936   int eval_min = 1;
    937   int eval_max = 1;
    938   float cur_diff_min = MAX_DIFF_COST;
    939   float cur_diff_max = MAX_DIFF_COST;
    940   // Do a binary search to find the optimal green_to_red color transform.
    941   while (max_green_to_red - min_green_to_red > 2) {
    942     if (eval_min) {
    943       cur_diff_min = GetPredictionCostCrossColorRed(
    944           tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
    945           prev_x, prev_y, min_green_to_red, accumulated_red_histo, argb);
    946       eval_min = 0;
    947     }
    948     if (eval_max) {
    949       cur_diff_max = GetPredictionCostCrossColorRed(
    950           tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
    951           prev_x, prev_y, max_green_to_red, accumulated_red_histo, argb);
    952       eval_max = 0;
    953     }
    954     if (cur_diff_min < cur_diff_max) {
    955       green_to_red = min_green_to_red;
    956       max_green_to_red = (max_green_to_red + min_green_to_red) / 2;
    957       eval_max = 1;
    958     } else {
    959       green_to_red = max_green_to_red;
    960       min_green_to_red = (max_green_to_red + min_green_to_red) / 2;
    961       eval_min = 1;
    962     }
    963   }
    964   best_tx->green_to_red_ = green_to_red;
    965 }
    966 
    967 static float GetPredictionCostCrossColorBlue(
    968     int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
    969     int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
    970     int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
    971     const uint32_t* const argb) {
    972   int all_y;
    973   int histo[256] = { 0 };
    974   float cur_diff;
    975   for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
    976     int all_x;
    977     int ix = all_y * xsize + tile_x_offset;
    978     for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
    979       ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
    980     }
    981   }
    982   cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
    983   if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
    984     cur_diff -= 3;  // favor keeping the areas locally similar
    985   }
    986   if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
    987     cur_diff -= 3;  // favor keeping the areas locally similar
    988   }
    989   if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
    990     cur_diff -= 3;  // favor keeping the areas locally similar
    991   }
    992   if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
    993     cur_diff -= 3;  // favor keeping the areas locally similar
    994   }
    995   if (green_to_blue == 0) {
    996     cur_diff -= 3;
    997   }
    998   if (red_to_blue == 0) {
    999     cur_diff -= 3;
   1000   }
   1001   return cur_diff;
   1002 }
   1003 
   1004 static void GetBestGreenRedToBlue(
   1005     int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
   1006     int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
   1007     const int accumulated_blue_histo[256], const uint32_t* const argb,
   1008     VP8LMultipliers* const best_tx) {
   1009   float best_diff = MAX_DIFF_COST;
   1010   float cur_diff;
   1011   const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
   1012   const int min_green_to_blue = -32;
   1013   const int max_green_to_blue = 32;
   1014   const int min_red_to_blue = -32;
   1015   const int max_red_to_blue = 32;
   1016   const int num_iters =
   1017       (1 + (max_green_to_blue - min_green_to_blue) / step) *
   1018       (1 + (max_red_to_blue - min_red_to_blue) / step);
   1019   // Number of tries to get optimal green_to_blue & red_to_blue color transforms
   1020   // after finding a local minima.
   1021   const int max_tries_after_min = 4 + (num_iters >> 2);
   1022   int num_tries_after_min = 0;
   1023   int green_to_blue;
   1024   for (green_to_blue = min_green_to_blue;
   1025        green_to_blue <= max_green_to_blue &&
   1026        num_tries_after_min < max_tries_after_min;
   1027        green_to_blue += step) {
   1028     int red_to_blue;
   1029     for (red_to_blue = min_red_to_blue;
   1030          red_to_blue <= max_red_to_blue &&
   1031          num_tries_after_min < max_tries_after_min;
   1032          red_to_blue += step) {
   1033       cur_diff = GetPredictionCostCrossColorBlue(
   1034           tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
   1035           prev_y, green_to_blue, red_to_blue, accumulated_blue_histo, argb);
   1036       if (cur_diff < best_diff) {
   1037         best_diff = cur_diff;
   1038         best_tx->green_to_blue_ = green_to_blue;
   1039         best_tx->red_to_blue_ = red_to_blue;
   1040         num_tries_after_min = 0;
   1041       } else {
   1042         ++num_tries_after_min;
   1043       }
   1044     }
   1045   }
   1046 }
   1047 
   1048 static VP8LMultipliers GetBestColorTransformForTile(
   1049     int tile_x, int tile_y, int bits,
   1050     VP8LMultipliers prev_x,
   1051     VP8LMultipliers prev_y,
   1052     int quality, int xsize, int ysize,
   1053     const int accumulated_red_histo[256],
   1054     const int accumulated_blue_histo[256],
   1055     const uint32_t* const argb) {
   1056   const int max_tile_size = 1 << bits;
   1057   const int tile_y_offset = tile_y * max_tile_size;
   1058   const int tile_x_offset = tile_x * max_tile_size;
   1059   const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
   1060   const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
   1061   VP8LMultipliers best_tx;
   1062   MultipliersClear(&best_tx);
   1063 
   1064   GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
   1065                     prev_x, prev_y, accumulated_red_histo, argb, &best_tx);
   1066   GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
   1067                         xsize, prev_x, prev_y, quality, accumulated_blue_histo,
   1068                         argb, &best_tx);
   1069   return best_tx;
   1070 }
   1071 
   1072 static void CopyTileWithColorTransform(int xsize, int ysize,
   1073                                        int tile_x, int tile_y,
   1074                                        int max_tile_size,
   1075                                        VP8LMultipliers color_transform,
   1076                                        uint32_t* argb) {
   1077   const int xscan = GetMin(max_tile_size, xsize - tile_x);
   1078   int yscan = GetMin(max_tile_size, ysize - tile_y);
   1079   argb += tile_y * xsize + tile_x;
   1080   while (yscan-- > 0) {
   1081     VP8LTransformColor(&color_transform, argb, xscan);
   1082     argb += xsize;
   1083   }
   1084 }
   1085 
   1086 void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
   1087                              uint32_t* const argb, uint32_t* image) {
   1088   const int max_tile_size = 1 << bits;
   1089   const int tile_xsize = VP8LSubSampleSize(width, bits);
   1090   const int tile_ysize = VP8LSubSampleSize(height, bits);
   1091   int accumulated_red_histo[256] = { 0 };
   1092   int accumulated_blue_histo[256] = { 0 };
   1093   int tile_x, tile_y;
   1094   VP8LMultipliers prev_x, prev_y;
   1095   MultipliersClear(&prev_y);
   1096   MultipliersClear(&prev_x);
   1097   for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
   1098     for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
   1099       int y;
   1100       const int tile_x_offset = tile_x * max_tile_size;
   1101       const int tile_y_offset = tile_y * max_tile_size;
   1102       const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
   1103       const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
   1104       const int offset = tile_y * tile_xsize + tile_x;
   1105       if (tile_y != 0) {
   1106         ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
   1107       }
   1108       prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
   1109                                             prev_x, prev_y,
   1110                                             quality, width, height,
   1111                                             accumulated_red_histo,
   1112                                             accumulated_blue_histo,
   1113                                             argb);
   1114       image[offset] = MultipliersToColorCode(&prev_x);
   1115       CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
   1116                                  max_tile_size, prev_x, argb);
   1117 
   1118       // Gather accumulated histogram data.
   1119       for (y = tile_y_offset; y < all_y_max; ++y) {
   1120         int ix = y * width + tile_x_offset;
   1121         const int ix_end = ix + all_x_max - tile_x_offset;
   1122         for (; ix < ix_end; ++ix) {
   1123           const uint32_t pix = argb[ix];
   1124           if (ix >= 2 &&
   1125               pix == argb[ix - 2] &&
   1126               pix == argb[ix - 1]) {
   1127             continue;  // repeated pixels are handled by backward references
   1128           }
   1129           if (ix >= width + 2 &&
   1130               argb[ix - 2] == argb[ix - width - 2] &&
   1131               argb[ix - 1] == argb[ix - width - 1] &&
   1132               pix == argb[ix - width]) {
   1133             continue;  // repeated pixels are handled by backward references
   1134           }
   1135           ++accumulated_red_histo[(pix >> 16) & 0xff];
   1136           ++accumulated_blue_histo[(pix >> 0) & 0xff];
   1137         }
   1138       }
   1139     }
   1140   }
   1141 }
   1142 
   1143 // Color space inverse transform.
   1144 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
   1145                                        int y_start, int y_end, uint32_t* data) {
   1146   const int width = transform->xsize_;
   1147   const int tile_width = 1 << transform->bits_;
   1148   const int mask = tile_width - 1;
   1149   const int safe_width = width & ~mask;
   1150   const int remaining_width = width - safe_width;
   1151   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
   1152   int y = y_start;
   1153   const uint32_t* pred_row =
   1154       transform->data_ + (y >> transform->bits_) * tiles_per_row;
   1155 
   1156   while (y < y_end) {
   1157     const uint32_t* pred = pred_row;
   1158     VP8LMultipliers m = { 0, 0, 0 };
   1159     const uint32_t* const data_safe_end = data + safe_width;
   1160     const uint32_t* const data_end = data + width;
   1161     while (data < data_safe_end) {
   1162       ColorCodeToMultipliers(*pred++, &m);
   1163       VP8LTransformColorInverse(&m, data, tile_width);
   1164       data += tile_width;
   1165     }
   1166     if (data < data_end) {  // Left-overs using C-version.
   1167       ColorCodeToMultipliers(*pred++, &m);
   1168       VP8LTransformColorInverse(&m, data, remaining_width);
   1169       data += remaining_width;
   1170     }
   1171     ++y;
   1172     if ((y & mask) == 0) pred_row += tiles_per_row;;
   1173   }
   1174 }
   1175 
   1176 // Separate out pixels packed together using pixel-bundling.
   1177 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
   1178 #define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
   1179 void FUNC_NAME(const VP8LTransform* const transform,                           \
   1180                int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
   1181   int y;                                                                       \
   1182   const int bits_per_pixel = 8 >> transform->bits_;                            \
   1183   const int width = transform->xsize_;                                         \
   1184   const uint32_t* const color_map = transform->data_;                          \
   1185   if (bits_per_pixel < 8) {                                                    \
   1186     const int pixels_per_byte = 1 << transform->bits_;                         \
   1187     const int count_mask = pixels_per_byte - 1;                                \
   1188     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
   1189     for (y = y_start; y < y_end; ++y) {                                        \
   1190       uint32_t packed_pixels = 0;                                              \
   1191       int x;                                                                   \
   1192       for (x = 0; x < width; ++x) {                                            \
   1193         /* We need to load fresh 'packed_pixels' once every                */  \
   1194         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
   1195         /* is a power of 2, so can just use a mask for that, instead of    */  \
   1196         /* decrementing a counter.                                         */  \
   1197         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
   1198         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
   1199         packed_pixels >>= bits_per_pixel;                                      \
   1200       }                                                                        \
   1201     }                                                                          \
   1202   } else {                                                                     \
   1203     for (y = y_start; y < y_end; ++y) {                                        \
   1204       int x;                                                                   \
   1205       for (x = 0; x < width; ++x) {                                            \
   1206         *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                      \
   1207       }                                                                        \
   1208     }                                                                          \
   1209   }                                                                            \
   1210 }
   1211 
   1212 static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
   1213   return (idx >> 8) & 0xff;
   1214 }
   1215 
   1216 static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
   1217   return idx;
   1218 }
   1219 
   1220 static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
   1221   return val;
   1222 }
   1223 
   1224 static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
   1225   return (val >> 8) & 0xff;
   1226 }
   1227 
   1228 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
   1229                            GetARGBValue)
   1230 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
   1231                     GetAlphaValue)
   1232 
   1233 #undef COLOR_INDEX_INVERSE
   1234 
   1235 void VP8LInverseTransform(const VP8LTransform* const transform,
   1236                           int row_start, int row_end,
   1237                           const uint32_t* const in, uint32_t* const out) {
   1238   const int width = transform->xsize_;
   1239   assert(row_start < row_end);
   1240   assert(row_end <= transform->ysize_);
   1241   switch (transform->type_) {
   1242     case SUBTRACT_GREEN:
   1243       VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
   1244       break;
   1245     case PREDICTOR_TRANSFORM:
   1246       PredictorInverseTransform(transform, row_start, row_end, out);
   1247       if (row_end != transform->ysize_) {
   1248         // The last predicted row in this iteration will be the top-pred row
   1249         // for the first row in next iteration.
   1250         memcpy(out - width, out + (row_end - row_start - 1) * width,
   1251                width * sizeof(*out));
   1252       }
   1253       break;
   1254     case CROSS_COLOR_TRANSFORM:
   1255       ColorSpaceInverseTransform(transform, row_start, row_end, out);
   1256       break;
   1257     case COLOR_INDEXING_TRANSFORM:
   1258       if (in == out && transform->bits_ > 0) {
   1259         // Move packed pixels to the end of unpacked region, so that unpacking
   1260         // can occur seamlessly.
   1261         // Also, note that this is the only transform that applies on
   1262         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
   1263         // transforms work on effective width of xsize_.
   1264         const int out_stride = (row_end - row_start) * width;
   1265         const int in_stride = (row_end - row_start) *
   1266             VP8LSubSampleSize(transform->xsize_, transform->bits_);
   1267         uint32_t* const src = out + out_stride - in_stride;
   1268         memmove(src, out, in_stride * sizeof(*src));
   1269         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
   1270       } else {
   1271         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
   1272       }
   1273       break;
   1274   }
   1275 }
   1276 
   1277 //------------------------------------------------------------------------------
   1278 // Color space conversion.
   1279 
   1280 static int is_big_endian(void) {
   1281   static const union {
   1282     uint16_t w;
   1283     uint8_t b[2];
   1284   } tmp = { 1 };
   1285   return (tmp.b[0] != 1);
   1286 }
   1287 
   1288 void VP8LConvertBGRAToRGB_C(const uint32_t* src,
   1289                             int num_pixels, uint8_t* dst) {
   1290   const uint32_t* const src_end = src + num_pixels;
   1291   while (src < src_end) {
   1292     const uint32_t argb = *src++;
   1293     *dst++ = (argb >> 16) & 0xff;
   1294     *dst++ = (argb >>  8) & 0xff;
   1295     *dst++ = (argb >>  0) & 0xff;
   1296   }
   1297 }
   1298 
   1299 void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
   1300                              int num_pixels, uint8_t* dst) {
   1301   const uint32_t* const src_end = src + num_pixels;
   1302   while (src < src_end) {
   1303     const uint32_t argb = *src++;
   1304     *dst++ = (argb >> 16) & 0xff;
   1305     *dst++ = (argb >>  8) & 0xff;
   1306     *dst++ = (argb >>  0) & 0xff;
   1307     *dst++ = (argb >> 24) & 0xff;
   1308   }
   1309 }
   1310 
   1311 void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
   1312                                  int num_pixels, uint8_t* dst) {
   1313   const uint32_t* const src_end = src + num_pixels;
   1314   while (src < src_end) {
   1315     const uint32_t argb = *src++;
   1316     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
   1317     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
   1318 #ifdef WEBP_SWAP_16BIT_CSP
   1319     *dst++ = ba;
   1320     *dst++ = rg;
   1321 #else
   1322     *dst++ = rg;
   1323     *dst++ = ba;
   1324 #endif
   1325   }
   1326 }
   1327 
   1328 void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
   1329                                int num_pixels, uint8_t* dst) {
   1330   const uint32_t* const src_end = src + num_pixels;
   1331   while (src < src_end) {
   1332     const uint32_t argb = *src++;
   1333     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
   1334     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
   1335 #ifdef WEBP_SWAP_16BIT_CSP
   1336     *dst++ = gb;
   1337     *dst++ = rg;
   1338 #else
   1339     *dst++ = rg;
   1340     *dst++ = gb;
   1341 #endif
   1342   }
   1343 }
   1344 
   1345 void VP8LConvertBGRAToBGR_C(const uint32_t* src,
   1346                             int num_pixels, uint8_t* dst) {
   1347   const uint32_t* const src_end = src + num_pixels;
   1348   while (src < src_end) {
   1349     const uint32_t argb = *src++;
   1350     *dst++ = (argb >>  0) & 0xff;
   1351     *dst++ = (argb >>  8) & 0xff;
   1352     *dst++ = (argb >> 16) & 0xff;
   1353   }
   1354 }
   1355 
   1356 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
   1357                        int swap_on_big_endian) {
   1358   if (is_big_endian() == swap_on_big_endian) {
   1359     const uint32_t* const src_end = src + num_pixels;
   1360     while (src < src_end) {
   1361       const uint32_t argb = *src++;
   1362 
   1363 #if !defined(WORDS_BIGENDIAN)
   1364 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
   1365       *(uint32_t*)dst = BSwap32(argb);
   1366 #else  // WEBP_REFERENCE_IMPLEMENTATION
   1367       dst[0] = (argb >> 24) & 0xff;
   1368       dst[1] = (argb >> 16) & 0xff;
   1369       dst[2] = (argb >>  8) & 0xff;
   1370       dst[3] = (argb >>  0) & 0xff;
   1371 #endif
   1372 #else  // WORDS_BIGENDIAN
   1373       dst[0] = (argb >>  0) & 0xff;
   1374       dst[1] = (argb >>  8) & 0xff;
   1375       dst[2] = (argb >> 16) & 0xff;
   1376       dst[3] = (argb >> 24) & 0xff;
   1377 #endif
   1378       dst += sizeof(argb);
   1379     }
   1380   } else {
   1381     memcpy(dst, src, num_pixels * sizeof(*src));
   1382   }
   1383 }
   1384 
   1385 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
   1386                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
   1387   switch (out_colorspace) {
   1388     case MODE_RGB:
   1389       VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
   1390       break;
   1391     case MODE_RGBA:
   1392       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
   1393       break;
   1394     case MODE_rgbA:
   1395       VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
   1396       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
   1397       break;
   1398     case MODE_BGR:
   1399       VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
   1400       break;
   1401     case MODE_BGRA:
   1402       CopyOrSwap(in_data, num_pixels, rgba, 1);
   1403       break;
   1404     case MODE_bgrA:
   1405       CopyOrSwap(in_data, num_pixels, rgba, 1);
   1406       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
   1407       break;
   1408     case MODE_ARGB:
   1409       CopyOrSwap(in_data, num_pixels, rgba, 0);
   1410       break;
   1411     case MODE_Argb:
   1412       CopyOrSwap(in_data, num_pixels, rgba, 0);
   1413       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
   1414       break;
   1415     case MODE_RGBA_4444:
   1416       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
   1417       break;
   1418     case MODE_rgbA_4444:
   1419       VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
   1420       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
   1421       break;
   1422     case MODE_RGB_565:
   1423       VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
   1424       break;
   1425     default:
   1426       assert(0);          // Code flow should not reach here.
   1427   }
   1428 }
   1429 
   1430 //------------------------------------------------------------------------------
   1431 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
   1432 void VP8LBundleColorMap(const uint8_t* const row, int width,
   1433                         int xbits, uint32_t* const dst) {
   1434   int x;
   1435   if (xbits > 0) {
   1436     const int bit_depth = 1 << (3 - xbits);
   1437     const int mask = (1 << xbits) - 1;
   1438     uint32_t code = 0xff000000;
   1439     for (x = 0; x < width; ++x) {
   1440       const int xsub = x & mask;
   1441       if (xsub == 0) {
   1442         code = 0xff000000;
   1443       }
   1444       code |= row[x] << (8 + bit_depth * xsub);
   1445       dst[x >> xbits] = code;
   1446     }
   1447   } else {
   1448     for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
   1449   }
   1450 }
   1451 
   1452 //------------------------------------------------------------------------------
   1453 
   1454 static double ExtraCost(const uint32_t* population, int length) {
   1455   int i;
   1456   double cost = 0.;
   1457   for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
   1458   return cost;
   1459 }
   1460 
   1461 static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
   1462                                 int length) {
   1463   int i;
   1464   double cost = 0.;
   1465   for (i = 2; i < length - 2; ++i) {
   1466     const int xy = X[i + 2] + Y[i + 2];
   1467     cost += (i >> 1) * xy;
   1468   }
   1469   return cost;
   1470 }
   1471 
   1472 // Returns the various RLE counts
   1473 static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
   1474   int i;
   1475   int streak = 0;
   1476   VP8LStreaks stats;
   1477   memset(&stats, 0, sizeof(stats));
   1478   for (i = 0; i < length - 1; ++i) {
   1479     ++streak;
   1480     if (population[i] == population[i + 1]) {
   1481       continue;
   1482     }
   1483     stats.counts[population[i] != 0] += (streak > 3);
   1484     stats.streaks[population[i] != 0][(streak > 3)] += streak;
   1485     streak = 0;
   1486   }
   1487   ++streak;
   1488   stats.counts[population[i] != 0] += (streak > 3);
   1489   stats.streaks[population[i] != 0][(streak > 3)] += streak;
   1490   return stats;
   1491 }
   1492 
   1493 static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
   1494                                             const uint32_t* Y, int length) {
   1495   int i;
   1496   int streak = 0;
   1497   VP8LStreaks stats;
   1498   memset(&stats, 0, sizeof(stats));
   1499   for (i = 0; i < length - 1; ++i) {
   1500     const int xy = X[i] + Y[i];
   1501     const int xy_next = X[i + 1] + Y[i + 1];
   1502     ++streak;
   1503     if (xy == xy_next) {
   1504       continue;
   1505     }
   1506     stats.counts[xy != 0] += (streak > 3);
   1507     stats.streaks[xy != 0][(streak > 3)] += streak;
   1508     streak = 0;
   1509   }
   1510   {
   1511     const int xy = X[i] + Y[i];
   1512     ++streak;
   1513     stats.counts[xy != 0] += (streak > 3);
   1514     stats.streaks[xy != 0][(streak > 3)] += streak;
   1515   }
   1516   return stats;
   1517 }
   1518 
   1519 //------------------------------------------------------------------------------
   1520 
   1521 static void HistogramAdd(const VP8LHistogram* const a,
   1522                          const VP8LHistogram* const b,
   1523                          VP8LHistogram* const out) {
   1524   int i;
   1525   const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
   1526   assert(a->palette_code_bits_ == b->palette_code_bits_);
   1527   if (b != out) {
   1528     for (i = 0; i < literal_size; ++i) {
   1529       out->literal_[i] = a->literal_[i] + b->literal_[i];
   1530     }
   1531     for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
   1532       out->distance_[i] = a->distance_[i] + b->distance_[i];
   1533     }
   1534     for (i = 0; i < NUM_LITERAL_CODES; ++i) {
   1535       out->red_[i] = a->red_[i] + b->red_[i];
   1536       out->blue_[i] = a->blue_[i] + b->blue_[i];
   1537       out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
   1538     }
   1539   } else {
   1540     for (i = 0; i < literal_size; ++i) {
   1541       out->literal_[i] += a->literal_[i];
   1542     }
   1543     for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
   1544       out->distance_[i] += a->distance_[i];
   1545     }
   1546     for (i = 0; i < NUM_LITERAL_CODES; ++i) {
   1547       out->red_[i] += a->red_[i];
   1548       out->blue_[i] += a->blue_[i];
   1549       out->alpha_[i] += a->alpha_[i];
   1550     }
   1551   }
   1552 }
   1553 
   1554 //------------------------------------------------------------------------------
   1555 
   1556 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
   1557 VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
   1558 VP8LPredictorFunc VP8LPredictors[16];
   1559 
   1560 VP8LTransformColorFunc VP8LTransformColor;
   1561 VP8LTransformColorFunc VP8LTransformColorInverse;
   1562 
   1563 VP8LConvertFunc VP8LConvertBGRAToRGB;
   1564 VP8LConvertFunc VP8LConvertBGRAToRGBA;
   1565 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
   1566 VP8LConvertFunc VP8LConvertBGRAToRGB565;
   1567 VP8LConvertFunc VP8LConvertBGRAToBGR;
   1568 
   1569 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
   1570 VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
   1571 
   1572 VP8LCostFunc VP8LExtraCost;
   1573 VP8LCostCombinedFunc VP8LExtraCostCombined;
   1574 
   1575 VP8LCostCountFunc VP8LHuffmanCostCount;
   1576 VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
   1577 
   1578 VP8LHistogramAddFunc VP8LHistogramAdd;
   1579 
   1580 extern void VP8LDspInitSSE2(void);
   1581 extern void VP8LDspInitNEON(void);
   1582 extern void VP8LDspInitMIPS32(void);
   1583 
   1584 void VP8LDspInit(void) {
   1585   memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
   1586 
   1587   VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
   1588   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
   1589 
   1590   VP8LTransformColor = VP8LTransformColor_C;
   1591   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
   1592 
   1593   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
   1594   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
   1595   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
   1596   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
   1597   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
   1598 
   1599   VP8LFastLog2Slow = FastLog2Slow;
   1600   VP8LFastSLog2Slow = FastSLog2Slow;
   1601 
   1602   VP8LExtraCost = ExtraCost;
   1603   VP8LExtraCostCombined = ExtraCostCombined;
   1604 
   1605   VP8LHuffmanCostCount = HuffmanCostCount;
   1606   VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
   1607 
   1608   VP8LHistogramAdd = HistogramAdd;
   1609 
   1610   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   1611   if (VP8GetCPUInfo != NULL) {
   1612 #if defined(WEBP_USE_SSE2)
   1613     if (VP8GetCPUInfo(kSSE2)) {
   1614       VP8LDspInitSSE2();
   1615     }
   1616 #endif
   1617 #if defined(WEBP_USE_NEON)
   1618     if (VP8GetCPUInfo(kNEON)) {
   1619       VP8LDspInitNEON();
   1620     }
   1621 #endif
   1622 #if defined(WEBP_USE_MIPS32)
   1623     if (VP8GetCPUInfo(kMIPS32)) {
   1624       VP8LDspInitMIPS32();
   1625     }
   1626 #endif
   1627   }
   1628 }
   1629 
   1630 //------------------------------------------------------------------------------
   1631