Home | History | Annotate | Download | only in dsp
      1 // Copyright 2015 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transform methods for lossless encoder.
     11 //
     12 // Authors: Vikas Arora (vikaas.arora (at) gmail.com)
     13 //          Jyrki Alakuijala (jyrki (at) google.com)
     14 //          Urvang Joshi (urvang (at) google.com)
     15 
     16 #include "./dsp.h"
     17 
     18 #include <math.h>
     19 #include <stdlib.h>
     20 #include "../dec/vp8li.h"
     21 #include "../utils/endian_inl.h"
     22 #include "./lossless.h"
     23 #include "./yuv.h"
     24 
     25 #define MAX_DIFF_COST (1e30f)
     26 
     27 static const int kPredLowEffort = 11;
     28 static const uint32_t kMaskAlpha = 0xff000000;
     29 
     30 // lookup table for small values of log2(int)
     31 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
     32   0.0000000000000000f, 0.0000000000000000f,
     33   1.0000000000000000f, 1.5849625007211560f,
     34   2.0000000000000000f, 2.3219280948873621f,
     35   2.5849625007211560f, 2.8073549220576041f,
     36   3.0000000000000000f, 3.1699250014423121f,
     37   3.3219280948873621f, 3.4594316186372973f,
     38   3.5849625007211560f, 3.7004397181410921f,
     39   3.8073549220576041f, 3.9068905956085187f,
     40   4.0000000000000000f, 4.0874628412503390f,
     41   4.1699250014423121f, 4.2479275134435852f,
     42   4.3219280948873626f, 4.3923174227787606f,
     43   4.4594316186372973f, 4.5235619560570130f,
     44   4.5849625007211560f, 4.6438561897747243f,
     45   4.7004397181410917f, 4.7548875021634682f,
     46   4.8073549220576037f, 4.8579809951275718f,
     47   4.9068905956085187f, 4.9541963103868749f,
     48   5.0000000000000000f, 5.0443941193584533f,
     49   5.0874628412503390f, 5.1292830169449663f,
     50   5.1699250014423121f, 5.2094533656289501f,
     51   5.2479275134435852f, 5.2854022188622487f,
     52   5.3219280948873626f, 5.3575520046180837f,
     53   5.3923174227787606f, 5.4262647547020979f,
     54   5.4594316186372973f, 5.4918530963296747f,
     55   5.5235619560570130f, 5.5545888516776376f,
     56   5.5849625007211560f, 5.6147098441152083f,
     57   5.6438561897747243f, 5.6724253419714951f,
     58   5.7004397181410917f, 5.7279204545631987f,
     59   5.7548875021634682f, 5.7813597135246599f,
     60   5.8073549220576037f, 5.8328900141647412f,
     61   5.8579809951275718f, 5.8826430493618415f,
     62   5.9068905956085187f, 5.9307373375628866f,
     63   5.9541963103868749f, 5.9772799234999167f,
     64   6.0000000000000000f, 6.0223678130284543f,
     65   6.0443941193584533f, 6.0660891904577720f,
     66   6.0874628412503390f, 6.1085244567781691f,
     67   6.1292830169449663f, 6.1497471195046822f,
     68   6.1699250014423121f, 6.1898245588800175f,
     69   6.2094533656289501f, 6.2288186904958804f,
     70   6.2479275134435852f, 6.2667865406949010f,
     71   6.2854022188622487f, 6.3037807481771030f,
     72   6.3219280948873626f, 6.3398500028846243f,
     73   6.3575520046180837f, 6.3750394313469245f,
     74   6.3923174227787606f, 6.4093909361377017f,
     75   6.4262647547020979f, 6.4429434958487279f,
     76   6.4594316186372973f, 6.4757334309663976f,
     77   6.4918530963296747f, 6.5077946401986963f,
     78   6.5235619560570130f, 6.5391588111080309f,
     79   6.5545888516776376f, 6.5698556083309478f,
     80   6.5849625007211560f, 6.5999128421871278f,
     81   6.6147098441152083f, 6.6293566200796094f,
     82   6.6438561897747243f, 6.6582114827517946f,
     83   6.6724253419714951f, 6.6865005271832185f,
     84   6.7004397181410917f, 6.7142455176661224f,
     85   6.7279204545631987f, 6.7414669864011464f,
     86   6.7548875021634682f, 6.7681843247769259f,
     87   6.7813597135246599f, 6.7944158663501061f,
     88   6.8073549220576037f, 6.8201789624151878f,
     89   6.8328900141647412f, 6.8454900509443747f,
     90   6.8579809951275718f, 6.8703647195834047f,
     91   6.8826430493618415f, 6.8948177633079437f,
     92   6.9068905956085187f, 6.9188632372745946f,
     93   6.9307373375628866f, 6.9425145053392398f,
     94   6.9541963103868749f, 6.9657842846620869f,
     95   6.9772799234999167f, 6.9886846867721654f,
     96   7.0000000000000000f, 7.0112272554232539f,
     97   7.0223678130284543f, 7.0334230015374501f,
     98   7.0443941193584533f, 7.0552824355011898f,
     99   7.0660891904577720f, 7.0768155970508308f,
    100   7.0874628412503390f, 7.0980320829605263f,
    101   7.1085244567781691f, 7.1189410727235076f,
    102   7.1292830169449663f, 7.1395513523987936f,
    103   7.1497471195046822f, 7.1598713367783890f,
    104   7.1699250014423121f, 7.1799090900149344f,
    105   7.1898245588800175f, 7.1996723448363644f,
    106   7.2094533656289501f, 7.2191685204621611f,
    107   7.2288186904958804f, 7.2384047393250785f,
    108   7.2479275134435852f, 7.2573878426926521f,
    109   7.2667865406949010f, 7.2761244052742375f,
    110   7.2854022188622487f, 7.2946207488916270f,
    111   7.3037807481771030f, 7.3128829552843557f,
    112   7.3219280948873626f, 7.3309168781146167f,
    113   7.3398500028846243f, 7.3487281542310771f,
    114   7.3575520046180837f, 7.3663222142458160f,
    115   7.3750394313469245f, 7.3837042924740519f,
    116   7.3923174227787606f, 7.4008794362821843f,
    117   7.4093909361377017f, 7.4178525148858982f,
    118   7.4262647547020979f, 7.4346282276367245f,
    119   7.4429434958487279f, 7.4512111118323289f,
    120   7.4594316186372973f, 7.4676055500829976f,
    121   7.4757334309663976f, 7.4838157772642563f,
    122   7.4918530963296747f, 7.4998458870832056f,
    123   7.5077946401986963f, 7.5156998382840427f,
    124   7.5235619560570130f, 7.5313814605163118f,
    125   7.5391588111080309f, 7.5468944598876364f,
    126   7.5545888516776376f, 7.5622424242210728f,
    127   7.5698556083309478f, 7.5774288280357486f,
    128   7.5849625007211560f, 7.5924570372680806f,
    129   7.5999128421871278f, 7.6073303137496104f,
    130   7.6147098441152083f, 7.6220518194563764f,
    131   7.6293566200796094f, 7.6366246205436487f,
    132   7.6438561897747243f, 7.6510516911789281f,
    133   7.6582114827517946f, 7.6653359171851764f,
    134   7.6724253419714951f, 7.6794800995054464f,
    135   7.6865005271832185f, 7.6934869574993252f,
    136   7.7004397181410917f, 7.7073591320808825f,
    137   7.7142455176661224f, 7.7210991887071855f,
    138   7.7279204545631987f, 7.7347096202258383f,
    139   7.7414669864011464f, 7.7481928495894605f,
    140   7.7548875021634682f, 7.7615512324444795f,
    141   7.7681843247769259f, 7.7747870596011736f,
    142   7.7813597135246599f, 7.7879025593914317f,
    143   7.7944158663501061f, 7.8008998999203047f,
    144   7.8073549220576037f, 7.8137811912170374f,
    145   7.8201789624151878f, 7.8265484872909150f,
    146   7.8328900141647412f, 7.8392037880969436f,
    147   7.8454900509443747f, 7.8517490414160571f,
    148   7.8579809951275718f, 7.8641861446542797f,
    149   7.8703647195834047f, 7.8765169465649993f,
    150   7.8826430493618415f, 7.8887432488982591f,
    151   7.8948177633079437f, 7.9008668079807486f,
    152   7.9068905956085187f, 7.9128893362299619f,
    153   7.9188632372745946f, 7.9248125036057812f,
    154   7.9307373375628866f, 7.9366379390025709f,
    155   7.9425145053392398f, 7.9483672315846778f,
    156   7.9541963103868749f, 7.9600019320680805f,
    157   7.9657842846620869f, 7.9715435539507719f,
    158   7.9772799234999167f, 7.9829935746943103f,
    159   7.9886846867721654f, 7.9943534368588577f
    160 };
    161 
    162 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
    163   0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
    164   8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
    165   24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
    166   43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
    167   64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
    168   86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
    169   110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
    170   134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
    171   160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
    172   186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
    173   212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
    174   240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
    175   268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
    176   296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
    177   325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
    178   354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
    179   384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
    180   413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
    181   444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
    182   474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
    183   505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
    184   536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
    185   568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
    186   600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
    187   632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
    188   664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
    189   696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
    190   729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
    191   762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
    192   795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
    193   828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
    194   862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
    195   896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
    196   929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
    197   963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
    198   998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
    199   1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
    200   1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
    201   1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
    202   1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
    203   1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
    204   1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
    205   1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
    206   1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
    207   1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
    208   1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
    209   1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
    210   1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
    211   1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
    212   1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
    213   1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
    214   1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
    215   1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
    216   1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
    217   1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
    218   1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
    219   1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
    220   1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
    221   1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
    222   1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
    223   1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
    224   1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
    225   1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
    226   2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
    227 };
    228 
    229 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
    230   { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
    231   { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
    232   { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
    233   { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
    234   { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
    235   {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
    236   {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
    237   {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
    238   {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    239   {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    240   {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    241   {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
    242   {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    243   {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    244   {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    245   {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
    246   {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    247   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    248   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    249   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    250   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    251   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    252   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    253   {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
    254   {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    255   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    256   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    257   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    258   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    259   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    260   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    261   {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
    262   {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    263   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    264   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    265   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    266   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    267   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    268   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    269   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    270   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    271   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    272   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    273   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    274   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    275   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    276   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    277   {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
    278   {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    279   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    280   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    281   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    282   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    283   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    284   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    285   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    286   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    287   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    288   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    289   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    290   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    291   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    292   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    293   {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
    294 };
    295 
    296 const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
    297    0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
    298    0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
    299    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    300    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    301    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    302   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    303    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    304   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    305    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    306   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    307   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    308   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    309    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    310   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    311   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    312   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    313    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    314   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    315   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    316   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    317   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    318   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    319   96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    320   112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
    321   127,
    322    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    323   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    324   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    325   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    326   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    327   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
    328   96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    329   112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
    330 };
    331 
    332 static float FastSLog2Slow(uint32_t v) {
    333   assert(v >= LOG_LOOKUP_IDX_MAX);
    334   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    335     int log_cnt = 0;
    336     uint32_t y = 1;
    337     int correction = 0;
    338     const float v_f = (float)v;
    339     const uint32_t orig_v = v;
    340     do {
    341       ++log_cnt;
    342       v = v >> 1;
    343       y = y << 1;
    344     } while (v >= LOG_LOOKUP_IDX_MAX);
    345     // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
    346     // Xf = floor(Xf) * (1 + (v % y) / v)
    347     // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    348     // The correction factor: log(1 + d) ~ d; for very small d values, so
    349     // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
    350     // LOG_2_RECIPROCAL ~ 23/16
    351     correction = (23 * (orig_v & (y - 1))) >> 4;
    352     return v_f * (kLog2Table[v] + log_cnt) + correction;
    353   } else {
    354     return (float)(LOG_2_RECIPROCAL * v * log((double)v));
    355   }
    356 }
    357 
    358 static float FastLog2Slow(uint32_t v) {
    359   assert(v >= LOG_LOOKUP_IDX_MAX);
    360   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    361     int log_cnt = 0;
    362     uint32_t y = 1;
    363     const uint32_t orig_v = v;
    364     double log_2;
    365     do {
    366       ++log_cnt;
    367       v = v >> 1;
    368       y = y << 1;
    369     } while (v >= LOG_LOOKUP_IDX_MAX);
    370     log_2 = kLog2Table[v] + log_cnt;
    371     if (orig_v >= APPROX_LOG_MAX) {
    372       // Since the division is still expensive, add this correction factor only
    373       // for large values of 'v'.
    374       const int correction = (23 * (orig_v & (y - 1))) >> 4;
    375       log_2 += (double)correction / orig_v;
    376     }
    377     return (float)log_2;
    378   } else {
    379     return (float)(LOG_2_RECIPROCAL * log((double)v));
    380   }
    381 }
    382 
    383 // Mostly used to reduce code size + readability
    384 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
    385 
    386 //------------------------------------------------------------------------------
    387 // Methods to calculate Entropy (Shannon).
    388 
    389 static float PredictionCostSpatial(const int counts[256], int weight_0,
    390                                    double exp_val) {
    391   const int significant_symbols = 256 >> 4;
    392   const double exp_decay_factor = 0.6;
    393   double bits = weight_0 * counts[0];
    394   int i;
    395   for (i = 1; i < significant_symbols; ++i) {
    396     bits += exp_val * (counts[i] + counts[256 - i]);
    397     exp_val *= exp_decay_factor;
    398   }
    399   return (float)(-0.1 * bits);
    400 }
    401 
    402 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
    403 static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
    404   int i;
    405   double retval = 0.;
    406   int sumX = 0, sumXY = 0;
    407   for (i = 0; i < 256; ++i) {
    408     const int x = X[i];
    409     if (x != 0) {
    410       const int xy = x + Y[i];
    411       sumX += x;
    412       retval -= VP8LFastSLog2(x);
    413       sumXY += xy;
    414       retval -= VP8LFastSLog2(xy);
    415     } else if (Y[i] != 0) {
    416       sumXY += Y[i];
    417       retval -= VP8LFastSLog2(Y[i]);
    418     }
    419   }
    420   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
    421   return (float)retval;
    422 }
    423 
    424 static float PredictionCostSpatialHistogram(const int accumulated[4][256],
    425                                             const int tile[4][256]) {
    426   int i;
    427   double retval = 0;
    428   for (i = 0; i < 4; ++i) {
    429     const double kExpValue = 0.94;
    430     retval += PredictionCostSpatial(tile[i], 1, kExpValue);
    431     retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
    432   }
    433   return (float)retval;
    434 }
    435 
    436 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
    437   entropy->entropy = 0.;
    438   entropy->sum = 0;
    439   entropy->nonzeros = 0;
    440   entropy->max_val = 0;
    441   entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
    442 }
    443 
    444 void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
    445                               VP8LBitEntropy* const entropy) {
    446   int i;
    447 
    448   VP8LBitEntropyInit(entropy);
    449 
    450   for (i = 0; i < n; ++i) {
    451     if (array[i] != 0) {
    452       entropy->sum += array[i];
    453       entropy->nonzero_code = i;
    454       ++entropy->nonzeros;
    455       entropy->entropy -= VP8LFastSLog2(array[i]);
    456       if (entropy->max_val < array[i]) {
    457         entropy->max_val = array[i];
    458       }
    459     }
    460   }
    461   entropy->entropy += VP8LFastSLog2(entropy->sum);
    462 }
    463 
    464 static WEBP_INLINE void GetEntropyUnrefinedHelper(
    465     uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
    466     VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
    467   const int streak = i - *i_prev;
    468 
    469   // Gather info for the bit entropy.
    470   if (*val_prev != 0) {
    471     bit_entropy->sum += (*val_prev) * streak;
    472     bit_entropy->nonzeros += streak;
    473     bit_entropy->nonzero_code = *i_prev;
    474     bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
    475     if (bit_entropy->max_val < *val_prev) {
    476       bit_entropy->max_val = *val_prev;
    477     }
    478   }
    479 
    480   // Gather info for the Huffman cost.
    481   stats->counts[*val_prev != 0] += (streak > 3);
    482   stats->streaks[*val_prev != 0][(streak > 3)] += streak;
    483 
    484   *val_prev = val;
    485   *i_prev = i;
    486 }
    487 
    488 void VP8LGetEntropyUnrefined(const uint32_t* const X, int length,
    489                              VP8LBitEntropy* const bit_entropy,
    490                              VP8LStreaks* const stats) {
    491   int i;
    492   int i_prev = 0;
    493   uint32_t x_prev = X[0];
    494 
    495   memset(stats, 0, sizeof(*stats));
    496   VP8LBitEntropyInit(bit_entropy);
    497 
    498   for (i = 1; i < length; ++i) {
    499     const uint32_t x = X[i];
    500     if (x != x_prev) {
    501       VP8LGetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);
    502     }
    503   }
    504   VP8LGetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
    505 
    506   bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
    507 }
    508 
    509 void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,
    510                                      const uint32_t* const Y, int length,
    511                                      VP8LBitEntropy* const bit_entropy,
    512                                      VP8LStreaks* const stats) {
    513   int i = 1;
    514   int i_prev = 0;
    515   uint32_t xy_prev = X[0] + Y[0];
    516 
    517   memset(stats, 0, sizeof(*stats));
    518   VP8LBitEntropyInit(bit_entropy);
    519 
    520   for (i = 1; i < length; ++i) {
    521     const uint32_t xy = X[i] + Y[i];
    522     if (xy != xy_prev) {
    523       VP8LGetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy,
    524                                     stats);
    525     }
    526   }
    527   VP8LGetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
    528 
    529   bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
    530 }
    531 
    532 static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
    533   ++histo_argb[0][argb >> 24];
    534   ++histo_argb[1][(argb >> 16) & 0xff];
    535   ++histo_argb[2][(argb >> 8) & 0xff];
    536   ++histo_argb[3][argb & 0xff];
    537 }
    538 
    539 //------------------------------------------------------------------------------
    540 
    541 static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
    542                                     int x, int y,
    543                                     const uint32_t* current_row,
    544                                     const uint32_t* upper_row) {
    545   if (y == 0) {
    546     return (x == 0) ? ARGB_BLACK : current_row[x - 1];  // Left.
    547   } else if (x == 0) {
    548     return upper_row[x];  // Top.
    549   } else {
    550     return pred_func(current_row[x - 1], upper_row + x);
    551   }
    552 }
    553 
    554 // Returns best predictor and updates the accumulated histogram.
    555 static int GetBestPredictorForTile(int width, int height,
    556                                    int tile_x, int tile_y, int bits,
    557                                    int accumulated[4][256],
    558                                    const uint32_t* const argb_scratch,
    559                                    int exact) {
    560   const int kNumPredModes = 14;
    561   const int col_start = tile_x << bits;
    562   const int row_start = tile_y << bits;
    563   const int tile_size = 1 << bits;
    564   const int max_y = GetMin(tile_size, height - row_start);
    565   const int max_x = GetMin(tile_size, width - col_start);
    566   float best_diff = MAX_DIFF_COST;
    567   int best_mode = 0;
    568   int mode;
    569   int histo_stack_1[4][256];
    570   int histo_stack_2[4][256];
    571   // Need pointers to be able to swap arrays.
    572   int (*histo_argb)[256] = histo_stack_1;
    573   int (*best_histo)[256] = histo_stack_2;
    574 
    575   int i, j;
    576   for (mode = 0; mode < kNumPredModes; ++mode) {
    577     const uint32_t* current_row = argb_scratch;
    578     const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
    579     float cur_diff;
    580     int y;
    581     memset(histo_argb, 0, sizeof(histo_stack_1));
    582     for (y = 0; y < max_y; ++y) {
    583       int x;
    584       const int row = row_start + y;
    585       const uint32_t* const upper_row = current_row;
    586       current_row = upper_row + width;
    587       for (x = 0; x < max_x; ++x) {
    588         const int col = col_start + x;
    589         const uint32_t predict =
    590             Predict(pred_func, col, row, current_row, upper_row);
    591         uint32_t residual = VP8LSubPixels(current_row[col], predict);
    592         if (!exact && (current_row[col] & kMaskAlpha) == 0) {
    593           residual &= kMaskAlpha;  // See CopyTileWithPrediction.
    594         }
    595         UpdateHisto(histo_argb, residual);
    596       }
    597     }
    598     cur_diff = PredictionCostSpatialHistogram(
    599         (const int (*)[256])accumulated, (const int (*)[256])histo_argb);
    600     if (cur_diff < best_diff) {
    601       int (*tmp)[256] = histo_argb;
    602       histo_argb = best_histo;
    603       best_histo = tmp;
    604       best_diff = cur_diff;
    605       best_mode = mode;
    606     }
    607   }
    608 
    609   for (i = 0; i < 4; i++) {
    610     for (j = 0; j < 256; j++) {
    611       accumulated[i][j] += best_histo[i][j];
    612     }
    613   }
    614 
    615   return best_mode;
    616 }
    617 
    618 static void CopyImageWithPrediction(int width, int height,
    619                                     int bits, uint32_t* const modes,
    620                                     uint32_t* const argb_scratch,
    621                                     uint32_t* const argb,
    622                                     int low_effort, int exact) {
    623   const int tiles_per_row = VP8LSubSampleSize(width, bits);
    624   const int mask = (1 << bits) - 1;
    625   // The row size is one pixel longer to allow the top right pixel to point to
    626   // the leftmost pixel of the next row when at the right edge.
    627   uint32_t* current_row = argb_scratch;
    628   uint32_t* upper_row = argb_scratch + width + 1;
    629   int y;
    630   VP8LPredictorFunc pred_func =
    631       low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
    632 
    633   for (y = 0; y < height; ++y) {
    634     int x;
    635     uint32_t* tmp = upper_row;
    636     upper_row = current_row;
    637     current_row = tmp;
    638     memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
    639     current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
    640 
    641     if (low_effort) {
    642       for (x = 0; x < width; ++x) {
    643         const uint32_t predict =
    644             Predict(pred_func, x, y, current_row, upper_row);
    645         argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
    646       }
    647     } else {
    648       for (x = 0; x < width; ++x) {
    649         uint32_t predict, residual;
    650         if ((x & mask) == 0) {
    651           const int mode =
    652               (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
    653           pred_func = VP8LPredictors[mode];
    654         }
    655         predict = Predict(pred_func, x, y, current_row, upper_row);
    656         residual = VP8LSubPixels(current_row[x], predict);
    657         if (!exact && (current_row[x] & kMaskAlpha) == 0) {
    658           // If alpha is 0, cleanup RGB. We can choose the RGB values of the
    659           // residual for best compression. The prediction of alpha itself can
    660           // be non-zero and must be kept though. We choose RGB of the residual
    661           // to be 0.
    662           residual &= kMaskAlpha;
    663           // Update input image so that next predictions use correct RGB value.
    664           current_row[x] = predict & ~kMaskAlpha;
    665           if (x == 0 && y != 0) upper_row[width] = current_row[x];
    666         }
    667         argb[y * width + x] = residual;
    668       }
    669     }
    670   }
    671 }
    672 
    673 void VP8LResidualImage(int width, int height, int bits, int low_effort,
    674                        uint32_t* const argb, uint32_t* const argb_scratch,
    675                        uint32_t* const image, int exact) {
    676   const int max_tile_size = 1 << bits;
    677   const int tiles_per_row = VP8LSubSampleSize(width, bits);
    678   const int tiles_per_col = VP8LSubSampleSize(height, bits);
    679   uint32_t* const upper_row = argb_scratch;
    680   uint32_t* const current_tile_rows = argb_scratch + width;
    681   int tile_y;
    682   int histo[4][256];
    683   if (low_effort) {
    684     int i;
    685     for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
    686       image[i] = ARGB_BLACK | (kPredLowEffort << 8);
    687     }
    688   } else {
    689     memset(histo, 0, sizeof(histo));
    690     for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
    691       const int tile_y_offset = tile_y * max_tile_size;
    692       const int this_tile_height =
    693           (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
    694       int tile_x;
    695       if (tile_y > 0) {
    696         memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
    697                width * sizeof(*upper_row));
    698       }
    699       memcpy(current_tile_rows, &argb[tile_y_offset * width],
    700              this_tile_height * width * sizeof(*current_tile_rows));
    701       for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
    702         const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
    703             bits, (int (*)[256])histo, argb_scratch, exact);
    704         image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
    705       }
    706     }
    707   }
    708 
    709   CopyImageWithPrediction(width, height, bits,
    710                           image, argb_scratch, argb, low_effort, exact);
    711 }
    712 
    713 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
    714   int i;
    715   for (i = 0; i < num_pixels; ++i) {
    716     const uint32_t argb = argb_data[i];
    717     const uint32_t green = (argb >> 8) & 0xff;
    718     const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
    719     const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
    720     argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
    721   }
    722 }
    723 
    724 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
    725   m->green_to_red_ = 0;
    726   m->green_to_blue_ = 0;
    727   m->red_to_blue_ = 0;
    728 }
    729 
    730 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
    731                                                 int8_t color) {
    732   return (uint32_t)((int)(color_pred) * color) >> 5;
    733 }
    734 
    735 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
    736                                                VP8LMultipliers* const m) {
    737   m->green_to_red_  = (color_code >>  0) & 0xff;
    738   m->green_to_blue_ = (color_code >>  8) & 0xff;
    739   m->red_to_blue_   = (color_code >> 16) & 0xff;
    740 }
    741 
    742 static WEBP_INLINE uint32_t MultipliersToColorCode(
    743     const VP8LMultipliers* const m) {
    744   return 0xff000000u |
    745          ((uint32_t)(m->red_to_blue_) << 16) |
    746          ((uint32_t)(m->green_to_blue_) << 8) |
    747          m->green_to_red_;
    748 }
    749 
    750 void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
    751                           int num_pixels) {
    752   int i;
    753   for (i = 0; i < num_pixels; ++i) {
    754     const uint32_t argb = data[i];
    755     const uint32_t green = argb >> 8;
    756     const uint32_t red = argb >> 16;
    757     uint32_t new_red = red;
    758     uint32_t new_blue = argb;
    759     new_red -= ColorTransformDelta(m->green_to_red_, green);
    760     new_red &= 0xff;
    761     new_blue -= ColorTransformDelta(m->green_to_blue_, green);
    762     new_blue -= ColorTransformDelta(m->red_to_blue_, red);
    763     new_blue &= 0xff;
    764     data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
    765   }
    766 }
    767 
    768 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
    769                                              uint32_t argb) {
    770   const uint32_t green = argb >> 8;
    771   uint32_t new_red = argb >> 16;
    772   new_red -= ColorTransformDelta(green_to_red, green);
    773   return (new_red & 0xff);
    774 }
    775 
    776 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
    777                                               uint8_t red_to_blue,
    778                                               uint32_t argb) {
    779   const uint32_t green = argb >> 8;
    780   const uint32_t red = argb >> 16;
    781   uint8_t new_blue = argb;
    782   new_blue -= ColorTransformDelta(green_to_blue, green);
    783   new_blue -= ColorTransformDelta(red_to_blue, red);
    784   return (new_blue & 0xff);
    785 }
    786 
    787 static float PredictionCostCrossColor(const int accumulated[256],
    788                                       const int counts[256]) {
    789   // Favor low entropy, locally and globally.
    790   // Favor small absolute values for PredictionCostSpatial
    791   static const double kExpValue = 2.4;
    792   return VP8LCombinedShannonEntropy(counts, accumulated) +
    793          PredictionCostSpatial(counts, 3, kExpValue);
    794 }
    795 
    796 void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
    797                                      int tile_width, int tile_height,
    798                                      int green_to_red, int histo[]) {
    799   while (tile_height-- > 0) {
    800     int x;
    801     for (x = 0; x < tile_width; ++x) {
    802       ++histo[TransformColorRed(green_to_red, argb[x])];
    803     }
    804     argb += stride;
    805   }
    806 }
    807 
    808 static float GetPredictionCostCrossColorRed(
    809     const uint32_t* argb, int stride, int tile_width, int tile_height,
    810     VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
    811     const int accumulated_red_histo[256]) {
    812   int histo[256] = { 0 };
    813   float cur_diff;
    814 
    815   VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
    816                                 green_to_red, histo);
    817 
    818   cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
    819   if ((uint8_t)green_to_red == prev_x.green_to_red_) {
    820     cur_diff -= 3;  // favor keeping the areas locally similar
    821   }
    822   if ((uint8_t)green_to_red == prev_y.green_to_red_) {
    823     cur_diff -= 3;  // favor keeping the areas locally similar
    824   }
    825   if (green_to_red == 0) {
    826     cur_diff -= 3;
    827   }
    828   return cur_diff;
    829 }
    830 
    831 static void GetBestGreenToRed(
    832     const uint32_t* argb, int stride, int tile_width, int tile_height,
    833     VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
    834     const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
    835   const int kMaxIters = 4 + ((7 * quality) >> 8);  // in range [4..6]
    836   int green_to_red_best = 0;
    837   int iter, offset;
    838   float best_diff = GetPredictionCostCrossColorRed(
    839       argb, stride, tile_width, tile_height, prev_x, prev_y,
    840       green_to_red_best, accumulated_red_histo);
    841   for (iter = 0; iter < kMaxIters; ++iter) {
    842     // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
    843     // one in color computation. Having initial delta here as 1 is sufficient
    844     // to explore the range of (-2, 2).
    845     const int delta = 32 >> iter;
    846     // Try a negative and a positive delta from the best known value.
    847     for (offset = -delta; offset <= delta; offset += 2 * delta) {
    848       const int green_to_red_cur = offset + green_to_red_best;
    849       const float cur_diff = GetPredictionCostCrossColorRed(
    850           argb, stride, tile_width, tile_height, prev_x, prev_y,
    851           green_to_red_cur, accumulated_red_histo);
    852       if (cur_diff < best_diff) {
    853         best_diff = cur_diff;
    854         green_to_red_best = green_to_red_cur;
    855       }
    856     }
    857   }
    858   best_tx->green_to_red_ = green_to_red_best;
    859 }
    860 
    861 void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
    862                                       int tile_width, int tile_height,
    863                                       int green_to_blue, int red_to_blue,
    864                                       int histo[]) {
    865   while (tile_height-- > 0) {
    866     int x;
    867     for (x = 0; x < tile_width; ++x) {
    868       ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])];
    869     }
    870     argb += stride;
    871   }
    872 }
    873 
    874 static float GetPredictionCostCrossColorBlue(
    875     const uint32_t* argb, int stride, int tile_width, int tile_height,
    876     VP8LMultipliers prev_x, VP8LMultipliers prev_y,
    877     int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
    878   int histo[256] = { 0 };
    879   float cur_diff;
    880 
    881   VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
    882                                  green_to_blue, red_to_blue, histo);
    883 
    884   cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
    885   if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
    886     cur_diff -= 3;  // favor keeping the areas locally similar
    887   }
    888   if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
    889     cur_diff -= 3;  // favor keeping the areas locally similar
    890   }
    891   if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
    892     cur_diff -= 3;  // favor keeping the areas locally similar
    893   }
    894   if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
    895     cur_diff -= 3;  // favor keeping the areas locally similar
    896   }
    897   if (green_to_blue == 0) {
    898     cur_diff -= 3;
    899   }
    900   if (red_to_blue == 0) {
    901     cur_diff -= 3;
    902   }
    903   return cur_diff;
    904 }
    905 
    906 #define kGreenRedToBlueNumAxis 8
    907 #define kGreenRedToBlueMaxIters 7
    908 static void GetBestGreenRedToBlue(
    909     const uint32_t* argb, int stride, int tile_width, int tile_height,
    910     VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
    911     const int accumulated_blue_histo[256],
    912     VP8LMultipliers* const best_tx) {
    913   const int8_t offset[kGreenRedToBlueNumAxis][2] =
    914       {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
    915   const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };
    916   const int iters =
    917       (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;
    918   int green_to_blue_best = 0;
    919   int red_to_blue_best = 0;
    920   int iter;
    921   // Initial value at origin:
    922   float best_diff = GetPredictionCostCrossColorBlue(
    923       argb, stride, tile_width, tile_height, prev_x, prev_y,
    924       green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
    925   for (iter = 0; iter < iters; ++iter) {
    926     const int delta = delta_lut[iter];
    927     int axis;
    928     for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {
    929       const int green_to_blue_cur =
    930           offset[axis][0] * delta + green_to_blue_best;
    931       const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
    932       const float cur_diff = GetPredictionCostCrossColorBlue(
    933           argb, stride, tile_width, tile_height, prev_x, prev_y,
    934           green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
    935       if (cur_diff < best_diff) {
    936         best_diff = cur_diff;
    937         green_to_blue_best = green_to_blue_cur;
    938         red_to_blue_best = red_to_blue_cur;
    939       }
    940       if (quality < 25 && iter == 4) {
    941         // Only axis aligned diffs for lower quality.
    942         break;  // next iter.
    943       }
    944     }
    945     if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {
    946       // Further iterations would not help.
    947       break;  // out of iter-loop.
    948     }
    949   }
    950   best_tx->green_to_blue_ = green_to_blue_best;
    951   best_tx->red_to_blue_ = red_to_blue_best;
    952 }
    953 #undef kGreenRedToBlueMaxIters
    954 #undef kGreenRedToBlueNumAxis
    955 
    956 static VP8LMultipliers GetBestColorTransformForTile(
    957     int tile_x, int tile_y, int bits,
    958     VP8LMultipliers prev_x,
    959     VP8LMultipliers prev_y,
    960     int quality, int xsize, int ysize,
    961     const int accumulated_red_histo[256],
    962     const int accumulated_blue_histo[256],
    963     const uint32_t* const argb) {
    964   const int max_tile_size = 1 << bits;
    965   const int tile_y_offset = tile_y * max_tile_size;
    966   const int tile_x_offset = tile_x * max_tile_size;
    967   const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
    968   const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
    969   const int tile_width = all_x_max - tile_x_offset;
    970   const int tile_height = all_y_max - tile_y_offset;
    971   const uint32_t* const tile_argb = argb + tile_y_offset * xsize
    972                                   + tile_x_offset;
    973   VP8LMultipliers best_tx;
    974   MultipliersClear(&best_tx);
    975 
    976   GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height,
    977                     prev_x, prev_y, quality, accumulated_red_histo, &best_tx);
    978   GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height,
    979                         prev_x, prev_y, quality, accumulated_blue_histo,
    980                         &best_tx);
    981   return best_tx;
    982 }
    983 
    984 static void CopyTileWithColorTransform(int xsize, int ysize,
    985                                        int tile_x, int tile_y,
    986                                        int max_tile_size,
    987                                        VP8LMultipliers color_transform,
    988                                        uint32_t* argb) {
    989   const int xscan = GetMin(max_tile_size, xsize - tile_x);
    990   int yscan = GetMin(max_tile_size, ysize - tile_y);
    991   argb += tile_y * xsize + tile_x;
    992   while (yscan-- > 0) {
    993     VP8LTransformColor(&color_transform, argb, xscan);
    994     argb += xsize;
    995   }
    996 }
    997 
    998 void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
    999                              uint32_t* const argb, uint32_t* image) {
   1000   const int max_tile_size = 1 << bits;
   1001   const int tile_xsize = VP8LSubSampleSize(width, bits);
   1002   const int tile_ysize = VP8LSubSampleSize(height, bits);
   1003   int accumulated_red_histo[256] = { 0 };
   1004   int accumulated_blue_histo[256] = { 0 };
   1005   int tile_x, tile_y;
   1006   VP8LMultipliers prev_x, prev_y;
   1007   MultipliersClear(&prev_y);
   1008   MultipliersClear(&prev_x);
   1009   for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
   1010     for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
   1011       int y;
   1012       const int tile_x_offset = tile_x * max_tile_size;
   1013       const int tile_y_offset = tile_y * max_tile_size;
   1014       const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
   1015       const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
   1016       const int offset = tile_y * tile_xsize + tile_x;
   1017       if (tile_y != 0) {
   1018         ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
   1019       }
   1020       prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
   1021                                             prev_x, prev_y,
   1022                                             quality, width, height,
   1023                                             accumulated_red_histo,
   1024                                             accumulated_blue_histo,
   1025                                             argb);
   1026       image[offset] = MultipliersToColorCode(&prev_x);
   1027       CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
   1028                                  max_tile_size, prev_x, argb);
   1029 
   1030       // Gather accumulated histogram data.
   1031       for (y = tile_y_offset; y < all_y_max; ++y) {
   1032         int ix = y * width + tile_x_offset;
   1033         const int ix_end = ix + all_x_max - tile_x_offset;
   1034         for (; ix < ix_end; ++ix) {
   1035           const uint32_t pix = argb[ix];
   1036           if (ix >= 2 &&
   1037               pix == argb[ix - 2] &&
   1038               pix == argb[ix - 1]) {
   1039             continue;  // repeated pixels are handled by backward references
   1040           }
   1041           if (ix >= width + 2 &&
   1042               argb[ix - 2] == argb[ix - width - 2] &&
   1043               argb[ix - 1] == argb[ix - width - 1] &&
   1044               pix == argb[ix - width]) {
   1045             continue;  // repeated pixels are handled by backward references
   1046           }
   1047           ++accumulated_red_histo[(pix >> 16) & 0xff];
   1048           ++accumulated_blue_histo[(pix >> 0) & 0xff];
   1049         }
   1050       }
   1051     }
   1052   }
   1053 }
   1054 
   1055 //------------------------------------------------------------------------------
   1056 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
   1057 void VP8LBundleColorMap(const uint8_t* const row, int width,
   1058                         int xbits, uint32_t* const dst) {
   1059   int x;
   1060   if (xbits > 0) {
   1061     const int bit_depth = 1 << (3 - xbits);
   1062     const int mask = (1 << xbits) - 1;
   1063     uint32_t code = 0xff000000;
   1064     for (x = 0; x < width; ++x) {
   1065       const int xsub = x & mask;
   1066       if (xsub == 0) {
   1067         code = 0xff000000;
   1068       }
   1069       code |= row[x] << (8 + bit_depth * xsub);
   1070       dst[x >> xbits] = code;
   1071     }
   1072   } else {
   1073     for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
   1074   }
   1075 }
   1076 
   1077 //------------------------------------------------------------------------------
   1078 
   1079 static double ExtraCost(const uint32_t* population, int length) {
   1080   int i;
   1081   double cost = 0.;
   1082   for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
   1083   return cost;
   1084 }
   1085 
   1086 static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
   1087                                 int length) {
   1088   int i;
   1089   double cost = 0.;
   1090   for (i = 2; i < length - 2; ++i) {
   1091     const int xy = X[i + 2] + Y[i + 2];
   1092     cost += (i >> 1) * xy;
   1093   }
   1094   return cost;
   1095 }
   1096 
   1097 //------------------------------------------------------------------------------
   1098 
   1099 static void HistogramAdd(const VP8LHistogram* const a,
   1100                          const VP8LHistogram* const b,
   1101                          VP8LHistogram* const out) {
   1102   int i;
   1103   const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
   1104   assert(a->palette_code_bits_ == b->palette_code_bits_);
   1105   if (b != out) {
   1106     for (i = 0; i < literal_size; ++i) {
   1107       out->literal_[i] = a->literal_[i] + b->literal_[i];
   1108     }
   1109     for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
   1110       out->distance_[i] = a->distance_[i] + b->distance_[i];
   1111     }
   1112     for (i = 0; i < NUM_LITERAL_CODES; ++i) {
   1113       out->red_[i] = a->red_[i] + b->red_[i];
   1114       out->blue_[i] = a->blue_[i] + b->blue_[i];
   1115       out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
   1116     }
   1117   } else {
   1118     for (i = 0; i < literal_size; ++i) {
   1119       out->literal_[i] += a->literal_[i];
   1120     }
   1121     for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
   1122       out->distance_[i] += a->distance_[i];
   1123     }
   1124     for (i = 0; i < NUM_LITERAL_CODES; ++i) {
   1125       out->red_[i] += a->red_[i];
   1126       out->blue_[i] += a->blue_[i];
   1127       out->alpha_[i] += a->alpha_[i];
   1128     }
   1129   }
   1130 }
   1131 
   1132 //------------------------------------------------------------------------------
   1133 
   1134 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
   1135 
   1136 VP8LTransformColorFunc VP8LTransformColor;
   1137 
   1138 VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
   1139 VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
   1140 
   1141 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
   1142 VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
   1143 
   1144 VP8LCostFunc VP8LExtraCost;
   1145 VP8LCostCombinedFunc VP8LExtraCostCombined;
   1146 VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
   1147 
   1148 GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
   1149 
   1150 VP8LHistogramAddFunc VP8LHistogramAdd;
   1151 
   1152 extern void VP8LEncDspInitSSE2(void);
   1153 extern void VP8LEncDspInitSSE41(void);
   1154 extern void VP8LEncDspInitNEON(void);
   1155 extern void VP8LEncDspInitMIPS32(void);
   1156 extern void VP8LEncDspInitMIPSdspR2(void);
   1157 
   1158 static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used =
   1159     (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;
   1160 
   1161 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
   1162   if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;
   1163 
   1164   VP8LDspInit();
   1165 
   1166   VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
   1167 
   1168   VP8LTransformColor = VP8LTransformColor_C;
   1169 
   1170   VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
   1171   VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
   1172 
   1173   VP8LFastLog2Slow = FastLog2Slow;
   1174   VP8LFastSLog2Slow = FastSLog2Slow;
   1175 
   1176   VP8LExtraCost = ExtraCost;
   1177   VP8LExtraCostCombined = ExtraCostCombined;
   1178   VP8LCombinedShannonEntropy = CombinedShannonEntropy;
   1179 
   1180   VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;
   1181 
   1182   VP8LHistogramAdd = HistogramAdd;
   1183 
   1184   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   1185   if (VP8GetCPUInfo != NULL) {
   1186 #if defined(WEBP_USE_SSE2)
   1187     if (VP8GetCPUInfo(kSSE2)) {
   1188       VP8LEncDspInitSSE2();
   1189 #if defined(WEBP_USE_SSE41)
   1190       if (VP8GetCPUInfo(kSSE4_1)) {
   1191         VP8LEncDspInitSSE41();
   1192       }
   1193 #endif
   1194     }
   1195 #endif
   1196 #if defined(WEBP_USE_NEON)
   1197     if (VP8GetCPUInfo(kNEON)) {
   1198       VP8LEncDspInitNEON();
   1199     }
   1200 #endif
   1201 #if defined(WEBP_USE_MIPS32)
   1202     if (VP8GetCPUInfo(kMIPS32)) {
   1203       VP8LEncDspInitMIPS32();
   1204     }
   1205 #endif
   1206 #if defined(WEBP_USE_MIPS_DSP_R2)
   1207     if (VP8GetCPUInfo(kMIPSdspR2)) {
   1208       VP8LEncDspInitMIPSdspR2();
   1209     }
   1210 #endif
   1211   }
   1212   lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
   1213 }
   1214 
   1215 //------------------------------------------------------------------------------
   1216