Home | History | Annotate | Download | only in x86
      1 /*
      2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
      3  *
      4  * This source code is subject to the terms of the BSD 2 Clause License and
      5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6  * was not distributed with this source code in the LICENSE file, you can
      7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8  * Media Patent License 1.0 was not distributed with this source code in the
      9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10  */
     11 
     12 #include <assert.h>
     13 #include <emmintrin.h>  // SSE2
     14 
     15 #include "aom/aom_integer.h"
     16 #include "aom_dsp/x86/mem_sse2.h"
     17 #include "av1/common/onyxc_int.h"
     18 #include "av1/common/txb_common.h"
     19 
     20 static INLINE void load_levels_4x4x5_sse2(const uint8_t *const src,
     21                                           const int stride,
     22                                           const ptrdiff_t *const offsets,
     23                                           __m128i *const level) {
     24   level[0] = load_8bit_4x4_to_1_reg_sse2(src + 1, stride);
     25   level[1] = load_8bit_4x4_to_1_reg_sse2(src + stride, stride);
     26   level[2] = load_8bit_4x4_to_1_reg_sse2(src + offsets[0], stride);
     27   level[3] = load_8bit_4x4_to_1_reg_sse2(src + offsets[1], stride);
     28   level[4] = load_8bit_4x4_to_1_reg_sse2(src + offsets[2], stride);
     29 }
     30 
     31 static INLINE void load_levels_8x2x5_sse2(const uint8_t *const src,
     32                                           const int stride,
     33                                           const ptrdiff_t *const offsets,
     34                                           __m128i *const level) {
     35   level[0] = load_8bit_8x2_to_1_reg_sse2(src + 1, stride);
     36   level[1] = load_8bit_8x2_to_1_reg_sse2(src + stride, stride);
     37   level[2] = load_8bit_8x2_to_1_reg_sse2(src + offsets[0], stride);
     38   level[3] = load_8bit_8x2_to_1_reg_sse2(src + offsets[1], stride);
     39   level[4] = load_8bit_8x2_to_1_reg_sse2(src + offsets[2], stride);
     40 }
     41 
     42 static INLINE void load_levels_16x1x5_sse2(const uint8_t *const src,
     43                                            const int stride,
     44                                            const ptrdiff_t *const offsets,
     45                                            __m128i *const level) {
     46   level[0] = _mm_loadu_si128((__m128i *)(src + 1));
     47   level[1] = _mm_loadu_si128((__m128i *)(src + stride));
     48   level[2] = _mm_loadu_si128((__m128i *)(src + offsets[0]));
     49   level[3] = _mm_loadu_si128((__m128i *)(src + offsets[1]));
     50   level[4] = _mm_loadu_si128((__m128i *)(src + offsets[2]));
     51 }
     52 
     53 static INLINE __m128i get_coeff_contexts_kernel_sse2(__m128i *const level) {
     54   const __m128i const_3 = _mm_set1_epi8(3);
     55   const __m128i const_4 = _mm_set1_epi8(4);
     56   __m128i count;
     57 
     58   count = _mm_min_epu8(level[0], const_3);
     59   level[1] = _mm_min_epu8(level[1], const_3);
     60   level[2] = _mm_min_epu8(level[2], const_3);
     61   level[3] = _mm_min_epu8(level[3], const_3);
     62   level[4] = _mm_min_epu8(level[4], const_3);
     63   count = _mm_add_epi8(count, level[1]);
     64   count = _mm_add_epi8(count, level[2]);
     65   count = _mm_add_epi8(count, level[3]);
     66   count = _mm_add_epi8(count, level[4]);
     67   count = _mm_avg_epu8(count, _mm_setzero_si128());
     68   count = _mm_min_epu8(count, const_4);
     69   return count;
     70 }
     71 
     72 static INLINE void get_4_nz_map_contexts_2d(const uint8_t *levels,
     73                                             const int height,
     74                                             const ptrdiff_t *const offsets,
     75                                             int8_t *const coeff_contexts) {
     76   const int stride = 4 + TX_PAD_HOR;
     77   const __m128i pos_to_offset_large = _mm_set1_epi8(21);
     78   __m128i pos_to_offset =
     79       (height == 4)
     80           ? _mm_setr_epi8(0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21)
     81           : _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21,
     82                           21, 21);
     83   __m128i count;
     84   __m128i level[5];
     85   int8_t *cc = coeff_contexts;
     86   int row = height;
     87 
     88   assert(!(height % 4));
     89 
     90   do {
     91     load_levels_4x4x5_sse2(levels, stride, offsets, level);
     92     count = get_coeff_contexts_kernel_sse2(level);
     93     count = _mm_add_epi8(count, pos_to_offset);
     94     _mm_store_si128((__m128i *)cc, count);
     95     pos_to_offset = pos_to_offset_large;
     96     levels += 4 * stride;
     97     cc += 16;
     98     row -= 4;
     99   } while (row);
    100 
    101   coeff_contexts[0] = 0;
    102 }
    103 
    104 static INLINE void get_4_nz_map_contexts_hor(const uint8_t *levels,
    105                                              const int height,
    106                                              const ptrdiff_t *const offsets,
    107                                              int8_t *coeff_contexts) {
    108   const int stride = 4 + TX_PAD_HOR;
    109   const __m128i pos_to_offset =
    110       _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    111                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    112                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    113                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    114                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    115                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    116                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    117                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    118   __m128i count;
    119   __m128i level[5];
    120   int row = height;
    121 
    122   assert(!(height % 4));
    123 
    124   do {
    125     load_levels_4x4x5_sse2(levels, stride, offsets, level);
    126     count = get_coeff_contexts_kernel_sse2(level);
    127     count = _mm_add_epi8(count, pos_to_offset);
    128     _mm_store_si128((__m128i *)coeff_contexts, count);
    129     levels += 4 * stride;
    130     coeff_contexts += 16;
    131     row -= 4;
    132   } while (row);
    133 }
    134 
    135 static INLINE void get_4_nz_map_contexts_ver(const uint8_t *levels,
    136                                              const int height,
    137                                              const ptrdiff_t *const offsets,
    138                                              int8_t *coeff_contexts) {
    139   const int stride = 4 + TX_PAD_HOR;
    140   const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
    141   __m128i pos_to_offset =
    142       _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    143                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    144                     SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    145                     SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    146                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    147                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    148                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    149                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    150   __m128i count;
    151   __m128i level[5];
    152   int row = height;
    153 
    154   assert(!(height % 4));
    155 
    156   do {
    157     load_levels_4x4x5_sse2(levels, stride, offsets, level);
    158     count = get_coeff_contexts_kernel_sse2(level);
    159     count = _mm_add_epi8(count, pos_to_offset);
    160     _mm_store_si128((__m128i *)coeff_contexts, count);
    161     pos_to_offset = pos_to_offset_large;
    162     levels += 4 * stride;
    163     coeff_contexts += 16;
    164     row -= 4;
    165   } while (row);
    166 }
    167 
    168 static INLINE void get_8_coeff_contexts_2d(const uint8_t *levels,
    169                                            const int height,
    170                                            const ptrdiff_t *const offsets,
    171                                            int8_t *coeff_contexts) {
    172   const int stride = 8 + TX_PAD_HOR;
    173   int8_t *cc = coeff_contexts;
    174   int row = height;
    175   __m128i count;
    176   __m128i level[5];
    177   __m128i pos_to_offset[3];
    178 
    179   assert(!(height % 2));
    180 
    181   if (height == 8) {
    182     pos_to_offset[0] =
    183         _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21);
    184     pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
    185                                      21, 21, 21, 21, 21);
    186   } else if (height < 8) {
    187     pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21,
    188                                      21, 21, 21, 21);
    189     pos_to_offset[1] = _mm_setr_epi8(16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21,
    190                                      21, 21, 21, 21, 21);
    191   } else {
    192     pos_to_offset[0] = _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    193                                      11, 11, 11, 11, 11);
    194     pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
    195                                      21, 21, 21, 21, 21);
    196   }
    197   pos_to_offset[2] = _mm_set1_epi8(21);
    198 
    199   do {
    200     load_levels_8x2x5_sse2(levels, stride, offsets, level);
    201     count = get_coeff_contexts_kernel_sse2(level);
    202     count = _mm_add_epi8(count, pos_to_offset[0]);
    203     _mm_store_si128((__m128i *)cc, count);
    204     pos_to_offset[0] = pos_to_offset[1];
    205     pos_to_offset[1] = pos_to_offset[2];
    206     levels += 2 * stride;
    207     cc += 16;
    208     row -= 2;
    209   } while (row);
    210 
    211   coeff_contexts[0] = 0;
    212 }
    213 
    214 static INLINE void get_8_coeff_contexts_hor(const uint8_t *levels,
    215                                             const int height,
    216                                             const ptrdiff_t *const offsets,
    217                                             int8_t *coeff_contexts) {
    218   const int stride = 8 + TX_PAD_HOR;
    219   const __m128i pos_to_offset =
    220       _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    221                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    222                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    223                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    224                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    225                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    226                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    227                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    228   int row = height;
    229   __m128i count;
    230   __m128i level[5];
    231 
    232   assert(!(height % 2));
    233 
    234   do {
    235     load_levels_8x2x5_sse2(levels, stride, offsets, level);
    236     count = get_coeff_contexts_kernel_sse2(level);
    237     count = _mm_add_epi8(count, pos_to_offset);
    238     _mm_store_si128((__m128i *)coeff_contexts, count);
    239     levels += 2 * stride;
    240     coeff_contexts += 16;
    241     row -= 2;
    242   } while (row);
    243 }
    244 
    245 static INLINE void get_8_coeff_contexts_ver(const uint8_t *levels,
    246                                             const int height,
    247                                             const ptrdiff_t *const offsets,
    248                                             int8_t *coeff_contexts) {
    249   const int stride = 8 + TX_PAD_HOR;
    250   const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
    251   __m128i pos_to_offset =
    252       _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    253                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    254                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    255                     SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
    256                     SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    257                     SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    258                     SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
    259                     SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5);
    260   int row = height;
    261   __m128i count;
    262   __m128i level[5];
    263 
    264   assert(!(height % 2));
    265 
    266   do {
    267     load_levels_8x2x5_sse2(levels, stride, offsets, level);
    268     count = get_coeff_contexts_kernel_sse2(level);
    269     count = _mm_add_epi8(count, pos_to_offset);
    270     _mm_store_si128((__m128i *)coeff_contexts, count);
    271     pos_to_offset = pos_to_offset_large;
    272     levels += 2 * stride;
    273     coeff_contexts += 16;
    274     row -= 2;
    275   } while (row);
    276 }
    277 
    278 static INLINE void get_16n_coeff_contexts_2d(const uint8_t *levels,
    279                                              const int real_width,
    280                                              const int real_height,
    281                                              const int width, const int height,
    282                                              const ptrdiff_t *const offsets,
    283                                              int8_t *coeff_contexts) {
    284   const int stride = width + TX_PAD_HOR;
    285   int8_t *cc = coeff_contexts;
    286   int row = height;
    287   __m128i pos_to_offset[5];
    288   __m128i pos_to_offset_large[3];
    289   __m128i count;
    290   __m128i level[5];
    291 
    292   assert(!(width % 16));
    293 
    294   pos_to_offset_large[2] = _mm_set1_epi8(21);
    295   if (real_width == real_height) {
    296     pos_to_offset[0] = _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
    297                                      21, 21, 21, 21);
    298     pos_to_offset[1] = _mm_setr_epi8(1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
    299                                      21, 21, 21, 21, 21);
    300     pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    301                                      21, 21, 21, 21, 21);
    302     pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    303                                      21, 21, 21, 21, 21);
    304     pos_to_offset[4] = pos_to_offset_large[0] = pos_to_offset_large[1] =
    305         pos_to_offset_large[2];
    306   } else if (real_width > real_height) {
    307     pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21,
    308                                      21, 21, 21, 21, 21);
    309     pos_to_offset[1] = _mm_setr_epi8(16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21,
    310                                      21, 21, 21, 21, 21);
    311     pos_to_offset[2] = pos_to_offset[3] = pos_to_offset[4] = _mm_setr_epi8(
    312         16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21);
    313     pos_to_offset_large[0] = pos_to_offset_large[1] = pos_to_offset_large[2];
    314   } else {  // real_width < real_height
    315     pos_to_offset[0] = pos_to_offset[1] = _mm_setr_epi8(
    316         11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11);
    317     pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    318                                      21, 21, 21, 21, 21);
    319     pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
    320                                      21, 21, 21, 21, 21);
    321     pos_to_offset[4] = pos_to_offset_large[2];
    322     pos_to_offset_large[0] = pos_to_offset_large[1] = _mm_set1_epi8(11);
    323   }
    324 
    325   do {
    326     int w = width;
    327 
    328     do {
    329       load_levels_16x1x5_sse2(levels, stride, offsets, level);
    330       count = get_coeff_contexts_kernel_sse2(level);
    331       count = _mm_add_epi8(count, pos_to_offset[0]);
    332       _mm_store_si128((__m128i *)cc, count);
    333       levels += 16;
    334       cc += 16;
    335       w -= 16;
    336       pos_to_offset[0] = pos_to_offset_large[0];
    337     } while (w);
    338 
    339     pos_to_offset[0] = pos_to_offset[1];
    340     pos_to_offset[1] = pos_to_offset[2];
    341     pos_to_offset[2] = pos_to_offset[3];
    342     pos_to_offset[3] = pos_to_offset[4];
    343     pos_to_offset_large[0] = pos_to_offset_large[1];
    344     pos_to_offset_large[1] = pos_to_offset_large[2];
    345     levels += TX_PAD_HOR;
    346   } while (--row);
    347 
    348   coeff_contexts[0] = 0;
    349 }
    350 
    351 static INLINE void get_16n_coeff_contexts_hor(const uint8_t *levels,
    352                                               const int width, const int height,
    353                                               const ptrdiff_t *const offsets,
    354                                               int8_t *coeff_contexts) {
    355   const int stride = width + TX_PAD_HOR;
    356   const __m128i pos_to_offset_large =
    357       _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    358                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    359                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    360                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    361                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    362                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    363                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    364                     SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    365   __m128i count;
    366   __m128i level[5];
    367   int row = height;
    368 
    369   assert(!(width % 16));
    370 
    371   do {
    372     __m128i pos_to_offset =
    373         _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
    374                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    375                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    376                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    377                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    378                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    379                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
    380                       SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
    381     int w = width;
    382 
    383     do {
    384       load_levels_16x1x5_sse2(levels, stride, offsets, level);
    385       count = get_coeff_contexts_kernel_sse2(level);
    386       count = _mm_add_epi8(count, pos_to_offset);
    387       _mm_store_si128((__m128i *)coeff_contexts, count);
    388       pos_to_offset = pos_to_offset_large;
    389       levels += 16;
    390       coeff_contexts += 16;
    391       w -= 16;
    392     } while (w);
    393 
    394     levels += TX_PAD_HOR;
    395   } while (--row);
    396 }
    397 
    398 static INLINE void get_16n_coeff_contexts_ver(const uint8_t *levels,
    399                                               const int width, const int height,
    400                                               const ptrdiff_t *const offsets,
    401                                               int8_t *coeff_contexts) {
    402   const int stride = width + TX_PAD_HOR;
    403   __m128i pos_to_offset[3];
    404   __m128i count;
    405   __m128i level[5];
    406   int row = height;
    407 
    408   assert(!(width % 16));
    409 
    410   pos_to_offset[0] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 0);
    411   pos_to_offset[1] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 5);
    412   pos_to_offset[2] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
    413 
    414   do {
    415     int w = width;
    416 
    417     do {
    418       load_levels_16x1x5_sse2(levels, stride, offsets, level);
    419       count = get_coeff_contexts_kernel_sse2(level);
    420       count = _mm_add_epi8(count, pos_to_offset[0]);
    421       _mm_store_si128((__m128i *)coeff_contexts, count);
    422       levels += 16;
    423       coeff_contexts += 16;
    424       w -= 16;
    425     } while (w);
    426 
    427     pos_to_offset[0] = pos_to_offset[1];
    428     pos_to_offset[1] = pos_to_offset[2];
    429     levels += TX_PAD_HOR;
    430   } while (--row);
    431 }
    432 
    433 // Note: levels[] must be in the range [0, 127], inclusive.
    434 void av1_get_nz_map_contexts_sse2(const uint8_t *const levels,
    435                                   const int16_t *const scan, const uint16_t eob,
    436                                   const TX_SIZE tx_size,
    437                                   const TX_CLASS tx_class,
    438                                   int8_t *const coeff_contexts) {
    439   const int last_idx = eob - 1;
    440   if (!last_idx) {
    441     coeff_contexts[0] = 0;
    442     return;
    443   }
    444 
    445   const int real_width = tx_size_wide[tx_size];
    446   const int real_height = tx_size_high[tx_size];
    447   const int width = get_txb_wide(tx_size);
    448   const int height = get_txb_high(tx_size);
    449   const int stride = width + TX_PAD_HOR;
    450   ptrdiff_t offsets[3];
    451 
    452   /* coeff_contexts must be 16 byte aligned. */
    453   assert(!((intptr_t)coeff_contexts & 0xf));
    454 
    455   if (tx_class == TX_CLASS_2D) {
    456     offsets[0] = 0 * stride + 2;
    457     offsets[1] = 1 * stride + 1;
    458     offsets[2] = 2 * stride + 0;
    459 
    460     if (width == 4) {
    461       get_4_nz_map_contexts_2d(levels, height, offsets, coeff_contexts);
    462     } else if (width == 8) {
    463       get_8_coeff_contexts_2d(levels, height, offsets, coeff_contexts);
    464     } else if (width == 16) {
    465       get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
    466                                 offsets, coeff_contexts);
    467     } else {
    468       get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
    469                                 offsets, coeff_contexts);
    470     }
    471   } else if (tx_class == TX_CLASS_HORIZ) {
    472     offsets[0] = 2;
    473     offsets[1] = 3;
    474     offsets[2] = 4;
    475     if (width == 4) {
    476       get_4_nz_map_contexts_hor(levels, height, offsets, coeff_contexts);
    477     } else if (width == 8) {
    478       get_8_coeff_contexts_hor(levels, height, offsets, coeff_contexts);
    479     } else {
    480       get_16n_coeff_contexts_hor(levels, width, height, offsets,
    481                                  coeff_contexts);
    482     }
    483   } else {  // TX_CLASS_VERT
    484     offsets[0] = 2 * stride;
    485     offsets[1] = 3 * stride;
    486     offsets[2] = 4 * stride;
    487     if (width == 4) {
    488       get_4_nz_map_contexts_ver(levels, height, offsets, coeff_contexts);
    489     } else if (width == 8) {
    490       get_8_coeff_contexts_ver(levels, height, offsets, coeff_contexts);
    491     } else {
    492       get_16n_coeff_contexts_ver(levels, width, height, offsets,
    493                                  coeff_contexts);
    494     }
    495   }
    496 
    497   const int bwl = get_txb_bwl(tx_size);
    498   const int pos = scan[last_idx];
    499   if (last_idx <= (height << bwl) / 8)
    500     coeff_contexts[pos] = 1;
    501   else if (last_idx <= (height << bwl) / 4)
    502     coeff_contexts[pos] = 2;
    503   else
    504     coeff_contexts[pos] = 3;
    505 }
    506