Home | History | Annotate | Download | only in main
      1 /*
      2  * Copyright (C) 2014 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file texcompress_bptc.c
     26  * GL_ARB_texture_compression_bptc support.
     27  */
     28 
     29 #include <stdbool.h>
     30 #include "texcompress.h"
     31 #include "texcompress_bptc.h"
     32 #include "util/format_srgb.h"
     33 #include "util/half_float.h"
     34 #include "texstore.h"
     35 #include "macros.h"
     36 #include "image.h"
     37 
/* Width and height of a compressed block, in texels. */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and anchor-index row. */
#define N_PARTITIONS 64
/* Size of one compressed block, in bytes. */
#define BLOCK_BYTES 16
     41 
/* Static description of the bit layout of one UNORM block mode. The field
 * order matters: bptc_unorm_modes initialises these positionally.
 */
struct bptc_unorm_mode {
   /* Number of subsets (1 to 3); every subset has two endpoints. */
   int n_subsets;
   /* Width of the partition number field; 0 when there is none. */
   int n_partition_bits;
   /* True if the block carries a 2-bit rotation field. */
   bool has_rotation_bits;
   /* True if the block carries a 1-bit index selection field. */
   bool has_index_selection_bit;
   /* Stored bits per colour component of an endpoint, excluding any p-bit. */
   int n_color_bits;
   /* Stored bits per endpoint alpha, excluding any p-bit. When 0 no alpha is
    * stored and the endpoints decode with alpha 255.
    */
   int n_alpha_bits;
   /* True if every endpoint has its own p-bit. */
   bool has_endpoint_pbits;
   /* True if the two endpoints of a subset share a single p-bit. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index data. */
   int n_index_bits;
   /* Bits per texel in the secondary index data; 0 when there is none. */
   int n_secondary_index_bits;
};
     54 
/* Describes one run of consecutive bits in a float block and where those
 * bits land in the decoded endpoint values.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index; -1 marks the end of a bitfield list. */
   int8_t endpoint;
   /* Destination component within the endpoint (0 to 2). */
   uint8_t component;
   /* Bit position within the destination value where the run is placed. */
   uint8_t offset;
   /* Number of bits in the run. */
   uint8_t n_bits;
   /* True if the run is stored in reversed bit order. */
   bool reverse;
};
     62 
/* Static description of one float block mode. The field order matters:
 * bptc_float_modes initialises these positionally.
 */
struct bptc_float_mode {
   /* True for table slots marked as reserved; such entries set nothing else. */
   bool reserved;
   /* True if endpoints other than the first are stored as signed offsets
    * from the first endpoint.
    */
   bool transformed_endpoints;
   /* Non-zero when the mode is partitioned, in which case four endpoints are
    * decoded instead of two.
    */
   int n_partition_bits;
   /* Precision, in bits, of a decoded endpoint component before
    * unquantisation.
    */
   int n_endpoint_bits;
   /* Bits per texel index. NOTE(review): no reader of this field is visible
    * in this part of the file - confirm against the fetch code.
    */
   int n_index_bits;
   /* Per-component width of the stored offsets, used for sign extension
    * when transformed_endpoints is set.
    */
   int n_delta_bits[3];
   /* Bitfields in the order they appear in the block, terminated by an entry
    * whose endpoint is -1.
    */
   struct bptc_float_bitfield bitfields[24];
};
     72 
/* State for emitting a stream of bits through a byte pointer.
 *
 * NOTE(review): nothing in this part of the file uses this struct, so the
 * field notes below are inferred from names and types only - confirm against
 * the code that writes blocks.
 */
struct bit_writer {
   /* Presumably the byte currently being assembled. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Presumably where completed bytes are stored. */
   uint8_t *dst;
};
     78 
     79 static const struct bptc_unorm_mode
     80 bptc_unorm_modes[] = {
     81    /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
     82    /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
     83    /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
     84    /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
     85    /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
     86    /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
     87    /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
     88    /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
     89 };
     90 
/* Layout of the float block modes.
 *
 * Every entry is initialised positionally in the order of struct
 * bptc_float_mode:
 *
 *    { reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 *      n_index_bits, { n_delta_bits for each component }, { bitfields } }
 *
 * and every bitfield in the order of struct bptc_float_bitfield:
 *
 *    { endpoint, component, offset, n_bits, reverse }
 *
 * The bitfields are listed in the order they occur in the block and each
 * list ends with { -1 }, which is the endpoint value the endpoint extraction
 * loop stops at. Entries consisting only of `true` are reserved slots.
 *
 * NOTE(review): the comment in front of each entry looks like the mode's bit
 * pattern; the code that selects an entry is not visible in this part of the
 * file - confirm how the table is indexed.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
    241 
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is represented
 * by a 32-bit value which gives 2 bits per texel within the block: texel t
 * occupies bits 2*t and 2*t+1, so texel 0 is in the least-significant bits.
 * The value of the two bits represents which subset to use (0 or 1).
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
    266 
/* This partition table is used when the mode has three subsets. It is
 * indexed by the partition number read from the block and packs 2 bits per
 * texel, with texel t in bits 2*t and 2*t+1. In this case the values can be
 * 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
    289 
/* Texel numbers of the anchor texels other than texel 0, indexed as
 * anchor_indices[row][partition_num]. Row 0 is consulted for two-subset
 * modes; rows 1 and 2 are both consulted for three-subset modes. Texel 0 is
 * always treated as an anchor and is therefore not listed here.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
    318 
    319 static int
    320 extract_bits(const uint8_t *block,
    321              int offset,
    322              int n_bits)
    323 {
    324    int byte_index = offset / 8;
    325    int bit_index = offset % 8;
    326    int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
    327    int result = 0;
    328    int bit = 0;
    329 
    330    while (true) {
    331       result |= ((block[byte_index] >> bit_index) &
    332                  ((1 << n_bits_in_byte) - 1)) << bit;
    333 
    334       n_bits -= n_bits_in_byte;
    335 
    336       if (n_bits <= 0)
    337          return result;
    338 
    339       bit += n_bits_in_byte;
    340       byte_index++;
    341       bit_index = 0;
    342       n_bits_in_byte = MIN2(n_bits, 8);
    343    }
    344 }
    345 
    346 static uint8_t
    347 expand_component(uint8_t byte,
    348                  int n_bits)
    349 {
    350    /* Expands a n-bit quantity into a byte by copying the most-significant
    351     * bits into the unused least-significant bits.
    352     */
    353    return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
    354 }
    355 
    356 static int
    357 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
    358                         const uint8_t *block,
    359                         int bit_offset,
    360                         uint8_t endpoints[][4])
    361 {
    362    int component;
    363    int subset;
    364    int endpoint;
    365    int pbit;
    366    int n_components;
    367 
    368    /* Extract each color component */
    369    for (component = 0; component < 3; component++) {
    370       for (subset = 0; subset < mode->n_subsets; subset++) {
    371          for (endpoint = 0; endpoint < 2; endpoint++) {
    372             endpoints[subset * 2 + endpoint][component] =
    373                extract_bits(block, bit_offset, mode->n_color_bits);
    374             bit_offset += mode->n_color_bits;
    375          }
    376       }
    377    }
    378 
    379    /* Extract the alpha values */
    380    if (mode->n_alpha_bits > 0) {
    381       for (subset = 0; subset < mode->n_subsets; subset++) {
    382          for (endpoint = 0; endpoint < 2; endpoint++) {
    383             endpoints[subset * 2 + endpoint][3] =
    384                extract_bits(block, bit_offset, mode->n_alpha_bits);
    385             bit_offset += mode->n_alpha_bits;
    386          }
    387       }
    388 
    389       n_components = 4;
    390    } else {
    391       for (subset = 0; subset < mode->n_subsets; subset++)
    392          for (endpoint = 0; endpoint < 2; endpoint++)
    393             endpoints[subset * 2 + endpoint][3] = 255;
    394 
    395       n_components = 3;
    396    }
    397 
    398    /* Add in the p-bits */
    399    if (mode->has_endpoint_pbits) {
    400       for (subset = 0; subset < mode->n_subsets; subset++) {
    401          for (endpoint = 0; endpoint < 2; endpoint++) {
    402             pbit = extract_bits(block, bit_offset, 1);
    403             bit_offset += 1;
    404 
    405             for (component = 0; component < n_components; component++) {
    406                endpoints[subset * 2 + endpoint][component] <<= 1;
    407                endpoints[subset * 2 + endpoint][component] |= pbit;
    408             }
    409          }
    410       }
    411    } else if (mode->has_shared_pbits) {
    412       for (subset = 0; subset < mode->n_subsets; subset++) {
    413          pbit = extract_bits(block, bit_offset, 1);
    414          bit_offset += 1;
    415 
    416          for (endpoint = 0; endpoint < 2; endpoint++) {
    417             for (component = 0; component < n_components; component++) {
    418                endpoints[subset * 2 + endpoint][component] <<= 1;
    419                endpoints[subset * 2 + endpoint][component] |= pbit;
    420             }
    421          }
    422       }
    423    }
    424 
    425    /* Expand the n-bit values to a byte */
    426    for (subset = 0; subset < mode->n_subsets; subset++) {
    427       for (endpoint = 0; endpoint < 2; endpoint++) {
    428          for (component = 0; component < 3; component++) {
    429             endpoints[subset * 2 + endpoint][component] =
    430                expand_component(endpoints[subset * 2 + endpoint][component],
    431                                 mode->n_color_bits +
    432                                 mode->has_endpoint_pbits +
    433                                 mode->has_shared_pbits);
    434          }
    435 
    436          if (mode->n_alpha_bits > 0) {
    437             endpoints[subset * 2 + endpoint][3] =
    438                expand_component(endpoints[subset * 2 + endpoint][3],
    439                                 mode->n_alpha_bits +
    440                                 mode->has_endpoint_pbits +
    441                                 mode->has_shared_pbits);
    442          }
    443       }
    444    }
    445 
    446    return bit_offset;
    447 }
    448 
    449 static bool
    450 is_anchor(int n_subsets,
    451           int partition_num,
    452           int texel)
    453 {
    454    if (texel == 0)
    455       return true;
    456 
    457    switch (n_subsets) {
    458    case 1:
    459       return false;
    460    case 2:
    461       return anchor_indices[0][partition_num] == texel;
    462    case 3:
    463       return (anchor_indices[1][partition_num] == texel ||
    464               anchor_indices[2][partition_num] == texel);
    465    default:
    466       assert(false);
    467       return false;
    468    }
    469 }
    470 
    471 static int
    472 count_anchors_before_texel(int n_subsets,
    473                            int partition_num,
    474                            int texel)
    475 {
    476    int count = 1;
    477 
    478    if (texel == 0)
    479       return 0;
    480 
    481    switch (n_subsets) {
    482    case 1:
    483       break;
    484    case 2:
    485       if (texel > anchor_indices[0][partition_num])
    486          count++;
    487       break;
    488    case 3:
    489       if (texel > anchor_indices[1][partition_num])
    490          count++;
    491       if (texel > anchor_indices[2][partition_num])
    492          count++;
    493       break;
    494    default:
    495       assert(false);
    496       return 0;
    497    }
    498 
    499    return count;
    500 }
    501 
    502 static int32_t
    503 interpolate(int32_t a, int32_t b,
    504             int index,
    505             int index_bits)
    506 {
    507    static const uint8_t weights2[] = { 0, 21, 43, 64 };
    508    static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
    509    static const uint8_t weights4[] =
    510       { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
    511    static const uint8_t *weights[] = {
    512       NULL, NULL, weights2, weights3, weights4
    513    };
    514    int weight;
    515 
    516    weight = weights[index_bits][index];
    517 
    518    return ((64 - weight) * a + weight * b + 32) >> 6;
    519 }
    520 
    521 static void
    522 apply_rotation(int rotation,
    523                uint8_t *result)
    524 {
    525    uint8_t t;
    526 
    527    if (rotation == 0)
    528       return;
    529 
    530    rotation--;
    531 
    532    t = result[rotation];
    533    result[rotation] = result[3];
    534    result[3] = t;
    535 }
    536 
    537 static void
    538 fetch_rgba_unorm_from_block(const uint8_t *block,
    539                             uint8_t *result,
    540                             int texel)
    541 {
    542    int mode_num = ffs(block[0]);
    543    const struct bptc_unorm_mode *mode;
    544    int bit_offset, secondary_bit_offset;
    545    int partition_num;
    546    int subset_num;
    547    int rotation;
    548    int index_selection;
    549    int index_bits;
    550    int indices[2];
    551    int index;
    552    int anchors_before_texel;
    553    bool anchor;
    554    uint8_t endpoints[3 * 2][4];
    555    uint32_t subsets;
    556    int component;
    557 
    558    if (mode_num == 0) {
    559       /* According to the spec this mode is reserved and shouldn't be used. */
    560       memset(result, 0, 3);
    561       result[3] = 0xff;
    562       return;
    563    }
    564 
    565    mode = bptc_unorm_modes + mode_num - 1;
    566    bit_offset = mode_num;
    567 
    568    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
    569    bit_offset += mode->n_partition_bits;
    570 
    571    switch (mode->n_subsets) {
    572    case 1:
    573       subsets = 0;
    574       break;
    575    case 2:
    576       subsets = partition_table1[partition_num];
    577       break;
    578    case 3:
    579       subsets = partition_table2[partition_num];
    580       break;
    581    default:
    582       assert(false);
    583       return;
    584    }
    585 
    586    if (mode->has_rotation_bits) {
    587       rotation = extract_bits(block, bit_offset, 2);
    588       bit_offset += 2;
    589    } else {
    590       rotation = 0;
    591    }
    592 
    593    if (mode->has_index_selection_bit) {
    594       index_selection = extract_bits(block, bit_offset, 1);
    595       bit_offset++;
    596    } else {
    597       index_selection = 0;
    598    }
    599 
    600    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
    601 
    602    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
    603                                                      partition_num, texel);
    604 
    605    /* Calculate the offset to the secondary index */
    606    secondary_bit_offset = (bit_offset +
    607                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
    608                            mode->n_subsets +
    609                            mode->n_secondary_index_bits * texel -
    610                            anchors_before_texel);
    611 
    612    /* Calculate the offset to the primary index for this texel */
    613    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
    614 
    615    subset_num = (subsets >> (texel * 2)) & 3;
    616 
    617    anchor = is_anchor(mode->n_subsets, partition_num, texel);
    618 
    619    index_bits = mode->n_index_bits;
    620    if (anchor)
    621       index_bits--;
    622    indices[0] = extract_bits(block, bit_offset, index_bits);
    623 
    624    if (mode->n_secondary_index_bits) {
    625       index_bits = mode->n_secondary_index_bits;
    626       if (anchor)
    627          index_bits--;
    628       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
    629    }
    630 
    631    index = indices[index_selection];
    632    index_bits = (index_selection ?
    633                  mode->n_secondary_index_bits :
    634                  mode->n_index_bits);
    635 
    636    for (component = 0; component < 3; component++)
    637       result[component] = interpolate(endpoints[subset_num * 2][component],
    638                                       endpoints[subset_num * 2 + 1][component],
    639                                       index,
    640                                       index_bits);
    641 
    642    /* Alpha uses the opposite index from the color components */
    643    if (mode->n_secondary_index_bits && !index_selection) {
    644       index = indices[1];
    645       index_bits = mode->n_secondary_index_bits;
    646    } else {
    647       index = indices[0];
    648       index_bits = mode->n_index_bits;
    649    }
    650 
    651    result[3] = interpolate(endpoints[subset_num * 2][3],
    652                            endpoints[subset_num * 2 + 1][3],
    653                            index,
    654                            index_bits);
    655 
    656    apply_rotation(rotation, result);
    657 }
    658 
    659 static void
    660 fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
    661                             GLint rowStride, GLint i, GLint j,
    662                             GLubyte *texel)
    663 {
    664    const GLubyte *block;
    665 
    666    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
    667 
    668    fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
    669 }
    670 
    671 static void
    672 fetch_bptc_rgba_unorm(const GLubyte *map,
    673                       GLint rowStride, GLint i, GLint j,
    674                       GLfloat *texel)
    675 {
    676    GLubyte texel_bytes[4];
    677 
    678    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
    679 
    680    texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
    681    texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
    682    texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
    683    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
    684 }
    685 
    686 static void
    687 fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
    688                             GLint rowStride, GLint i, GLint j,
    689                             GLfloat *texel)
    690 {
    691    GLubyte texel_bytes[4];
    692 
    693    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
    694 
    695    texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
    696    texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
    697    texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
    698    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
    699 }
    700 
    701 static int32_t
    702 sign_extend(int32_t value,
    703             int n_bits)
    704 {
    705    if ((value & (1 << (n_bits - 1)))) {
    706       value |= (~(int32_t) 0) << n_bits;
    707    }
    708 
    709    return value;
    710 }
    711 
    712 static int
    713 signed_unquantize(int value, int n_endpoint_bits)
    714 {
    715    bool sign;
    716 
    717    if (n_endpoint_bits >= 16)
    718       return value;
    719 
    720    if (value == 0)
    721       return 0;
    722 
    723    sign = false;
    724 
    725    if (value < 0) {
    726       sign = true;
    727       value = -value;
    728    }
    729 
    730    if (value >= (1 << (n_endpoint_bits - 1)) - 1)
    731       value = 0x7fff;
    732    else
    733       value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
    734 
    735    if (sign)
    736       value = -value;
    737 
    738    return value;
    739 }
    740 
    741 static int
    742 unsigned_unquantize(int value, int n_endpoint_bits)
    743 {
    744    if (n_endpoint_bits >= 15)
    745       return value;
    746 
    747    if (value == 0)
    748       return 0;
    749 
    750    if (value == (1 << n_endpoint_bits) - 1)
    751       return 0xffff;
    752 
    753    return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
    754 }
    755 
    756 static int
    757 extract_float_endpoints(const struct bptc_float_mode *mode,
    758                         const uint8_t *block,
    759                         int bit_offset,
    760                         int32_t endpoints[][3],
    761                         bool is_signed)
    762 {
    763    const struct bptc_float_bitfield *bitfield;
    764    int endpoint, component;
    765    int n_endpoints;
    766    int value;
    767    int i;
    768 
    769    if (mode->n_partition_bits)
    770       n_endpoints = 4;
    771    else
    772       n_endpoints = 2;
    773 
    774    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
    775 
    776    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
    777       value = extract_bits(block, bit_offset, bitfield->n_bits);
    778       bit_offset += bitfield->n_bits;
    779 
    780       if (bitfield->reverse) {
    781          for (i = 0; i < bitfield->n_bits; i++) {
    782             if (value & (1 << i))
    783                endpoints[bitfield->endpoint][bitfield->component] |=
    784                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
    785          }
    786       } else {
    787          endpoints[bitfield->endpoint][bitfield->component] |=
    788             value << bitfield->offset;
    789       }
    790    }
    791 
    792    if (mode->transformed_endpoints) {
    793       /* The endpoints are specified as signed offsets from e0 */
    794       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
    795          for (component = 0; component < 3; component++) {
    796             value = sign_extend(endpoints[endpoint][component],
    797                                 mode->n_delta_bits[component]);
    798             endpoints[endpoint][component] =
    799                ((endpoints[0][component] + value) &
    800                 ((1 << mode->n_endpoint_bits) - 1));
    801          }
    802       }
    803    }
    804 
    805    if (is_signed) {
    806       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
    807          for (component = 0; component < 3; component++) {
    808             value = sign_extend(endpoints[endpoint][component],
    809                                 mode->n_endpoint_bits);
    810             endpoints[endpoint][component] =
    811                signed_unquantize(value, mode->n_endpoint_bits);
    812          }
    813       }
    814    } else {
    815       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
    816          for (component = 0; component < 3; component++) {
    817             endpoints[endpoint][component] =
    818                unsigned_unquantize(endpoints[endpoint][component],
    819                                    mode->n_endpoint_bits);
    820          }
    821       }
    822    }
    823 
    824    return bit_offset;
    825 }
    826 
    827 static int32_t
    828 finish_unsigned_unquantize(int32_t value)
    829 {
    830    return value * 31 / 64;
    831 }
    832 
    833 static int32_t
    834 finish_signed_unquantize(int32_t value)
    835 {
    836    if (value < 0)
    837       return (-value * 31 / 32) | 0x8000;
    838    else
    839       return value * 31 / 32;
    840 }
    841 
    842 static void
    843 fetch_rgb_float_from_block(const uint8_t *block,
    844                            float *result,
    845                            int texel,
    846                            bool is_signed)
    847 {
    848    int mode_num;
    849    const struct bptc_float_mode *mode;
    850    int bit_offset;
    851    int partition_num;
    852    int subset_num;
    853    int index_bits;
    854    int index;
    855    int anchors_before_texel;
    856    int32_t endpoints[2 * 2][3];
    857    uint32_t subsets;
    858    int n_subsets;
    859    int component;
    860    int32_t value;
    861 
    862    if (block[0] & 0x2) {
    863       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
    864       bit_offset = 5;
    865    } else {
    866       mode_num = block[0] & 3;
    867       bit_offset = 2;
    868    }
    869 
    870    mode = bptc_float_modes + mode_num;
    871 
    872    if (mode->reserved) {
    873       memset(result, 0, sizeof result[0] * 3);
    874       result[3] = 1.0f;
    875       return;
    876    }
    877 
    878    bit_offset = extract_float_endpoints(mode, block, bit_offset,
    879                                         endpoints, is_signed);
    880 
    881    if (mode->n_partition_bits) {
    882       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
    883       bit_offset += mode->n_partition_bits;
    884 
    885       subsets = partition_table1[partition_num];
    886       n_subsets = 2;
    887    } else {
    888       partition_num = 0;
    889       subsets = 0;
    890       n_subsets = 1;
    891    }
    892 
    893    anchors_before_texel =
    894       count_anchors_before_texel(n_subsets, partition_num, texel);
    895 
    896    /* Calculate the offset to the primary index for this texel */
    897    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
    898 
    899    subset_num = (subsets >> (texel * 2)) & 3;
    900 
    901    index_bits = mode->n_index_bits;
    902    if (is_anchor(n_subsets, partition_num, texel))
    903       index_bits--;
    904    index = extract_bits(block, bit_offset, index_bits);
    905 
    906    for (component = 0; component < 3; component++) {
    907       value = interpolate(endpoints[subset_num * 2][component],
    908                           endpoints[subset_num * 2 + 1][component],
    909                           index,
    910                           mode->n_index_bits);
    911 
    912       if (is_signed)
    913          value = finish_signed_unquantize(value);
    914       else
    915          value = finish_unsigned_unquantize(value);
    916 
    917       result[component] = _mesa_half_to_float(value);
    918    }
    919 
    920    result[3] = 1.0f;
    921 }
    922 
    923 static void
    924 fetch_bptc_rgb_float(const GLubyte *map,
    925                      GLint rowStride, GLint i, GLint j,
    926                      GLfloat *texel,
    927                      bool is_signed)
    928 {
    929    const GLubyte *block;
    930 
    931    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
    932 
    933    fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
    934 }
    935 
    936 static void
    937 fetch_bptc_rgb_signed_float(const GLubyte *map,
    938                             GLint rowStride, GLint i, GLint j,
    939                             GLfloat *texel)
    940 {
    941    fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
    942 }
    943 
    944 static void
    945 fetch_bptc_rgb_unsigned_float(const GLubyte *map,
    946                               GLint rowStride, GLint i, GLint j,
    947                               GLfloat *texel)
    948 {
    949    fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
    950 }
    951 
    952 compressed_fetch_func
    953 _mesa_get_bptc_fetch_func(mesa_format format)
    954 {
    955    switch (format) {
    956    case MESA_FORMAT_BPTC_RGBA_UNORM:
    957       return fetch_bptc_rgba_unorm;
    958    case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
    959       return fetch_bptc_srgb_alpha_unorm;
    960    case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
    961       return fetch_bptc_rgb_signed_float;
    962    case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
    963       return fetch_bptc_rgb_unsigned_float;
    964    default:
    965       return NULL;
    966    }
    967 }
    968 
    969 static void
    970 write_bits(struct bit_writer *writer, int n_bits, int value)
    971 {
    972    do {
    973       if (n_bits + writer->pos >= 8) {
    974          *(writer->dst++) = writer->buf | (value << writer->pos);
    975          writer->buf = 0;
    976          value >>= (8 - writer->pos);
    977          n_bits -= (8 - writer->pos);
    978          writer->pos = 0;
    979       } else {
    980          writer->buf |= value << writer->pos;
    981          writer->pos += n_bits;
    982          break;
    983       }
    984    } while (n_bits > 0);
    985 }
    986 
    987 static void
    988 get_average_luminance_alpha_unorm(int width, int height,
    989                                   const uint8_t *src, int src_rowstride,
    990                                   int *average_luminance, int *average_alpha)
    991 {
    992    int luminance_sum = 0, alpha_sum = 0;
    993    int y, x;
    994 
    995    for (y = 0; y < height; y++) {
    996       for (x = 0; x < width; x++) {
    997          luminance_sum += src[0] + src[1] + src[2];
    998          alpha_sum += src[3];
    999          src += 4;
   1000       }
   1001       src += src_rowstride - width * 4;
   1002    }
   1003 
   1004    *average_luminance = luminance_sum / (width * height);
   1005    *average_alpha = alpha_sum / (width * height);
   1006 }
   1007 
   1008 static void
   1009 get_rgba_endpoints_unorm(int width, int height,
   1010                          const uint8_t *src, int src_rowstride,
   1011                          int average_luminance, int average_alpha,
   1012                          uint8_t endpoints[][4])
   1013 {
   1014    int endpoint_luminances[2];
   1015    int midpoint;
   1016    int sums[2][4];
   1017    int endpoint;
   1018    int luminance;
   1019    uint8_t temp[3];
   1020    const uint8_t *p = src;
   1021    int rgb_left_endpoint_count = 0;
   1022    int alpha_left_endpoint_count = 0;
   1023    int y, x, i;
   1024 
   1025    memset(sums, 0, sizeof sums);
   1026 
   1027    for (y = 0; y < height; y++) {
   1028       for (x = 0; x < width; x++) {
   1029          luminance = p[0] + p[1] + p[2];
   1030          if (luminance < average_luminance) {
   1031             endpoint = 0;
   1032             rgb_left_endpoint_count++;
   1033          } else {
   1034             endpoint = 1;
   1035          }
   1036          for (i = 0; i < 3; i++)
   1037             sums[endpoint][i] += p[i];
   1038 
   1039          if (p[2] < average_alpha) {
   1040             endpoint = 0;
   1041             alpha_left_endpoint_count++;
   1042          } else {
   1043             endpoint = 1;
   1044          }
   1045          sums[endpoint][3] += p[3];
   1046 
   1047          p += 4;
   1048       }
   1049 
   1050       p += src_rowstride - width * 4;
   1051    }
   1052 
   1053    if (rgb_left_endpoint_count == 0 ||
   1054        rgb_left_endpoint_count == width * height) {
   1055       for (i = 0; i < 3; i++)
   1056          endpoints[0][i] = endpoints[1][i] =
   1057             (sums[0][i] + sums[1][i]) / (width * height);
   1058    } else {
   1059       for (i = 0; i < 3; i++) {
   1060          endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
   1061          endpoints[1][i] = (sums[1][i] /
   1062                             (width * height - rgb_left_endpoint_count));
   1063       }
   1064    }
   1065 
   1066    if (alpha_left_endpoint_count == 0 ||
   1067        alpha_left_endpoint_count == width * height) {
   1068       endpoints[0][3] = endpoints[1][3] =
   1069          (sums[0][3] + sums[1][3]) / (width * height);
   1070    } else {
   1071          endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
   1072          endpoints[1][3] = (sums[1][3] /
   1073                             (width * height - alpha_left_endpoint_count));
   1074    }
   1075 
   1076    /* We may need to swap the endpoints to ensure the most-significant bit of
   1077     * the first index is zero */
   1078 
   1079    for (endpoint = 0; endpoint < 2; endpoint++) {
   1080       endpoint_luminances[endpoint] =
   1081          endpoints[endpoint][0] +
   1082          endpoints[endpoint][1] +
   1083          endpoints[endpoint][2];
   1084    }
   1085    midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;
   1086 
   1087    if ((src[0] + src[1] + src[2] <= midpoint) !=
   1088        (endpoint_luminances[0] <= midpoint)) {
   1089       memcpy(temp, endpoints[0], 3);
   1090       memcpy(endpoints[0], endpoints[1], 3);
   1091       memcpy(endpoints[1], temp, 3);
   1092    }
   1093 
   1094    /* Same for the alpha endpoints */
   1095 
   1096    midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;
   1097 
   1098    if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
   1099       temp[0] = endpoints[0][3];
   1100       endpoints[0][3] = endpoints[1][3];
   1101       endpoints[1][3] = temp[0];
   1102    }
   1103 }
   1104 
   1105 static void
   1106 write_rgb_indices_unorm(struct bit_writer *writer,
   1107                         int src_width, int src_height,
   1108                         const uint8_t *src, int src_rowstride,
   1109                         uint8_t endpoints[][4])
   1110 {
   1111    int luminance;
   1112    int endpoint_luminances[2];
   1113    int endpoint;
   1114    int index;
   1115    int y, x;
   1116 
   1117    for (endpoint = 0; endpoint < 2; endpoint++) {
   1118       endpoint_luminances[endpoint] =
   1119          endpoints[endpoint][0] +
   1120          endpoints[endpoint][1] +
   1121          endpoints[endpoint][2];
   1122    }
   1123 
   1124    /* If the endpoints have the same luminance then we'll just use index 0 for
   1125     * all of the texels */
   1126    if (endpoint_luminances[0] == endpoint_luminances[1]) {
   1127       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
   1128       return;
   1129    }
   1130 
   1131    for (y = 0; y < src_height; y++) {
   1132       for (x = 0; x < src_width; x++) {
   1133          luminance = src[0] + src[1] + src[2];
   1134 
   1135          index = ((luminance - endpoint_luminances[0]) * 3 /
   1136                   (endpoint_luminances[1] - endpoint_luminances[0]));
   1137          if (index < 0)
   1138             index = 0;
   1139          else if (index > 3)
   1140             index = 3;
   1141 
   1142          assert(x != 0 || y != 0 || index < 2);
   1143 
   1144          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
   1145 
   1146          src += 4;
   1147       }
   1148 
   1149       /* Pad the indices out to the block size */
   1150       if (src_width < BLOCK_SIZE)
   1151          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
   1152 
   1153       src += src_rowstride - src_width * 4;
   1154    }
   1155 
   1156    /* Pad the indices out to the block size */
   1157    if (src_height < BLOCK_SIZE)
   1158       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
   1159 }
   1160 
   1161 static void
   1162 write_alpha_indices_unorm(struct bit_writer *writer,
   1163                           int src_width, int src_height,
   1164                           const uint8_t *src, int src_rowstride,
   1165                           uint8_t endpoints[][4])
   1166 {
   1167    int index;
   1168    int y, x;
   1169 
   1170    /* If the endpoints have the same alpha then we'll just use index 0 for
   1171     * all of the texels */
   1172    if (endpoints[0][3] == endpoints[1][3]) {
   1173       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
   1174       return;
   1175    }
   1176 
   1177    for (y = 0; y < src_height; y++) {
   1178       for (x = 0; x < src_width; x++) {
   1179          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
   1180                   ((int) endpoints[1][3] - endpoints[0][3]));
   1181          if (index < 0)
   1182             index = 0;
   1183          else if (index > 7)
   1184             index = 7;
   1185 
   1186          assert(x != 0 || y != 0 || index < 4);
   1187 
   1188          /* The first index has one less bit */
   1189          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
   1190 
   1191          src += 4;
   1192       }
   1193 
   1194       /* Pad the indices out to the block size */
   1195       if (src_width < BLOCK_SIZE)
   1196          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
   1197 
   1198       src += src_rowstride - src_width * 4;
   1199    }
   1200 
   1201    /* Pad the indices out to the block size */
   1202    if (src_height < BLOCK_SIZE)
   1203       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
   1204 }
   1205 
   1206 static void
   1207 compress_rgba_unorm_block(int src_width, int src_height,
   1208                           const uint8_t *src, int src_rowstride,
   1209                           uint8_t *dst)
   1210 {
   1211    int average_luminance, average_alpha;
   1212    uint8_t endpoints[2][4];
   1213    struct bit_writer writer;
   1214    int component, endpoint;
   1215 
   1216    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
   1217                                      &average_luminance, &average_alpha);
   1218    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
   1219                             average_luminance, average_alpha,
   1220                             endpoints);
   1221 
   1222    writer.dst = dst;
   1223    writer.pos = 0;
   1224    writer.buf = 0;
   1225 
   1226    write_bits(&writer, 5, 0x10); /* mode 4 */
   1227    write_bits(&writer, 2, 0); /* rotation 0 */
   1228    write_bits(&writer, 1, 0); /* index selection bit */
   1229 
   1230    /* Write the color endpoints */
   1231    for (component = 0; component < 3; component++)
   1232       for (endpoint = 0; endpoint < 2; endpoint++)
   1233          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
   1234 
   1235    /* Write the alpha endpoints */
   1236    for (endpoint = 0; endpoint < 2; endpoint++)
   1237       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
   1238 
   1239    write_rgb_indices_unorm(&writer,
   1240                            src_width, src_height,
   1241                            src, src_rowstride,
   1242                            endpoints);
   1243    write_alpha_indices_unorm(&writer,
   1244                              src_width, src_height,
   1245                              src, src_rowstride,
   1246                              endpoints);
   1247 }
   1248 
   1249 static void
   1250 compress_rgba_unorm(int width, int height,
   1251                     const uint8_t *src, int src_rowstride,
   1252                     uint8_t *dst, int dst_rowstride)
   1253 {
   1254    int dst_row_diff;
   1255    int y, x;
   1256 
   1257    if (dst_rowstride >= width * 4)
   1258       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
   1259    else
   1260       dst_row_diff = 0;
   1261 
   1262    for (y = 0; y < height; y += BLOCK_SIZE) {
   1263       for (x = 0; x < width; x += BLOCK_SIZE) {
   1264          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
   1265                                    MIN2(height - y, BLOCK_SIZE),
   1266                                    src + x * 4 + y * src_rowstride,
   1267                                    src_rowstride,
   1268                                    dst);
   1269          dst += BLOCK_BYTES;
   1270       }
   1271       dst += dst_row_diff;
   1272    }
   1273 }
   1274 
   1275 GLboolean
   1276 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
   1277 {
   1278    const GLubyte *pixels;
   1279    const GLubyte *tempImage = NULL;
   1280    int rowstride;
   1281 
   1282    if (srcFormat != GL_RGBA ||
   1283        srcType != GL_UNSIGNED_BYTE ||
   1284        ctx->_ImageTransferState ||
   1285        srcPacking->SwapBytes) {
   1286       /* convert image to RGBA/ubyte */
   1287       GLubyte *tempImageSlices[1];
   1288       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
   1289       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
   1290       if (!tempImage)
   1291          return GL_FALSE; /* out of memory */
   1292       tempImageSlices[0] = (GLubyte *) tempImage;
   1293       _mesa_texstore(ctx, dims,
   1294                      baseInternalFormat,
   1295                      _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
   1296                                            : MESA_FORMAT_A8B8G8R8_UNORM,
   1297                      rgbaRowStride, tempImageSlices,
   1298                      srcWidth, srcHeight, srcDepth,
   1299                      srcFormat, srcType, srcAddr,
   1300                      srcPacking);
   1301 
   1302       pixels = tempImage;
   1303       rowstride = srcWidth * 4;
   1304    } else {
   1305       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
   1306                                      srcFormat, srcType, 0, 0);
   1307       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
   1308                                          srcFormat, srcType);
   1309    }
   1310 
   1311    compress_rgba_unorm(srcWidth, srcHeight,
   1312                        pixels, rowstride,
   1313                        dstSlices[0], dstRowStride);
   1314 
   1315    free((void *) tempImage);
   1316 
   1317    return GL_TRUE;
   1318 }
   1319 
   1320 static float
   1321 get_average_luminance_float(int width, int height,
   1322                             const float *src, int src_rowstride)
   1323 {
   1324    float luminance_sum = 0;
   1325    int y, x;
   1326 
   1327    for (y = 0; y < height; y++) {
   1328       for (x = 0; x < width; x++) {
   1329          luminance_sum += src[0] + src[1] + src[2];
   1330          src += 3;
   1331       }
   1332       src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   1333    }
   1334 
   1335    return luminance_sum / (width * height);
   1336 }
   1337 
   1338 static float
   1339 clamp_value(float value, bool is_signed)
   1340 {
   1341    if (value > 65504.0f)
   1342       return 65504.0f;
   1343 
   1344    if (is_signed) {
   1345       if (value < -65504.0f)
   1346          return -65504.0f;
   1347       else
   1348          return value;
   1349    }
   1350 
   1351    if (value < 0.0f)
   1352       return 0.0f;
   1353 
   1354    return value;
   1355 }
   1356 
   1357 static void
   1358 get_endpoints_float(int width, int height,
   1359                     const float *src, int src_rowstride,
   1360                     float average_luminance, float endpoints[][3],
   1361                     bool is_signed)
   1362 {
   1363    float endpoint_luminances[2];
   1364    float midpoint;
   1365    float sums[2][3];
   1366    int endpoint, component;
   1367    float luminance;
   1368    float temp[3];
   1369    const float *p = src;
   1370    int left_endpoint_count = 0;
   1371    int y, x, i;
   1372 
   1373    memset(sums, 0, sizeof sums);
   1374 
   1375    for (y = 0; y < height; y++) {
   1376       for (x = 0; x < width; x++) {
   1377          luminance = p[0] + p[1] + p[2];
   1378          if (luminance < average_luminance) {
   1379             endpoint = 0;
   1380             left_endpoint_count++;
   1381          } else {
   1382             endpoint = 1;
   1383          }
   1384          for (i = 0; i < 3; i++)
   1385             sums[endpoint][i] += p[i];
   1386 
   1387          p += 3;
   1388       }
   1389 
   1390       p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   1391    }
   1392 
   1393    if (left_endpoint_count == 0 ||
   1394        left_endpoint_count == width * height) {
   1395       for (i = 0; i < 3; i++)
   1396          endpoints[0][i] = endpoints[1][i] =
   1397             (sums[0][i] + sums[1][i]) / (width * height);
   1398    } else {
   1399       for (i = 0; i < 3; i++) {
   1400          endpoints[0][i] = sums[0][i] / left_endpoint_count;
   1401          endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
   1402       }
   1403    }
   1404 
   1405    /* Clamp the endpoints to the range of a half float and strip out
   1406     * infinities */
   1407    for (endpoint = 0; endpoint < 2; endpoint++) {
   1408       for (component = 0; component < 3; component++) {
   1409          endpoints[endpoint][component] =
   1410             clamp_value(endpoints[endpoint][component], is_signed);
   1411       }
   1412    }
   1413 
   1414    /* We may need to swap the endpoints to ensure the most-significant bit of
   1415     * the first index is zero */
   1416 
   1417    for (endpoint = 0; endpoint < 2; endpoint++) {
   1418       endpoint_luminances[endpoint] =
   1419          endpoints[endpoint][0] +
   1420          endpoints[endpoint][1] +
   1421          endpoints[endpoint][2];
   1422    }
   1423    midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;
   1424 
   1425    if ((src[0] + src[1] + src[2] <= midpoint) !=
   1426        (endpoint_luminances[0] <= midpoint)) {
   1427       memcpy(temp, endpoints[0], sizeof temp);
   1428       memcpy(endpoints[0], endpoints[1], sizeof temp);
   1429       memcpy(endpoints[1], temp, sizeof temp);
   1430    }
   1431 }
   1432 
   1433 static void
   1434 write_rgb_indices_float(struct bit_writer *writer,
   1435                         int src_width, int src_height,
   1436                         const float *src, int src_rowstride,
   1437                         float endpoints[][3])
   1438 {
   1439    float luminance;
   1440    float endpoint_luminances[2];
   1441    int endpoint;
   1442    int index;
   1443    int y, x;
   1444 
   1445    for (endpoint = 0; endpoint < 2; endpoint++) {
   1446       endpoint_luminances[endpoint] =
   1447          endpoints[endpoint][0] +
   1448          endpoints[endpoint][1] +
   1449          endpoints[endpoint][2];
   1450    }
   1451 
   1452    /* If the endpoints have the same luminance then we'll just use index 0 for
   1453     * all of the texels */
   1454    if (endpoint_luminances[0] == endpoint_luminances[1]) {
   1455       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
   1456       return;
   1457    }
   1458 
   1459    for (y = 0; y < src_height; y++) {
   1460       for (x = 0; x < src_width; x++) {
   1461          luminance = src[0] + src[1] + src[2];
   1462 
   1463          index = ((luminance - endpoint_luminances[0]) * 15 /
   1464                   (endpoint_luminances[1] - endpoint_luminances[0]));
   1465          if (index < 0)
   1466             index = 0;
   1467          else if (index > 15)
   1468             index = 15;
   1469 
   1470          assert(x != 0 || y != 0 || index < 8);
   1471 
   1472          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
   1473 
   1474          src += 3;
   1475       }
   1476 
   1477       /* Pad the indices out to the block size */
   1478       if (src_width < BLOCK_SIZE)
   1479          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
   1480 
   1481       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
   1482    }
   1483 
   1484    /* Pad the indices out to the block size */
   1485    if (src_height < BLOCK_SIZE)
   1486       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
   1487 }
   1488 
   1489 static int
   1490 get_endpoint_value(float value, bool is_signed)
   1491 {
   1492    bool sign = false;
   1493    int half;
   1494 
   1495    if (is_signed) {
   1496       half = _mesa_float_to_half(value);
   1497 
   1498       if (half & 0x8000) {
   1499          half &= 0x7fff;
   1500          sign = true;
   1501       }
   1502 
   1503       half = (32 * half / 31) >> 6;
   1504 
   1505       if (sign)
   1506          half = -half & ((1 << 10) - 1);
   1507 
   1508       return half;
   1509    } else {
   1510       if (value <= 0.0f)
   1511          return 0;
   1512 
   1513       half = _mesa_float_to_half(value);
   1514 
   1515       return (64 * half / 31) >> 6;
   1516    }
   1517 }
   1518 
   1519 static void
   1520 compress_rgb_float_block(int src_width, int src_height,
   1521                          const float *src, int src_rowstride,
   1522                          uint8_t *dst,
   1523                          bool is_signed)
   1524 {
   1525    float average_luminance;
   1526    float endpoints[2][3];
   1527    struct bit_writer writer;
   1528    int component, endpoint;
   1529    int endpoint_value;
   1530 
   1531    average_luminance =
   1532       get_average_luminance_float(src_width, src_height, src, src_rowstride);
   1533    get_endpoints_float(src_width, src_height, src, src_rowstride,
   1534                        average_luminance, endpoints, is_signed);
   1535 
   1536    writer.dst = dst;
   1537    writer.pos = 0;
   1538    writer.buf = 0;
   1539 
   1540    write_bits(&writer, 5, 3); /* mode 3 */
   1541 
   1542    /* Write the endpoints */
   1543    for (endpoint = 0; endpoint < 2; endpoint++) {
   1544       for (component = 0; component < 3; component++) {
   1545          endpoint_value =
   1546             get_endpoint_value(endpoints[endpoint][component], is_signed);
   1547          write_bits(&writer, 10, endpoint_value);
   1548       }
   1549    }
   1550 
   1551    write_rgb_indices_float(&writer,
   1552                            src_width, src_height,
   1553                            src, src_rowstride,
   1554                            endpoints);
   1555 }
   1556 
   1557 static void
   1558 compress_rgb_float(int width, int height,
   1559                    const float *src, int src_rowstride,
   1560                    uint8_t *dst, int dst_rowstride,
   1561                    bool is_signed)
   1562 {
   1563    int dst_row_diff;
   1564    int y, x;
   1565 
   1566    if (dst_rowstride >= width * 4)
   1567       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
   1568    else
   1569       dst_row_diff = 0;
   1570 
   1571    for (y = 0; y < height; y += BLOCK_SIZE) {
   1572       for (x = 0; x < width; x += BLOCK_SIZE) {
   1573          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
   1574                                   MIN2(height - y, BLOCK_SIZE),
   1575                                   src + x * 3 +
   1576                                   y * src_rowstride / sizeof (float),
   1577                                   src_rowstride,
   1578                                   dst,
   1579                                   is_signed);
   1580          dst += BLOCK_BYTES;
   1581       }
   1582       dst += dst_row_diff;
   1583    }
   1584 }
   1585 
   1586 static GLboolean
   1587 texstore_bptc_rgb_float(TEXSTORE_PARAMS,
   1588                         bool is_signed)
   1589 {
   1590    const float *pixels;
   1591    const float *tempImage = NULL;
   1592    int rowstride;
   1593 
   1594    if (srcFormat != GL_RGB ||
   1595        srcType != GL_FLOAT ||
   1596        ctx->_ImageTransferState ||
   1597        srcPacking->SwapBytes) {
   1598       /* convert image to RGB/float */
   1599       GLfloat *tempImageSlices[1];
   1600       int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
   1601       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
   1602       if (!tempImage)
   1603          return GL_FALSE; /* out of memory */
   1604       tempImageSlices[0] = (GLfloat *) tempImage;
   1605       _mesa_texstore(ctx, dims,
   1606                      baseInternalFormat,
   1607                      MESA_FORMAT_RGB_FLOAT32,
   1608                      rgbRowStride, (GLubyte **)tempImageSlices,
   1609                      srcWidth, srcHeight, srcDepth,
   1610                      srcFormat, srcType, srcAddr,
   1611                      srcPacking);
   1612 
   1613       pixels = tempImage;
   1614       rowstride = srcWidth * sizeof(float) * 3;
   1615    } else {
   1616       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
   1617                                      srcFormat, srcType, 0, 0);
   1618       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
   1619                                          srcFormat, srcType);
   1620    }
   1621 
   1622    compress_rgb_float(srcWidth, srcHeight,
   1623                       pixels, rowstride,
   1624                       dstSlices[0], dstRowStride,
   1625                       is_signed);
   1626 
   1627    free((void *) tempImage);
   1628 
   1629    return GL_TRUE;
   1630 }
   1631 
   1632 GLboolean
   1633 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
   1634 {
   1635    assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
   1636 
   1637    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
   1638                                   dstFormat, dstRowStride, dstSlices,
   1639                                   srcWidth, srcHeight, srcDepth,
   1640                                   srcFormat, srcType,
   1641                                   srcAddr, srcPacking,
   1642                                   true /* signed */);
   1643 }
   1644 
   1645 GLboolean
   1646 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
   1647 {
   1648    assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
   1649 
   1650    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
   1651                                   dstFormat, dstRowStride, dstSlices,
   1652                                   srcWidth, srcHeight, srcDepth,
   1653                                   srcFormat, srcType,
   1654                                   srcAddr, srcPacking,
   1655                                   false /* unsigned */);
   1656 }
   1657