1 /* 2 * Copyright (C) 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file texcompress_bptc.c 26 * GL_ARB_texture_compression_bptc support. 27 */ 28 29 #include <stdbool.h> 30 #include "texcompress.h" 31 #include "texcompress_bptc.h" 32 #include "util/format_srgb.h" 33 #include "util/half_float.h" 34 #include "texstore.h" 35 #include "macros.h" 36 #include "image.h" 37 38 #define BLOCK_SIZE 4 39 #define N_PARTITIONS 64 40 #define BLOCK_BYTES 16 41 42 struct bptc_unorm_mode { 43 int n_subsets; 44 int n_partition_bits; 45 bool has_rotation_bits; 46 bool has_index_selection_bit; 47 int n_color_bits; 48 int n_alpha_bits; 49 bool has_endpoint_pbits; 50 bool has_shared_pbits; 51 int n_index_bits; 52 int n_secondary_index_bits; 53 }; 54 55 struct bptc_float_bitfield { 56 int8_t endpoint; 57 uint8_t component; 58 uint8_t offset; 59 uint8_t n_bits; 60 bool reverse; 61 }; 62 63 struct bptc_float_mode { 64 bool reserved; 65 bool transformed_endpoints; 66 int n_partition_bits; 67 int n_endpoint_bits; 68 int n_index_bits; 69 int n_delta_bits[3]; 70 struct bptc_float_bitfield bitfields[24]; 71 }; 72 73 struct bit_writer { 74 uint8_t buf; 75 int pos; 76 uint8_t *dst; 77 }; 78 79 static const struct bptc_unorm_mode 80 bptc_unorm_modes[] = { 81 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 }, 82 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 }, 83 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 }, 84 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 }, 85 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 }, 86 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 }, 87 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 }, 88 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 } 89 }; 90 91 static const struct bptc_float_mode 92 bptc_float_modes[] = { 93 /* 00 */ 94 { false, true, 5, 10, 3, { 5, 5, 5 }, 95 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false }, 96 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 97 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, 98 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, 99 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, 100 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, 101 { 3, 2, 3, 1, false }, 102 { -1 } } 103 }, 104 /* 01 */ 105 { false, true, 5, 7, 3, { 6, 6, 6 }, 106 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false }, 107 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false }, 108 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false }, 109 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false }, 110 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, 111 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false }, 112 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, 113 { 2, 0, 0, 6, false }, 114 { 3, 0, 0, 6, false }, 115 { -1 } } 116 }, 117 /* 00010 */ 118 { false, true, 5, 11, 3, { 5, 4, 4 }, 119 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 120 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false }, 121 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false }, 122 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false }, 123 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, 124 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, 125 { -1 } } 126 }, 127 /* 00011 */ 128 { false, false, 0, 10, 4, { 10, 10, 10 }, 129 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 130 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false }, 131 { -1 } } 132 }, 133 /* 00110 */ 134 { false, true, 5, 11, 3, { 4, 5, 4 }, 135 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 136 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false }, 137 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false }, 138 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false }, 139 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false }, 140 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false }, 141 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false }, 142 { -1 } } 143 }, 144 /* 00111 */ 145 { false, true, 0, 11, 4, { 9, 9, 9 }, 146 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 147 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false }, 148 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false }, 149 { -1 } } 150 }, 151 /* 01010 */ 152 { false, true, 5, 11, 3, { 4, 4, 5 }, 153 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 154 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false }, 155 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, 156 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, 157 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false }, 158 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false }, 159 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false }, 160 { -1 } } 161 }, 162 /* 01011 */ 163 { false, true, 0, 12, 4, { 8, 8, 8 }, 164 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 165 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false }, 166 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true }, 167 { -1 } } 168 }, 169 /* 01110 */ 170 { false, true, 5, 9, 3, { 5, 5, 5 }, 171 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false }, 172 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false }, 173 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, 174 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, 175 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, 176 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, 177 { 3, 2, 3, 1, false }, 178 { -1 } } 179 }, 180 /* 01111 */ 181 { false, true, 0, 16, 4, { 4, 4, 4 }, 182 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, 183 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false }, 184 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true }, 185 { -1 } } 186 }, 187 /* 10010 */ 188 { false, true, 5, 8, 3, { 6, 5, 5 }, 189 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false }, 190 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, 191 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false }, 192 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, 193 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, 194 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false }, 195 { 3, 0, 0, 6, false }, 196 { -1 } } 197 }, 198 /* 10011 */ 199 { true /* reserved */ }, 200 /* 10110 */ 201 { false, true, 5, 8, 3, { 5, 6, 5 }, 202 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false }, 203 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false }, 204 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false }, 205 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, 206 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, 207 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, 208 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, 209 { -1 } } 210 }, 211 /* 10111 */ 212 { true /* reserved */ }, 213 /* 11010 */ 214 { false, true, 5, 8, 3, { 5, 5, 6 }, 215 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, 216 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false }, 217 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, 218 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, 219 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, 220 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, 221 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, 222 { -1 } } 223 }, 224 /* 11011 */ 225 { true /* reserved */ }, 226 /* 11110 */ 227 { false, false, 5, 6, 3, { 6, 6, 6 }, 228 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false }, 229 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false }, 230 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false }, 231 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false }, 232 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, 233 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false }, 234 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, 235 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false }, 236 { -1 } } 237 }, 238 /* 11111 */ 239 { true /* reserved */ }, 240 }; 241 242 /* This partition table is used when the mode has two subsets. Each 243 * partition is represented by a 32-bit value which gives 2 bits per texel 244 * within the block. The value of the two bits represents which subset to use 245 * (0 or 1). 246 */ 247 static const uint32_t 248 partition_table1[N_PARTITIONS] = { 249 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U, 250 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U, 251 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U, 252 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U, 253 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U, 254 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U, 255 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U, 256 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U, 257 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U, 258 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U, 259 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U, 260 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U, 261 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U, 262 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U, 263 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U, 264 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U, 265 }; 266 267 /* This partition table is used when the mode has three subsets. In this case 268 * the values can be 0, 1 or 2. 269 */ 270 static const uint32_t 271 partition_table2[N_PARTITIONS] = { 272 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U, 273 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U, 274 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U, 275 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U, 276 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U, 277 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U, 278 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U, 279 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U, 280 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U, 281 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U, 282 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U, 283 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U, 284 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U, 285 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U, 286 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U, 287 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U 288 }; 289 290 static const uint8_t 291 anchor_indices[][N_PARTITIONS] = { 292 /* Anchor index values for the second subset of two-subset partitioning */ 293 { 294 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, 295 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2, 296 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6, 297 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf 298 }, 299 300 /* Anchor index values for the second subset of three-subset partitioning */ 301 { 302 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3, 303 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf, 304 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf, 305 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3 306 }, 307 308 /* Anchor index values for the third subset of three-subset 309 * partitioning 310 */ 311 { 312 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8, 313 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8, 314 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8, 315 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8 316 } 317 }; 318 319 static int 320 extract_bits(const uint8_t *block, 321 int offset, 322 int n_bits) 323 { 324 int byte_index = offset / 8; 325 int bit_index = offset % 8; 326 int n_bits_in_byte = MIN2(n_bits, 8 - bit_index); 327 int result = 0; 328 int bit = 0; 329 330 while (true) { 331 result |= ((block[byte_index] >> bit_index) & 332 ((1 << n_bits_in_byte) - 1)) << bit; 333 334 n_bits -= n_bits_in_byte; 335 336 if (n_bits <= 0) 337 return result; 338 339 bit += n_bits_in_byte; 340 byte_index++; 341 bit_index = 0; 342 n_bits_in_byte = MIN2(n_bits, 8); 343 } 344 } 345 346 static uint8_t 347 expand_component(uint8_t byte, 348 int n_bits) 349 { 350 /* Expands a n-bit quantity into a byte by copying the most-significant 351 * bits into the unused least-significant bits. 352 */ 353 return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8)); 354 } 355 356 static int 357 extract_unorm_endpoints(const struct bptc_unorm_mode *mode, 358 const uint8_t *block, 359 int bit_offset, 360 uint8_t endpoints[][4]) 361 { 362 int component; 363 int subset; 364 int endpoint; 365 int pbit; 366 int n_components; 367 368 /* Extract each color component */ 369 for (component = 0; component < 3; component++) { 370 for (subset = 0; subset < mode->n_subsets; subset++) { 371 for (endpoint = 0; endpoint < 2; endpoint++) { 372 endpoints[subset * 2 + endpoint][component] = 373 extract_bits(block, bit_offset, mode->n_color_bits); 374 bit_offset += mode->n_color_bits; 375 } 376 } 377 } 378 379 /* Extract the alpha values */ 380 if (mode->n_alpha_bits > 0) { 381 for (subset = 0; subset < mode->n_subsets; subset++) { 382 for (endpoint = 0; endpoint < 2; endpoint++) { 383 endpoints[subset * 2 + endpoint][3] = 384 extract_bits(block, bit_offset, mode->n_alpha_bits); 385 bit_offset += mode->n_alpha_bits; 386 } 387 } 388 389 n_components = 4; 390 } else { 391 for (subset = 0; subset < mode->n_subsets; subset++) 392 for (endpoint = 0; endpoint < 2; endpoint++) 393 endpoints[subset * 2 + endpoint][3] = 255; 394 395 n_components = 3; 396 } 397 398 /* Add in the p-bits */ 399 if (mode->has_endpoint_pbits) { 400 for (subset = 0; subset < mode->n_subsets; subset++) { 401 for (endpoint = 0; endpoint < 2; endpoint++) { 402 pbit = extract_bits(block, bit_offset, 1); 403 bit_offset += 1; 404 405 for (component = 0; component < n_components; component++) { 406 endpoints[subset * 2 + endpoint][component] <<= 1; 407 endpoints[subset * 2 + endpoint][component] |= pbit; 408 } 409 } 410 } 411 } else if (mode->has_shared_pbits) { 412 for (subset = 0; subset < mode->n_subsets; subset++) { 413 pbit = extract_bits(block, bit_offset, 1); 414 bit_offset += 1; 415 416 for (endpoint = 0; endpoint < 2; endpoint++) { 417 for (component = 0; component < n_components; component++) { 418 endpoints[subset * 2 + endpoint][component] <<= 1; 419 endpoints[subset * 2 + endpoint][component] |= pbit; 420 } 421 } 422 } 423 } 424 425 /* Expand the n-bit values to a byte */ 426 for (subset = 0; subset < mode->n_subsets; subset++) { 427 for (endpoint = 0; endpoint < 2; endpoint++) { 428 for (component = 0; component < 3; component++) { 429 endpoints[subset * 2 + endpoint][component] = 430 expand_component(endpoints[subset * 2 + endpoint][component], 431 mode->n_color_bits + 432 mode->has_endpoint_pbits + 433 mode->has_shared_pbits); 434 } 435 436 if (mode->n_alpha_bits > 0) { 437 endpoints[subset * 2 + endpoint][3] = 438 expand_component(endpoints[subset * 2 + endpoint][3], 439 mode->n_alpha_bits + 440 mode->has_endpoint_pbits + 441 mode->has_shared_pbits); 442 } 443 } 444 } 445 446 return bit_offset; 447 } 448 449 static bool 450 is_anchor(int n_subsets, 451 int partition_num, 452 int texel) 453 { 454 if (texel == 0) 455 return true; 456 457 switch (n_subsets) { 458 case 1: 459 return false; 460 case 2: 461 return anchor_indices[0][partition_num] == texel; 462 case 3: 463 return (anchor_indices[1][partition_num] == texel || 464 anchor_indices[2][partition_num] == texel); 465 default: 466 assert(false); 467 return false; 468 } 469 } 470 471 static int 472 count_anchors_before_texel(int n_subsets, 473 int partition_num, 474 int texel) 475 { 476 int count = 1; 477 478 if (texel == 0) 479 return 0; 480 481 switch (n_subsets) { 482 case 1: 483 break; 484 case 2: 485 if (texel > anchor_indices[0][partition_num]) 486 count++; 487 break; 488 case 3: 489 if (texel > anchor_indices[1][partition_num]) 490 count++; 491 if (texel > anchor_indices[2][partition_num]) 492 count++; 493 break; 494 default: 495 assert(false); 496 return 0; 497 } 498 499 return count; 500 } 501 502 static int32_t 503 interpolate(int32_t a, int32_t b, 504 int index, 505 int index_bits) 506 { 507 static const uint8_t weights2[] = { 0, 21, 43, 64 }; 508 static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; 509 static const uint8_t weights4[] = 510 { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; 511 static const uint8_t *weights[] = { 512 NULL, NULL, weights2, weights3, weights4 513 }; 514 int weight; 515 516 weight = weights[index_bits][index]; 517 518 return ((64 - weight) * a + weight * b + 32) >> 6; 519 } 520 521 static void 522 apply_rotation(int rotation, 523 uint8_t *result) 524 { 525 uint8_t t; 526 527 if (rotation == 0) 528 return; 529 530 rotation--; 531 532 t = result[rotation]; 533 result[rotation] = result[3]; 534 result[3] = t; 535 } 536 537 static void 538 fetch_rgba_unorm_from_block(const uint8_t *block, 539 uint8_t *result, 540 int texel) 541 { 542 int mode_num = ffs(block[0]); 543 const struct bptc_unorm_mode *mode; 544 int bit_offset, secondary_bit_offset; 545 int partition_num; 546 int subset_num; 547 int rotation; 548 int index_selection; 549 int index_bits; 550 int indices[2]; 551 int index; 552 int anchors_before_texel; 553 bool anchor; 554 uint8_t endpoints[3 * 2][4]; 555 uint32_t subsets; 556 int component; 557 558 if (mode_num == 0) { 559 /* According to the spec this mode is reserved and shouldn't be used. */ 560 memset(result, 0, 3); 561 result[3] = 0xff; 562 return; 563 } 564 565 mode = bptc_unorm_modes + mode_num - 1; 566 bit_offset = mode_num; 567 568 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); 569 bit_offset += mode->n_partition_bits; 570 571 switch (mode->n_subsets) { 572 case 1: 573 subsets = 0; 574 break; 575 case 2: 576 subsets = partition_table1[partition_num]; 577 break; 578 case 3: 579 subsets = partition_table2[partition_num]; 580 break; 581 default: 582 assert(false); 583 return; 584 } 585 586 if (mode->has_rotation_bits) { 587 rotation = extract_bits(block, bit_offset, 2); 588 bit_offset += 2; 589 } else { 590 rotation = 0; 591 } 592 593 if (mode->has_index_selection_bit) { 594 index_selection = extract_bits(block, bit_offset, 1); 595 bit_offset++; 596 } else { 597 index_selection = 0; 598 } 599 600 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints); 601 602 anchors_before_texel = count_anchors_before_texel(mode->n_subsets, 603 partition_num, texel); 604 605 /* Calculate the offset to the secondary index */ 606 secondary_bit_offset = (bit_offset + 607 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits - 608 mode->n_subsets + 609 mode->n_secondary_index_bits * texel - 610 anchors_before_texel); 611 612 /* Calculate the offset to the primary index for this texel */ 613 bit_offset += mode->n_index_bits * texel - anchors_before_texel; 614 615 subset_num = (subsets >> (texel * 2)) & 3; 616 617 anchor = is_anchor(mode->n_subsets, partition_num, texel); 618 619 index_bits = mode->n_index_bits; 620 if (anchor) 621 index_bits--; 622 indices[0] = extract_bits(block, bit_offset, index_bits); 623 624 if (mode->n_secondary_index_bits) { 625 index_bits = mode->n_secondary_index_bits; 626 if (anchor) 627 index_bits--; 628 indices[1] = extract_bits(block, secondary_bit_offset, index_bits); 629 } 630 631 index = indices[index_selection]; 632 index_bits = (index_selection ? 633 mode->n_secondary_index_bits : 634 mode->n_index_bits); 635 636 for (component = 0; component < 3; component++) 637 result[component] = interpolate(endpoints[subset_num * 2][component], 638 endpoints[subset_num * 2 + 1][component], 639 index, 640 index_bits); 641 642 /* Alpha uses the opposite index from the color components */ 643 if (mode->n_secondary_index_bits && !index_selection) { 644 index = indices[1]; 645 index_bits = mode->n_secondary_index_bits; 646 } else { 647 index = indices[0]; 648 index_bits = mode->n_index_bits; 649 } 650 651 result[3] = interpolate(endpoints[subset_num * 2][3], 652 endpoints[subset_num * 2 + 1][3], 653 index, 654 index_bits); 655 656 apply_rotation(rotation, result); 657 } 658 659 static void 660 fetch_bptc_rgba_unorm_bytes(const GLubyte *map, 661 GLint rowStride, GLint i, GLint j, 662 GLubyte *texel) 663 { 664 const GLubyte *block; 665 666 block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16; 667 668 fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4); 669 } 670 671 static void 672 fetch_bptc_rgba_unorm(const GLubyte *map, 673 GLint rowStride, GLint i, GLint j, 674 GLfloat *texel) 675 { 676 GLubyte texel_bytes[4]; 677 678 fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes); 679 680 texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]); 681 texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]); 682 texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]); 683 texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]); 684 } 685 686 static void 687 fetch_bptc_srgb_alpha_unorm(const GLubyte *map, 688 GLint rowStride, GLint i, GLint j, 689 GLfloat *texel) 690 { 691 GLubyte texel_bytes[4]; 692 693 fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes); 694 695 texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]); 696 texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]); 697 texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]); 698 texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]); 699 } 700 701 static int32_t 702 sign_extend(int32_t value, 703 int n_bits) 704 { 705 if ((value & (1 << (n_bits - 1)))) { 706 value |= (~(int32_t) 0) << n_bits; 707 } 708 709 return value; 710 } 711 712 static int 713 signed_unquantize(int value, int n_endpoint_bits) 714 { 715 bool sign; 716 717 if (n_endpoint_bits >= 16) 718 return value; 719 720 if (value == 0) 721 return 0; 722 723 sign = false; 724 725 if (value < 0) { 726 sign = true; 727 value = -value; 728 } 729 730 if (value >= (1 << (n_endpoint_bits - 1)) - 1) 731 value = 0x7fff; 732 else 733 value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1); 734 735 if (sign) 736 value = -value; 737 738 return value; 739 } 740 741 static int 742 unsigned_unquantize(int value, int n_endpoint_bits) 743 { 744 if (n_endpoint_bits >= 15) 745 return value; 746 747 if (value == 0) 748 return 0; 749 750 if (value == (1 << n_endpoint_bits) - 1) 751 return 0xffff; 752 753 return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1); 754 } 755 756 static int 757 extract_float_endpoints(const struct bptc_float_mode *mode, 758 const uint8_t *block, 759 int bit_offset, 760 int32_t endpoints[][3], 761 bool is_signed) 762 { 763 const struct bptc_float_bitfield *bitfield; 764 int endpoint, component; 765 int n_endpoints; 766 int value; 767 int i; 768 769 if (mode->n_partition_bits) 770 n_endpoints = 4; 771 else 772 n_endpoints = 2; 773 774 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3); 775 776 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) { 777 value = extract_bits(block, bit_offset, bitfield->n_bits); 778 bit_offset += bitfield->n_bits; 779 780 if (bitfield->reverse) { 781 for (i = 0; i < bitfield->n_bits; i++) { 782 if (value & (1 << i)) 783 endpoints[bitfield->endpoint][bitfield->component] |= 784 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset); 785 } 786 } else { 787 endpoints[bitfield->endpoint][bitfield->component] |= 788 value << bitfield->offset; 789 } 790 } 791 792 if (mode->transformed_endpoints) { 793 /* The endpoints are specified as signed offsets from e0 */ 794 for (endpoint = 1; endpoint < n_endpoints; endpoint++) { 795 for (component = 0; component < 3; component++) { 796 value = sign_extend(endpoints[endpoint][component], 797 mode->n_delta_bits[component]); 798 endpoints[endpoint][component] = 799 ((endpoints[0][component] + value) & 800 ((1 << mode->n_endpoint_bits) - 1)); 801 } 802 } 803 } 804 805 if (is_signed) { 806 for (endpoint = 0; endpoint < n_endpoints; endpoint++) { 807 for (component = 0; component < 3; component++) { 808 value = sign_extend(endpoints[endpoint][component], 809 mode->n_endpoint_bits); 810 endpoints[endpoint][component] = 811 signed_unquantize(value, mode->n_endpoint_bits); 812 } 813 } 814 } else { 815 for (endpoint = 0; endpoint < n_endpoints; endpoint++) { 816 for (component = 0; component < 3; component++) { 817 endpoints[endpoint][component] = 818 unsigned_unquantize(endpoints[endpoint][component], 819 mode->n_endpoint_bits); 820 } 821 } 822 } 823 824 return bit_offset; 825 } 826 827 static int32_t 828 finish_unsigned_unquantize(int32_t value) 829 { 830 return value * 31 / 64; 831 } 832 833 static int32_t 834 finish_signed_unquantize(int32_t value) 835 { 836 if (value < 0) 837 return (-value * 31 / 32) | 0x8000; 838 else 839 return value * 31 / 32; 840 } 841 842 static void 843 fetch_rgb_float_from_block(const uint8_t *block, 844 float *result, 845 int texel, 846 bool is_signed) 847 { 848 int mode_num; 849 const struct bptc_float_mode *mode; 850 int bit_offset; 851 int partition_num; 852 int subset_num; 853 int index_bits; 854 int index; 855 int anchors_before_texel; 856 int32_t endpoints[2 * 2][3]; 857 uint32_t subsets; 858 int n_subsets; 859 int component; 860 int32_t value; 861 862 if (block[0] & 0x2) { 863 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2; 864 bit_offset = 5; 865 } else { 866 mode_num = block[0] & 3; 867 bit_offset = 2; 868 } 869 870 mode = bptc_float_modes + mode_num; 871 872 if (mode->reserved) { 873 memset(result, 0, sizeof result[0] * 3); 874 result[3] = 1.0f; 875 return; 876 } 877 878 bit_offset = extract_float_endpoints(mode, block, bit_offset, 879 endpoints, is_signed); 880 881 if (mode->n_partition_bits) { 882 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); 883 bit_offset += mode->n_partition_bits; 884 885 subsets = partition_table1[partition_num]; 886 n_subsets = 2; 887 } else { 888 partition_num = 0; 889 subsets = 0; 890 n_subsets = 1; 891 } 892 893 anchors_before_texel = 894 count_anchors_before_texel(n_subsets, partition_num, texel); 895 896 /* Calculate the offset to the primary index for this texel */ 897 bit_offset += mode->n_index_bits * texel - anchors_before_texel; 898 899 subset_num = (subsets >> (texel * 2)) & 3; 900 901 index_bits = mode->n_index_bits; 902 if (is_anchor(n_subsets, partition_num, texel)) 903 index_bits--; 904 index = extract_bits(block, bit_offset, index_bits); 905 906 for (component = 0; component < 3; component++) { 907 value = interpolate(endpoints[subset_num * 2][component], 908 endpoints[subset_num * 2 + 1][component], 909 index, 910 mode->n_index_bits); 911 912 if (is_signed) 913 value = finish_signed_unquantize(value); 914 else 915 value = finish_unsigned_unquantize(value); 916 917 result[component] = _mesa_half_to_float(value); 918 } 919 920 result[3] = 1.0f; 921 } 922 923 static void 924 fetch_bptc_rgb_float(const GLubyte *map, 925 GLint rowStride, GLint i, GLint j, 926 GLfloat *texel, 927 bool is_signed) 928 { 929 const GLubyte *block; 930 931 block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16; 932 933 fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed); 934 } 935 936 static void 937 fetch_bptc_rgb_signed_float(const GLubyte *map, 938 GLint rowStride, GLint i, GLint j, 939 GLfloat *texel) 940 { 941 fetch_bptc_rgb_float(map, rowStride, i, j, texel, true); 942 } 943 944 static void 945 fetch_bptc_rgb_unsigned_float(const GLubyte *map, 946 GLint rowStride, GLint i, GLint j, 947 GLfloat *texel) 948 { 949 fetch_bptc_rgb_float(map, rowStride, i, j, texel, false); 950 } 951 952 compressed_fetch_func 953 _mesa_get_bptc_fetch_func(mesa_format format) 954 { 955 switch (format) { 956 case MESA_FORMAT_BPTC_RGBA_UNORM: 957 return fetch_bptc_rgba_unorm; 958 case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM: 959 return fetch_bptc_srgb_alpha_unorm; 960 case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT: 961 return fetch_bptc_rgb_signed_float; 962 case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT: 963 return fetch_bptc_rgb_unsigned_float; 964 default: 965 return NULL; 966 } 967 } 968 969 static void 970 write_bits(struct bit_writer *writer, int n_bits, int value) 971 { 972 do { 973 if (n_bits + writer->pos >= 8) { 974 *(writer->dst++) = writer->buf | (value << writer->pos); 975 writer->buf = 0; 976 value >>= (8 - writer->pos); 977 n_bits -= (8 - writer->pos); 978 writer->pos = 0; 979 } else { 980 writer->buf |= value << writer->pos; 981 writer->pos += n_bits; 982 break; 983 } 984 } while (n_bits > 0); 985 } 986 987 static void 988 get_average_luminance_alpha_unorm(int width, int height, 989 const uint8_t *src, int src_rowstride, 990 int *average_luminance, int *average_alpha) 991 { 992 int luminance_sum = 0, alpha_sum = 0; 993 int y, x; 994 995 for (y = 0; y < height; y++) { 996 for (x = 0; x < width; x++) { 997 luminance_sum += src[0] + src[1] + src[2]; 998 alpha_sum += src[3]; 999 src += 4; 1000 } 1001 src += src_rowstride - width * 4; 1002 } 1003 1004 *average_luminance = luminance_sum / (width * height); 1005 *average_alpha = alpha_sum / (width * height); 1006 } 1007 1008 static void 1009 get_rgba_endpoints_unorm(int width, int height, 1010 const uint8_t *src, int src_rowstride, 1011 int average_luminance, int average_alpha, 1012 uint8_t endpoints[][4]) 1013 { 1014 int endpoint_luminances[2]; 1015 int midpoint; 1016 int sums[2][4]; 1017 int endpoint; 1018 int luminance; 1019 uint8_t temp[3]; 1020 const uint8_t *p = src; 1021 int rgb_left_endpoint_count = 0; 1022 int alpha_left_endpoint_count = 0; 1023 int y, x, i; 1024 1025 memset(sums, 0, sizeof sums); 1026 1027 for (y = 0; y < height; y++) { 1028 for (x = 0; x < width; x++) { 1029 luminance = p[0] + p[1] + p[2]; 1030 if (luminance < average_luminance) { 1031 endpoint = 0; 1032 rgb_left_endpoint_count++; 1033 } else { 1034 endpoint = 1; 1035 } 1036 for (i = 0; i < 3; i++) 1037 sums[endpoint][i] += p[i]; 1038 1039 if (p[2] < average_alpha) { 1040 endpoint = 0; 1041 alpha_left_endpoint_count++; 1042 } else { 1043 endpoint = 1; 1044 } 1045 sums[endpoint][3] += p[3]; 1046 1047 p += 4; 1048 } 1049 1050 p += src_rowstride - width * 4; 1051 } 1052 1053 if (rgb_left_endpoint_count == 0 || 1054 rgb_left_endpoint_count == width * height) { 1055 for (i = 0; i < 3; i++) 1056 endpoints[0][i] = endpoints[1][i] = 1057 (sums[0][i] + sums[1][i]) / (width * height); 1058 } else { 1059 for (i = 0; i < 3; i++) { 1060 endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count; 1061 endpoints[1][i] = (sums[1][i] / 1062 (width * height - rgb_left_endpoint_count)); 1063 } 1064 } 1065 1066 if (alpha_left_endpoint_count == 0 || 1067 alpha_left_endpoint_count == width * height) { 1068 endpoints[0][3] = endpoints[1][3] = 1069 (sums[0][3] + sums[1][3]) / (width * height); 1070 } else { 1071 endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count; 1072 endpoints[1][3] = (sums[1][3] / 1073 (width * height - alpha_left_endpoint_count)); 1074 } 1075 1076 /* We may need to swap the endpoints to ensure the most-significant bit of 1077 * the first index is zero */ 1078 1079 for (endpoint = 0; endpoint < 2; endpoint++) { 1080 endpoint_luminances[endpoint] = 1081 endpoints[endpoint][0] + 1082 endpoints[endpoint][1] + 1083 endpoints[endpoint][2]; 1084 } 1085 midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2; 1086 1087 if ((src[0] + src[1] + src[2] <= midpoint) != 1088 (endpoint_luminances[0] <= midpoint)) { 1089 memcpy(temp, endpoints[0], 3); 1090 memcpy(endpoints[0], endpoints[1], 3); 1091 memcpy(endpoints[1], temp, 3); 1092 } 1093 1094 /* Same for the alpha endpoints */ 1095 1096 midpoint = (endpoints[0][3] + endpoints[1][3]) / 2; 1097 1098 if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) { 1099 temp[0] = endpoints[0][3]; 1100 endpoints[0][3] = endpoints[1][3]; 1101 endpoints[1][3] = temp[0]; 1102 } 1103 } 1104 1105 static void 1106 write_rgb_indices_unorm(struct bit_writer *writer, 1107 int src_width, int src_height, 1108 const uint8_t *src, int src_rowstride, 1109 uint8_t endpoints[][4]) 1110 { 1111 int luminance; 1112 int endpoint_luminances[2]; 1113 int endpoint; 1114 int index; 1115 int y, x; 1116 1117 for (endpoint = 0; endpoint < 2; endpoint++) { 1118 endpoint_luminances[endpoint] = 1119 endpoints[endpoint][0] + 1120 endpoints[endpoint][1] + 1121 endpoints[endpoint][2]; 1122 } 1123 1124 /* If the endpoints have the same luminance then we'll just use index 0 for 1125 * all of the texels */ 1126 if (endpoint_luminances[0] == endpoint_luminances[1]) { 1127 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0); 1128 return; 1129 } 1130 1131 for (y = 0; y < src_height; y++) { 1132 for (x = 0; x < src_width; x++) { 1133 luminance = src[0] + src[1] + src[2]; 1134 1135 index = ((luminance - endpoint_luminances[0]) * 3 / 1136 (endpoint_luminances[1] - endpoint_luminances[0])); 1137 if (index < 0) 1138 index = 0; 1139 else if (index > 3) 1140 index = 3; 1141 1142 assert(x != 0 || y != 0 || index < 2); 1143 1144 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index); 1145 1146 src += 4; 1147 } 1148 1149 /* Pad the indices out to the block size */ 1150 if (src_width < BLOCK_SIZE) 1151 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0); 1152 1153 src += src_rowstride - src_width * 4; 1154 } 1155 1156 /* Pad the indices out to the block size */ 1157 if (src_height < BLOCK_SIZE) 1158 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); 1159 } 1160 1161 static void 1162 write_alpha_indices_unorm(struct bit_writer *writer, 1163 int src_width, int src_height, 1164 const uint8_t *src, int src_rowstride, 1165 uint8_t endpoints[][4]) 1166 { 1167 int index; 1168 int y, x; 1169 1170 /* If the endpoints have the same alpha then we'll just use index 0 for 1171 * all of the texels */ 1172 if (endpoints[0][3] == endpoints[1][3]) { 1173 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0); 1174 return; 1175 } 1176 1177 for (y = 0; y < src_height; y++) { 1178 for (x = 0; x < src_width; x++) { 1179 index = (((int) src[3] - (int) endpoints[0][3]) * 7 / 1180 ((int) endpoints[1][3] - endpoints[0][3])); 1181 if (index < 0) 1182 index = 0; 1183 else if (index > 7) 1184 index = 7; 1185 1186 assert(x != 0 || y != 0 || index < 4); 1187 1188 /* The first index has one less bit */ 1189 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index); 1190 1191 src += 4; 1192 } 1193 1194 /* Pad the indices out to the block size */ 1195 if (src_width < BLOCK_SIZE) 1196 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0); 1197 1198 src += src_rowstride - src_width * 4; 1199 } 1200 1201 /* Pad the indices out to the block size */ 1202 if (src_height < BLOCK_SIZE) 1203 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); 1204 } 1205 1206 static void 1207 compress_rgba_unorm_block(int src_width, int src_height, 1208 const uint8_t *src, int src_rowstride, 1209 uint8_t *dst) 1210 { 1211 int average_luminance, average_alpha; 1212 uint8_t endpoints[2][4]; 1213 struct bit_writer writer; 1214 int component, endpoint; 1215 1216 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride, 1217 &average_luminance, &average_alpha); 1218 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride, 1219 average_luminance, average_alpha, 1220 endpoints); 1221 1222 writer.dst = dst; 1223 writer.pos = 0; 1224 writer.buf = 0; 1225 1226 write_bits(&writer, 5, 0x10); /* mode 4 */ 1227 write_bits(&writer, 2, 0); /* rotation 0 */ 1228 write_bits(&writer, 1, 0); /* index selection bit */ 1229 1230 /* Write the color endpoints */ 1231 for (component = 0; component < 3; component++) 1232 for (endpoint = 0; endpoint < 2; endpoint++) 1233 write_bits(&writer, 5, endpoints[endpoint][component] >> 3); 1234 1235 /* Write the alpha endpoints */ 1236 for (endpoint = 0; endpoint < 2; endpoint++) 1237 write_bits(&writer, 6, endpoints[endpoint][3] >> 2); 1238 1239 write_rgb_indices_unorm(&writer, 1240 src_width, src_height, 1241 src, src_rowstride, 1242 endpoints); 1243 write_alpha_indices_unorm(&writer, 1244 src_width, src_height, 1245 src, src_rowstride, 1246 endpoints); 1247 } 1248 1249 static void 1250 compress_rgba_unorm(int width, int height, 1251 const uint8_t *src, int src_rowstride, 1252 uint8_t *dst, int dst_rowstride) 1253 { 1254 int dst_row_diff; 1255 int y, x; 1256 1257 if (dst_rowstride >= width * 4) 1258 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; 1259 else 1260 dst_row_diff = 0; 1261 1262 for (y = 0; y < height; y += BLOCK_SIZE) { 1263 for (x = 0; x < width; x += BLOCK_SIZE) { 1264 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE), 1265 MIN2(height - y, BLOCK_SIZE), 1266 src + x * 4 + y * src_rowstride, 1267 src_rowstride, 1268 dst); 1269 dst += BLOCK_BYTES; 1270 } 1271 dst += dst_row_diff; 1272 } 1273 } 1274 1275 GLboolean 1276 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS) 1277 { 1278 const GLubyte *pixels; 1279 const GLubyte *tempImage = NULL; 1280 int rowstride; 1281 1282 if (srcFormat != GL_RGBA || 1283 srcType != GL_UNSIGNED_BYTE || 1284 ctx->_ImageTransferState || 1285 srcPacking->SwapBytes) { 1286 /* convert image to RGBA/ubyte */ 1287 GLubyte *tempImageSlices[1]; 1288 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte); 1289 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte)); 1290 if (!tempImage) 1291 return GL_FALSE; /* out of memory */ 1292 tempImageSlices[0] = (GLubyte *) tempImage; 1293 _mesa_texstore(ctx, dims, 1294 baseInternalFormat, 1295 _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM 1296 : MESA_FORMAT_A8B8G8R8_UNORM, 1297 rgbaRowStride, tempImageSlices, 1298 srcWidth, srcHeight, srcDepth, 1299 srcFormat, srcType, srcAddr, 1300 srcPacking); 1301 1302 pixels = tempImage; 1303 rowstride = srcWidth * 4; 1304 } else { 1305 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight, 1306 srcFormat, srcType, 0, 0); 1307 rowstride = _mesa_image_row_stride(srcPacking, srcWidth, 1308 srcFormat, srcType); 1309 } 1310 1311 compress_rgba_unorm(srcWidth, srcHeight, 1312 pixels, rowstride, 1313 dstSlices[0], dstRowStride); 1314 1315 free((void *) tempImage); 1316 1317 return GL_TRUE; 1318 } 1319 1320 static float 1321 get_average_luminance_float(int width, int height, 1322 const float *src, int src_rowstride) 1323 { 1324 float luminance_sum = 0; 1325 int y, x; 1326 1327 for (y = 0; y < height; y++) { 1328 for (x = 0; x < width; x++) { 1329 luminance_sum += src[0] + src[1] + src[2]; 1330 src += 3; 1331 } 1332 src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float); 1333 } 1334 1335 return luminance_sum / (width * height); 1336 } 1337 1338 static float 1339 clamp_value(float value, bool is_signed) 1340 { 1341 if (value > 65504.0f) 1342 return 65504.0f; 1343 1344 if (is_signed) { 1345 if (value < -65504.0f) 1346 return -65504.0f; 1347 else 1348 return value; 1349 } 1350 1351 if (value < 0.0f) 1352 return 0.0f; 1353 1354 return value; 1355 } 1356 1357 static void 1358 get_endpoints_float(int width, int height, 1359 const float *src, int src_rowstride, 1360 float average_luminance, float endpoints[][3], 1361 bool is_signed) 1362 { 1363 float endpoint_luminances[2]; 1364 float midpoint; 1365 float sums[2][3]; 1366 int endpoint, component; 1367 float luminance; 1368 float temp[3]; 1369 const float *p = src; 1370 int left_endpoint_count = 0; 1371 int y, x, i; 1372 1373 memset(sums, 0, sizeof sums); 1374 1375 for (y = 0; y < height; y++) { 1376 for (x = 0; x < width; x++) { 1377 luminance = p[0] + p[1] + p[2]; 1378 if (luminance < average_luminance) { 1379 endpoint = 0; 1380 left_endpoint_count++; 1381 } else { 1382 endpoint = 1; 1383 } 1384 for (i = 0; i < 3; i++) 1385 sums[endpoint][i] += p[i]; 1386 1387 p += 3; 1388 } 1389 1390 p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float); 1391 } 1392 1393 if (left_endpoint_count == 0 || 1394 left_endpoint_count == width * height) { 1395 for (i = 0; i < 3; i++) 1396 endpoints[0][i] = endpoints[1][i] = 1397 (sums[0][i] + sums[1][i]) / (width * height); 1398 } else { 1399 for (i = 0; i < 3; i++) { 1400 endpoints[0][i] = sums[0][i] / left_endpoint_count; 1401 endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count); 1402 } 1403 } 1404 1405 /* Clamp the endpoints to the range of a half float and strip out 1406 * infinities */ 1407 for (endpoint = 0; endpoint < 2; endpoint++) { 1408 for (component = 0; component < 3; component++) { 1409 endpoints[endpoint][component] = 1410 clamp_value(endpoints[endpoint][component], is_signed); 1411 } 1412 } 1413 1414 /* We may need to swap the endpoints to ensure the most-significant bit of 1415 * the first index is zero */ 1416 1417 for (endpoint = 0; endpoint < 2; endpoint++) { 1418 endpoint_luminances[endpoint] = 1419 endpoints[endpoint][0] + 1420 endpoints[endpoint][1] + 1421 endpoints[endpoint][2]; 1422 } 1423 midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f; 1424 1425 if ((src[0] + src[1] + src[2] <= midpoint) != 1426 (endpoint_luminances[0] <= midpoint)) { 1427 memcpy(temp, endpoints[0], sizeof temp); 1428 memcpy(endpoints[0], endpoints[1], sizeof temp); 1429 memcpy(endpoints[1], temp, sizeof temp); 1430 } 1431 } 1432 1433 static void 1434 write_rgb_indices_float(struct bit_writer *writer, 1435 int src_width, int src_height, 1436 const float *src, int src_rowstride, 1437 float endpoints[][3]) 1438 { 1439 float luminance; 1440 float endpoint_luminances[2]; 1441 int endpoint; 1442 int index; 1443 int y, x; 1444 1445 for (endpoint = 0; endpoint < 2; endpoint++) { 1446 endpoint_luminances[endpoint] = 1447 endpoints[endpoint][0] + 1448 endpoints[endpoint][1] + 1449 endpoints[endpoint][2]; 1450 } 1451 1452 /* If the endpoints have the same luminance then we'll just use index 0 for 1453 * all of the texels */ 1454 if (endpoint_luminances[0] == endpoint_luminances[1]) { 1455 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0); 1456 return; 1457 } 1458 1459 for (y = 0; y < src_height; y++) { 1460 for (x = 0; x < src_width; x++) { 1461 luminance = src[0] + src[1] + src[2]; 1462 1463 index = ((luminance - endpoint_luminances[0]) * 15 / 1464 (endpoint_luminances[1] - endpoint_luminances[0])); 1465 if (index < 0) 1466 index = 0; 1467 else if (index > 15) 1468 index = 15; 1469 1470 assert(x != 0 || y != 0 || index < 8); 1471 1472 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index); 1473 1474 src += 3; 1475 } 1476 1477 /* Pad the indices out to the block size */ 1478 if (src_width < BLOCK_SIZE) 1479 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0); 1480 1481 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float); 1482 } 1483 1484 /* Pad the indices out to the block size */ 1485 if (src_height < BLOCK_SIZE) 1486 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); 1487 } 1488 1489 static int 1490 get_endpoint_value(float value, bool is_signed) 1491 { 1492 bool sign = false; 1493 int half; 1494 1495 if (is_signed) { 1496 half = _mesa_float_to_half(value); 1497 1498 if (half & 0x8000) { 1499 half &= 0x7fff; 1500 sign = true; 1501 } 1502 1503 half = (32 * half / 31) >> 6; 1504 1505 if (sign) 1506 half = -half & ((1 << 10) - 1); 1507 1508 return half; 1509 } else { 1510 if (value <= 0.0f) 1511 return 0; 1512 1513 half = _mesa_float_to_half(value); 1514 1515 return (64 * half / 31) >> 6; 1516 } 1517 } 1518 1519 static void 1520 compress_rgb_float_block(int src_width, int src_height, 1521 const float *src, int src_rowstride, 1522 uint8_t *dst, 1523 bool is_signed) 1524 { 1525 float average_luminance; 1526 float endpoints[2][3]; 1527 struct bit_writer writer; 1528 int component, endpoint; 1529 int endpoint_value; 1530 1531 average_luminance = 1532 get_average_luminance_float(src_width, src_height, src, src_rowstride); 1533 get_endpoints_float(src_width, src_height, src, src_rowstride, 1534 average_luminance, endpoints, is_signed); 1535 1536 writer.dst = dst; 1537 writer.pos = 0; 1538 writer.buf = 0; 1539 1540 write_bits(&writer, 5, 3); /* mode 3 */ 1541 1542 /* Write the endpoints */ 1543 for (endpoint = 0; endpoint < 2; endpoint++) { 1544 for (component = 0; component < 3; component++) { 1545 endpoint_value = 1546 get_endpoint_value(endpoints[endpoint][component], is_signed); 1547 write_bits(&writer, 10, endpoint_value); 1548 } 1549 } 1550 1551 write_rgb_indices_float(&writer, 1552 src_width, src_height, 1553 src, src_rowstride, 1554 endpoints); 1555 } 1556 1557 static void 1558 compress_rgb_float(int width, int height, 1559 const float *src, int src_rowstride, 1560 uint8_t *dst, int dst_rowstride, 1561 bool is_signed) 1562 { 1563 int dst_row_diff; 1564 int y, x; 1565 1566 if (dst_rowstride >= width * 4) 1567 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; 1568 else 1569 dst_row_diff = 0; 1570 1571 for (y = 0; y < height; y += BLOCK_SIZE) { 1572 for (x = 0; x < width; x += BLOCK_SIZE) { 1573 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE), 1574 MIN2(height - y, BLOCK_SIZE), 1575 src + x * 3 + 1576 y * src_rowstride / sizeof (float), 1577 src_rowstride, 1578 dst, 1579 is_signed); 1580 dst += BLOCK_BYTES; 1581 } 1582 dst += dst_row_diff; 1583 } 1584 } 1585 1586 static GLboolean 1587 texstore_bptc_rgb_float(TEXSTORE_PARAMS, 1588 bool is_signed) 1589 { 1590 const float *pixels; 1591 const float *tempImage = NULL; 1592 int rowstride; 1593 1594 if (srcFormat != GL_RGB || 1595 srcType != GL_FLOAT || 1596 ctx->_ImageTransferState || 1597 srcPacking->SwapBytes) { 1598 /* convert image to RGB/float */ 1599 GLfloat *tempImageSlices[1]; 1600 int rgbRowStride = 3 * srcWidth * sizeof(GLfloat); 1601 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat)); 1602 if (!tempImage) 1603 return GL_FALSE; /* out of memory */ 1604 tempImageSlices[0] = (GLfloat *) tempImage; 1605 _mesa_texstore(ctx, dims, 1606 baseInternalFormat, 1607 MESA_FORMAT_RGB_FLOAT32, 1608 rgbRowStride, (GLubyte **)tempImageSlices, 1609 srcWidth, srcHeight, srcDepth, 1610 srcFormat, srcType, srcAddr, 1611 srcPacking); 1612 1613 pixels = tempImage; 1614 rowstride = srcWidth * sizeof(float) * 3; 1615 } else { 1616 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight, 1617 srcFormat, srcType, 0, 0); 1618 rowstride = _mesa_image_row_stride(srcPacking, srcWidth, 1619 srcFormat, srcType); 1620 } 1621 1622 compress_rgb_float(srcWidth, srcHeight, 1623 pixels, rowstride, 1624 dstSlices[0], dstRowStride, 1625 is_signed); 1626 1627 free((void *) tempImage); 1628 1629 return GL_TRUE; 1630 } 1631 1632 GLboolean 1633 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS) 1634 { 1635 assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT); 1636 1637 return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat, 1638 dstFormat, dstRowStride, dstSlices, 1639 srcWidth, srcHeight, srcDepth, 1640 srcFormat, srcType, 1641 srcAddr, srcPacking, 1642 true /* signed */); 1643 } 1644 1645 GLboolean 1646 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS) 1647 { 1648 assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT); 1649 1650 return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat, 1651 dstFormat, dstRowStride, dstSlices, 1652 srcWidth, srcHeight, srcDepth, 1653 srcFormat, srcType, 1654 srcAddr, srcPacking, 1655 false /* unsigned */); 1656 } 1657