"""Render the C source for NIR constant-expression evaluation.

The Mako template below is rendered once per run with the opcode table
imported from ``nir_opcodes`` and the result is printed to stdout.  The
helper functions in this module are exposed to the template so it can
reason about sized ("float32") and unsized ("float") type names.
"""

import re

# Splits a sized type name such as "uint16" into its alphabetic base
# ("uint") and its bit count ("16").
type_split_re = re.compile(r'(?P<type>[a-z]+)(?P<bits>\d+)')


def type_has_size(type_):
    """Return True when the type name ends in a digit (e.g. "int32")."""
    return type_[-1:].isdigit()


def type_size(type_):
    """Return the bit count encoded in a sized type name as an int.

    The caller must pass a sized type name; this is checked with an
    assertion.
    """
    assert type_has_size(type_)
    return int(type_split_re.match(type_).group('bits'))


def type_sizes(type_):
    """Return the list of bit sizes a type name can take.

    A sized type yields a one-element list holding its own size.  The
    unsized "float" yields [16, 32, 64]; any other unsized type yields
    [8, 16, 32, 64].
    """
    if type_has_size(type_):
        return [type_size(type_)]
    if type_ == 'float':
        return [16, 32, 64]
    return [8, 16, 32, 64]


def type_add_size(type_, size):
    """Return type_ with size appended, unless it is already sized."""
    if type_has_size(type_):
        return type_
    return type_ + str(size)


def op_bit_sizes(op):
    """Return the sorted bit sizes shared by all unsized types of op.

    Considers op.output_type and every entry of op.input_types.  Sized
    types place no constraint on the result.  Returns None when every
    type involved is already sized.
    """
    sizes = None
    if not type_has_size(op.output_type):
        sizes = set(type_sizes(op.output_type))

    for input_type in op.input_types:
        if type_has_size(input_type):
            continue
        candidate = set(type_sizes(input_type))
        # The first unsized type seeds the set; later ones narrow it.
        sizes = candidate if sizes is None else sizes & candidate

    return sorted(sizes) if sizes is not None else None


def get_const_field(type_):
    """Return the constant-value field name used for a sized type.

    "bool32" maps to "u32" and "float16" maps to "u16".  Every other
    sized type maps to the first letter of its base name followed by its
    bit count (e.g. "int32" -> "i32", "float64" -> "f64").

    Raises:
        ValueError: if type_ does not look like "<letters><digits>".
    """
    if type_ == "bool32":
        return "u32"
    if type_ == "float16":
        return "u16"

    m = type_split_re.match(type_)
    if not m:
        raise ValueError("unrecognized sized type: " + str(type_))
    return m.group('type')[0] + m.group('bits')


template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason (at] jlekstrand.net)
 */

#include <math.h>
#include "main/core.h"
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "nir_constant_expressions.h"

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packSnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packSnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackSnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
     */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackSnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
     */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packUnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packUnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackUnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm4x8: f / 255.0
     */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackUnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm2x16: f / 65535.0
     */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool32_t;
% for type in ["float", "int", "uint"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
};
% endfor
% endfor

struct bool32_vec {
    bool x;
    bool y;
    bool z;
    bool w;
};

<%def name="evaluate_op(op, bit_size)">
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "bool32":
            _src[${j}].u32[${k}] != 0,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}].u16[${k}]),
         % else:
            _src[${j}].${get_const_field(input_types[j])}[${k}],
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 4):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "bool32":
               const bool src${j} = _src[${j}].u32[_i] != 0;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}].u16[_i]);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}].${get_const_field(input_types[j])}[_i];
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "bool32":
            ## Sanitize the C value to a proper NIR bool
            _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
         % elif output_type == "float16":
            _dst_val.u16[_i] = _mesa_float_to_half(dst);
         % else:
            _dst_val.${get_const_field(output_type)}[_i] = dst;
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.  One such example is fnoise which has a
         ## const_expr of 0.0f.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "bool32":
            ## Sanitize the C value to a proper NIR bool
            _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
         % elif output_type == "float16":
            _dst_val.u16[${k}] = _mesa_float_to_half(dst.${"xyzw"[k]});
         % else:
            _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
         % endif
      % endfor
   % endif
</%def>

% for name, op in sorted(opcodes.items()):
static nir_const_value
evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
                 MAYBE_UNUSED nir_const_value *_src)
{
   nir_const_value _dst_val = { {0, } };

   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ${evaluate_op(op, 0)}
   % endif

   return _dst_val;
}
% endfor

nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components,
                      unsigned bit_width, nir_const_value *src)
{
   switch (op) {
% for name in sorted(opcodes):
   case nir_op_${name}:
      return evaluate_${name}(num_components, bit_width, src);
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""

from nir_opcodes import opcodes
from mako.template import Template

# A single parenthesized argument keeps this line valid both as a Python 2
# print statement and as a Python 3 print() call.
print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
                                type_has_size=type_has_size,
                                type_add_size=type_add_size,
                                op_bit_sizes=op_bit_sizes,
                                get_const_field=get_const_field))