#! /usr/bin/python2
"""Generate the C source for NIR constant-expression evaluation.

When executed as a script, this renders the Mako template below against
the opcode table imported from ``nir_opcodes`` and prints the resulting C
code to stdout.  The small helper functions are handed to the template so
it can turn (possibly unsized) type names into sized names and into the
field names used to index ``nir_const_value``.

The script runs unchanged on Python 2 and Python 3.
"""


def type_has_size(type_):
    """Return True if the type name ends in a digit (e.g. "int32")."""
    return type_[-1:].isdigit()


def type_sizes(type_):
    """Return the list of bit sizes a type name stands for.

    A name ending in "8", "16", "32" or "64" yields a one-element list
    with that size; any other name yields [32, 64].
    """
    # Suffixes are tested in the same order as the original if/elif chain.
    for size in (8, 16, 32, 64):
        if type_.endswith(str(size)):
            return [size]
    return [32, 64]


def type_add_size(type_, size):
    """Append ``size`` to ``type_`` unless the name already ends in a digit."""
    if type_has_size(type_):
        return type_
    return type_ + str(size)


# Sized type name -> field name used by the template to index a
# nir_const_value (e.g. ``_src[0].u32[k]``).  bool32 is stored in u32.
_CONST_FIELDS = {
    "int32": "i32",
    "uint32": "u32",
    "int64": "i64",
    "uint64": "u64",
    "bool32": "u32",
    "float32": "f32",
    "float64": "f64",
}


def get_const_field(type_):
    """Return the nir_const_value field name for a sized type name.

    Raises:
        Exception: if ``type_`` is not one of the known sized type names;
            the message is ``str(type_)``.
    """
    try:
        return _CONST_FIELDS[type_]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which the original
        # equality chain also rejected with the same generic Exception.
        raise Exception(str(type_))


# Mako template for the generated C file.  Lines starting with "%" are Mako
# control lines, "##" lines are Mako comments (not emitted), and "<% %>"
# blocks are inline Python.
template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason (at] jlekstrand.net)
 */

#include <math.h>
#include "main/core.h"
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "nir_constant_expressions.h"

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    packSnorm4x8
    *    ------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
    *
    * We must first cast the float to an int, because casting a negative
    * float to a uint is undefined.
    */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    packSnorm2x16
    *    -------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
    *
    * We must first cast the float to an int, because casting a negative
    * float to a uint is undefined.
    */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackSnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
    */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackSnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
    */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    packUnorm4x8
    *    ------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
    */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    packUnorm2x16
    *    -------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
    */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackUnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackUnorm4x8: f / 255.0
    */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackUnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackUnorm2x16: f / 65535.0
    */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef float float32_t;
typedef double float64_t;
typedef bool bool32_t;
% for type in ["float", "int", "uint"]:
% for width in [32, 64]:
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
};
% endfor
% endfor

struct bool32_vec {
   bool x;
   bool y;
   bool z;
   bool w;
};

% for name, op in sorted(opcodes.items()):
static nir_const_value
evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
                 MAYBE_UNUSED nir_const_value *_src)
{
   nir_const_value _dst_val = { {0, } };

   switch (bit_size) {
   % for bit_size in [32, 64]:
   case ${bit_size}: {
      <%
      output_type = type_add_size(op.output_type, bit_size)
      input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
      %>

      ## For each non-per-component input, create a variable srcN that
      ## contains x, y, z, and w elements which are filled in with the
      ## appropriately-typed values.
      % for j in range(op.num_inputs):
         % if op.input_sizes[j] == 0:
            <% continue %>
         % elif "src" + str(j) not in op.const_expr:
            ## Avoid unused variable warnings
            <% continue %>
         %endif

         const struct ${input_types[j]}_vec src${j} = {
         % for k in range(op.input_sizes[j]):
            % if input_types[j] == "bool32":
               _src[${j}].u32[${k}] != 0,
            % else:
               _src[${j}].${get_const_field(input_types[j])}[${k}],
            % endif
         % endfor
         % for k in range(op.input_sizes[j], 4):
            0,
         % endfor
         };
      % endfor

      % if op.output_size == 0:
         ## For per-component instructions, we need to iterate over the
         ## components and apply the constant expression one component
         ## at a time.
         for (unsigned _i = 0; _i < num_components; _i++) {
            ## For each per-component input, create a variable srcN that
            ## contains the value of the current (_i'th) component.
            % for j in range(op.num_inputs):
               % if op.input_sizes[j] != 0:
                  <% continue %>
               % elif "src" + str(j) not in op.const_expr:
                  ## Avoid unused variable warnings
                  <% continue %>
               % elif input_types[j] == "bool32":
                  const bool src${j} = _src[${j}].u32[_i] != 0;
               % else:
                  const ${input_types[j]}_t src${j} =
                     _src[${j}].${get_const_field(input_types[j])}[_i];
               % endif
            % endfor

            ## Create an appropriately-typed variable dst and assign the
            ## result of the const_expr to it. If const_expr already contains
            ## writes to dst, just include const_expr directly.
            % if "dst" in op.const_expr:
               ${output_type}_t dst;

               ${op.const_expr}
            % else:
               ${output_type}_t dst = ${op.const_expr};
            % endif

            ## Store the current component of the actual destination to the
            ## value of dst.
            % if output_type == "bool32":
               ## Sanitize the C value to a proper NIR bool
               _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
            % else:
               _dst_val.${get_const_field(output_type)}[_i] = dst;
            % endif
         }
      % else:
         ## In the non-per-component case, create a struct dst with
         ## appropriately-typed elements x, y, z, and w and assign the result
         ## of the const_expr to all components of dst, or include the
         ## const_expr directly if it writes to dst already.
         struct ${output_type}_vec dst;

         % if "dst" in op.const_expr:
            ${op.const_expr}
         % else:
            ## Splat the value to all components. This way expressions which
            ## write the same value to all components don't need to explicitly
            ## write to dest. One such example is fnoise which has a
            ## const_expr of 0.0f.
            dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
         % endif

         ## For each component in the destination, copy the value of dst to
         ## the actual destination.
         % for k in range(op.output_size):
            % if output_type == "bool32":
               ## Sanitize the C value to a proper NIR bool
               _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
            % else:
               _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
            % endif
         % endfor
      % endif

      break;
   }
   % endfor

   default:
      unreachable("unknown bit width");
   }

   return _dst_val;
}
% endfor

nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components,
                      unsigned bit_width, nir_const_value *src)
{
   switch (op) {
% for name in sorted(opcodes.keys()):
   case nir_op_${name}:
      return evaluate_${name}(num_components, bit_width, src);
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""


def main():
    """Render the template against the opcode table and print the C source."""
    # Imported here rather than at module level so the helper functions
    # above can be imported without nir_opcodes or Mako being available.
    from nir_opcodes import opcodes
    from mako.template import Template

    # A single parenthesized argument makes this valid, and identical in
    # effect, as both a Python 2 print statement and a Python 3 call.
    print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
                                    type_has_size=type_has_size,
                                    type_add_size=type_add_size,
                                    get_const_field=get_const_field))


if __name__ == "__main__":
    main()