1 /**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ***************************************************************************/ 23 24 #include "swr_context.h" 25 #include "swr_public.h" 26 #include "swr_screen.h" 27 #include "swr_resource.h" 28 #include "swr_fence.h" 29 #include "gen_knobs.h" 30 31 #include "pipe/p_screen.h" 32 #include "pipe/p_defines.h" 33 #include "util/u_memory.h" 34 #include "util/u_format.h" 35 #include "util/u_inlines.h" 36 #include "util/u_cpu_detect.h" 37 #include "util/u_format_s3tc.h" 38 #include "util/u_string.h" 39 40 #include "state_tracker/sw_winsys.h" 41 42 #include "jit_api.h" 43 44 #include "memory/TilingFunctions.h" 45 46 #include <stdio.h> 47 #include <map> 48 49 /* MSVC case instensitive compare */ 50 #if defined(PIPE_CC_MSVC) 51 #define strcasecmp lstrcmpiA 52 #endif 53 54 /* 55 * Max texture sizes 56 * XXX Check max texture size values against core and sampler. 57 */ 58 #define SWR_MAX_TEXTURE_SIZE (4 * 1024 * 1024 * 1024ULL) /* 4GB */ 59 #define SWR_MAX_TEXTURE_2D_LEVELS 14 /* 8K x 8K for now */ 60 #define SWR_MAX_TEXTURE_3D_LEVELS 12 /* 2K x 2K x 2K for now */ 61 #define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */ 62 #define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */ 63 64 static const char * 65 swr_get_name(struct pipe_screen *screen) 66 { 67 static char buf[100]; 68 util_snprintf(buf, sizeof(buf), "SWR (LLVM %u.%u, %u bits)", 69 HAVE_LLVM >> 8, HAVE_LLVM & 0xff, 70 lp_native_vector_width ); 71 return buf; 72 } 73 74 static const char * 75 swr_get_vendor(struct pipe_screen *screen) 76 { 77 return "Intel Corporation"; 78 } 79 80 static boolean 81 swr_is_format_supported(struct pipe_screen *screen, 82 enum pipe_format format, 83 enum pipe_texture_target target, 84 unsigned sample_count, 85 unsigned bind) 86 { 87 struct sw_winsys *winsys = swr_screen(screen)->winsys; 88 const struct util_format_description *format_desc; 89 90 assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D 91 || target == PIPE_TEXTURE_1D_ARRAY 92 || target == PIPE_TEXTURE_2D 93 || target == PIPE_TEXTURE_2D_ARRAY 94 || target == PIPE_TEXTURE_RECT 95 || target == PIPE_TEXTURE_3D 96 || target == PIPE_TEXTURE_CUBE 97 || target == PIPE_TEXTURE_CUBE_ARRAY); 98 99 format_desc = util_format_description(format); 100 if (!format_desc) 101 return FALSE; 102 103 if (sample_count > 1) 104 return FALSE; 105 106 if (bind 107 & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { 108 if (!winsys->is_displaytarget_format_supported(winsys, bind, format)) 109 return FALSE; 110 } 111 112 if (bind & PIPE_BIND_RENDER_TARGET) { 113 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) 114 return FALSE; 115 116 if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) 117 return FALSE; 118 119 /* 120 * Although possible, it is unnatural to render into compressed or YUV 121 * surfaces. So disable these here to avoid going into weird paths 122 * inside the state trackers. 123 */ 124 if (format_desc->block.width != 1 || format_desc->block.height != 1) 125 return FALSE; 126 } 127 128 if (bind & PIPE_BIND_DEPTH_STENCIL) { 129 if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 130 return FALSE; 131 132 if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) 133 return FALSE; 134 } 135 136 if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC || 137 format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { 138 return FALSE; 139 } 140 141 if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && 142 format != PIPE_FORMAT_ETC1_RGB8) { 143 return FALSE; 144 } 145 146 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 147 return util_format_s3tc_enabled; 148 } 149 150 return TRUE; 151 } 152 153 static int 154 swr_get_param(struct pipe_screen *screen, enum pipe_cap param) 155 { 156 switch (param) { 157 /* limits */ 158 case PIPE_CAP_MAX_RENDER_TARGETS: 159 return PIPE_MAX_COLOR_BUFS; 160 case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: 161 return SWR_MAX_TEXTURE_2D_LEVELS; 162 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: 163 return SWR_MAX_TEXTURE_3D_LEVELS; 164 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: 165 return SWR_MAX_TEXTURE_CUBE_LEVELS; 166 case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: 167 return MAX_SO_STREAMS; 168 case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: 169 case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: 170 return MAX_ATTRIBUTES * 4; 171 case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: 172 case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: 173 return 1024; 174 case PIPE_CAP_MAX_VERTEX_STREAMS: 175 return 1; 176 case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: 177 return 2048; 178 case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: 179 return SWR_MAX_TEXTURE_ARRAY_LAYERS; 180 case PIPE_CAP_MIN_TEXEL_OFFSET: 181 return -8; 182 case PIPE_CAP_MAX_TEXEL_OFFSET: 183 return 7; 184 case PIPE_CAP_GLSL_FEATURE_LEVEL: 185 return 330; 186 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: 187 return 16; 188 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: 189 return 64; 190 case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: 191 return 65536; 192 case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: 193 return 0; 194 case PIPE_CAP_MAX_VIEWPORTS: 195 return 1; 196 case PIPE_CAP_ENDIANNESS: 197 return PIPE_ENDIAN_NATIVE; 198 case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: 199 case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: 200 return 0; 201 202 /* supported features */ 203 case PIPE_CAP_NPOT_TEXTURES: 204 case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: 205 case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: 206 case PIPE_CAP_TWO_SIDED_STENCIL: 207 case PIPE_CAP_SM3: 208 case PIPE_CAP_POINT_SPRITE: 209 case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: 210 case PIPE_CAP_OCCLUSION_QUERY: 211 case PIPE_CAP_QUERY_TIME_ELAPSED: 212 case PIPE_CAP_QUERY_PIPELINE_STATISTICS: 213 case PIPE_CAP_TEXTURE_MIRROR_CLAMP: 214 case PIPE_CAP_TEXTURE_SHADOW_MAP: 215 case PIPE_CAP_TEXTURE_SWIZZLE: 216 case PIPE_CAP_BLEND_EQUATION_SEPARATE: 217 case PIPE_CAP_INDEP_BLEND_ENABLE: 218 case PIPE_CAP_INDEP_BLEND_FUNC: 219 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: 220 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: 221 case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: 222 case PIPE_CAP_DEPTH_CLIP_DISABLE: 223 case PIPE_CAP_PRIMITIVE_RESTART: 224 case PIPE_CAP_TGSI_INSTANCEID: 225 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: 226 case PIPE_CAP_START_INSTANCE: 227 case PIPE_CAP_SEAMLESS_CUBE_MAP: 228 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: 229 case PIPE_CAP_CONDITIONAL_RENDER: 230 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: 231 case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: 232 case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: 233 case PIPE_CAP_USER_VERTEX_BUFFERS: 234 case PIPE_CAP_USER_INDEX_BUFFERS: 235 case PIPE_CAP_USER_CONSTANT_BUFFERS: 236 case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS: 237 case PIPE_CAP_QUERY_TIMESTAMP: 238 case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: 239 case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: 240 case PIPE_CAP_FAKE_SW_MSAA: 241 case PIPE_CAP_DRAW_INDIRECT: 242 case PIPE_CAP_UMA: 243 case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: 244 case PIPE_CAP_CLIP_HALFZ: 245 case PIPE_CAP_POLYGON_OFFSET_CLAMP: 246 case PIPE_CAP_DEPTH_BOUNDS_TEST: 247 case PIPE_CAP_TEXTURE_FLOAT_LINEAR: 248 case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: 249 case PIPE_CAP_CULL_DISTANCE: 250 case PIPE_CAP_CUBE_MAP_ARRAY: 251 return 1; 252 253 /* unsupported features */ 254 case PIPE_CAP_ANISOTROPIC_FILTER: 255 case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: 256 case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: 257 case PIPE_CAP_SHADER_STENCIL_EXPORT: 258 case PIPE_CAP_TEXTURE_BARRIER: 259 case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: 260 case PIPE_CAP_VERTEX_COLOR_CLAMPED: 261 case PIPE_CAP_COMPUTE: 262 case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: 263 case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: 264 case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: 265 case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: 266 case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: 267 case PIPE_CAP_TEXTURE_MULTISAMPLE: 268 case PIPE_CAP_TGSI_TEXCOORD: 269 case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: 270 case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: 271 case PIPE_CAP_TEXTURE_GATHER_SM5: 272 case PIPE_CAP_TEXTURE_QUERY_LOD: 273 case PIPE_CAP_SAMPLE_SHADING: 274 case PIPE_CAP_TEXTURE_GATHER_OFFSETS: 275 case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: 276 case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: 277 case PIPE_CAP_SAMPLER_VIEW_TARGET: 278 case PIPE_CAP_VERTEXID_NOBASE: 279 case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: 280 case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: 281 case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: 282 case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: 283 case PIPE_CAP_TGSI_TXQS: 284 case PIPE_CAP_FORCE_PERSAMPLE_INTERP: 285 case PIPE_CAP_SHAREABLE_SHADERS: 286 case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: 287 case PIPE_CAP_CLEAR_TEXTURE: 288 case PIPE_CAP_DRAW_PARAMETERS: 289 case PIPE_CAP_TGSI_PACK_HALF_FLOAT: 290 case PIPE_CAP_MULTI_DRAW_INDIRECT: 291 case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: 292 case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: 293 case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: 294 case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: 295 case PIPE_CAP_INVALIDATE_BUFFER: 296 case PIPE_CAP_GENERATE_MIPMAP: 297 case PIPE_CAP_STRING_MARKER: 298 case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: 299 case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: 300 case PIPE_CAP_QUERY_BUFFER_OBJECT: 301 case PIPE_CAP_QUERY_MEMORY_INFO: 302 case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: 303 case PIPE_CAP_PCI_GROUP: 304 case PIPE_CAP_PCI_BUS: 305 case PIPE_CAP_PCI_DEVICE: 306 case PIPE_CAP_PCI_FUNCTION: 307 case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: 308 case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: 309 case PIPE_CAP_TGSI_VOTE: 310 case PIPE_CAP_MAX_WINDOW_RECTANGLES: 311 case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: 312 case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: 313 case PIPE_CAP_TGSI_ARRAY_COMPONENTS: 314 case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: 315 case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: 316 case PIPE_CAP_NATIVE_FENCE_FD: 317 case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: 318 case PIPE_CAP_TGSI_FS_FBFETCH: 319 return 0; 320 321 case PIPE_CAP_VENDOR_ID: 322 return 0xFFFFFFFF; 323 case PIPE_CAP_DEVICE_ID: 324 return 0xFFFFFFFF; 325 case PIPE_CAP_ACCELERATED: 326 return 0; 327 case PIPE_CAP_VIDEO_MEMORY: { 328 /* XXX: Do we want to return the full amount of system memory ? */ 329 uint64_t system_memory; 330 331 if (!os_get_total_physical_memory(&system_memory)) 332 return 0; 333 334 return (int)(system_memory >> 20); 335 } 336 } 337 338 /* should only get here on unhandled cases */ 339 debug_printf("Unexpected PIPE_CAP %d query\n", param); 340 return 0; 341 } 342 343 static int 344 swr_get_shader_param(struct pipe_screen *screen, 345 unsigned shader, 346 enum pipe_shader_cap param) 347 { 348 if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT) 349 return gallivm_get_shader_param(param); 350 351 // Todo: geometry, tesselation, compute 352 return 0; 353 } 354 355 356 static float 357 swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param) 358 { 359 switch (param) { 360 case PIPE_CAPF_MAX_LINE_WIDTH: 361 case PIPE_CAPF_MAX_LINE_WIDTH_AA: 362 case PIPE_CAPF_MAX_POINT_WIDTH: 363 return 255.0; /* arbitrary */ 364 case PIPE_CAPF_MAX_POINT_WIDTH_AA: 365 return 0.0; 366 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 367 return 0.0; 368 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 369 return 16.0; /* arbitrary */ 370 case PIPE_CAPF_GUARD_BAND_LEFT: 371 case PIPE_CAPF_GUARD_BAND_TOP: 372 case PIPE_CAPF_GUARD_BAND_RIGHT: 373 case PIPE_CAPF_GUARD_BAND_BOTTOM: 374 return 0.0; 375 } 376 /* should only get here on unhandled cases */ 377 debug_printf("Unexpected PIPE_CAPF %d query\n", param); 378 return 0.0; 379 } 380 381 SWR_FORMAT 382 mesa_to_swr_format(enum pipe_format format) 383 { 384 static const std::map<pipe_format,SWR_FORMAT> mesa2swr = { 385 /* depth / stencil */ 386 {PIPE_FORMAT_Z16_UNORM, R16_UNORM}, // z 387 {PIPE_FORMAT_Z32_FLOAT, R32_FLOAT}, // z 388 {PIPE_FORMAT_Z24_UNORM_S8_UINT, R24_UNORM_X8_TYPELESS}, // z 389 {PIPE_FORMAT_Z24X8_UNORM, R24_UNORM_X8_TYPELESS}, // z 390 {PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, R32_FLOAT_X8X24_TYPELESS}, // z 391 392 /* alpha */ 393 {PIPE_FORMAT_A8_UNORM, A8_UNORM}, 394 {PIPE_FORMAT_A16_UNORM, A16_UNORM}, 395 {PIPE_FORMAT_A16_FLOAT, A16_FLOAT}, 396 {PIPE_FORMAT_A32_FLOAT, A32_FLOAT}, 397 398 /* odd sizes, bgr */ 399 {PIPE_FORMAT_B5G6R5_UNORM, B5G6R5_UNORM}, 400 {PIPE_FORMAT_B5G6R5_SRGB, B5G6R5_UNORM_SRGB}, 401 {PIPE_FORMAT_B5G5R5A1_UNORM, B5G5R5A1_UNORM}, 402 {PIPE_FORMAT_B5G5R5X1_UNORM, B5G5R5X1_UNORM}, 403 {PIPE_FORMAT_B4G4R4A4_UNORM, B4G4R4A4_UNORM}, 404 {PIPE_FORMAT_B8G8R8A8_UNORM, B8G8R8A8_UNORM}, 405 {PIPE_FORMAT_B8G8R8A8_SRGB, B8G8R8A8_UNORM_SRGB}, 406 {PIPE_FORMAT_B8G8R8X8_UNORM, B8G8R8X8_UNORM}, 407 {PIPE_FORMAT_B8G8R8X8_SRGB, B8G8R8X8_UNORM_SRGB}, 408 409 /* rgb10a2 */ 410 {PIPE_FORMAT_R10G10B10A2_UNORM, R10G10B10A2_UNORM}, 411 {PIPE_FORMAT_R10G10B10A2_SNORM, R10G10B10A2_SNORM}, 412 {PIPE_FORMAT_R10G10B10A2_USCALED, R10G10B10A2_USCALED}, 413 {PIPE_FORMAT_R10G10B10A2_SSCALED, R10G10B10A2_SSCALED}, 414 {PIPE_FORMAT_R10G10B10A2_UINT, R10G10B10A2_UINT}, 415 416 /* rgb10x2 */ 417 {PIPE_FORMAT_R10G10B10X2_USCALED, R10G10B10X2_USCALED}, 418 419 /* bgr10a2 */ 420 {PIPE_FORMAT_B10G10R10A2_UNORM, B10G10R10A2_UNORM}, 421 {PIPE_FORMAT_B10G10R10A2_SNORM, B10G10R10A2_SNORM}, 422 {PIPE_FORMAT_B10G10R10A2_USCALED, B10G10R10A2_USCALED}, 423 {PIPE_FORMAT_B10G10R10A2_SSCALED, B10G10R10A2_SSCALED}, 424 {PIPE_FORMAT_B10G10R10A2_UINT, B10G10R10A2_UINT}, 425 426 /* bgr10x2 */ 427 {PIPE_FORMAT_B10G10R10X2_UNORM, B10G10R10X2_UNORM}, 428 429 /* r11g11b10 */ 430 {PIPE_FORMAT_R11G11B10_FLOAT, R11G11B10_FLOAT}, 431 432 /* 32 bits per component */ 433 {PIPE_FORMAT_R32_FLOAT, R32_FLOAT}, 434 {PIPE_FORMAT_R32G32_FLOAT, R32G32_FLOAT}, 435 {PIPE_FORMAT_R32G32B32_FLOAT, R32G32B32_FLOAT}, 436 {PIPE_FORMAT_R32G32B32A32_FLOAT, R32G32B32A32_FLOAT}, 437 {PIPE_FORMAT_R32G32B32X32_FLOAT, R32G32B32X32_FLOAT}, 438 439 {PIPE_FORMAT_R32_USCALED, R32_USCALED}, 440 {PIPE_FORMAT_R32G32_USCALED, R32G32_USCALED}, 441 {PIPE_FORMAT_R32G32B32_USCALED, R32G32B32_USCALED}, 442 {PIPE_FORMAT_R32G32B32A32_USCALED, R32G32B32A32_USCALED}, 443 444 {PIPE_FORMAT_R32_SSCALED, R32_SSCALED}, 445 {PIPE_FORMAT_R32G32_SSCALED, R32G32_SSCALED}, 446 {PIPE_FORMAT_R32G32B32_SSCALED, R32G32B32_SSCALED}, 447 {PIPE_FORMAT_R32G32B32A32_SSCALED, R32G32B32A32_SSCALED}, 448 449 {PIPE_FORMAT_R32_UINT, R32_UINT}, 450 {PIPE_FORMAT_R32G32_UINT, R32G32_UINT}, 451 {PIPE_FORMAT_R32G32B32_UINT, R32G32B32_UINT}, 452 {PIPE_FORMAT_R32G32B32A32_UINT, R32G32B32A32_UINT}, 453 454 {PIPE_FORMAT_R32_SINT, R32_SINT}, 455 {PIPE_FORMAT_R32G32_SINT, R32G32_SINT}, 456 {PIPE_FORMAT_R32G32B32_SINT, R32G32B32_SINT}, 457 {PIPE_FORMAT_R32G32B32A32_SINT, R32G32B32A32_SINT}, 458 459 /* 16 bits per component */ 460 {PIPE_FORMAT_R16_UNORM, R16_UNORM}, 461 {PIPE_FORMAT_R16G16_UNORM, R16G16_UNORM}, 462 {PIPE_FORMAT_R16G16B16_UNORM, R16G16B16_UNORM}, 463 {PIPE_FORMAT_R16G16B16A16_UNORM, R16G16B16A16_UNORM}, 464 {PIPE_FORMAT_R16G16B16X16_UNORM, R16G16B16X16_UNORM}, 465 466 {PIPE_FORMAT_R16_USCALED, R16_USCALED}, 467 {PIPE_FORMAT_R16G16_USCALED, R16G16_USCALED}, 468 {PIPE_FORMAT_R16G16B16_USCALED, R16G16B16_USCALED}, 469 {PIPE_FORMAT_R16G16B16A16_USCALED, R16G16B16A16_USCALED}, 470 471 {PIPE_FORMAT_R16_SNORM, R16_SNORM}, 472 {PIPE_FORMAT_R16G16_SNORM, R16G16_SNORM}, 473 {PIPE_FORMAT_R16G16B16_SNORM, R16G16B16_SNORM}, 474 {PIPE_FORMAT_R16G16B16A16_SNORM, R16G16B16A16_SNORM}, 475 476 {PIPE_FORMAT_R16_SSCALED, R16_SSCALED}, 477 {PIPE_FORMAT_R16G16_SSCALED, R16G16_SSCALED}, 478 {PIPE_FORMAT_R16G16B16_SSCALED, R16G16B16_SSCALED}, 479 {PIPE_FORMAT_R16G16B16A16_SSCALED, R16G16B16A16_SSCALED}, 480 481 {PIPE_FORMAT_R16_UINT, R16_UINT}, 482 {PIPE_FORMAT_R16G16_UINT, R16G16_UINT}, 483 {PIPE_FORMAT_R16G16B16_UINT, R16G16B16_UINT}, 484 {PIPE_FORMAT_R16G16B16A16_UINT, R16G16B16A16_UINT}, 485 486 {PIPE_FORMAT_R16_SINT, R16_SINT}, 487 {PIPE_FORMAT_R16G16_SINT, R16G16_SINT}, 488 {PIPE_FORMAT_R16G16B16_SINT, R16G16B16_SINT}, 489 {PIPE_FORMAT_R16G16B16A16_SINT, R16G16B16A16_SINT}, 490 491 {PIPE_FORMAT_R16_FLOAT, R16_FLOAT}, 492 {PIPE_FORMAT_R16G16_FLOAT, R16G16_FLOAT}, 493 {PIPE_FORMAT_R16G16B16_FLOAT, R16G16B16_FLOAT}, 494 {PIPE_FORMAT_R16G16B16A16_FLOAT, R16G16B16A16_FLOAT}, 495 {PIPE_FORMAT_R16G16B16X16_FLOAT, R16G16B16X16_FLOAT}, 496 497 /* 8 bits per component */ 498 {PIPE_FORMAT_R8_UNORM, R8_UNORM}, 499 {PIPE_FORMAT_R8G8_UNORM, R8G8_UNORM}, 500 {PIPE_FORMAT_R8G8B8_UNORM, R8G8B8_UNORM}, 501 {PIPE_FORMAT_R8G8B8_SRGB, R8G8B8_UNORM_SRGB}, 502 {PIPE_FORMAT_R8G8B8A8_UNORM, R8G8B8A8_UNORM}, 503 {PIPE_FORMAT_R8G8B8A8_SRGB, R8G8B8A8_UNORM_SRGB}, 504 {PIPE_FORMAT_R8G8B8X8_UNORM, R8G8B8X8_UNORM}, 505 {PIPE_FORMAT_R8G8B8X8_SRGB, R8G8B8X8_UNORM_SRGB}, 506 507 {PIPE_FORMAT_R8_USCALED, R8_USCALED}, 508 {PIPE_FORMAT_R8G8_USCALED, R8G8_USCALED}, 509 {PIPE_FORMAT_R8G8B8_USCALED, R8G8B8_USCALED}, 510 {PIPE_FORMAT_R8G8B8A8_USCALED, R8G8B8A8_USCALED}, 511 512 {PIPE_FORMAT_R8_SNORM, R8_SNORM}, 513 {PIPE_FORMAT_R8G8_SNORM, R8G8_SNORM}, 514 {PIPE_FORMAT_R8G8B8_SNORM, R8G8B8_SNORM}, 515 {PIPE_FORMAT_R8G8B8A8_SNORM, R8G8B8A8_SNORM}, 516 517 {PIPE_FORMAT_R8_SSCALED, R8_SSCALED}, 518 {PIPE_FORMAT_R8G8_SSCALED, R8G8_SSCALED}, 519 {PIPE_FORMAT_R8G8B8_SSCALED, R8G8B8_SSCALED}, 520 {PIPE_FORMAT_R8G8B8A8_SSCALED, R8G8B8A8_SSCALED}, 521 522 {PIPE_FORMAT_R8_UINT, R8_UINT}, 523 {PIPE_FORMAT_R8G8_UINT, R8G8_UINT}, 524 {PIPE_FORMAT_R8G8B8_UINT, R8G8B8_UINT}, 525 {PIPE_FORMAT_R8G8B8A8_UINT, R8G8B8A8_UINT}, 526 527 {PIPE_FORMAT_R8_SINT, R8_SINT}, 528 {PIPE_FORMAT_R8G8_SINT, R8G8_SINT}, 529 {PIPE_FORMAT_R8G8B8_SINT, R8G8B8_SINT}, 530 {PIPE_FORMAT_R8G8B8A8_SINT, R8G8B8A8_SINT}, 531 532 /* These formats are valid for vertex data, but should not be used 533 * for render targets. 534 */ 535 536 {PIPE_FORMAT_R32_FIXED, R32_SFIXED}, 537 {PIPE_FORMAT_R32G32_FIXED, R32G32_SFIXED}, 538 {PIPE_FORMAT_R32G32B32_FIXED, R32G32B32_SFIXED}, 539 {PIPE_FORMAT_R32G32B32A32_FIXED, R32G32B32A32_SFIXED}, 540 541 {PIPE_FORMAT_R64_FLOAT, R64_FLOAT}, 542 {PIPE_FORMAT_R64G64_FLOAT, R64G64_FLOAT}, 543 {PIPE_FORMAT_R64G64B64_FLOAT, R64G64B64_FLOAT}, 544 {PIPE_FORMAT_R64G64B64A64_FLOAT, R64G64B64A64_FLOAT}, 545 546 /* These formats have entries in SWR but don't have Load/StoreTile 547 * implementations. That means these aren't renderable, and thus having 548 * a mapping entry here is detrimental. 549 */ 550 /* 551 552 {PIPE_FORMAT_L8_UNORM, L8_UNORM}, 553 {PIPE_FORMAT_I8_UNORM, I8_UNORM}, 554 {PIPE_FORMAT_L8A8_UNORM, L8A8_UNORM}, 555 {PIPE_FORMAT_L16_UNORM, L16_UNORM}, 556 {PIPE_FORMAT_UYVY, YCRCB_SWAPUVY}, 557 558 {PIPE_FORMAT_L8_SRGB, L8_UNORM_SRGB}, 559 {PIPE_FORMAT_L8A8_SRGB, L8A8_UNORM_SRGB}, 560 561 {PIPE_FORMAT_DXT1_RGBA, BC1_UNORM}, 562 {PIPE_FORMAT_DXT3_RGBA, BC2_UNORM}, 563 {PIPE_FORMAT_DXT5_RGBA, BC3_UNORM}, 564 565 {PIPE_FORMAT_DXT1_SRGBA, BC1_UNORM_SRGB}, 566 {PIPE_FORMAT_DXT3_SRGBA, BC2_UNORM_SRGB}, 567 {PIPE_FORMAT_DXT5_SRGBA, BC3_UNORM_SRGB}, 568 569 {PIPE_FORMAT_RGTC1_UNORM, BC4_UNORM}, 570 {PIPE_FORMAT_RGTC1_SNORM, BC4_SNORM}, 571 {PIPE_FORMAT_RGTC2_UNORM, BC5_UNORM}, 572 {PIPE_FORMAT_RGTC2_SNORM, BC5_SNORM}, 573 574 {PIPE_FORMAT_L16A16_UNORM, L16A16_UNORM}, 575 {PIPE_FORMAT_I16_UNORM, I16_UNORM}, 576 {PIPE_FORMAT_L16_FLOAT, L16_FLOAT}, 577 {PIPE_FORMAT_L16A16_FLOAT, L16A16_FLOAT}, 578 {PIPE_FORMAT_I16_FLOAT, I16_FLOAT}, 579 {PIPE_FORMAT_L32_FLOAT, L32_FLOAT}, 580 {PIPE_FORMAT_L32A32_FLOAT, L32A32_FLOAT}, 581 {PIPE_FORMAT_I32_FLOAT, I32_FLOAT}, 582 583 {PIPE_FORMAT_I8_UINT, I8_UINT}, 584 {PIPE_FORMAT_L8_UINT, L8_UINT}, 585 {PIPE_FORMAT_L8A8_UINT, L8A8_UINT}, 586 587 {PIPE_FORMAT_I8_SINT, I8_SINT}, 588 {PIPE_FORMAT_L8_SINT, L8_SINT}, 589 {PIPE_FORMAT_L8A8_SINT, L8A8_SINT}, 590 591 */ 592 }; 593 594 auto it = mesa2swr.find(format); 595 if (it == mesa2swr.end()) 596 return (SWR_FORMAT)-1; 597 else 598 return it->second; 599 } 600 601 static boolean 602 swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res) 603 { 604 struct sw_winsys *winsys = screen->winsys; 605 struct sw_displaytarget *dt; 606 607 const unsigned width = align(res->swr.width, res->swr.halign); 608 const unsigned height = align(res->swr.height, res->swr.valign); 609 610 UINT stride; 611 dt = winsys->displaytarget_create(winsys, 612 res->base.bind, 613 res->base.format, 614 width, height, 615 64, NULL, 616 &stride); 617 618 if (dt == NULL) 619 return FALSE; 620 621 void *map = winsys->displaytarget_map(winsys, dt, 0); 622 623 res->display_target = dt; 624 res->swr.pBaseAddress = (uint8_t*) map; 625 626 /* Clear the display target surface */ 627 if (map) 628 memset(map, 0, height * stride); 629 630 winsys->displaytarget_unmap(winsys, dt); 631 632 return TRUE; 633 } 634 635 static bool 636 swr_texture_layout(struct swr_screen *screen, 637 struct swr_resource *res, 638 boolean allocate) 639 { 640 struct pipe_resource *pt = &res->base; 641 642 pipe_format fmt = pt->format; 643 const struct util_format_description *desc = util_format_description(fmt); 644 645 res->has_depth = util_format_has_depth(desc); 646 res->has_stencil = util_format_has_stencil(desc); 647 648 if (res->has_stencil && !res->has_depth) 649 fmt = PIPE_FORMAT_R8_UINT; 650 651 /* We always use the SWR layout. For 2D and 3D textures this looks like: 652 * 653 * |<------- pitch ------->| 654 * +=======================+------- 655 * |Array 0 | ^ 656 * | | | 657 * | Level 0 | | 658 * | | | 659 * | | qpitch 660 * +-----------+-----------+ | 661 * | | L2L2L2L2 | | 662 * | Level 1 | L3L3 | | 663 * | | L4 | v 664 * +===========+===========+------- 665 * |Array 1 | 666 * | | 667 * | Level 0 | 668 * | | 669 * | | 670 * +-----------+-----------+ 671 * | | L2L2L2L2 | 672 * | Level 1 | L3L3 | 673 * | | L4 | 674 * +===========+===========+ 675 * 676 * The overall width in bytes is known as the pitch, while the overall 677 * height in rows is the qpitch. Array slices are laid out logically below 678 * one another, qpitch rows apart. For 3D surfaces, the "level" values are 679 * just invalid for the higher array numbers (since depth is also 680 * minified). 1D and 1D array surfaces are stored effectively the same way, 681 * except that pitch never plays into it. All the levels are logically 682 * adjacent to each other on the X axis. The qpitch becomes the number of 683 * elements between array slices, while the pitch is unused. 684 * 685 * Each level's sizes are subject to the valign and halign settings of the 686 * surface. For compressed formats that swr is unaware of, we will use an 687 * appropriately-sized uncompressed format, and scale the widths/heights. 688 * 689 * This surface is stored inside res->swr. For depth/stencil textures, 690 * res->secondary will have an identically-laid-out but R8_UINT-formatted 691 * stencil tree. In the Z32F_S8 case, the primary surface still has 64-bpp 692 * texels, to simplify map/unmap logic which copies the stencil values 693 * in/out. 694 */ 695 696 res->swr.width = pt->width0; 697 res->swr.height = pt->height0; 698 res->swr.type = swr_convert_target_type(pt->target); 699 res->swr.tileMode = SWR_TILE_NONE; 700 res->swr.format = mesa_to_swr_format(fmt); 701 res->swr.numSamples = std::max(1u, pt->nr_samples); 702 703 if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { 704 res->swr.halign = KNOB_MACROTILE_X_DIM; 705 res->swr.valign = KNOB_MACROTILE_Y_DIM; 706 } else { 707 res->swr.halign = 1; 708 res->swr.valign = 1; 709 } 710 711 unsigned halign = res->swr.halign * util_format_get_blockwidth(fmt); 712 unsigned width = align(pt->width0, halign); 713 if (pt->target == PIPE_TEXTURE_1D || pt->target == PIPE_TEXTURE_1D_ARRAY) { 714 for (int level = 1; level <= pt->last_level; level++) 715 width += align(u_minify(pt->width0, level), halign); 716 res->swr.pitch = util_format_get_blocksize(fmt); 717 res->swr.qpitch = util_format_get_nblocksx(fmt, width); 718 } else { 719 // The pitch is the overall width of the texture in bytes. Most of the 720 // time this is the pitch of level 0 since all the other levels fit 721 // underneath it. However in some degenerate situations, the width of 722 // level1 + level2 may be larger. In that case, we use those 723 // widths. This can happen if, e.g. halign is 32, and the width of level 724 // 0 is 32 or less. In that case, the aligned levels 1 and 2 will also 725 // be 32 each, adding up to 64. 726 unsigned valign = res->swr.valign * util_format_get_blockheight(fmt); 727 if (pt->last_level > 1) { 728 width = std::max<uint32_t>( 729 width, 730 align(u_minify(pt->width0, 1), halign) + 731 align(u_minify(pt->width0, 2), halign)); 732 } 733 res->swr.pitch = util_format_get_stride(fmt, width); 734 735 // The qpitch is controlled by either the height of the second LOD, or 736 // the combination of all the later LODs. 737 unsigned height = align(pt->height0, valign); 738 if (pt->last_level == 1) { 739 height += align(u_minify(pt->height0, 1), valign); 740 } else if (pt->last_level > 1) { 741 unsigned level1 = align(u_minify(pt->height0, 1), valign); 742 unsigned level2 = 0; 743 for (int level = 2; level <= pt->last_level; level++) { 744 level2 += align(u_minify(pt->height0, level), valign); 745 } 746 height += std::max(level1, level2); 747 } 748 res->swr.qpitch = util_format_get_nblocksy(fmt, height); 749 } 750 751 if (pt->target == PIPE_TEXTURE_3D) 752 res->swr.depth = pt->depth0; 753 else 754 res->swr.depth = pt->array_size; 755 756 // Fix up swr format if necessary so that LOD offset computation works 757 if (res->swr.format == (SWR_FORMAT)-1) { 758 switch (util_format_get_blocksize(fmt)) { 759 default: 760 unreachable("Unexpected format block size"); 761 case 1: res->swr.format = R8_UINT; break; 762 case 2: res->swr.format = R16_UINT; break; 763 case 4: res->swr.format = R32_UINT; break; 764 case 8: 765 if (util_format_is_compressed(fmt)) 766 res->swr.format = BC4_UNORM; 767 else 768 res->swr.format = R32G32_UINT; 769 break; 770 case 16: 771 if (util_format_is_compressed(fmt)) 772 res->swr.format = BC5_UNORM; 773 else 774 res->swr.format = R32G32B32A32_UINT; 775 break; 776 } 777 } 778 779 for (int level = 0; level <= pt->last_level; level++) { 780 res->mip_offsets[level] = 781 ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->swr); 782 } 783 784 size_t total_size = 785 (size_t)res->swr.depth * res->swr.qpitch * res->swr.pitch; 786 if (total_size > SWR_MAX_TEXTURE_SIZE) 787 return false; 788 789 if (allocate) { 790 res->swr.pBaseAddress = (uint8_t *)AlignedMalloc(total_size, 64); 791 792 if (res->has_depth && res->has_stencil) { 793 res->secondary = res->swr; 794 res->secondary.format = R8_UINT; 795 res->secondary.pitch = res->swr.pitch / util_format_get_blocksize(fmt); 796 797 for (int level = 0; level <= pt->last_level; level++) { 798 res->secondary_mip_offsets[level] = 799 ComputeSurfaceOffset<false>(0, 0, 0, 0, 0, level, &res->secondary); 800 } 801 802 res->secondary.pBaseAddress = (uint8_t *)AlignedMalloc( 803 res->secondary.depth * res->secondary.qpitch * 804 res->secondary.pitch, 64); 805 } 806 } 807 808 return true; 809 } 810 811 static boolean 812 swr_can_create_resource(struct pipe_screen *screen, 813 const struct pipe_resource *templat) 814 { 815 struct swr_resource res; 816 memset(&res, 0, sizeof(res)); 817 res.base = *templat; 818 return swr_texture_layout(swr_screen(screen), &res, false); 819 } 820 821 static struct pipe_resource * 822 swr_resource_create(struct pipe_screen *_screen, 823 const struct pipe_resource *templat) 824 { 825 struct swr_screen *screen = swr_screen(_screen); 826 struct swr_resource *res = CALLOC_STRUCT(swr_resource); 827 if (!res) 828 return NULL; 829 830 res->base = *templat; 831 pipe_reference_init(&res->base.reference, 1); 832 res->base.screen = &screen->base; 833 834 if (swr_resource_is_texture(&res->base)) { 835 if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT 836 | PIPE_BIND_SHARED)) { 837 /* displayable surface 838 * first call swr_texture_layout without allocating to finish 839 * filling out the SWR_SURFAE_STATE in res */ 840 swr_texture_layout(screen, res, false); 841 if (!swr_displaytarget_layout(screen, res)) 842 goto fail; 843 } else { 844 /* texture map */ 845 if (!swr_texture_layout(screen, res, true)) 846 goto fail; 847 } 848 } else { 849 /* other data (vertex buffer, const buffer, etc) */ 850 assert(util_format_get_blocksize(templat->format) == 1); 851 assert(templat->height0 == 1); 852 assert(templat->depth0 == 1); 853 assert(templat->last_level == 0); 854 855 /* Easiest to just call swr_texture_layout, as it sets up 856 * SWR_SURFAE_STATE in res */ 857 if (!swr_texture_layout(screen, res, true)) 858 goto fail; 859 } 860 861 return &res->base; 862 863 fail: 864 FREE(res); 865 return NULL; 866 } 867 868 static void 869 swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) 870 { 871 struct swr_screen *screen = swr_screen(p_screen); 872 struct swr_resource *spr = swr_resource(pt); 873 struct pipe_context *pipe = screen->pipe; 874 875 if (spr->display_target) { 876 /* If resource is display target, winsys manages the buffer and will 877 * free it on displaytarget_destroy. */ 878 swr_fence_finish(p_screen, NULL, screen->flush_fence, 0); 879 880 struct sw_winsys *winsys = screen->winsys; 881 winsys->displaytarget_destroy(winsys, spr->display_target); 882 883 } else { 884 /* For regular resources, defer deletion */ 885 swr_resource_unused(pt); 886 swr_fence_work_free(screen->flush_fence, spr->swr.pBaseAddress, true); 887 swr_fence_work_free(screen->flush_fence, 888 spr->secondary.pBaseAddress, true); 889 } 890 891 FREE(spr); 892 } 893 894 895 static void 896 swr_flush_frontbuffer(struct pipe_screen *p_screen, 897 struct pipe_resource *resource, 898 unsigned level, 899 unsigned layer, 900 void *context_private, 901 struct pipe_box *sub_box) 902 { 903 struct swr_screen *screen = swr_screen(p_screen); 904 struct sw_winsys *winsys = screen->winsys; 905 struct swr_resource *spr = swr_resource(resource); 906 struct pipe_context *pipe = screen->pipe; 907 908 if (pipe) { 909 swr_fence_finish(p_screen, NULL, screen->flush_fence, 0); 910 swr_resource_unused(resource); 911 SwrEndFrame(swr_context(pipe)->swrContext); 912 } 913 914 debug_assert(spr->display_target); 915 if (spr->display_target) 916 winsys->displaytarget_display( 917 winsys, spr->display_target, context_private, sub_box); 918 } 919 920 921 static void 922 swr_destroy_screen(struct pipe_screen *p_screen) 923 { 924 struct swr_screen *screen = swr_screen(p_screen); 925 struct sw_winsys *winsys = screen->winsys; 926 927 fprintf(stderr, "SWR destroy screen!\n"); 928 929 swr_fence_finish(p_screen, NULL, screen->flush_fence, 0); 930 swr_fence_reference(p_screen, &screen->flush_fence, NULL); 931 932 JitDestroyContext(screen->hJitMgr); 933 934 if (winsys->destroy) 935 winsys->destroy(winsys); 936 937 FREE(screen); 938 } 939 940 PUBLIC 941 struct pipe_screen * 942 swr_create_screen_internal(struct sw_winsys *winsys) 943 { 944 struct swr_screen *screen = CALLOC_STRUCT(swr_screen); 945 946 if (!screen) 947 return NULL; 948 949 if (!getenv("KNOB_MAX_PRIMS_PER_DRAW")) { 950 g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152); 951 } 952 953 if (!lp_build_init()) { 954 FREE(screen); 955 return NULL; 956 } 957 958 screen->winsys = winsys; 959 screen->base.get_name = swr_get_name; 960 screen->base.get_vendor = swr_get_vendor; 961 screen->base.is_format_supported = swr_is_format_supported; 962 screen->base.context_create = swr_create_context; 963 screen->base.can_create_resource = swr_can_create_resource; 964 965 screen->base.destroy = swr_destroy_screen; 966 screen->base.get_param = swr_get_param; 967 screen->base.get_shader_param = swr_get_shader_param; 968 screen->base.get_paramf = swr_get_paramf; 969 970 screen->base.resource_create = swr_resource_create; 971 screen->base.resource_destroy = swr_resource_destroy; 972 973 screen->base.flush_frontbuffer = swr_flush_frontbuffer; 974 975 screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, KNOB_ARCH_STR, "swr"); 976 977 swr_fence_init(&screen->base); 978 979 util_format_s3tc_init(); 980 981 return &screen->base; 982 } 983 984