/*
 * Copyright 2016 Red Hat
 * based on intel anv code:
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_meta.h"

#include <fcntl.h>
#include <limits.h>
#include <pwd.h>
#include <sys/stat.h>

void
radv_meta_save(struct radv_meta_saved_state *state,
	       struct radv_cmd_buffer *cmd_buffer, uint32_t flags)
{
	assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE |
			RADV_META_SAVE_COMPUTE_PIPELINE));

	state->flags = flags;

	if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
		assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));

		state->old_pipeline = cmd_buffer->state.pipeline;

		/* Save all viewports. */
		state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
		typed_memcpy(state->viewport.viewports,
			     cmd_buffer->state.dynamic.viewport.viewports,
			     MAX_VIEWPORTS);

		/* Save all scissors. */
		state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
		typed_memcpy(state->scissor.scissors,
			     cmd_buffer->state.dynamic.scissor.scissors,
			     MAX_SCISSORS);

		/* The most common meta operations all want to have the
		 * viewport reset and any scissors disabled. The rest of the
		 * dynamic state should have no effect.
		 */
		cmd_buffer->state.dynamic.viewport.count = 0;
		cmd_buffer->state.dynamic.scissor.count = 0;
		cmd_buffer->state.dirty |= 1 << VK_DYNAMIC_STATE_VIEWPORT |
					   1 << VK_DYNAMIC_STATE_SCISSOR;
	}

	if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
		assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));

		state->old_pipeline = cmd_buffer->state.compute_pipeline;
	}

	if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
		if (cmd_buffer->state.valid_descriptors & (1 << 0))
			state->old_descriptor_set0 = cmd_buffer->descriptors[0];
		else
			state->old_descriptor_set0 = NULL;
	}

	if (state->flags & RADV_META_SAVE_CONSTANTS) {
		memcpy(state->push_constants, cmd_buffer->push_constants,
		       MAX_PUSH_CONSTANTS_SIZE);
	}

	if (state->flags & RADV_META_SAVE_PASS) {
		state->pass = cmd_buffer->state.pass;
		state->subpass = cmd_buffer->state.subpass;
		state->framebuffer = cmd_buffer->state.framebuffer;
		state->attachments = cmd_buffer->state.attachments;
		state->render_area = cmd_buffer->state.render_area;
	}
}

void
radv_meta_restore(const struct radv_meta_saved_state *state,
		  struct radv_cmd_buffer *cmd_buffer)
{
	if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
		radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
				     VK_PIPELINE_BIND_POINT_GRAPHICS,
				     radv_pipeline_to_handle(state->old_pipeline));

		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;

		/* Restore all viewports. */
		cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
		typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports,
			     state->viewport.viewports,
			     MAX_VIEWPORTS);

		/* Restore all scissors. */
		cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
		typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors,
			     state->scissor.scissors,
			     MAX_SCISSORS);

		cmd_buffer->state.dirty |= 1 << VK_DYNAMIC_STATE_VIEWPORT |
					   1 << VK_DYNAMIC_STATE_SCISSOR;
	}

	if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
		radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
				     VK_PIPELINE_BIND_POINT_COMPUTE,
				     radv_pipeline_to_handle(state->old_pipeline));
	}

	if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
		radv_set_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
	}

	if (state->flags & RADV_META_SAVE_CONSTANTS) {
		memcpy(cmd_buffer->push_constants, state->push_constants,
		       MAX_PUSH_CONSTANTS_SIZE);
		cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;

		if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
			cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
		}
	}

	if (state->flags & RADV_META_SAVE_PASS) {
		cmd_buffer->state.pass = state->pass;
		cmd_buffer->state.subpass = state->subpass;
		cmd_buffer->state.framebuffer = state->framebuffer;
		cmd_buffer->state.attachments = state->attachments;
		cmd_buffer->state.render_area = state->render_area;
		if (state->subpass)
			cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
	}
}
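
/*
 * Typical usage of the save/restore pair, as a rough sketch (the helper
 * below and its exact flag set are illustrative only, not a real function
 * in this file):
 *
 *	void radv_meta_do_blit(struct radv_cmd_buffer *cmd_buffer)
 *	{
 *		struct radv_meta_saved_state saved_state;
 *
 *		radv_meta_save(&saved_state, cmd_buffer,
 *			       RADV_META_SAVE_GRAPHICS_PIPELINE |
 *			       RADV_META_SAVE_CONSTANTS |
 *			       RADV_META_SAVE_DESCRIPTORS);
 *
 *		... bind the meta pipeline, push constants, draw ...
 *
 *		radv_meta_restore(&saved_state, cmd_buffer);
 *	}
 *
 * Exactly one of RADV_META_SAVE_GRAPHICS_PIPELINE and
 * RADV_META_SAVE_COMPUTE_PIPELINE must be set, as the asserts above
 * enforce.
 */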
VkImageViewType
radv_meta_get_view_type(const struct radv_image *image)
{
	switch (image->type) {
	case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
	case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
	case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
	default:
		unreachable("bad VkImageType");
	}
}

/**
 * When creating a destination VkImageView, this function provides the needed
 * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
 */
uint32_t
radv_meta_get_iview_layer(const struct radv_image *dest_image,
			  const VkImageSubresourceLayers *dest_subresource,
			  const VkOffset3D *dest_offset)
{
	switch (dest_image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		return dest_subresource->baseArrayLayer;
	case VK_IMAGE_TYPE_3D:
		/* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
		 * but meta does it anyway. When doing so, we translate the
		 * destination's z offset into an array offset.
		 */
		return dest_offset->z;
	default:
		assert(!"bad VkImageType");
		return 0;
	}
}
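
/*
 * Worked example (illustrative): when a meta operation writes into depth
 * slice z == 5 of a 3D image, the view it creates gets baseArrayLayer == 5,
 * so the single attached layer aliases the intended slice. For 1D/2D images
 * the subresource's own baseArrayLayer is passed through unchanged.
 */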
static void *
meta_alloc(void *_device, size_t size, size_t alignment,
	   VkSystemAllocationScope allocationScope)
{
	struct radv_device *device = _device;
	return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
					   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void *
meta_realloc(void *_device, void *original, size_t size, size_t alignment,
	     VkSystemAllocationScope allocationScope)
{
	struct radv_device *device = _device;
	return device->alloc.pfnReallocation(device->alloc.pUserData, original,
					     size, alignment,
					     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void
meta_free(void *_device, void *data)
{
	struct radv_device *device = _device;
	return device->alloc.pfnFree(device->alloc.pUserData, data);
}

static bool
radv_builtin_cache_path(char *path)
{
	char *xdg_cache_home = getenv("XDG_CACHE_HOME");
	const char *suffix = "/radv_builtin_shaders";
	const char *suffix2 = "/.cache/radv_builtin_shaders";
	struct passwd pwd, *result;
	char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max, but suffices here. */

	if (xdg_cache_home) {
		if (strlen(xdg_cache_home) + strlen(suffix) > PATH_MAX)
			return false;

		strcpy(path, xdg_cache_home);
		strcat(path, suffix);
		return true;
	}

	getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
	if (!result)
		return false;

	strcpy(path, pwd.pw_dir);
	strcat(path, "/.cache");
	mkdir(path, 0755);

	strcat(path, suffix);
	return true;
}
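
/*
 * Resulting cache paths, for example (illustrative values only):
 *
 *	XDG_CACHE_HOME=/tmp/cache
 *		-> /tmp/cache/radv_builtin_shaders
 *	XDG_CACHE_HOME unset, home directory /home/alice
 *		-> /home/alice/.cache/radv_builtin_shaders
 */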
static void
radv_load_meta_pipeline(struct radv_device *device)
{
	char path[PATH_MAX + 1];
	struct stat st;
	void *data = NULL;

	if (!radv_builtin_cache_path(path))
		return;

	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return;
	if (fstat(fd, &st))
		goto fail;
	data = malloc(st.st_size);
	if (!data)
		goto fail;
	if (read(fd, data, st.st_size) == -1)
		goto fail;

	radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
fail:
	free(data);
	close(fd);
}

static void
radv_store_meta_pipeline(struct radv_device *device)
{
	char path[PATH_MAX + 1], path2[PATH_MAX + 7];
	size_t size;
	void *data = NULL;

	if (!device->meta_state.cache.modified)
		return;

	if (radv_GetPipelineCacheData(radv_device_to_handle(device),
				      radv_pipeline_cache_to_handle(&device->meta_state.cache),
				      &size, NULL))
		return;

	if (!radv_builtin_cache_path(path))
		return;

	strcpy(path2, path);
	strcat(path2, "XXXXXX");

	/* Write to a unique temporary file, then rename() it over the final
	 * path, so readers never observe a partially written cache.
	 */
	int fd = mkstemp(path2);
	if (fd < 0)
		return;
	data = malloc(size);
	if (!data)
		goto fail;

	if (radv_GetPipelineCacheData(radv_device_to_handle(device),
				      radv_pipeline_cache_to_handle(&device->meta_state.cache),
				      &size, data))
		goto fail;
	if (write(fd, data, size) == -1)
		goto fail;

	rename(path2, path);
fail:
	free(data);
	close(fd);
	unlink(path2);
}
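
/*
 * Note that radv_GetPipelineCacheData() above follows the standard Vulkan
 * two-call idiom: the first call, with pData == NULL, only queries the
 * required size; the second call fills the allocated buffer.
 */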
VkResult
radv_device_init_meta(struct radv_device *device)
{
	VkResult result;

	device->meta_state.alloc = (VkAllocationCallbacks) {
		.pUserData = device,
		.pfnAllocation = meta_alloc,
		.pfnReallocation = meta_realloc,
		.pfnFree = meta_free,
	};

	device->meta_state.cache.alloc = device->meta_state.alloc;
	radv_pipeline_cache_init(&device->meta_state.cache, device);
	radv_load_meta_pipeline(device);

	result = radv_device_init_meta_clear_state(device);
	if (result != VK_SUCCESS)
		goto fail_clear;

	result = radv_device_init_meta_resolve_state(device);
	if (result != VK_SUCCESS)
		goto fail_resolve;

	result = radv_device_init_meta_blit_state(device);
	if (result != VK_SUCCESS)
		goto fail_blit;

	result = radv_device_init_meta_blit2d_state(device);
	if (result != VK_SUCCESS)
		goto fail_blit2d;

	result = radv_device_init_meta_bufimage_state(device);
	if (result != VK_SUCCESS)
		goto fail_bufimage;

	result = radv_device_init_meta_depth_decomp_state(device);
	if (result != VK_SUCCESS)
		goto fail_depth_decomp;

	result = radv_device_init_meta_buffer_state(device);
	if (result != VK_SUCCESS)
		goto fail_buffer;

	result = radv_device_init_meta_query_state(device);
	if (result != VK_SUCCESS)
		goto fail_query;

	result = radv_device_init_meta_fast_clear_flush_state(device);
	if (result != VK_SUCCESS)
		goto fail_fast_clear;

	result = radv_device_init_meta_resolve_compute_state(device);
	if (result != VK_SUCCESS)
		goto fail_resolve_compute;

	result = radv_device_init_meta_resolve_fragment_state(device);
	if (result != VK_SUCCESS)
		goto fail_resolve_fragment;

	return VK_SUCCESS;

fail_resolve_fragment:
	radv_device_finish_meta_resolve_compute_state(device);
fail_resolve_compute:
	radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
	radv_device_finish_meta_query_state(device);
fail_query:
	radv_device_finish_meta_buffer_state(device);
fail_buffer:
	radv_device_finish_meta_depth_decomp_state(device);
fail_depth_decomp:
	radv_device_finish_meta_bufimage_state(device);
fail_bufimage:
	radv_device_finish_meta_blit2d_state(device);
fail_blit2d:
	radv_device_finish_meta_blit_state(device);
fail_blit:
	radv_device_finish_meta_resolve_state(device);
fail_resolve:
	radv_device_finish_meta_clear_state(device);
fail_clear:
	radv_pipeline_cache_finish(&device->meta_state.cache);
	return result;
}

void
radv_device_finish_meta(struct radv_device *device)
{
	radv_device_finish_meta_clear_state(device);
	radv_device_finish_meta_resolve_state(device);
	radv_device_finish_meta_blit_state(device);
	radv_device_finish_meta_blit2d_state(device);
	radv_device_finish_meta_bufimage_state(device);
	radv_device_finish_meta_depth_decomp_state(device);
	radv_device_finish_meta_query_state(device);
	radv_device_finish_meta_buffer_state(device);
	radv_device_finish_meta_fast_clear_flush_state(device);
	radv_device_finish_meta_resolve_compute_state(device);
	radv_device_finish_meta_resolve_fragment_state(device);

	radv_store_meta_pipeline(device);
	radv_pipeline_cache_finish(&device->meta_state.cache);
}

nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
{
	nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(vs_b->shader, nir_intrinsic_load_vertex_id_zero_base);
	nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
	nir_builder_instr_insert(vs_b, &vertex_id->instr);

	/* vertex 0 - -1.0, -1.0 */
	/* vertex 1 - -1.0, 3.0 */
	/* vertex 2 - 3.0, -1.0 */
	/* so channel 0 is vertex_id != 2 ? -1.0 : 3.0
	   channel 1 is vertex_id != 1 ? -1.0 : 3.0 */

	nir_ssa_def *c0cmp = nir_ine(vs_b, &vertex_id->dest.ssa,
				     nir_imm_int(vs_b, 2));
	nir_ssa_def *c1cmp = nir_ine(vs_b, &vertex_id->dest.ssa,
				     nir_imm_int(vs_b, 1));

	nir_ssa_def *comp[4];
	comp[0] = nir_bcsel(vs_b, c0cmp,
			    nir_imm_float(vs_b, -1.0),
			    nir_imm_float(vs_b, 3.0));

	comp[1] = nir_bcsel(vs_b, c1cmp,
			    nir_imm_float(vs_b, -1.0),
			    nir_imm_float(vs_b, 3.0));
	comp[2] = comp2;
	comp[3] = nir_imm_float(vs_b, 1.0);
	nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);

	return outvec;
}

nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b)
{
	return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
}
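
/*
 * The positions generated above implement the usual "oversized triangle"
 * trick; the equivalent scalar logic (a sketch, matching the bcsels):
 *
 *	x = (vertex_id != 2) ? -1.0 : 3.0;
 *	y = (vertex_id != 1) ? -1.0 : 3.0;
 *
 *	vertex_id 0 -> (-1.0, -1.0)
 *	vertex_id 1 -> (-1.0,  3.0)
 *	vertex_id 2 -> ( 3.0, -1.0)
 *
 * A single triangle with these corners covers the whole [-1, 1] clip-space
 * square once clipped, so a 3-vertex draw fills the render area without any
 * vertex or index buffers.
 */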
/* vertex shader that generates vertices */
nir_shader *
radv_meta_build_nir_vs_generate_vertices(void)
{
	const struct glsl_type *vec4 = glsl_vec4_type();

	nir_builder b;
	nir_variable *v_position;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_vs_gen_verts");

	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);

	v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
					 "gl_Position");
	v_position->data.location = VARYING_SLOT_POS;

	nir_store_var(&b, v_position, outvec, 0xf);

	return b.shader;
}

nir_shader *
radv_meta_build_nir_fs_noop(void)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
	b.shader->info.name = ralloc_asprintf(b.shader,
					      "meta_noop_fs");

	return b.shader;
}
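
/*
 * Usage note (not enforced by this file): these two builders are the
 * generic vertex/fragment pair for meta pipelines. The vertex shader emits
 * the oversized triangle above, while the no-op fragment shader suits
 * passes that only need fixed-function side effects (e.g. depth-only work)
 * and write no color output.
 */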
void radv_meta_build_resolve_shader_core(nir_builder *b,
					 bool is_integer,
					 int samples,
					 nir_variable *input_img,
					 nir_variable *color,
					 nir_ssa_def *img_coord)
{
	/* do a txf_ms on each sample */
	nir_ssa_def *tmp;
	nir_if *outer_if = NULL;

	nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
	tex->op = nir_texop_txf_ms;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(img_coord);
	tex->src[1].src_type = nir_tex_src_ms_index;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 2;
	tex->texture = nir_deref_var_create(tex, input_img);
	tex->sampler = NULL;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(b, &tex->instr);

	tmp = &tex->dest.ssa;

	if (!is_integer && samples > 1) {
		nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 1);
		tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
		tex_all_same->op = nir_texop_samples_identical;
		tex_all_same->src[0].src_type = nir_tex_src_coord;
		tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
		tex_all_same->dest_type = nir_type_float;
		tex_all_same->is_array = false;
		tex_all_same->coord_components = 2;
		tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
		tex_all_same->sampler = NULL;

		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
		nir_builder_instr_insert(b, &tex_all_same->instr);

		nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
		nir_if *if_stmt = nir_if_create(b->shader);
		if_stmt->condition = nir_src_for_ssa(all_same);
		nir_cf_node_insert(b->cursor, &if_stmt->cf_node);

		b->cursor = nir_after_cf_list(&if_stmt->then_list);
		for (int i = 1; i < samples; i++) {
			nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 2);
			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
			tex_add->op = nir_texop_txf_ms;
			tex_add->src[0].src_type = nir_tex_src_coord;
			tex_add->src[0].src = nir_src_for_ssa(img_coord);
			tex_add->src[1].src_type = nir_tex_src_ms_index;
			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
			tex_add->dest_type = nir_type_float;
			tex_add->is_array = false;
			tex_add->coord_components = 2;
			tex_add->texture = nir_deref_var_create(tex_add, input_img);
			tex_add->sampler = NULL;

			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
			nir_builder_instr_insert(b, &tex_add->instr);

			tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
		}

		tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
		nir_store_var(b, color, tmp, 0xf);
		b->cursor = nir_after_cf_list(&if_stmt->else_list);
		outer_if = if_stmt;
	}
	nir_store_var(b, color, &tex->dest.ssa, 0xf);

	if (outer_if)
		b->cursor = nir_after_cf_node(&outer_if->cf_node);
}
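
/*
 * For reference, the control flow built above corresponds roughly to this
 * pseudocode (names are descriptive only, not real helpers):
 *
 *	color = txf_ms(input_img, img_coord, sample 0);
 *	if (!is_integer && samples > 1) {
 *		if (!samples_identical(input_img, img_coord)) {
 *			sum = color;
 *			for (i = 1; i < samples; i++)
 *				sum += txf_ms(input_img, img_coord, sample i);
 *			color = sum / samples;
 *		}
 *	}
 *
 * The samples_identical fast path skips the per-sample fetches when the
 * hardware reports that every sample of the pixel holds the same value;
 * integer formats always resolve to sample 0, since averaging them is
 * meaningless.
 */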