1 /**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ***************************************************************************/ 23 24 #include "swr_screen.h" 25 #include "swr_context.h" 26 #include "swr_resource.h" 27 #include "swr_fence.h" 28 #include "swr_query.h" 29 #include "jit_api.h" 30 31 #include "util/u_draw.h" 32 #include "util/u_prim.h" 33 34 /* 35 * Draw vertex arrays, with optional indexing, optional instancing. 36 */ 37 static void 38 swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) 39 { 40 struct swr_context *ctx = swr_context(pipe); 41 42 if (!info->count_from_stream_output && !info->indirect && 43 !info->primitive_restart && 44 !u_trim_pipe_prim(info->mode, (unsigned*)&info->count)) 45 return; 46 47 if (!swr_check_render_cond(pipe)) 48 return; 49 50 if (info->indirect) { 51 util_draw_indirect(pipe, info); 52 return; 53 } 54 55 /* If indexed draw, force vertex validation since index buffer comes 56 * from draw info. */ 57 if (info->index_size) 58 ctx->dirty |= SWR_NEW_VERTEX; 59 60 /* Update derived state, pass draw info to update function. */ 61 swr_update_derived(pipe, info); 62 63 swr_update_draw_context(ctx); 64 65 if (ctx->vs->pipe.stream_output.num_outputs) { 66 if (!ctx->vs->soFunc[info->mode]) { 67 STREAMOUT_COMPILE_STATE state = {0}; 68 struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output; 69 70 state.numVertsPerPrim = u_vertices_per_prim(info->mode); 71 72 uint32_t offsets[MAX_SO_STREAMS] = {0}; 73 uint32_t num = 0; 74 75 for (uint32_t i = 0; i < so->num_outputs; i++) { 76 assert(so->output[i].stream == 0); // @todo 77 uint32_t output_buffer = so->output[i].output_buffer; 78 if (so->output[i].dst_offset != offsets[output_buffer]) { 79 // hole - need to fill 80 state.stream.decl[num].bufferIndex = output_buffer; 81 state.stream.decl[num].hole = true; 82 state.stream.decl[num].componentMask = 83 (1 << (so->output[i].dst_offset - offsets[output_buffer])) 84 - 1; 85 num++; 86 offsets[output_buffer] = so->output[i].dst_offset; 87 } 88 89 unsigned attrib_slot = so->output[i].register_index; 90 attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs); 91 92 state.stream.decl[num].bufferIndex = output_buffer; 93 state.stream.decl[num].attribSlot = attrib_slot; 94 state.stream.decl[num].componentMask = 95 ((1 << so->output[i].num_components) - 1) 96 << so->output[i].start_component; 97 state.stream.decl[num].hole = false; 98 num++; 99 100 offsets[output_buffer] += so->output[i].num_components; 101 } 102 103 state.stream.numDecls = num; 104 105 HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr; 106 ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state); 107 debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]); 108 assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL"); 109 } 110 111 ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0); 112 } 113 114 struct swr_vertex_element_state *velems = ctx->velems; 115 if (info->primitive_restart) 116 velems->fsState.cutIndex = info->restart_index; 117 else 118 velems->fsState.cutIndex = 0; 119 velems->fsState.bEnableCutIndex = info->primitive_restart; 120 velems->fsState.bPartialVertexBuffer = (info->min_index > 0); 121 122 swr_jit_fetch_key key; 123 swr_generate_fetch_key(key, velems); 124 auto search = velems->map.find(key); 125 if (search != velems->map.end()) { 126 velems->fsFunc = search->second; 127 } else { 128 HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr; 129 velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState); 130 131 debug_printf("fetch shader %p\n", velems->fsFunc); 132 assert(velems->fsFunc && "Error: FetchShader = NULL"); 133 134 velems->map.insert(std::make_pair(key, velems->fsFunc)); 135 } 136 137 ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc); 138 139 /* Set up frontend state 140 * XXX setup provokingVertex & topologyProvokingVertex */ 141 SWR_FRONTEND_STATE feState = {0}; 142 143 // feState.vsVertexSize seeds the PA size that is used as an interface 144 // between all the shader stages, so it has to be large enough to 145 // incorporate all interfaces between stages 146 147 // max of gs and vs num_outputs 148 feState.vsVertexSize = ctx->vs->info.base.num_outputs; 149 if (ctx->gs && 150 ctx->gs->info.base.num_outputs > feState.vsVertexSize) { 151 feState.vsVertexSize = ctx->gs->info.base.num_outputs; 152 } 153 154 if (ctx->vs->info.base.num_outputs) { 155 // gs does not adjust for position in SGV slot at input from vs 156 if (!ctx->gs) 157 feState.vsVertexSize--; 158 } 159 160 // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT 161 feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT; 162 163 // The PA in the clipper does not handle BE vertex sizes 164 // different from FE. Increase vertexsize only for the cases that needed it 165 166 // primid needs a slot 167 if (ctx->fs->info.base.uses_primid) 168 feState.vsVertexSize++; 169 // sprite coord enable 170 if (ctx->rasterizer->sprite_coord_enable) 171 feState.vsVertexSize++; 172 173 174 if (ctx->rasterizer->flatshade_first) { 175 feState.provokingVertex = {1, 0, 0}; 176 } else { 177 feState.provokingVertex = {2, 1, 2}; 178 } 179 180 enum pipe_prim_type topology; 181 if (ctx->gs) 182 topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; 183 else 184 topology = info->mode; 185 186 switch (topology) { 187 case PIPE_PRIM_TRIANGLE_FAN: 188 feState.topologyProvokingVertex = feState.provokingVertex.triFan; 189 break; 190 case PIPE_PRIM_TRIANGLE_STRIP: 191 case PIPE_PRIM_TRIANGLES: 192 feState.topologyProvokingVertex = feState.provokingVertex.triStripList; 193 break; 194 case PIPE_PRIM_QUAD_STRIP: 195 case PIPE_PRIM_QUADS: 196 if (ctx->rasterizer->flatshade_first) 197 feState.topologyProvokingVertex = 0; 198 else 199 feState.topologyProvokingVertex = 3; 200 break; 201 case PIPE_PRIM_LINES: 202 case PIPE_PRIM_LINE_LOOP: 203 case PIPE_PRIM_LINE_STRIP: 204 feState.topologyProvokingVertex = feState.provokingVertex.lineStripList; 205 break; 206 default: 207 feState.topologyProvokingVertex = 0; 208 } 209 210 feState.bEnableCutIndex = info->primitive_restart; 211 ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState); 212 213 if (info->index_size) 214 ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext, 215 swr_convert_prim_topology(info->mode), 216 info->count, 217 info->instance_count, 218 info->start, 219 info->index_bias, 220 info->start_instance); 221 else 222 ctx->api.pfnSwrDrawInstanced(ctx->swrContext, 223 swr_convert_prim_topology(info->mode), 224 info->count, 225 info->instance_count, 226 info->start, 227 info->start_instance); 228 229 /* On large client-buffer draw, we used client buffer directly, without 230 * copy. Block until draw is finished. 231 * VMD is an example application that benefits from this. */ 232 if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) { 233 struct swr_screen *screen = swr_screen(pipe->screen); 234 swr_fence_submit(ctx, screen->flush_fence); 235 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0); 236 } 237 } 238 239 240 static void 241 swr_flush(struct pipe_context *pipe, 242 struct pipe_fence_handle **fence, 243 unsigned flags) 244 { 245 struct swr_context *ctx = swr_context(pipe); 246 struct swr_screen *screen = swr_screen(pipe->screen); 247 248 for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) { 249 struct pipe_surface *cb = ctx->framebuffer.cbufs[i]; 250 if (cb) { 251 swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED); 252 } 253 } 254 if (ctx->framebuffer.zsbuf) { 255 swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture, 256 SWR_TILE_RESOLVED); 257 } 258 259 if (fence) 260 swr_fence_reference(pipe->screen, fence, screen->flush_fence); 261 } 262 263 void 264 swr_finish(struct pipe_context *pipe) 265 { 266 struct pipe_fence_handle *fence = nullptr; 267 268 swr_flush(pipe, &fence, 0); 269 swr_fence_finish(pipe->screen, NULL, fence, 0); 270 swr_fence_reference(pipe->screen, &fence, NULL); 271 } 272 273 /* 274 * Invalidate tiles so they can be reloaded back when needed 275 */ 276 void 277 swr_invalidate_render_target(struct pipe_context *pipe, 278 uint32_t attachment, 279 uint16_t width, uint16_t height) 280 { 281 struct swr_context *ctx = swr_context(pipe); 282 283 /* grab the rect from the passed in arguments */ 284 swr_update_draw_context(ctx); 285 SWR_RECT full_rect = 286 {0, 0, (int32_t)width, (int32_t)height}; 287 ctx->api.pfnSwrInvalidateTiles(ctx->swrContext, 288 1 << attachment, 289 full_rect); 290 } 291 292 293 /* 294 * Store SWR HotTiles back to renderTarget surface. 295 */ 296 void 297 swr_store_render_target(struct pipe_context *pipe, 298 uint32_t attachment, 299 enum SWR_TILE_STATE post_tile_state) 300 { 301 struct swr_context *ctx = swr_context(pipe); 302 struct swr_draw_context *pDC = &ctx->swrDC; 303 struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment]; 304 305 /* Only proceed if there's a valid surface to store to */ 306 if (renderTarget->xpBaseAddress) { 307 swr_update_draw_context(ctx); 308 SWR_RECT full_rect = 309 {0, 0, 310 (int32_t)u_minify(renderTarget->width, renderTarget->lod), 311 (int32_t)u_minify(renderTarget->height, renderTarget->lod)}; 312 ctx->api.pfnSwrStoreTiles(ctx->swrContext, 313 1 << attachment, 314 post_tile_state, 315 full_rect); 316 } 317 } 318 319 void 320 swr_store_dirty_resource(struct pipe_context *pipe, 321 struct pipe_resource *resource, 322 enum SWR_TILE_STATE post_tile_state) 323 { 324 /* Only store resource if it has been written to */ 325 if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) { 326 struct swr_context *ctx = swr_context(pipe); 327 struct swr_screen *screen = swr_screen(pipe->screen); 328 struct swr_resource *spr = swr_resource(resource); 329 330 swr_draw_context *pDC = &ctx->swrDC; 331 SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; 332 for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) 333 if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress || 334 (spr->secondary.xpBaseAddress && 335 renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) { 336 swr_store_render_target(pipe, i, post_tile_state); 337 338 /* Mesa thinks depth/stencil are fused, so we'll never get an 339 * explicit resource for stencil. So, if checking depth, then 340 * also check for stencil. */ 341 if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) { 342 swr_store_render_target( 343 pipe, SWR_ATTACHMENT_STENCIL, post_tile_state); 344 } 345 346 /* This fence signals StoreTiles completion */ 347 swr_fence_submit(ctx, screen->flush_fence); 348 349 break; 350 } 351 } 352 } 353 354 void 355 swr_draw_init(struct pipe_context *pipe) 356 { 357 pipe->draw_vbo = swr_draw_vbo; 358 pipe->flush = swr_flush; 359 } 360