/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "radeon_fog.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )             \
do {                                            \
    int j;                                      \
    for ( j = 0 ; j < nr ; j++ )                \
        dst[j] = ((int *)src)[j];               \
    dst += nr;                                  \
} while (0)
#endif

void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12) {
        COPY_DWORDS(out, data, count * 3);
    }
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}
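/*
 * Illustrative sketch (not part of the build): how the radeonEmitVec*
 * helpers above are meant to be used.  They gather a strided attribute
 * stream into a tightly packed run of dwords in an already-mapped DMA
 * buffer.  The names "positions" and "nverts" below are hypothetical and
 * do not come from this driver.
 */
#if 0
static void example_pack_positions(uint32_t *out, const GLfloat *positions,
                                   int stride, int nverts)
{
    /* Three dwords (x, y, z) per vertex end up contiguous in "out",
       whatever the source stride was. */
    radeonEmitVec12(out, positions, stride, nverts);
}
#endif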

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}

void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t *)((char *)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}

void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
                         GLvoid *data, int stride, int count)
{
    int i;
    float *out;
    int size = 1;
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d\n",
                __FUNCTION__, count, stride);

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    /* Emit the data */
    radeon_bo_map(aos->bo, 1);
    out = (float *)((char *)aos->bo->ptr + aos->offset);
    for (i = 0; i < count; i++) {
        out[0] = radeonComputeFogBlendFactor(ctx, *(GLfloat *)data);
        out++;
        data += stride;
    }
    radeon_bo_unmap(aos->bo);
}

void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;

    /* Bump the minimum size to at least the requested size, aligned up
       to the next 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n",
                 __FUNCTION__, size, rmesa->dma.minimum_size);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* Buffers are pushed and popped at the end of the list, so the
           unused buffers that accumulate at the beginning can be freed
           later. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer was flushed in radeon_revalidate_bos;
           allocate a fresh buffer. */
        goto again_alloc;
    }
    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo *bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop for you.\n");
    }
    return ret != -EBUSY;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    /* Move waiting buffer objects to the free list.  The wait list gives the
       GPU time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* Free objects that are too small to satisfy the current (large)
           minimum request size. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo)) {
            break;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* Move reserved buffers to the wait list. */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        radeon_bo_unmap(dma_bo->bo);
        /* Free objects that are too small to satisfy the current (large)
           minimum request size. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* Free buffer objects that have been unused for some time. */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(struct gl_context *ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);
    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}

/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
    GLuint bytes = vsize * nverts;
    void *head;
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* The command buffer flush restarted DMA; mark that stored
           vertices need to be flushed before reuse. */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT(vsize == rmesa->swtcl.vertex_size * 4);
    ASSERT(rmesa->dma.flush == rcommon_flush_last_swtcl_prim);
    ASSERT(rmesa->dma.current_used +
           rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
           rmesa->dma.current_vertexptr);

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}

void radeonReleaseArrays(struct gl_context *ctx, GLuint newinputs)
{
    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
    int i;
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}
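/*
 * Illustrative sketch (not part of the build): a typical caller of the
 * AOS emit path above.  rcommon_emit_vector() allocates a GTT DMA region,
 * maps it, packs the vertices with the matching radeonEmitVec* helper and
 * unmaps the buffer again; released buffers later cycle through the
 * reserved -> wait -> free lists in radeonReleaseDmaRegions().  The names
 * "attrib" and "nr" below are hypothetical and do not come from this file.
 */
#if 0
static void example_emit_attrib(struct gl_context *ctx, struct radeon_aos *aos,
                                const GLfloat *attrib, int nr)
{
    /* Pack "nr" four-component vertices, each 16 bytes apart in "attrib". */
    rcommon_emit_vector(ctx, aos, attrib, 4, 16, nr);
}
#endif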