/* radeon_dma.c — DMA buffer management and vertex array (AOS) emission for radeon. */
      1 /**************************************************************************
      2 
      3 Copyright (C) 2004 Nicolai Haehnle.
      4 Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
      5 
      6 The Weather Channel (TM) funded Tungsten Graphics to develop the
      7 initial release of the Radeon 8500 driver under the XFree86 license.
      8 This notice must be preserved.
      9 
     10 All Rights Reserved.
     11 
     12 Permission is hereby granted, free of charge, to any person obtaining a
     13 copy of this software and associated documentation files (the "Software"),
     14 to deal in the Software without restriction, including without limitation
     15 on the rights to use, copy, modify, merge, publish, distribute, sub
     16 license, and/or sell copies of the Software, and to permit persons to whom
     17 the Software is furnished to do so, subject to the following conditions:
     18 
     19 The above copyright notice and this permission notice (including the next
     20 paragraph) shall be included in all copies or substantial portions of the
     21 Software.
     22 
     23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     29 USE OR OTHER DEALINGS IN THE SOFTWARE.
     30 
     31 **************************************************************************/
     32 
     33 #include <errno.h>
     34 #include "radeon_common.h"
     35 #include "radeon_fog.h"
     36 #include "util/simple_list.h"
     37 
/*
 * COPY_DWORDS(dst, src, nr): copy 'nr' 32-bit words from 'src' to 'dst',
 * advancing 'dst' past the copied words.  'src' is not modified from the
 * caller's point of view.
 */
#if defined(USE_X86_ASM)
/* x86 fast path: a single "rep ; movsl" string instruction moves nr dwords.
 * ECX takes the count ("0" ties it to the first output), EDI/ESI take
 * dst/src, and dst is written back from EDI so it ends past the copy.
 * NOTE(review): the "=%c" output constraint is unusual — '%' is normally a
 * commutativity modifier on inputs; presumably "=c" was intended.  Confirm
 * against the GCC extended-asm constraint docs before changing it. */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: dword-at-a-time copy, then advance dst like the
 * asm version does. */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif
     57 
     58 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
     59 {
     60 	int i;
     61 
     62 	if (RADEON_DEBUG & RADEON_VERTS)
     63 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
     64 			__func__, count, stride, (void *)out, (void *)data);
     65 
     66 	if (stride == 4)
     67 		COPY_DWORDS(out, data, count);
     68 	else
     69 		for (i = 0; i < count; i++) {
     70 			out[0] = *(int *)data;
     71 			out++;
     72 			data += stride;
     73 		}
     74 }
     75 
     76 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
     77 {
     78 	int i;
     79 
     80 	if (RADEON_DEBUG & RADEON_VERTS)
     81 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
     82 			__func__, count, stride, (void *)out, (void *)data);
     83 
     84 	if (stride == 8)
     85 		COPY_DWORDS(out, data, count * 2);
     86 	else
     87 		for (i = 0; i < count; i++) {
     88 			out[0] = *(int *)data;
     89 			out[1] = *(int *)(data + 4);
     90 			out += 2;
     91 			data += stride;
     92 		}
     93 }
     94 
     95 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
     96 {
     97 	int i;
     98 
     99 	if (RADEON_DEBUG & RADEON_VERTS)
    100 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
    101 			__func__, count, stride, (void *)out, (void *)data);
    102 
    103 	if (stride == 12) {
    104 		COPY_DWORDS(out, data, count * 3);
    105     }
    106 	else
    107 		for (i = 0; i < count; i++) {
    108 			out[0] = *(int *)data;
    109 			out[1] = *(int *)(data + 4);
    110 			out[2] = *(int *)(data + 8);
    111 			out += 3;
    112 			data += stride;
    113 		}
    114 }
    115 
    116 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
    117 {
    118 	int i;
    119 
    120 	if (RADEON_DEBUG & RADEON_VERTS)
    121 		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
    122 			__func__, count, stride, (void *)out, (void *)data);
    123 
    124 	if (stride == 16)
    125 		COPY_DWORDS(out, data, count * 4);
    126 	else
    127 		for (i = 0; i < count; i++) {
    128 			out[0] = *(int *)data;
    129 			out[1] = *(int *)(data + 4);
    130 			out[2] = *(int *)(data + 8);
    131 			out[3] = *(int *)(data + 12);
    132 			out += 4;
    133 			data += stride;
    134 		}
    135 }
    136 
    137 void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
    138 			 const GLvoid * data, int size, int stride, int count)
    139 {
    140 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    141 	uint32_t *out;
    142 
    143 	if (stride == 0) {
    144 		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
    145 		count = 1;
    146 		aos->stride = 0;
    147 	} else {
    148 		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
    149 		aos->stride = size;
    150 	}
    151 
    152 	aos->components = size;
    153 	aos->count = count;
    154 
    155 	radeon_bo_map(aos->bo, 1);
    156 	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    157 	switch (size) {
    158 	case 1: radeonEmitVec4(out, data, stride, count); break;
    159 	case 2: radeonEmitVec8(out, data, stride, count); break;
    160 	case 3: radeonEmitVec12(out, data, stride, count); break;
    161 	case 4: radeonEmitVec16(out, data, stride, count); break;
    162 	default:
    163 		assert(0);
    164 		break;
    165 	}
    166 	radeon_bo_unmap(aos->bo);
    167 }
    168 
    169 void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
    170 			 GLvoid *data, int stride, int count)
    171 {
    172 	int i;
    173 	float *out;
    174 	int size = 1;
    175 	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    176 
    177 	if (RADEON_DEBUG & RADEON_VERTS)
    178 		fprintf(stderr, "%s count %d stride %d\n",
    179 			__func__, count, stride);
    180 
    181 	if (stride == 0) {
    182 		radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
    183 		count = 1;
    184 		aos->stride = 0;
    185 	} else {
    186 		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
    187 		aos->stride = size;
    188 	}
    189 
    190 	aos->components = size;
    191 	aos->count = count;
    192 
    193 	/* Emit the data */
    194 	radeon_bo_map(aos->bo, 1);
    195 	out = (float*)((char*)aos->bo->ptr + aos->offset);
    196 	for (i = 0; i < count; i++) {
    197 		out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
    198 		out++;
    199 		data += stride;
    200 	}
    201 	radeon_bo_unmap(aos->bo);
    202 }
    203 
    204 void radeon_init_dma(radeonContextPtr rmesa)
    205 {
    206 	make_empty_list(&rmesa->dma.free);
    207 	make_empty_list(&rmesa->dma.wait);
    208 	make_empty_list(&rmesa->dma.reserved);
    209 	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
    210 }
    211 
    212 void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
    213 {
    214 	struct radeon_dma_bo *dma_bo = NULL;
    215 	/* we set minimum sizes to at least requested size
    216 	   aligned to next 16 bytes. */
    217 	if (size > rmesa->dma.minimum_size)
    218 		rmesa->dma.minimum_size = (size + 15) & (~15);
    219 
    220 	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n",
    221 			__func__, size, rmesa->dma.minimum_size);
    222 
    223 	if (is_empty_list(&rmesa->dma.free)
    224 	      || last_elem(&rmesa->dma.free)->bo->size < size) {
    225 		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
    226 		assert(dma_bo);
    227 
    228 again_alloc:
    229 		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
    230 					    0, rmesa->dma.minimum_size, 4,
    231 					    RADEON_GEM_DOMAIN_GTT, 0);
    232 
    233 		if (!dma_bo->bo) {
    234 			rcommonFlushCmdBuf(rmesa, __func__);
    235 			goto again_alloc;
    236 		}
    237 		insert_at_head(&rmesa->dma.reserved, dma_bo);
    238 	} else {
    239 		/* We push and pop buffers from end of list so we can keep
    240 		   counter on unused buffers for later freeing them from
    241 		   begin of list */
    242 		dma_bo = last_elem(&rmesa->dma.free);
    243 		remove_from_list(dma_bo);
    244 		insert_at_head(&rmesa->dma.reserved, dma_bo);
    245 	}
    246 
    247 	rmesa->dma.current_used = 0;
    248 	rmesa->dma.current_vertexptr = 0;
    249 
    250 	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
    251 					  first_elem(&rmesa->dma.reserved)->bo,
    252 					  RADEON_GEM_DOMAIN_GTT, 0))
    253 		fprintf(stderr,"failure to revalidate BOs - badness\n");
    254 
    255 	if (is_empty_list(&rmesa->dma.reserved)) {
    256         /* Cmd buff have been flushed in radeon_revalidate_bos */
    257 		goto again_alloc;
    258 	}
    259 	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
    260 }
    261 
/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 *
 * \param pbo        receives a new reference to the backing buffer object;
 *                   the caller owns it and must radeon_bo_unref() it.
 * \param poffset    receives the byte offset of the region within *pbo.
 * \param bytes      size of the region to allocate.
 * \param alignment  requested alignment in bytes — must be a power of two
 *                   for the mask arithmetic below to be correct.
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __func__, bytes);

	/* Flush buffered software-TCL vertices first so current_used and
	 * current_vertexptr agree (checked by the assert below). */
	if (rmesa->dma.flush)
		rmesa->dma.flush(&rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	/* Round the allocation cursor up to the requested alignment. */
	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	/* No reserved buffer, or the request doesn't fit in what's left of
	 * it: grab a fresh buffer (discarding the remainder). */
	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
    294 
    295 void radeonFreeDmaRegions(radeonContextPtr rmesa)
    296 {
    297 	struct radeon_dma_bo *dma_bo;
    298 	struct radeon_dma_bo *temp;
    299 	if (RADEON_DEBUG & RADEON_DMA)
    300 		fprintf(stderr, "%s\n", __func__);
    301 
    302 	foreach_s(dma_bo, temp, &rmesa->dma.free) {
    303 		remove_from_list(dma_bo);
    304 	        radeon_bo_unref(dma_bo->bo);
    305 		free(dma_bo);
    306 	}
    307 
    308 	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
    309 		remove_from_list(dma_bo);
    310 	        radeon_bo_unref(dma_bo->bo);
    311 		free(dma_bo);
    312 	}
    313 
    314 	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
    315 		remove_from_list(dma_bo);
    316 	        radeon_bo_unref(dma_bo->bo);
    317 		free(dma_bo);
    318 	}
    319 }
    320 
    321 void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
    322 {
    323 	if (is_empty_list(&rmesa->dma.reserved))
    324 		return;
    325 
    326 	if (RADEON_DEBUG & RADEON_IOCTL)
    327 		fprintf(stderr, "%s %d\n", __func__, return_bytes);
    328 	rmesa->dma.current_used -= return_bytes;
    329 	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
    330 }
    331 
    332 static int radeon_bo_is_idle(struct radeon_bo* bo)
    333 {
    334 	uint32_t domain;
    335 	int ret = radeon_bo_is_busy(bo, &domain);
    336 	if (ret == -EINVAL) {
    337 		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
    338 			"This may cause small performance drop for you.\n");
    339 	}
    340 	return ret != -EBUSY;
    341 }
    342 
/**
 * Age the DMA buffer lists: idle buffers on the wait list move to the
 * free list, reserved buffers are unmapped and moved to the wait list,
 * and free buffers untouched for DMA_BO_FREE_TIME ticks are released.
 * Undersized buffers (smaller than the current minimum_size) are freed
 * immediately instead of being recycled.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	/* The free-list head's expire_counter doubles as a global tick;
	 * buffers queued now expire DMA_BO_FREE_TIME ticks in the future. */
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
		      __func__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		/* Matching the current tick means the counter wrapped all the
		 * way around while the bo never went idle — give up on it. */
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
		   radeon_bo_unref(dma_bo->bo);
		   remove_from_list(dma_bo);
		   free(dma_bo);
		   continue;
		}
		/* First still-busy bo stops the scan; later entries are
		 * presumably at least as recent. */
		if (!radeon_bo_is_idle(dma_bo->bo)) {
			break;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Reserved buffers were mapped by radeonRefillCurrentDmaRegion. */
		radeon_bo_unmap(dma_bo->bo);
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
		   radeon_bo_unref(dma_bo->bo);
		   remove_from_list(dma_bo);
		   free(dma_bo);
		   continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		/* Free list is kept in expiry order: stop at the first
		 * entry whose time has not yet come. */
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
	        radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}

}
    417 
    418 
/* Flush vertices in the current dma region.
 *
 * Installed as rmesa->dma.flush by rcommonAllocDmaLowVerts(); hands any
 * buffered software-TCL vertices to the HW backend and drops this
 * function's reference to the swtcl buffer object.
 */
void rcommon_flush_last_swtcl_prim( struct gl_context *ctx  )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);
	/* Clear the callback first so nothing re-enters this flush. */
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
	    GLuint current_offset = dma->current_used;

	    /* All buffered vertices must lie between current_used and
	     * current_vertexptr (same invariant rcommonAllocDmaLowVerts
	     * maintains). */
	    assert (dma->current_used +
		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		    dma->current_vertexptr);

	    if (dma->current_used != dma->current_vertexptr) {
		    dma->current_used = dma->current_vertexptr;

		    /* Emit the pending vertices via the HW-specific hook. */
		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
	    }
	    rmesa->swtcl.numverts = 0;
	}
	/* Drop the reference taken when rcommonAllocDmaLowVerts pinned
	 * this buffer. */
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}
/* Alloc space in the current dma region.
 *
 * Returns a pointer to space for 'nverts' vertices of 'vsize' bytes each,
 * or NULL when the current buffer was full — in that case buffered
 * vertices have been flushed and a fresh region reserved, and the caller
 * is expected to retry.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	/* Not enough room in the current buffer: flush pending vertices,
	 * refill, and signal the caller to restart via NULL. */
	if(is_empty_list(&rmesa->dma.reserved)
	      ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(&rmesa->glCtx);
		}

                radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

        if (!rmesa->dma.flush) {
		/* if cmdbuf flushed DMA restart */
                rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
        }

	/* Invariant: all buffered vertices lie between current_used and
	 * current_vertexptr. */
	assert( vsize == rmesa->swtcl.vertex_size * 4 );
        assert( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
        assert( rmesa->dma.current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                rmesa->dma.current_vertexptr );

	/* First vertices since the last flush: pin and map the current
	 * buffer; rcommon_flush_last_swtcl_prim drops this reference. */
	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
    493 
    494 void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
    495 {
    496    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    497    int i;
    498 	if (RADEON_DEBUG & RADEON_IOCTL)
    499 		fprintf(stderr, "%s\n", __func__);
    500 
    501    if (radeon->dma.flush) {
    502        radeon->dma.flush(&radeon->glCtx);
    503    }
    504    for (i = 0; i < radeon->tcl.aos_count; i++) {
    505       if (radeon->tcl.aos[i].bo) {
    506          radeon_bo_unref(radeon->tcl.aos[i].bo);
    507          radeon->tcl.aos[i].bo = NULL;
    508 
    509       }
    510    }
    511 }
    512