1 /* 2 * Copyright (C) 2010 Advanced Micro Devices, Inc. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28 #include "radeon_common.h" 29 #include "radeon_context.h" 30 #include "radeon_blit.h" 31 #include "radeon_tex.h" 32 33 static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, 34 int reg, int count) 35 { 36 if (count) 37 return CP_PACKET0(reg, count - 1); 38 return CP_PACKET2; 39 } 40 41 /* common formats supported as both textures and render targets */ 42 unsigned r100_check_blit(mesa_format mesa_format, uint32_t dst_pitch) 43 { 44 /* XXX others? */ 45 if (_mesa_little_endian()) { 46 switch (mesa_format) { 47 case MESA_FORMAT_B8G8R8A8_UNORM: 48 case MESA_FORMAT_B8G8R8X8_UNORM: 49 case MESA_FORMAT_B5G6R5_UNORM: 50 case MESA_FORMAT_B4G4R4A4_UNORM: 51 case MESA_FORMAT_B5G5R5A1_UNORM: 52 case MESA_FORMAT_A_UNORM8: 53 case MESA_FORMAT_L_UNORM8: 54 case MESA_FORMAT_I_UNORM8: 55 break; 56 default: 57 return 0; 58 } 59 } 60 else { 61 switch (mesa_format) { 62 case MESA_FORMAT_A8R8G8B8_UNORM: 63 case MESA_FORMAT_X8R8G8B8_UNORM: 64 case MESA_FORMAT_R5G6B5_UNORM: 65 case MESA_FORMAT_A4R4G4B4_UNORM: 66 case MESA_FORMAT_A1R5G5B5_UNORM: 67 case MESA_FORMAT_A_UNORM8: 68 case MESA_FORMAT_L_UNORM8: 69 case MESA_FORMAT_I_UNORM8: 70 break; 71 default: 72 return 0; 73 } 74 } 75 76 /* Rendering to small buffer doesn't work. 77 * Looks like a hw limitation. 78 */ 79 if (dst_pitch < 32) 80 return 0; 81 82 /* ??? */ 83 if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) 84 return 0; 85 86 return 1; 87 } 88 89 static inline void emit_vtx_state(struct r100_context *r100) 90 { 91 BATCH_LOCALS(&r100->radeon); 92 93 BEGIN_BATCH(8); 94 if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { 95 OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0); 96 } else { 97 OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS); 98 99 } 100 OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | 101 RADEON_TEX1_W_ROUTING_USE_W0)); 102 OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0); 103 OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD | 104 RADEON_BFACE_SOLID | 105 RADEON_FFACE_SOLID | 106 RADEON_VTX_PIX_CENTER_OGL | 107 RADEON_ROUND_MODE_ROUND | 108 RADEON_ROUND_PREC_4TH_PIX)); 109 END_BATCH(); 110 } 111 112 static void inline emit_tx_setup(struct r100_context *r100, 113 mesa_format mesa_format, 114 struct radeon_bo *bo, 115 intptr_t offset, 116 unsigned width, 117 unsigned height, 118 unsigned pitch) 119 { 120 uint32_t txformat = RADEON_TXFORMAT_NON_POWER2; 121 BATCH_LOCALS(&r100->radeon); 122 123 assert(width <= 2048); 124 assert(height <= 2048); 125 assert(offset % 32 == 0); 126 127 txformat |= tx_table[mesa_format].format; 128 129 if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) 130 offset |= RADEON_TXO_MACRO_TILE; 131 if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE) 132 offset |= RADEON_TXO_MICRO_TILE_X2; 133 134 BEGIN_BATCH(18); 135 OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE); 136 OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO | 137 RADEON_COLOR_ARG_B_ZERO | 138 RADEON_COLOR_ARG_C_T0_COLOR | 139 RADEON_BLEND_CTL_ADD | 140 RADEON_CLAMP_TX)); 141 OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO | 142 RADEON_ALPHA_ARG_B_ZERO | 143 RADEON_ALPHA_ARG_C_T0_ALPHA | 144 RADEON_BLEND_CTL_ADD | 145 RADEON_CLAMP_TX)); 146 OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST | 147 RADEON_CLAMP_T_CLAMP_LAST | 148 RADEON_MAG_FILTER_NEAREST | 149 RADEON_MIN_FILTER_NEAREST)); 150 OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat); 151 OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) | 152 ((height - 1) << RADEON_TEX_VSIZE_SHIFT))); 153 OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32); 154 155 OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1); 156 OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); 157 158 END_BATCH(); 159 } 160 161 static inline void emit_cb_setup(struct r100_context *r100, 162 struct radeon_bo *bo, 163 intptr_t offset, 164 mesa_format mesa_format, 165 unsigned pitch, 166 unsigned width, 167 unsigned height) 168 { 169 uint32_t dst_pitch = pitch; 170 uint32_t dst_format = 0; 171 BATCH_LOCALS(&r100->radeon); 172 173 /* XXX others? */ 174 switch (mesa_format) { 175 /* The first of each pair is for little, the second for big endian. */ 176 case MESA_FORMAT_B8G8R8A8_UNORM: 177 case MESA_FORMAT_A8R8G8B8_UNORM: 178 case MESA_FORMAT_B8G8R8X8_UNORM: 179 case MESA_FORMAT_X8R8G8B8_UNORM: 180 dst_format = RADEON_COLOR_FORMAT_ARGB8888; 181 break; 182 case MESA_FORMAT_B5G6R5_UNORM: 183 case MESA_FORMAT_R5G6B5_UNORM: 184 dst_format = RADEON_COLOR_FORMAT_RGB565; 185 break; 186 case MESA_FORMAT_B4G4R4A4_UNORM: 187 case MESA_FORMAT_A4R4G4B4_UNORM: 188 dst_format = RADEON_COLOR_FORMAT_ARGB4444; 189 break; 190 case MESA_FORMAT_B5G5R5A1_UNORM: 191 case MESA_FORMAT_A1R5G5B5_UNORM: 192 dst_format = RADEON_COLOR_FORMAT_ARGB1555; 193 break; 194 case MESA_FORMAT_A_UNORM8: 195 case MESA_FORMAT_L_UNORM8: 196 case MESA_FORMAT_I_UNORM8: 197 dst_format = RADEON_COLOR_FORMAT_RGB8; 198 break; 199 default: 200 break; 201 } 202 203 if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE) 204 dst_pitch |= RADEON_COLOR_TILE_ENABLE; 205 206 if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE) 207 dst_pitch |= RADEON_COLOR_MICROTILE_ENABLE; 208 209 BEGIN_BATCH(18); 210 OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0); 211 OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, (((width - 1) << RADEON_RE_WIDTH_SHIFT) | 212 ((height - 1) << RADEON_RE_HEIGHT_SHIFT))); 213 OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff); 214 OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO); 215 OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format); 216 217 OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1); 218 OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); 219 OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1); 220 OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0); 221 222 END_BATCH(); 223 } 224 225 static GLboolean validate_buffers(struct r100_context *r100, 226 struct radeon_bo *src_bo, 227 struct radeon_bo *dst_bo) 228 { 229 int ret; 230 231 radeon_cs_space_reset_bos(r100->radeon.cmdbuf.cs); 232 233 ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, 234 src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); 235 if (ret) 236 return GL_FALSE; 237 238 ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, 239 dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); 240 if (ret) 241 return GL_FALSE; 242 243 return GL_TRUE; 244 } 245 246 /** 247 * Calculate texcoords for given image region. 248 * Output values are [minx, maxx, miny, maxy] 249 */ 250 static inline void calc_tex_coords(float img_width, float img_height, 251 float x, float y, 252 float reg_width, float reg_height, 253 unsigned flip_y, float *buf) 254 { 255 buf[0] = x / img_width; 256 buf[1] = buf[0] + reg_width / img_width; 257 buf[2] = y / img_height; 258 buf[3] = buf[2] + reg_height / img_height; 259 if (flip_y) 260 { 261 buf[2] = 1.0 - buf[2]; 262 buf[3] = 1.0 - buf[3]; 263 } 264 } 265 266 static inline void emit_draw_packet(struct r100_context *r100, 267 unsigned src_width, unsigned src_height, 268 unsigned src_x_offset, unsigned src_y_offset, 269 unsigned dst_x_offset, unsigned dst_y_offset, 270 unsigned reg_width, unsigned reg_height, 271 unsigned flip_y) 272 { 273 float texcoords[4]; 274 float verts[12]; 275 BATCH_LOCALS(&r100->radeon); 276 277 calc_tex_coords(src_width, src_height, 278 src_x_offset, src_y_offset, 279 reg_width, reg_height, 280 flip_y, texcoords); 281 282 verts[0] = dst_x_offset; 283 verts[1] = dst_y_offset + reg_height; 284 verts[2] = texcoords[0]; 285 verts[3] = texcoords[3]; 286 287 verts[4] = dst_x_offset + reg_width; 288 verts[5] = dst_y_offset + reg_height; 289 verts[6] = texcoords[1]; 290 verts[7] = texcoords[3]; 291 292 verts[8] = dst_x_offset + reg_width; 293 verts[9] = dst_y_offset; 294 verts[10] = texcoords[1]; 295 verts[11] = texcoords[2]; 296 297 BEGIN_BATCH(15); 298 OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16)); 299 OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0); 300 OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING | 301 RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST | 302 RADEON_CP_VC_CNTL_MAOS_ENABLE | 303 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | 304 (3 << 16)); 305 OUT_BATCH_TABLE(verts, 12); 306 END_BATCH(); 307 } 308 309 /** 310 * Copy a region of [@a width x @a height] pixels from source buffer 311 * to destination buffer. 312 * @param[in] r100 r100 context 313 * @param[in] src_bo source radeon buffer object 314 * @param[in] src_offset offset of the source image in the @a src_bo 315 * @param[in] src_mesaformat source image format 316 * @param[in] src_pitch aligned source image width 317 * @param[in] src_width source image width 318 * @param[in] src_height source image height 319 * @param[in] src_x_offset x offset in the source image 320 * @param[in] src_y_offset y offset in the source image 321 * @param[in] dst_bo destination radeon buffer object 322 * @param[in] dst_offset offset of the destination image in the @a dst_bo 323 * @param[in] dst_mesaformat destination image format 324 * @param[in] dst_pitch aligned destination image width 325 * @param[in] dst_width destination image width 326 * @param[in] dst_height destination image height 327 * @param[in] dst_x_offset x offset in the destination image 328 * @param[in] dst_y_offset y offset in the destination image 329 * @param[in] width region width 330 * @param[in] height region height 331 * @param[in] flip_y set if y coords of the source image need to be flipped 332 */ 333 unsigned r100_blit(struct gl_context *ctx, 334 struct radeon_bo *src_bo, 335 intptr_t src_offset, 336 mesa_format src_mesaformat, 337 unsigned src_pitch, 338 unsigned src_width, 339 unsigned src_height, 340 unsigned src_x_offset, 341 unsigned src_y_offset, 342 struct radeon_bo *dst_bo, 343 intptr_t dst_offset, 344 mesa_format dst_mesaformat, 345 unsigned dst_pitch, 346 unsigned dst_width, 347 unsigned dst_height, 348 unsigned dst_x_offset, 349 unsigned dst_y_offset, 350 unsigned reg_width, 351 unsigned reg_height, 352 unsigned flip_y) 353 { 354 struct r100_context *r100 = R100_CONTEXT(ctx); 355 356 if (!r100_check_blit(dst_mesaformat, dst_pitch)) 357 return GL_FALSE; 358 359 /* Make sure that colorbuffer has even width - hw limitation */ 360 if (dst_pitch % 2 > 0) 361 ++dst_pitch; 362 363 /* Need to clamp the region size to make sure 364 * we don't read outside of the source buffer 365 * or write outside of the destination buffer. 366 */ 367 if (reg_width + src_x_offset > src_width) 368 reg_width = src_width - src_x_offset; 369 if (reg_height + src_y_offset > src_height) 370 reg_height = src_height - src_y_offset; 371 if (reg_width + dst_x_offset > dst_width) 372 reg_width = dst_width - dst_x_offset; 373 if (reg_height + dst_y_offset > dst_height) 374 reg_height = dst_height - dst_y_offset; 375 376 if (src_bo == dst_bo) { 377 return GL_FALSE; 378 } 379 380 if (src_offset % 32 || dst_offset % 32) { 381 return GL_FALSE; 382 } 383 384 if (0) { 385 fprintf(stderr, "src: size [%d x %d], pitch %d, offset %zd " 386 "offset [%d x %d], format %s, bo %p\n", 387 src_width, src_height, src_pitch, src_offset, 388 src_x_offset, src_y_offset, 389 _mesa_get_format_name(src_mesaformat), 390 src_bo); 391 fprintf(stderr, "dst: pitch %d offset %zd, offset[%d x %d], format %s, bo %p\n", 392 dst_pitch, dst_offset, dst_x_offset, dst_y_offset, 393 _mesa_get_format_name(dst_mesaformat), dst_bo); 394 fprintf(stderr, "region: %d x %d\n", reg_width, reg_height); 395 } 396 397 /* Flush is needed to make sure that source buffer has correct data */ 398 radeonFlush(ctx); 399 400 rcommonEnsureCmdBufSpace(&r100->radeon, 59, __func__); 401 402 if (!validate_buffers(r100, src_bo, dst_bo)) 403 return GL_FALSE; 404 405 /* 8 */ 406 emit_vtx_state(r100); 407 /* 18 */ 408 emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch); 409 /* 18 */ 410 emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height); 411 /* 15 */ 412 emit_draw_packet(r100, src_width, src_height, 413 src_x_offset, src_y_offset, 414 dst_x_offset, dst_y_offset, 415 reg_width, reg_height, 416 flip_y); 417 418 radeonFlush(ctx); 419 420 /* We submitted those packets outside our state atom mechanism. Thus 421 * make sure they are all resubmitted the next time. */ 422 r100->hw.ctx.dirty = GL_TRUE; 423 r100->hw.msk.dirty = GL_TRUE; 424 r100->hw.set.dirty = GL_TRUE; 425 r100->hw.tex[0].dirty = GL_TRUE; 426 r100->hw.txr[0].dirty = GL_TRUE; 427 428 return GL_TRUE; 429 } 430