Home | History | Annotate | Download | only in i965
      1 /**************************************************************************
      2  *
      3  * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include "main/mtypes.h"
     30 #include "main/context.h"
     31 #include "main/enums.h"
     32 #include "main/colormac.h"
     33 #include "main/fbobject.h"
     34 
     35 #include "intel_blit.h"
     36 #include "intel_buffers.h"
     37 #include "intel_context.h"
     38 #include "intel_fbo.h"
     39 #include "intel_reg.h"
     40 #include "intel_regions.h"
     41 #include "intel_batchbuffer.h"
     42 #include "intel_mipmap_tree.h"
     43 
     44 #define FILE_DEBUG_FLAG DEBUG_BLIT
     45 
     46 static GLuint translate_raster_op(GLenum logicop)
     47 {
     48    switch(logicop) {
     49    case GL_CLEAR: return 0x00;
     50    case GL_AND: return 0x88;
     51    case GL_AND_REVERSE: return 0x44;
     52    case GL_COPY: return 0xCC;
     53    case GL_AND_INVERTED: return 0x22;
     54    case GL_NOOP: return 0xAA;
     55    case GL_XOR: return 0x66;
     56    case GL_OR: return 0xEE;
     57    case GL_NOR: return 0x11;
     58    case GL_EQUIV: return 0x99;
     59    case GL_INVERT: return 0x55;
     60    case GL_OR_REVERSE: return 0xDD;
     61    case GL_COPY_INVERTED: return 0x33;
     62    case GL_OR_INVERTED: return 0xBB;
     63    case GL_NAND: return 0x77;
     64    case GL_SET: return 0xFF;
     65    default: return 0;
     66    }
     67 }
     68 
     69 static uint32_t
     70 br13_for_cpp(int cpp)
     71 {
     72    switch (cpp) {
     73    case 4:
     74       return BR13_8888;
     75       break;
     76    case 2:
     77       return BR13_565;
     78       break;
     79    case 1:
     80       return BR13_8;
     81       break;
     82    default:
     83       assert(0);
     84       return 0;
     85    }
     86 }
     87 
     88 /* Copy BitBlt
     89  */
     90 bool
     91 intelEmitCopyBlit(struct intel_context *intel,
     92 		  GLuint cpp,
     93 		  GLshort src_pitch,
     94 		  drm_intel_bo *src_buffer,
     95 		  GLuint src_offset,
     96 		  uint32_t src_tiling,
     97 		  GLshort dst_pitch,
     98 		  drm_intel_bo *dst_buffer,
     99 		  GLuint dst_offset,
    100 		  uint32_t dst_tiling,
    101 		  GLshort src_x, GLshort src_y,
    102 		  GLshort dst_x, GLshort dst_y,
    103 		  GLshort w, GLshort h,
    104 		  GLenum logic_op)
    105 {
    106    GLuint CMD, BR13, pass = 0;
    107    int dst_y2 = dst_y + h;
    108    int dst_x2 = dst_x + w;
    109    drm_intel_bo *aper_array[3];
    110    BATCH_LOCALS;
    111 
    112    if (dst_tiling != I915_TILING_NONE) {
    113       if (dst_offset & 4095)
    114 	 return false;
    115       if (dst_tiling == I915_TILING_Y)
    116 	 return false;
    117    }
    118    if (src_tiling != I915_TILING_NONE) {
    119       if (src_offset & 4095)
    120 	 return false;
    121       if (src_tiling == I915_TILING_Y)
    122 	 return false;
    123    }
    124 
    125    /* do space check before going any further */
    126    do {
    127        aper_array[0] = intel->batch.bo;
    128        aper_array[1] = dst_buffer;
    129        aper_array[2] = src_buffer;
    130 
    131        if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
    132            intel_batchbuffer_flush(intel);
    133            pass++;
    134        } else
    135            break;
    136    } while (pass < 2);
    137 
    138    if (pass >= 2)
    139       return false;
    140 
    141    intel_batchbuffer_require_space(intel, 8 * 4, true);
    142    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
    143        __FUNCTION__,
    144        src_buffer, src_pitch, src_offset, src_x, src_y,
    145        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
    146 
    147    src_pitch *= cpp;
    148    dst_pitch *= cpp;
    149 
    150    /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    151     * the low bits.
    152     */
    153    assert(src_pitch % 4 == 0);
    154    assert(dst_pitch % 4 == 0);
    155 
    156    /* For big formats (such as floating point), do the copy using 32bpp and
    157     * multiply the coordinates.
    158     */
    159    if (cpp > 4) {
    160       assert(cpp % 4 == 0);
    161       dst_x *= cpp / 4;
    162       dst_x2 *= cpp / 4;
    163       src_x *= cpp / 4;
    164       cpp = 4;
    165    }
    166 
    167    BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
    168 
    169    switch (cpp) {
    170    case 1:
    171    case 2:
    172       CMD = XY_SRC_COPY_BLT_CMD;
    173       break;
    174    case 4:
    175       CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
    176       break;
    177    default:
    178       return false;
    179    }
    180 
    181 #ifndef I915
    182    if (dst_tiling != I915_TILING_NONE) {
    183       CMD |= XY_DST_TILED;
    184       dst_pitch /= 4;
    185    }
    186    if (src_tiling != I915_TILING_NONE) {
    187       CMD |= XY_SRC_TILED;
    188       src_pitch /= 4;
    189    }
    190 #endif
    191 
    192    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
    193       return true;
    194    }
    195 
    196    assert(dst_x < dst_x2);
    197    assert(dst_y < dst_y2);
    198 
    199    BEGIN_BATCH_BLT(8);
    200    OUT_BATCH(CMD);
    201    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    202    OUT_BATCH((dst_y << 16) | dst_x);
    203    OUT_BATCH((dst_y2 << 16) | dst_x2);
    204    OUT_RELOC_FENCED(dst_buffer,
    205 		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    206 		    dst_offset);
    207    OUT_BATCH((src_y << 16) | src_x);
    208    OUT_BATCH((uint16_t)src_pitch);
    209    OUT_RELOC_FENCED(src_buffer,
    210 		    I915_GEM_DOMAIN_RENDER, 0,
    211 		    src_offset);
    212    ADVANCE_BATCH();
    213 
    214    intel_batchbuffer_emit_mi_flush(intel);
    215 
    216    return true;
    217 }
    218 
    219 
    220 /**
    221  * Use blitting to clear the renderbuffers named by 'flags'.
    222  * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
    223  * since that might include software renderbuffers or renderbuffers
    224  * which we're clearing with triangles.
    225  * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
    226  */
    227 GLbitfield
    228 intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
    229 {
    230    struct intel_context *intel = intel_context(ctx);
    231    struct gl_framebuffer *fb = ctx->DrawBuffer;
    232    GLuint clear_depth_value, clear_depth_mask;
    233    GLint cx, cy, cw, ch;
    234    GLbitfield fail_mask = 0;
    235    BATCH_LOCALS;
    236 
    237    /*
    238     * Compute values for clearing the buffers.
    239     */
    240    clear_depth_value = 0;
    241    clear_depth_mask = 0;
    242    if (mask & BUFFER_BIT_DEPTH) {
    243       clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
    244       clear_depth_mask = XY_BLT_WRITE_RGB;
    245    }
    246    if (mask & BUFFER_BIT_STENCIL) {
    247       clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24;
    248       clear_depth_mask |= XY_BLT_WRITE_ALPHA;
    249    }
    250 
    251    cx = fb->_Xmin;
    252    if (_mesa_is_winsys_fbo(fb))
    253       cy = ctx->DrawBuffer->Height - fb->_Ymax;
    254    else
    255       cy = fb->_Ymin;
    256    cw = fb->_Xmax - fb->_Xmin;
    257    ch = fb->_Ymax - fb->_Ymin;
    258 
    259    if (cw == 0 || ch == 0)
    260       return 0;
    261 
    262    /* Loop over all renderbuffers */
    263    mask &= (1 << BUFFER_COUNT) - 1;
    264    while (mask) {
    265       GLuint buf = ffs(mask) - 1;
    266       bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL;
    267       struct intel_renderbuffer *irb;
    268       int x1, y1, x2, y2;
    269       uint32_t clear_val;
    270       uint32_t BR13, CMD;
    271       struct intel_region *region;
    272       int pitch, cpp;
    273       drm_intel_bo *aper_array[2];
    274 
    275       mask &= ~(1 << buf);
    276 
    277       irb = intel_get_renderbuffer(fb, buf);
    278       if (irb && irb->mt) {
    279 	 region = irb->mt->region;
    280 	 assert(region);
    281 	 assert(region->bo);
    282       } else {
    283          fail_mask |= 1 << buf;
    284          continue;
    285       }
    286 
    287       /* OK, clear this renderbuffer */
    288       x1 = cx + irb->draw_x;
    289       y1 = cy + irb->draw_y;
    290       x2 = cx + cw + irb->draw_x;
    291       y2 = cy + ch + irb->draw_y;
    292 
    293       pitch = region->pitch;
    294       cpp = region->cpp;
    295 
    296       DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
    297 	  __FUNCTION__,
    298 	  region->bo, (pitch * cpp),
    299 	  x1, y1, x2 - x1, y2 - y1);
    300 
    301       BR13 = 0xf0 << 16;
    302       CMD = XY_COLOR_BLT_CMD;
    303 
    304       /* Setup the blit command */
    305       if (cpp == 4) {
    306 	 if (is_depth_stencil) {
    307 	    CMD |= clear_depth_mask;
    308 	 } else {
    309 	    /* clearing RGBA */
    310 	    CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
    311 	 }
    312       }
    313 
    314       assert(region->tiling != I915_TILING_Y);
    315 
    316 #ifndef I915
    317       if (region->tiling != I915_TILING_NONE) {
    318 	 CMD |= XY_DST_TILED;
    319 	 pitch /= 4;
    320       }
    321 #endif
    322       BR13 |= (pitch * cpp);
    323 
    324       if (is_depth_stencil) {
    325 	 clear_val = clear_depth_value;
    326       } else {
    327 	 uint8_t clear[4];
    328 	 GLfloat *color = ctx->Color.ClearColor.f;
    329 
    330 	 _mesa_unclamped_float_rgba_to_ubyte(clear, color);
    331 
    332 	 switch (intel_rb_format(irb)) {
    333 	 case MESA_FORMAT_ARGB8888:
    334 	 case MESA_FORMAT_XRGB8888:
    335 	    clear_val = PACK_COLOR_8888(clear[3], clear[0],
    336 					clear[1], clear[2]);
    337 	    break;
    338 	 case MESA_FORMAT_RGB565:
    339 	    clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
    340 	    break;
    341 	 case MESA_FORMAT_ARGB4444:
    342 	    clear_val = PACK_COLOR_4444(clear[3], clear[0],
    343 					clear[1], clear[2]);
    344 	    break;
    345 	 case MESA_FORMAT_ARGB1555:
    346 	    clear_val = PACK_COLOR_1555(clear[3], clear[0],
    347 					clear[1], clear[2]);
    348 	    break;
    349 	 case MESA_FORMAT_A8:
    350 	    clear_val = PACK_COLOR_8888(clear[3], clear[3],
    351 					clear[3], clear[3]);
    352 	    break;
    353 	 default:
    354 	    fail_mask |= 1 << buf;
    355 	    continue;
    356 	 }
    357       }
    358 
    359       BR13 |= br13_for_cpp(cpp);
    360 
    361       assert(x1 < x2);
    362       assert(y1 < y2);
    363 
    364       /* do space check before going any further */
    365       aper_array[0] = intel->batch.bo;
    366       aper_array[1] = region->bo;
    367 
    368       if (drm_intel_bufmgr_check_aperture_space(aper_array,
    369 						ARRAY_SIZE(aper_array)) != 0) {
    370 	 intel_batchbuffer_flush(intel);
    371       }
    372 
    373       BEGIN_BATCH_BLT(6);
    374       OUT_BATCH(CMD);
    375       OUT_BATCH(BR13);
    376       OUT_BATCH((y1 << 16) | x1);
    377       OUT_BATCH((y2 << 16) | x2);
    378       OUT_RELOC_FENCED(region->bo,
    379 		       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    380 		       0);
    381       OUT_BATCH(clear_val);
    382       ADVANCE_BATCH();
    383 
    384       if (intel->always_flush_cache)
    385 	 intel_batchbuffer_emit_mi_flush(intel);
    386 
    387       if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
    388 	 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
    389    }
    390 
    391    return fail_mask;
    392 }
    393 
    394 bool
    395 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    396 				  GLuint cpp,
    397 				  GLubyte *src_bits, GLuint src_size,
    398 				  GLuint fg_color,
    399 				  GLshort dst_pitch,
    400 				  drm_intel_bo *dst_buffer,
    401 				  GLuint dst_offset,
    402 				  uint32_t dst_tiling,
    403 				  GLshort x, GLshort y,
    404 				  GLshort w, GLshort h,
    405 				  GLenum logic_op)
    406 {
    407    int dwords = ALIGN(src_size, 8) / 4;
    408    uint32_t opcode, br13, blit_cmd;
    409 
    410    if (dst_tiling != I915_TILING_NONE) {
    411       if (dst_offset & 4095)
    412 	 return false;
    413       if (dst_tiling == I915_TILING_Y)
    414 	 return false;
    415    }
    416 
    417    assert( logic_op - GL_CLEAR >= 0 );
    418    assert( logic_op - GL_CLEAR < 0x10 );
    419    assert(dst_pitch > 0);
    420 
    421    if (w < 0 || h < 0)
    422       return true;
    423 
    424    dst_pitch *= cpp;
    425 
    426    DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
    427        __FUNCTION__,
    428        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
    429 
    430    intel_batchbuffer_require_space(intel,
    431 				   (8 * 4) +
    432 				   (3 * 4) +
    433 				   dwords * 4, true);
    434 
    435    opcode = XY_SETUP_BLT_CMD;
    436    if (cpp == 4)
    437       opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
    438 #ifndef I915
    439    if (dst_tiling != I915_TILING_NONE) {
    440       opcode |= XY_DST_TILED;
    441       dst_pitch /= 4;
    442    }
    443 #endif
    444 
    445    br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
    446    br13 |= br13_for_cpp(cpp);
    447 
    448    blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
    449    if (dst_tiling != I915_TILING_NONE)
    450       blit_cmd |= XY_DST_TILED;
    451 
    452    BEGIN_BATCH_BLT(8 + 3);
    453    OUT_BATCH(opcode);
    454    OUT_BATCH(br13);
    455    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    456    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
    457    OUT_RELOC_FENCED(dst_buffer,
    458 		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    459 		    dst_offset);
    460    OUT_BATCH(0); /* bg */
    461    OUT_BATCH(fg_color); /* fg */
    462    OUT_BATCH(0); /* pattern base addr */
    463 
    464    OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
    465    OUT_BATCH((y << 16) | x);
    466    OUT_BATCH(((y + h) << 16) | (x + w));
    467    ADVANCE_BATCH();
    468 
    469    intel_batchbuffer_data(intel, src_bits, dwords * 4, true);
    470 
    471    intel_batchbuffer_emit_mi_flush(intel);
    472 
    473    return true;
    474 }
    475 
    476 /* We don't have a memmove-type blit like some other hardware, so we'll do a
    477  * rectangular blit covering a large space, then emit 1-scanline blit at the
    478  * end to cover the last if we need.
    479  */
    480 void
    481 intel_emit_linear_blit(struct intel_context *intel,
    482 		       drm_intel_bo *dst_bo,
    483 		       unsigned int dst_offset,
    484 		       drm_intel_bo *src_bo,
    485 		       unsigned int src_offset,
    486 		       unsigned int size)
    487 {
    488    GLuint pitch, height;
    489    bool ok;
    490 
    491    /* The pitch given to the GPU must be DWORD aligned, and
    492     * we want width to match pitch. Max width is (1 << 15 - 1),
    493     * rounding that down to the nearest DWORD is 1 << 15 - 4
    494     */
    495    pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
    496    height = (pitch == 0) ? 1 : size / pitch;
    497    ok = intelEmitCopyBlit(intel, 1,
    498 			  pitch, src_bo, src_offset, I915_TILING_NONE,
    499 			  pitch, dst_bo, dst_offset, I915_TILING_NONE,
    500 			  0, 0, /* src x/y */
    501 			  0, 0, /* dst x/y */
    502 			  pitch, height, /* w, h */
    503 			  GL_COPY);
    504    assert(ok);
    505 
    506    src_offset += pitch * height;
    507    dst_offset += pitch * height;
    508    size -= pitch * height;
    509    assert (size < (1 << 15));
    510    pitch = ALIGN(size, 4);
    511    if (size != 0) {
    512       ok = intelEmitCopyBlit(intel, 1,
    513 			     pitch, src_bo, src_offset, I915_TILING_NONE,
    514 			     pitch, dst_bo, dst_offset, I915_TILING_NONE,
    515 			     0, 0, /* src x/y */
    516 			     0, 0, /* dst x/y */
    517 			     size, 1, /* w, h */
    518 			     GL_COPY);
    519       assert(ok);
    520    }
    521 }
    522 
    523 /**
    524  * Used to initialize the alpha value of an ARGB8888 teximage after
    525  * loading it from an XRGB8888 source.
    526  *
    527  * This is very common with glCopyTexImage2D().
    528  */
    529 void
    530 intel_set_teximage_alpha_to_one(struct gl_context *ctx,
    531 				struct intel_texture_image *intel_image)
    532 {
    533    struct intel_context *intel = intel_context(ctx);
    534    unsigned int image_x, image_y;
    535    uint32_t x1, y1, x2, y2;
    536    uint32_t BR13, CMD;
    537    int pitch, cpp;
    538    drm_intel_bo *aper_array[2];
    539    struct intel_region *region = intel_image->mt->region;
    540    int width, height, depth;
    541    BATCH_LOCALS;
    542 
    543    intel_miptree_get_dimensions_for_image(&intel_image->base.Base,
    544                                           &width, &height, &depth);
    545    assert(depth == 1);
    546 
    547    assert(intel_image->base.Base.TexFormat == MESA_FORMAT_ARGB8888);
    548 
    549    /* get dest x/y in destination texture */
    550    intel_miptree_get_image_offset(intel_image->mt,
    551 				  intel_image->base.Base.Level,
    552 				  intel_image->base.Base.Face,
    553 				  0,
    554 				  &image_x, &image_y);
    555 
    556    x1 = image_x;
    557    y1 = image_y;
    558    x2 = image_x + width;
    559    y2 = image_y + height;
    560 
    561    pitch = region->pitch;
    562    cpp = region->cpp;
    563 
    564    DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
    565        __FUNCTION__,
    566        intel_image->mt->region->bo, (pitch * cpp),
    567        x1, y1, x2 - x1, y2 - y1);
    568 
    569    BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
    570    CMD = XY_COLOR_BLT_CMD;
    571    CMD |= XY_BLT_WRITE_ALPHA;
    572 
    573    assert(region->tiling != I915_TILING_Y);
    574 
    575 #ifndef I915
    576    if (region->tiling != I915_TILING_NONE) {
    577       CMD |= XY_DST_TILED;
    578       pitch /= 4;
    579    }
    580 #endif
    581    BR13 |= (pitch * cpp);
    582 
    583    /* do space check before going any further */
    584    aper_array[0] = intel->batch.bo;
    585    aper_array[1] = region->bo;
    586 
    587    if (drm_intel_bufmgr_check_aperture_space(aper_array,
    588 					     ARRAY_SIZE(aper_array)) != 0) {
    589       intel_batchbuffer_flush(intel);
    590    }
    591 
    592    BEGIN_BATCH_BLT(6);
    593    OUT_BATCH(CMD);
    594    OUT_BATCH(BR13);
    595    OUT_BATCH((y1 << 16) | x1);
    596    OUT_BATCH((y2 << 16) | x2);
    597    OUT_RELOC_FENCED(region->bo,
    598 		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    599 		    0);
    600    OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
    601    ADVANCE_BATCH();
    602 
    603    intel_batchbuffer_emit_mi_flush(intel);
    604 }
    605