Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2003 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include "main/mtypes.h"
     30 #include "main/context.h"
     31 #include "main/enums.h"
     32 #include "main/colormac.h"
     33 #include "main/fbobject.h"
     34 
     35 #include "intel_blit.h"
     36 #include "intel_buffers.h"
     37 #include "intel_context.h"
     38 #include "intel_fbo.h"
     39 #include "intel_reg.h"
     40 #include "intel_regions.h"
     41 #include "intel_batchbuffer.h"
     42 #include "intel_mipmap_tree.h"
     43 
     44 #define FILE_DEBUG_FLAG DEBUG_BLIT
     45 
     46 static void
     47 intel_miptree_set_alpha_to_one(struct intel_context *intel,
     48                                struct intel_mipmap_tree *mt,
     49                                int x, int y, int width, int height);
     50 
     51 static GLuint translate_raster_op(GLenum logicop)
     52 {
     53    switch(logicop) {
     54    case GL_CLEAR: return 0x00;
     55    case GL_AND: return 0x88;
     56    case GL_AND_REVERSE: return 0x44;
     57    case GL_COPY: return 0xCC;
     58    case GL_AND_INVERTED: return 0x22;
     59    case GL_NOOP: return 0xAA;
     60    case GL_XOR: return 0x66;
     61    case GL_OR: return 0xEE;
     62    case GL_NOR: return 0x11;
     63    case GL_EQUIV: return 0x99;
     64    case GL_INVERT: return 0x55;
     65    case GL_OR_REVERSE: return 0xDD;
     66    case GL_COPY_INVERTED: return 0x33;
     67    case GL_OR_INVERTED: return 0xBB;
     68    case GL_NAND: return 0x77;
     69    case GL_SET: return 0xFF;
     70    default: return 0;
     71    }
     72 }
     73 
     74 static uint32_t
     75 br13_for_cpp(int cpp)
     76 {
     77    switch (cpp) {
     78    case 4:
     79       return BR13_8888;
     80       break;
     81    case 2:
     82       return BR13_565;
     83       break;
     84    case 1:
     85       return BR13_8;
     86       break;
     87    default:
     88       assert(0);
     89       return 0;
     90    }
     91 }
     92 
     93 /**
     94  * Implements a rectangular block transfer (blit) of pixels between two
     95  * miptrees.
     96  *
     97  * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
     98  * but limited, pitches and sizes allowed.
     99  *
    100  * The src/dst coordinates are relative to the given level/slice of the
    101  * miptree.
    102  *
    103  * If @src_flip or @dst_flip is set, then the rectangle within that miptree
    104  * will be inverted (including scanline order) when copying.  This is common
    105  * in GL when copying between window system and user-created
    106  * renderbuffers/textures.
    107  */
    108 bool
    109 intel_miptree_blit(struct intel_context *intel,
    110                    struct intel_mipmap_tree *src_mt,
    111                    int src_level, int src_slice,
    112                    uint32_t src_x, uint32_t src_y, bool src_flip,
    113                    struct intel_mipmap_tree *dst_mt,
    114                    int dst_level, int dst_slice,
    115                    uint32_t dst_x, uint32_t dst_y, bool dst_flip,
    116                    uint32_t width, uint32_t height,
    117                    GLenum logicop)
    118 {
    119    /* No sRGB decode or encode is done by the hardware blitter, which is
    120     * consistent with what we want in the callers (glCopyTexSubImage(),
    121     * glBlitFramebuffer(), texture validation, etc.).
    122     */
    123    mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
    124    mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
    125 
    126    /* The blitter doesn't support doing any format conversions.  We do also
    127     * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
    128     * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
    129     * channel to 1.0 at the end.
    130     */
    131    if (src_format != dst_format &&
    132       ((src_format != MESA_FORMAT_B8G8R8A8_UNORM &&
    133         src_format != MESA_FORMAT_B8G8R8X8_UNORM) ||
    134        (dst_format != MESA_FORMAT_B8G8R8A8_UNORM &&
    135         dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) {
    136       perf_debug("%s: Can't use hardware blitter from %s to %s, "
    137                  "falling back.\n", __func__,
    138                  _mesa_get_format_name(src_format),
    139                  _mesa_get_format_name(dst_format));
    140       return false;
    141    }
    142 
    143    /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
    144     * Data Size Limitations):
    145     *
    146     *    The BLT engine is capable of transferring very large quantities of
    147     *    graphics data. Any graphics data read from and written to the
    148     *    destination is permitted to represent a number of pixels that
    149     *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
    150     *    at the destination. The maximum number of pixels that may be
    151     *    represented per scan lines worth of graphics data depends on the
    152     *    color depth.
    153     *
    154     * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
    155     * 16-bit integer to represent buffer pitch, so it can only handle buffer
    156     * pitches < 32k.
    157     *
    158     * As a result of these two limitations, we can only use the blitter to do
    159     * this copy when the region's pitch is less than 32k.
    160     */
    161    if (src_mt->region->pitch > 32768 ||
    162        dst_mt->region->pitch > 32768) {
    163       perf_debug("Falling back due to >32k pitch\n");
    164       return false;
    165    }
    166 
    167    if (src_flip)
    168       src_y = src_mt->level[src_level].height - src_y - height;
    169 
    170    if (dst_flip)
    171       dst_y = dst_mt->level[dst_level].height - dst_y - height;
    172 
    173    int src_pitch = src_mt->region->pitch;
    174    if (src_flip != dst_flip)
    175       src_pitch = -src_pitch;
    176 
    177    uint32_t src_image_x, src_image_y;
    178    intel_miptree_get_image_offset(src_mt, src_level, src_slice,
    179                                   &src_image_x, &src_image_y);
    180    src_x += src_image_x;
    181    src_y += src_image_y;
    182 
    183    uint32_t dst_image_x, dst_image_y;
    184    intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
    185                                   &dst_image_x, &dst_image_y);
    186    dst_x += dst_image_x;
    187    dst_y += dst_image_y;
    188 
    189    if (!intelEmitCopyBlit(intel,
    190                           src_mt->cpp,
    191                           src_pitch,
    192                           src_mt->region->bo, src_mt->offset,
    193                           src_mt->region->tiling,
    194                           dst_mt->region->pitch,
    195                           dst_mt->region->bo, dst_mt->offset,
    196                           dst_mt->region->tiling,
    197                           src_x, src_y,
    198                           dst_x, dst_y,
    199                           width, height,
    200                           logicop)) {
    201       return false;
    202    }
    203 
    204    if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM &&
    205        dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) {
    206       intel_miptree_set_alpha_to_one(intel, dst_mt,
    207                                      dst_x, dst_y,
    208                                      width, height);
    209    }
    210 
    211    return true;
    212 }
    213 
    214 /* Copy BitBlt
    215  */
    216 bool
    217 intelEmitCopyBlit(struct intel_context *intel,
    218 		  GLuint cpp,
    219 		  GLshort src_pitch,
    220 		  drm_intel_bo *src_buffer,
    221 		  GLuint src_offset,
    222 		  uint32_t src_tiling,
    223 		  GLshort dst_pitch,
    224 		  drm_intel_bo *dst_buffer,
    225 		  GLuint dst_offset,
    226 		  uint32_t dst_tiling,
    227 		  GLshort src_x, GLshort src_y,
    228 		  GLshort dst_x, GLshort dst_y,
    229 		  GLshort w, GLshort h,
    230 		  GLenum logic_op)
    231 {
    232    GLuint CMD, BR13, pass = 0;
    233    int dst_y2 = dst_y + h;
    234    int dst_x2 = dst_x + w;
    235    drm_intel_bo *aper_array[3];
    236    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    237    bool src_y_tiled = src_tiling == I915_TILING_Y;
    238    BATCH_LOCALS;
    239 
    240    if (dst_tiling != I915_TILING_NONE) {
    241       if (dst_offset & 4095)
    242 	 return false;
    243    }
    244    if (src_tiling != I915_TILING_NONE) {
    245       if (src_offset & 4095)
    246 	 return false;
    247    }
    248    if (dst_y_tiled || src_y_tiled)
    249       return false;
    250 
    251    /* do space check before going any further */
    252    do {
    253        aper_array[0] = intel->batch.bo;
    254        aper_array[1] = dst_buffer;
    255        aper_array[2] = src_buffer;
    256 
    257        if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
    258            intel_batchbuffer_flush(intel);
    259            pass++;
    260        } else
    261            break;
    262    } while (pass < 2);
    263 
    264    if (pass >= 2)
    265       return false;
    266 
    267    intel_batchbuffer_require_space(intel, 8 * 4);
    268    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
    269        __func__,
    270        src_buffer, src_pitch, src_offset, src_x, src_y,
    271        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
    272 
    273    /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
    274     * the low bits.  Offsets must be naturally aligned.
    275     */
    276    if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
    277        dst_pitch % 4 != 0 || dst_offset % cpp != 0)
    278       return false;
    279 
    280    /* For big formats (such as floating point), do the copy using 16 or 32bpp
    281     * and multiply the coordinates.
    282     */
    283    if (cpp > 4) {
    284       if (cpp % 4 == 2) {
    285          dst_x *= cpp / 2;
    286          dst_x2 *= cpp / 2;
    287          src_x *= cpp / 2;
    288          cpp = 2;
    289       } else {
    290          assert(cpp % 4 == 0);
    291          dst_x *= cpp / 4;
    292          dst_x2 *= cpp / 4;
    293          src_x *= cpp / 4;
    294          cpp = 4;
    295       }
    296    }
    297 
    298    BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
    299 
    300    switch (cpp) {
    301    case 1:
    302    case 2:
    303       CMD = XY_SRC_COPY_BLT_CMD;
    304       break;
    305    case 4:
    306       CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
    307       break;
    308    default:
    309       return false;
    310    }
    311 
    312    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
    313       return true;
    314    }
    315 
    316    assert(dst_x < dst_x2);
    317    assert(dst_y < dst_y2);
    318 
    319    BEGIN_BATCH(8);
    320 
    321    OUT_BATCH(CMD | (8 - 2));
    322    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    323    OUT_BATCH((dst_y << 16) | dst_x);
    324    OUT_BATCH((dst_y2 << 16) | dst_x2);
    325    OUT_RELOC_FENCED(dst_buffer,
    326 		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    327 		    dst_offset);
    328    OUT_BATCH((src_y << 16) | src_x);
    329    OUT_BATCH((uint16_t)src_pitch);
    330    OUT_RELOC_FENCED(src_buffer,
    331 		    I915_GEM_DOMAIN_RENDER, 0,
    332 		    src_offset);
    333 
    334    ADVANCE_BATCH();
    335 
    336    intel_batchbuffer_emit_mi_flush(intel);
    337 
    338    return true;
    339 }
    340 
    341 
    342 /**
    343  * Use blitting to clear the renderbuffers named by 'flags'.
    344  * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
    345  * since that might include software renderbuffers or renderbuffers
    346  * which we're clearing with triangles.
    347  * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
    348  */
    349 GLbitfield
    350 intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
    351 {
    352    struct intel_context *intel = intel_context(ctx);
    353    struct gl_framebuffer *fb = ctx->DrawBuffer;
    354    GLuint clear_depth_value, clear_depth_mask;
    355    GLint cx, cy, cw, ch;
    356    GLbitfield fail_mask = 0;
    357    BATCH_LOCALS;
    358 
    359    /* Note: we don't use this function on Gen7+ hardware, so we can safely
    360     * ignore fast color clear issues.
    361     */
    362    assert(intel->gen < 7);
    363 
    364    /*
    365     * Compute values for clearing the buffers.
    366     */
    367    clear_depth_value = 0;
    368    clear_depth_mask = 0;
    369    if (mask & BUFFER_BIT_DEPTH) {
    370       clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
    371       clear_depth_mask = XY_BLT_WRITE_RGB;
    372    }
    373    if (mask & BUFFER_BIT_STENCIL) {
    374       clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24;
    375       clear_depth_mask |= XY_BLT_WRITE_ALPHA;
    376    }
    377 
    378    cx = fb->_Xmin;
    379    if (_mesa_is_winsys_fbo(fb))
    380       cy = ctx->DrawBuffer->Height - fb->_Ymax;
    381    else
    382       cy = fb->_Ymin;
    383    cw = fb->_Xmax - fb->_Xmin;
    384    ch = fb->_Ymax - fb->_Ymin;
    385 
    386    if (cw == 0 || ch == 0)
    387       return 0;
    388 
    389    /* Loop over all renderbuffers */
    390    mask &= (1 << BUFFER_COUNT) - 1;
    391    while (mask) {
    392       GLuint buf = ffs(mask) - 1;
    393       bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL;
    394       struct intel_renderbuffer *irb;
    395       int x1, y1, x2, y2;
    396       uint32_t clear_val;
    397       uint32_t BR13, CMD;
    398       struct intel_region *region;
    399       int pitch, cpp;
    400       drm_intel_bo *aper_array[2];
    401 
    402       mask &= ~(1 << buf);
    403 
    404       irb = intel_get_renderbuffer(fb, buf);
    405       if (irb && irb->mt) {
    406 	 region = irb->mt->region;
    407 	 assert(region);
    408 	 assert(region->bo);
    409       } else {
    410          fail_mask |= 1 << buf;
    411          continue;
    412       }
    413 
    414       /* OK, clear this renderbuffer */
    415       x1 = cx + irb->draw_x;
    416       y1 = cy + irb->draw_y;
    417       x2 = cx + cw + irb->draw_x;
    418       y2 = cy + ch + irb->draw_y;
    419 
    420       pitch = region->pitch;
    421       cpp = region->cpp;
    422 
    423       DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
    424 	  __func__,
    425 	  region->bo, pitch,
    426 	  x1, y1, x2 - x1, y2 - y1);
    427 
    428       BR13 = 0xf0 << 16;
    429       CMD = XY_COLOR_BLT_CMD;
    430 
    431       /* Setup the blit command */
    432       if (cpp == 4) {
    433 	 if (is_depth_stencil) {
    434 	    CMD |= clear_depth_mask;
    435 	 } else {
    436 	    /* clearing RGBA */
    437 	    CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
    438 	 }
    439       }
    440 
    441       assert(region->tiling != I915_TILING_Y);
    442 
    443       BR13 |= pitch;
    444 
    445       if (is_depth_stencil) {
    446 	 clear_val = clear_depth_value;
    447       } else {
    448 	 uint8_t clear[4];
    449 	 GLfloat *color = ctx->Color.ClearColor.f;
    450 
    451 	 _mesa_unclamped_float_rgba_to_ubyte(clear, color);
    452 
    453 	 switch (intel_rb_format(irb)) {
    454 	 case MESA_FORMAT_B8G8R8A8_UNORM:
    455 	 case MESA_FORMAT_B8G8R8X8_UNORM:
    456 	    clear_val = PACK_COLOR_8888(clear[3], clear[0],
    457 					clear[1], clear[2]);
    458 	    break;
    459 	 case MESA_FORMAT_B5G6R5_UNORM:
    460 	    clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
    461 	    break;
    462 	 case MESA_FORMAT_B4G4R4A4_UNORM:
    463 	    clear_val = PACK_COLOR_4444(clear[3], clear[0],
    464 					clear[1], clear[2]);
    465 	    break;
    466 	 case MESA_FORMAT_B5G5R5A1_UNORM:
    467 	    clear_val = PACK_COLOR_1555(clear[3], clear[0],
    468 					clear[1], clear[2]);
    469 	    break;
    470 	 case MESA_FORMAT_A_UNORM8:
    471 	    clear_val = PACK_COLOR_8888(clear[3], clear[3],
    472 					clear[3], clear[3]);
    473 	    break;
    474 	 default:
    475 	    fail_mask |= 1 << buf;
    476 	    continue;
    477 	 }
    478       }
    479 
    480       BR13 |= br13_for_cpp(cpp);
    481 
    482       assert(x1 < x2);
    483       assert(y1 < y2);
    484 
    485       /* do space check before going any further */
    486       aper_array[0] = intel->batch.bo;
    487       aper_array[1] = region->bo;
    488 
    489       if (drm_intel_bufmgr_check_aperture_space(aper_array,
    490 						ARRAY_SIZE(aper_array)) != 0) {
    491 	 intel_batchbuffer_flush(intel);
    492       }
    493 
    494       BEGIN_BATCH(6);
    495       OUT_BATCH(CMD | (6 - 2));
    496       OUT_BATCH(BR13);
    497       OUT_BATCH((y1 << 16) | x1);
    498       OUT_BATCH((y2 << 16) | x2);
    499       OUT_RELOC_FENCED(region->bo,
    500 		       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    501 		       0);
    502       OUT_BATCH(clear_val);
    503       ADVANCE_BATCH();
    504 
    505       if (intel->always_flush_cache)
    506 	 intel_batchbuffer_emit_mi_flush(intel);
    507 
    508       if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
    509 	 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
    510    }
    511 
    512    return fail_mask;
    513 }
    514 
    515 bool
    516 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    517 				  GLuint cpp,
    518 				  GLubyte *src_bits, GLuint src_size,
    519 				  GLuint fg_color,
    520 				  GLshort dst_pitch,
    521 				  drm_intel_bo *dst_buffer,
    522 				  GLuint dst_offset,
    523 				  uint32_t dst_tiling,
    524 				  GLshort x, GLshort y,
    525 				  GLshort w, GLshort h,
    526 				  GLenum logic_op)
    527 {
    528    int dwords = ALIGN(src_size, 8) / 4;
    529    uint32_t opcode, br13, blit_cmd;
    530 
    531    if (dst_tiling != I915_TILING_NONE) {
    532       if (dst_offset & 4095)
    533 	 return false;
    534       if (dst_tiling == I915_TILING_Y)
    535 	 return false;
    536    }
    537 
    538    assert((logic_op >= GL_CLEAR) && (logic_op <= (GL_CLEAR + 0x0f)));
    539    assert(dst_pitch > 0);
    540 
    541    if (w < 0 || h < 0)
    542       return true;
    543 
    544    DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
    545        __func__,
    546        dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
    547 
    548    intel_batchbuffer_require_space(intel,
    549 				   (8 * 4) +
    550 				   (3 * 4) +
    551 				   dwords * 4);
    552 
    553    opcode = XY_SETUP_BLT_CMD;
    554    if (cpp == 4)
    555       opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
    556 
    557    br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
    558    br13 |= br13_for_cpp(cpp);
    559 
    560    blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
    561    if (dst_tiling != I915_TILING_NONE)
    562       blit_cmd |= XY_DST_TILED;
    563 
    564    BEGIN_BATCH(8 + 3);
    565    OUT_BATCH(opcode | (8 - 2));
    566    OUT_BATCH(br13);
    567    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    568    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
    569    OUT_RELOC_FENCED(dst_buffer,
    570 		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    571 		    dst_offset);
    572    OUT_BATCH(0); /* bg */
    573    OUT_BATCH(fg_color); /* fg */
    574    OUT_BATCH(0); /* pattern base addr */
    575 
    576    OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
    577    OUT_BATCH((y << 16) | x);
    578    OUT_BATCH(((y + h) << 16) | (x + w));
    579    ADVANCE_BATCH();
    580 
    581    intel_batchbuffer_data(intel, src_bits, dwords * 4);
    582 
    583    intel_batchbuffer_emit_mi_flush(intel);
    584 
    585    return true;
    586 }
    587 
    588 /* We don't have a memmove-type blit like some other hardware, so we'll do a
    589  * rectangular blit covering a large space, then emit 1-scanline blit at the
    590  * end to cover the last if we need.
    591  */
    592 void
    593 intel_emit_linear_blit(struct intel_context *intel,
    594 		       drm_intel_bo *dst_bo,
    595 		       unsigned int dst_offset,
    596 		       drm_intel_bo *src_bo,
    597 		       unsigned int src_offset,
    598 		       unsigned int size)
    599 {
    600    struct gl_context *ctx = &intel->ctx;
    601    GLuint pitch, height;
    602    bool ok;
    603 
    604    /* The pitch given to the GPU must be DWORD aligned, and
    605     * we want width to match pitch. Max width is (1 << 15 - 1),
    606     * rounding that down to the nearest DWORD is 1 << 15 - 4
    607     */
    608    pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
    609    height = (pitch == 0) ? 1 : size / pitch;
    610    ok = intelEmitCopyBlit(intel, 1,
    611 			  pitch, src_bo, src_offset, I915_TILING_NONE,
    612 			  pitch, dst_bo, dst_offset, I915_TILING_NONE,
    613 			  0, 0, /* src x/y */
    614 			  0, 0, /* dst x/y */
    615 			  pitch, height, /* w, h */
    616 			  GL_COPY);
    617    if (!ok)
    618       _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
    619 
    620    src_offset += pitch * height;
    621    dst_offset += pitch * height;
    622    size -= pitch * height;
    623    assert (size < (1 << 15));
    624    pitch = ALIGN(size, 4);
    625    if (size != 0) {
    626       ok = intelEmitCopyBlit(intel, 1,
    627 			     pitch, src_bo, src_offset, I915_TILING_NONE,
    628 			     pitch, dst_bo, dst_offset, I915_TILING_NONE,
    629 			     0, 0, /* src x/y */
    630 			     0, 0, /* dst x/y */
    631 			     size, 1, /* w, h */
    632 			     GL_COPY);
    633       if (!ok)
    634          _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
    635    }
    636 }
    637 
    638 /**
    639  * Used to initialize the alpha value of an ARGB8888 miptree after copying
    640  * into it from an XRGB8888 source.
    641  *
    642  * This is very common with glCopyTexImage2D().  Note that the coordinates are
    643  * relative to the start of the miptree, not relative to a slice within the
    644  * miptree.
    645  */
    646 static void
    647 intel_miptree_set_alpha_to_one(struct intel_context *intel,
    648                               struct intel_mipmap_tree *mt,
    649                               int x, int y, int width, int height)
    650 {
    651    struct intel_region *region = mt->region;
    652    uint32_t BR13, CMD;
    653    int pitch, cpp;
    654    drm_intel_bo *aper_array[2];
    655    BATCH_LOCALS;
    656 
    657    pitch = region->pitch;
    658    cpp = region->cpp;
    659 
    660    DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
    661        __func__, region->bo, pitch, x, y, width, height);
    662 
    663    BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
    664    CMD = XY_COLOR_BLT_CMD;
    665    CMD |= XY_BLT_WRITE_ALPHA;
    666 
    667    BR13 |= pitch;
    668 
    669    /* do space check before going any further */
    670    aper_array[0] = intel->batch.bo;
    671    aper_array[1] = region->bo;
    672 
    673    if (drm_intel_bufmgr_check_aperture_space(aper_array,
    674 					     ARRAY_SIZE(aper_array)) != 0) {
    675       intel_batchbuffer_flush(intel);
    676    }
    677 
    678    BEGIN_BATCH(6);
    679    OUT_BATCH(CMD | (6 - 2));
    680    OUT_BATCH(BR13);
    681    OUT_BATCH((y << 16) | x);
    682    OUT_BATCH(((y + height) << 16) | (x + width));
    683    OUT_RELOC_FENCED(region->bo,
    684 		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
    685 		    0);
    686    OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
    687    ADVANCE_BATCH();
    688 
    689    intel_batchbuffer_emit_mi_flush(intel);
    690 }
    691