1 /* 2 * Copyright 2012 Red Hat Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: Ben Skeggs 23 * 24 */ 25 26 #define XFER_ARGS \ 27 struct nv30_context *nv30, enum nv30_transfer_filter filter, \ 28 struct nv30_rect *src, struct nv30_rect *dst 29 30 #include "util/u_math.h" 31 32 #include "nv_object.xml.h" 33 #include "nv_m2mf.xml.h" 34 #include "nv30/nv01_2d.xml.h" 35 #include "nv30/nv30-40_3d.xml.h" 36 37 #include "nv30/nv30_context.h" 38 #include "nv30/nv30_transfer.h" 39 40 /* Various helper functions to transfer different types of data in a number 41 * of different ways. 42 */ 43 44 static inline bool 45 nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst) 46 { 47 if (src->x1 - src->x0 != dst->x1 - dst->x0) 48 return true; 49 if (src->y1 - src->y0 != dst->y1 - dst->y0) 50 return true; 51 return false; 52 } 53 54 static inline bool 55 nv30_transfer_blit(XFER_ARGS) 56 { 57 if (nv30->screen->eng3d->oclass < NV40_3D_CLASS) 58 return false; 59 if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1) 60 return false; 61 if (dst->w < 2 || dst->h < 2) 62 return false; 63 if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch)) 64 return false; 65 if (src->cpp > 4) 66 return false; 67 return true; 68 } 69 70 static inline struct nouveau_heap * 71 nv30_transfer_rect_vertprog(struct nv30_context *nv30) 72 { 73 struct nouveau_heap *heap = nv30->screen->vp_exec_heap; 74 struct nouveau_heap *vp; 75 76 vp = nv30->blit_vp; 77 if (!vp) { 78 if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) { 79 while (heap->next && heap->size < 2) { 80 struct nouveau_heap **evict = heap->next->priv; 81 nouveau_heap_free(evict); 82 } 83 84 if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) 85 return NULL; 86 } 87 88 vp = nv30->blit_vp; 89 if (vp) { 90 struct nouveau_pushbuf *push = nv30->base.pushbuf; 91 92 BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1); 93 PUSH_DATA (push, vp->start); 94 BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); 95 PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */ 96 PUSH_DATA (push, 0x0040000d); 97 PUSH_DATA (push, 0x8106c083); 98 PUSH_DATA (push, 0x6041ff80); 99 BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4); 100 PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */ 101 PUSH_DATA (push, 0x0040080d); 102 PUSH_DATA (push, 0x8106c083); 103 PUSH_DATA (push, 0x6041ff9d); 104 } 105 } 106 107 return vp; 108 } 109 110 111 static inline struct nv04_resource * 112 nv30_transfer_rect_fragprog(struct nv30_context *nv30) 113 { 114 struct nv04_resource *fp = nv04_resource(nv30->blit_fp); 115 struct pipe_context *pipe = &nv30->base.pipe; 116 117 if (!fp) { 118 nv30->blit_fp = 119 pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STAGING, 12 * 4); 120 if (nv30->blit_fp) { 121 struct pipe_transfer *transfer; 122 u32 *map = pipe_buffer_map(pipe, nv30->blit_fp, 123 PIPE_TRANSFER_WRITE, &transfer); 124 if (map) { 125 map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */ 126 map[1] = 0x1c9dc801; 127 map[2] = 0x0001c800; 128 map[3] = 0x3fe1c800; 129 map[4] = 0x01401e81; /* end; */ 130 map[5] = 0x1c9dc800; 131 map[6] = 0x0001c800; 132 map[7] = 0x0001c800; 133 pipe_buffer_unmap(pipe, transfer); 134 } 135 136 fp = nv04_resource(nv30->blit_fp); 137 nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM); 138 } 139 } 140 141 return fp; 142 } 143 144 static void 145 nv30_transfer_rect_blit(XFER_ARGS) 146 { 147 struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30); 148 struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30); 149 struct nouveau_pushbuf *push = nv30->base.pushbuf; 150 struct nouveau_pushbuf_refn refs[] = { 151 { fp->bo, fp->domain | NOUVEAU_BO_RD }, 152 { src->bo, src->domain | NOUVEAU_BO_RD }, 153 { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR }, 154 }; 155 u32 texfmt, texswz; 156 u32 format, stride; 157 158 if (nouveau_pushbuf_space(push, 512, 8, 0) || 159 nouveau_pushbuf_refn (push, refs, ARRAY_SIZE(refs))) 160 return; 161 162 /* various switches depending on cpp of the transfer */ 163 switch (dst->cpp) { 164 case 4: 165 format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 | 166 NV30_3D_RT_FORMAT_ZETA_Z24S8; 167 texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8; 168 texswz = 0x0000aae4; 169 break; 170 case 2: 171 format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 | 172 NV30_3D_RT_FORMAT_ZETA_Z16; 173 texfmt = NV40_3D_TEX_FORMAT_FORMAT_R5G6B5; 174 texswz = 0x0000a9e4; 175 break; 176 case 1: 177 format = NV30_3D_RT_FORMAT_COLOR_B8 | 178 NV30_3D_RT_FORMAT_ZETA_Z16; 179 texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8; 180 texswz = 0x0000aaff; 181 break; 182 default: 183 assert(0); 184 return; 185 } 186 187 /* render target */ 188 if (!dst->pitch) { 189 format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED; 190 format |= util_logbase2(dst->w) << 16; 191 format |= util_logbase2(dst->h) << 24; 192 stride = 64; 193 } else { 194 format |= NV30_3D_RT_FORMAT_TYPE_LINEAR; 195 stride = dst->pitch; 196 } 197 198 BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2); 199 PUSH_DATA (push, dst->w << 16); 200 PUSH_DATA (push, dst->h << 16); 201 BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5); 202 PUSH_DATA (push, dst->w << 16); 203 PUSH_DATA (push, dst->h << 16); 204 PUSH_DATA (push, format); 205 PUSH_DATA (push, stride); 206 PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); 207 BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1); 208 PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0); 209 210 nv30->dirty |= NV30_NEW_FRAMEBUFFER; 211 212 /* viewport state */ 213 BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8); 214 PUSH_DATAf(push, 0.0); 215 PUSH_DATAf(push, 0.0); 216 PUSH_DATAf(push, 0.0); 217 PUSH_DATAf(push, 0.0); 218 PUSH_DATAf(push, 1.0); 219 PUSH_DATAf(push, 1.0); 220 PUSH_DATAf(push, 1.0); 221 PUSH_DATAf(push, 1.0); 222 BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2); 223 PUSH_DATAf(push, 0.0); 224 PUSH_DATAf(push, 1.0); 225 226 nv30->dirty |= NV30_NEW_VIEWPORT; 227 228 /* blend state */ 229 BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1); 230 PUSH_DATA (push, 0); 231 BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1); 232 PUSH_DATA (push, 0); 233 BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1); 234 PUSH_DATA (push, 0); 235 BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1); 236 PUSH_DATA (push, 0x01010101); 237 238 nv30->dirty |= NV30_NEW_BLEND; 239 240 /* depth-stencil-alpha state */ 241 BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2); 242 PUSH_DATA (push, 0); 243 PUSH_DATA (push, 0); 244 BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1); 245 PUSH_DATA (push, 0); 246 BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1); 247 PUSH_DATA (push, 0); 248 BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1); 249 PUSH_DATA (push, 0); 250 251 nv30->dirty |= NV30_NEW_ZSA; 252 253 /* rasterizer state */ 254 BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1); 255 PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT); 256 BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1); 257 PUSH_DATA (push, 0); 258 BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2); 259 PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL); 260 PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL); 261 BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1); 262 PUSH_DATA (push, 0); 263 BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1); 264 PUSH_DATA (push, 0); 265 266 nv30->state.scissor_off = 0; 267 nv30->dirty |= NV30_NEW_RASTERIZER; 268 269 /* vertex program */ 270 BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1); 271 PUSH_DATA (push, vp->start); 272 BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2); 273 PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */ 274 PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */ 275 BEGIN_NV04(push, NV30_3D(ENGINE), 1); 276 PUSH_DATA (push, 0x00000103); 277 BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1); 278 PUSH_DATA (push, 0x00000000); 279 280 nv30->dirty |= NV30_NEW_VERTPROG; 281 nv30->dirty |= NV30_NEW_CLIP; 282 283 /* fragment program */ 284 BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1); 285 PUSH_RELOC(push, fp->bo, fp->offset, fp->domain | 286 NOUVEAU_BO_LOW | NOUVEAU_BO_OR, 287 NV30_3D_FP_ACTIVE_PROGRAM_DMA0, 288 NV30_3D_FP_ACTIVE_PROGRAM_DMA1); 289 BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1); 290 PUSH_DATA (push, 0x02000000); 291 292 nv30->state.fragprog = NULL; 293 nv30->dirty |= NV30_NEW_FRAGPROG; 294 295 /* texture */ 296 texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT; 297 texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER; 298 texfmt |= NV40_3D_TEX_FORMAT_RECT; 299 texfmt |= 0x00008000; 300 if (src->d < 2) 301 texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D; 302 else 303 texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D; 304 if (src->pitch) 305 texfmt |= NV40_3D_TEX_FORMAT_LINEAR; 306 307 BEGIN_NV04(push, NV30_3D(TEX_OFFSET(0)), 8); 308 PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0); 309 PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR, 310 NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1); 311 PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE | 312 NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE | 313 NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE); 314 PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE); 315 PUSH_DATA (push, texswz); 316 switch (filter) { 317 case BILINEAR: 318 PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR | 319 NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000); 320 break; 321 default: 322 PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST | 323 NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000); 324 break; 325 } 326 PUSH_DATA (push, (src->w << 16) | src->h); 327 PUSH_DATA (push, 0x00000000); 328 BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1); 329 PUSH_DATA (push, 0x00100000 | src->pitch); 330 BEGIN_NV04(push, SUBC_3D(0x0b40), 1); 331 PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000); 332 BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1); 333 PUSH_DATA (push, 1); 334 335 nv30->fragprog.dirty_samplers |= 1; 336 nv30->dirty |= NV30_NEW_FRAGTEX; 337 338 /* blit! */ 339 BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2); 340 PUSH_DATA (push, (dst->x1 - dst->x0) << 16 | dst->x0); 341 PUSH_DATA (push, (dst->y1 - dst->y0) << 16 | dst->y0); 342 BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); 343 PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_QUADS); 344 BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); 345 PUSH_DATAf(push, src->x0); 346 PUSH_DATAf(push, src->y0); 347 PUSH_DATAf(push, src->z); 348 BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); 349 PUSH_DATA (push, (dst->y0 << 16) | dst->x0); 350 BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); 351 PUSH_DATAf(push, src->x1); 352 PUSH_DATAf(push, src->y0); 353 PUSH_DATAf(push, src->z); 354 BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); 355 PUSH_DATA (push, (dst->y0 << 16) | dst->x1); 356 BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); 357 PUSH_DATAf(push, src->x1); 358 PUSH_DATAf(push, src->y1); 359 PUSH_DATAf(push, src->z); 360 BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); 361 PUSH_DATA (push, (dst->y1 << 16) | dst->x1); 362 BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3); 363 PUSH_DATAf(push, src->x0); 364 PUSH_DATAf(push, src->y1); 365 PUSH_DATAf(push, src->z); 366 BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1); 367 PUSH_DATA (push, (dst->y1 << 16) | dst->x0); 368 BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1); 369 PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP); 370 } 371 372 static bool 373 nv30_transfer_sifm(XFER_ARGS) 374 { 375 if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2) 376 return false; 377 378 if (src->d > 1 || dst->d > 1) 379 return false; 380 381 if (dst->offset & 63) 382 return false; 383 384 if (!dst->pitch) { 385 if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2) 386 return false; 387 } else { 388 if (dst->domain != NOUVEAU_BO_VRAM) 389 return false; 390 if (dst->pitch & 63) 391 return false; 392 } 393 394 return true; 395 } 396 397 static void 398 nv30_transfer_rect_sifm(XFER_ARGS) 399 400 { 401 struct nouveau_pushbuf *push = nv30->base.pushbuf; 402 struct nouveau_pushbuf_refn refs[] = { 403 { src->bo, src->domain | NOUVEAU_BO_RD }, 404 { dst->bo, dst->domain | NOUVEAU_BO_WR }, 405 }; 406 struct nv04_fifo *fifo = push->channel->data; 407 unsigned si_fmt, si_arg; 408 unsigned ss_fmt; 409 410 switch (dst->cpp) { 411 case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break; 412 case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break; 413 default: 414 ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8; 415 break; 416 } 417 418 switch (src->cpp) { 419 case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break; 420 case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break; 421 default: 422 si_fmt = NV03_SIFM_COLOR_FORMAT_AY8; 423 break; 424 } 425 426 if (filter == NEAREST) { 427 si_arg = NV03_SIFM_FORMAT_ORIGIN_CENTER; 428 si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE; 429 } else { 430 si_arg = NV03_SIFM_FORMAT_ORIGIN_CORNER; 431 si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR; 432 } 433 434 if (nouveau_pushbuf_space(push, 64, 6, 0) || 435 nouveau_pushbuf_refn (push, refs, 2)) 436 return; 437 438 if (dst->pitch) { 439 BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2); 440 PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); 441 PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); 442 BEGIN_NV04(push, NV04_SF2D(FORMAT), 4); 443 PUSH_DATA (push, ss_fmt); 444 PUSH_DATA (push, dst->pitch << 16 | dst->pitch); 445 PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); 446 PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); 447 BEGIN_NV04(push, NV05_SIFM(SURFACE), 1); 448 PUSH_DATA (push, nv30->screen->surf2d->handle); 449 } else { 450 BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1); 451 PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); 452 BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2); 453 PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) | 454 (util_logbase2(dst->h) << 24)); 455 PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0); 456 BEGIN_NV04(push, NV05_SIFM(SURFACE), 1); 457 PUSH_DATA (push, nv30->screen->swzsurf->handle); 458 } 459 460 BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1); 461 PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart); 462 BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8); 463 PUSH_DATA (push, si_fmt); 464 PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY); 465 PUSH_DATA (push, ( dst->y0 << 16) | dst->x0); 466 PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0)); 467 PUSH_DATA (push, ( dst->y0 << 16) | dst->x0); 468 PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0)); 469 PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0)); 470 PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0)); 471 BEGIN_NV04(push, NV03_SIFM(SIZE), 4); 472 PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2)); 473 PUSH_DATA (push, src->pitch | si_arg); 474 PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0); 475 PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4); 476 } 477 478 /* The NOP+OFFSET_OUT stuff after each M2MF transfer *is* actually required 479 * to prevent some odd things from happening, easily reproducible by 480 * attempting to do conditional rendering that has a M2MF transfer done 481 * some time before it. 0x1e98 will fail with a DMA_W_PROTECTION (assuming 482 * that name is still accurate on nv4x) error. 483 */ 484 485 static bool 486 nv30_transfer_m2mf(XFER_ARGS) 487 { 488 if (!src->pitch || !dst->pitch) 489 return false; 490 if (nv30_transfer_scaled(src, dst)) 491 return false; 492 return true; 493 } 494 495 static void 496 nv30_transfer_rect_m2mf(XFER_ARGS) 497 { 498 struct nouveau_pushbuf *push = nv30->base.pushbuf; 499 struct nouveau_pushbuf_refn refs[] = { 500 { src->bo, src->domain | NOUVEAU_BO_RD }, 501 { dst->bo, dst->domain | NOUVEAU_BO_WR }, 502 }; 503 struct nv04_fifo *fifo = push->channel->data; 504 unsigned src_offset = src->offset; 505 unsigned dst_offset = dst->offset; 506 unsigned w = dst->x1 - dst->x0; 507 unsigned h = dst->y1 - dst->y0; 508 509 src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp); 510 dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp); 511 512 BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2); 513 PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); 514 PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); 515 516 while (h) { 517 unsigned lines = (h > 2047) ? 2047 : h; 518 519 if (nouveau_pushbuf_space(push, 32, 2, 0) || 520 nouveau_pushbuf_refn (push, refs, 2)) 521 return; 522 523 BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8); 524 PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0); 525 PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0); 526 PUSH_DATA (push, src->pitch); 527 PUSH_DATA (push, dst->pitch); 528 PUSH_DATA (push, w * src->cpp); 529 PUSH_DATA (push, lines); 530 PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 | 531 NV03_M2MF_FORMAT_OUTPUT_INC_1); 532 PUSH_DATA (push, 0x00000000); 533 BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1); 534 PUSH_DATA (push, 0x00000000); 535 BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1); 536 PUSH_DATA (push, 0x00000000); 537 538 h -= lines; 539 src_offset += src->pitch * lines; 540 dst_offset += dst->pitch * lines; 541 } 542 } 543 544 static bool 545 nv30_transfer_cpu(XFER_ARGS) 546 { 547 if (nv30_transfer_scaled(src, dst)) 548 return false; 549 return true; 550 } 551 552 static char * 553 linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z) 554 { 555 return base + (y * rect->pitch) + (x * rect->cpp); 556 } 557 558 static inline unsigned 559 swizzle2d(unsigned v, unsigned s) 560 { 561 v = (v | (v << 8)) & 0x00ff00ff; 562 v = (v | (v << 4)) & 0x0f0f0f0f; 563 v = (v | (v << 2)) & 0x33333333; 564 v = (v | (v << 1)) & 0x55555555; 565 return v << s; 566 } 567 568 static char * 569 swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z) 570 { 571 unsigned k = util_logbase2(MIN2(rect->w, rect->h)); 572 unsigned km = (1 << k) - 1; 573 unsigned nx = rect->w >> k; 574 unsigned tx = x >> k; 575 unsigned ty = y >> k; 576 unsigned m; 577 578 m = swizzle2d(x & km, 0); 579 m |= swizzle2d(y & km, 1); 580 m += ((ty * nx) + tx) << k << k; 581 582 return base + (m * rect->cpp); 583 } 584 585 static char * 586 swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z) 587 { 588 unsigned w = rect->w >> 1; 589 unsigned h = rect->h >> 1; 590 unsigned d = rect->d >> 1; 591 unsigned i = 0, o; 592 unsigned v = 0; 593 594 do { 595 o = i; 596 if (w) { 597 v |= (x & 1) << i++; 598 x >>= 1; 599 w >>= 1; 600 } 601 if (h) { 602 v |= (y & 1) << i++; 603 y >>= 1; 604 h >>= 1; 605 } 606 if (d) { 607 v |= (z & 1) << i++; 608 z >>= 1; 609 d >>= 1; 610 } 611 } while(o != i); 612 613 return base + (v * rect->cpp); 614 } 615 616 typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int); 617 618 static inline get_ptr_t 619 get_ptr(struct nv30_rect *rect) 620 { 621 if (rect->pitch) 622 return linear_ptr; 623 624 if (rect->d <= 1) 625 return swizzle2d_ptr; 626 627 return swizzle3d_ptr; 628 } 629 630 static void 631 nv30_transfer_rect_cpu(XFER_ARGS) 632 { 633 get_ptr_t sp = get_ptr(src); 634 get_ptr_t dp = get_ptr(dst); 635 char *srcmap, *dstmap; 636 int x, y; 637 638 nouveau_bo_map(src->bo, NOUVEAU_BO_RD, nv30->base.client); 639 nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, nv30->base.client); 640 srcmap = src->bo->map + src->offset; 641 dstmap = dst->bo->map + dst->offset; 642 643 for (y = 0; y < (dst->y1 - dst->y0); y++) { 644 for (x = 0; x < (dst->x1 - dst->x0); x++) { 645 memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z), 646 sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp); 647 } 648 } 649 } 650 651 void 652 nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter, 653 struct nv30_rect *src, struct nv30_rect *dst) 654 { 655 static const struct { 656 char *name; 657 bool (*possible)(XFER_ARGS); 658 void (*execute)(XFER_ARGS); 659 } *method, methods[] = { 660 { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf }, 661 { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm }, 662 { "blit", nv30_transfer_blit, nv30_transfer_rect_blit }, 663 { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu }, 664 {} 665 }; 666 667 for (method = methods; method->possible; method++) { 668 if (method->possible(nv30, filter, src, dst)) { 669 method->execute(nv30, filter, src, dst); 670 return; 671 } 672 } 673 674 assert(0); 675 } 676 677 void 678 nv30_transfer_push_data(struct nouveau_context *nv, 679 struct nouveau_bo *bo, unsigned offset, unsigned domain, 680 unsigned size, void *data) 681 { 682 /* use ifc, or scratch + copy_data? */ 683 fprintf(stderr, "nv30: push_data not implemented\n"); 684 } 685 686 void 687 nv30_transfer_copy_data(struct nouveau_context *nv, 688 struct nouveau_bo *dst, unsigned d_off, unsigned d_dom, 689 struct nouveau_bo *src, unsigned s_off, unsigned s_dom, 690 unsigned size) 691 { 692 struct nv04_fifo *fifo = nv->screen->channel->data; 693 struct nouveau_pushbuf_refn refs[] = { 694 { src, s_dom | NOUVEAU_BO_RD }, 695 { dst, d_dom | NOUVEAU_BO_WR }, 696 }; 697 struct nouveau_pushbuf *push = nv->pushbuf; 698 unsigned pages, lines; 699 700 pages = size >> 12; 701 size -= (pages << 12); 702 703 BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2); 704 PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); 705 PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart); 706 707 while (pages) { 708 lines = (pages > 2047) ? 2047 : pages; 709 pages -= lines; 710 711 if (nouveau_pushbuf_space(push, 32, 2, 0) || 712 nouveau_pushbuf_refn (push, refs, 2)) 713 return; 714 715 BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8); 716 PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0); 717 PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0); 718 PUSH_DATA (push, 4096); 719 PUSH_DATA (push, 4096); 720 PUSH_DATA (push, 4096); 721 PUSH_DATA (push, lines); 722 PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 | 723 NV03_M2MF_FORMAT_OUTPUT_INC_1); 724 PUSH_DATA (push, 0x00000000); 725 BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1); 726 PUSH_DATA (push, 0x00000000); 727 BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1); 728 PUSH_DATA (push, 0x00000000); 729 730 s_off += (lines << 12); 731 d_off += (lines << 12); 732 } 733 734 if (size) { 735 if (nouveau_pushbuf_space(push, 32, 2, 0) || 736 nouveau_pushbuf_refn (push, refs, 2)) 737 return; 738 739 BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8); 740 PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0); 741 PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0); 742 PUSH_DATA (push, size); 743 PUSH_DATA (push, size); 744 PUSH_DATA (push, size); 745 PUSH_DATA (push, 1); 746 PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 | 747 NV03_M2MF_FORMAT_OUTPUT_INC_1); 748 PUSH_DATA (push, 0x00000000); 749 BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1); 750 PUSH_DATA (push, 0x00000000); 751 BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1); 752 PUSH_DATA (push, 0x00000000); 753 } 754 } 755