1 /************************************************************************** 2 * 3 * Copyright 2007-2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /* 29 * Rasterization for binned triangles within a tile 30 */ 31 32 33 34 /** 35 * Prototype for a 8 plane rasterizer function. Will codegenerate 36 * several of these. 37 * 38 * XXX: Varients for more/fewer planes. 39 * XXX: Need ways of dropping planes as we descend. 40 * XXX: SIMD 41 */ 42 static void 43 TAG(do_block_4)(struct lp_rasterizer_task *task, 44 const struct lp_rast_triangle *tri, 45 const struct lp_rast_plane *plane, 46 int x, int y, 47 const int *c) 48 { 49 unsigned mask = 0xffff; 50 int j; 51 52 for (j = 0; j < NR_PLANES; j++) { 53 mask &= ~build_mask_linear(c[j] - 1, 54 -plane[j].dcdx, 55 plane[j].dcdy); 56 } 57 58 /* Now pass to the shader: 59 */ 60 if (mask) 61 lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); 62 } 63 64 /** 65 * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out 66 * of the triangle's bounds. 67 */ 68 static void 69 TAG(do_block_16)(struct lp_rasterizer_task *task, 70 const struct lp_rast_triangle *tri, 71 const struct lp_rast_plane *plane, 72 int x, int y, 73 const int *c) 74 { 75 unsigned outmask, inmask, partmask, partial_mask; 76 unsigned j; 77 78 outmask = 0; /* outside one or more trivial reject planes */ 79 partmask = 0; /* outside one or more trivial accept planes */ 80 81 for (j = 0; j < NR_PLANES; j++) { 82 const int dcdx = -plane[j].dcdx * 4; 83 const int dcdy = plane[j].dcdy * 4; 84 const int cox = plane[j].eo * 4; 85 const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; 86 const int cio = ei * 4 - 1; 87 88 build_masks(c[j] + cox, 89 cio - cox, 90 dcdx, dcdy, 91 &outmask, /* sign bits from c[i][0..15] + cox */ 92 &partmask); /* sign bits from c[i][0..15] + cio */ 93 } 94 95 if (outmask == 0xffff) 96 return; 97 98 /* Mask of sub-blocks which are inside all trivial accept planes: 99 */ 100 inmask = ~partmask & 0xffff; 101 102 /* Mask of sub-blocks which are inside all trivial reject planes, 103 * but outside at least one trivial accept plane: 104 */ 105 partial_mask = partmask & ~outmask; 106 107 assert((partial_mask & inmask) == 0); 108 109 LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask))); 110 111 /* Iterate over partials: 112 */ 113 while (partial_mask) { 114 int i = ffs(partial_mask) - 1; 115 int ix = (i & 3) * 4; 116 int iy = (i >> 2) * 4; 117 int px = x + ix; 118 int py = y + iy; 119 int cx[NR_PLANES]; 120 121 partial_mask &= ~(1 << i); 122 123 LP_COUNT(nr_partially_covered_4); 124 125 for (j = 0; j < NR_PLANES; j++) 126 cx[j] = (c[j] 127 - plane[j].dcdx * ix 128 + plane[j].dcdy * iy); 129 130 TAG(do_block_4)(task, tri, plane, px, py, cx); 131 } 132 133 /* Iterate over fulls: 134 */ 135 while (inmask) { 136 int i = ffs(inmask) - 1; 137 int ix = (i & 3) * 4; 138 int iy = (i >> 2) * 4; 139 int px = x + ix; 140 int py = y + iy; 141 142 inmask &= ~(1 << i); 143 144 LP_COUNT(nr_fully_covered_4); 145 block_full_4(task, tri, px, py); 146 } 147 } 148 149 150 /** 151 * Scan the tile in chunks and figure out which pixels to rasterize 152 * for this triangle. 153 */ 154 void 155 TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, 156 const union lp_rast_cmd_arg arg) 157 { 158 const struct lp_rast_triangle *tri = arg.triangle.tri; 159 unsigned plane_mask = arg.triangle.plane_mask; 160 const struct lp_rast_plane *tri_plane = GET_PLANES(tri); 161 const int x = task->x, y = task->y; 162 struct lp_rast_plane plane[NR_PLANES]; 163 int c[NR_PLANES]; 164 unsigned outmask, inmask, partmask, partial_mask; 165 unsigned j = 0; 166 167 if (tri->inputs.disable) { 168 /* This triangle was partially binned and has been disabled */ 169 return; 170 } 171 172 outmask = 0; /* outside one or more trivial reject planes */ 173 partmask = 0; /* outside one or more trivial accept planes */ 174 175 while (plane_mask) { 176 int i = ffs(plane_mask) - 1; 177 plane[j] = tri_plane[i]; 178 plane_mask &= ~(1 << i); 179 c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; 180 181 { 182 const int dcdx = -plane[j].dcdx * 16; 183 const int dcdy = plane[j].dcdy * 16; 184 const int cox = plane[j].eo * 16; 185 const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; 186 const int cio = ei * 16 - 1; 187 188 build_masks(c[j] + cox, 189 cio - cox, 190 dcdx, dcdy, 191 &outmask, /* sign bits from c[i][0..15] + cox */ 192 &partmask); /* sign bits from c[i][0..15] + cio */ 193 } 194 195 j++; 196 } 197 198 if (outmask == 0xffff) 199 return; 200 201 /* Mask of sub-blocks which are inside all trivial accept planes: 202 */ 203 inmask = ~partmask & 0xffff; 204 205 /* Mask of sub-blocks which are inside all trivial reject planes, 206 * but outside at least one trivial accept plane: 207 */ 208 partial_mask = partmask & ~outmask; 209 210 assert((partial_mask & inmask) == 0); 211 212 LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask))); 213 214 /* Iterate over partials: 215 */ 216 while (partial_mask) { 217 int i = ffs(partial_mask) - 1; 218 int ix = (i & 3) * 16; 219 int iy = (i >> 2) * 16; 220 int px = x + ix; 221 int py = y + iy; 222 int cx[NR_PLANES]; 223 224 for (j = 0; j < NR_PLANES; j++) 225 cx[j] = (c[j] 226 - plane[j].dcdx * ix 227 + plane[j].dcdy * iy); 228 229 partial_mask &= ~(1 << i); 230 231 LP_COUNT(nr_partially_covered_16); 232 TAG(do_block_16)(task, tri, plane, px, py, cx); 233 } 234 235 /* Iterate over fulls: 236 */ 237 while (inmask) { 238 int i = ffs(inmask) - 1; 239 int ix = (i & 3) * 16; 240 int iy = (i >> 2) * 16; 241 int px = x + ix; 242 int py = y + iy; 243 244 inmask &= ~(1 << i); 245 246 LP_COUNT(nr_fully_covered_16); 247 block_full_16(task, tri, px, py); 248 } 249 } 250 251 #if defined(PIPE_ARCH_SSE) && defined(TRI_16) 252 /* XXX: special case this when intersection is not required. 253 * - tile completely within bbox, 254 * - bbox completely within tile. 255 */ 256 void 257 TRI_16(struct lp_rasterizer_task *task, 258 const union lp_rast_cmd_arg arg) 259 { 260 const struct lp_rast_triangle *tri = arg.triangle.tri; 261 const struct lp_rast_plane *plane = GET_PLANES(tri); 262 unsigned mask = arg.triangle.plane_mask; 263 unsigned outmask, partial_mask; 264 unsigned j; 265 __m128i cstep4[NR_PLANES][4]; 266 267 int x = (mask & 0xff); 268 int y = (mask >> 8); 269 270 outmask = 0; /* outside one or more trivial reject planes */ 271 272 x += task->x; 273 y += task->y; 274 275 for (j = 0; j < NR_PLANES; j++) { 276 const int dcdx = -plane[j].dcdx * 4; 277 const int dcdy = plane[j].dcdy * 4; 278 __m128i xdcdy = _mm_set1_epi32(dcdy); 279 280 cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); 281 cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); 282 cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); 283 cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); 284 285 { 286 const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; 287 const int cox = plane[j].eo * 4; 288 289 outmask |= sign_bits4(cstep4[j], c + cox); 290 } 291 } 292 293 if (outmask == 0xffff) 294 return; 295 296 297 /* Mask of sub-blocks which are inside all trivial reject planes, 298 * but outside at least one trivial accept plane: 299 */ 300 partial_mask = 0xffff & ~outmask; 301 302 /* Iterate over partials: 303 */ 304 while (partial_mask) { 305 int i = ffs(partial_mask) - 1; 306 int ix = (i & 3) * 4; 307 int iy = (i >> 2) * 4; 308 int px = x + ix; 309 int py = y + iy; 310 unsigned mask = 0xffff; 311 312 partial_mask &= ~(1 << i); 313 314 for (j = 0; j < NR_PLANES; j++) { 315 const int cx = (plane[j].c - 1 316 - plane[j].dcdx * px 317 + plane[j].dcdy * py) * 4; 318 319 mask &= ~sign_bits4(cstep4[j], cx); 320 } 321 322 if (mask) 323 lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); 324 } 325 } 326 #endif 327 328 #if defined(PIPE_ARCH_SSE) && defined(TRI_4) 329 void 330 TRI_4(struct lp_rasterizer_task *task, 331 const union lp_rast_cmd_arg arg) 332 { 333 const struct lp_rast_triangle *tri = arg.triangle.tri; 334 const struct lp_rast_plane *plane = GET_PLANES(tri); 335 unsigned mask = arg.triangle.plane_mask; 336 const int x = task->x + (mask & 0xff); 337 const int y = task->y + (mask >> 8); 338 unsigned j; 339 340 /* Iterate over partials: 341 */ 342 { 343 unsigned mask = 0xffff; 344 345 for (j = 0; j < NR_PLANES; j++) { 346 const int cx = (plane[j].c 347 - plane[j].dcdx * x 348 + plane[j].dcdy * y); 349 350 const int dcdx = -plane[j].dcdx; 351 const int dcdy = plane[j].dcdy; 352 __m128i xdcdy = _mm_set1_epi32(dcdy); 353 354 __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); 355 __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); 356 __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); 357 __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); 358 359 __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); 360 __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); 361 __m128i result = _mm_packs_epi16(cstep01, cstep23); 362 363 /* Extract the sign bits 364 */ 365 mask &= ~_mm_movemask_epi8(result); 366 } 367 368 if (mask) 369 lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); 370 } 371 } 372 #endif 373 374 375 376 #undef TAG 377 #undef TRI_4 378 #undef TRI_16 379 #undef NR_PLANES 380 381