Home | History | Annotate | Download | only in r600
      1 /*
      2  * Copyright 2014 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors: Marek Olšák <maraeo (at) gmail.com>
     24  *
     25  */
     26 
     27 #include "r600_cs.h"
     28 #include "evergreend.h"
     29 
     30 /* 2xMSAA
     31  * There are two locations (4, 4), (-4, -4). */
     32 const uint32_t eg_sample_locs_2x[4] = {
     33 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     34 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     35 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     36 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     37 };
     38 const unsigned eg_max_dist_2x = 4;
     39 /* 4xMSAA
     40  * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
     41 const uint32_t eg_sample_locs_4x[4] = {
     42 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     43 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     44 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     45 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     46 };
     47 const unsigned eg_max_dist_4x = 6;
     48 
     49 /* Cayman 8xMSAA */
     50 static const uint32_t cm_sample_locs_8x[] = {
     51 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     52 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     53 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     54 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     55 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     56 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     57 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     58 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     59 };
     60 static const unsigned cm_max_dist_8x = 8;
     61 /* Cayman 16xMSAA */
     62 static const uint32_t cm_sample_locs_16x[] = {
     63 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     64 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     65 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     66 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     67 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     68 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     69 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     70 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     71 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     72 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     73 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     74 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     75 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     76 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     77 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     78 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     79 };
     80 static const unsigned cm_max_dist_16x = 8;
     81 
     82 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
     83 				unsigned sample_index, float *out_value)
     84 {
     85 	int offset, index;
     86 	struct {
     87 		int idx:4;
     88 	} val;
     89 	switch (sample_count) {
     90 	case 1:
     91 	default:
     92 		out_value[0] = out_value[1] = 0.5;
     93 		break;
     94 	case 2:
     95 		offset = 4 * (sample_index * 2);
     96 		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
     97 		out_value[0] = (float)(val.idx + 8) / 16.0f;
     98 		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
     99 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    100 		break;
    101 	case 4:
    102 		offset = 4 * (sample_index * 2);
    103 		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
    104 		out_value[0] = (float)(val.idx + 8) / 16.0f;
    105 		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
    106 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    107 		break;
    108 	case 8:
    109 		offset = 4 * (sample_index % 4 * 2);
    110 		index = (sample_index / 4) * 4;
    111 		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
    112 		out_value[0] = (float)(val.idx + 8) / 16.0f;
    113 		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
    114 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    115 		break;
    116 	case 16:
    117 		offset = 4 * (sample_index % 4 * 2);
    118 		index = (sample_index / 4) * 4;
    119 		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
    120 		out_value[0] = (float)(val.idx + 8) / 16.0f;
    121 		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
    122 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    123 		break;
    124 	}
    125 }
    126 
    127 void cayman_init_msaa(struct pipe_context *ctx)
    128 {
    129 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
    130 	int i;
    131 
    132 	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
    133 
    134 	for (i = 0; i < 2; i++)
    135 		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
    136 	for (i = 0; i < 4; i++)
    137 		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
    138 	for (i = 0; i < 8; i++)
    139 		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
    140 	for (i = 0; i < 16; i++)
    141 		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
    142 }
    143 
    144 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
    145 {
    146 	switch (nr_samples) {
    147 	default:
    148 	case 1:
    149 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
    150 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
    151 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
    152 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
    153 		break;
    154 	case 2:
    155 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
    156 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
    157 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
    158 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
    159 		break;
    160 	case 4:
    161 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
    162 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
    163 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
    164 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
    165 		break;
    166 	case 8:
    167 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
    168 		radeon_emit(cs, cm_sample_locs_8x[0]);
    169 		radeon_emit(cs, cm_sample_locs_8x[4]);
    170 		radeon_emit(cs, 0);
    171 		radeon_emit(cs, 0);
    172 		radeon_emit(cs, cm_sample_locs_8x[1]);
    173 		radeon_emit(cs, cm_sample_locs_8x[5]);
    174 		radeon_emit(cs, 0);
    175 		radeon_emit(cs, 0);
    176 		radeon_emit(cs, cm_sample_locs_8x[2]);
    177 		radeon_emit(cs, cm_sample_locs_8x[6]);
    178 		radeon_emit(cs, 0);
    179 		radeon_emit(cs, 0);
    180 		radeon_emit(cs, cm_sample_locs_8x[3]);
    181 		radeon_emit(cs, cm_sample_locs_8x[7]);
    182 		break;
    183 	case 16:
    184 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
    185 		radeon_emit(cs, cm_sample_locs_16x[0]);
    186 		radeon_emit(cs, cm_sample_locs_16x[4]);
    187 		radeon_emit(cs, cm_sample_locs_16x[8]);
    188 		radeon_emit(cs, cm_sample_locs_16x[12]);
    189 		radeon_emit(cs, cm_sample_locs_16x[1]);
    190 		radeon_emit(cs, cm_sample_locs_16x[5]);
    191 		radeon_emit(cs, cm_sample_locs_16x[9]);
    192 		radeon_emit(cs, cm_sample_locs_16x[13]);
    193 		radeon_emit(cs, cm_sample_locs_16x[2]);
    194 		radeon_emit(cs, cm_sample_locs_16x[6]);
    195 		radeon_emit(cs, cm_sample_locs_16x[10]);
    196 		radeon_emit(cs, cm_sample_locs_16x[14]);
    197 		radeon_emit(cs, cm_sample_locs_16x[3]);
    198 		radeon_emit(cs, cm_sample_locs_16x[7]);
    199 		radeon_emit(cs, cm_sample_locs_16x[11]);
    200 		radeon_emit(cs, cm_sample_locs_16x[15]);
    201 		break;
    202 	}
    203 }
    204 
    205 void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
    206 			     int ps_iter_samples, int overrast_samples,
    207 			     unsigned sc_mode_cntl_1)
    208 {
    209 	int setup_samples = nr_samples > 1 ? nr_samples :
    210 			    overrast_samples > 1 ? overrast_samples : 0;
    211 	/* Required by OpenGL line rasterization.
    212 	 *
    213 	 * TODO: We should also enable perpendicular endcaps for AA lines,
    214 	 *       but that requires implementing line stippling in the pixel
    215 	 *       shader. SC can only do line stippling with axis-aligned
    216 	 *       endcaps.
    217 	 */
    218 	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
    219 
    220 	if (setup_samples > 1) {
    221 		/* indexed by log2(nr_samples) */
    222 		unsigned max_dist[] = {
    223 			0,
    224 			eg_max_dist_2x,
    225 			eg_max_dist_4x,
    226 			cm_max_dist_8x,
    227 			cm_max_dist_16x
    228 		};
    229 		unsigned log_samples = util_logbase2(setup_samples);
    230 		unsigned log_ps_iter_samples =
    231 			util_logbase2(util_next_power_of_two(ps_iter_samples));
    232 
    233 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
    234 		radeon_emit(cs, sc_line_cntl |
    235 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
    236 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
    237 			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
    238 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
    239 
    240 		if (nr_samples > 1) {
    241 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
    242 					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
    243 					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
    244 					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
    245 					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
    246 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
    247 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
    248 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
    249 					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
    250 					       sc_mode_cntl_1);
    251 		} else if (overrast_samples > 1) {
    252 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
    253 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
    254 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
    255 					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
    256 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
    257 					       sc_mode_cntl_1);
    258 		}
    259 	} else {
    260 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
    261 		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
    262 		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
    263 
    264 		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
    265 				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
    266 				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
    267 		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
    268 				       sc_mode_cntl_1);
    269 	}
    270 }
    271