Home | History | Annotate | Download | only in radeon
      1 /*
      2  * Copyright 2014 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  *
     23  * Authors: Marek Olšák <maraeo@gmail.com>
     24  *
     25  */
     26 
     27 #include "r600_cs.h"
     28 
     29 /* 2xMSAA
     30  * There are two locations (4, 4), (-4, -4). */
     31 const uint32_t eg_sample_locs_2x[4] = {
     32 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     33 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     34 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     35 	FILL_SREG(4, 4, -4, -4, 4, 4, -4, -4),
     36 };
     37 const unsigned eg_max_dist_2x = 4;
     38 /* 4xMSAA
     39  * There are 4 locations: (-2, 6), (6, -2), (-6, 2), (2, 6). */
     40 const uint32_t eg_sample_locs_4x[4] = {
     41 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     42 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     43 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     44 	FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6),
     45 };
     46 const unsigned eg_max_dist_4x = 6;
     47 
     48 /* Cayman 8xMSAA */
     49 static const uint32_t cm_sample_locs_8x[] = {
     50 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     51 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     52 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     53 	FILL_SREG( 1, -3, -1,  3, 5,  1, -3, -5),
     54 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     55 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     56 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     57 	FILL_SREG(-5,  5, -7, -1, 3,  7,  7, -7),
     58 };
     59 static const unsigned cm_max_dist_8x = 8;
     60 /* Cayman 16xMSAA */
     61 static const uint32_t cm_sample_locs_16x[] = {
     62 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     63 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     64 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     65 	FILL_SREG( 1,  1, -1, -3, -3,  2,  4, -1),
     66 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     67 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     68 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     69 	FILL_SREG(-5, -2,  2,  5,  5,  3,  3, -5),
     70 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     71 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     72 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     73 	FILL_SREG(-2,  6,  0, -7, -4, -6, -6,  4),
     74 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     75 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     76 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     77 	FILL_SREG(-8,  0,  7, -4,  6,  7, -7, -8),
     78 };
     79 static const unsigned cm_max_dist_16x = 8;
     80 
     81 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
     82 				unsigned sample_index, float *out_value)
     83 {
     84 	int offset, index;
     85 	struct {
     86 		int idx:4;
     87 	} val;
     88 	switch (sample_count) {
     89 	case 1:
     90 	default:
     91 		out_value[0] = out_value[1] = 0.5;
     92 		break;
     93 	case 2:
     94 		offset = 4 * (sample_index * 2);
     95 		val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
     96 		out_value[0] = (float)(val.idx + 8) / 16.0f;
     97 		val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
     98 		out_value[1] = (float)(val.idx + 8) / 16.0f;
     99 		break;
    100 	case 4:
    101 		offset = 4 * (sample_index * 2);
    102 		val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
    103 		out_value[0] = (float)(val.idx + 8) / 16.0f;
    104 		val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
    105 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    106 		break;
    107 	case 8:
    108 		offset = 4 * (sample_index % 4 * 2);
    109 		index = (sample_index / 4) * 4;
    110 		val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
    111 		out_value[0] = (float)(val.idx + 8) / 16.0f;
    112 		val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
    113 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    114 		break;
    115 	case 16:
    116 		offset = 4 * (sample_index % 4 * 2);
    117 		index = (sample_index / 4) * 4;
    118 		val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
    119 		out_value[0] = (float)(val.idx + 8) / 16.0f;
    120 		val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
    121 		out_value[1] = (float)(val.idx + 8) / 16.0f;
    122 		break;
    123 	}
    124 }
    125 
    126 void cayman_init_msaa(struct pipe_context *ctx)
    127 {
    128 	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
    129 	int i;
    130 
    131 	cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
    132 
    133 	for (i = 0; i < 2; i++)
    134 		cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
    135 	for (i = 0; i < 4; i++)
    136 		cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
    137 	for (i = 0; i < 8; i++)
    138 		cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
    139 	for (i = 0; i < 16; i++)
    140 		cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
    141 }
    142 
    143 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
    144 {
    145 	switch (nr_samples) {
    146 	default:
    147 	case 1:
    148 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
    149 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
    150 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
    151 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
    152 		break;
    153 	case 2:
    154 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
    155 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
    156 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
    157 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
    158 		break;
    159 	case 4:
    160 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
    161 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
    162 		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
    163 		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
    164 		break;
    165 	case 8:
    166 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
    167 		radeon_emit(cs, cm_sample_locs_8x[0]);
    168 		radeon_emit(cs, cm_sample_locs_8x[4]);
    169 		radeon_emit(cs, 0);
    170 		radeon_emit(cs, 0);
    171 		radeon_emit(cs, cm_sample_locs_8x[1]);
    172 		radeon_emit(cs, cm_sample_locs_8x[5]);
    173 		radeon_emit(cs, 0);
    174 		radeon_emit(cs, 0);
    175 		radeon_emit(cs, cm_sample_locs_8x[2]);
    176 		radeon_emit(cs, cm_sample_locs_8x[6]);
    177 		radeon_emit(cs, 0);
    178 		radeon_emit(cs, 0);
    179 		radeon_emit(cs, cm_sample_locs_8x[3]);
    180 		radeon_emit(cs, cm_sample_locs_8x[7]);
    181 		break;
    182 	case 16:
    183 		radeon_set_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
    184 		radeon_emit(cs, cm_sample_locs_16x[0]);
    185 		radeon_emit(cs, cm_sample_locs_16x[4]);
    186 		radeon_emit(cs, cm_sample_locs_16x[8]);
    187 		radeon_emit(cs, cm_sample_locs_16x[12]);
    188 		radeon_emit(cs, cm_sample_locs_16x[1]);
    189 		radeon_emit(cs, cm_sample_locs_16x[5]);
    190 		radeon_emit(cs, cm_sample_locs_16x[9]);
    191 		radeon_emit(cs, cm_sample_locs_16x[13]);
    192 		radeon_emit(cs, cm_sample_locs_16x[2]);
    193 		radeon_emit(cs, cm_sample_locs_16x[6]);
    194 		radeon_emit(cs, cm_sample_locs_16x[10]);
    195 		radeon_emit(cs, cm_sample_locs_16x[14]);
    196 		radeon_emit(cs, cm_sample_locs_16x[3]);
    197 		radeon_emit(cs, cm_sample_locs_16x[7]);
    198 		radeon_emit(cs, cm_sample_locs_16x[11]);
    199 		radeon_emit(cs, cm_sample_locs_16x[15]);
    200 		break;
    201 	}
    202 }
    203 
    204 void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
    205 			     int ps_iter_samples, int overrast_samples,
    206 			     unsigned sc_mode_cntl_1)
    207 {
    208 	int setup_samples = nr_samples > 1 ? nr_samples :
    209 			    overrast_samples > 1 ? overrast_samples : 0;
    210 	/* Required by OpenGL line rasterization.
    211 	 *
    212 	 * TODO: We should also enable perpendicular endcaps for AA lines,
    213 	 *       but that requires implementing line stippling in the pixel
    214 	 *       shader. SC can only do line stippling with axis-aligned
    215 	 *       endcaps.
    216 	 */
    217 	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
    218 
    219 	if (setup_samples > 1) {
    220 		/* indexed by log2(nr_samples) */
    221 		unsigned max_dist[] = {
    222 			0,
    223 			eg_max_dist_2x,
    224 			eg_max_dist_4x,
    225 			cm_max_dist_8x,
    226 			cm_max_dist_16x
    227 		};
    228 		unsigned log_samples = util_logbase2(setup_samples);
    229 		unsigned log_ps_iter_samples =
    230 			util_logbase2(util_next_power_of_two(ps_iter_samples));
    231 
    232 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
    233 		radeon_emit(cs, sc_line_cntl |
    234 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
    235 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
    236 			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
    237 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
    238 
    239 		if (nr_samples > 1) {
    240 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
    241 					       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
    242 					       S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
    243 					       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
    244 					       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
    245 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
    246 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
    247 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
    248 					       EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
    249 					       sc_mode_cntl_1);
    250 		} else if (overrast_samples > 1) {
    251 			radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
    252 					       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
    253 					       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
    254 					       S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
    255 			radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
    256 					       sc_mode_cntl_1);
    257 		}
    258 	} else {
    259 		radeon_set_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
    260 		radeon_emit(cs, sc_line_cntl); /* CM_R_028BDC_PA_SC_LINE_CNTL */
    261 		radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
    262 
    263 		radeon_set_context_reg(cs, CM_R_028804_DB_EQAA,
    264 				       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
    265 				       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
    266 		radeon_set_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
    267 				       sc_mode_cntl_1);
    268 	}
    269 }
    270