1 /* 2 * Copyright 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <assert.h> 25 #include <stdbool.h> 26 #include <string.h> 27 #include <unistd.h> 28 #include <fcntl.h> 29 30 #include "anv_private.h" 31 32 #include "common/gen_sample_positions.h" 33 #include "genxml/gen_macros.h" 34 #include "genxml/genX_pack.h" 35 36 #include "vk_util.h" 37 38 #if GEN_GEN == 10 39 /** 40 * From Gen10 Workarounds page in h/w specs: 41 * WaSampleOffsetIZ: 42 * "Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no 43 * markers in the pipeline by programming a PIPE_CONTROL with stall." 44 */ 45 static void 46 gen10_emit_wa_cs_stall_flush(struct anv_batch *batch) 47 { 48 49 anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { 50 pc.CommandStreamerStallEnable = true; 51 pc.StallAtPixelScoreboard = true; 52 } 53 } 54 55 /** 56 * From Gen10 Workarounds page in h/w specs: 57 * WaSampleOffsetIZ:_cs_stall_flush 58 * "When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an 59 * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL) 60 * after the command to ensure the state has been delivered prior to any 61 * command causing a marker in the pipeline." 62 */ 63 static void 64 gen10_emit_wa_lri_to_cache_mode_zero(struct anv_batch *batch) 65 { 66 /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must 67 * be idle; i.e., full flush is required. 68 */ 69 anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) { 70 pc.DepthCacheFlushEnable = true; 71 pc.DCFlushEnable = true; 72 pc.RenderTargetCacheFlushEnable = true; 73 pc.InstructionCacheInvalidateEnable = true; 74 pc.StateCacheInvalidationEnable = true; 75 pc.TextureCacheInvalidationEnable = true; 76 pc.VFCacheInvalidationEnable = true; 77 pc.ConstantCacheInvalidationEnable =true; 78 } 79 80 /* Write to CACHE_MODE_0 (0x7000) */ 81 uint32_t cache_mode_0 = 0; 82 anv_pack_struct(&cache_mode_0, GENX(CACHE_MODE_0)); 83 84 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 85 lri.RegisterOffset = GENX(CACHE_MODE_0_num); 86 lri.DataDWord = cache_mode_0; 87 } 88 } 89 #endif 90 91 VkResult 92 genX(init_device_state)(struct anv_device *device) 93 { 94 GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs, 95 &GENX(MOCS)); 96 97 struct anv_batch batch; 98 99 uint32_t cmds[64]; 100 batch.start = batch.next = cmds; 101 batch.end = (void *) cmds + sizeof(cmds); 102 103 anv_batch_emit(&batch, GENX(PIPELINE_SELECT), ps) { 104 #if GEN_GEN >= 9 105 ps.MaskBits = 3; 106 #endif 107 ps.PipelineSelection = _3D; 108 } 109 110 #if GEN_GEN == 9 111 uint32_t cache_mode_1; 112 anv_pack_struct(&cache_mode_1, GENX(CACHE_MODE_1), 113 .FloatBlendOptimizationEnable = true, 114 .FloatBlendOptimizationEnableMask = true, 115 .PartialResolveDisableInVC = true, 116 .PartialResolveDisableInVCMask = true); 117 118 anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 119 lri.RegisterOffset = GENX(CACHE_MODE_1_num); 120 lri.DataDWord = cache_mode_1; 121 } 122 #endif 123 124 #if GEN_GEN == 10 125 uint32_t cache_mode_ss; 126 anv_pack_struct(&cache_mode_ss, GENX(CACHE_MODE_SS), 127 .FloatBlendOptimizationEnable = true, 128 .FloatBlendOptimizationEnableMask = true); 129 130 anv_batch_emit(&batch, GENX(MI_LOAD_REGISTER_IMM), lri) { 131 lri.RegisterOffset = GENX(CACHE_MODE_SS_num); 132 lri.DataDWord = cache_mode_ss; 133 } 134 #endif 135 136 anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS), aa); 137 138 anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { 139 rect.ClippedDrawingRectangleYMin = 0; 140 rect.ClippedDrawingRectangleXMin = 0; 141 rect.ClippedDrawingRectangleYMax = UINT16_MAX; 142 rect.ClippedDrawingRectangleXMax = UINT16_MAX; 143 rect.DrawingRectangleOriginY = 0; 144 rect.DrawingRectangleOriginX = 0; 145 } 146 147 #if GEN_GEN >= 8 148 anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY), ck); 149 150 #if GEN_GEN == 10 151 gen10_emit_wa_cs_stall_flush(&batch); 152 #endif 153 154 /* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and 155 * VkPhysicalDeviceFeatures::standardSampleLocations. 156 */ 157 anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN), sp) { 158 GEN_SAMPLE_POS_1X(sp._1xSample); 159 GEN_SAMPLE_POS_2X(sp._2xSample); 160 GEN_SAMPLE_POS_4X(sp._4xSample); 161 GEN_SAMPLE_POS_8X(sp._8xSample); 162 #if GEN_GEN >= 9 163 GEN_SAMPLE_POS_16X(sp._16xSample); 164 #endif 165 } 166 #endif 167 168 #if GEN_GEN == 10 169 gen10_emit_wa_lri_to_cache_mode_zero(&batch); 170 #endif 171 172 anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); 173 174 assert(batch.next <= batch.end); 175 176 return anv_device_submit_simple_batch(device, &batch); 177 } 178 179 static uint32_t 180 vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable) 181 { 182 switch (filter) { 183 default: 184 assert(!"Invalid filter"); 185 case VK_FILTER_NEAREST: 186 return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_NEAREST; 187 case VK_FILTER_LINEAR: 188 return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR; 189 } 190 } 191 192 static uint32_t 193 vk_to_gen_max_anisotropy(float ratio) 194 { 195 return (anv_clamp_f(ratio, 2, 16) - 2) / 2; 196 } 197 198 static const uint32_t vk_to_gen_mipmap_mode[] = { 199 [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST, 200 [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR 201 }; 202 203 static const uint32_t vk_to_gen_tex_address[] = { 204 [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP, 205 [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR, 206 [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP, 207 [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, 208 [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, 209 }; 210 211 /* Vulkan specifies the result of shadow comparisons as: 212 * 1 if ref <op> texel, 213 * 0 otherwise. 214 * 215 * The hardware does: 216 * 0 if texel <op> ref, 217 * 1 otherwise. 218 * 219 * So, these look a bit strange because there's both a negation 220 * and swapping of the arguments involved. 221 */ 222 static const uint32_t vk_to_gen_shadow_compare_op[] = { 223 [VK_COMPARE_OP_NEVER] = PREFILTEROPALWAYS, 224 [VK_COMPARE_OP_LESS] = PREFILTEROPLEQUAL, 225 [VK_COMPARE_OP_EQUAL] = PREFILTEROPNOTEQUAL, 226 [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLESS, 227 [VK_COMPARE_OP_GREATER] = PREFILTEROPGEQUAL, 228 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPEQUAL, 229 [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGREATER, 230 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPNEVER, 231 }; 232 233 VkResult genX(CreateSampler)( 234 VkDevice _device, 235 const VkSamplerCreateInfo* pCreateInfo, 236 const VkAllocationCallbacks* pAllocator, 237 VkSampler* pSampler) 238 { 239 ANV_FROM_HANDLE(anv_device, device, _device); 240 struct anv_sampler *sampler; 241 242 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); 243 244 sampler = vk_zalloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, 245 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 246 if (!sampler) 247 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 248 249 sampler->n_planes = 1; 250 251 uint32_t border_color_offset = device->border_colors.offset + 252 pCreateInfo->borderColor * 64; 253 254 vk_foreach_struct(ext, pCreateInfo->pNext) { 255 switch (ext->sType) { 256 case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO_KHR: { 257 VkSamplerYcbcrConversionInfoKHR *pSamplerConversion = 258 (VkSamplerYcbcrConversionInfoKHR *) ext; 259 ANV_FROM_HANDLE(anv_ycbcr_conversion, conversion, 260 pSamplerConversion->conversion); 261 262 if (conversion == NULL) 263 break; 264 265 sampler->n_planes = conversion->format->n_planes; 266 sampler->conversion = conversion; 267 break; 268 } 269 default: 270 anv_debug_ignored_stype(ext->sType); 271 break; 272 } 273 } 274 275 for (unsigned p = 0; p < sampler->n_planes; p++) { 276 const bool plane_has_chroma = 277 sampler->conversion && sampler->conversion->format->planes[p].has_chroma; 278 const VkFilter min_filter = 279 plane_has_chroma ? sampler->conversion->chroma_filter : pCreateInfo->minFilter; 280 const VkFilter mag_filter = 281 plane_has_chroma ? sampler->conversion->chroma_filter : pCreateInfo->magFilter; 282 const bool enable_min_filter_addr_rounding = min_filter != VK_FILTER_NEAREST; 283 const bool enable_mag_filter_addr_rounding = mag_filter != VK_FILTER_NEAREST; 284 /* From Broadwell PRM, SAMPLER_STATE: 285 * "Mip Mode Filter must be set to MIPFILTER_NONE for Planar YUV surfaces." 286 */ 287 const uint32_t mip_filter_mode = 288 (sampler->conversion && 289 isl_format_is_yuv(sampler->conversion->format->planes[0].isl_format)) ? 290 MIPFILTER_NONE : vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode]; 291 292 struct GENX(SAMPLER_STATE) sampler_state = { 293 .SamplerDisable = false, 294 .TextureBorderColorMode = DX10OGL, 295 296 #if GEN_GEN >= 8 297 .LODPreClampMode = CLAMP_MODE_OGL, 298 #else 299 .LODPreClampEnable = CLAMP_ENABLE_OGL, 300 #endif 301 302 #if GEN_GEN == 8 303 .BaseMipLevel = 0.0, 304 #endif 305 .MipModeFilter = mip_filter_mode, 306 .MagModeFilter = vk_to_gen_tex_filter(mag_filter, pCreateInfo->anisotropyEnable), 307 .MinModeFilter = vk_to_gen_tex_filter(min_filter, pCreateInfo->anisotropyEnable), 308 .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996), 309 .AnisotropicAlgorithm = EWAApproximation, 310 .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14), 311 .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14), 312 .ChromaKeyEnable = 0, 313 .ChromaKeyIndex = 0, 314 .ChromaKeyMode = 0, 315 .ShadowFunction = vk_to_gen_shadow_compare_op[pCreateInfo->compareOp], 316 .CubeSurfaceControlMode = OVERRIDE, 317 318 .BorderColorPointer = border_color_offset, 319 320 #if GEN_GEN >= 8 321 .LODClampMagnificationMode = MIPNONE, 322 #endif 323 324 .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy), 325 .RAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding, 326 .RAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding, 327 .VAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding, 328 .VAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding, 329 .UAddressMinFilterRoundingEnable = enable_min_filter_addr_rounding, 330 .UAddressMagFilterRoundingEnable = enable_mag_filter_addr_rounding, 331 .TrilinearFilterQuality = 0, 332 .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates, 333 .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU], 334 .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV], 335 .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW], 336 }; 337 338 GENX(SAMPLER_STATE_pack)(NULL, sampler->state[p], &sampler_state); 339 } 340 341 *pSampler = anv_sampler_to_handle(sampler); 342 343 return VK_SUCCESS; 344 } 345