/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_compiler.h"
#include "brw_context.h"
#include "compiler/nir/nir.h"
#include "main/errors.h"
#include "util/debug.h"

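/* NIR compiler options shared by the scalar and vec4 backends.  Each
 * "lower_*" flag asks NIR to rewrite an operation the EU can't do
 * directly; lower_fdiv, for example, turns a / b into a * rcp(b).
 */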
#define COMMON_OPTIONS                                                        \
   .lower_sub = true,                                                         \
   .lower_fdiv = true,                                                        \
   .lower_scmp = true,                                                        \
   .lower_fmod32 = true,                                                      \
   .lower_fmod64 = false,                                                     \
   .lower_bitfield_extract = true,                                            \
   .lower_bitfield_insert = true,                                             \
   .lower_uadd_carry = true,                                                  \
   .lower_usub_borrow = true,                                                 \
   .lower_flrp64 = true,                                                      \
   .native_integers = true,                                                   \
   .use_interpolated_input_intrinsics = true,                                 \
   .vertex_id_zero_based = true

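/* Options for the scalar (SIMD8/SIMD16) backends.  The GLSL pack/unpack
 * built-ins have no one-instruction equivalent here, so NIR lowers them
 * to simpler ALU operations.
 */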
static const struct nir_shader_compiler_options scalar_nir_options = {
   COMMON_OPTIONS,
   .lower_pack_half_2x16 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .max_unroll_iterations = 32,
};

static const struct nir_shader_compiler_options vector_nir_options = {
   COMMON_OPTIONS,

   /* In the vec4 backend, our dpN instruction replicates its result to all the
    * components of a vec4.  We would like NIR to give us replicated fdot
    * instructions because it can optimize better for us.
    */
   .fdot_replicates = true,

   /* Prior to Gen6, there are no three-source operations for SIMD4x2. */
   .lower_flrp32 = true,

   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .max_unroll_iterations = 32,
};

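/* Identical to vector_nir_options except that flrp32 is not lowered:
 * Gen6 and later have a native three-source LRP instruction.
 */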
static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
   COMMON_OPTIONS,

   /* In the vec4 backend, our dpN instruction replicates its result to all the
    * components of a vec4.  We would like NIR to give us replicated fdot
    * instructions because it can optimize better for us.
    */
   .fdot_replicates = true,

   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .max_unroll_iterations = 32,
};

struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
{
   struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);

   compiler->devinfo = devinfo;

   brw_fs_alloc_reg_sets(compiler);
   brw_vec4_alloc_reg_set(compiler);

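   /* The hardware SIN/COS instructions can return values slightly outside
    * [-1, 1].  INTEL_PRECISE_TRIG=1 asks the backends to emit a fixup
    * multiply on the result, trading a little performance for accuracy.
    */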
   compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);

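   /* FS and CS always use the scalar backend.  On Gen8+ the remaining
    * stages default to scalar as well, but each can be forced back to
    * vec4 with a debug flag or environment variable.
    */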
   compiler->scalar_stage[MESA_SHADER_VERTEX] =
      devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
   compiler->scalar_stage[MESA_SHADER_TESS_CTRL] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TCS", true);
   compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
   compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
      devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true);
   compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
   compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;

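   /* Loop unrolling is left to NIR (bounded by max_unroll_iterations in
    * the options above), so GLSL IR unrolling is disabled below.
    */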
   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      compiler->glsl_compiler_options[i].MaxUnrollIterations = 0;
      compiler->glsl_compiler_options[i].MaxIfDepth =
         devinfo->gen < 6 ? 16 : UINT_MAX;

      compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
      compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;

      bool is_scalar = compiler->scalar_stage[i];

      compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
      compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
      compiler->glsl_compiler_options[i].OptimizeForAOS = !is_scalar;

      if (is_scalar) {
         compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options;
      } else {
         compiler->glsl_compiler_options[i].NirOptions =
            devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6;
      }

      compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
      compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;
   }

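   /* The tessellation stages access their inputs (and the TCS its outputs)
    * through URB messages, which support computed offsets, so indirect
    * addressing doesn't need to be lowered for them.
    */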
   compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
   compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
   compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectOutput = false;

   if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;

   return compiler;
}
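
/* A brw_compiler is created once per screen and shared by all contexts;
 * in this era of the driver the call in intel_screen.c looks roughly
 * like:
 *
 *    screen->compiler = brw_compiler_create(screen, devinfo);
 */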