Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #ifndef BRW_VEC4_H
     25 #define BRW_VEC4_H
     26 
     27 #include "brw_shader.h"
     28 #include "brw_program.h"
     29 
     30 #ifdef __cplusplus
     31 #include "brw_ir_vec4.h"
     32 #endif
     33 
     34 #include "compiler/glsl/ir.h"
     35 #include "compiler/nir/nir.h"
     36 
     37 
     38 #ifdef __cplusplus
     39 extern "C" {
     40 #endif
     41 
        /**
         * Prototype of the vec4 assembly generation entry point.
         *
         * It is declared inside the extern "C" block, so it has C linkage when
         * this header is included from C++.
         *
         * Only the declaration is visible here.  Judging by the names, the
         * return value is presumably the generated assembly and
         * \p out_assembly_size presumably receives its size -- TODO confirm
         * against the definition, including the units of the size and who owns
         * the returned memory (possibly \p mem_ctx).
         */
     42 const unsigned *
     43 brw_vec4_generate_assembly(const struct brw_compiler *compiler,
     44                            void *log_data,
     45                            void *mem_ctx,
     46                            const nir_shader *nir,
     47                            struct brw_vue_prog_data *prog_data,
     48                            const struct cfg_t *cfg,
     49                            unsigned *out_assembly_size);
     50 
     51 #ifdef __cplusplus
     52 } /* extern "C" */
     53 
     54 namespace brw {
     55 
        /* Forward declaration: this header only stores a pointer to the class
         * (vec4_visitor::live_intervals), so the full definition is not needed.
         */
     56 class vec4_live_variables;
     57 
     58 /**
     59  * Front-end of the vec4 compiler back-end.
     60  *
     61  * Consumes a nir_shader (handed to the constructor and processed by the
     62  * nir_emit_*() methods) and produces vec4_instruction IR through the
         * emit() overloads and the per-opcode helpers declared below.
         *
         * The class is abstract: make_reg_for_system_value(), setup_payload(),
         * emit_prolog(), emit_thread_end(), emit_urb_write_header() and
         * emit_urb_write_opcode() are pure virtual, so a derived class has to
         * supply them.
     63  */
     64 class vec4_visitor : public backend_shader
     65 {
     66 public:
           /**
            * \param no_spills  presumably stored in the private no_spills member
            *                   (when true, register allocation should fail instead
            *                   of spilling) -- confirm in the definition.
            *
            * The constructor body is not visible here; key and prog_data
            * presumably initialize the const members key_tex and prog_data.
            */
     67    vec4_visitor(const struct brw_compiler *compiler,
     68                 void *log_data,
     69                 const struct brw_sampler_prog_key_data *key,
     70                 struct brw_vue_prog_data *prog_data,
     71                 const nir_shader *shader,
     72 		void *mem_ctx,
     73                 bool no_spills,
     74                 int shader_time_index);
     75    virtual ~vec4_visitor();
     76 
           /* Null destination registers.  Each helper wraps brw_null_reg() in a
            * dst_reg; the _df/_d/_ud variants first retype it to DF/D/UD, while
            * dst_null_f() uses brw_null_reg() unchanged (presumably it is already
            * float-typed -- TODO confirm).
            */
     77    dst_reg dst_null_f()
     78    {
     79       return dst_reg(brw_null_reg());
     80    }
     81 
     82    dst_reg dst_null_df()
     83    {
     84       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
     85    }
     86 
     87    dst_reg dst_null_d()
     88    {
     89       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
     90    }
     91 
     92    dst_reg dst_null_ud()
     93    {
     94       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
     95    }
     96 
           /* Const pointers: they cannot be re-seated after construction. */
     97    const struct brw_sampler_prog_key_data * const key_tex;
     98    struct brw_vue_prog_data * const prog_data;
           /* Failure state; presumably recorded by fail() -- TODO confirm. */
     99    char *fail_msg;
    100    bool failed;
    101 
    102    /**
    103     * GLSL IR currently being processed, which is associated with our
    104     * driver IR instructions for debugging purposes.
    105     */
    106    const void *base_ir;
    107    const char *current_annotation;
    108 
    109    int first_non_payload_grf;
    110    unsigned int max_grf;
    111    int *virtual_grf_start;
    112    int *virtual_grf_end;
    113    brw::vec4_live_variables *live_intervals;
    114    dst_reg userplane[MAX_CLIP_PLANES];
    115 
    116    bool need_all_constants_in_pull_buffer;
    117 
    118    /* Regs for vertex results.  The arrays are sized by
    119     * VARYING_SLOT_TESS_MAX (presumably indexed by varying slot); output_reg
            * and output_num_components carry a second dimension of size 4
            * (presumably one entry per component -- TODO confirm).
    120     */
    121    dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
    122    unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
    123    const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
    124    int uniforms;
    125 
    126    src_reg shader_start_time;
    127 
    128    bool run();
    129    void fail(const char *msg, ...);
    130 
    131    int setup_uniforms(int payload_reg);
    132 
    133    bool reg_allocate_trivial();
    134    bool reg_allocate();
    135    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
    136    int choose_spill_reg(struct ra_graph *g);
    137    void spill_reg(int spill_reg);
    138    void move_grf_array_access_to_scratch();
    139    void move_uniform_array_access_to_pull_constants();
    140    void move_push_constants_to_pull_constants();
    141    void split_uniform_registers();
    142    void pack_uniform_registers();
    143    void calculate_live_intervals();
    144    void invalidate_live_intervals();
    145    void split_virtual_grfs();
    146    bool opt_vector_float();
    147    bool opt_reduce_swizzle();
    148    bool dead_code_eliminate();
    149    int var_range_start(unsigned v, unsigned n) const;
    150    int var_range_end(unsigned v, unsigned n) const;
    151    bool virtual_grf_interferes(int a, int b);
    152    bool opt_cmod_propagation();
    153    bool opt_copy_propagation(bool do_constant_prop = true);
    154    bool opt_cse_local(bblock_t *block);
    155    bool opt_cse();
    156    bool opt_algebraic();
    157    bool opt_register_coalesce();
    158    bool eliminate_find_live_channel();
    159    bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
    160    void opt_set_dependency_control();
    161    void opt_schedule_instructions();
    162    void convert_to_hw_regs();
    163 
    164    bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
    165    bool lower_simd_width();
    166    bool scalarize_df();
    167    bool lower_64bit_mad_to_mul_add();
    168    void apply_logical_swizzle(struct brw_reg *hw_reg,
    169                               vec4_instruction *inst, int arg);
    170 
           /* emit() overloads: they accept either a ready-made instruction or an
            * opcode with an optional destination and up to three sources, and
            * all return a vec4_instruction pointer.
            */
    171    vec4_instruction *emit(vec4_instruction *inst);
    172 
    173    vec4_instruction *emit(enum opcode opcode);
    174    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
    175    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
    176                           const src_reg &src0);
    177    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
    178                           const src_reg &src0, const src_reg &src1);
    179    vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
    180                           const src_reg &src0, const src_reg &src1,
    181                           const src_reg &src2);
    182 
    183    vec4_instruction *emit_before(bblock_t *block,
    184                                  vec4_instruction *inst,
    185 				 vec4_instruction *new_inst);
    186 
           /* Per-opcode helpers.  EMIT1/EMIT2/EMIT3 each declare a member
            * function named after the opcode that takes a destination plus one,
            * two or three source registers and returns a vec4_instruction
            * pointer.  The macros supply the trailing semicolon themselves, which
            * is why the uses below have none.  Whether the returned instruction
            * has already been added to the instruction stream is not visible
            * from these declarations -- check the definitions before relying on
            * it.
            */
    187 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
    188 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
    189 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
    190    EMIT1(MOV)
    191    EMIT1(NOT)
    192    EMIT1(RNDD)
    193    EMIT1(RNDE)
    194    EMIT1(RNDZ)
    195    EMIT1(FRC)
    196    EMIT1(F32TO16)
    197    EMIT1(F16TO32)
    198    EMIT2(ADD)
    199    EMIT2(MUL)
    200    EMIT2(MACH)
    201    EMIT2(MAC)
    202    EMIT2(AND)
    203    EMIT2(OR)
    204    EMIT2(XOR)
    205    EMIT2(DP3)
    206    EMIT2(DP4)
    207    EMIT2(DPH)
    208    EMIT2(SHL)
    209    EMIT2(SHR)
    210    EMIT2(ASR)
    211    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
    212 			 enum brw_conditional_mod condition);
    213    vec4_instruction *IF(src_reg src0, src_reg src1,
    214                         enum brw_conditional_mod condition);
    215    vec4_instruction *IF(enum brw_predicate predicate);
    216    EMIT1(SCRATCH_READ)
    217    EMIT2(SCRATCH_WRITE)
    218    EMIT3(LRP)
    219    EMIT1(BFREV)
    220    EMIT3(BFE)
    221    EMIT2(BFI1)
    222    EMIT3(BFI2)
    223    EMIT1(FBH)
    224    EMIT1(FBL)
    225    EMIT1(CBIT)
    226    EMIT3(MAD)
    227    EMIT2(ADDC)
    228    EMIT2(SUBB)
    229    EMIT1(DIM)
    230 
           /* The helper macros are only needed for the declarations above. */
    231 #undef EMIT1
    232 #undef EMIT2
    233 #undef EMIT3
    234 
    235    int implied_mrf_writes(vec4_instruction *inst);
    236 
    237    vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
    238                                  src_reg src0, src_reg src1);
    239 
    240    vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
    241                               const src_reg &y, const src_reg &a);
    242 
    243    /**
    244     * Copy any live channel from \p src to the first channel of the
    245     * result.
    246     */
    247    src_reg emit_uniformize(const src_reg &src);
    248 
    249    src_reg fix_3src_operand(const src_reg &src);
    250    src_reg resolve_source_modifiers(const src_reg &src);
    251 
           /* src1 defaults to a default-constructed src_reg, so the same
            * declaration serves calls with one source and calls with two.
            */
    252    vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
    253                                const src_reg &src1 = src_reg());
    254 
    255    src_reg fix_math_operand(const src_reg &src);
    256 
    257    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
    258    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
    259    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
    260    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
    261    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
    262    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
    263 
    264    void emit_texture(ir_texture_opcode op,
    265                      dst_reg dest,
    266                      const glsl_type *dest_type,
    267                      src_reg coordinate,
    268                      int coord_components,
    269                      src_reg shadow_comparator,
    270                      src_reg lod, src_reg lod2,
    271                      src_reg sample_index,
    272                      uint32_t constant_offset,
    273                      src_reg offset_value,
    274                      src_reg mcs,
    275                      uint32_t surface, src_reg surface_reg,
    276                      src_reg sampler_reg);
    277 
    278    src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
    279                           src_reg surface);
    280    void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
    281 
    282    void emit_ndc_computation();
    283    void emit_psiz_and_flags(dst_reg reg);
    284    vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
           /* Virtual, so a derived class may replace the per-slot URB handling. */
    285    virtual void emit_urb_slot(dst_reg reg, int varying);
    286 
    287    void emit_shader_time_begin();
    288    void emit_shader_time_end();
    289    void emit_shader_time_write(int shader_time_subindex, src_reg value);
    290 
    291    src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
    292 			      src_reg *reladdr, int reg_offset);
    293    void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
    294 			  dst_reg dst,
    295 			  src_reg orig_src,
    296 			  int base_offset);
    297    void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
    298 			   int base_offset);
    299    void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
    300 				dst_reg dst,
    301 				src_reg orig_src,
    302                                 int base_offset,
    303                                 src_reg indirect);
    304    void emit_pull_constant_load_reg(dst_reg dst,
    305                                     src_reg surf_index,
    306                                     src_reg offset,
    307                                     bblock_t *before_block,
    308                                     vec4_instruction *before_inst);
    309    src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
    310                                 vec4_instruction *inst, src_reg src);
    311 
    312    void resolve_ud_negate(src_reg *reg);
    313 
    314    bool lower_minmax();
    315 
    316    src_reg get_timestamp();
    317 
    318    void dump_instruction(backend_instruction *inst);
    319    void dump_instruction(backend_instruction *inst, FILE *file);
    320 
    321    bool is_high_sampler(src_reg sampler);
    322 
    323    bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);
    324 
    325    void emit_conversion_from_double(dst_reg dst, src_reg src, bool saturate,
    326                                     brw_reg_type single_type);
    327    void emit_conversion_to_double(dst_reg dst, src_reg src, bool saturate,
    328                                   brw_reg_type single_type);
    329 
    330    src_reg setup_imm_df(double v);
    331 
           /* block and ref default to NULL; what a NULL value selects is decided
            * in the definition, which is not visible here.
            */
    332    vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
    333                                         bool for_write,
    334                                         bblock_t *block = NULL,
    335                                         vec4_instruction *ref = NULL);
    336 
           /* NIR entry points.  All of them are virtual, so derived classes may
            * override any of them.
            */
    337    virtual void emit_nir_code();
    338    virtual void nir_setup_uniforms();
    339    virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
    340    virtual void nir_setup_system_values();
    341    virtual void nir_emit_impl(nir_function_impl *impl);
    342    virtual void nir_emit_cf_list(exec_list *list);
    343    virtual void nir_emit_if(nir_if *if_stmt);
    344    virtual void nir_emit_loop(nir_loop *loop);
    345    virtual void nir_emit_block(nir_block *block);
    346    virtual void nir_emit_instr(nir_instr *instr);
    347    virtual void nir_emit_load_const(nir_load_const_instr *instr);
    348    virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
    349    virtual void nir_emit_alu(nir_alu_instr *instr);
    350    virtual void nir_emit_jump(nir_jump_instr *instr);
    351    virtual void nir_emit_texture(nir_tex_instr *instr);
    352    virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
    353    virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);
    354 
           /* Overloads differ in how the register type is chosen: an explicit
            * brw_reg_type, a nir_alu_type, or none.  num_components defaults to 4
            * for the source variants.
            */
    355    dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
    356    dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
    357    dst_reg get_nir_dest(const nir_dest &dest);
    358    src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
    359                        unsigned num_components = 4);
    360    src_reg get_nir_src(const nir_src &src, nir_alu_type type,
    361                        unsigned num_components = 4);
    362    src_reg get_nir_src(const nir_src &src,
    363                        unsigned num_components = 4);
    364    src_reg get_indirect_offset(nir_intrinsic_instr *instr);
    365 
           /* Pure virtual: every concrete derived class must implement it. */
    366    virtual dst_reg *make_reg_for_system_value(int location) = 0;
    367 
    368    dst_reg *nir_locals;
    369    dst_reg *nir_ssa_values;
    370    dst_reg *nir_system_values;
    371 
    372 protected:
    373    void emit_vertex();
    374    void lower_attributes_to_hw_regs(const int *attribute_map,
    375                                     bool interleaved);
    376    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
    377                                    int reg_node_count);
           /* Hooks that every concrete derived class has to implement (pure
            * virtual).
            */
    378    virtual void setup_payload() = 0;
    379    virtual void emit_prolog() = 0;
    380    virtual void emit_thread_end() = 0;
    381    virtual void emit_urb_write_header(int mrf) = 0;
    382    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
           /* Virtual but not pure: the base class declares its own versions,
            * presumably meant to be overridden by a geometry shader visitor --
            * TODO confirm.
            */
    383    virtual void gs_emit_vertex(int stream_id);
    384    virtual void gs_end_primitive();
    385 
    386 private:
    387    /**
    388     * If true, then register allocation should fail instead of spilling.
    389     */
    390    const bool no_spills;
    391 
    392    int shader_time_index;
    393 
    394    unsigned last_scratch; /**< measured in 32-byte (register size) units */
    395 };
    396 
    397 } /* namespace brw */
    398 #endif /* __cplusplus */
    399 
    400 #endif /* BRW_VEC4_H */
    401