Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Eric Anholt <eric (at) anholt.net>
     25  *
     26  */
     27 
     28 #pragma once
     29 
     30 #include "brw_shader.h"
     31 #include "brw_ir_fs.h"
     32 #include "brw_fs_builder.h"
     33 #include "compiler/nir/nir.h"
     34 
     35 struct bblock_t;
        /* Forward declarations for types that this header only uses through
         * pointers.
         *
         * acp_entry is declared inside an unnamed namespace, so each translation
         * unit that includes this header sees its own, distinct acp_entry type.
         * In this file it is only named in the signatures of
         * fs_visitor::try_copy_propagate() and
         * fs_visitor::try_constant_propagate().
         * NOTE(review): presumably the type is defined in the single .cpp that
         * implements those two methods -- confirm before naming it elsewhere.
         */
     36 namespace {
     37    struct acp_entry;
     38 }
     39 
     40 namespace brw {
     41    class fs_live_variables;
     42 }
     43 
     44 struct brw_gs_compile;
     45 
     46 static inline fs_reg
     47 offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
     48 {
           /* Convenience overload: forwards to another offset() overload,
            * passing the builder's dispatch_width() as the middle argument so
            * callers holding a builder do not have to spell it out.
            * NOTE(review): the overload called here is not declared in this
            * file; presumably it comes from brw_ir_fs.h -- confirm.
            */
     49    return offset(reg, bld.dispatch_width(), delta);
     50 }
     51 
     52 /**
     53  * The fragment shader front-end.
     54  *
     55  * Builds FS IR from the nir_shader handed to the constructor (see
         * emit_nir_code() and the nir_emit_*() methods below).
         *
         * Besides run_fs(), the class also declares run_vs(),
         * run_tcs_single_patch(), run_tes(), run_gs() and run_cs().
         *
         * NOTE(review): this comment used to read "Translates either GLSL IR or
         * Mesa IR (for ARB_fragment_program) into FS IR".  Both constructors
         * declared here take a nir_shader and no GLSL IR / Mesa IR entry point
         * is visible, so that wording looked stale -- confirm against the
         * implementation.
     56  */
     57 class fs_visitor : public backend_shader
     58 {
     59 public:
           /* General constructor: takes an opaque program key, the generic
            * brw_stage_prog_data and an explicit dispatch width.
            * input_vue_map is optional and defaults to NULL.
            */
     60    fs_visitor(const struct brw_compiler *compiler, void *log_data,
     61               void *mem_ctx,
     62               const void *key,
     63               struct brw_stage_prog_data *prog_data,
     64               struct gl_program *prog,
     65               const nir_shader *shader,
     66               unsigned dispatch_width,
     67               int shader_time_index,
     68               const struct brw_vue_map *input_vue_map = NULL);
           /* Constructor taking a brw_gs_compile and brw_gs_prog_data; it has no
            * key, gl_program or dispatch_width parameter.
            */
     69    fs_visitor(const struct brw_compiler *compiler, void *log_data,
     70               void *mem_ctx,
     71               struct brw_gs_compile *gs_compile,
     72               struct brw_gs_prog_data *prog_data,
     73               const nir_shader *shader,
     74               int shader_time_index);
           /* NOTE(review): presumably the setup shared by both constructors --
            * confirm.
            */
     75    void init();
     76    ~fs_visitor();
     77 
     78    fs_reg vgrf(const glsl_type *const type);
     79    void import_uniforms(fs_visitor *v);
     80    void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
     81    void compute_clip_distance(gl_clip_plane *clip_planes);
     82 
     83    fs_inst *get_instruction_generating_reg(fs_inst *start,
     84 					   fs_inst *end,
     85 					   const fs_reg &reg);
     86 
     87    void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
     88                                    const fs_reg &dst,
     89                                    const fs_reg &surf_index,
     90                                    const fs_reg &varying_offset,
     91                                    uint32_t const_offset);
     92    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
     93 
           /* Top-level drivers, one per run_*() name.
            * NOTE(review): the bool result presumably reports success (see
            * `failed` / `fail_msg` below) -- confirm.
            */
     94    bool run_fs(bool allow_spilling, bool do_rep_send);
     95    bool run_vs(gl_clip_plane *clip_planes);
     96    bool run_tcs_single_patch();
     97    bool run_tes();
     98    bool run_gs();
     99    bool run_cs();
    100    void optimize();
    101    void allocate_registers(bool allow_spilling);
           /* Payload, CURB and URB setup helpers. */
    102    void setup_fs_payload_gen4();
    103    void setup_fs_payload_gen6();
    104    void setup_vs_payload();
    105    void setup_gs_payload();
    106    void setup_cs_payload();
    107    void fixup_3src_null_dest();
    108    void assign_curb_setup();
    109    void calculate_urb_setup();
    110    void assign_urb_setup();
    111    void convert_attr_sources_to_hw_regs(fs_inst *inst);
    112    void assign_vs_urb_setup();
    113    void assign_tcs_single_patch_urb_setup();
    114    void assign_tes_urb_setup();
    115    void assign_gs_urb_setup();
           /* Register assignment and spilling. */
    116    bool assign_regs(bool allow_spilling, bool spill_all);
    117    void assign_regs_trivial();
    118    void calculate_payload_ranges(int payload_node_count,
    119                                  int *payload_last_use_ip);
    120    void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
    121                                    int first_payload_node);
    122    int choose_spill_reg(struct ra_graph *g);
    123    void spill_reg(int spill_reg);
    124    void split_virtual_grfs();
    125    bool compact_virtual_grfs();
    126    void assign_constant_locations();
    127    void lower_constant_loads();
    128    void invalidate_live_intervals();
    129    void calculate_live_intervals();
    130    void calculate_register_pressure();
    131    void validate();
           /* Optimization passes.
            * NOTE(review): the bool-returning passes presumably report whether
            * they changed the program -- confirm.
            */
    132    bool opt_algebraic();
    133    bool opt_redundant_discard_jumps();
    134    bool opt_cse();
    135    bool opt_cse_local(bblock_t *block);
    136    bool opt_copy_propagation();
           /* These two are the only users of acp_entry in this header; the type
            * is forward-declared in an unnamed namespace at the top of the file.
            */
    137    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
    138    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
    139    bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
    140                                    exec_list *acp);
    141    bool opt_drop_redundant_mov_to_flags();
    142    bool opt_register_renaming();
    143    bool register_coalesce();
    144    bool compute_to_mrf();
    145    bool eliminate_find_live_channel();
    146    bool dead_code_eliminate();
    147    bool remove_duplicate_mrf_writes();
    148 
    149    bool opt_sampler_eot();
    150    bool virtual_grf_interferes(int a, int b);
    151    void schedule_instructions(instruction_scheduler_mode mode);
    152    void insert_gen4_send_dependency_workarounds();
    153    void insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
    154                                                     fs_inst *inst);
    155    void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
    156                                                      fs_inst *inst);
           /* Failure reporting in va_list and variadic flavours.
            * NOTE(review): presumably these set `failed` and `fail_msg` below --
            * confirm.
            */
    157    void vfail(const char *msg, va_list args);
    158    void fail(const char *msg, ...);
    159    void limit_dispatch_width(unsigned n, const char *msg);
           /* Lowering passes. */
    160    void lower_uniform_pull_constant_loads();
    161    bool lower_load_payload();
    162    bool lower_pack();
    163    bool lower_d2x();
    164    bool lower_logical_sends();
    165    bool lower_integer_multiplication();
    166    bool lower_minmax();
    167    bool lower_simd_width();
    168    bool opt_combine_constants();
    169 
           /* Emission helpers.  Several return a pointer to an fs_reg.
            * NOTE(review): ownership of the returned fs_reg is not visible in
            * this header -- confirm before freeing or caching it.
            */
    170    void emit_dummy_fs();
    171    void emit_repclear_shader();
    172    void emit_fragcoord_interpolation(fs_reg wpos);
    173    fs_reg *emit_frontfacing_interpolation();
    174    fs_reg *emit_samplepos_setup();
    175    fs_reg *emit_sampleid_setup();
    176    fs_reg *emit_samplemaskin_setup();
    177    fs_reg *emit_vs_system_value(int location);
    178    void emit_interpolation_setup_gen4();
    179    void emit_interpolation_setup_gen6();
    180    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
    181    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
    182                          const fs_reg &sampler);
    183    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
    184    fs_reg resolve_source_modifiers(const fs_reg &src);
    185    void emit_discard_jump();
    186    bool opt_peephole_sel();
    187    bool opt_peephole_predicated_break();
    188    bool opt_saturate_propagation();
    189    bool opt_cmod_propagation();
    190    bool opt_zero_samples();
    191 
           /* NIR translation: emit_nir_code() plus one nir_emit_*() method per
            * NIR construct (impl, cf list, if, loop, block, instruction kinds
            * and per-stage intrinsics).
            */
    192    void emit_nir_code();
    193    void nir_setup_outputs();
    194    void nir_setup_uniforms();
    195    void nir_emit_system_values();
    196    void nir_emit_impl(nir_function_impl *impl);
    197    void nir_emit_cf_list(exec_list *list);
    198    void nir_emit_if(nir_if *if_stmt);
    199    void nir_emit_loop(nir_loop *loop);
    200    void nir_emit_block(nir_block *block);
    201    void nir_emit_instr(nir_instr *instr);
    202    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
    203    void nir_emit_load_const(const brw::fs_builder &bld,
    204                             nir_load_const_instr *instr);
    205    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
    206                               nir_intrinsic_instr *instr);
    207    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
    208                                nir_intrinsic_instr *instr);
    209    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
    210                               nir_intrinsic_instr *instr);
    211    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
    212                               nir_intrinsic_instr *instr);
    213    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
    214                               nir_intrinsic_instr *instr);
    215    void nir_emit_intrinsic(const brw::fs_builder &bld,
    216                            nir_intrinsic_instr *instr);
    217    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
    218                                nir_intrinsic_instr *instr);
    219    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
    220                              int op, nir_intrinsic_instr *instr);
    221    void nir_emit_shared_atomic(const brw::fs_builder &bld,
    222                                int op, nir_intrinsic_instr *instr);
    223    void nir_emit_texture(const brw::fs_builder &bld,
    224                          nir_tex_instr *instr);
    225    void nir_emit_jump(const brw::fs_builder &bld,
    226                       nir_jump_instr *instr);
           /* Accessors mapping NIR sources/destinations to fs_reg values. */
    227    fs_reg get_nir_src(const nir_src &src);
    228    fs_reg get_nir_src_imm(const nir_src &src);
    229    fs_reg get_nir_dest(const nir_dest &dest);
    230    fs_reg get_nir_image_deref(const nir_deref_var *deref);
    231    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    232    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
    233                      unsigned wr_mask);
    234 
    235    bool optimize_extract_to_float(nir_alu_instr *instr,
    236                                   const fs_reg &result);
    237    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
    238                                      const fs_reg &result);
    239 
    240    void emit_alpha_test();
    241    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
    242                                  fs_reg color1, fs_reg color2,
    243                                  fs_reg src0_alpha, unsigned components);
    244    void emit_fb_writes();
    245    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
    246                                       const fs_reg &dst, unsigned target);
           /* gs_vertex_count is optional and defaults to a default-constructed
            * fs_reg.
            */
    247    void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
    248    void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
    249                                         unsigned stream_id);
    250    void emit_gs_control_data_bits(const fs_reg &vertex_count);
    251    void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
    252    void emit_gs_vertex(const nir_src &vertex_count_nir_src,
    253                        unsigned stream_id);
    254    void emit_gs_thread_end();
    255    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
    256                            unsigned base_offset, const nir_src &offset_src,
    257                            unsigned num_components, unsigned first_component);
    258    void emit_cs_terminate();
    259    fs_reg *emit_cs_work_group_id_setup();
    260 
    261    void emit_barrier();
    262 
    263    void emit_shader_time_begin();
    264    void emit_shader_time_end();
    265    void SHADER_TIME_ADD(const brw::fs_builder &bld,
    266                         int shader_time_subindex,
    267                         fs_reg value);
    268 
    269    fs_reg get_timestamp(const brw::fs_builder &bld);
    270 
    271    struct brw_reg interp_reg(int location, int channel);
    272 
    273    int implied_mrf_writes(fs_inst *inst);
    274 
           /* Debug dumps.  The dump_instructions() overloads are virtual.
            * NOTE(review): presumably they override backend_shader -- confirm.
            */
    275    virtual void dump_instructions();
    276    virtual void dump_instructions(const char *name);
    277    void dump_instruction(backend_instruction *inst);
    278    void dump_instruction(backend_instruction *inst, FILE *file);
    279 
           /* Opaque program key; the pointer itself is const, so the class is
            * not copy-assignable.
            * NOTE(review): presumably the `key` constructor argument, with the
            * concrete key type depending on the stage -- confirm.
            */
    280    const void *const key;
    281    const struct brw_sampler_prog_key_data *key_tex;
    282 
    283    struct brw_gs_compile *gs_compile;
    284 
    285    struct brw_stage_prog_data *prog_data;
    286    struct gl_program *prog;
    287 
    288    const struct brw_vue_map *input_vue_map;
    289 
           /* NOTE(review): presumably per-virtual-GRF live range bounds filled
            * in by calculate_live_intervals() -- confirm.
            */
    290    int *virtual_grf_start;
    291    int *virtual_grf_end;
    292    brw::fs_live_variables *live_intervals;
    293 
    294    int *regs_live_at_ip;
    295 
    296    /** Number of uniform variable components visited. */
    297    unsigned uniforms;
    298 
    299    /** Byte-offset for the next available spot in the scratch space buffer. */
    300    unsigned last_scratch;
    301 
    302    /**
    303     * Array mapping UNIFORM register numbers to the pull parameter index,
    304     * or -1 if this uniform register isn't being uploaded as a pull constant.
    305     */
    306    int *pull_constant_loc;
    307 
    308    /**
    309     * Array mapping UNIFORM register numbers to the push parameter index,
    310     * or -1 if this uniform register isn't being uploaded as a push constant.
    311     */
    312    int *push_constant_loc;
    313 
    314    fs_reg frag_depth;
    315    fs_reg frag_stencil;
    316    fs_reg sample_mask;
           /* One fs_reg per varying slot (VARYING_SLOT_MAX entries). */
    317    fs_reg outputs[VARYING_SLOT_MAX];
    318    fs_reg dual_src_output;
    319    int first_non_payload_grf;
    320    /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
    321    unsigned max_grf;
    322 
    323    fs_reg *nir_locals;
    324    fs_reg *nir_ssa_values;
    325    fs_reg *nir_system_values;
    326 
           /* NOTE(review): presumably written by fail()/vfail() -- confirm. */
    327    bool failed;
    328    char *fail_msg;
    329 
    330    /** Register numbers for thread payload fields. */
    331    struct thread_payload {
    332       uint8_t source_depth_reg;
    333       uint8_t source_w_reg;
    334       uint8_t aa_dest_stencil_reg;
    335       uint8_t dest_depth_reg;
    336       uint8_t sample_pos_reg;
    337       uint8_t sample_mask_in_reg;
              /* One register number per barycentric interpolation mode. */
    338       uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
    339       uint8_t local_invocation_id_reg;
    340 
    341       /** The number of thread payload registers the hardware will supply. */
    342       uint8_t num_regs;
    343    } payload;
    344 
    345    bool source_depth_to_render_target;
    346    bool runtime_check_aads_emit;
    347 
    348    fs_reg pixel_x;
    349    fs_reg pixel_y;
    350    fs_reg wpos_w;
    351    fs_reg pixel_w;
    352    fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
    353    fs_reg shader_start_time;
    354    fs_reg userplane[MAX_CLIP_PLANES];
    355    fs_reg final_gs_vertex_count;
    356    fs_reg control_data_bits;
    357    fs_reg invocation_id;
    358 
    359    unsigned grf_used;
    360    bool spilled_any_registers;
    361 
    362    const unsigned dispatch_width; /**< 8, 16 or 32 */
    363    unsigned min_dispatch_width;
    364    unsigned max_dispatch_width;
    365 
    366    int shader_time_index;
    367 
    368    unsigned promoted_constants;
           /* Declared last.  C++ initializes members in declaration order, so
            * this is constructed after every member above.
            * NOTE(review): its initializer may rely on earlier members (e.g.
            * dispatch_width) -- check the constructors before reordering.
            */
    369    brw::fs_builder bld;
    370 };
    371 
    372 /**
    373  * The fragment shader code generator.
    374  *
    375  * Translates FS IR to actual i965 assembly code.
         *
         * The public interface is the constructor, enable_debug(),
         * generate_code() and get_assembly(); the generate_*() emitters and all
         * data members are private.
    376  */
    377 class fs_generator
    378 {
    379 public:
    380    fs_generator(const struct brw_compiler *compiler, void *log_data,
    381                 void *mem_ctx,
    382                 const void *key,
    383                 struct brw_stage_prog_data *prog_data,
    384                 unsigned promoted_constants,
    385                 bool runtime_check_aads_emit,
    386                 gl_shader_stage stage);
    387    ~fs_generator();
    388 
    389    void enable_debug(const char *shader_name);
           /* NOTE(review): the meaning of the int result is not visible in this
            * header -- confirm in the implementation.
            */
    390    int generate_code(const cfg_t *cfg, int dispatch_width);
           /* NOTE(review): presumably stores the size of the returned buffer
            * through assembly_size; the unit (bytes or words) is not visible
            * here -- confirm.
            */
    391    const unsigned *get_assembly(unsigned int *assembly_size);
    392 
    393 private:
           /* Private emitters.  Each takes the fs_inst being generated and/or
            * the brw_reg operands it works on.
            */
    394    void fire_fb_write(fs_inst *inst,
    395                       struct brw_reg payload,
    396                       struct brw_reg implied_header,
    397                       GLuint nr);
    398    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
    399    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
    400                          struct brw_reg payload);
    401    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
    402    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    403    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
    404    void generate_barrier(fs_inst *inst, struct brw_reg src);
           /* src is a pointer rather than a single brw_reg.
            * NOTE(review): presumably an array of source registers -- confirm
            * the expected length.
            */
    405    void generate_linterp(fs_inst *inst, struct brw_reg dst,
    406 			 struct brw_reg *src);
    407    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
    408                      struct brw_reg surface_index,
    409                      struct brw_reg sampler_index);
    410    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
    411                                  struct brw_reg src,
    412                                  struct brw_reg surf_index);
    413    void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
    414    void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src);
    415    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
    416    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
    417    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
    418    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
    419                                             struct brw_reg index,
    420                                             struct brw_reg offset);
    421    void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
    422                                                  struct brw_reg dst,
    423                                                  struct brw_reg surf_index,
    424                                                  struct brw_reg payload);
    425    void generate_varying_pull_constant_load_gen4(fs_inst *inst,
    426                                                  struct brw_reg dst,
    427                                                  struct brw_reg index);
    428    void generate_varying_pull_constant_load_gen7(fs_inst *inst,
    429                                                  struct brw_reg dst,
    430                                                  struct brw_reg index,
    431                                                  struct brw_reg offset);
    432    void generate_mov_dispatch_to_flags(fs_inst *inst);
    433 
    434    void generate_pixel_interpolator_query(fs_inst *inst,
    435                                           struct brw_reg dst,
    436                                           struct brw_reg src,
    437                                           struct brw_reg msg_data,
    438                                           unsigned msg_type);
    439 
    440    void generate_set_sample_id(fs_inst *inst,
    441                                struct brw_reg dst,
    442                                struct brw_reg src0,
    443                                struct brw_reg src1);
    444 
    445    void generate_discard_jump(fs_inst *inst);
    446 
    447    void generate_pack_half_2x16_split(fs_inst *inst,
    448                                       struct brw_reg dst,
    449                                       struct brw_reg x,
    450                                       struct brw_reg y);
    451    void generate_unpack_half_2x16_split(fs_inst *inst,
    452                                         struct brw_reg dst,
    453                                         struct brw_reg src);
    454 
    455    void generate_shader_time_add(fs_inst *inst,
    456                                  struct brw_reg payload,
    457                                  struct brw_reg offset,
    458                                  struct brw_reg value);
    459 
    460    void generate_mov_indirect(fs_inst *inst,
    461                               struct brw_reg dst,
    462                               struct brw_reg reg,
    463                               struct brw_reg indirect_byte_offset);
    464 
           /* NOTE(review): presumably works on discard_halt_patches below --
            * confirm.
            */
    465    bool patch_discard_jumps_to_fb_writes();
    466 
    467    const struct brw_compiler *compiler;
    468    void *log_data; /* Passed to compiler->*_log functions */
    469 
    470    const struct gen_device_info *devinfo;
    471 
    472    struct brw_codegen *p;
           /* Both pointers below are themselves const, so they can only be set
            * in the constructor's initializer list and the class is not
            * copy-assignable.
            */
    473    const void * const key;
    474    struct brw_stage_prog_data * const prog_data;
    475 
    476    unsigned dispatch_width; /**< 8, 16 or 32 */
    477 
    478    exec_list discard_halt_patches;
    479    unsigned promoted_constants;
    480    bool runtime_check_aads_emit;
    481    bool debug_flag;
    482    const char *shader_name;
    483    gl_shader_stage stage;
    484    void *mem_ctx;
    485 };
    486 
    487 bool brw_do_channel_expressions(struct exec_list *instructions);
    488 bool brw_do_vector_splitting(struct exec_list *instructions);
        /* The two passes above take an exec_list of instructions and return
         * bool.
         * NOTE(review): presumably the result reports whether the list was
         * changed -- confirm in the defining file.
         */
    489 
        /* Free-standing helpers that take a builder plus dst/src registers and a
         * component count.
         * NOTE(review): going by the names, the first rearranges the result of a
         * 32-bit load into 64-bit data and the second prepares 64-bit data for a
         * 32-bit write -- confirm, including whether dst and src may alias.
         */
    490 void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld,
    491                                              const fs_reg &dst,
    492                                              const fs_reg &src,
    493                                              uint32_t components);
    494 
    495 void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
    496                                         const fs_reg &dst,
    497                                         const fs_reg &src,
    498                                         uint32_t components);
        /* Returns an fs_reg for the double value v.
         * NOTE(review): the name suggests a double-float immediate set up via
         * the builder -- confirm.
         */
    499 fs_reg setup_imm_df(const brw::fs_builder &bld,
    500                     double v);
    501 
        /* Maps a GLSL interpolation mode together with a NIR intrinsic opcode to
         * a brw_barycentric_mode value.
         */
    502 enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
    503                                                nir_intrinsic_op op);
    504
    504