Home | History | Annotate | Download | only in i965
      1 /*
 * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
 *    Eric Anholt <eric@anholt.net>
     25  *
     26  */
     27 
     28 #include "main/macros.h"
     29 #include "program/program.h"
     30 #include "program/prog_print.h"
     31 #include "brw_context.h"
     32 #include "brw_defines.h"
     33 #include "brw_eu.h"
     34 
     35 const struct brw_instruction_info brw_opcodes[128] = {
     36     [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     37     [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     38     [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     39     [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     40     [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     41     [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     42     [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 },
     43     [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
     44 
     45     [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     46     [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     47     [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     48     [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     49     [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
     50     [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
     51     [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
     52     [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
     53     [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
     54     [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
     55     [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
     56     [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
     57 
     58     [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     59     [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     60     [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     61     [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     62     [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     63     [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     64     [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     65     [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 },
     66     [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
     67     [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
     68     [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
     69 
     70     [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
     71     [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
     72     [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
     73     [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
     74     [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
     75     [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
     76     [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
     77     [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
     78     [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
     79     [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
     80     [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
     81     [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
     82     [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
     83     [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
     84     [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
     85     [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
     86     [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
     87 };
     88 
     89 static INLINE
     90 bool brw_is_arithmetic_inst(const struct brw_instruction *inst)
     91 {
     92    return brw_opcodes[inst->header.opcode].is_arith;
     93 }
     94 
     95 static const GLuint inst_stride[7] = {
     96     [0] = 0,
     97     [1] = 1,
     98     [2] = 2,
     99     [3] = 4,
    100     [4] = 8,
    101     [5] = 16,
    102     [6] = 32
    103 };
    104 
    105 static const GLuint inst_type_size[8] = {
    106     [BRW_REGISTER_TYPE_UD] = 4,
    107     [BRW_REGISTER_TYPE_D] = 4,
    108     [BRW_REGISTER_TYPE_UW] = 2,
    109     [BRW_REGISTER_TYPE_W] = 2,
    110     [BRW_REGISTER_TYPE_UB] = 1,
    111     [BRW_REGISTER_TYPE_B] = 1,
    112     [BRW_REGISTER_TYPE_F] = 4
    113 };
    114 
    115 static INLINE bool
    116 brw_is_grf_written(const struct brw_instruction *inst,
    117                    int reg_index, int size,
    118                    int gen)
    119 {
    120    if (brw_opcodes[inst->header.opcode].ndst == 0)
    121       return false;
    122 
    123    if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
    124       if (inst->bits1.ia1.dest_reg_file == BRW_GENERAL_REGISTER_FILE)
    125          return true;
    126 
    127    if (inst->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
    128       return false;
    129 
    130    const int reg_start = reg_index * REG_SIZE;
    131    const int reg_end = reg_start + size;
    132 
    133    const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
    134    const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
    135                          + inst->bits1.da1.dest_subreg_nr;
    136    int length, write_end;
    137 
    138    /* SEND is specific */
    139    if (inst->header.opcode == BRW_OPCODE_SEND) {
    140       if (gen >= 5)
    141          length = inst->bits3.generic_gen5.response_length*REG_SIZE;
    142       else
    143          length = inst->bits3.generic.response_length*REG_SIZE;
    144    }
    145    else {
    146       length = 1 << inst->header.execution_size;
    147       length *= type_size;
    148       length *= inst->bits1.da1.dest_horiz_stride;
    149    }
    150 
    151    /* If the two intervals intersect, we overwrite the register */
    152    write_end = write_start + length;
    153    const int left = MAX2(write_start, reg_start);
    154    const int right = MIN2(write_end, reg_end);
    155 
    156    return left < right;
    157 }
    158 
    159 static bool
    160 brw_is_mrf_written_alu(const struct brw_instruction *inst,
    161 		       int reg_index, int size)
    162 {
    163    if (brw_opcodes[inst->header.opcode].ndst == 0)
    164       return false;
    165 
    166    if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
    167       return false;
    168 
    169    if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
    170       return true;
    171 
    172    const int reg_start = reg_index * REG_SIZE;
    173    const int reg_end = reg_start + size;
    174 
    175    const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
    176    const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4;
    177    const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
    178 
    179    /* We use compr4 with a size != 16 elements. Strange, we conservatively
    180     * consider that we are writing the register.
    181     */
    182    if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
    183       return true;
    184 
    185    /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
    186    if (is_compr4) {
    187       const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride;
    188 
    189       /* First 8-way register */
    190       const int write_start0 = mrf_index*REG_SIZE
    191                              + inst->bits1.da1.dest_subreg_nr;
    192       const int write_end0 = write_start0 + length;
    193 
    194       /* Second 8-way register */
    195       const int write_start1 = (mrf_index+4)*REG_SIZE
    196                              + inst->bits1.da1.dest_subreg_nr;
    197       const int write_end1 = write_start1 + length;
    198 
    199       /* If the two intervals intersect, we overwrite the register */
    200       const int left0 = MAX2(write_start0, reg_start);
    201       const int right0 = MIN2(write_end0, reg_end);
    202       const int left1 = MAX2(write_start1, reg_start);
    203       const int right1 = MIN2(write_end1, reg_end);
    204 
    205       if (left0 < right0 || left1 < right1)
    206 	 return true;
    207    }
    208    else {
    209       int length;
    210       length = 1 << inst->header.execution_size;
    211       length *= type_size;
    212       length *= inst->bits1.da1.dest_horiz_stride;
    213 
    214       /* If the two intervals intersect, we write into the register */
    215       const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
    216                             + inst->bits1.da1.dest_subreg_nr;
    217       const int write_end = write_start + length;
    218       const int left = MAX2(write_start, reg_start);
    219       const int right = MIN2(write_end, reg_end);
    220 
    221       if (left < right)
    222 	 return true;
    223    }
    224 
    225    return false;
    226 }
    227 
    228 /* SEND may perform an implicit mov to a mrf register */
    229 static bool
    230 brw_is_mrf_written_send(const struct brw_instruction *inst,
    231 			int reg_index, int size)
    232 {
    233 
    234    const int reg_start = reg_index * REG_SIZE;
    235    const int reg_end = reg_start + size;
    236    const int mrf_start = inst->header.destreg__conditionalmod;
    237    const int write_start = mrf_start * REG_SIZE;
    238    const int write_end = write_start + REG_SIZE;
    239    const int left = MAX2(write_start, reg_start);
    240    const int right = MIN2(write_end, reg_end);
    241 
    242    if (inst->header.opcode != BRW_OPCODE_SEND ||
    243        inst->bits1.da1.src0_reg_file == 0)
    244       return false;
    245 
    246    return left < right;
    247 }
    248 
    249 /* Specific path for message register since we need to handle the compr4 case */
    250 static INLINE bool
    251 brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
    252 {
    253    return (brw_is_mrf_written_alu(inst, reg_index, size) ||
    254 	   brw_is_mrf_written_send(inst, reg_index, size));
    255 }
    256 
    257 static INLINE bool
    258 brw_is_mrf_read(const struct brw_instruction *inst,
    259                 int reg_index, int size, int gen)
    260 {
    261    if (inst->header.opcode != BRW_OPCODE_SEND)
    262       return false;
    263    if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
    264       return true;
    265 
    266    const int reg_start = reg_index*REG_SIZE;
    267    const int reg_end = reg_start + size;
    268 
    269    int length, read_start, read_end;
    270    if (gen >= 5)
    271       length = inst->bits3.generic_gen5.msg_length*REG_SIZE;
    272    else
    273       length = inst->bits3.generic.msg_length*REG_SIZE;
    274 
    275    /* Look if SEND uses an implicit mov. In that case, we read one less register
    276     * (but we write it)
    277     */
    278    if (inst->bits1.da1.src0_reg_file != 0)
    279       read_start = inst->header.destreg__conditionalmod;
    280    else {
    281       length--;
    282       read_start = inst->header.destreg__conditionalmod + 1;
    283    }
    284    read_start *= REG_SIZE;
    285    read_end = read_start + length;
    286 
    287    const int left = MAX2(read_start, reg_start);
    288    const int right = MIN2(read_end, reg_end);
    289 
    290    return left < right;
    291 }
    292 
    293 static INLINE bool
    294 brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
    295 {
    296    int i, j;
    297    if (brw_opcodes[inst->header.opcode].nsrc == 0)
    298       return false;
    299 
    300    /* Look at first source. We must take into account register regions to
    301     * monitor carefully the read. Note that we are a bit too conservative here
    302     * since we do not take into account the fact that some complete registers
    303     * may be skipped
    304     */
    305    if (brw_opcodes[inst->header.opcode].nsrc >= 1) {
    306 
    307       if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
    308          if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
    309             return true;
    310       if (inst->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE)
    311          return false;
    312 
    313       const int reg_start = reg_index*REG_SIZE;
    314       const int reg_end = reg_start + size;
    315 
    316       /* See if at least one of this element intersects the interval */
    317       const int type_size = inst_type_size[inst->bits1.da1.src0_reg_type];
    318       const int elem_num = 1 << inst->header.execution_size;
    319       const int width = 1 << inst->bits2.da1.src0_width;
    320       const int row_num = elem_num >> inst->bits2.da1.src0_width;
    321       const int hs = type_size*inst_stride[inst->bits2.da1.src0_horiz_stride];
    322       const int vs = type_size*inst_stride[inst->bits2.da1.src0_vert_stride];
    323       int row_start = inst->bits2.da1.src0_reg_nr*REG_SIZE
    324                     + inst->bits2.da1.src0_subreg_nr;
    325       for (j = 0; j < row_num; ++j) {
    326          int write_start = row_start;
    327          for (i = 0; i < width; ++i) {
    328             const int write_end = write_start + type_size;
    329             const int left = write_start > reg_start ? write_start : reg_start;
    330             const int right = write_end < reg_end ? write_end : reg_end;
    331             if (left < right)
    332                return true;
    333             write_start += hs;
    334          }
    335          row_start += vs;
    336       }
    337    }
    338 
    339    /* Second src register */
    340    if (brw_opcodes[inst->header.opcode].nsrc >= 2) {
    341 
    342       if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
    343          if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
    344             return true;
    345       if (inst->bits1.da1.src1_reg_file != BRW_GENERAL_REGISTER_FILE)
    346          return false;
    347 
    348       const int reg_start = reg_index*REG_SIZE;
    349       const int reg_end = reg_start + size;
    350 
    351       /* See if at least one of this element intersects the interval */
    352       const int type_size = inst_type_size[inst->bits1.da1.src1_reg_type];
    353       const int elem_num = 1 << inst->header.execution_size;
    354       const int width = 1 << inst->bits3.da1.src1_width;
    355       const int row_num = elem_num >> inst->bits3.da1.src1_width;
    356       const int hs = type_size*inst_stride[inst->bits3.da1.src1_horiz_stride];
    357       const int vs = type_size*inst_stride[inst->bits3.da1.src1_vert_stride];
    358       int row_start = inst->bits3.da1.src1_reg_nr*REG_SIZE
    359                     + inst->bits3.da1.src1_subreg_nr;
    360       for (j = 0; j < row_num; ++j) {
    361          int write_start = row_start;
    362          for (i = 0; i < width; ++i) {
    363             const int write_end = write_start + type_size;
    364             const int left = write_start > reg_start ? write_start : reg_start;
    365             const int right = write_end < reg_end ? write_end : reg_end;
    366             if (left < right)
    367                return true;
    368             write_start += hs;
    369          }
    370          row_start += vs;
    371       }
    372    }
    373 
    374    return false;
    375 }
    376 
    377 static INLINE bool
    378 brw_is_control_done(const struct brw_instruction *mov) {
    379    return
    380        mov->header.dependency_control != 0 ||
    381        mov->header.thread_control != 0 ||
    382        mov->header.mask_control != 0 ||
    383        mov->header.saturate != 0 ||
    384        mov->header.debug_control != 0;
    385 }
    386 
    387 static INLINE bool
    388 brw_is_predicated(const struct brw_instruction *mov) {
    389    return mov->header.predicate_control != 0;
    390 }
    391 
    392 static INLINE bool
    393 brw_is_grf_to_mrf_mov(const struct brw_instruction *mov,
    394                       int *mrf_index,
    395                       int *grf_index,
    396                       bool *is_compr4)
    397 {
    398    if (brw_is_predicated(mov) ||
    399        brw_is_control_done(mov) ||
    400        mov->header.debug_control != 0)
    401       return false;
    402 
    403    if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
    404        mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
    405        mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
    406        mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
    407        mov->bits1.da1.dest_subreg_nr != 0)
    408       return false;
    409 
    410    if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
    411        mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
    412        mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F ||
    413        mov->bits2.da1.src0_width != BRW_WIDTH_8 ||
    414        mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
    415        mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 ||
    416        mov->bits2.da1.src0_subreg_nr != 0 ||
    417        mov->bits2.da1.src0_abs != 0 ||
    418        mov->bits2.da1.src0_negate != 0)
    419       return false;
    420 
    421    *grf_index = mov->bits2.da1.src0_reg_nr;
    422    *mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
    423    *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0;
    424    return true;
    425 }
    426 
    427 static INLINE bool
    428 brw_is_grf_straight_write(const struct brw_instruction *inst, int grf_index)
    429 {
    430    /* remark: no problem to predicate a SEL instruction */
    431    if ((!brw_is_predicated(inst) || inst->header.opcode == BRW_OPCODE_SEL) &&
    432        brw_is_control_done(inst) == false &&
    433        inst->header.execution_size == 4 &&
    434        inst->header.access_mode == BRW_ALIGN_1 &&
    435        inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT &&
    436        inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE &&
    437        inst->bits1.da1.dest_reg_type == BRW_REGISTER_TYPE_F &&
    438        inst->bits1.da1.dest_horiz_stride == BRW_HORIZONTAL_STRIDE_1 &&
    439        inst->bits1.da1.dest_reg_nr == grf_index &&
    440        inst->bits1.da1.dest_subreg_nr == 0 &&
    441        brw_is_arithmetic_inst(inst))
    442       return true;
    443 
    444    return false;
    445 }
    446 
    447 static INLINE bool
    448 brw_inst_are_equal(const struct brw_instruction *src0,
    449                    const struct brw_instruction *src1)
    450 {
    451    const GLuint *field0 = (GLuint *) src0;
    452    const GLuint *field1 = (GLuint *) src1;
    453    return field0[0] == field1[0] &&
    454           field0[1] == field1[1] &&
    455           field0[2] == field1[2] &&
    456           field0[3] == field1[3];
    457 }
    458 
    459 static INLINE void
    460 brw_inst_copy(struct brw_instruction *dst,
    461               const struct brw_instruction *src)
    462 {
    463    GLuint *field_dst = (GLuint *) dst;
    464    const GLuint *field_src = (GLuint *) src;
    465    field_dst[0] = field_src[0];
    466    field_dst[1] = field_src[1];
    467    field_dst[2] = field_src[2];
    468    field_dst[3] = field_src[3];
    469 }
    470 
    471 static void brw_remove_inst(struct brw_compile *p, const bool *removeInst)
    472 {
    473    int i, nr_insn = 0, to = 0, from = 0;
    474 
    475    for (from = 0; from < p->nr_insn; ++from) {
    476       if (removeInst[from])
    477          continue;
    478       if(to != from)
    479          brw_inst_copy(p->store + to, p->store + from);
    480       to++;
    481    }
    482 
    483    for (i = 0; i < p->nr_insn; ++i)
    484       if (removeInst[i] == false)
    485          nr_insn++;
    486    p->nr_insn = nr_insn;
    487 }
    488 
    489 /* The gen code emitter generates a lot of duplications in the
    490  * grf-to-mrf moves, for example when texture sampling with the same
    491  * coordinates from multiple textures..  Here, we monitor same mov
    492  * grf-to-mrf instrutions and remove repeated ones where the operands
    493  * and dst ahven't changed in between.
    494  */
    495 void brw_remove_duplicate_mrf_moves(struct brw_compile *p)
    496 {
    497    const int gen = p->brw->intel.gen;
    498    int i, j;
    499 
    500    bool *removeInst = calloc(sizeof(bool), p->nr_insn);
    501    for (i = 0; i < p->nr_insn; i++) {
    502       if (removeInst[i])
    503          continue;
    504 
    505       const struct brw_instruction *mov = p->store + i;
    506       int mrf_index, grf_index;
    507       bool is_compr4;
    508 
    509       /* Only consider _straight_ grf-to-mrf moves */
    510       if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
    511          continue;
    512 
    513       const int mrf_index0 = mrf_index;
    514       const int mrf_index1 = is_compr4 ? mrf_index0+4 : mrf_index0+1;
    515       const int simd16_size = 2 * REG_SIZE;
    516 
    517       for (j = i + 1; j < p->nr_insn; j++) {
    518          const struct brw_instruction *inst = p->store + j;
    519 
    520          if (brw_inst_are_equal(mov, inst)) {
    521             removeInst[j] = true;
    522             continue;
    523          }
    524 
    525          if (brw_is_grf_written(inst, grf_index, simd16_size, gen) ||
    526              brw_is_mrf_written(inst, mrf_index0, REG_SIZE) ||
    527              brw_is_mrf_written(inst, mrf_index1, REG_SIZE))
    528             break;
    529       }
    530    }
    531 
    532    brw_remove_inst(p, removeInst);
    533    free(removeInst);
    534 }
    535 
    536 /* Replace moves to MRFs where the value moved is the result of a
    537  * normal arithmetic operation with computation right into the MRF.
    538  */
    539 void brw_remove_grf_to_mrf_moves(struct brw_compile *p)
    540 {
    541    int i, j, prev;
    542    struct brw_context *brw = p->brw;
    543    const int gen = brw->intel.gen;
    544    const int simd16_size = 2*REG_SIZE;
    545 
    546    bool *removeInst = calloc(sizeof(bool), p->nr_insn);
    547    assert(removeInst);
    548 
    549    for (i = 0; i < p->nr_insn; i++) {
    550       if (removeInst[i])
    551          continue;
    552 
    553       struct brw_instruction *grf_inst = NULL;
    554       const struct brw_instruction *mov = p->store + i;
    555       int mrf_index, grf_index;
    556       bool is_compr4;
    557 
    558       /* Only consider _straight_ grf-to-mrf moves */
    559       if (!brw_is_grf_to_mrf_mov(mov, &mrf_index, &grf_index, &is_compr4))
    560          continue;
    561 
    562       /* Using comp4 enables a stride of 4 for this instruction */
    563       const int mrf_index0 = mrf_index;
    564       const int mrf_index1 = is_compr4 ? mrf_index+4 : mrf_index+1;
    565 
    566       /* Look where the register has been set */
    567       prev = i;
    568       bool potential_remove = false;
    569       while (prev--) {
    570 
    571          /* If _one_ instruction writes the grf, we try to remove the mov */
    572          struct brw_instruction *inst = p->store + prev;
    573          if (brw_is_grf_straight_write(inst, grf_index)) {
    574             potential_remove = true;
    575             grf_inst = inst;
    576             break;
    577          }
    578 
    579       }
    580 
    581       if (potential_remove == false)
    582          continue;
    583       removeInst[i] = true;
    584 
    585       /* Monitor first the section of code between the grf computation and the
    586        * mov. Here we cannot read or write both mrf and grf register
    587        */
    588       for (j = prev + 1; j < i; ++j) {
    589          struct brw_instruction *inst = p->store + j;
    590          if (removeInst[j])
    591             continue;
    592          if (brw_is_grf_written(inst, grf_index, simd16_size, gen)   ||
    593              brw_is_grf_read(inst, grf_index, simd16_size)           ||
    594              brw_is_mrf_written(inst, mrf_index0, REG_SIZE)   ||
    595              brw_is_mrf_written(inst, mrf_index1, REG_SIZE)   ||
    596              brw_is_mrf_read(inst, mrf_index0, REG_SIZE, gen) ||
    597              brw_is_mrf_read(inst, mrf_index1, REG_SIZE, gen)) {
    598             removeInst[i] = false;
    599             break;
    600          }
    601       }
    602 
    603       /* After the mov, we can read or write the mrf. If the grf is overwritten,
    604        * we are done
    605        */
    606       for (j = i + 1; j < p->nr_insn; ++j) {
    607          struct brw_instruction *inst = p->store + j;
    608          if (removeInst[j])
    609             continue;
    610 
    611          if (brw_is_grf_read(inst, grf_index, simd16_size)) {
    612             removeInst[i] = false;
    613             break;
    614          }
    615 
    616          if (brw_is_grf_straight_write(inst, grf_index))
    617             break;
    618       }
    619 
    620       /* Note that with the top down traversal, we can safely pacth the mov
    621        * instruction
    622        */
    623       if (removeInst[i]) {
    624          grf_inst->bits1.da1.dest_reg_file = mov->bits1.da1.dest_reg_file;
    625          grf_inst->bits1.da1.dest_reg_nr = mov->bits1.da1.dest_reg_nr;
    626       }
    627    }
    628 
    629    brw_remove_inst(p, removeInst);
    630    free(removeInst);
    631 }
    632 
    633 static bool
    634 is_single_channel_dp4(struct brw_instruction *insn)
    635 {
    636    if (insn->header.opcode != BRW_OPCODE_DP4 ||
    637        insn->header.execution_size != BRW_EXECUTE_8 ||
    638        insn->header.access_mode != BRW_ALIGN_16 ||
    639        insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
    640       return false;
    641 
    642    if (!is_power_of_two(insn->bits1.da16.dest_writemask))
    643       return false;
    644 
    645    return true;
    646 }
    647 
    648 /**
    649  * Sets the dependency control fields on DP4 instructions.
    650  *
    651  * The hardware only tracks dependencies on a register basis, so when
    652  * you do:
    653  *
    654  * DP4 dst.x src1 src2
    655  * DP4 dst.y src1 src3
    656  * DP4 dst.z src1 src4
    657  * DP4 dst.w src1 src5
    658  *
    659  * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
    660  * We can examine our instruction stream and set the dependency
    661  * control fields to tell the hardware when to do it.
    662  *
    663  * We may want to extend this to other instructions that are used to
    664  * fill in a channel at a time of the destination register.
    665  */
    666 static void
    667 brw_set_dp4_dependency_control(struct brw_compile *p)
    668 {
    669    int i;
    670 
    671    for (i = 1; i < p->nr_insn; i++) {
    672       struct brw_instruction *insn = &p->store[i];
    673       struct brw_instruction *prev = &p->store[i - 1];
    674 
    675       if (!is_single_channel_dp4(prev))
    676 	 continue;
    677 
    678       if (!is_single_channel_dp4(insn)) {
    679 	 i++;
    680 	 continue;
    681       }
    682 
    683       /* Only avoid hw dep control if the write masks are different
    684        * channels of one reg.
    685        */
    686       if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask)
    687 	 continue;
    688       if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr)
    689 	 continue;
    690 
    691       /* Check if the second instruction depends on the previous one
    692        * for a src.
    693        */
    694       if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE &&
    695 	  (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
    696 	   insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr))
    697 	  continue;
    698       if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE &&
    699 	  (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT ||
    700 	   insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr))
    701 	  continue;
    702 
    703       prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
    704       insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
    705    }
    706 }
    707 
    708 void
    709 brw_optimize(struct brw_compile *p)
    710 {
    711    brw_set_dp4_dependency_control(p);
    712 }
    713