Home | History | Annotate | Download | only in shader
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2012-2013 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "toy_compiler.h"
     29 
/*
 * Operand origins in this file are packed as
 * (RegNum << CG_REG_SHIFT | SubRegNumInBytes).  CG_REG_NUM() extracts the
 * register number from such a packed origin.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
     32 
     33 struct codegen {
     34    const struct ilo_dev *dev;
     35    const struct toy_inst *inst;
     36    int pc;
     37 
     38    unsigned flag_reg_num;
     39    unsigned flag_sub_reg_num;
     40 
     41    struct codegen_dst {
     42       unsigned file;
     43       unsigned type;
     44       bool indirect;
     45       unsigned indirect_subreg;
     46       unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
     47 
     48       unsigned horz_stride;
     49 
     50       unsigned writemask;
     51    } dst;
     52 
     53    struct codegen_src {
     54       unsigned file;
     55       unsigned type;
     56       bool indirect;
     57       unsigned indirect_subreg;
     58       unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
     59 
     60       unsigned vert_stride;
     61       unsigned width;
     62       unsigned horz_stride;
     63 
     64       unsigned swizzle[4];
     65       bool absolute;
     66       bool negate;
     67    } src[3];
     68 };
     69 
     70 /*
     71  * From the Sandy Bridge PRM, volume 4 part 2, page 107-108:
     72  *
     73  *     "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up
     74  *      result forms bits [88:77], the source 0 register region fields, of the
     75  *      128-bit instruction word."
     76  *
     77  *     "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit
     78  *      table-look-up result forms bits [100:96], [68,64] and [52,48] of the
     79  *      128-bit instruction word."
     80  *
     81  *     "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit
     82  *      table-look-up result forms bits [63:61] and [46, 32] of the 128-bit
     83  *      instruction word."
     84  *
     85  *     "(ControlIndex) The 5-bit index for data type fields. The 17-bit
     86  *      table-look-up result forms bits[31], and [23, 8] of the 128-bit
     87  *      instruction word."
     88  */
     89 static const struct toy_compaction_table toy_compaction_table_gen6 = {
     90    .control = {
     91       [0]   = 0x00000,  /* 00000000000000000 */
     92       [1]   = 0x08000,  /* 01000000000000000 */
     93       [2]   = 0x06000,  /* 00110000000000000 */
     94       [3]   = 0x00100,  /* 00000000100000000 */
     95       [4]   = 0x02000,  /* 00010000000000000 */
     96       [5]   = 0x01100,  /* 00001000100000000 */
     97       [6]   = 0x00102,  /* 00000000100000010 */
     98       [7]   = 0x00002,  /* 00000000000000010 */
     99       [8]   = 0x08100,  /* 01000000100000000 */
    100       [9]   = 0x0a000,  /* 01010000000000000 */
    101       [10]  = 0x16000,  /* 10110000000000000 */
    102       [11]  = 0x04000,  /* 00100000000000000 */
    103       [12]  = 0x1a000,  /* 11010000000000000 */
    104       [13]  = 0x18000,  /* 11000000000000000 */
    105       [14]  = 0x09100,  /* 01001000100000000 */
    106       [15]  = 0x08008,  /* 01000000000001000 */
    107       [16]  = 0x08004,  /* 01000000000000100 */
    108       [17]  = 0x00008,  /* 00000000000001000 */
    109       [18]  = 0x00004,  /* 00000000000000100 */
    110       [19]  = 0x01100,  /* 00111000100000000 */
    111       [20]  = 0x01102,  /* 00001000100000010 */
    112       [21]  = 0x06100,  /* 00110000100000000 */
    113       [22]  = 0x06001,  /* 00110000000000001 */
    114       [23]  = 0x04001,  /* 00100000000000001 */
    115       [24]  = 0x06002,  /* 00110000000000010 */
    116       [25]  = 0x06005,  /* 00110000000000101 */
    117       [26]  = 0x06009,  /* 00110000000001001 */
    118       [27]  = 0x06010,  /* 00110000000010000 */
    119       [28]  = 0x06003,  /* 00110000000000011 */
    120       [29]  = 0x06004,  /* 00110000000000100 */
    121       [30]  = 0x06108,  /* 00110000100001000 */
    122       [31]  = 0x04009,  /* 00100000000001001 */
    123    },
    124    .datatype = {
    125       [0]   = 0x09c00,  /* 001001110000000000 */
    126       [1]   = 0x08c20,  /* 001000110000100000 */
    127       [2]   = 0x09c01,  /* 001001110000000001 */
    128       [3]   = 0x08060,  /* 001000000001100000 */
    129       [4]   = 0x0ad29,  /* 001010110100101001 */
    130       [5]   = 0x081ad,  /* 001000000110101101 */
    131       [6]   = 0x0c62c,  /* 001100011000101100 */
    132       [7]   = 0x0bdad,  /* 001011110110101101 */
    133       [8]   = 0x081ec,  /* 001000000111101100 */
    134       [9]   = 0x08061,  /* 001000000001100001 */
    135       [10]  = 0x08ca5,  /* 001000110010100101 */
    136       [11]  = 0x08041,  /* 001000000001000001 */
    137       [12]  = 0x08231,  /* 001000001000110001 */
    138       [13]  = 0x08229,  /* 001000001000101001 */
    139       [14]  = 0x08020,  /* 001000000000100000 */
    140       [15]  = 0x08232,  /* 001000001000110010 */
    141       [16]  = 0x0a529,  /* 001010010100101001 */
    142       [17]  = 0x0b4a5,  /* 001011010010100101 */
    143       [18]  = 0x081a5,  /* 001000000110100101 */
    144       [19]  = 0x0c629,  /* 001100011000101001 */
    145       [20]  = 0x0b62c,  /* 001011011000101100 */
    146       [21]  = 0x0b5a5,  /* 001011010110100101 */
    147       [22]  = 0x0bda5,  /* 001011110110100101 */
    148       [23]  = 0x0f1bd,  /* 001111011110111101 */
    149       [24]  = 0x0f1bc,  /* 001111011110111100 */
    150       [25]  = 0x0f1bd,  /* 001111011110111101 */
    151       [26]  = 0x0f19d,  /* 001111011110011101 */
    152       [27]  = 0x0f1be,  /* 001111011110111110 */
    153       [28]  = 0x08021,  /* 001000000000100001 */
    154       [29]  = 0x08022,  /* 001000000000100010 */
    155       [30]  = 0x09fdd,  /* 001001111111011101 */
    156       [31]  = 0x083be,  /* 001000001110111110 */
    157    },
    158    .subreg = {
    159       [0]   = 0x0000,   /* 000000000000000 */
    160       [1]   = 0x0004,   /* 000000000000100 */
    161       [2]   = 0x0180,   /* 000000110000000 */
    162       [3]   = 0x1000,   /* 111000000000000 */
    163       [4]   = 0x3c08,   /* 011110000001000 */
    164       [5]   = 0x0400,   /* 000010000000000 */
    165       [6]   = 0x0010,   /* 000000000010000 */
    166       [7]   = 0x0c0c,   /* 000110000001100 */
    167       [8]   = 0x1000,   /* 001000000000000 */
    168       [9]   = 0x0200,   /* 000001000000000 */
    169       [10]  = 0x0294,   /* 000001010010100 */
    170       [11]  = 0x0056,   /* 000000001010110 */
    171       [12]  = 0x2000,   /* 010000000000000 */
    172       [13]  = 0x6000,   /* 110000000000000 */
    173       [14]  = 0x0800,   /* 000100000000000 */
    174       [15]  = 0x0080,   /* 000000010000000 */
    175       [16]  = 0x0008,   /* 000000000001000 */
    176       [17]  = 0x4000,   /* 100000000000000 */
    177       [18]  = 0x0280,   /* 000001010000000 */
    178       [19]  = 0x1400,   /* 001010000000000 */
    179       [20]  = 0x1800,   /* 001100000000000 */
    180       [21]  = 0x0054,   /* 000000001010100 */
    181       [22]  = 0x5a94,   /* 101101010010100 */
    182       [23]  = 0x2800,   /* 010100000000000 */
    183       [24]  = 0x008f,   /* 000000010001111 */
    184       [25]  = 0x3000,   /* 011000000000000 */
    185       [26]  = 0x1c00,   /* 111110000000000 */
    186       [27]  = 0x5000,   /* 101000000000000 */
    187       [28]  = 0x000f,   /* 000000000001111 */
    188       [29]  = 0x088f,   /* 000100010001111 */
    189       [30]  = 0x108f,   /* 001000010001111 */
    190       [31]  = 0x0c00,   /* 000110000000000 */
    191    },
    192    .src = {
    193       [0]   = 0x000,    /* 000000000000 */
    194       [1]   = 0x588,    /* 010110001000 */
    195       [2]   = 0x468,    /* 010001101000 */
    196       [3]   = 0x228,    /* 001000101000 */
    197       [4]   = 0x690,    /* 011010010000 */
    198       [5]   = 0x120,    /* 000100100000 */
    199       [6]   = 0x46c,    /* 010001101100 */
    200       [7]   = 0x510,    /* 010101110000 */
    201       [8]   = 0x618,    /* 011001111000 */
    202       [9]   = 0x328,    /* 001100101000 */
    203       [10]  = 0x58c,    /* 010110001100 */
    204       [11]  = 0x220,    /* 001000100000 */
    205       [12]  = 0x58a,    /* 010110001010 */
    206       [13]  = 0x002,    /* 000000000010 */
    207       [14]  = 0x550,    /* 010101010000 */
    208       [15]  = 0x568,    /* 010101101000 */
    209       [16]  = 0xf4c,    /* 111101001100 */
    210       [17]  = 0xf2c,    /* 111100101100 */
    211       [18]  = 0x610,    /* 011001110000 */
    212       [19]  = 0x589,    /* 010110001001 */
    213       [20]  = 0x558,    /* 010101011000 */
    214       [21]  = 0x348,    /* 001101001000 */
    215       [22]  = 0x42c,    /* 010000101100 */
    216       [23]  = 0x400,    /* 010000000000 */
    217       [24]  = 0x310,    /* 001101110000 */
    218       [25]  = 0x310,    /* 001100010000 */
    219       [26]  = 0x300,    /* 001100000000 */
    220       [27]  = 0x46a,    /* 010001101010 */
    221       [28]  = 0x318,    /* 001101111000 */
    222       [29]  = 0x010,    /* 000001110000 */
    223       [30]  = 0x320,    /* 001100100000 */
    224       [31]  = 0x350,    /* 001101010000 */
    225    },
    226 };
    227 
/*
 * Instruction compaction tables returned for Gen7 and Gen7.5 devices.
 *
 * From the Ivy Bridge PRM, volume 4 part 3, page 128:
 *
 *     "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
 *      MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
 *      Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
 *
 *     "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
 *      MSB to LSB) for various fields for Src1, Src0, and Dst, including
 *      ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
 *      on AddrMode and AccessMode."
 *
 *     "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
 *      (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
 *      Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
 *      Src1.RegType bit fields."
 *
 *     "(ControlIndex) Lookup one of 32 19-bit values. That value is used
 *      (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
 *      ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
 *      and AccessMode bit fields."
 *
 * Each entry is the hexadecimal form of the bit pattern given in its
 * trailing comment.
 */
static const struct toy_compaction_table toy_compaction_table_gen7 = {
   .control = {
      [0]   = 0x00002,  /* 0000000000000000010 */
      [1]   = 0x04000,  /* 0000100000000000000 */
      [2]   = 0x04001,  /* 0000100000000000001 */
      [3]   = 0x04002,  /* 0000100000000000010 */
      [4]   = 0x04003,  /* 0000100000000000011 */
      [5]   = 0x04004,  /* 0000100000000000100 */
      [6]   = 0x04005,  /* 0000100000000000101 */
      [7]   = 0x04007,  /* 0000100000000000111 */
      [8]   = 0x04008,  /* 0000100000000001000 */
      [9]   = 0x04009,  /* 0000100000000001001 */
      [10]  = 0x0400d,  /* 0000100000000001101 */
      [11]  = 0x06000,  /* 0000110000000000000 */
      [12]  = 0x06001,  /* 0000110000000000001 */
      [13]  = 0x06002,  /* 0000110000000000010 */
      [14]  = 0x06003,  /* 0000110000000000011 */
      [15]  = 0x06004,  /* 0000110000000000100 */
      [16]  = 0x06005,  /* 0000110000000000101 */
      [17]  = 0x06007,  /* 0000110000000000111 */
      [18]  = 0x06009,  /* 0000110000000001001 */
      [19]  = 0x0600d,  /* 0000110000000001101 */
      [20]  = 0x06010,  /* 0000110000000010000 */
      [21]  = 0x06100,  /* 0000110000100000000 */
      [22]  = 0x08000,  /* 0001000000000000000 */
      [23]  = 0x08002,  /* 0001000000000000010 */
      [24]  = 0x08004,  /* 0001000000000000100 */
      [25]  = 0x08100,  /* 0001000000100000000 */
      [26]  = 0x16000,  /* 0010110000000000000 */
      [27]  = 0x16010,  /* 0010110000000010000 */
      [28]  = 0x18000,  /* 0011000000000000000 */
      [29]  = 0x18100,  /* 0011000000100000000 */
      [30]  = 0x28000,  /* 0101000000000000000 */
      [31]  = 0x28100,  /* 0101000000100000000 */
   },
   .datatype = {
      [0]   = 0x08001,  /* 001000000000000001 */
      [1]   = 0x08020,  /* 001000000000100000 */
      [2]   = 0x08021,  /* 001000000000100001 */
      [3]   = 0x08061,  /* 001000000001100001 */
      [4]   = 0x080bd,  /* 001000000010111101 */
      [5]   = 0x082fd,  /* 001000001011111101 */
      [6]   = 0x083a1,  /* 001000001110100001 */
      [7]   = 0x083a5,  /* 001000001110100101 */
      [8]   = 0x083bd,  /* 001000001110111101 */
      [9]   = 0x08421,  /* 001000010000100001 */
      [10]  = 0x08c20,  /* 001000110000100000 */
      [11]  = 0x08c21,  /* 001000110000100001 */
      [12]  = 0x094a5,  /* 001001010010100101 */
      [13]  = 0x09ca4,  /* 001001110010100100 */
      [14]  = 0x09ca5,  /* 001001110010100101 */
      [15]  = 0x0f3bd,  /* 001111001110111101 */
      [16]  = 0x0f79d,  /* 001111011110011101 */
      [17]  = 0x0f7bc,  /* 001111011110111100 */
      [18]  = 0x0f7bd,  /* 001111011110111101 */
      [19]  = 0x0ffbc,  /* 001111111110111100 */
      [20]  = 0x0020c,  /* 000000001000001100 */
      [21]  = 0x0803d,  /* 001000000000111101 */
      [22]  = 0x080a5,  /* 001000000010100101 */
      [23]  = 0x08420,  /* 001000010000100000 */
      [24]  = 0x094a4,  /* 001001010010100100 */
      [25]  = 0x09c84,  /* 001001110010000100 */
      [26]  = 0x0a509,  /* 001010010100001001 */
      [27]  = 0x0dfbd,  /* 001101111110111101 */
      [28]  = 0x0ffbd,  /* 001111111110111101 */
      [29]  = 0x0bdac,  /* 001011110110101100 */
      [30]  = 0x0a528,  /* 001010010100101000 */
      [31]  = 0x0ad28,  /* 001010110100101000 */
   },
   .subreg = {
      [0]   = 0x0000,   /* 000000000000000 */
      [1]   = 0x0001,   /* 000000000000001 */
      [2]   = 0x0008,   /* 000000000001000 */
      [3]   = 0x000f,   /* 000000000001111 */
      [4]   = 0x0010,   /* 000000000010000 */
      [5]   = 0x0080,   /* 000000010000000 */
      [6]   = 0x0100,   /* 000000100000000 */
      [7]   = 0x0180,   /* 000000110000000 */
      [8]   = 0x0200,   /* 000001000000000 */
      [9]   = 0x0210,   /* 000001000010000 */
      [10]  = 0x0280,   /* 000001010000000 */
      [11]  = 0x1000,   /* 001000000000000 */
      [12]  = 0x1001,   /* 001000000000001 */
      [13]  = 0x1081,   /* 001000010000001 */
      [14]  = 0x1082,   /* 001000010000010 */
      [15]  = 0x1083,   /* 001000010000011 */
      [16]  = 0x1084,   /* 001000010000100 */
      [17]  = 0x1087,   /* 001000010000111 */
      [18]  = 0x1088,   /* 001000010001000 */
      [19]  = 0x108e,   /* 001000010001110 */
      [20]  = 0x108f,   /* 001000010001111 */
      [21]  = 0x1180,   /* 001000110000000 */
      [22]  = 0x11e8,   /* 001000111101000 */
      [23]  = 0x2000,   /* 010000000000000 */
      [24]  = 0x2180,   /* 010000110000000 */
      [25]  = 0x3000,   /* 011000000000000 */
      [26]  = 0x3c87,   /* 011110010000111 */
      [27]  = 0x4000,   /* 100000000000000 */
      [28]  = 0x5000,   /* 101000000000000 */
      [29]  = 0x6000,   /* 110000000000000 */
      [30]  = 0x7000,   /* 111000000000000 */
      [31]  = 0x701c,   /* 111000000011100 */
   },
   .src = {
      [0]   = 0x000,    /* 000000000000 */
      [1]   = 0x002,    /* 000000000010 */
      [2]   = 0x010,    /* 000000010000 */
      [3]   = 0x012,    /* 000000010010 */
      [4]   = 0x018,    /* 000000011000 */
      [5]   = 0x020,    /* 000000100000 */
      [6]   = 0x028,    /* 000000101000 */
      [7]   = 0x048,    /* 000001001000 */
      [8]   = 0x050,    /* 000001010000 */
      [9]   = 0x070,    /* 000001110000 */
      [10]  = 0x078,    /* 000001111000 */
      [11]  = 0x300,    /* 001100000000 */
      [12]  = 0x302,    /* 001100000010 */
      [13]  = 0x308,    /* 001100001000 */
      [14]  = 0x310,    /* 001100010000 */
      [15]  = 0x312,    /* 001100010010 */
      [16]  = 0x320,    /* 001100100000 */
      [17]  = 0x328,    /* 001100101000 */
      [18]  = 0x338,    /* 001100111000 */
      [19]  = 0x340,    /* 001101000000 */
      [20]  = 0x342,    /* 001101000010 */
      [21]  = 0x348,    /* 001101001000 */
      [22]  = 0x350,    /* 001101010000 */
      [23]  = 0x360,    /* 001101100000 */
      [24]  = 0x368,    /* 001101101000 */
      [25]  = 0x370,    /* 001101110000 */
      [26]  = 0x371,    /* 001101110001 */
      [27]  = 0x378,    /* 001101111000 */
      [28]  = 0x468,    /* 010001101000 */
      [29]  = 0x469,    /* 010001101001 */
      [30]  = 0x46a,    /* 010001101010 */
      [31]  = 0x588,    /* 010110001000 */
   },
};
    388 
    389 static const struct toy_compaction_table toy_compaction_table_gen8 = {
    390    .control = {
    391    },
    392    .datatype = {
    393    },
    394    .subreg = {
    395    },
    396    .src = {
    397    },
    398    .control_3src = {
    399    },
    400    .source_3src = {
    401    },
    402 };
    403 
    404 const struct toy_compaction_table *
    405 toy_compiler_get_compaction_table(const struct ilo_dev *dev)
    406 {
    407    switch (ilo_dev_gen(dev)) {
    408    case ILO_GEN(8):
    409       return &toy_compaction_table_gen8;
    410    case ILO_GEN(7.5):
    411    case ILO_GEN(7):
    412       return &toy_compaction_table_gen7;
    413    case ILO_GEN(6):
    414       return &toy_compaction_table_gen6;
    415    default:
    416       assert(!"unsupported gen");
    417       return NULL;
    418    }
    419 }
    420 
    421 /**
    422  * Return true if the source operand is null.
    423  */
    424 static bool
    425 src_is_null(const struct codegen *cg, int idx)
    426 {
    427    const struct codegen_src *src = &cg->src[idx];
    428 
    429    return (src->file == GEN6_FILE_ARF &&
    430            src->origin == GEN6_ARF_NULL << CG_REG_SHIFT);
    431 }
    432 
    433 /**
    434  * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
    435  */
    436 static uint32_t
    437 translate_src_gen6(const struct codegen *cg, int idx)
    438 {
    439    const struct codegen_src *src = &cg->src[idx];
    440    uint32_t dw;
    441 
    442    ILO_DEV_ASSERT(cg->dev, 6, 8);
    443 
    444    /* special treatment may be needed if any of the operand is immediate */
    445    if (cg->src[0].file == GEN6_FILE_IMM) {
    446       assert(!cg->src[0].absolute && !cg->src[0].negate);
    447 
    448       /* only the last src operand can be an immediate unless it is Gen8+ */
    449       assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1));
    450 
    451       if (!src_is_null(cg, 1))
    452          return cg->src[idx].origin;
    453 
    454       if (idx == 0) {
    455          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    456             return cg->src[1].type << 27 |
    457                    cg->src[1].file << 25;
    458          } else {
    459             return cg->flag_sub_reg_num << 25;
    460          }
    461       } else {
    462          return cg->src[0].origin;
    463       }
    464    }
    465    else if (idx && cg->src[1].file == GEN6_FILE_IMM) {
    466       assert(!cg->src[1].absolute && !cg->src[1].negate);
    467       return cg->src[1].origin;
    468    }
    469 
    470    assert(src->file != GEN6_FILE_IMM);
    471 
    472    if (src->indirect) {
    473       const int offset = (int) src->origin;
    474 
    475       assert(src->file == GEN6_FILE_GRF);
    476       assert(offset < 512 && offset >= -512);
    477 
    478       if (cg->inst->access_mode == GEN6_ALIGN_16) {
    479          assert(src->width == GEN6_WIDTH_4);
    480          assert(src->horz_stride == GEN6_HORZSTRIDE_1);
    481 
    482          /* the lower 4 bits are reserved for the swizzle_[xy] */
    483          assert(!(src->origin & 0xf));
    484 
    485          dw = src->vert_stride << 21 |
    486               src->swizzle[3] << 18 |
    487               src->swizzle[2] << 16 |
    488               GEN6_ADDRMODE_INDIRECT << 15 |
    489               src->negate << 14 |
    490               src->absolute << 13 |
    491               src->swizzle[1] << 2 |
    492               src->swizzle[0];
    493          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    494             dw |= src->indirect_subreg << 9 |
    495                   (src->origin & 0x1f0);
    496          } else {
    497             dw |= src->indirect_subreg << 10 |
    498                   (src->origin & 0x3f0);
    499          }
    500       }
    501       else {
    502          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
    503                 src->swizzle[1] == TOY_SWIZZLE_Y &&
    504                 src->swizzle[2] == TOY_SWIZZLE_Z &&
    505                 src->swizzle[3] == TOY_SWIZZLE_W);
    506 
    507          dw = src->vert_stride << 21 |
    508               src->width << 18 |
    509               src->horz_stride << 16 |
    510               GEN6_ADDRMODE_INDIRECT << 15 |
    511               src->negate << 14 |
    512               src->absolute << 13;
    513          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    514             dw |= src->indirect_subreg << 9 |
    515                   (src->origin & 0x1ff);
    516          } else {
    517             dw |= src->indirect_subreg << 10 |
    518                   (src->origin & 0x3ff);
    519          }
    520       }
    521    }
    522    else {
    523       switch (src->file) {
    524       case GEN6_FILE_ARF:
    525          break;
    526       case GEN6_FILE_GRF:
    527          assert(CG_REG_NUM(src->origin) < 128);
    528          break;
    529       case GEN6_FILE_MRF:
    530          assert(cg->inst->opcode == GEN6_OPCODE_SEND ||
    531                 cg->inst->opcode == GEN6_OPCODE_SENDC);
    532          assert(CG_REG_NUM(src->origin) < 16);
    533          break;
    534       case GEN6_FILE_IMM:
    535       default:
    536          assert(!"invalid src file");
    537          break;
    538       }
    539 
    540       if (cg->inst->access_mode == GEN6_ALIGN_16) {
    541          assert(src->width == GEN6_WIDTH_4);
    542          assert(src->horz_stride == GEN6_HORZSTRIDE_1);
    543 
    544          /* the lower 4 bits are reserved for the swizzle_[xy] */
    545          assert(!(src->origin & 0xf));
    546 
    547          dw = src->vert_stride << 21 |
    548               src->swizzle[3] << 18 |
    549               src->swizzle[2] << 16 |
    550               GEN6_ADDRMODE_DIRECT << 15 |
    551               src->negate << 14 |
    552               src->absolute << 13 |
    553               src->origin |
    554               src->swizzle[1] << 2 |
    555               src->swizzle[0];
    556       }
    557       else {
    558          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
    559                 src->swizzle[1] == TOY_SWIZZLE_Y &&
    560                 src->swizzle[2] == TOY_SWIZZLE_Z &&
    561                 src->swizzle[3] == TOY_SWIZZLE_W);
    562 
    563          dw = src->vert_stride << 21 |
    564               src->width << 18 |
    565               src->horz_stride << 16 |
    566               GEN6_ADDRMODE_DIRECT << 15 |
    567               src->negate << 14 |
    568               src->absolute << 13 |
    569               src->origin;
    570       }
    571    }
    572 
    573    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    574       const bool indirect_origin_bit9 = (cg->dst.indirect) ?
    575          (src->origin & 0x200) : 0;
    576 
    577       if (idx == 0) {
    578          dw |= indirect_origin_bit9 << 31 |
    579                cg->src[1].type << 27 |
    580                cg->src[1].file << 25;
    581       } else {
    582          dw |= indirect_origin_bit9 << 25;
    583       }
    584    } else {
    585       if (idx == 0)
    586          dw |= cg->flag_sub_reg_num << 25;
    587    }
    588 
    589    return dw;
    590 }
    591 
    592 /**
    593  * Translate the destination operand to the higher 16 bits of DW1 of the
    594  * 1-src/2-src format.
    595  */
    596 static uint16_t
    597 translate_dst_region_gen6(const struct codegen *cg)
    598 {
    599    const struct codegen_dst *dst = &cg->dst;
    600    uint16_t dw1_region;
    601 
    602    ILO_DEV_ASSERT(cg->dev, 6, 8);
    603 
    604    if (dst->file == GEN6_FILE_IMM) {
    605       /* dst is immediate (JIP) when the opcode is a conditional branch */
    606       switch (cg->inst->opcode) {
    607       case GEN6_OPCODE_IF:
    608       case GEN6_OPCODE_ELSE:
    609       case GEN6_OPCODE_ENDIF:
    610       case GEN6_OPCODE_WHILE:
    611          assert(dst->type == GEN6_TYPE_W);
    612          dw1_region = (dst->origin & 0xffff);
    613          break;
    614       default:
    615          assert(!"dst cannot be immediate");
    616          dw1_region = 0;
    617          break;
    618       }
    619 
    620       return dw1_region;
    621    }
    622 
    623    if (dst->indirect) {
    624       const int offset = (int) dst->origin;
    625 
    626       assert(dst->file == GEN6_FILE_GRF);
    627       assert(offset < 512 && offset >= -512);
    628 
    629       if (cg->inst->access_mode == GEN6_ALIGN_16) {
    630          /*
    631           * From the Sandy Bridge PRM, volume 4 part 2, page 144:
    632           *
    633           *     "Allthough Dst.HorzStride is a don't care for Align16, HW
    634           *      needs this to be programmed as 01."
    635           */
    636          assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
    637          /* the lower 4 bits are reserved for the writemask */
    638          assert(!(dst->origin & 0xf));
    639 
    640          dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
    641                       dst->horz_stride << 13 |
    642                       dst->writemask;
    643          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    644             dw1_region |= dst->indirect_subreg << 9 |
    645                           (dst->origin & 0x1f0);
    646          } else {
    647             dw1_region |= dst->indirect_subreg << 10 |
    648                           (dst->origin & 0x3f0);
    649          }
    650       }
    651       else {
    652          assert(dst->writemask == TOY_WRITEMASK_XYZW);
    653 
    654          dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
    655                       dst->horz_stride << 13;
    656          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    657             dw1_region |= dst->indirect_subreg << 9 |
    658                           (dst->origin & 0x1ff);
    659          } else {
    660             dw1_region |= dst->indirect_subreg << 10 |
    661                           (dst->origin & 0x3ff);
    662          }
    663       }
    664    }
    665    else {
    666       assert((dst->file == GEN6_FILE_GRF &&
    667               CG_REG_NUM(dst->origin) < 128) ||
    668              (dst->file == GEN6_FILE_MRF &&
    669               CG_REG_NUM(dst->origin) < 16) ||
    670              (dst->file == GEN6_FILE_ARF));
    671 
    672       if (cg->inst->access_mode == GEN6_ALIGN_16) {
    673          /* similar to the indirect case */
    674          assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
    675          assert(!(dst->origin & 0xf));
    676 
    677          dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
    678                       dst->horz_stride << 13 |
    679                       dst->origin |
    680                       dst->writemask;
    681       }
    682       else {
    683          assert(dst->writemask == TOY_WRITEMASK_XYZW);
    684 
    685          dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
    686                       dst->horz_stride << 13 |
    687                       dst->origin;
    688       }
    689    }
    690 
    691    return dw1_region;
    692 }
    693 
    694 /**
    695  * Translate the destination operand to DW1 of the 1-src/2-src format.
    696  */
    697 static uint32_t
    698 translate_dst_gen6(const struct codegen *cg)
    699 {
    700    ILO_DEV_ASSERT(cg->dev, 6, 7.5);
    701 
    702    return translate_dst_region_gen6(cg) << 16 |
    703           cg->src[1].type << 12 |
    704           cg->src[1].file << 10 |
    705           cg->src[0].type << 7 |
    706           cg->src[0].file << 5 |
    707           cg->dst.type << 2 |
    708           cg->dst.file;
    709 }
    710 
    711 static uint32_t
    712 translate_dst_gen8(const struct codegen *cg)
    713 {
    714    const bool indirect_origin_bit9 = (cg->dst.indirect) ?
    715       (cg->dst.origin & 0x200) : 0;
    716 
    717    ILO_DEV_ASSERT(cg->dev, 8, 8);
    718 
    719    return translate_dst_region_gen6(cg) << 16 |
    720           indirect_origin_bit9 << 15 |
    721           cg->src[0].type << 11 |
    722           cg->src[0].file << 9 |
    723           cg->dst.type << 5 |
    724           cg->dst.file << 3 |
    725           cg->inst->mask_ctrl << 2 |
    726           cg->flag_reg_num << 1 |
    727           cg->flag_sub_reg_num;
    728 }
    729 
    730 /**
    731  * Translate the instruction to DW0 of the 1-src/2-src format.
    732  */
    733 static uint32_t
    734 translate_inst_gen6(const struct codegen *cg)
    735 {
    736    const bool debug_ctrl = false;
    737    const bool cmpt_ctrl = false;
    738 
    739    ILO_DEV_ASSERT(cg->dev, 6, 7.5);
    740 
    741    assert(cg->inst->opcode < 128);
    742 
    743    return cg->inst->saturate << 31 |
    744           debug_ctrl << 30 |
    745           cmpt_ctrl << 29 |
    746           cg->inst->acc_wr_ctrl << 28 |
    747           cg->inst->cond_modifier << 24 |
    748           cg->inst->exec_size << 21 |
    749           cg->inst->pred_inv << 20 |
    750           cg->inst->pred_ctrl << 16 |
    751           cg->inst->thread_ctrl << 14 |
    752           cg->inst->qtr_ctrl << 12 |
    753           cg->inst->dep_ctrl << 10 |
    754           cg->inst->mask_ctrl << 9 |
    755           cg->inst->access_mode << 8 |
    756           cg->inst->opcode;
    757 }
    758 
    759 static uint32_t
    760 translate_inst_gen8(const struct codegen *cg)
    761 {
    762    const bool debug_ctrl = false;
    763    const bool cmpt_ctrl = false;
    764 
    765    ILO_DEV_ASSERT(cg->dev, 8, 8);
    766 
    767    assert(cg->inst->opcode < 128);
    768 
    769    return cg->inst->saturate << 31 |
    770           debug_ctrl << 30 |
    771           cmpt_ctrl << 29 |
    772           cg->inst->acc_wr_ctrl << 28 |
    773           cg->inst->cond_modifier << 24 |
    774           cg->inst->exec_size << 21 |
    775           cg->inst->pred_inv << 20 |
    776           cg->inst->pred_ctrl << 16 |
    777           cg->inst->thread_ctrl << 14 |
    778           cg->inst->qtr_ctrl << 12 |
    779           cg->inst->dep_ctrl << 9 |
    780           cg->inst->access_mode << 8 |
    781           cg->inst->opcode;
    782 }
    783 
    784 /**
    785  * Codegen an instruction in 1-src/2-src format.
    786  */
    787 static void
    788 codegen_inst_gen6(const struct codegen *cg, uint32_t *code)
    789 {
    790    ILO_DEV_ASSERT(cg->dev, 6, 8);
    791 
    792    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    793       code[0] = translate_inst_gen8(cg);
    794       code[1] = translate_dst_gen8(cg);
    795    } else {
    796       code[0] = translate_inst_gen6(cg);
    797       code[1] = translate_dst_gen6(cg);
    798    }
    799 
    800    code[2] = translate_src_gen6(cg, 0);
    801    code[3] = translate_src_gen6(cg, 1);
    802    assert(src_is_null(cg, 2));
    803 }
    804 
    805 /**
    806  * Codegen an instruction in 3-src format.
    807  */
    808 static void
    809 codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code)
    810 {
    811    const struct codegen_dst *dst = &cg->dst;
    812    uint32_t dw0, dw1, dw_src[3];
    813    int i;
    814 
    815    ILO_DEV_ASSERT(cg->dev, 6, 8);
    816 
    817    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8))
    818       dw0 = translate_inst_gen8(cg);
    819    else
    820       dw0 = translate_inst_gen6(cg);
    821 
    822    /*
    823     * 3-src instruction restrictions
    824     *
    825     *  - align16 with direct addressing
    826     *  - GRF or MRF dst
    827     *  - GRF src
    828     *  - sub_reg_num is DWORD aligned
    829     *  - no regioning except replication control
    830     *    (vert_stride == 0 && horz_stride == 0)
    831     */
    832    assert(cg->inst->access_mode == GEN6_ALIGN_16);
    833 
    834    assert(!dst->indirect);
    835    assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) ||
    836           (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16));
    837    assert(!(dst->origin & 0x3));
    838    assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
    839 
    840    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
    841       dw1 = dst->origin << 19 |
    842             dst->writemask << 17 |
    843             cg->src[2].negate << 10 |
    844             cg->src[2].negate << 10 |
    845             cg->src[2].absolute << 9 |
    846             cg->src[1].negate << 8 |
    847             cg->src[1].absolute << 7 |
    848             cg->src[0].negate << 6 |
    849             cg->src[0].absolute << 5 |
    850             cg->inst->mask_ctrl << 2 |
    851             cg->flag_reg_num << 1 |
    852             cg->flag_sub_reg_num;
    853    } else {
    854       dw1 = dst->origin << 19 |
    855             dst->writemask << 17 |
    856             cg->src[2].negate << 9 |
    857             cg->src[2].absolute << 8 |
    858             cg->src[1].negate << 7 |
    859             cg->src[1].absolute << 6 |
    860             cg->src[0].negate << 5 |
    861             cg->src[0].absolute << 4 |
    862             cg->flag_sub_reg_num << 1 |
    863             (dst->file == GEN6_FILE_MRF);
    864    }
    865 
    866    for (i = 0; i < 3; i++) {
    867       const struct codegen_src *src = &cg->src[i];
    868 
    869       assert(!src->indirect);
    870       assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128);
    871       assert(!(src->origin & 0x3));
    872 
    873       assert((src->vert_stride == GEN6_VERTSTRIDE_4 &&
    874               src->horz_stride == GEN6_HORZSTRIDE_1) ||
    875              (src->vert_stride == GEN6_VERTSTRIDE_0 &&
    876               src->horz_stride == GEN6_HORZSTRIDE_0));
    877       assert(src->width == GEN6_WIDTH_4);
    878 
    879       dw_src[i] = src->origin << 7 |
    880                   src->swizzle[3] << 7 |
    881                   src->swizzle[2] << 5 |
    882                   src->swizzle[1] << 3 |
    883                   src->swizzle[0] << 1 |
    884                   (src->vert_stride == GEN6_VERTSTRIDE_0 &&
    885                    src->horz_stride == GEN6_HORZSTRIDE_0);
    886 
    887       /* only the lower 20 bits are used */
    888       assert((dw_src[i] & 0xfffff) == dw_src[i]);
    889    }
    890 
    891    code[0] = dw0;
    892    code[1] = dw1;
    893    /* concatenate the bits of dw_src */
    894    code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
    895    code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
    896 }
    897 
    898 /**
    899  * Sanity check the region parameters of the operands.
    900  */
    901 static void
    902 codegen_validate_region_restrictions(const struct codegen *cg)
    903 {
    904    const int exec_size_map[] = {
    905       [GEN6_EXECSIZE_1] = 1,
    906       [GEN6_EXECSIZE_2] = 2,
    907       [GEN6_EXECSIZE_4] = 4,
    908       [GEN6_EXECSIZE_8] = 8,
    909       [GEN6_EXECSIZE_16] = 16,
    910       [GEN6_EXECSIZE_32] = 32,
    911    };
    912    const int width_map[] = {
    913       [GEN6_WIDTH_1] = 1,
    914       [GEN6_WIDTH_2] = 2,
    915       [GEN6_WIDTH_4] = 4,
    916       [GEN6_WIDTH_8] = 8,
    917       [GEN6_WIDTH_16] = 16,
    918    };
    919    const int horz_stride_map[] = {
    920       [GEN6_HORZSTRIDE_0] = 0,
    921       [GEN6_HORZSTRIDE_1] = 1,
    922       [GEN6_HORZSTRIDE_2] = 2,
    923       [GEN6_HORZSTRIDE_4] = 4,
    924    };
    925    const int vert_stride_map[] = {
    926       [GEN6_VERTSTRIDE_0] = 0,
    927       [GEN6_VERTSTRIDE_1] = 1,
    928       [GEN6_VERTSTRIDE_2] = 2,
    929       [GEN6_VERTSTRIDE_4] = 4,
    930       [GEN6_VERTSTRIDE_8] = 8,
    931       [GEN6_VERTSTRIDE_16] = 16,
    932       [GEN6_VERTSTRIDE_32] = 32,
    933       [7] = 64,
    934       [8] = 128,
    935       [9] = 256,
    936       [GEN6_VERTSTRIDE_VXH] = 0,
    937    };
    938    const int exec_size = exec_size_map[cg->inst->exec_size];
    939    int i;
    940 
    941    /* Sandy Bridge PRM, volume 4 part 2, page 94 */
    942 
    943    /* 1. (we don't do 32 anyway) */
    944    assert(exec_size <= 16);
    945 
    946    for (i = 0; i < ARRAY_SIZE(cg->src); i++) {
    947       const int width = width_map[cg->src[i].width];
    948       const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
    949       const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
    950 
    951       if (src_is_null(cg, i))
    952          break;
    953 
    954       /* 3. */
    955       assert(exec_size >= width);
    956 
    957       if (exec_size == width) {
    958          /* 4. & 5. */
    959          if (horz_stride)
    960             assert(vert_stride == width * horz_stride);
    961       }
    962 
    963       if (width == 1) {
    964          /* 6. */
    965          assert(horz_stride == 0);
    966 
    967          /* 7. */
    968          if (exec_size == 1)
    969             assert(vert_stride == 0);
    970       }
    971 
    972       /* 8. */
    973       if (!vert_stride && !horz_stride)
    974          assert(width == 1);
    975    }
    976 
    977    /* derived from 10.1.2. & 10.2. */
    978    assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0);
    979 }
    980 
    981 static unsigned
    982 translate_vfile(enum toy_file file)
    983 {
    984    switch (file) {
    985    case TOY_FILE_ARF:   return GEN6_FILE_ARF;
    986    case TOY_FILE_GRF:   return GEN6_FILE_GRF;
    987    case TOY_FILE_MRF:   return GEN6_FILE_MRF;
    988    case TOY_FILE_IMM:   return GEN6_FILE_IMM;
    989    default:
    990       assert(!"unhandled toy file");
    991       return GEN6_FILE_GRF;
    992    }
    993 }
    994 
    995 static unsigned
    996 translate_vtype(enum toy_type type)
    997 {
    998    switch (type) {
    999    case TOY_TYPE_F:     return GEN6_TYPE_F;
   1000    case TOY_TYPE_D:     return GEN6_TYPE_D;
   1001    case TOY_TYPE_UD:    return GEN6_TYPE_UD;
   1002    case TOY_TYPE_W:     return GEN6_TYPE_W;
   1003    case TOY_TYPE_UW:    return GEN6_TYPE_UW;
   1004    case TOY_TYPE_V:     return GEN6_TYPE_V_IMM;
   1005    default:
   1006       assert(!"unhandled toy type");
   1007       return GEN6_TYPE_F;
   1008    }
   1009 }
   1010 
   1011 static unsigned
   1012 translate_writemask(enum toy_writemask writemask)
   1013 {
   1014    /* TOY_WRITEMASK_* are compatible with the hardware definitions */
   1015    assert(writemask <= 0xf);
   1016    return writemask;
   1017 }
   1018 
   1019 static unsigned
   1020 translate_swizzle(enum toy_swizzle swizzle)
   1021 {
   1022    /* TOY_SWIZZLE_* are compatible with the hardware definitions */
   1023    assert(swizzle <= 3);
   1024    return swizzle;
   1025 }
   1026 
   1027 /**
   1028  * Prepare for generating an instruction.
   1029  */
   1030 static void
   1031 codegen_prepare(struct codegen *cg, const struct ilo_dev *dev,
   1032                 const struct toy_inst *inst, int pc, int rect_linear_width)
   1033 {
   1034    int i;
   1035 
   1036    cg->dev = dev;
   1037    cg->inst = inst;
   1038    cg->pc = pc;
   1039 
   1040    cg->flag_reg_num = 0;
   1041    cg->flag_sub_reg_num = 0;
   1042 
   1043    cg->dst.file = translate_vfile(inst->dst.file);
   1044    cg->dst.type = translate_vtype(inst->dst.type);
   1045    cg->dst.indirect = inst->dst.indirect;
   1046    cg->dst.indirect_subreg = inst->dst.indirect_subreg;
   1047    cg->dst.origin = inst->dst.val32;
   1048 
   1049    /*
   1050     * From the Sandy Bridge PRM, volume 4 part 2, page 81:
   1051     *
   1052     *     "For a word or an unsigned word immediate data, software must
   1053     *      replicate the same 16-bit immediate value to both the lower word
   1054     *      and the high word of the 32-bit immediate field in an instruction."
   1055     */
   1056    if (inst->dst.file == TOY_FILE_IMM) {
   1057       switch (inst->dst.type) {
   1058       case TOY_TYPE_W:
   1059       case TOY_TYPE_UW:
   1060          cg->dst.origin &= 0xffff;
   1061          cg->dst.origin |= cg->dst.origin << 16;
   1062          break;
   1063       default:
   1064          break;
   1065       }
   1066    }
   1067 
   1068    cg->dst.writemask = translate_writemask(inst->dst.writemask);
   1069 
   1070    switch (inst->dst.rect) {
   1071    case TOY_RECT_LINEAR:
   1072       cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
   1073       break;
   1074    default:
   1075       assert(!"unsupported dst region");
   1076       cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
   1077       break;
   1078    }
   1079 
   1080    for (i = 0; i < ARRAY_SIZE(cg->src); i++) {
   1081       struct codegen_src *src = &cg->src[i];
   1082 
   1083       src->file = translate_vfile(inst->src[i].file);
   1084       src->type = translate_vtype(inst->src[i].type);
   1085       src->indirect = inst->src[i].indirect;
   1086       src->indirect_subreg = inst->src[i].indirect_subreg;
   1087       src->origin = inst->src[i].val32;
   1088 
   1089       /* do the same for src */
   1090       if (inst->dst.file == TOY_FILE_IMM) {
   1091          switch (inst->src[i].type) {
   1092          case TOY_TYPE_W:
   1093          case TOY_TYPE_UW:
   1094             src->origin &= 0xffff;
   1095             src->origin |= src->origin << 16;
   1096             break;
   1097          default:
   1098             break;
   1099          }
   1100       }
   1101 
   1102       src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
   1103       src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
   1104       src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
   1105       src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
   1106       src->absolute = inst->src[i].absolute;
   1107       src->negate = inst->src[i].negate;
   1108 
   1109       switch (inst->src[i].rect) {
   1110       case TOY_RECT_LINEAR:
   1111          switch (rect_linear_width) {
   1112          case 1:
   1113             src->vert_stride = GEN6_VERTSTRIDE_1;
   1114             src->width = GEN6_WIDTH_1;
   1115             break;
   1116          case 2:
   1117             src->vert_stride = GEN6_VERTSTRIDE_2;
   1118             src->width = GEN6_WIDTH_2;
   1119             break;
   1120          case 4:
   1121             src->vert_stride = GEN6_VERTSTRIDE_4;
   1122             src->width = GEN6_WIDTH_4;
   1123             break;
   1124          case 8:
   1125             src->vert_stride = GEN6_VERTSTRIDE_8;
   1126             src->width = GEN6_WIDTH_8;
   1127             break;
   1128          case 16:
   1129             src->vert_stride = GEN6_VERTSTRIDE_16;
   1130             src->width = GEN6_WIDTH_16;
   1131             break;
   1132          default:
   1133             assert(!"unsupported TOY_RECT_LINEAR width");
   1134             src->vert_stride = GEN6_VERTSTRIDE_1;
   1135             src->width = GEN6_WIDTH_1;
   1136             break;
   1137          }
   1138          src->horz_stride = GEN6_HORZSTRIDE_1;
   1139          break;
   1140       case TOY_RECT_041:
   1141          src->vert_stride = GEN6_VERTSTRIDE_0;
   1142          src->width = GEN6_WIDTH_4;
   1143          src->horz_stride = GEN6_HORZSTRIDE_1;
   1144          break;
   1145       case TOY_RECT_010:
   1146          src->vert_stride = GEN6_VERTSTRIDE_0;
   1147          src->width = GEN6_WIDTH_1;
   1148          src->horz_stride = GEN6_HORZSTRIDE_0;
   1149          break;
   1150       case TOY_RECT_220:
   1151          src->vert_stride = GEN6_VERTSTRIDE_2;
   1152          src->width = GEN6_WIDTH_2;
   1153          src->horz_stride = GEN6_HORZSTRIDE_0;
   1154          break;
   1155       case TOY_RECT_440:
   1156          src->vert_stride = GEN6_VERTSTRIDE_4;
   1157          src->width = GEN6_WIDTH_4;
   1158          src->horz_stride = GEN6_HORZSTRIDE_0;
   1159          break;
   1160       case TOY_RECT_240:
   1161          src->vert_stride = GEN6_VERTSTRIDE_2;
   1162          src->width = GEN6_WIDTH_4;
   1163          src->horz_stride = GEN6_HORZSTRIDE_0;
   1164          break;
   1165       default:
   1166          assert(!"unsupported src region");
   1167          src->vert_stride = GEN6_VERTSTRIDE_1;
   1168          src->width = GEN6_WIDTH_1;
   1169          src->horz_stride = GEN6_HORZSTRIDE_1;
   1170          break;
   1171       }
   1172    }
   1173 }
   1174 
   1175 /**
   1176  * Generate HW shader code.  The instructions should have been legalized.
   1177  */
   1178 void *
   1179 toy_compiler_assemble(struct toy_compiler *tc, int *size)
   1180 {
   1181    const struct toy_inst *inst;
   1182    uint32_t *code;
   1183    int pc;
   1184 
   1185    code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
   1186    if (!code)
   1187       return NULL;
   1188 
   1189    pc = 0;
   1190    tc_head(tc);
   1191    while ((inst = tc_next(tc)) != NULL) {
   1192       uint32_t *dw = &code[pc * 4];
   1193       struct codegen cg;
   1194 
   1195       if (pc >= tc->num_instructions) {
   1196          tc_fail(tc, "wrong instructoun count");
   1197          break;
   1198       }
   1199 
   1200       codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width);
   1201       codegen_validate_region_restrictions(&cg);
   1202 
   1203       switch (inst->opcode) {
   1204       case GEN6_OPCODE_MAD:
   1205          codegen_inst_3src_gen6(&cg, dw);
   1206          break;
   1207       default:
   1208          codegen_inst_gen6(&cg, dw);
   1209          break;
   1210       }
   1211 
   1212       pc++;
   1213    }
   1214 
   1215    /* never return an invalid kernel */
   1216    if (tc->fail) {
   1217       FREE(code);
   1218       return NULL;
   1219    }
   1220 
   1221    if (size)
   1222       *size = pc * 4 * sizeof(uint32_t);
   1223 
   1224    return code;
   1225 }
   1226