/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv (at) lunarg.com>
 */

#include "toy_compiler.h"

/*
 * An operand "origin" packs the register number above a 5-bit sub-register
 * byte offset: (RegNum << CG_REG_SHIFT | SubRegNumInBytes).  CG_REG_NUM()
 * recovers the register number from such an origin.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)

/**
 * Per-instruction state used while encoding one toy instruction.  It is
 * filled in by codegen_prepare() and only read by the translate and codegen
 * helpers in this file.
 */
struct codegen {
   const struct ilo_dev *dev;    /* device the code is generated for */
   const struct toy_inst *inst;  /* instruction being encoded */
   int pc;                       /* index of the instruction in the kernel */

   /* flag register selection; codegen_prepare() always sets both to 0 */
   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   /* destination operand, already translated to hardware encodings */
   struct codegen_dst {
      unsigned file;             /* GEN6_FILE_x, from translate_vfile() */
      unsigned type;             /* GEN6_TYPE_x, from translate_vtype() */
      bool indirect;             /* register-indirect addressing */
      unsigned indirect_subreg;  /* sub-register used when indirect */
      /*
       * (RegNum << 5 | SubRegNumInBytes); holds the 32-bit immediate value
       * instead when file is GEN6_FILE_IMM
       */
      unsigned origin;

      unsigned horz_stride;      /* GEN6_HORZSTRIDE_x */

      unsigned writemask;        /* channel enables, at most 0xf */
   } dst;

   /* source operands, already translated to hardware encodings */
   struct codegen_src {
      unsigned file;             /* GEN6_FILE_x, from translate_vfile() */
      unsigned type;             /* GEN6_TYPE_x, from translate_vtype() */
      bool indirect;             /* register-indirect addressing */
      unsigned indirect_subreg;  /* sub-register used when indirect */
      /*
       * (RegNum << 5 | SubRegNumInBytes); holds the 32-bit immediate value
       * instead when file is GEN6_FILE_IMM
       */
      unsigned origin;

      /* region description <vert_stride; width, horz_stride> */
      unsigned vert_stride;      /* GEN6_VERTSTRIDE_x */
      unsigned width;            /* GEN6_WIDTH_x */
      unsigned horz_stride;      /* GEN6_HORZSTRIDE_x */

      unsigned swizzle[4];       /* channel selects, in x, y, z, w order */
      bool absolute;             /* absolute-value source modifier */
      bool negate;               /* negate source modifier */
   } src[3];
};
88 */ 89 static const struct toy_compaction_table toy_compaction_table_gen6 = { 90 .control = { 91 [0] = 0x00000, /* 00000000000000000 */ 92 [1] = 0x08000, /* 01000000000000000 */ 93 [2] = 0x06000, /* 00110000000000000 */ 94 [3] = 0x00100, /* 00000000100000000 */ 95 [4] = 0x02000, /* 00010000000000000 */ 96 [5] = 0x01100, /* 00001000100000000 */ 97 [6] = 0x00102, /* 00000000100000010 */ 98 [7] = 0x00002, /* 00000000000000010 */ 99 [8] = 0x08100, /* 01000000100000000 */ 100 [9] = 0x0a000, /* 01010000000000000 */ 101 [10] = 0x16000, /* 10110000000000000 */ 102 [11] = 0x04000, /* 00100000000000000 */ 103 [12] = 0x1a000, /* 11010000000000000 */ 104 [13] = 0x18000, /* 11000000000000000 */ 105 [14] = 0x09100, /* 01001000100000000 */ 106 [15] = 0x08008, /* 01000000000001000 */ 107 [16] = 0x08004, /* 01000000000000100 */ 108 [17] = 0x00008, /* 00000000000001000 */ 109 [18] = 0x00004, /* 00000000000000100 */ 110 [19] = 0x01100, /* 00111000100000000 */ 111 [20] = 0x01102, /* 00001000100000010 */ 112 [21] = 0x06100, /* 00110000100000000 */ 113 [22] = 0x06001, /* 00110000000000001 */ 114 [23] = 0x04001, /* 00100000000000001 */ 115 [24] = 0x06002, /* 00110000000000010 */ 116 [25] = 0x06005, /* 00110000000000101 */ 117 [26] = 0x06009, /* 00110000000001001 */ 118 [27] = 0x06010, /* 00110000000010000 */ 119 [28] = 0x06003, /* 00110000000000011 */ 120 [29] = 0x06004, /* 00110000000000100 */ 121 [30] = 0x06108, /* 00110000100001000 */ 122 [31] = 0x04009, /* 00100000000001001 */ 123 }, 124 .datatype = { 125 [0] = 0x09c00, /* 001001110000000000 */ 126 [1] = 0x08c20, /* 001000110000100000 */ 127 [2] = 0x09c01, /* 001001110000000001 */ 128 [3] = 0x08060, /* 001000000001100000 */ 129 [4] = 0x0ad29, /* 001010110100101001 */ 130 [5] = 0x081ad, /* 001000000110101101 */ 131 [6] = 0x0c62c, /* 001100011000101100 */ 132 [7] = 0x0bdad, /* 001011110110101101 */ 133 [8] = 0x081ec, /* 001000000111101100 */ 134 [9] = 0x08061, /* 001000000001100001 */ 135 [10] = 0x08ca5, /* 001000110010100101 */ 136 
[11] = 0x08041, /* 001000000001000001 */ 137 [12] = 0x08231, /* 001000001000110001 */ 138 [13] = 0x08229, /* 001000001000101001 */ 139 [14] = 0x08020, /* 001000000000100000 */ 140 [15] = 0x08232, /* 001000001000110010 */ 141 [16] = 0x0a529, /* 001010010100101001 */ 142 [17] = 0x0b4a5, /* 001011010010100101 */ 143 [18] = 0x081a5, /* 001000000110100101 */ 144 [19] = 0x0c629, /* 001100011000101001 */ 145 [20] = 0x0b62c, /* 001011011000101100 */ 146 [21] = 0x0b5a5, /* 001011010110100101 */ 147 [22] = 0x0bda5, /* 001011110110100101 */ 148 [23] = 0x0f1bd, /* 001111011110111101 */ 149 [24] = 0x0f1bc, /* 001111011110111100 */ 150 [25] = 0x0f1bd, /* 001111011110111101 */ 151 [26] = 0x0f19d, /* 001111011110011101 */ 152 [27] = 0x0f1be, /* 001111011110111110 */ 153 [28] = 0x08021, /* 001000000000100001 */ 154 [29] = 0x08022, /* 001000000000100010 */ 155 [30] = 0x09fdd, /* 001001111111011101 */ 156 [31] = 0x083be, /* 001000001110111110 */ 157 }, 158 .subreg = { 159 [0] = 0x0000, /* 000000000000000 */ 160 [1] = 0x0004, /* 000000000000100 */ 161 [2] = 0x0180, /* 000000110000000 */ 162 [3] = 0x1000, /* 111000000000000 */ 163 [4] = 0x3c08, /* 011110000001000 */ 164 [5] = 0x0400, /* 000010000000000 */ 165 [6] = 0x0010, /* 000000000010000 */ 166 [7] = 0x0c0c, /* 000110000001100 */ 167 [8] = 0x1000, /* 001000000000000 */ 168 [9] = 0x0200, /* 000001000000000 */ 169 [10] = 0x0294, /* 000001010010100 */ 170 [11] = 0x0056, /* 000000001010110 */ 171 [12] = 0x2000, /* 010000000000000 */ 172 [13] = 0x6000, /* 110000000000000 */ 173 [14] = 0x0800, /* 000100000000000 */ 174 [15] = 0x0080, /* 000000010000000 */ 175 [16] = 0x0008, /* 000000000001000 */ 176 [17] = 0x4000, /* 100000000000000 */ 177 [18] = 0x0280, /* 000001010000000 */ 178 [19] = 0x1400, /* 001010000000000 */ 179 [20] = 0x1800, /* 001100000000000 */ 180 [21] = 0x0054, /* 000000001010100 */ 181 [22] = 0x5a94, /* 101101010010100 */ 182 [23] = 0x2800, /* 010100000000000 */ 183 [24] = 0x008f, /* 000000010001111 */ 184 [25] = 0x3000, 
/* 011000000000000 */ 185 [26] = 0x1c00, /* 111110000000000 */ 186 [27] = 0x5000, /* 101000000000000 */ 187 [28] = 0x000f, /* 000000000001111 */ 188 [29] = 0x088f, /* 000100010001111 */ 189 [30] = 0x108f, /* 001000010001111 */ 190 [31] = 0x0c00, /* 000110000000000 */ 191 }, 192 .src = { 193 [0] = 0x000, /* 000000000000 */ 194 [1] = 0x588, /* 010110001000 */ 195 [2] = 0x468, /* 010001101000 */ 196 [3] = 0x228, /* 001000101000 */ 197 [4] = 0x690, /* 011010010000 */ 198 [5] = 0x120, /* 000100100000 */ 199 [6] = 0x46c, /* 010001101100 */ 200 [7] = 0x510, /* 010101110000 */ 201 [8] = 0x618, /* 011001111000 */ 202 [9] = 0x328, /* 001100101000 */ 203 [10] = 0x58c, /* 010110001100 */ 204 [11] = 0x220, /* 001000100000 */ 205 [12] = 0x58a, /* 010110001010 */ 206 [13] = 0x002, /* 000000000010 */ 207 [14] = 0x550, /* 010101010000 */ 208 [15] = 0x568, /* 010101101000 */ 209 [16] = 0xf4c, /* 111101001100 */ 210 [17] = 0xf2c, /* 111100101100 */ 211 [18] = 0x610, /* 011001110000 */ 212 [19] = 0x589, /* 010110001001 */ 213 [20] = 0x558, /* 010101011000 */ 214 [21] = 0x348, /* 001101001000 */ 215 [22] = 0x42c, /* 010000101100 */ 216 [23] = 0x400, /* 010000000000 */ 217 [24] = 0x310, /* 001101110000 */ 218 [25] = 0x310, /* 001100010000 */ 219 [26] = 0x300, /* 001100000000 */ 220 [27] = 0x46a, /* 010001101010 */ 221 [28] = 0x318, /* 001101111000 */ 222 [29] = 0x010, /* 000001110000 */ 223 [30] = 0x320, /* 001100100000 */ 224 [31] = 0x350, /* 001101010000 */ 225 }, 226 }; 227 228 /* 229 * From the Ivy Bridge PRM, volume 4 part 3, page 128: 230 * 231 * "(Src0Index) Lookup one of 32 12-bit values. That value is used (from 232 * MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride, 233 * Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields." 234 * 235 * "(SubRegIndex) Lookup one of 32 15-bit values. 
/*
 * From the Ivy Bridge PRM, volume 4 part 3, page 128:
 *
 *     "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
 *      MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
 *      Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
 *
 *     "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
 *      MSB to LSB) for various fields for Src1, Src0, and Dst, including
 *      ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
 *      on AddrMode and AccessMode."
 *
 *     "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
 *      (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
 *      Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
 *      Src1.RegType bit fields."
 *
 *     "(ControlIndex) Lookup one of 32 19-bit values. That value is used
 *      (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
 *      ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
 *      and AccessMode bit fields."
 *
 * Each hexadecimal entry equals the bit pattern quoted in the comment that
 * follows it.
 */
static const struct toy_compaction_table toy_compaction_table_gen7 = {
   .control = {
      [0]   = 0x00002, /* 0000000000000000010 */
      [1]   = 0x04000, /* 0000100000000000000 */
      [2]   = 0x04001, /* 0000100000000000001 */
      [3]   = 0x04002, /* 0000100000000000010 */
      [4]   = 0x04003, /* 0000100000000000011 */
      [5]   = 0x04004, /* 0000100000000000100 */
      [6]   = 0x04005, /* 0000100000000000101 */
      [7]   = 0x04007, /* 0000100000000000111 */
      [8]   = 0x04008, /* 0000100000000001000 */
      [9]   = 0x04009, /* 0000100000000001001 */
      [10]  = 0x0400d, /* 0000100000000001101 */
      [11]  = 0x06000, /* 0000110000000000000 */
      [12]  = 0x06001, /* 0000110000000000001 */
      [13]  = 0x06002, /* 0000110000000000010 */
      [14]  = 0x06003, /* 0000110000000000011 */
      [15]  = 0x06004, /* 0000110000000000100 */
      [16]  = 0x06005, /* 0000110000000000101 */
      [17]  = 0x06007, /* 0000110000000000111 */
      [18]  = 0x06009, /* 0000110000000001001 */
      [19]  = 0x0600d, /* 0000110000000001101 */
      [20]  = 0x06010, /* 0000110000000010000 */
      [21]  = 0x06100, /* 0000110000100000000 */
      [22]  = 0x08000, /* 0001000000000000000 */
      [23]  = 0x08002, /* 0001000000000000010 */
      [24]  = 0x08004, /* 0001000000000000100 */
      [25]  = 0x08100, /* 0001000000100000000 */
      [26]  = 0x16000, /* 0010110000000000000 */
      [27]  = 0x16010, /* 0010110000000010000 */
      [28]  = 0x18000, /* 0011000000000000000 */
      [29]  = 0x18100, /* 0011000000100000000 */
      [30]  = 0x28000, /* 0101000000000000000 */
      [31]  = 0x28100, /* 0101000000100000000 */
   },
   .datatype = {
      [0]   = 0x08001, /* 001000000000000001 */
      [1]   = 0x08020, /* 001000000000100000 */
      [2]   = 0x08021, /* 001000000000100001 */
      [3]   = 0x08061, /* 001000000001100001 */
      [4]   = 0x080bd, /* 001000000010111101 */
      [5]   = 0x082fd, /* 001000001011111101 */
      [6]   = 0x083a1, /* 001000001110100001 */
      [7]   = 0x083a5, /* 001000001110100101 */
      [8]   = 0x083bd, /* 001000001110111101 */
      [9]   = 0x08421, /* 001000010000100001 */
      [10]  = 0x08c20, /* 001000110000100000 */
      [11]  = 0x08c21, /* 001000110000100001 */
      [12]  = 0x094a5, /* 001001010010100101 */
      [13]  = 0x09ca4, /* 001001110010100100 */
      [14]  = 0x09ca5, /* 001001110010100101 */
      [15]  = 0x0f3bd, /* 001111001110111101 */
      [16]  = 0x0f79d, /* 001111011110011101 */
      [17]  = 0x0f7bc, /* 001111011110111100 */
      [18]  = 0x0f7bd, /* 001111011110111101 */
      [19]  = 0x0ffbc, /* 001111111110111100 */
      [20]  = 0x0020c, /* 000000001000001100 */
      [21]  = 0x0803d, /* 001000000000111101 */
      [22]  = 0x080a5, /* 001000000010100101 */
      [23]  = 0x08420, /* 001000010000100000 */
      [24]  = 0x094a4, /* 001001010010100100 */
      [25]  = 0x09c84, /* 001001110010000100 */
      [26]  = 0x0a509, /* 001010010100001001 */
      [27]  = 0x0dfbd, /* 001101111110111101 */
      [28]  = 0x0ffbd, /* 001111111110111101 */
      [29]  = 0x0bdac, /* 001011110110101100 */
      [30]  = 0x0a528, /* 001010010100101000 */
      [31]  = 0x0ad28, /* 001010110100101000 */
   },
   .subreg = {
      [0]   = 0x0000, /* 000000000000000 */
      [1]   = 0x0001, /* 000000000000001 */
      [2]   = 0x0008, /* 000000000001000 */
      [3]   = 0x000f, /* 000000000001111 */
      [4]   = 0x0010, /* 000000000010000 */
      [5]   = 0x0080, /* 000000010000000 */
      [6]   = 0x0100, /* 000000100000000 */
      [7]   = 0x0180, /* 000000110000000 */
      [8]   = 0x0200, /* 000001000000000 */
      [9]   = 0x0210, /* 000001000010000 */
      [10]  = 0x0280, /* 000001010000000 */
      [11]  = 0x1000, /* 001000000000000 */
      [12]  = 0x1001, /* 001000000000001 */
      [13]  = 0x1081, /* 001000010000001 */
      [14]  = 0x1082, /* 001000010000010 */
      [15]  = 0x1083, /* 001000010000011 */
      [16]  = 0x1084, /* 001000010000100 */
      [17]  = 0x1087, /* 001000010000111 */
      [18]  = 0x1088, /* 001000010001000 */
      [19]  = 0x108e, /* 001000010001110 */
      [20]  = 0x108f, /* 001000010001111 */
      [21]  = 0x1180, /* 001000110000000 */
      [22]  = 0x11e8, /* 001000111101000 */
      [23]  = 0x2000, /* 010000000000000 */
      [24]  = 0x2180, /* 010000110000000 */
      [25]  = 0x3000, /* 011000000000000 */
      [26]  = 0x3c87, /* 011110010000111 */
      [27]  = 0x4000, /* 100000000000000 */
      [28]  = 0x5000, /* 101000000000000 */
      [29]  = 0x6000, /* 110000000000000 */
      [30]  = 0x7000, /* 111000000000000 */
      [31]  = 0x701c, /* 111000000011100 */
   },
   .src = {
      [0]   = 0x000, /* 000000000000 */
      [1]   = 0x002, /* 000000000010 */
      [2]   = 0x010, /* 000000010000 */
      [3]   = 0x012, /* 000000010010 */
      [4]   = 0x018, /* 000000011000 */
      [5]   = 0x020, /* 000000100000 */
      [6]   = 0x028, /* 000000101000 */
      [7]   = 0x048, /* 000001001000 */
      [8]   = 0x050, /* 000001010000 */
      [9]   = 0x070, /* 000001110000 */
      [10]  = 0x078, /* 000001111000 */
      [11]  = 0x300, /* 001100000000 */
      [12]  = 0x302, /* 001100000010 */
      [13]  = 0x308, /* 001100001000 */
      [14]  = 0x310, /* 001100010000 */
      [15]  = 0x312, /* 001100010010 */
      [16]  = 0x320, /* 001100100000 */
      [17]  = 0x328, /* 001100101000 */
      [18]  = 0x338, /* 001100111000 */
      [19]  = 0x340, /* 001101000000 */
      [20]  = 0x342, /* 001101000010 */
      [21]  = 0x348, /* 001101001000 */
      [22]  = 0x350, /* 001101010000 */
      [23]  = 0x360, /* 001101100000 */
      [24]  = 0x368, /* 001101101000 */
      [25]  = 0x370, /* 001101110000 */
      [26]  = 0x371, /* 001101110001 */
      [27]  = 0x378, /* 001101111000 */
      [28]  = 0x468, /* 010001101000 */
      [29]  = 0x469, /* 010001101001 */
      [30]  = 0x46a, /* 010001101010 */
      [31]  = 0x588, /* 010110001000 */
   },
};

/*
 * Gen8 compaction table.  Every sub-table initializer here is empty, so all
 * entries are zero-initialized.
 * NOTE(review): this looks like a placeholder awaiting the real Gen8 values;
 * confirm before relying on it for instruction compaction.
 */
static const struct toy_compaction_table toy_compaction_table_gen8 = {
   .control = {
   },
   .datatype = {
   },
   .subreg = {
   },
   .src = {
   },
   .control_3src = {
   },
   .source_3src = {
   },
};

/**
 * Return the instruction compaction table for the generation of \p dev.
 *
 * Gen8 maps to the Gen8 table, Gen7 and Gen7.5 share the Gen7 table, and
 * Gen6 maps to the Gen6 table.  Any other generation trips an assertion and,
 * when assertions are compiled out, yields NULL.
 */
const struct toy_compaction_table *
toy_compiler_get_compaction_table(const struct ilo_dev *dev)
{
   switch (ilo_dev_gen(dev)) {
   case ILO_GEN(8):
      return &toy_compaction_table_gen8;
   case ILO_GEN(7.5):
   case ILO_GEN(7):
      return &toy_compaction_table_gen7;
   case ILO_GEN(6):
      return &toy_compaction_table_gen6;
   default:
      assert(!"unsupported gen");
      return NULL;
   }
}

/**
 * Return true if the source operand is null.
 *
 * A source is null when it lives in the ARF file and its origin is the null
 * register with a zero sub-register offset.
 */
static bool
src_is_null(const struct codegen *cg, int idx)
{
   const struct codegen_src *src = &cg->src[idx];

   return (src->file == GEN6_FILE_ARF &&
           src->origin == GEN6_ARF_NULL << CG_REG_SHIFT);
}
435 */ 436 static uint32_t 437 translate_src_gen6(const struct codegen *cg, int idx) 438 { 439 const struct codegen_src *src = &cg->src[idx]; 440 uint32_t dw; 441 442 ILO_DEV_ASSERT(cg->dev, 6, 8); 443 444 /* special treatment may be needed if any of the operand is immediate */ 445 if (cg->src[0].file == GEN6_FILE_IMM) { 446 assert(!cg->src[0].absolute && !cg->src[0].negate); 447 448 /* only the last src operand can be an immediate unless it is Gen8+ */ 449 assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1)); 450 451 if (!src_is_null(cg, 1)) 452 return cg->src[idx].origin; 453 454 if (idx == 0) { 455 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 456 return cg->src[1].type << 27 | 457 cg->src[1].file << 25; 458 } else { 459 return cg->flag_sub_reg_num << 25; 460 } 461 } else { 462 return cg->src[0].origin; 463 } 464 } 465 else if (idx && cg->src[1].file == GEN6_FILE_IMM) { 466 assert(!cg->src[1].absolute && !cg->src[1].negate); 467 return cg->src[1].origin; 468 } 469 470 assert(src->file != GEN6_FILE_IMM); 471 472 if (src->indirect) { 473 const int offset = (int) src->origin; 474 475 assert(src->file == GEN6_FILE_GRF); 476 assert(offset < 512 && offset >= -512); 477 478 if (cg->inst->access_mode == GEN6_ALIGN_16) { 479 assert(src->width == GEN6_WIDTH_4); 480 assert(src->horz_stride == GEN6_HORZSTRIDE_1); 481 482 /* the lower 4 bits are reserved for the swizzle_[xy] */ 483 assert(!(src->origin & 0xf)); 484 485 dw = src->vert_stride << 21 | 486 src->swizzle[3] << 18 | 487 src->swizzle[2] << 16 | 488 GEN6_ADDRMODE_INDIRECT << 15 | 489 src->negate << 14 | 490 src->absolute << 13 | 491 src->swizzle[1] << 2 | 492 src->swizzle[0]; 493 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 494 dw |= src->indirect_subreg << 9 | 495 (src->origin & 0x1f0); 496 } else { 497 dw |= src->indirect_subreg << 10 | 498 (src->origin & 0x3f0); 499 } 500 } 501 else { 502 assert(src->swizzle[0] == TOY_SWIZZLE_X && 503 src->swizzle[1] == TOY_SWIZZLE_Y && 504 src->swizzle[2] == TOY_SWIZZLE_Z && 
505 src->swizzle[3] == TOY_SWIZZLE_W); 506 507 dw = src->vert_stride << 21 | 508 src->width << 18 | 509 src->horz_stride << 16 | 510 GEN6_ADDRMODE_INDIRECT << 15 | 511 src->negate << 14 | 512 src->absolute << 13; 513 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 514 dw |= src->indirect_subreg << 9 | 515 (src->origin & 0x1ff); 516 } else { 517 dw |= src->indirect_subreg << 10 | 518 (src->origin & 0x3ff); 519 } 520 } 521 } 522 else { 523 switch (src->file) { 524 case GEN6_FILE_ARF: 525 break; 526 case GEN6_FILE_GRF: 527 assert(CG_REG_NUM(src->origin) < 128); 528 break; 529 case GEN6_FILE_MRF: 530 assert(cg->inst->opcode == GEN6_OPCODE_SEND || 531 cg->inst->opcode == GEN6_OPCODE_SENDC); 532 assert(CG_REG_NUM(src->origin) < 16); 533 break; 534 case GEN6_FILE_IMM: 535 default: 536 assert(!"invalid src file"); 537 break; 538 } 539 540 if (cg->inst->access_mode == GEN6_ALIGN_16) { 541 assert(src->width == GEN6_WIDTH_4); 542 assert(src->horz_stride == GEN6_HORZSTRIDE_1); 543 544 /* the lower 4 bits are reserved for the swizzle_[xy] */ 545 assert(!(src->origin & 0xf)); 546 547 dw = src->vert_stride << 21 | 548 src->swizzle[3] << 18 | 549 src->swizzle[2] << 16 | 550 GEN6_ADDRMODE_DIRECT << 15 | 551 src->negate << 14 | 552 src->absolute << 13 | 553 src->origin | 554 src->swizzle[1] << 2 | 555 src->swizzle[0]; 556 } 557 else { 558 assert(src->swizzle[0] == TOY_SWIZZLE_X && 559 src->swizzle[1] == TOY_SWIZZLE_Y && 560 src->swizzle[2] == TOY_SWIZZLE_Z && 561 src->swizzle[3] == TOY_SWIZZLE_W); 562 563 dw = src->vert_stride << 21 | 564 src->width << 18 | 565 src->horz_stride << 16 | 566 GEN6_ADDRMODE_DIRECT << 15 | 567 src->negate << 14 | 568 src->absolute << 13 | 569 src->origin; 570 } 571 } 572 573 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 574 const bool indirect_origin_bit9 = (cg->dst.indirect) ? 
575 (src->origin & 0x200) : 0; 576 577 if (idx == 0) { 578 dw |= indirect_origin_bit9 << 31 | 579 cg->src[1].type << 27 | 580 cg->src[1].file << 25; 581 } else { 582 dw |= indirect_origin_bit9 << 25; 583 } 584 } else { 585 if (idx == 0) 586 dw |= cg->flag_sub_reg_num << 25; 587 } 588 589 return dw; 590 } 591 592 /** 593 * Translate the destination operand to the higher 16 bits of DW1 of the 594 * 1-src/2-src format. 595 */ 596 static uint16_t 597 translate_dst_region_gen6(const struct codegen *cg) 598 { 599 const struct codegen_dst *dst = &cg->dst; 600 uint16_t dw1_region; 601 602 ILO_DEV_ASSERT(cg->dev, 6, 8); 603 604 if (dst->file == GEN6_FILE_IMM) { 605 /* dst is immediate (JIP) when the opcode is a conditional branch */ 606 switch (cg->inst->opcode) { 607 case GEN6_OPCODE_IF: 608 case GEN6_OPCODE_ELSE: 609 case GEN6_OPCODE_ENDIF: 610 case GEN6_OPCODE_WHILE: 611 assert(dst->type == GEN6_TYPE_W); 612 dw1_region = (dst->origin & 0xffff); 613 break; 614 default: 615 assert(!"dst cannot be immediate"); 616 dw1_region = 0; 617 break; 618 } 619 620 return dw1_region; 621 } 622 623 if (dst->indirect) { 624 const int offset = (int) dst->origin; 625 626 assert(dst->file == GEN6_FILE_GRF); 627 assert(offset < 512 && offset >= -512); 628 629 if (cg->inst->access_mode == GEN6_ALIGN_16) { 630 /* 631 * From the Sandy Bridge PRM, volume 4 part 2, page 144: 632 * 633 * "Allthough Dst.HorzStride is a don't care for Align16, HW 634 * needs this to be programmed as 01." 
635 */ 636 assert(dst->horz_stride == GEN6_HORZSTRIDE_1); 637 /* the lower 4 bits are reserved for the writemask */ 638 assert(!(dst->origin & 0xf)); 639 640 dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | 641 dst->horz_stride << 13 | 642 dst->writemask; 643 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 644 dw1_region |= dst->indirect_subreg << 9 | 645 (dst->origin & 0x1f0); 646 } else { 647 dw1_region |= dst->indirect_subreg << 10 | 648 (dst->origin & 0x3f0); 649 } 650 } 651 else { 652 assert(dst->writemask == TOY_WRITEMASK_XYZW); 653 654 dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | 655 dst->horz_stride << 13; 656 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 657 dw1_region |= dst->indirect_subreg << 9 | 658 (dst->origin & 0x1ff); 659 } else { 660 dw1_region |= dst->indirect_subreg << 10 | 661 (dst->origin & 0x3ff); 662 } 663 } 664 } 665 else { 666 assert((dst->file == GEN6_FILE_GRF && 667 CG_REG_NUM(dst->origin) < 128) || 668 (dst->file == GEN6_FILE_MRF && 669 CG_REG_NUM(dst->origin) < 16) || 670 (dst->file == GEN6_FILE_ARF)); 671 672 if (cg->inst->access_mode == GEN6_ALIGN_16) { 673 /* similar to the indirect case */ 674 assert(dst->horz_stride == GEN6_HORZSTRIDE_1); 675 assert(!(dst->origin & 0xf)); 676 677 dw1_region = GEN6_ADDRMODE_DIRECT << 15 | 678 dst->horz_stride << 13 | 679 dst->origin | 680 dst->writemask; 681 } 682 else { 683 assert(dst->writemask == TOY_WRITEMASK_XYZW); 684 685 dw1_region = GEN6_ADDRMODE_DIRECT << 15 | 686 dst->horz_stride << 13 | 687 dst->origin; 688 } 689 } 690 691 return dw1_region; 692 } 693 694 /** 695 * Translate the destination operand to DW1 of the 1-src/2-src format. 
696 */ 697 static uint32_t 698 translate_dst_gen6(const struct codegen *cg) 699 { 700 ILO_DEV_ASSERT(cg->dev, 6, 7.5); 701 702 return translate_dst_region_gen6(cg) << 16 | 703 cg->src[1].type << 12 | 704 cg->src[1].file << 10 | 705 cg->src[0].type << 7 | 706 cg->src[0].file << 5 | 707 cg->dst.type << 2 | 708 cg->dst.file; 709 } 710 711 static uint32_t 712 translate_dst_gen8(const struct codegen *cg) 713 { 714 const bool indirect_origin_bit9 = (cg->dst.indirect) ? 715 (cg->dst.origin & 0x200) : 0; 716 717 ILO_DEV_ASSERT(cg->dev, 8, 8); 718 719 return translate_dst_region_gen6(cg) << 16 | 720 indirect_origin_bit9 << 15 | 721 cg->src[0].type << 11 | 722 cg->src[0].file << 9 | 723 cg->dst.type << 5 | 724 cg->dst.file << 3 | 725 cg->inst->mask_ctrl << 2 | 726 cg->flag_reg_num << 1 | 727 cg->flag_sub_reg_num; 728 } 729 730 /** 731 * Translate the instruction to DW0 of the 1-src/2-src format. 732 */ 733 static uint32_t 734 translate_inst_gen6(const struct codegen *cg) 735 { 736 const bool debug_ctrl = false; 737 const bool cmpt_ctrl = false; 738 739 ILO_DEV_ASSERT(cg->dev, 6, 7.5); 740 741 assert(cg->inst->opcode < 128); 742 743 return cg->inst->saturate << 31 | 744 debug_ctrl << 30 | 745 cmpt_ctrl << 29 | 746 cg->inst->acc_wr_ctrl << 28 | 747 cg->inst->cond_modifier << 24 | 748 cg->inst->exec_size << 21 | 749 cg->inst->pred_inv << 20 | 750 cg->inst->pred_ctrl << 16 | 751 cg->inst->thread_ctrl << 14 | 752 cg->inst->qtr_ctrl << 12 | 753 cg->inst->dep_ctrl << 10 | 754 cg->inst->mask_ctrl << 9 | 755 cg->inst->access_mode << 8 | 756 cg->inst->opcode; 757 } 758 759 static uint32_t 760 translate_inst_gen8(const struct codegen *cg) 761 { 762 const bool debug_ctrl = false; 763 const bool cmpt_ctrl = false; 764 765 ILO_DEV_ASSERT(cg->dev, 8, 8); 766 767 assert(cg->inst->opcode < 128); 768 769 return cg->inst->saturate << 31 | 770 debug_ctrl << 30 | 771 cmpt_ctrl << 29 | 772 cg->inst->acc_wr_ctrl << 28 | 773 cg->inst->cond_modifier << 24 | 774 cg->inst->exec_size << 21 | 775 
cg->inst->pred_inv << 20 | 776 cg->inst->pred_ctrl << 16 | 777 cg->inst->thread_ctrl << 14 | 778 cg->inst->qtr_ctrl << 12 | 779 cg->inst->dep_ctrl << 9 | 780 cg->inst->access_mode << 8 | 781 cg->inst->opcode; 782 } 783 784 /** 785 * Codegen an instruction in 1-src/2-src format. 786 */ 787 static void 788 codegen_inst_gen6(const struct codegen *cg, uint32_t *code) 789 { 790 ILO_DEV_ASSERT(cg->dev, 6, 8); 791 792 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 793 code[0] = translate_inst_gen8(cg); 794 code[1] = translate_dst_gen8(cg); 795 } else { 796 code[0] = translate_inst_gen6(cg); 797 code[1] = translate_dst_gen6(cg); 798 } 799 800 code[2] = translate_src_gen6(cg, 0); 801 code[3] = translate_src_gen6(cg, 1); 802 assert(src_is_null(cg, 2)); 803 } 804 805 /** 806 * Codegen an instruction in 3-src format. 807 */ 808 static void 809 codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code) 810 { 811 const struct codegen_dst *dst = &cg->dst; 812 uint32_t dw0, dw1, dw_src[3]; 813 int i; 814 815 ILO_DEV_ASSERT(cg->dev, 6, 8); 816 817 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) 818 dw0 = translate_inst_gen8(cg); 819 else 820 dw0 = translate_inst_gen6(cg); 821 822 /* 823 * 3-src instruction restrictions 824 * 825 * - align16 with direct addressing 826 * - GRF or MRF dst 827 * - GRF src 828 * - sub_reg_num is DWORD aligned 829 * - no regioning except replication control 830 * (vert_stride == 0 && horz_stride == 0) 831 */ 832 assert(cg->inst->access_mode == GEN6_ALIGN_16); 833 834 assert(!dst->indirect); 835 assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) || 836 (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16)); 837 assert(!(dst->origin & 0x3)); 838 assert(dst->horz_stride == GEN6_HORZSTRIDE_1); 839 840 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { 841 dw1 = dst->origin << 19 | 842 dst->writemask << 17 | 843 cg->src[2].negate << 10 | 844 cg->src[2].negate << 10 | 845 cg->src[2].absolute << 9 | 846 cg->src[1].negate << 8 | 847 cg->src[1].absolute 
<< 7 | 848 cg->src[0].negate << 6 | 849 cg->src[0].absolute << 5 | 850 cg->inst->mask_ctrl << 2 | 851 cg->flag_reg_num << 1 | 852 cg->flag_sub_reg_num; 853 } else { 854 dw1 = dst->origin << 19 | 855 dst->writemask << 17 | 856 cg->src[2].negate << 9 | 857 cg->src[2].absolute << 8 | 858 cg->src[1].negate << 7 | 859 cg->src[1].absolute << 6 | 860 cg->src[0].negate << 5 | 861 cg->src[0].absolute << 4 | 862 cg->flag_sub_reg_num << 1 | 863 (dst->file == GEN6_FILE_MRF); 864 } 865 866 for (i = 0; i < 3; i++) { 867 const struct codegen_src *src = &cg->src[i]; 868 869 assert(!src->indirect); 870 assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128); 871 assert(!(src->origin & 0x3)); 872 873 assert((src->vert_stride == GEN6_VERTSTRIDE_4 && 874 src->horz_stride == GEN6_HORZSTRIDE_1) || 875 (src->vert_stride == GEN6_VERTSTRIDE_0 && 876 src->horz_stride == GEN6_HORZSTRIDE_0)); 877 assert(src->width == GEN6_WIDTH_4); 878 879 dw_src[i] = src->origin << 7 | 880 src->swizzle[3] << 7 | 881 src->swizzle[2] << 5 | 882 src->swizzle[1] << 3 | 883 src->swizzle[0] << 1 | 884 (src->vert_stride == GEN6_VERTSTRIDE_0 && 885 src->horz_stride == GEN6_HORZSTRIDE_0); 886 887 /* only the lower 20 bits are used */ 888 assert((dw_src[i] & 0xfffff) == dw_src[i]); 889 } 890 891 code[0] = dw0; 892 code[1] = dw1; 893 /* concatenate the bits of dw_src */ 894 code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0]; 895 code[3] = dw_src[2] << 10 | (dw_src[1] >> 11); 896 } 897 898 /** 899 * Sanity check the region parameters of the operands. 
900 */ 901 static void 902 codegen_validate_region_restrictions(const struct codegen *cg) 903 { 904 const int exec_size_map[] = { 905 [GEN6_EXECSIZE_1] = 1, 906 [GEN6_EXECSIZE_2] = 2, 907 [GEN6_EXECSIZE_4] = 4, 908 [GEN6_EXECSIZE_8] = 8, 909 [GEN6_EXECSIZE_16] = 16, 910 [GEN6_EXECSIZE_32] = 32, 911 }; 912 const int width_map[] = { 913 [GEN6_WIDTH_1] = 1, 914 [GEN6_WIDTH_2] = 2, 915 [GEN6_WIDTH_4] = 4, 916 [GEN6_WIDTH_8] = 8, 917 [GEN6_WIDTH_16] = 16, 918 }; 919 const int horz_stride_map[] = { 920 [GEN6_HORZSTRIDE_0] = 0, 921 [GEN6_HORZSTRIDE_1] = 1, 922 [GEN6_HORZSTRIDE_2] = 2, 923 [GEN6_HORZSTRIDE_4] = 4, 924 }; 925 const int vert_stride_map[] = { 926 [GEN6_VERTSTRIDE_0] = 0, 927 [GEN6_VERTSTRIDE_1] = 1, 928 [GEN6_VERTSTRIDE_2] = 2, 929 [GEN6_VERTSTRIDE_4] = 4, 930 [GEN6_VERTSTRIDE_8] = 8, 931 [GEN6_VERTSTRIDE_16] = 16, 932 [GEN6_VERTSTRIDE_32] = 32, 933 [7] = 64, 934 [8] = 128, 935 [9] = 256, 936 [GEN6_VERTSTRIDE_VXH] = 0, 937 }; 938 const int exec_size = exec_size_map[cg->inst->exec_size]; 939 int i; 940 941 /* Sandy Bridge PRM, volume 4 part 2, page 94 */ 942 943 /* 1. (we don't do 32 anyway) */ 944 assert(exec_size <= 16); 945 946 for (i = 0; i < ARRAY_SIZE(cg->src); i++) { 947 const int width = width_map[cg->src[i].width]; 948 const int horz_stride = horz_stride_map[cg->src[i].horz_stride]; 949 const int vert_stride = vert_stride_map[cg->src[i].vert_stride]; 950 951 if (src_is_null(cg, i)) 952 break; 953 954 /* 3. */ 955 assert(exec_size >= width); 956 957 if (exec_size == width) { 958 /* 4. & 5. */ 959 if (horz_stride) 960 assert(vert_stride == width * horz_stride); 961 } 962 963 if (width == 1) { 964 /* 6. */ 965 assert(horz_stride == 0); 966 967 /* 7. */ 968 if (exec_size == 1) 969 assert(vert_stride == 0); 970 } 971 972 /* 8. */ 973 if (!vert_stride && !horz_stride) 974 assert(width == 1); 975 } 976 977 /* derived from 10.1.2. & 10.2. 
*/ 978 assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0); 979 } 980 981 static unsigned 982 translate_vfile(enum toy_file file) 983 { 984 switch (file) { 985 case TOY_FILE_ARF: return GEN6_FILE_ARF; 986 case TOY_FILE_GRF: return GEN6_FILE_GRF; 987 case TOY_FILE_MRF: return GEN6_FILE_MRF; 988 case TOY_FILE_IMM: return GEN6_FILE_IMM; 989 default: 990 assert(!"unhandled toy file"); 991 return GEN6_FILE_GRF; 992 } 993 } 994 995 static unsigned 996 translate_vtype(enum toy_type type) 997 { 998 switch (type) { 999 case TOY_TYPE_F: return GEN6_TYPE_F; 1000 case TOY_TYPE_D: return GEN6_TYPE_D; 1001 case TOY_TYPE_UD: return GEN6_TYPE_UD; 1002 case TOY_TYPE_W: return GEN6_TYPE_W; 1003 case TOY_TYPE_UW: return GEN6_TYPE_UW; 1004 case TOY_TYPE_V: return GEN6_TYPE_V_IMM; 1005 default: 1006 assert(!"unhandled toy type"); 1007 return GEN6_TYPE_F; 1008 } 1009 } 1010 1011 static unsigned 1012 translate_writemask(enum toy_writemask writemask) 1013 { 1014 /* TOY_WRITEMASK_* are compatible with the hardware definitions */ 1015 assert(writemask <= 0xf); 1016 return writemask; 1017 } 1018 1019 static unsigned 1020 translate_swizzle(enum toy_swizzle swizzle) 1021 { 1022 /* TOY_SWIZZLE_* are compatible with the hardware definitions */ 1023 assert(swizzle <= 3); 1024 return swizzle; 1025 } 1026 1027 /** 1028 * Prepare for generating an instruction. 
1029 */ 1030 static void 1031 codegen_prepare(struct codegen *cg, const struct ilo_dev *dev, 1032 const struct toy_inst *inst, int pc, int rect_linear_width) 1033 { 1034 int i; 1035 1036 cg->dev = dev; 1037 cg->inst = inst; 1038 cg->pc = pc; 1039 1040 cg->flag_reg_num = 0; 1041 cg->flag_sub_reg_num = 0; 1042 1043 cg->dst.file = translate_vfile(inst->dst.file); 1044 cg->dst.type = translate_vtype(inst->dst.type); 1045 cg->dst.indirect = inst->dst.indirect; 1046 cg->dst.indirect_subreg = inst->dst.indirect_subreg; 1047 cg->dst.origin = inst->dst.val32; 1048 1049 /* 1050 * From the Sandy Bridge PRM, volume 4 part 2, page 81: 1051 * 1052 * "For a word or an unsigned word immediate data, software must 1053 * replicate the same 16-bit immediate value to both the lower word 1054 * and the high word of the 32-bit immediate field in an instruction." 1055 */ 1056 if (inst->dst.file == TOY_FILE_IMM) { 1057 switch (inst->dst.type) { 1058 case TOY_TYPE_W: 1059 case TOY_TYPE_UW: 1060 cg->dst.origin &= 0xffff; 1061 cg->dst.origin |= cg->dst.origin << 16; 1062 break; 1063 default: 1064 break; 1065 } 1066 } 1067 1068 cg->dst.writemask = translate_writemask(inst->dst.writemask); 1069 1070 switch (inst->dst.rect) { 1071 case TOY_RECT_LINEAR: 1072 cg->dst.horz_stride = GEN6_HORZSTRIDE_1; 1073 break; 1074 default: 1075 assert(!"unsupported dst region"); 1076 cg->dst.horz_stride = GEN6_HORZSTRIDE_1; 1077 break; 1078 } 1079 1080 for (i = 0; i < ARRAY_SIZE(cg->src); i++) { 1081 struct codegen_src *src = &cg->src[i]; 1082 1083 src->file = translate_vfile(inst->src[i].file); 1084 src->type = translate_vtype(inst->src[i].type); 1085 src->indirect = inst->src[i].indirect; 1086 src->indirect_subreg = inst->src[i].indirect_subreg; 1087 src->origin = inst->src[i].val32; 1088 1089 /* do the same for src */ 1090 if (inst->dst.file == TOY_FILE_IMM) { 1091 switch (inst->src[i].type) { 1092 case TOY_TYPE_W: 1093 case TOY_TYPE_UW: 1094 src->origin &= 0xffff; 1095 src->origin |= src->origin << 16; 1096 
break; 1097 default: 1098 break; 1099 } 1100 } 1101 1102 src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x); 1103 src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y); 1104 src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z); 1105 src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w); 1106 src->absolute = inst->src[i].absolute; 1107 src->negate = inst->src[i].negate; 1108 1109 switch (inst->src[i].rect) { 1110 case TOY_RECT_LINEAR: 1111 switch (rect_linear_width) { 1112 case 1: 1113 src->vert_stride = GEN6_VERTSTRIDE_1; 1114 src->width = GEN6_WIDTH_1; 1115 break; 1116 case 2: 1117 src->vert_stride = GEN6_VERTSTRIDE_2; 1118 src->width = GEN6_WIDTH_2; 1119 break; 1120 case 4: 1121 src->vert_stride = GEN6_VERTSTRIDE_4; 1122 src->width = GEN6_WIDTH_4; 1123 break; 1124 case 8: 1125 src->vert_stride = GEN6_VERTSTRIDE_8; 1126 src->width = GEN6_WIDTH_8; 1127 break; 1128 case 16: 1129 src->vert_stride = GEN6_VERTSTRIDE_16; 1130 src->width = GEN6_WIDTH_16; 1131 break; 1132 default: 1133 assert(!"unsupported TOY_RECT_LINEAR width"); 1134 src->vert_stride = GEN6_VERTSTRIDE_1; 1135 src->width = GEN6_WIDTH_1; 1136 break; 1137 } 1138 src->horz_stride = GEN6_HORZSTRIDE_1; 1139 break; 1140 case TOY_RECT_041: 1141 src->vert_stride = GEN6_VERTSTRIDE_0; 1142 src->width = GEN6_WIDTH_4; 1143 src->horz_stride = GEN6_HORZSTRIDE_1; 1144 break; 1145 case TOY_RECT_010: 1146 src->vert_stride = GEN6_VERTSTRIDE_0; 1147 src->width = GEN6_WIDTH_1; 1148 src->horz_stride = GEN6_HORZSTRIDE_0; 1149 break; 1150 case TOY_RECT_220: 1151 src->vert_stride = GEN6_VERTSTRIDE_2; 1152 src->width = GEN6_WIDTH_2; 1153 src->horz_stride = GEN6_HORZSTRIDE_0; 1154 break; 1155 case TOY_RECT_440: 1156 src->vert_stride = GEN6_VERTSTRIDE_4; 1157 src->width = GEN6_WIDTH_4; 1158 src->horz_stride = GEN6_HORZSTRIDE_0; 1159 break; 1160 case TOY_RECT_240: 1161 src->vert_stride = GEN6_VERTSTRIDE_2; 1162 src->width = GEN6_WIDTH_4; 1163 src->horz_stride = GEN6_HORZSTRIDE_0; 1164 break; 1165 
default: 1166 assert(!"unsupported src region"); 1167 src->vert_stride = GEN6_VERTSTRIDE_1; 1168 src->width = GEN6_WIDTH_1; 1169 src->horz_stride = GEN6_HORZSTRIDE_1; 1170 break; 1171 } 1172 } 1173 } 1174 1175 /** 1176 * Generate HW shader code. The instructions should have been legalized. 1177 */ 1178 void * 1179 toy_compiler_assemble(struct toy_compiler *tc, int *size) 1180 { 1181 const struct toy_inst *inst; 1182 uint32_t *code; 1183 int pc; 1184 1185 code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t)); 1186 if (!code) 1187 return NULL; 1188 1189 pc = 0; 1190 tc_head(tc); 1191 while ((inst = tc_next(tc)) != NULL) { 1192 uint32_t *dw = &code[pc * 4]; 1193 struct codegen cg; 1194 1195 if (pc >= tc->num_instructions) { 1196 tc_fail(tc, "wrong instructoun count"); 1197 break; 1198 } 1199 1200 codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width); 1201 codegen_validate_region_restrictions(&cg); 1202 1203 switch (inst->opcode) { 1204 case GEN6_OPCODE_MAD: 1205 codegen_inst_3src_gen6(&cg, dw); 1206 break; 1207 default: 1208 codegen_inst_gen6(&cg, dw); 1209 break; 1210 } 1211 1212 pc++; 1213 } 1214 1215 /* never return an invalid kernel */ 1216 if (tc->fail) { 1217 FREE(code); 1218 return NULL; 1219 } 1220 1221 if (size) 1222 *size = pc * 4 * sizeof(uint32_t); 1223 1224 return code; 1225 } 1226