Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright © 2013 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include <stdio.h>
     25 #include <stdlib.h>
     26 #include "gen_device_info.h"
     27 #include "compiler/shader_enums.h"
     28 
     29 static const struct gen_device_info gen_device_info_i965 = {
     30    .gen = 4,
     31    .has_negative_rhw_bug = true,
     32    .num_slices = 1,
     33    .max_vs_threads = 16,
     34    .max_gs_threads = 2,
     35    .max_wm_threads = 8 * 4,
     36    .urb = {
     37       .size = 256,
     38    },
     39 };
     40 
     41 static const struct gen_device_info gen_device_info_g4x = {
     42    .gen = 4,
     43    .has_pln = true,
     44    .has_compr4 = true,
     45    .has_surface_tile_offset = true,
     46    .is_g4x = true,
     47    .num_slices = 1,
     48    .max_vs_threads = 32,
     49    .max_gs_threads = 2,
     50    .max_wm_threads = 10 * 5,
     51    .urb = {
     52       .size = 384,
     53    },
     54 };
     55 
     56 static const struct gen_device_info gen_device_info_ilk = {
     57    .gen = 5,
     58    .has_pln = true,
     59    .has_compr4 = true,
     60    .has_surface_tile_offset = true,
     61    .num_slices = 1,
     62    .max_vs_threads = 72,
     63    .max_gs_threads = 32,
     64    .max_wm_threads = 12 * 6,
     65    .urb = {
     66       .size = 1024,
     67    },
     68 };
     69 
     70 static const struct gen_device_info gen_device_info_snb_gt1 = {
     71    .gen = 6,
     72    .gt = 1,
     73    .has_hiz_and_separate_stencil = true,
     74    .has_llc = true,
     75    .has_pln = true,
     76    .has_surface_tile_offset = true,
     77    .needs_unlit_centroid_workaround = true,
     78    .num_slices = 1,
     79    .max_vs_threads = 24,
     80    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
     81    .max_wm_threads = 40,
     82    .urb = {
     83       .size = 32,
     84       .min_entries = {
     85          [MESA_SHADER_VERTEX]   = 24,
     86       },
     87       .max_entries = {
     88          [MESA_SHADER_VERTEX]   = 256,
     89          [MESA_SHADER_GEOMETRY] = 256,
     90       },
     91    },
     92 };
     93 
     94 static const struct gen_device_info gen_device_info_snb_gt2 = {
     95    .gen = 6,
     96    .gt = 2,
     97    .has_hiz_and_separate_stencil = true,
     98    .has_llc = true,
     99    .has_pln = true,
    100    .has_surface_tile_offset = true,
    101    .needs_unlit_centroid_workaround = true,
    102    .num_slices = 1,
    103    .max_vs_threads = 60,
    104    .max_gs_threads = 60,
    105    .max_wm_threads = 80,
    106    .urb = {
    107       .size = 64,
    108       .min_entries = {
    109          [MESA_SHADER_VERTEX]   = 24,
    110       },
    111       .max_entries = {
    112          [MESA_SHADER_VERTEX]   = 256,
    113          [MESA_SHADER_GEOMETRY] = 256,
    114       },
    115    },
    116 };
    117 
    118 #define GEN7_FEATURES                               \
    119    .gen = 7,                                        \
    120    .has_hiz_and_separate_stencil = true,            \
    121    .must_use_separate_stencil = true,               \
    122    .has_llc = true,                                 \
    123    .has_pln = true,                                 \
    124    .has_surface_tile_offset = true
    125 
    126 static const struct gen_device_info gen_device_info_ivb_gt1 = {
    127    GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
    128    .num_slices = 1,
    129    .max_vs_threads = 36,
    130    .max_tcs_threads = 36,
    131    .max_tes_threads = 36,
    132    .max_gs_threads = 36,
    133    .max_wm_threads = 48,
    134    .max_cs_threads = 36,
    135    .urb = {
    136       .size = 128,
    137       .min_entries = {
    138          [MESA_SHADER_VERTEX]    = 32,
    139          [MESA_SHADER_TESS_EVAL] = 10,
    140       },
    141       .max_entries = {
    142          [MESA_SHADER_VERTEX]    = 512,
    143          [MESA_SHADER_TESS_CTRL] = 32,
    144          [MESA_SHADER_TESS_EVAL] = 288,
    145          [MESA_SHADER_GEOMETRY]  = 192,
    146       },
    147    },
    148 };
    149 
    150 static const struct gen_device_info gen_device_info_ivb_gt2 = {
    151    GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
    152    .num_slices = 1,
    153    .max_vs_threads = 128,
    154    .max_tcs_threads = 128,
    155    .max_tes_threads = 128,
    156    .max_gs_threads = 128,
    157    .max_wm_threads = 172,
    158    .max_cs_threads = 64,
    159    .urb = {
    160       .size = 256,
    161       .min_entries = {
    162          [MESA_SHADER_VERTEX]    = 32,
    163          [MESA_SHADER_TESS_EVAL] = 10,
    164       },
    165       .max_entries = {
    166          [MESA_SHADER_VERTEX]    = 704,
    167          [MESA_SHADER_TESS_CTRL] = 64,
    168          [MESA_SHADER_TESS_EVAL] = 448,
    169          [MESA_SHADER_GEOMETRY]  = 320,
    170       },
    171    },
    172 };
    173 
    174 static const struct gen_device_info gen_device_info_byt = {
    175    GEN7_FEATURES, .is_baytrail = true, .gt = 1,
    176    .num_slices = 1,
    177    .has_llc = false,
    178    .max_vs_threads = 36,
    179    .max_tcs_threads = 36,
    180    .max_tes_threads = 36,
    181    .max_gs_threads = 36,
    182    .max_wm_threads = 48,
    183    .max_cs_threads = 32,
    184    .urb = {
    185       .size = 128,
    186       .min_entries = {
    187          [MESA_SHADER_VERTEX]    = 32,
    188          [MESA_SHADER_TESS_EVAL] = 10,
    189       },
    190       .max_entries = {
    191          [MESA_SHADER_VERTEX]    = 512,
    192          [MESA_SHADER_TESS_CTRL] = 32,
    193          [MESA_SHADER_TESS_EVAL] = 288,
    194          [MESA_SHADER_GEOMETRY]  = 192,
    195       },
    196    },
    197 };
    198 
    199 #define HSW_FEATURES             \
    200    GEN7_FEATURES,                \
    201    .is_haswell = true,           \
    202    .supports_simd16_3src = true, \
    203    .has_resource_streamer = true
    204 
    205 static const struct gen_device_info gen_device_info_hsw_gt1 = {
    206    HSW_FEATURES, .gt = 1,
    207    .num_slices = 1,
    208    .max_vs_threads = 70,
    209    .max_tcs_threads = 70,
    210    .max_tes_threads = 70,
    211    .max_gs_threads = 70,
    212    .max_wm_threads = 102,
    213    .max_cs_threads = 70,
    214    .urb = {
    215       .size = 128,
    216       .min_entries = {
    217          [MESA_SHADER_VERTEX]    = 32,
    218          [MESA_SHADER_TESS_EVAL] = 10,
    219       },
    220       .max_entries = {
    221          [MESA_SHADER_VERTEX]    = 640,
    222          [MESA_SHADER_TESS_CTRL] = 64,
    223          [MESA_SHADER_TESS_EVAL] = 384,
    224          [MESA_SHADER_GEOMETRY]  = 256,
    225       },
    226    },
    227 };
    228 
    229 static const struct gen_device_info gen_device_info_hsw_gt2 = {
    230    HSW_FEATURES, .gt = 2,
    231    .num_slices = 1,
    232    .max_vs_threads = 280,
    233    .max_tcs_threads = 256,
    234    .max_tes_threads = 280,
    235    .max_gs_threads = 256,
    236    .max_wm_threads = 204,
    237    .max_cs_threads = 70,
    238    .urb = {
    239       .size = 256,
    240       .min_entries = {
    241          [MESA_SHADER_VERTEX]    = 64,
    242          [MESA_SHADER_TESS_EVAL] = 10,
    243       },
    244       .max_entries = {
    245          [MESA_SHADER_VERTEX]    = 1664,
    246          [MESA_SHADER_TESS_CTRL] = 128,
    247          [MESA_SHADER_TESS_EVAL] = 960,
    248          [MESA_SHADER_GEOMETRY]  = 640,
    249       },
    250    },
    251 };
    252 
    253 static const struct gen_device_info gen_device_info_hsw_gt3 = {
    254    HSW_FEATURES, .gt = 3,
    255    .num_slices = 2,
    256    .max_vs_threads = 280,
    257    .max_tcs_threads = 256,
    258    .max_tes_threads = 280,
    259    .max_gs_threads = 256,
    260    .max_wm_threads = 408,
    261    .max_cs_threads = 70,
    262    .urb = {
    263       .size = 512,
    264       .min_entries = {
    265          [MESA_SHADER_VERTEX]    = 64,
    266          [MESA_SHADER_TESS_EVAL] = 10,
    267       },
    268       .max_entries = {
    269          [MESA_SHADER_VERTEX]    = 1664,
    270          [MESA_SHADER_TESS_CTRL] = 128,
    271          [MESA_SHADER_TESS_EVAL] = 960,
    272          [MESA_SHADER_GEOMETRY]  = 640,
    273       },
    274    },
    275 };
    276 
    277 #define GEN8_FEATURES                               \
    278    .gen = 8,                                        \
    279    .has_hiz_and_separate_stencil = true,            \
    280    .has_resource_streamer = true,                   \
    281    .must_use_separate_stencil = true,               \
    282    .has_llc = true,                                 \
    283    .has_pln = true,                                 \
    284    .supports_simd16_3src = true,                    \
    285    .has_surface_tile_offset = true,                 \
    286    .max_vs_threads = 504,                           \
    287    .max_tcs_threads = 504,                          \
    288    .max_tes_threads = 504,                          \
    289    .max_gs_threads = 504,                           \
    290    .max_wm_threads = 384
    291 
    292 static const struct gen_device_info gen_device_info_bdw_gt1 = {
    293    GEN8_FEATURES, .gt = 1,
    294    .num_slices = 1,
    295    .max_cs_threads = 42,
    296    .urb = {
    297       .size = 192,
    298       .min_entries = {
    299          [MESA_SHADER_VERTEX]    = 64,
    300          [MESA_SHADER_TESS_EVAL] = 34,
    301       },
    302       .max_entries = {
    303          [MESA_SHADER_VERTEX]    = 2560,
    304          [MESA_SHADER_TESS_CTRL] = 504,
    305          [MESA_SHADER_TESS_EVAL] = 1536,
    306          [MESA_SHADER_GEOMETRY]  = 960,
    307       },
    308    }
    309 };
    310 
    311 static const struct gen_device_info gen_device_info_bdw_gt2 = {
    312    GEN8_FEATURES, .gt = 2,
    313    .num_slices = 1,
    314    .max_cs_threads = 56,
    315    .urb = {
    316       .size = 384,
    317       .min_entries = {
    318          [MESA_SHADER_VERTEX]    = 64,
    319          [MESA_SHADER_TESS_EVAL] = 34,
    320       },
    321       .max_entries = {
    322          [MESA_SHADER_VERTEX]    = 2560,
    323          [MESA_SHADER_TESS_CTRL] = 504,
    324          [MESA_SHADER_TESS_EVAL] = 1536,
    325          [MESA_SHADER_GEOMETRY]  = 960,
    326       },
    327    }
    328 };
    329 
    330 static const struct gen_device_info gen_device_info_bdw_gt3 = {
    331    GEN8_FEATURES, .gt = 3,
    332    .num_slices = 2,
    333    .max_cs_threads = 56,
    334    .urb = {
    335       .size = 384,
    336       .min_entries = {
    337          [MESA_SHADER_VERTEX]    = 64,
    338          [MESA_SHADER_TESS_EVAL] = 34,
    339       },
    340       .max_entries = {
    341          [MESA_SHADER_VERTEX]    = 2560,
    342          [MESA_SHADER_TESS_CTRL] = 504,
    343          [MESA_SHADER_TESS_EVAL] = 1536,
    344          [MESA_SHADER_GEOMETRY]  = 960,
    345       },
    346    }
    347 };
    348 
    349 static const struct gen_device_info gen_device_info_chv = {
    350    GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
    351    .has_llc = false,
    352    .num_slices = 1,
    353    .max_vs_threads = 80,
    354    .max_tcs_threads = 80,
    355    .max_tes_threads = 80,
    356    .max_gs_threads = 80,
    357    .max_wm_threads = 128,
    358    .max_cs_threads = 6 * 7,
    359    .urb = {
    360       .size = 192,
    361       .min_entries = {
    362          [MESA_SHADER_VERTEX]    = 34,
    363          [MESA_SHADER_TESS_EVAL] = 34,
    364       },
    365       .max_entries = {
    366          [MESA_SHADER_VERTEX]    = 640,
    367          [MESA_SHADER_TESS_CTRL] = 80,
    368          [MESA_SHADER_TESS_EVAL] = 384,
    369          [MESA_SHADER_GEOMETRY]  = 256,
    370       },
    371    }
    372 };
    373 
    374 #define GEN9_FEATURES                               \
    375    .gen = 9,                                        \
    376    .has_hiz_and_separate_stencil = true,            \
    377    .has_resource_streamer = true,                   \
    378    .must_use_separate_stencil = true,               \
    379    .has_llc = true,                                 \
    380    .has_pln = true,                                 \
    381    .supports_simd16_3src = true,                    \
    382    .has_surface_tile_offset = true,                 \
    383    .max_vs_threads = 336,                           \
    384    .max_gs_threads = 336,                           \
    385    .max_tcs_threads = 336,                          \
    386    .max_tes_threads = 336,                          \
    387    .max_cs_threads = 56,                            \
    388    .urb = {                                         \
    389       .size = 384,                                  \
    390       .min_entries = {                              \
    391          [MESA_SHADER_VERTEX]    = 64,              \
    392          [MESA_SHADER_TESS_EVAL] = 34,              \
    393       },                                            \
    394       .max_entries = {                              \
    395          [MESA_SHADER_VERTEX]    = 1856,            \
    396          [MESA_SHADER_TESS_CTRL] = 672,             \
    397          [MESA_SHADER_TESS_EVAL] = 1120,            \
    398          [MESA_SHADER_GEOMETRY]  = 640,             \
    399       },                                            \
    400    }
    401 
    402 #define GEN9_LP_FEATURES                           \
    403    GEN9_FEATURES,                                  \
    404    .is_broxton = 1,                                \
    405    .gt = 1,                                        \
    406    .has_llc = false,                               \
    407    .num_slices = 1,                                \
    408    .max_vs_threads = 112,                          \
    409    .max_tcs_threads = 112,                         \
    410    .max_tes_threads = 112,                         \
    411    .max_gs_threads = 112,                          \
    412    .max_cs_threads = 6 * 6,                        \
    413    .urb = {                                        \
    414       .size = 192,                                 \
    415       .min_entries = {                             \
    416          [MESA_SHADER_VERTEX]    = 34,             \
    417          [MESA_SHADER_TESS_EVAL] = 34,             \
    418       },                                           \
    419       .max_entries = {                             \
    420          [MESA_SHADER_VERTEX]    = 704,            \
    421          [MESA_SHADER_TESS_CTRL] = 256,            \
    422          [MESA_SHADER_TESS_EVAL] = 416,            \
    423          [MESA_SHADER_GEOMETRY]  = 256,            \
    424       },                                           \
    425    }
    426 
    427 #define GEN9_LP_FEATURES_2X6                       \
    428    GEN9_LP_FEATURES,                               \
    429    .max_vs_threads = 56,                           \
    430    .max_tcs_threads = 56,                          \
    431    .max_tes_threads = 56,                          \
    432    .max_gs_threads = 56,                           \
    433    .max_cs_threads = 6 * 6,                        \
    434    .urb = {                                        \
    435       .size = 128,                                 \
    436       .min_entries = {                             \
    437          [MESA_SHADER_VERTEX]    = 34,             \
    438          [MESA_SHADER_TESS_EVAL] = 34,             \
    439       },                                           \
    440       .max_entries = {                             \
    441          [MESA_SHADER_VERTEX]    = 352,            \
    442          [MESA_SHADER_TESS_CTRL] = 128,            \
    443          [MESA_SHADER_TESS_EVAL] = 208,            \
    444          [MESA_SHADER_GEOMETRY]  = 128,            \
    445       },                                           \
    446    }
    447 
    448 static const struct gen_device_info gen_device_info_skl_gt1 = {
    449    GEN9_FEATURES, .gt = 1,
    450    .num_slices = 1,
    451    .urb.size = 192,
    452 };
    453 
    454 static const struct gen_device_info gen_device_info_skl_gt2 = {
    455    GEN9_FEATURES, .gt = 2,
    456    .num_slices = 1,
    457 };
    458 
    459 static const struct gen_device_info gen_device_info_skl_gt3 = {
    460    GEN9_FEATURES, .gt = 3,
    461    .num_slices = 2,
    462 };
    463 
    464 static const struct gen_device_info gen_device_info_skl_gt4 = {
    465    GEN9_FEATURES, .gt = 4,
    466    .num_slices = 3,
    467    /* From the "L3 Allocation and Programming" documentation:
    468     *
    469     * "URB is limited to 1008KB due to programming restrictions.  This is not a
    470     * restriction of the L3 implementation, but of the FF and other clients.
    471     * Therefore, in a GT4 implementation it is possible for the programmed
    472     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
    473     * only 1008KB of this will be used."
    474     */
    475    .urb.size = 1008 / 3,
    476 };
    477 
    478 static const struct gen_device_info gen_device_info_bxt = {
    479    GEN9_LP_FEATURES
    480 };
    481 
    482 static const struct gen_device_info gen_device_info_bxt_2x6 = {
    483    GEN9_LP_FEATURES_2X6
    484 };
    485 /*
    486  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
    487  * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
    488  */
    489 
    490 static const struct gen_device_info gen_device_info_kbl_gt1 = {
    491    GEN9_FEATURES,
    492    .is_kabylake = true,
    493    .gt = 1,
    494 
    495    .max_cs_threads = 7 * 6,
    496    .urb.size = 192,
    497    .num_slices = 1,
    498 };
    499 
    500 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
    501    GEN9_FEATURES,
    502    .is_kabylake = true,
    503    .gt = 1,
    504 
    505    .max_cs_threads = 7 * 6,
    506    .num_slices = 1,
    507 };
    508 
    509 static const struct gen_device_info gen_device_info_kbl_gt2 = {
    510    GEN9_FEATURES,
    511    .is_kabylake = true,
    512    .gt = 2,
    513 
    514    .num_slices = 1,
    515 };
    516 
    517 static const struct gen_device_info gen_device_info_kbl_gt3 = {
    518    GEN9_FEATURES,
    519    .is_kabylake = true,
    520    .gt = 3,
    521 
    522    .num_slices = 2,
    523 };
    524 
    525 static const struct gen_device_info gen_device_info_kbl_gt4 = {
    526    GEN9_FEATURES,
    527    .is_kabylake = true,
    528    .gt = 4,
    529 
    530    /*
    531     * From the "L3 Allocation and Programming" documentation:
    532     *
    533     * "URB is limited to 1008KB due to programming restrictions.  This
    534     *  is not a restriction of the L3 implementation, but of the FF and
    535     *  other clients.  Therefore, in a GT4 implementation it is
    536     *  possible for the programmed allocation of the L3 data array to
    537     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
    538     *  will be used."
    539     */
    540    .urb.size = 1008 / 3,
    541    .num_slices = 3,
    542 };
    543 
    544 static const struct gen_device_info gen_device_info_glk = {
    545    GEN9_LP_FEATURES
    546 };
    547 
    548 static const struct gen_device_info gen_device_info_glk_2x6 = {
    549    GEN9_LP_FEATURES_2X6
    550 };
    551 
    552 bool
    553 gen_get_device_info(int devid, struct gen_device_info *devinfo)
    554 {
    555    switch (devid) {
    556 #undef CHIPSET
    557 #define CHIPSET(id, family, name) \
    558       case id: *devinfo = gen_device_info_##family; break;
    559 #include "pci_ids/i965_pci_ids.h"
    560    default:
    561       fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid);
    562       return false;
    563    }
    564 
    565    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
    566     *
    567     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
    568     *  allocate scratch space enough so that each slice has 4 slices allowed."
    569     *
    570     * The equivalent internal documentation says that this programming note
    571     * applies to all Gen9+ platforms.
    572     *
    573     * The hardware typically calculates the scratch space pointer by taking
    574     * the base address, and adding per-thread-scratch-space * thread ID.
    575     * Extra padding can be necessary depending how the thread IDs are
    576     * calculated for a particular shader stage.
    577     */
    578    if (devinfo->gen >= 9) {
    579       devinfo->max_wm_threads = 64 /* threads-per-PSD */
    580                               * devinfo->num_slices
    581                               * 4; /* effective subslices per slice */
    582    }
    583 
    584    return true;
    585 }
    586 
    587 const char *
    588 gen_get_device_name(int devid)
    589 {
    590    switch (devid) {
    591 #undef CHIPSET
    592 #define CHIPSET(id, family, name) case id: return name;
    593 #include "pci_ids/i965_pci_ids.h"
    594    default:
    595       return NULL;
    596    }
    597 }
    598