1 /* 2 * Copyright 2012 Francisco Jerez 3 * Copyright 2015 Samuel Pitoiset 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 */ 26 27 #include "nv50/nv50_context.h" 28 #include "nv50/nv50_compute.xml.h" 29 30 #include "codegen/nv50_ir_driver.h" 31 32 int 33 nv50_screen_compute_setup(struct nv50_screen *screen, 34 struct nouveau_pushbuf *push) 35 { 36 struct nouveau_device *dev = screen->base.device; 37 struct nouveau_object *chan = screen->base.channel; 38 struct nv04_fifo *fifo = (struct nv04_fifo *)chan->data; 39 unsigned obj_class; 40 int i, ret; 41 42 switch (dev->chipset & 0xf0) { 43 case 0x50: 44 case 0x80: 45 case 0x90: 46 obj_class = NV50_COMPUTE_CLASS; 47 break; 48 case 0xa0: 49 switch (dev->chipset) { 50 case 0xa3: 51 case 0xa5: 52 case 0xa8: 53 obj_class = NVA3_COMPUTE_CLASS; 54 break; 55 default: 56 obj_class = NV50_COMPUTE_CLASS; 57 break; 58 } 59 break; 60 default: 61 NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); 62 return -1; 63 } 64 65 ret = nouveau_object_new(chan, 0xbeef50c0, obj_class, NULL, 0, 66 &screen->compute); 67 if (ret) 68 return ret; 69 70 BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); 71 PUSH_DATA (push, screen->compute->handle); 72 73 BEGIN_NV04(push, NV50_CP(UNK02A0), 1); 74 PUSH_DATA (push, 1); 75 BEGIN_NV04(push, NV50_CP(DMA_STACK), 1); 76 PUSH_DATA (push, fifo->vram); 77 BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2); 78 PUSH_DATAh(push, screen->stack_bo->offset); 79 PUSH_DATA (push, screen->stack_bo->offset); 80 BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1); 81 PUSH_DATA (push, 4); 82 83 BEGIN_NV04(push, NV50_CP(UNK0290), 1); 84 PUSH_DATA (push, 1); 85 BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1); 86 PUSH_DATA (push, 1); 87 BEGIN_NV04(push, NV50_CP(REG_MODE), 1); 88 PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); 89 BEGIN_NV04(push, NV50_CP(UNK0384), 1); 90 PUSH_DATA (push, 0x100); 91 BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1); 92 PUSH_DATA (push, fifo->vram); 93 94 for (i = 0; i < 15; i++) { 95 BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2); 96 PUSH_DATA (push, 0); 97 PUSH_DATA (push, 0); 98 BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1); 99 PUSH_DATA (push, 0); 100 BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1); 101 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 102 } 103 104 BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2); 105 PUSH_DATA (push, 0); 106 PUSH_DATA (push, 0); 107 BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1); 108 PUSH_DATA (push, ~0); 109 BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1); 110 PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); 111 112 BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1); 113 PUSH_DATA (push, 7); 114 BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1); 115 PUSH_DATA (push, 1); 116 BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1); 117 PUSH_DATA (push, 7); 118 BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1); 119 PUSH_DATA (push, 1); 120 BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); 121 PUSH_DATA (push, 0); 122 123 BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1); 124 PUSH_DATA (push, fifo->vram); 125 BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1); 126 PUSH_DATA (push, 0x54); 127 BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1); 128 PUSH_DATA (push, 0); 129 130 BEGIN_NV04(push, NV50_CP(DMA_TIC), 1); 131 PUSH_DATA (push, fifo->vram); 132 BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3); 133 PUSH_DATAh(push, screen->txc->offset); 134 PUSH_DATA (push, screen->txc->offset); 135 PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); 136 137 BEGIN_NV04(push, NV50_CP(DMA_TSC), 1); 138 PUSH_DATA (push, fifo->vram); 139 BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3); 140 PUSH_DATAh(push, screen->txc->offset + 65536); 141 PUSH_DATA (push, screen->txc->offset + 65536); 142 PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); 143 144 BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1); 145 PUSH_DATA (push, fifo->vram); 146 147 BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1); 148 PUSH_DATA (push, fifo->vram); 149 BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2); 150 PUSH_DATAh(push, screen->tls_bo->offset + 65536); 151 PUSH_DATA (push, screen->tls_bo->offset + 65536); 152 BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1); 153 PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); 154 155 return 0; 156 } 157 158 static void 159 nv50_compute_validate_globals(struct nv50_context *nv50) 160 { 161 unsigned i; 162 163 for (i = 0; i < nv50->global_residents.size / sizeof(struct pipe_resource *); 164 ++i) { 165 struct pipe_resource *res = *util_dynarray_element( 166 &nv50->global_residents, struct pipe_resource *, i); 167 if (res) 168 nv50_add_bufctx_resident(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL, 169 nv04_resource(res), NOUVEAU_BO_RDWR); 170 } 171 } 172 173 static struct nv50_state_validate 174 validate_list_cp[] = { 175 { nv50_compprog_validate, NV50_NEW_CP_PROGRAM }, 176 { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS }, 177 }; 178 179 static bool 180 nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask) 181 { 182 bool ret; 183 184 /* TODO: validate textures, samplers, surfaces */ 185 ret = nv50_state_validate(nv50, mask, validate_list_cp, 186 ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp, 187 nv50->bufctx_cp); 188 189 if (unlikely(nv50->state.flushed)) 190 nv50_bufctx_fence(nv50->bufctx_cp, true); 191 return ret; 192 } 193 194 static void 195 nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) 196 { 197 struct nv50_screen *screen = nv50->screen; 198 struct nouveau_pushbuf *push = screen->base.pushbuf; 199 unsigned size = align(nv50->compprog->parm_size, 0x4); 200 201 BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); 202 PUSH_DATA (push, (size / 4) << 8); 203 204 if (size) { 205 struct nouveau_mm_allocation *mm; 206 struct nouveau_bo *bo = NULL; 207 unsigned offset; 208 209 mm = nouveau_mm_allocate(screen->base.mm_GART, size, &bo, &offset); 210 assert(mm); 211 212 nouveau_bo_map(bo, 0, screen->base.client); 213 memcpy(bo->map + offset, input, size); 214 215 nouveau_bufctx_refn(nv50->bufctx, 0, bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); 216 nouveau_pushbuf_bufctx(push, nv50->bufctx); 217 nouveau_pushbuf_validate(push); 218 219 BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4); 220 nouveau_pushbuf_data(push, bo, offset, size); 221 222 nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); 223 nouveau_bo_ref(NULL, &bo); 224 nouveau_bufctx_reset(nv50->bufctx, 0); 225 } 226 } 227 228 static uint32_t 229 nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) 230 { 231 struct nv50_program *prog = nv50->compprog; 232 const struct nv50_ir_prog_symbol *syms = 233 (const struct nv50_ir_prog_symbol *)prog->cp.syms; 234 unsigned i; 235 236 for (i = 0; i < prog->cp.num_syms; ++i) { 237 if (syms[i].label == label) 238 return prog->code_base + syms[i].offset; 239 } 240 return prog->code_base; /* no symbols or symbol not found */ 241 } 242 243 void 244 nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) 245 { 246 struct nv50_context *nv50 = nv50_context(pipe); 247 struct nouveau_pushbuf *push = nv50->base.pushbuf; 248 unsigned block_size = info->block[0] * info->block[1] * info->block[2]; 249 struct nv50_program *cp = nv50->compprog; 250 bool ret; 251 252 ret = !nv50_state_validate_cp(nv50, ~0); 253 if (ret) { 254 NOUVEAU_ERR("Failed to launch grid !\n"); 255 return; 256 } 257 258 nv50_compute_upload_input(nv50, info->input); 259 260 BEGIN_NV04(push, NV50_CP(CP_START_ID), 1); 261 PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc)); 262 263 BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1); 264 PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); 265 BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1); 266 PUSH_DATA (push, cp->max_gpr); 267 268 /* grid/block setup */ 269 BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2); 270 PUSH_DATA (push, info->block[1] << 16 | info->block[0]); 271 PUSH_DATA (push, info->block[2]); 272 BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1); 273 PUSH_DATA (push, 1 << 16 | block_size); 274 BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1); 275 PUSH_DATA (push, 1); 276 BEGIN_NV04(push, NV50_CP(GRIDDIM), 1); 277 PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]); 278 BEGIN_NV04(push, NV50_CP(GRIDID), 1); 279 PUSH_DATA (push, 1); 280 281 /* kernel launching */ 282 BEGIN_NV04(push, NV50_CP(LAUNCH), 1); 283 PUSH_DATA (push, 0); 284 BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); 285 PUSH_DATA (push, 0); 286 287 /* bind a compute shader clobbers fragment shader state */ 288 nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; 289 } 290