1 /* 2 * Copyright 2011-2013 Maarten Lankhorst 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include <nouveau.h> 24 25 #include "pipe/p_defines.h" 26 #include "vl/vl_video_buffer.h" 27 #include "util/u_video.h" 28 29 struct nouveau_vp3_video_buffer { 30 struct pipe_video_buffer base; 31 unsigned num_planes, valid_ref; 32 struct pipe_resource *resources[VL_NUM_COMPONENTS]; 33 struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS]; 34 struct pipe_sampler_view *sampler_view_components[VL_NUM_COMPONENTS]; 35 struct pipe_surface *surfaces[VL_NUM_COMPONENTS * 2]; 36 }; 37 38 #define SLICE_SIZE 0x200 39 #define VP_OFFSET 0x200 40 #define COMM_OFFSET 0x500 41 42 #define NOUVEAU_VP3_BSP_RESERVED_SIZE 0x700 43 44 #define NOUVEAU_VP3_DEBUG_FENCE 0 45 46 #if NOUVEAU_VP3_DEBUG_FENCE 47 # define NOUVEAU_VP3_VIDEO_QDEPTH 1 48 #else 49 # define NOUVEAU_VP3_VIDEO_QDEPTH 2 50 #endif 51 52 #define SUBC_BSP(m) dec->bsp_idx, (m) 53 #define SUBC_VP(m) dec->vp_idx, (m) 54 #define SUBC_PPP(m) dec->ppp_idx, (m) 55 56 union pipe_desc { 57 struct pipe_picture_desc *base; 58 struct pipe_mpeg12_picture_desc *mpeg12; 59 struct pipe_mpeg4_picture_desc *mpeg4; 60 struct pipe_vc1_picture_desc *vc1; 61 struct pipe_h264_picture_desc *h264; 62 }; 63 64 struct nouveau_vp3_decoder { 65 struct pipe_video_codec base; 66 struct nouveau_client *client; 67 struct nouveau_object *channel[3], *bsp, *vp, *ppp; 68 struct nouveau_pushbuf *pushbuf[3]; 69 70 #if NOUVEAU_VP3_DEBUG_FENCE 71 /* dump fence and comm, as needed.. */ 72 unsigned *fence_map; 73 struct comm *comm; 74 75 struct nouveau_bo *fence_bo; 76 #endif 77 78 struct nouveau_bo *fw_bo, *bitplane_bo; 79 80 // array size max_references + 2, contains unpostprocessed images 81 // added at the end of ref_bo is a tmp array 82 // tmp is an array for h264, with each member being used for a ref frame or current 83 // target.. size = (((mb(w)*((mb(h)+1)&~1))+3)>>2)<<8 * (max_references+1) 84 // for other codecs, it simply seems that size = w*h is enough 85 // unsure what it's supposed to contain.. 86 struct nouveau_bo *ref_bo; 87 88 struct nouveau_bo *inter_bo[2]; 89 90 struct nouveau_bo *bsp_bo[NOUVEAU_VP3_VIDEO_QDEPTH]; 91 92 // bo's used by each cycle: 93 94 // bsp_bo: contains raw bitstream data and parameters for BSP and VP. 95 // inter_bo: contains data shared between BSP and VP 96 // ref_bo: reference image data, used by PPP and VP 97 // bitplane_bo: contain bitplane data (similar to ref_bo), used by BSP only 98 // fw_bo: used by VP only. 99 100 // Needed amount of copies in optimal case: 101 // 2 copies of inter_bo, VP would process the last inter_bo, while BSP is 102 // writing out a new set. 103 // NOUVEAU_VP3_VIDEO_QDEPTH copies of bsp_bo. We don't want to block the 104 // pipeline ever, and give shaders a chance to run as well. 105 106 struct { 107 struct nouveau_vp3_video_buffer *vidbuf; 108 unsigned last_used; 109 unsigned field_pic_flag : 1; 110 unsigned decoded_top : 1; 111 unsigned decoded_bottom : 1; 112 unsigned decoded_first : 1; 113 } refs[17]; 114 unsigned fence_seq, fw_sizes, last_frame_num, tmp_stride, ref_stride; 115 116 unsigned bsp_idx, vp_idx, ppp_idx; 117 118 /* End of the bsp bo where new data should be appended between one begin/end 119 * frame. 120 */ 121 char *bsp_ptr; 122 }; 123 124 struct comm { 125 uint32_t bsp_cur_index; // 000 126 uint32_t byte_ofs; // 004 127 uint32_t status[0x10]; // 008 128 uint32_t pos[0x10]; // 048 129 uint8_t pad[0x100 - 0x88]; // 0a0 bool comm_encrypted 130 131 uint32_t pvp_cur_index; // 100 132 uint32_t acked_byte_ofs; // 104 133 uint32_t status_vp[0x10]; // 108 134 uint16_t mb_y[0x10]; //148 135 uint32_t pvp_stage; // 168 0xeeXX 136 uint16_t parse_endpos_index; // 16c 137 uint16_t irq_index; // 16e 138 uint8_t irq_470[0x10]; // 170 139 uint32_t irq_pos[0x10]; // 180 140 uint32_t parse_endpos[0x10]; // 1c0 141 }; 142 143 static inline uint32_t nouveau_vp3_video_align(uint32_t h) 144 { 145 return ((h+0x3f)&~0x3f); 146 }; 147 148 static inline uint32_t mb(uint32_t coord) 149 { 150 return (coord + 0xf)>>4; 151 } 152 153 static inline uint32_t mb_half(uint32_t coord) 154 { 155 return (coord + 0x1f)>>5; 156 } 157 158 static inline uint64_t 159 nouveau_vp3_video_addr(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target) 160 { 161 uint64_t ret; 162 if (target) 163 ret = dec->ref_stride * target->valid_ref; 164 else 165 ret = dec->ref_stride * (dec->base.max_references+1); 166 return dec->ref_bo->offset + ret; 167 } 168 169 static inline void 170 nouveau_vp3_ycbcr_offsets(struct nouveau_vp3_decoder *dec, uint32_t *y2, 171 uint32_t *cbcr, uint32_t *cbcr2) 172 { 173 uint32_t w = mb(dec->base.width), size; 174 *y2 = mb_half(dec->base.height)*w; 175 *cbcr = *y2 * 2; 176 *cbcr2 = *cbcr + w * (nouveau_vp3_video_align(dec->base.height)>>6); 177 178 /* The check here should never fail because it means a bug 179 * in the code rather than a bug in hardware.. 180 */ 181 size = (2 * (*cbcr2 - *cbcr) + *cbcr) << 8; 182 if (size > dec->ref_stride) { 183 debug_printf("Overshot ref_stride (%u) with size %u and ofs (%u,%u,%u)\n", 184 dec->ref_stride, size, *y2<<8, *cbcr<<8, *cbcr2<<8); 185 *y2 = *cbcr = *cbcr2 = 0; 186 assert(size <= dec->ref_stride); 187 } 188 } 189 190 static inline void 191 nouveau_vp3_inter_sizes(struct nouveau_vp3_decoder *dec, uint32_t slice_count, 192 uint32_t *slice_size, uint32_t *bucket_size, 193 uint32_t *ring_size) 194 { 195 *slice_size = (SLICE_SIZE * slice_count)>>8; 196 if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_MPEG12) 197 *bucket_size = 0; 198 else 199 *bucket_size = mb(dec->base.width) * 3; 200 *ring_size = (dec->inter_bo[0]->size >> 8) - *bucket_size - *slice_size; 201 } 202 203 struct pipe_video_buffer * 204 nouveau_vp3_video_buffer_create(struct pipe_context *pipe, 205 const struct pipe_video_buffer *templat, 206 int flags); 207 208 void 209 nouveau_vp3_decoder_init_common(struct pipe_video_codec *decoder); 210 211 int 212 nouveau_vp3_load_firmware(struct nouveau_vp3_decoder *dec, 213 enum pipe_video_profile profile, 214 unsigned chipset); 215 216 void 217 nouveau_vp3_bsp_begin(struct nouveau_vp3_decoder *dec); 218 219 void 220 nouveau_vp3_bsp_next(struct nouveau_vp3_decoder *dec, unsigned num_buffers, 221 const void *const *data, const unsigned *num_bytes); 222 223 uint32_t 224 nouveau_vp3_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc); 225 226 void 227 nouveau_vp3_vp_caps(struct nouveau_vp3_decoder *dec, union pipe_desc desc, 228 struct nouveau_vp3_video_buffer *target, unsigned comm_seq, 229 unsigned *caps, unsigned *is_ref, 230 struct nouveau_vp3_video_buffer *refs[16]); 231 232 int 233 nouveau_vp3_screen_get_video_param(struct pipe_screen *pscreen, 234 enum pipe_video_profile profile, 235 enum pipe_video_entrypoint entrypoint, 236 enum pipe_video_cap param); 237 238 boolean 239 nouveau_vp3_screen_video_supported(struct pipe_screen *screen, 240 enum pipe_format format, 241 enum pipe_video_profile profile, 242 enum pipe_video_entrypoint entrypoint); 243