1 /* 2 * Copyright 2013 Ilia Mirkin 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "nv50/nv84_video.h" 24 25 struct iparm { 26 struct iseqparm { 27 uint32_t chroma_format_idc; // 00 28 uint32_t pad[(0x128 - 0x4) / 4]; 29 uint32_t log2_max_frame_num_minus4; // 128 30 uint32_t pic_order_cnt_type; // 12c 31 uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130 32 uint32_t delta_pic_order_always_zero_flag; // 134 33 uint32_t num_ref_frames; // 138 34 uint32_t pic_width_in_mbs_minus1; // 13c 35 uint32_t pic_height_in_map_units_minus1; // 140 36 uint32_t frame_mbs_only_flag; // 144 37 uint32_t mb_adaptive_frame_field_flag; // 148 38 uint32_t direct_8x8_inference_flag; // 14c 39 } iseqparm; // 000 40 struct ipicparm { 41 uint32_t entropy_coding_mode_flag; // 00 42 uint32_t pic_order_present_flag; // 04 43 uint32_t num_slice_groups_minus1; // 08 44 uint32_t slice_group_map_type; // 0c 45 uint32_t pad1[0x60 / 4]; 46 uint32_t u70; // 70 47 uint32_t u74; // 74 48 uint32_t u78; // 78 49 uint32_t num_ref_idx_l0_active_minus1; // 7c 50 uint32_t num_ref_idx_l1_active_minus1; // 80 51 uint32_t weighted_pred_flag; // 84 52 uint32_t weighted_bipred_idc; // 88 53 uint32_t pic_init_qp_minus26; // 8c 54 uint32_t chroma_qp_index_offset; // 90 55 uint32_t deblocking_filter_control_present_flag; // 94 56 uint32_t constrained_intra_pred_flag; // 98 57 uint32_t redundant_pic_cnt_present_flag; // 9c 58 uint32_t transform_8x8_mode_flag; // a0 59 uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4]; 60 uint32_t second_chroma_qp_index_offset; // 1c8 61 uint32_t u1cc; // 1cc 62 uint32_t curr_pic_order_cnt; // 1d0 63 uint32_t field_order_cnt[2]; // 1d4 64 uint32_t curr_mvidx; // 1dc 65 struct iref { 66 uint32_t u00; // 00 67 uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom 68 uint8_t is_long_term; // 08 69 uint8_t non_existing; // 09 70 uint8_t u0a; // 0a 71 uint8_t u0b; // 0b 72 uint32_t frame_idx; // 0c 73 uint32_t field_order_cnt[2]; // 10 74 uint32_t mvidx; // 18 75 uint8_t field_pic_flag; // 1c 76 uint8_t u1d; // 1d 77 uint8_t u1e; // 1e 78 uint8_t u1f; // 1f 79 // 20 80 } refs[0x10]; // 1e0 81 } ipicparm; // 150 82 }; 83 84 int 85 nv84_decoder_bsp(struct nv84_decoder *dec, 86 struct pipe_h264_picture_desc *desc, 87 unsigned num_buffers, 88 const void *const *data, 89 const unsigned *num_bytes, 90 struct nv84_video_buffer *dest) 91 { 92 struct iparm params; 93 uint32_t more_params[0x44 / 4] = {0}; 94 unsigned total_bytes = 0; 95 int i; 96 static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0}; 97 char indexes[17] = {0}; 98 struct nouveau_pushbuf *push = dec->bsp_pushbuf; 99 struct nouveau_pushbuf_refn bo_refs[] = { 100 { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, 101 { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, 102 { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART }, 103 { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM }, 104 }; 105 106 nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client); 107 108 STATIC_ASSERT(sizeof(struct iparm) == 0x530); 109 110 memset(¶ms, 0, sizeof(params)); 111 112 dest->frame_num = dest->frame_num_max = desc->frame_num; 113 114 for (i = 0; i < 16; i++) { 115 struct iref *ref = ¶ms.ipicparm.refs[i]; 116 struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i]; 117 if (!frame) break; 118 /* The frame index is relative to the last IDR frame. So once the frame 119 * num goes back to 0, previous reference frames need to have a negative 120 * index. 121 */ 122 if (desc->frame_num >= frame->frame_num_max) { 123 frame->frame_num_max = desc->frame_num; 124 } else { 125 frame->frame_num -= frame->frame_num_max + 1; 126 frame->frame_num_max = desc->frame_num; 127 } 128 ref->non_existing = 0; 129 ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) | 130 (desc->bottom_is_reference[i] ? 2 : 0); 131 ref->is_long_term = desc->is_long_term[i]; 132 ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0]; 133 ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1]; 134 ref->frame_idx = frame->frame_num; 135 ref->u00 = ref->mvidx = frame->mvidx; 136 ref->field_pic_flag = desc->field_pic_flag; 137 indexes[frame->mvidx] = 1; 138 } 139 140 /* Needs to be adjusted if we ever support non-4:2:0 videos */ 141 params.iseqparm.chroma_format_idc = 1; 142 143 params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1; 144 if (desc->field_pic_flag || desc->pps->sps->mb_adaptive_frame_field_flag) 145 params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1; 146 else 147 params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1; 148 149 if (desc->bottom_field_flag) 150 params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1]; 151 else 152 params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0]; 153 params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0]; 154 params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1]; 155 if (desc->is_reference) { 156 if (dest->mvidx < 0) { 157 for (i = 0; i < desc->num_ref_frames + 1; i++) { 158 if (!indexes[i]) { 159 dest->mvidx = i; 160 break; 161 } 162 } 163 assert(i != desc->num_ref_frames + 1); 164 } 165 166 params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx; 167 } 168 169 params.iseqparm.num_ref_frames = desc->num_ref_frames; 170 params.iseqparm.mb_adaptive_frame_field_flag = desc->pps->sps->mb_adaptive_frame_field_flag; 171 params.ipicparm.constrained_intra_pred_flag = desc->pps->constrained_intra_pred_flag; 172 params.ipicparm.weighted_pred_flag = desc->pps->weighted_pred_flag; 173 params.ipicparm.weighted_bipred_idc = desc->pps->weighted_bipred_idc; 174 params.iseqparm.frame_mbs_only_flag = desc->pps->sps->frame_mbs_only_flag; 175 params.ipicparm.transform_8x8_mode_flag = desc->pps->transform_8x8_mode_flag; 176 params.ipicparm.chroma_qp_index_offset = desc->pps->chroma_qp_index_offset; 177 params.ipicparm.second_chroma_qp_index_offset = desc->pps->second_chroma_qp_index_offset; 178 params.ipicparm.pic_init_qp_minus26 = desc->pps->pic_init_qp_minus26; 179 params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1; 180 params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1; 181 params.iseqparm.log2_max_frame_num_minus4 = desc->pps->sps->log2_max_frame_num_minus4; 182 params.iseqparm.pic_order_cnt_type = desc->pps->sps->pic_order_cnt_type; 183 params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->pps->sps->log2_max_pic_order_cnt_lsb_minus4; 184 params.iseqparm.delta_pic_order_always_zero_flag = desc->pps->sps->delta_pic_order_always_zero_flag; 185 params.iseqparm.direct_8x8_inference_flag = desc->pps->sps->direct_8x8_inference_flag; 186 params.ipicparm.entropy_coding_mode_flag = desc->pps->entropy_coding_mode_flag; 187 params.ipicparm.pic_order_present_flag = desc->pps->bottom_field_pic_order_in_frame_present_flag; 188 params.ipicparm.deblocking_filter_control_present_flag = desc->pps->deblocking_filter_control_present_flag; 189 params.ipicparm.redundant_pic_cnt_present_flag = desc->pps->redundant_pic_cnt_present_flag; 190 191 memcpy(dec->bitstream->map, ¶ms, sizeof(params)); 192 for (i = 0; i < num_buffers; i++) { 193 assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700); 194 memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]); 195 total_bytes += num_bytes[i]; 196 } 197 memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end)); 198 total_bytes += sizeof(end); 199 more_params[1] = total_bytes; 200 memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params)); 201 202 PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2); 203 nouveau_pushbuf_refn(push, bo_refs, ARRAY_SIZE(bo_refs)); 204 205 /* Wait for the fence = 1 */ 206 BEGIN_NV04(push, SUBC_BSP(0x10), 4); 207 PUSH_DATAh(push, dec->fence->offset); 208 PUSH_DATA (push, dec->fence->offset); 209 PUSH_DATA (push, 1); 210 PUSH_DATA (push, 1); 211 212 /* TODO: Use both halves of bitstream/vpring for alternating frames */ 213 214 /* Kick off the BSP */ 215 BEGIN_NV04(push, SUBC_BSP(0x400), 20); 216 PUSH_DATA (push, dec->bitstream->offset >> 8); 217 PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7); 218 PUSH_DATA (push, dec->bitstream->size / 2 - 0x700); 219 PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6); 220 PUSH_DATA (push, 1); 221 PUSH_DATA (push, dec->mbring->offset >> 8); 222 PUSH_DATA (push, dec->frame_size); 223 PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8); 224 PUSH_DATA (push, dec->vpring->offset >> 8); 225 PUSH_DATA (push, dec->vpring->size / 2); 226 PUSH_DATA (push, dec->vpring_residual); 227 PUSH_DATA (push, dec->vpring_ctrl); 228 PUSH_DATA (push, 0); 229 PUSH_DATA (push, dec->vpring_residual); 230 PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl); 231 PUSH_DATA (push, dec->vpring_deblock); 232 PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl + 233 dec->vpring_residual + dec->vpring_deblock) >> 8); 234 PUSH_DATA (push, 0x654321); 235 PUSH_DATA (push, 0); 236 PUSH_DATA (push, 0x100008); 237 238 BEGIN_NV04(push, SUBC_BSP(0x620), 2); 239 PUSH_DATA (push, 0); 240 PUSH_DATA (push, 0); 241 242 BEGIN_NV04(push, SUBC_BSP(0x300), 1); 243 PUSH_DATA (push, 0); 244 245 /* Write fence = 2, intr */ 246 BEGIN_NV04(push, SUBC_BSP(0x610), 3); 247 PUSH_DATAh(push, dec->fence->offset); 248 PUSH_DATA (push, dec->fence->offset); 249 PUSH_DATA (push, 2); 250 251 BEGIN_NV04(push, SUBC_BSP(0x304), 1); 252 PUSH_DATA (push, 0x101); 253 PUSH_KICK (push); 254 return 0; 255 } 256