Home | History | Annotate | Download | only in nv50
      1 /*
      2  * Copyright 2013 Ilia Mirkin
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice shall be included in
     12  * all copies or substantial portions of the Software.
     13  *
     14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20  * OTHER DEALINGS IN THE SOFTWARE.
     21  */
     22 
     23 #include "nv50/nv84_video.h"
     24 
     25 struct iparm {
     26    struct iseqparm {
     27       uint32_t chroma_format_idc; // 00
     28       uint32_t pad[(0x128 - 0x4) / 4];
     29       uint32_t log2_max_frame_num_minus4; // 128
     30       uint32_t pic_order_cnt_type; // 12c
     31       uint32_t log2_max_pic_order_cnt_lsb_minus4; // 130
     32       uint32_t delta_pic_order_always_zero_flag; // 134
     33       uint32_t num_ref_frames; // 138
     34       uint32_t pic_width_in_mbs_minus1; // 13c
     35       uint32_t pic_height_in_map_units_minus1; // 140
     36       uint32_t frame_mbs_only_flag; // 144
     37       uint32_t mb_adaptive_frame_field_flag; // 148
     38       uint32_t direct_8x8_inference_flag; // 14c
     39    } iseqparm; // 000
     40    struct ipicparm {
     41       uint32_t entropy_coding_mode_flag; // 00
     42       uint32_t pic_order_present_flag; // 04
     43       uint32_t num_slice_groups_minus1; // 08
     44       uint32_t slice_group_map_type; // 0c
     45       uint32_t pad1[0x60 / 4];
     46       uint32_t u70; // 70
     47       uint32_t u74; // 74
     48       uint32_t u78; // 78
     49       uint32_t num_ref_idx_l0_active_minus1; // 7c
     50       uint32_t num_ref_idx_l1_active_minus1; // 80
     51       uint32_t weighted_pred_flag; // 84
     52       uint32_t weighted_bipred_idc; // 88
     53       uint32_t pic_init_qp_minus26; // 8c
     54       uint32_t chroma_qp_index_offset; // 90
     55       uint32_t deblocking_filter_control_present_flag; // 94
     56       uint32_t constrained_intra_pred_flag; // 98
     57       uint32_t redundant_pic_cnt_present_flag; // 9c
     58       uint32_t transform_8x8_mode_flag; // a0
     59       uint32_t pad2[(0x1c8 - 0xa0 - 4) / 4];
     60       uint32_t second_chroma_qp_index_offset; // 1c8
     61       uint32_t u1cc; // 1cc
     62       uint32_t curr_pic_order_cnt; // 1d0
     63       uint32_t field_order_cnt[2]; // 1d4
     64       uint32_t curr_mvidx; // 1dc
     65       struct iref {
     66          uint32_t u00; // 00
     67          uint32_t field_is_ref; // 04 // bit0: top, bit1: bottom
     68          uint8_t is_long_term; // 08
     69          uint8_t non_existing; // 09
     70          uint8_t u0a; // 0a
     71          uint8_t u0b; // 0b
     72          uint32_t frame_idx; // 0c
     73          uint32_t field_order_cnt[2]; // 10
     74          uint32_t mvidx; // 18
     75          uint8_t field_pic_flag; // 1c
     76          uint8_t u1d; // 1d
     77          uint8_t u1e; // 1e
     78          uint8_t u1f; // 1f
     79          // 20
     80       } refs[0x10]; // 1e0
     81    } ipicparm; // 150
     82 };
     83 
     84 int
     85 nv84_decoder_bsp(struct nv84_decoder *dec,
     86                  struct pipe_h264_picture_desc *desc,
     87                  unsigned num_buffers,
     88                  const void *const *data,
     89                  const unsigned *num_bytes,
     90                  struct nv84_video_buffer *dest)
     91 {
     92    struct iparm params;
     93    uint32_t more_params[0x44 / 4] = {0};
     94    unsigned total_bytes = 0;
     95    int i;
     96    static const uint32_t end[] = {0x0b010000, 0, 0x0b010000, 0};
     97    char indexes[17] = {0};
     98    struct nouveau_pushbuf *push = dec->bsp_pushbuf;
     99    struct nouveau_pushbuf_refn bo_refs[] = {
    100       { dec->vpring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
    101       { dec->mbring, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
    102       { dec->bitstream, NOUVEAU_BO_RDWR | NOUVEAU_BO_GART },
    103       { dec->fence, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
    104    };
    105 
    106    nouveau_bo_wait(dec->fence, NOUVEAU_BO_RDWR, dec->client);
    107 
    108    STATIC_ASSERT(sizeof(struct iparm) == 0x530);
    109 
    110    memset(&params, 0, sizeof(params));
    111 
    112    dest->frame_num = dest->frame_num_max = desc->frame_num;
    113 
    114    for (i = 0; i < 16; i++) {
    115       struct iref *ref = &params.ipicparm.refs[i];
    116       struct nv84_video_buffer *frame = (struct nv84_video_buffer *)desc->ref[i];
    117       if (!frame) break;
    118       /* The frame index is relative to the last IDR frame. So once the frame
    119        * num goes back to 0, previous reference frames need to have a negative
    120        * index.
    121        */
    122       if (desc->frame_num >= frame->frame_num_max) {
    123          frame->frame_num_max = desc->frame_num;
    124       } else {
    125          frame->frame_num -= frame->frame_num_max + 1;
    126          frame->frame_num_max = desc->frame_num;
    127       }
    128       ref->non_existing = 0;
    129       ref->field_is_ref = (desc->top_is_reference[i] ? 1 : 0) |
    130          (desc->bottom_is_reference[i] ? 2 : 0);
    131       ref->is_long_term = desc->is_long_term[i];
    132       ref->field_order_cnt[0] = desc->field_order_cnt_list[i][0];
    133       ref->field_order_cnt[1] = desc->field_order_cnt_list[i][1];
    134       ref->frame_idx = frame->frame_num;
    135       ref->u00 = ref->mvidx = frame->mvidx;
    136       ref->field_pic_flag = desc->field_pic_flag;
    137       indexes[frame->mvidx] = 1;
    138    }
    139 
    140    /* Needs to be adjusted if we ever support non-4:2:0 videos */
    141    params.iseqparm.chroma_format_idc = 1;
    142 
    143    params.iseqparm.pic_width_in_mbs_minus1 = mb(dec->base.width) - 1;
    144    if (desc->field_pic_flag || desc->pps->sps->mb_adaptive_frame_field_flag)
    145       params.iseqparm.pic_height_in_map_units_minus1 = mb_half(dec->base.height) - 1;
    146    else
    147       params.iseqparm.pic_height_in_map_units_minus1 = mb(dec->base.height) - 1;
    148 
    149    if (desc->bottom_field_flag)
    150       params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[1];
    151    else
    152       params.ipicparm.curr_pic_order_cnt = desc->field_order_cnt[0];
    153    params.ipicparm.field_order_cnt[0] = desc->field_order_cnt[0];
    154    params.ipicparm.field_order_cnt[1] = desc->field_order_cnt[1];
    155    if (desc->is_reference) {
    156       if (dest->mvidx < 0) {
    157          for (i = 0; i < desc->num_ref_frames + 1; i++) {
    158             if (!indexes[i]) {
    159                dest->mvidx = i;
    160                break;
    161             }
    162          }
    163          assert(i != desc->num_ref_frames + 1);
    164       }
    165 
    166       params.ipicparm.u1cc = params.ipicparm.curr_mvidx = dest->mvidx;
    167    }
    168 
    169    params.iseqparm.num_ref_frames = desc->num_ref_frames;
    170    params.iseqparm.mb_adaptive_frame_field_flag = desc->pps->sps->mb_adaptive_frame_field_flag;
    171    params.ipicparm.constrained_intra_pred_flag = desc->pps->constrained_intra_pred_flag;
    172    params.ipicparm.weighted_pred_flag = desc->pps->weighted_pred_flag;
    173    params.ipicparm.weighted_bipred_idc = desc->pps->weighted_bipred_idc;
    174    params.iseqparm.frame_mbs_only_flag = desc->pps->sps->frame_mbs_only_flag;
    175    params.ipicparm.transform_8x8_mode_flag = desc->pps->transform_8x8_mode_flag;
    176    params.ipicparm.chroma_qp_index_offset = desc->pps->chroma_qp_index_offset;
    177    params.ipicparm.second_chroma_qp_index_offset = desc->pps->second_chroma_qp_index_offset;
    178    params.ipicparm.pic_init_qp_minus26 = desc->pps->pic_init_qp_minus26;
    179    params.ipicparm.num_ref_idx_l0_active_minus1 = desc->num_ref_idx_l0_active_minus1;
    180    params.ipicparm.num_ref_idx_l1_active_minus1 = desc->num_ref_idx_l1_active_minus1;
    181    params.iseqparm.log2_max_frame_num_minus4 = desc->pps->sps->log2_max_frame_num_minus4;
    182    params.iseqparm.pic_order_cnt_type = desc->pps->sps->pic_order_cnt_type;
    183    params.iseqparm.log2_max_pic_order_cnt_lsb_minus4 = desc->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
    184    params.iseqparm.delta_pic_order_always_zero_flag = desc->pps->sps->delta_pic_order_always_zero_flag;
    185    params.iseqparm.direct_8x8_inference_flag = desc->pps->sps->direct_8x8_inference_flag;
    186    params.ipicparm.entropy_coding_mode_flag = desc->pps->entropy_coding_mode_flag;
    187    params.ipicparm.pic_order_present_flag = desc->pps->bottom_field_pic_order_in_frame_present_flag;
    188    params.ipicparm.deblocking_filter_control_present_flag = desc->pps->deblocking_filter_control_present_flag;
    189    params.ipicparm.redundant_pic_cnt_present_flag = desc->pps->redundant_pic_cnt_present_flag;
    190 
    191    memcpy(dec->bitstream->map, &params, sizeof(params));
    192    for (i = 0; i < num_buffers; i++) {
    193       assert(total_bytes + num_bytes[i] < dec->bitstream->size / 2 - 0x700);
    194       memcpy(dec->bitstream->map + 0x700 + total_bytes, data[i], num_bytes[i]);
    195       total_bytes += num_bytes[i];
    196    }
    197    memcpy(dec->bitstream->map + 0x700 + total_bytes, end, sizeof(end));
    198    total_bytes += sizeof(end);
    199    more_params[1] = total_bytes;
    200    memcpy(dec->bitstream->map + 0x600, more_params, sizeof(more_params));
    201 
    202    PUSH_SPACE(push, 5 + 21 + 3 + 2 + 4 + 2);
    203    nouveau_pushbuf_refn(push, bo_refs, ARRAY_SIZE(bo_refs));
    204 
    205    /* Wait for the fence = 1 */
    206    BEGIN_NV04(push, SUBC_BSP(0x10), 4);
    207    PUSH_DATAh(push, dec->fence->offset);
    208    PUSH_DATA (push, dec->fence->offset);
    209    PUSH_DATA (push, 1);
    210    PUSH_DATA (push, 1);
    211 
    212    /* TODO: Use both halves of bitstream/vpring for alternating frames */
    213 
    214    /* Kick off the BSP */
    215    BEGIN_NV04(push, SUBC_BSP(0x400), 20);
    216    PUSH_DATA (push, dec->bitstream->offset >> 8);
    217    PUSH_DATA (push, (dec->bitstream->offset >> 8) + 7);
    218    PUSH_DATA (push, dec->bitstream->size / 2 - 0x700);
    219    PUSH_DATA (push, (dec->bitstream->offset >> 8) + 6);
    220    PUSH_DATA (push, 1);
    221    PUSH_DATA (push, dec->mbring->offset >> 8);
    222    PUSH_DATA (push, dec->frame_size);
    223    PUSH_DATA (push, (dec->mbring->offset + dec->frame_size) >> 8);
    224    PUSH_DATA (push, dec->vpring->offset >> 8);
    225    PUSH_DATA (push, dec->vpring->size / 2);
    226    PUSH_DATA (push, dec->vpring_residual);
    227    PUSH_DATA (push, dec->vpring_ctrl);
    228    PUSH_DATA (push, 0);
    229    PUSH_DATA (push, dec->vpring_residual);
    230    PUSH_DATA (push, dec->vpring_residual + dec->vpring_ctrl);
    231    PUSH_DATA (push, dec->vpring_deblock);
    232    PUSH_DATA (push, (dec->vpring->offset + dec->vpring_ctrl +
    233                      dec->vpring_residual + dec->vpring_deblock) >> 8);
    234    PUSH_DATA (push, 0x654321);
    235    PUSH_DATA (push, 0);
    236    PUSH_DATA (push, 0x100008);
    237 
    238    BEGIN_NV04(push, SUBC_BSP(0x620), 2);
    239    PUSH_DATA (push, 0);
    240    PUSH_DATA (push, 0);
    241 
    242    BEGIN_NV04(push, SUBC_BSP(0x300), 1);
    243    PUSH_DATA (push, 0);
    244 
    245    /* Write fence = 2, intr */
    246    BEGIN_NV04(push, SUBC_BSP(0x610), 3);
    247    PUSH_DATAh(push, dec->fence->offset);
    248    PUSH_DATA (push, dec->fence->offset);
    249    PUSH_DATA (push, 2);
    250 
    251    BEGIN_NV04(push, SUBC_BSP(0x304), 1);
    252    PUSH_DATA (push, 0x101);
    253    PUSH_KICK (push);
    254    return 0;
    255 }
    256