Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 *******************************************************************************
     23 * @file
     24 *  ih264e_intra_modes_eval.h
     25 *
     26 * @brief
     27 *  This file contains declarations of routines that perform rate distortion
     28 *  analysis on a macroblock if coded as intra.
     29 *
     30 * @author
     31 *  ittiam
     32 *
     33 * @remarks
     34 *  none
     35 *
     36 *******************************************************************************
     37 */
     38 
     39 #ifndef IH264E_INTRA_MODES_EVAL_H_
     40 #define IH264E_INTRA_MODES_EVAL_H_
     41 
     42 /*****************************************************************************/
     43 /* Extern Function Declarations                                              */
     44 /*****************************************************************************/
     45 
     46 /**
     47 ******************************************************************************
     48 *
     49 * @brief
     50 *  Derivation process for macroblock availability
     51 *
     52 * @par   Description
     53 *  Calculates the availability of the left, top, top-right and top-left macroblocks.
     54 *
     55 * @param[in] ps_proc_ctxt
     56 *  pointer to process context (handle)
     57 *
     58 * @remarks Based on section 6.4.5 in H264 spec
     59 *
     60 * @return  none
     61 *
     62 ******************************************************************************
     63 */
     64 void ih264e_derive_nghbr_avbl_of_mbs
     65         (
     66             process_ctxt_t *ps_proc_ctxt
     67         );
     68 
     69 /**
     70 ******************************************************************************
     71 *
     72 * @brief
     73 *  Derivation process for subblock/partition availability
     74 *
     75 * @par   Description
     76 *  Calculates the availability of the left, top, top-right and top-left
     77 *  subblocks or partitions.
     78 *
     79 * @param[in]    s_ngbr_avbl
     80 *  pointer to a block_neighbors_t (presumably the current macroblock's neighbor availability - confirm)
     81 *
     82 * @param[in]    i1_pel_pos_x
     83 *  column position of the pel wrt the current block
     84 *
     85 * @param[in]    i1_pel_pos_y
     86 *  row position of the pel wrt the current block
     87 *
     88 * @remarks     Assumptions: before calling this function it is assumed that
     89 *   the neighbor availability of the current macroblock is already derived.
     90 *   Based on table 6-3 of H264 specification
     91 *
     92 * @return      availability status (yes or no)
     93 *
     94 ******************************************************************************
     95 */
     96 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions
     97         (
     98             block_neighbors_t *s_ngbr_avbl,
     99             WORD8 i1_pel_pos_x,
    100             WORD8 i1_pel_pos_y
    101         );
    102 
    103 /**
    104 ******************************************************************************
    105 *
    106 * @brief
    107 *  Evaluate best intra 16x16 mode (rate distortion opt off)
    108 *
    109 * @par Description
    110 *  This function evaluates all the possible intra 16x16 modes and finds the mode
    111 *  that best represents the macro-block (least distortion) and occupies fewer
    112 *  bits in the bit-stream.
    113 *
    114 * @param[in]   ps_proc_ctxt
    115 *  pointer to process context (handle)
    116 *
    117 * @remarks
    118 *  Ideally the cost of encoding a macroblock is calculated as
    119 *  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
    120 *  input block and the reconstructed block and rate is the number of bits taken
    121 *  to place the macroblock in the bit-stream. In this routine the rate does not
    122 *  exactly equal the total number of bits it takes; rather it covers the header
    123 *  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
    124 *  and residual bits fall into texture bits, the number of bits taken to encode
    125 *  mbtype is considered as the rate when computing cost. Further, the distortion
    126 *  is approximated as the deviation between the input and the predicted block,
    127 *  as opposed to the input and the reconstructed block.
    128 *
    129 *  NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
    130 *  the SAD and cost are one and the same.
    131 *
    132 * @return     none
    133 *
    134 ******************************************************************************
    135 */
    136 void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff
    137         (
    138             process_ctxt_t *ps_proc_ctxt
    139         );
    140 
    141 /**
    142 ******************************************************************************
    143 *
    144 * @brief
    145 *  Evaluate best intra 8x8 mode (rate distortion opt off, per the _rdoptoff name suffix)
    146 *
    147 * @par Description
    148 *  This function evaluates all the possible intra 8x8 modes and finds the mode
    149 *  that best represents the macro-block (least distortion) and occupies fewer
    150 *  bits in the bit-stream.
    151 *
    152 * @param[in]    ps_proc_ctxt
    153 *  pointer to process context (handle)
    154 *
    155 * @remarks Ideally the cost of encoding a macroblock is calculated as
    156 *  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
    157 *  input block and the reconstructed block and rate is the number of bits taken
    158 *  to place the macroblock in the bit-stream. In this routine the rate does not
    159 *  exactly equal the total number of bits it takes; rather it covers the header
    160 *  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
    161 *  and residual bits fall into texture bits, the number of bits taken to encode
    162 *  mbtype is considered as the rate when computing cost. Further, the distortion
    163 *  is approximated as the deviation between the input and the predicted block,
    164 *  as opposed to the input and the reconstructed block.
    165 *
    166 *  NOTE: TODO: This function needs to be tested
    167 *
    168 *  @return      none
    169 *
    170 ******************************************************************************
    171 */
    172 void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff
    173         (
    174             process_ctxt_t *ps_proc_ctxt
    175         );
    176 
    177 /**
    178 ******************************************************************************
    179 *
    180 * @brief
    181 *  Evaluate best intra 4x4 mode (rate distortion opt on)
    182 *
    183 * @par Description
    184 *  This function evaluates all the possible intra 4x4 modes and finds the mode
    185 *  that best represents the macro-block (least distortion) and occupies fewer
    186 *  bits in the bit-stream.
    187 *
    188 * @param[in]    ps_proc_ctxt
    189 *  pointer to process context (handle)
    190 *
    191 * @remarks
    192 *  Ideally the cost of encoding a macroblock is calculated as
    193 *  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
    194 *  input block and the reconstructed block and rate is the number of bits taken
    195 *  to place the macroblock in the bit-stream. In this routine the rate does not
    196 *  exactly equal the total number of bits it takes; rather it covers the header
    197 *  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
    198 *  and residual bits fall into texture bits, the number of bits taken to encode
    199 *  mbtype is considered as the rate when computing cost. Further, the distortion
    200 *  is approximated as the deviation between the input and the predicted block,
    201 *  as opposed to the input and the reconstructed block.
    202 *
    203 *  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
    204 *  24*lambda is added to the SAD before comparison with the best SAD for
    205 *  inter prediction. This is an empirical value to prevent using too many intra
    206 *  blocks.
    207 *
    208 * @return      none
    209 *
    210 ******************************************************************************
    211 */
    212 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton
    213         (
    214             process_ctxt_t *ps_proc_ctxt
    215         );
    216 
    217 /**
    218 ******************************************************************************
    219 *
    220 * @brief
    221 *  Evaluate best intra 4x4 mode (rate distortion opt off)
    222 *
    223 * @par Description
    224 *  This function evaluates all the possible intra 4x4 modes and finds the mode
    225 *  that best represents the macro-block (least distortion) and occupies fewer
    226 *  bits in the bit-stream.
    227 *
    228 * @param[in]    ps_proc_ctxt
    229 *  pointer to process context (handle)
    230 *
    231 * @remarks
    232 *  Ideally the cost of encoding a macroblock is calculated as
    233 *  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
    234 *  input block and the reconstructed block and rate is the number of bits taken
    235 *  to place the macroblock in the bit-stream. In this routine the rate does not
    236 *  exactly equal the total number of bits it takes; rather it covers the header
    237 *  bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
    238 *  and residual bits fall into texture bits, the number of bits taken to encode
    239 *  mbtype is considered as the rate when computing cost. Further, the distortion
    240 *  is approximated as the deviation between the input and the predicted block,
    241 *  as opposed to the input and the reconstructed block.
    242 *
    243 *  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
    244 *  24*lambda is added to the SAD before comparison with the best SAD for
    245 *  inter prediction. This is an empirical value to prevent using too many intra
    246 *  blocks.
    247 *
    248 * @return      none
    249 *
    250 ******************************************************************************
    251 */
    252 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff
    253         (
    254             process_ctxt_t *ps_proc_ctxt
    255         );
    256 
    257 /**
    258 ******************************************************************************
    259 *
    260 * @brief
    261 *  Evaluate best chroma intra 8x8 mode (rate distortion opt off)
    262 *
    263 * @par Description
    264 *  This function evaluates all the possible chroma intra 8x8 modes and finds
    265 *  the mode that best represents the macroblock (least distortion) and occupies
    266 *  fewer bits in the bitstream.
    267 *
    268 * @param[in] ps_proc_ctxt
    269 *  pointer to process context (handle)
    270 *
    271 * @remarks
    272 *  For chroma, the best intra pred mode is calculated based only on SAD
    273 *
    274 * @returns none
    275 *
    276 ******************************************************************************
    277 */
    278 void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff
    279         (
    280             process_ctxt_t *ps_proc_ctxt
    281         );
    282 
    283 
    284 /**
    285 ******************************************************************************
    286 *
    287 * @brief
    288 *  Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
    289 *  prediction.
    290 *
    291 * @par Description
    292 *  This function evaluates the first three 16x16 modes, computes the
    293 *  corresponding sad and returns the buffer predicted with the best mode.
    294 *
    295 * @param[in] pu1_src
    296 *  UWORD8 pointer to the source
    297 *
    298 * @param[in] pu1_ngbr_pels_i16
    299 *  UWORD8 pointer to neighbouring pels
    300 *
    301 * @param[out] pu1_dst
    302 *  UWORD8 pointer to the destination
    303 *
    304 * @param[in] src_strd
    305 *  integer source stride
    306 *
    307 * @param[in] dst_strd
    308 *  integer destination stride
    309 *
    310 * @param[in] u4_n_avblty
    311 *  availability of neighbouring pixels (declared as WORD32 despite the u4 prefix)
    312 *
    313 * @param[out] u4_intra_mode
    314 *  Pointer to the variable in which best mode is returned
    315 *
    316 * @param[out] pu4_sadmin
    317 *  Pointer to the variable in which minimum sad is returned (a WORD32 pointer)
    318 *
    319 * @param[in] u4_valid_intra_modes
    320 *  Says which modes are valid
    321 *
    322 * @returns      none
    323 *
    324 ******************************************************************************
    325 */
    326 typedef void ih264e_evaluate_intra_modes_ft(UWORD8 *pu1_src,
    327                                             UWORD8 *pu1_ngbr_pels_i16,
    328                                             UWORD8 *pu1_dst,
    329                                             UWORD32 src_strd,
    330                                             UWORD32 dst_strd,
    331                                             WORD32 u4_n_avblty,
    332                                             UWORD32 *u4_intra_mode,
    333                                             WORD32 *pu4_sadmin,
    334                                             UWORD32 u4_valid_intra_modes);
    335 
    336 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes;
    337 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes;
    338 
    339 /* assembly implementations (_a9q and _av8 suffixed variants) */
    340 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_a9q;
    341 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_a9q;
    342 
    343 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_av8;
    344 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_av8;
    345 
    346 /* x86 intrinsics implementations (_ssse3 suffixed variants) */
    347 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra16x16_modes_ssse3;
    348 ih264e_evaluate_intra_modes_ft ih264e_evaluate_intra_chroma_modes_ssse3;
    349 
    350 /**
    351 ******************************************************************************
    352 *
    353 * @brief
    354 *  Evaluate best intra 4x4 mode and perform prediction.
    355 *
    356 * @par Description
    357 *  This function evaluates the 4x4 modes, computes the corresponding sad
    358 *  and returns the buffer predicted with the best mode.
    359 *
    360 * @param[in] pu1_src
    361 *  UWORD8 pointer to the source
    362 *
    363 * @param[in] pu1_ngbr_pels
    364 *  UWORD8 pointer to neighbouring pels
    365 *
    366 * @param[out] pu1_dst
    367 *  UWORD8 pointer to the destination
    368 *
    369 * @param[in] src_strd
    370 *  integer source stride
    371 *
    372 * @param[in] dst_strd
    373 *  integer destination stride
    374 *
    375 * @param[in] u4_n_avblty
    376 *  availability of neighbouring pixels (declared as WORD32 despite the u4 prefix)
    377 *
    378 * @param[out] u4_intra_mode
    379 *  Pointer to the variable in which best mode is returned
    380 *
    381 * @param[out] pu4_sadmin
    382 *  Pointer to the variable in which minimum cost is returned (a WORD32 pointer)
    383 *
    384 * @param[in] u4_valid_intra_modes
    385 *  Says which modes are valid
    386 *
    387 * @param[in] u4_lambda
    388 *  Lambda value for computing cost from SAD
    389 *
    390 * @param[in] u4_predictd_mode
    391 *  Predicted mode for cost computation
    392 *
    393 * @returns      none
    394 *
    395 ******************************************************************************
    396 */
    397 typedef void ih264e_evaluate_intra_4x4_modes_ft(UWORD8 *pu1_src,
    398                                                 UWORD8 *pu1_ngbr_pels,
    399                                                 UWORD8 *pu1_dst,
    400                                                 UWORD32 src_strd,
    401                                                 UWORD32 dst_strd,
    402                                                 WORD32 u4_n_avblty,
    403                                                 UWORD32 *u4_intra_mode,
    404                                                 WORD32 *pu4_sadmin,
    405                                                 UWORD32 u4_valid_intra_modes,
    406                                                 UWORD32  u4_lambda,
    407                                                 UWORD32 u4_predictd_mode);
    408 
    409 ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes;
    410 
    411 /* x86 intrinsics implementation (_ssse3 suffixed variant) */
    412 ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_ssse3;
    413 
    414 /* assembly implementations (_a9q and _av8 suffixed variants) */
    415 ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_a9q;
    416 ih264e_evaluate_intra_4x4_modes_ft ih264e_evaluate_intra_4x4_modes_av8;
    417 
    418 #endif /* IH264E_INTRA_MODES_EVAL_H_ */
    419