Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevcd_fmt_conv.c
     22 *
     23 * @brief
     24 *  Contains functions for format conversion or frame copy of output buffer
     25 *
     26 * @author
     27 *  Harish
     28 *
     29 * @par List of Functions:
     30 *
     31 * @remarks
     32 *  None
     33 *
     34 *******************************************************************************
     35 */
     36 /*****************************************************************************/
     37 /* File Includes                                                             */
     38 /*****************************************************************************/
     39 #include <stdio.h>
     40 #include <stddef.h>
     41 #include <stdlib.h>
     42 #include <string.h>
     43 #include <assert.h>
     44 
     45 #include "ihevc_typedefs.h"
     46 #include "iv.h"
     47 #include "ivd.h"
     48 #include "ihevcd_cxa.h"
     49 #include "ithread.h"
     50 
     51 #include "ihevc_defs.h"
     52 #include "ihevc_debug.h"
     53 #include "ihevc_structs.h"
     54 #include "ihevc_macros.h"
     55 #include "ihevc_platform_macros.h"
     56 #include "ihevc_cabac_tables.h"
     57 #include "ihevc_disp_mgr.h"
     58 
     59 #include "ihevcd_defs.h"
     60 #include "ihevcd_function_selector.h"
     61 #include "ihevcd_structs.h"
     62 #include "ihevcd_error.h"
     63 #include "ihevcd_nal.h"
     64 #include "ihevcd_bitstream.h"
     65 #include "ihevcd_fmt_conv.h"
     66 #include "ihevcd_profile.h"
     67 
     68 /**
     69 *******************************************************************************
     70 *
     71 * @brief Function used from copying a 420SP buffer
     72 *
     73 * @par   Description
     74 * Function used from copying a 420SP buffer
     75 *
     76 * @param[in] pu1_y_src
     77 *   Input Y pointer
     78 *
     79 * @param[in] pu1_uv_src
     80 *   Input UV pointer (UV is interleaved either in UV or VU format)
     81 *
     82 * @param[in] pu1_y_dst
     83 *   Output Y pointer
     84 *
     85 * @param[in] pu1_uv_dst
     86 *   Output UV pointer (UV is interleaved in the same format as that of input)
     87 *
     88 * @param[in] wd
     89 *   Width
     90 *
     91 * @param[in] ht
     92 *   Height
     93 *
     94 * @param[in] src_y_strd
     95 *   Input Y Stride
     96 *
     97 * @param[in] src_uv_strd
     98 *   Input UV stride
     99 *
    100 * @param[in] dst_y_strd
    101 *   Output Y stride
    102 *
    103 * @param[in] dst_uv_strd
    104 *   Output UV stride
    105 *
    106 * @returns None
    107 *
    108 * @remarks In case there is a need to perform partial frame copy then
    109 * by passion appropriate source and destination pointers and appropriate
    110 * values for wd and ht it can be done
    111 *
    112 *******************************************************************************
    113 */
    114 void ihevcd_fmt_conv_420sp_to_rgb565(UWORD8 *pu1_y_src,
    115                                      UWORD8 *pu1_uv_src,
    116                                      UWORD16 *pu2_rgb_dst,
    117                                      WORD32 wd,
    118                                      WORD32 ht,
    119                                      WORD32 src_y_strd,
    120                                      WORD32 src_uv_strd,
    121                                      WORD32 dst_strd,
    122                                      WORD32 is_u_first)
    123 {
    124 
    125 
    126     WORD16  i2_r, i2_g, i2_b;
    127     UWORD32  u4_r, u4_g, u4_b;
    128     WORD16  i2_i, i2_j;
    129     UWORD8  *pu1_y_src_nxt;
    130     UWORD16 *pu2_rgb_dst_NextRow;
    131 
    132     UWORD8 *pu1_u_src, *pu1_v_src;
    133 
    134     if(is_u_first)
    135     {
    136         pu1_u_src = (UWORD8 *)pu1_uv_src;
    137         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
    138     }
    139     else
    140     {
    141         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
    142         pu1_v_src = (UWORD8 *)pu1_uv_src;
    143     }
    144 
    145     pu1_y_src_nxt   = pu1_y_src + src_y_strd;
    146     pu2_rgb_dst_NextRow = pu2_rgb_dst + dst_strd;
    147 
    148     for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
    149     {
    150         for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
    151         {
    152             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
    153             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
    154             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
    155 
    156             pu1_u_src += 2;
    157             pu1_v_src += 2;
    158             /* pixel 0 */
    159             /* B */
    160             u4_b = CLIP_U8(*pu1_y_src + i2_b);
    161             u4_b >>= 3;
    162             /* G */
    163             u4_g = CLIP_U8(*pu1_y_src + i2_g);
    164             u4_g >>= 2;
    165             /* R */
    166             u4_r = CLIP_U8(*pu1_y_src + i2_r);
    167             u4_r >>= 3;
    168 
    169             pu1_y_src++;
    170             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
    171 
    172             /* pixel 1 */
    173             /* B */
    174             u4_b = CLIP_U8(*pu1_y_src + i2_b);
    175             u4_b >>= 3;
    176             /* G */
    177             u4_g = CLIP_U8(*pu1_y_src + i2_g);
    178             u4_g >>= 2;
    179             /* R */
    180             u4_r = CLIP_U8(*pu1_y_src + i2_r);
    181             u4_r >>= 3;
    182 
    183             pu1_y_src++;
    184             *pu2_rgb_dst++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
    185 
    186             /* pixel 2 */
    187             /* B */
    188             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
    189             u4_b >>= 3;
    190             /* G */
    191             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
    192             u4_g >>= 2;
    193             /* R */
    194             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
    195             u4_r >>= 3;
    196 
    197             pu1_y_src_nxt++;
    198             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
    199 
    200             /* pixel 3 */
    201             /* B */
    202             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
    203             u4_b >>= 3;
    204             /* G */
    205             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
    206             u4_g >>= 2;
    207             /* R */
    208             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
    209             u4_r >>= 3;
    210 
    211             pu1_y_src_nxt++;
    212             *pu2_rgb_dst_NextRow++ = ((u4_r << 11) | (u4_g << 5) | u4_b);
    213 
    214         }
    215 
    216         pu1_u_src = pu1_u_src + src_uv_strd - wd;
    217         pu1_v_src = pu1_v_src + src_uv_strd - wd;
    218 
    219         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
    220         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
    221 
    222         pu2_rgb_dst = pu2_rgb_dst_NextRow - wd + dst_strd;
    223         pu2_rgb_dst_NextRow = pu2_rgb_dst_NextRow + (dst_strd << 1) - wd;
    224     }
    225 
    226 
    227 }
    228 
    229 void ihevcd_fmt_conv_420sp_to_rgba8888(UWORD8 *pu1_y_src,
    230                                        UWORD8 *pu1_uv_src,
    231                                        UWORD32 *pu4_rgba_dst,
    232                                        WORD32 wd,
    233                                        WORD32 ht,
    234                                        WORD32 src_y_strd,
    235                                        WORD32 src_uv_strd,
    236                                        WORD32 dst_strd,
    237                                        WORD32 is_u_first)
    238 {
    239 
    240 
    241     WORD16  i2_r, i2_g, i2_b;
    242     UWORD32  u4_r, u4_g, u4_b;
    243     WORD16  i2_i, i2_j;
    244     UWORD8  *pu1_y_src_nxt;
    245     UWORD32 *pu4_rgba_dst_NextRow;
    246 
    247     UWORD8 *pu1_u_src, *pu1_v_src;
    248 
    249     if(is_u_first)
    250     {
    251         pu1_u_src = (UWORD8 *)pu1_uv_src;
    252         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
    253     }
    254     else
    255     {
    256         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
    257         pu1_v_src = (UWORD8 *)pu1_uv_src;
    258     }
    259 
    260     pu1_y_src_nxt   = pu1_y_src + src_y_strd;
    261     pu4_rgba_dst_NextRow = pu4_rgba_dst + dst_strd;
    262 
    263     for(i2_i = 0; i2_i < (ht >> 1); i2_i++)
    264     {
    265         for(i2_j = (wd >> 1); i2_j > 0; i2_j--)
    266         {
    267             i2_b = ((*pu1_u_src - 128) * COEFF4 >> 13);
    268             i2_g = ((*pu1_u_src - 128) * COEFF2 + (*pu1_v_src - 128) * COEFF3) >> 13;
    269             i2_r = ((*pu1_v_src - 128) * COEFF1) >> 13;
    270 
    271             pu1_u_src += 2;
    272             pu1_v_src += 2;
    273             /* pixel 0 */
    274             /* B */
    275             u4_b = CLIP_U8(*pu1_y_src + i2_b);
    276             /* G */
    277             u4_g = CLIP_U8(*pu1_y_src + i2_g);
    278             /* R */
    279             u4_r = CLIP_U8(*pu1_y_src + i2_r);
    280 
    281             pu1_y_src++;
    282             *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
    283 
    284             /* pixel 1 */
    285             /* B */
    286             u4_b = CLIP_U8(*pu1_y_src + i2_b);
    287             /* G */
    288             u4_g = CLIP_U8(*pu1_y_src + i2_g);
    289             /* R */
    290             u4_r = CLIP_U8(*pu1_y_src + i2_r);
    291 
    292             pu1_y_src++;
    293             *pu4_rgba_dst++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
    294 
    295             /* pixel 2 */
    296             /* B */
    297             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
    298             /* G */
    299             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
    300             /* R */
    301             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
    302 
    303             pu1_y_src_nxt++;
    304             *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
    305 
    306             /* pixel 3 */
    307             /* B */
    308             u4_b = CLIP_U8(*pu1_y_src_nxt + i2_b);
    309             /* G */
    310             u4_g = CLIP_U8(*pu1_y_src_nxt + i2_g);
    311             /* R */
    312             u4_r = CLIP_U8(*pu1_y_src_nxt + i2_r);
    313 
    314             pu1_y_src_nxt++;
    315             *pu4_rgba_dst_NextRow++ = ((u4_r << 16) | (u4_g << 8) | (u4_b << 0));
    316 
    317         }
    318 
    319         pu1_u_src = pu1_u_src + src_uv_strd - wd;
    320         pu1_v_src = pu1_v_src + src_uv_strd - wd;
    321 
    322         pu1_y_src = pu1_y_src + (src_y_strd << 1) - wd;
    323         pu1_y_src_nxt = pu1_y_src_nxt + (src_y_strd << 1) - wd;
    324 
    325         pu4_rgba_dst = pu4_rgba_dst_NextRow - wd + dst_strd;
    326         pu4_rgba_dst_NextRow = pu4_rgba_dst_NextRow + (dst_strd << 1) - wd;
    327     }
    328 
    329 
    330 }
    331 
    332 /**
    333 *******************************************************************************
    334 *
    335 * @brief Function used from copying a 420SP buffer
    336 *
    337 * @par   Description
    338 * Function used from copying a 420SP buffer
    339 *
    340 * @param[in] pu1_y_src
    341 *   Input Y pointer
    342 *
    343 * @param[in] pu1_uv_src
    344 *   Input UV pointer (UV is interleaved either in UV or VU format)
    345 *
    346 * @param[in] pu1_y_dst
    347 *   Output Y pointer
    348 *
    349 * @param[in] pu1_uv_dst
    350 *   Output UV pointer (UV is interleaved in the same format as that of input)
    351 *
    352 * @param[in] wd
    353 *   Width
    354 *
    355 * @param[in] ht
    356 *   Height
    357 *
    358 * @param[in] src_y_strd
    359 *   Input Y Stride
    360 *
    361 * @param[in] src_uv_strd
    362 *   Input UV stride
    363 *
    364 * @param[in] dst_y_strd
    365 *   Output Y stride
    366 *
    367 * @param[in] dst_uv_strd
    368 *   Output UV stride
    369 *
    370 * @returns None
    371 *
    372 * @remarks In case there is a need to perform partial frame copy then
    373 * by passion appropriate source and destination pointers and appropriate
    374 * values for wd and ht it can be done
    375 *
    376 *******************************************************************************
    377 */
    378 
    379 void ihevcd_fmt_conv_420sp_to_420sp(UWORD8 *pu1_y_src,
    380                                     UWORD8 *pu1_uv_src,
    381                                     UWORD8 *pu1_y_dst,
    382                                     UWORD8 *pu1_uv_dst,
    383                                     WORD32 wd,
    384                                     WORD32 ht,
    385                                     WORD32 src_y_strd,
    386                                     WORD32 src_uv_strd,
    387                                     WORD32 dst_y_strd,
    388                                     WORD32 dst_uv_strd)
    389 {
    390     UWORD8 *pu1_src, *pu1_dst;
    391     WORD32 num_rows, num_cols, src_strd, dst_strd;
    392     WORD32 i;
    393 
    394     /* copy luma */
    395     pu1_src = (UWORD8 *)pu1_y_src;
    396     pu1_dst = (UWORD8 *)pu1_y_dst;
    397 
    398     num_rows = ht;
    399     num_cols = wd;
    400 
    401     src_strd = src_y_strd;
    402     dst_strd = dst_y_strd;
    403 
    404     for(i = 0; i < num_rows; i++)
    405     {
    406         memcpy(pu1_dst, pu1_src, num_cols);
    407         pu1_dst += dst_strd;
    408         pu1_src += src_strd;
    409     }
    410 
    411     /* copy U and V */
    412     pu1_src = (UWORD8 *)pu1_uv_src;
    413     pu1_dst = (UWORD8 *)pu1_uv_dst;
    414 
    415     num_rows = ht >> 1;
    416     num_cols = wd;
    417 
    418     src_strd = src_uv_strd;
    419     dst_strd = dst_uv_strd;
    420 
    421     for(i = 0; i < num_rows; i++)
    422     {
    423         memcpy(pu1_dst, pu1_src, num_cols);
    424         pu1_dst += dst_strd;
    425         pu1_src += src_strd;
    426     }
    427     return;
    428 }
    429 
    430 
    431 
    432 /**
    433 *******************************************************************************
    434 *
    435 * @brief Function used from copying a 420SP buffer
    436 *
    437 * @par   Description
    438 * Function used from copying a 420SP buffer
    439 *
    440 * @param[in] pu1_y_src
    441 *   Input Y pointer
    442 *
    443 * @param[in] pu1_uv_src
    444 *   Input UV pointer (UV is interleaved either in UV or VU format)
    445 *
    446 * @param[in] pu1_y_dst
    447 *   Output Y pointer
    448 *
    449 * @param[in] pu1_uv_dst
    450 *   Output UV pointer (UV is interleaved in the same format as that of input)
    451 *
    452 * @param[in] wd
    453 *   Width
    454 *
    455 * @param[in] ht
    456 *   Height
    457 *
    458 * @param[in] src_y_strd
    459 *   Input Y Stride
    460 *
    461 * @param[in] src_uv_strd
    462 *   Input UV stride
    463 *
    464 * @param[in] dst_y_strd
    465 *   Output Y stride
    466 *
    467 * @param[in] dst_uv_strd
    468 *   Output UV stride
    469 *
    470 * @returns None
    471 *
    472 * @remarks In case there is a need to perform partial frame copy then
    473 * by passion appropriate source and destination pointers and appropriate
    474 * values for wd and ht it can be done
    475 *
    476 *******************************************************************************
    477 */
    478 void ihevcd_fmt_conv_420sp_to_420sp_swap_uv(UWORD8 *pu1_y_src,
    479                                             UWORD8 *pu1_uv_src,
    480                                             UWORD8 *pu1_y_dst,
    481                                             UWORD8 *pu1_uv_dst,
    482                                             WORD32 wd,
    483                                             WORD32 ht,
    484                                             WORD32 src_y_strd,
    485                                             WORD32 src_uv_strd,
    486                                             WORD32 dst_y_strd,
    487                                             WORD32 dst_uv_strd)
    488 {
    489     UWORD8 *pu1_src, *pu1_dst;
    490     WORD32 num_rows, num_cols, src_strd, dst_strd;
    491     WORD32 i;
    492 
    493     /* copy luma */
    494     pu1_src = (UWORD8 *)pu1_y_src;
    495     pu1_dst = (UWORD8 *)pu1_y_dst;
    496 
    497     num_rows = ht;
    498     num_cols = wd;
    499 
    500     src_strd = src_y_strd;
    501     dst_strd = dst_y_strd;
    502 
    503     for(i = 0; i < num_rows; i++)
    504     {
    505         memcpy(pu1_dst, pu1_src, num_cols);
    506         pu1_dst += dst_strd;
    507         pu1_src += src_strd;
    508     }
    509 
    510     /* copy U and V */
    511     pu1_src = (UWORD8 *)pu1_uv_src;
    512     pu1_dst = (UWORD8 *)pu1_uv_dst;
    513 
    514     num_rows = ht >> 1;
    515     num_cols = wd;
    516 
    517     src_strd = src_uv_strd;
    518     dst_strd = dst_uv_strd;
    519 
    520     for(i = 0; i < num_rows; i++)
    521     {
    522         WORD32 j;
    523         for(j = 0; j < num_cols; j += 2)
    524         {
    525             pu1_dst[j + 0] = pu1_src[j + 1];
    526             pu1_dst[j + 1] = pu1_src[j + 0];
    527         }
    528         pu1_dst += dst_strd;
    529         pu1_src += src_strd;
    530     }
    531     return;
    532 }
    533 /**
    534 *******************************************************************************
    535 *
    536 * @brief Function used from copying a 420SP buffer
    537 *
    538 * @par   Description
    539 * Function used from copying a 420SP buffer
    540 *
    541 * @param[in] pu1_y_src
    542 *   Input Y pointer
    543 *
    544 * @param[in] pu1_uv_src
    545 *   Input UV pointer (UV is interleaved either in UV or VU format)
    546 *
    547 * @param[in] pu1_y_dst
    548 *   Output Y pointer
    549 *
    550 * @param[in] pu1_u_dst
    551 *   Output U pointer
    552 *
    553 * @param[in] pu1_v_dst
    554 *   Output V pointer
    555 *
    556 * @param[in] wd
    557 *   Width
    558 *
    559 * @param[in] ht
    560 *   Height
    561 *
    562 * @param[in] src_y_strd
    563 *   Input Y Stride
    564 *
    565 * @param[in] src_uv_strd
    566 *   Input UV stride
    567 *
    568 * @param[in] dst_y_strd
    569 *   Output Y stride
    570 *
    571 * @param[in] dst_uv_strd
    572 *   Output UV stride
    573 *
    574 * @param[in] is_u_first
    575 *   Flag to indicate if U is the first byte in input chroma part
    576 *
    577 * @returns none
    578 *
    579 * @remarks In case there is a need to perform partial frame copy then
    580 * by passion appropriate source and destination pointers and appropriate
    581 * values for wd and ht it can be done
    582 *
    583 *******************************************************************************
    584 */
    585 
    586 
    587 void ihevcd_fmt_conv_420sp_to_420p(UWORD8 *pu1_y_src,
    588                                    UWORD8 *pu1_uv_src,
    589                                    UWORD8 *pu1_y_dst,
    590                                    UWORD8 *pu1_u_dst,
    591                                    UWORD8 *pu1_v_dst,
    592                                    WORD32 wd,
    593                                    WORD32 ht,
    594                                    WORD32 src_y_strd,
    595                                    WORD32 src_uv_strd,
    596                                    WORD32 dst_y_strd,
    597                                    WORD32 dst_uv_strd,
    598                                    WORD32 is_u_first,
    599                                    WORD32 disable_luma_copy)
    600 {
    601     UWORD8 *pu1_src, *pu1_dst;
    602     UWORD8 *pu1_u_src, *pu1_v_src;
    603     WORD32 num_rows, num_cols, src_strd, dst_strd;
    604     WORD32 i, j;
    605 
    606     if(0 == disable_luma_copy)
    607     {
    608         /* copy luma */
    609         pu1_src = (UWORD8 *)pu1_y_src;
    610         pu1_dst = (UWORD8 *)pu1_y_dst;
    611 
    612         num_rows = ht;
    613         num_cols = wd;
    614 
    615         src_strd = src_y_strd;
    616         dst_strd = dst_y_strd;
    617 
    618         for(i = 0; i < num_rows; i++)
    619         {
    620             memcpy(pu1_dst, pu1_src, num_cols);
    621             pu1_dst += dst_strd;
    622             pu1_src += src_strd;
    623         }
    624     }
    625     /* de-interleave U and V and copy to destination */
    626     if(is_u_first)
    627     {
    628         pu1_u_src = (UWORD8 *)pu1_uv_src;
    629         pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
    630     }
    631     else
    632     {
    633         pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
    634         pu1_v_src = (UWORD8 *)pu1_uv_src;
    635     }
    636 
    637 
    638     num_rows = ht >> 1;
    639     num_cols = wd >> 1;
    640 
    641     src_strd = src_uv_strd;
    642     dst_strd = dst_uv_strd;
    643 
    644     for(i = 0; i < num_rows; i++)
    645     {
    646         for(j = 0; j < num_cols; j++)
    647         {
    648             pu1_u_dst[j] = pu1_u_src[j * 2];
    649             pu1_v_dst[j] = pu1_v_src[j * 2];
    650         }
    651 
    652         pu1_u_dst += dst_strd;
    653         pu1_v_dst += dst_strd;
    654         pu1_u_src += src_strd;
    655         pu1_v_src += src_strd;
    656     }
    657     return;
    658 }
    659 
    660 
    661 
    662 /**
    663 *******************************************************************************
    664 *
    665 * @brief Function used from format conversion or frame copy
    666 *
    667 * @par   Description
    668 * Function used from copying or converting a reference frame to display buffer
    669 * in non shared mode
    670 *
    671 * @param[in] pu1_y_dst
    672 *   Output Y pointer
    673 *
    674 * @param[in] pu1_u_dst
    675 *   Output U/UV pointer ( UV is interleaved in the same format as that of input)
    676 *
    677 * @param[in] pu1_v_dst
    678 *   Output V pointer ( used in 420P output case)
    679 *
    680 * @param[in] blocking
    681 *   To indicate whether format conversion should wait till frame is reconstructed
    682 *   and then return after complete copy is done. To be set to 1 when called at the
    683 *   end of frame processing and set to 0 when called between frame processing modules
    684 *   in order to utilize available MCPS
    685 *
    686 * @returns Error from IHEVCD_ERROR_T
    687 *
    688 *******************************************************************************
    689 */
    690 IHEVCD_ERROR_T ihevcd_fmt_conv(codec_t *ps_codec,
    691                                process_ctxt_t *ps_proc,
    692                                UWORD8 *pu1_y_dst,
    693                                UWORD8 *pu1_u_dst,
    694                                UWORD8 *pu1_v_dst,
    695                                WORD32 cur_row,
    696                                WORD32 num_rows)
    697 {
    698     IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
    699     pic_buf_t *ps_disp_pic;
    700     UWORD8 *pu1_y_src, *pu1_uv_src;
    701     UWORD8 *pu1_y_dst_tmp, *pu1_uv_dst_tmp;
    702     UWORD8 *pu1_u_dst_tmp, *pu1_v_dst_tmp;
    703     UWORD16 *pu2_rgb_dst_tmp;
    704     UWORD32 *pu4_rgb_dst_tmp;
    705     WORD32 is_u_first;
    706     UWORD8 *pu1_luma;
    707     UWORD8 *pu1_chroma;
    708     sps_t *ps_sps;
    709     WORD32 disable_luma_copy;
    710     WORD32 crop_unit_x, crop_unit_y;
    711 
    712     if(0 == num_rows)
    713         return ret;
    714 
    715     /* In case processing is disabled, then no need to format convert/copy */
    716     PROFILE_DISABLE_FMT_CONV();
    717     ps_sps = ps_proc->ps_sps;
    718 
    719     crop_unit_x = 1;
    720     crop_unit_y = 1;
    721 
    722     if(CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc)
    723     {
    724         crop_unit_x = 2;
    725         crop_unit_y = 2;
    726     }
    727 
    728     ps_disp_pic = ps_codec->ps_disp_buf;
    729     pu1_luma = ps_disp_pic->pu1_luma;
    730     pu1_chroma = ps_disp_pic->pu1_chroma;
    731 
    732 
    733     /* Take care of cropping */
    734     pu1_luma    += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset * crop_unit_y + ps_sps->i2_pic_crop_left_offset * crop_unit_x;
    735 
    736     /* Left offset is multiplied by 2 because buffer is UV interleaved */
    737     pu1_chroma  += ps_codec->i4_strd * ps_sps->i2_pic_crop_top_offset + ps_sps->i2_pic_crop_left_offset * 2;
    738 
    739 
    740     is_u_first = (IV_YUV_420SP_UV == ps_codec->e_ref_chroma_fmt) ? 1 : 0;
    741 
    742     /* In case of 420P output luma copy is disabled for shared mode */
    743     disable_luma_copy = 0;
    744     if(1 == ps_codec->i4_share_disp_buf)
    745     {
    746         disable_luma_copy = 1;
    747     }
    748 
    749 
    750 
    751     {
    752         pu1_y_src   = pu1_luma + cur_row * ps_codec->i4_strd;
    753         pu1_uv_src  = pu1_chroma + (cur_row / 2) * ps_codec->i4_strd;
    754 
    755         /* In case of shared mode, with 420P output, get chroma destination */
    756         if((1 == ps_codec->i4_share_disp_buf) && (IV_YUV_420P == ps_codec->e_chroma_fmt))
    757         {
    758             WORD32 i;
    759             for(i = 0; i < ps_codec->i4_share_disp_buf_cnt; i++)
    760             {
    761                 WORD32 diff = ps_disp_pic->pu1_luma - ps_codec->s_disp_buffer[i].pu1_bufs[0];
    762                 if(diff == (ps_codec->i4_strd * PAD_TOP + PAD_LEFT))
    763                 {
    764                     pu1_u_dst = ps_codec->s_disp_buffer[i].pu1_bufs[1];
    765                     pu1_u_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
    766 
    767                     pu1_v_dst = ps_codec->s_disp_buffer[i].pu1_bufs[2];
    768                     pu1_v_dst += (ps_codec->i4_strd * PAD_TOP) / 4 + (PAD_LEFT / 2);
    769                     break;
    770                 }
    771             }
    772         }
    773         pu2_rgb_dst_tmp  = (UWORD16 *)pu1_y_dst;
    774         pu2_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
    775         pu4_rgb_dst_tmp  = (UWORD32 *)pu1_y_dst;
    776         pu4_rgb_dst_tmp  += cur_row * ps_codec->i4_disp_strd;
    777         pu1_y_dst_tmp  = pu1_y_dst  + cur_row * ps_codec->i4_disp_strd;
    778         pu1_uv_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd;
    779         pu1_u_dst_tmp = pu1_u_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
    780         pu1_v_dst_tmp = pu1_v_dst  + (cur_row / 2) * ps_codec->i4_disp_strd / 2;
    781 
    782         /* In case of multi threaded implementation, format conversion might be called
    783          * before reconstruction is completed. If the frame being converted/copied
    784          * is same as the frame being reconstructed,
    785          * Check how many rows can be format converted
    786          * Convert those many rows and then check for remaining rows and so on
    787          */
    788 
    789         if((0 == ps_codec->i4_flush_mode) && (ps_codec->i4_disp_buf_id == ps_proc->i4_cur_pic_buf_id) && (1 < ps_codec->i4_num_cores))
    790         {
    791             WORD32 idx;
    792             UWORD8 *pu1_buf;
    793             WORD32 status;
    794             WORD32 last_row = cur_row + num_rows;
    795             WORD32 last_ctb_y;
    796             UWORD32 ctb_in_row;
    797 
    798             while(1)
    799             {
    800                 last_row = cur_row + MAX(num_rows, (1 << ps_sps->i1_log2_ctb_size)) +
    801                                 ps_sps->i2_pic_crop_top_offset * crop_unit_y;
    802                 last_ctb_y = (last_row >> ps_sps->i1_log2_ctb_size) - 1;
    803                 /* Since deblocking works with a shift of -4, -4 ,wait till next CTB row is processed */
    804                 last_ctb_y++;
    805                 /* In case of a  conformance window, an extra wait of one row might be needed */
    806                 last_ctb_y++;
    807                 last_ctb_y = MIN(last_ctb_y, (ps_sps->i2_pic_ht_in_ctb - 1));
    808 
    809                 idx = (last_ctb_y * ps_sps->i2_pic_wd_in_ctb);
    810 
    811                 /*Check if the row below is completely processed before proceeding with format conversion*/
    812                 status = 1;
    813                 for(ctb_in_row = 0; (WORD32)ctb_in_row < ps_sps->i2_pic_wd_in_ctb; ctb_in_row++)
    814                 {
    815                     pu1_buf = (ps_codec->pu1_proc_map + idx + ctb_in_row);
    816                     status &= *pu1_buf;
    817                 }
    818 
    819                 if(status)
    820                 {
    821                     break;
    822                 }
    823                 else
    824                 {
    825                     ithread_yield();
    826                 }
    827             }
    828         }
    829 
    830 
    831         if((IV_YUV_420SP_UV == ps_codec->e_chroma_fmt) || (IV_YUV_420SP_VU == ps_codec->e_chroma_fmt))
    832         {
    833 
    834             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420sp_fptr(pu1_y_src, pu1_uv_src,
    835                                                                           pu1_y_dst_tmp, pu1_uv_dst_tmp,
    836                                                                           ps_codec->i4_disp_wd,
    837                                                                           num_rows,
    838                                                                           ps_codec->i4_strd,
    839                                                                           ps_codec->i4_strd,
    840                                                                           ps_codec->i4_disp_strd,
    841                                                                           ps_codec->i4_disp_strd);
    842         }
    843         else if(IV_YUV_420P == ps_codec->e_chroma_fmt)
    844         {
    845 
    846             if(0 == disable_luma_copy)
    847             {
    848                 // copy luma
    849                 WORD32 i;
    850                 WORD32 num_cols = ps_codec->i4_disp_wd;
    851 
    852                 for(i = 0; i < num_rows; i++)
    853                 {
    854                     memcpy(pu1_y_dst_tmp, pu1_y_src, num_cols);
    855                     pu1_y_dst_tmp += ps_codec->i4_disp_strd;
    856                     pu1_y_src += ps_codec->i4_strd;
    857                 }
    858 
    859                 disable_luma_copy = 1;
    860             }
    861 
    862             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_420p_fptr(pu1_y_src, pu1_uv_src,
    863                                                                          pu1_y_dst_tmp, pu1_u_dst_tmp, pu1_v_dst_tmp,
    864                                                                          ps_codec->i4_disp_wd,
    865                                                                          num_rows,
    866                                                                          ps_codec->i4_strd,
    867                                                                          ps_codec->i4_strd,
    868                                                                          ps_codec->i4_disp_strd,
    869                                                                          (ps_codec->i4_disp_strd / 2),
    870                                                                          is_u_first,
    871                                                                          disable_luma_copy);
    872 
    873         }
    874         else if(IV_RGB_565 == ps_codec->e_chroma_fmt)
    875         {
    876 
    877             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgb565_fptr(pu1_y_src, pu1_uv_src,
    878                                                                            pu2_rgb_dst_tmp,
    879                                                                            ps_codec->i4_disp_wd,
    880                                                                            num_rows,
    881                                                                            ps_codec->i4_strd,
    882                                                                            ps_codec->i4_strd,
    883                                                                            ps_codec->i4_disp_strd,
    884                                                                            is_u_first);
    885 
    886         }
    887         else if(IV_RGBA_8888 == ps_codec->e_chroma_fmt)
    888         {
    889             ASSERT(is_u_first == 1);
    890 
    891             ps_codec->s_func_selector.ihevcd_fmt_conv_420sp_to_rgba8888_fptr(pu1_y_src,
    892                                                                              pu1_uv_src,
    893                                                                              pu4_rgb_dst_tmp,
    894                                                                              ps_codec->i4_disp_wd,
    895                                                                              num_rows,
    896                                                                              ps_codec->i4_strd,
    897                                                                              ps_codec->i4_strd,
    898                                                                              ps_codec->i4_disp_strd,
    899                                                                              is_u_first);
    900 
    901         }
    902 
    903 
    904 
    905     }
    906     return (ret);
    907 }
    908 
    909