Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_PredictIntraChroma_8x8_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14         INCLUDE omxtypes_s.h
     15         INCLUDE armCOMM_s.h
     16 
     17         EXPORT armVCM4P10_pIndexTable8x8
     18 
     19 ;// Define the processor variants supported by this file
     20 
     21          M_VARIANTS ARM1136JS
     22 
     23      AREA table, DATA
     24 ;//-------------------------------------------------------
     25 ;// Jump table that implements a C-style switch in assembly.
     26 ;// The function loads pc from entry [predMode] of this table
        ;// (one 32-bit code address per entry), so the entry order
        ;// below fixes the predMode value that selects each case:
        ;// entry 0 = DC, 1 = HOR, 2 = VERT, 3 = PLANE.
     27 ;//-------------------------------------------------------
     28 
     29     M_TABLE armVCM4P10_pIndexTable8x8
     30     DCD  OMX_VC_CHROMA_DC,     OMX_VC_CHROMA_HOR
     31     DCD  OMX_VC_CHROMA_VERT,   OMX_VC_CHROMA_PLANE
     32 
        ;// Table of 16-bit multipliers.
        ;// NOTE(review): not referenced by the ARM1136JS code in this
        ;// file; presumably consumed by another variant - confirm.
     33     M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
     34     DCW   3, 2, 1,4
     35     DCW  -3,-2,-1,0
     36     DCW   1, 2, 3,4
     38     IF ARM1136JS
     39 
     40 ;//--------------------------------------------
     41 ;// Constants
     42 ;//--------------------------------------------
     43 
     44 BLK_SIZE        EQU 0x8             ;// Row count of the block; used as the loop count
     45 MUL_CONST0      EQU 0x01010101      ;// byte * MUL_CONST0 replicates the byte into all 4 bytes of a word
     46 MASK_CONST      EQU 0x00FF00FF      ;// Keeps the low byte of each halfword (plane mode packing)
     47 MUL_CONST1      EQU 0x80808080      ;// 0x80 in every byte; DC fill when neither neighbour is available
     48 
     49 ;//--------------------------------------------
     50 ;// Scratch variable
     51 ;//--------------------------------------------
     52 y               RN 12               ;// Set to BLK_SIZE at entry; r12 is later reused as tVal12 / c
     53 pc              RN 15
     54 return          RN 0                ;// Status code returned to the caller
     55 pSrcLeft2       RN 1                ;// pSrcLeft + leftStep (odd rows); shares r1 with pSrcAbove
     56 pDst2           RN 2                ;// pDst + dstStep (odd rows, HOR case); shares r2 with pSrcAboveLeft
     57 sum1            RN 6
     58 sum2            RN 7
     59 pTable          RN 9                ;// Address of armVCM4P10_pIndexTable8x8
     60 dstStepx2       RN 11               ;// HOR case: 2*dstStep - 4
     61 leftStepx2      RN 14               ;// 2*leftStep
     62 outerCount      RN 14               ;// PLANE case row counter; shares r14 with leftStepx2, tVal14 and b
     63 r0x01010101     RN 10               ;// Holds MUL_CONST0
     64 r0x00FF00FF     RN 11               ;// Holds MASK_CONST
     65 
        ;// Generic temporaries: tValN is simply rN. Many of them overlap
        ;// the named aliases above and the input arguments below, so the
        ;// instruction order in each case matters.
     66 tVal0           RN 0
     67 tVal1           RN 1
     68 tVal2           RN 2
     69 tVal3           RN 3
     70 tVal4           RN 4
     71 tVal5           RN 5
     72 tVal6           RN 6
     73 tVal7           RN 7
     74 tVal8           RN 8
     75 tVal9           RN 9
     76 tVal10          RN 10
     77 tVal11          RN 11
     78 tVal12          RN 12
     79 tVal14          RN 14
     80 
        ;// PLANE case gradients
     81 b               RN 14               ;// Horizontal gradient
     82 c               RN 12               ;// Vertical gradient
     83 
        ;// PLANE case running row values, two signed 16-bit pixels per register
        ;// (pXpY = pixel X in the high halfword, pixel Y in the low halfword)
     84 p2p0            RN 0
     85 p3p1            RN 1
     86 p6p4            RN 2
     87 p7p5            RN 4
     88 
        ;// PLANE case clipped copies of the row values above
     89 pp2pp0          RN 6
     90 pp3pp1          RN 7
     91 pp6pp4          RN 8
     92 pp7pp5          RN 9
     93 
        ;// PLANE case packed output words (both use r10, one after the other)
     94 p3210           RN 10
     95 p7654           RN 10
     96 
     97 ;//--------------------------------------------
     98 ;// Input Arguments
     99 ;//--------------------------------------------
        ;// pSrcLeft..pDst are used directly from r0-r3; the remaining four
        ;// are loaded from the stack at function entry into r4-r7.
    100 pSrcLeft        RN 0    ;// input pointer: left neighbour column, rows leftStep bytes apart
    101 pSrcAbove       RN 1    ;// input pointer: 8 bytes of the row above
    102 pSrcAboveLeft   RN 2    ;// input pointer: above-left corner byte
    103 pDst            RN 3    ;// output pointer: 8 rows of 8 bytes, dstStep bytes apart
    104 leftStep        RN 4    ;// input variable: byte stride of pSrcLeft
    105 dstStep         RN 5    ;// input variable: byte stride of pDst
    106 predMode        RN 6    ;// input variable: index into armVCM4P10_pIndexTable8x8
    107 availability    RN 7    ;// input variable: OMX_VC_UPPER / OMX_VC_LEFT flags
    108 
    109 ;//-----------------------------------------------------------------------------------------------
    110 ;// omxVCM4P10_PredictIntraChroma_8x8 starts
    111 ;//-----------------------------------------------------------------------------------------------
    112 
    113         ;// Function entry: load the four stack arguments, then jump to the
                ;// case selected by predMode through armVCM4P10_pIndexTable8x8.
                ;// Every case writes 8 rows of 8 bytes to pDst and returns
                ;// OMX_Sts_NoErr in r0.
                ;// NOTE(review): M_START/M_ARG/M_LDR come from armCOMM_s.h; the
                ;// "r11" operand presumably asks M_START to preserve r4-r11 - confirm.
    114         M_START omxVCM4P10_PredictIntraChroma_8x8, r11
    115 
    116         ;// Define stack arguments
    117         M_ARG    LeftStep,     4
    118         M_ARG    DstStep,      4
    119         M_ARG    PredMode,     4
    120         M_ARG    Availability, 4
    121 
    122         ;// M_STALL ARM1136JS=4
    123 
    124         LDR      pTable,=armVCM4P10_pIndexTable8x8   ;// Load index table for switch case
    125 
    126 
    127         ;// Load argument from the stack
    128         M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
    129         M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
    130         M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
    131         M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
    132 
    133         MOV      y, #BLK_SIZE                        ;// NOTE(review): no case in this file reads y before r12 is overwritten
    134         LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case selected by predMode (no range check here)
    135 
    136 OMX_VC_CHROMA_DC
                ;// DC prediction. This block handles the case where both the upper
                ;// and the left neighbours are available; otherwise it falls through
                ;// the TST_UPPER / TST_LEFT / TST_COUNT0 chain. Each 4x4 quadrant of
                ;// the 8x8 block is filled with one value:
                ;//   top-left     = (above[0..3] + left[0..3] + 4) >> 3
                ;//   top-right    = (above[4..7] + 2) >> 2
                ;//   bottom-left  = (left[4..7]  + 2) >> 2
                ;//   bottom-right = (above[4..7] + left[4..7] + 4) >> 3
    137         AND      availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT)
    138         CMP      availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// are both OMX_VC_UPPER and OMX_VC_LEFT set?
    139         LDR      r0x01010101, =MUL_CONST0            ;// Loaded before the branch: TST_UPPER / TST_LEFT use it too
    140         BNE      TST_UPPER                           ;// Jump to Upper if not both
    141         LDM      pSrcAbove,{tVal8,tVal9}             ;// tVal 8 to 9 = pSrcAbove[0 to 7]
    142 
    143         ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
    144         ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep (overwrites pSrcAbove, already consumed)
    145 
    146         ;// M_STALL ARM1136JS=1
    147 
    148         UXTB16   tVal7, tVal8                        ;// pSrcAbove[0, 2]
    149         UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
    150         UADD16   sum1, tVal7, tVal8                  ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
    151 
    152         UXTB16   tVal7, tVal9                        ;// pSrcAbove[4, 6]
    153         UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
    154         UADD16   sum2, tVal7, tVal9                  ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
    155         ADD      sum1, sum1, sum1, LSR #16           ;// sum(pSrcAbove[0] to pSrcAbove[3])
    156         ADD      sum2, sum2, sum2, LSR #16           ;// sum(pSrcAbove[4] to pSrcAbove[7])
    157         UXTH     sum1, sum1                          ;// upsum1 (Clear the top junk bits)
    158         UXTH     sum2, sum2                          ;// upsum2 (Clear the top junk bits)
    159 
    160         M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
    161         M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
    162         M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[2] (overwrites leftStep; leftStepx2 still valid)
    163         M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[3]
    164         ADD      tVal2, tVal8, tVal9                 ;// tVal2 = tVal8 + tVal9
    165 
    166         M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[4]
    167         M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[5]
    168         ADD      tVal14, tVal4, tVal12               ;// tVal14 = tVal4 + tVal12 (overwrites leftStepx2, no longer needed)
    169 
    170         LDRB     tVal4, [pSrcLeft]                   ;// tVal4 = pSrcLeft[6]
    171         LDRB     tVal12,[pSrcLeft2]                  ;// tVal12= pSrcLeft[7]
    172         ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
    173         ADD      tVal2, tVal2, tVal14                ;// leftsum1  = sum(pSrcLeft[0] to pSrcLeft[3])
    174         ADD      tVal4, tVal4, tVal12                ;// tVal4 = tVal4 + tVal12
    175         ADD      tVal14, tVal8, tVal4                ;// leftsum2  = sum(pSrcLeft[4] to pSrcLeft[7])
    176         ADD      tVal8, tVal14, #2                   ;// tVal8 = leftsum2 + 2
    177         ADD      tVal9, sum2,   #2                   ;// tVal9 = upsum2 + 2
    178         ADD      sum1,  sum1, tVal2                  ;// sum1 = upsum1 + leftsum1
    179         ADD      sum2,  sum2, tVal14                 ;// sum2 = upsum2 + leftsum2
    180         ADD      sum1, sum1, #4                      ;// (sum1 + 4)
    181         ADD      sum2, sum2, #4                      ;// (sum2 + 4)
    182         MOV      sum1,  sum1,  LSR #3                ;// (sum1 + 4)>>3
    183         MOV      tVal9, tVal9, LSR #2                ;// (upsum2 + 2)>>2
    184         MOV      tVal8, tVal8, LSR #2                ;// (leftsum2 + 2)>>2
    185         MOV      sum2,  sum2,  LSR #3                ;// (sum2 + 4)>>3
    186 
    187         MUL      tVal0, sum1, r0x01010101            ;// top-left value replicated in all the bytes
    188         MUL      tVal1, tVal9,r0x01010101            ;// top-right value replicated in all the bytes
    189         MUL      tVal8, tVal8,r0x01010101            ;// bottom-left value replicated in all the bytes
    190         MUL      tVal9, sum2, r0x01010101            ;// bottom-right value replicated in all the bytes
    191 
    192         M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 0, bytes 0 to 7 = tVal 0 to 1
    193         M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 1, bytes 0 to 7 = tVal 0 to 1
    194         M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 2, bytes 0 to 7 = tVal 0 to 1
    195         M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 3, bytes 0 to 7 = tVal 0 to 1
    196 
    197         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 4, bytes 0 to 7 = tVal 8 to 9
    198         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 5, bytes 0 to 7 = tVal 8 to 9
    199         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 6, bytes 0 to 7 = tVal 8 to 9
    200         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 7, bytes 0 to 7 = tVal 8 to 9
    201         MOV      return, #OMX_Sts_NoErr
    202         M_EXIT
    203 
    204 TST_UPPER
                ;// DC prediction when only the upper neighbour is available.
                ;// availability has already been masked to (UPPER | LEFT) and
                ;// r0x01010101 already holds MUL_CONST0. Left half of every row =
                ;// (above[0..3] + 2) >> 2, right half = (above[4..7] + 2) >> 2.
    205 
    206         ;// M_STALL ARM1136JS=3
    207 
    208         CMP      availability, #OMX_VC_UPPER         ;// is only OMX_VC_UPPER set?
    209 
    210         BNE      TST_LEFT                            ;// Jump to Left if not upper
    211         LDM      pSrcAbove,{tVal8,tVal9}             ;// tVal 8 to 9 = pSrcAbove[0 to 7]
    212 
    213         ;// M_STALL ARM1136JS=3
    214 
    215         UXTB16   tVal7, tVal8                        ;// pSrcAbove[0, 2]
    216         UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
    217         UADD16   sum1,  tVal7, tVal8                 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
    218 
    219         UXTB16   tVal7, tVal9                        ;// pSrcAbove[4, 6]
    220         UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
    221         UADD16   sum2,  tVal7, tVal9                 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
    222 
    223         ADD      sum1, sum1, sum1, LSR #16           ;// sum(pSrcAbove[0] to pSrcAbove[3])
    224         ADD      sum2, sum2, sum2, LSR #16           ;// sum(pSrcAbove[4] to pSrcAbove[7])
    225 
    226         UXTH     sum1, sum1                          ;// upsum1 (Clear the top junk bits)
    227         UXTH     sum2, sum2                          ;// upsum2 (Clear the top junk bits)
    228 
    229         ADD      sum1, sum1, #2                      ;// sum1 + 2
    230         ADD      sum2, sum2, #2                      ;// sum2 + 2
    231 
    232         MOV      sum1, sum1, LSR #2                  ;// (sum1 + 2)>>2
    233         MOV      sum2, sum2, LSR #2                  ;// (sum2 + 2)>>2
    234 
    235         MUL      sum1, sum1,r0x01010101              ;// replicate the val in all the bytes
    236         MUL      sum2, sum2,r0x01010101              ;// replicate the val in all the bytes
    237 
                ;// sum1/sum2 are r6/r7, i.e. the tVal6/tVal7 pair stored below
    238         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0, bytes 0 to 7 = tVal 6 to 7
    239         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1, bytes 0 to 7 = tVal 6 to 7
    240         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2, bytes 0 to 7 = tVal 6 to 7
    241         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3, bytes 0 to 7 = tVal 6 to 7
    242         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4, bytes 0 to 7 = tVal 6 to 7
    243         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5, bytes 0 to 7 = tVal 6 to 7
    244         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6, bytes 0 to 7 = tVal 6 to 7
    245         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7, bytes 0 to 7 = tVal 6 to 7
    246         MOV      return, #OMX_Sts_NoErr
    247         M_EXIT
    248 
    249 TST_LEFT
                ;// DC prediction when only the left neighbour is available.
                ;// Rows 0-3 are filled with (left[0..3] + 2) >> 2 and rows 4-7
                ;// with (left[4..7] + 2) >> 2.
    250         ;// M_STALL ARM1136JS=3
    251 
    252         CMP      availability, #OMX_VC_LEFT          ;// is only OMX_VC_LEFT set?
    253         BNE      TST_COUNT0                          ;// Neither neighbour available
    254         ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
    255         ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
    256 
    257         M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
    258         M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
    259         M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[2] (overwrites leftStep; leftStepx2 still valid)
    260         M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[3]
    261 
    262         ADD      tVal6, tVal8, tVal9                 ;// tVal6 = tVal8 + tVal9
    263 
    264         M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[4]
    265         ADD      tVal7, tVal4, tVal12                ;// tVal7 = tVal4 + tVal12
    266         M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[5]
    267         M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[6]
    268         M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[7]
    269 
    270         ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
    271         ADD      sum1,  tVal6, tVal7                 ;// sum1  = sum(pSrcLeft[0] to pSrcLeft[3])
    272         ADD      tVal4, tVal4, tVal12                ;// tVal4 = tVal4 + tVal12
    273         ADD      sum2,  tVal8, tVal4                 ;// sum2  = sum(pSrcLeft[4] to pSrcLeft[7])
    274 
    275         ADD      sum1, sum1, #2                      ;// sum1 + 2
    276         ADD      sum2, sum2, #2                      ;// sum2 + 2
    277 
    278         MOV      sum1, sum1, LSR #2                  ;// (sum1 + 2)>>2
    279         MOV      sum2, sum2, LSR #2                  ;// (sum2 + 2)>>2
    280 
    281         MUL      tVal6, sum1,r0x01010101             ;// replicate the val in all the bytes
    282         MUL      tVal8, sum2,r0x01010101             ;// replicate the val in all the bytes
    283 
    284         ;// M_STALL ARM1136JS=1
    285         MOV      tVal7,tVal6                         ;// tVal7 = replicated sum1 (right half of rows 0-3)
    286         MOV      tVal9,tVal8                         ;// tVal9 = replicated sum2 (right half of rows 4-7)
    287 
    288         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0, bytes 0 to 7 = tVal 6 to 7
    289         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1, bytes 0 to 7 = tVal 6 to 7
    290         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2, bytes 0 to 7 = tVal 6 to 7
    291         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3, bytes 0 to 7 = tVal 6 to 7
    292 
    293         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 4, bytes 0 to 7 = tVal 8 to 9
    294         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 5, bytes 0 to 7 = tVal 8 to 9
    295         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 6, bytes 0 to 7 = tVal 8 to 9
    296         M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 7, bytes 0 to 7 = tVal 8 to 9
    297 
    298         MOV      return, #OMX_Sts_NoErr
    299         M_EXIT                                       ;// Macro to exit midway-break frm case
    300 
    301 TST_COUNT0
                ;// DC prediction when neither neighbour is available:
                ;// every byte of the 8x8 block is set to 0x80.
    302         LDR      sum1, =MUL_CONST1                  ;// sum1 (= tVal6) = 0x80808080
    303 
    304         ;// M_STALL ARM1136JS=2
    305 
    306         MOV      tVal7, sum1                         ;// tVal7 = 0x80808080 as well
    307 
    308         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0, bytes 0 to 7 = tVal 6 to 7
    309         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1, bytes 0 to 7 = tVal 6 to 7
    310         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2, bytes 0 to 7 = tVal 6 to 7
    311         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3, bytes 0 to 7 = tVal 6 to 7
    312         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4, bytes 0 to 7 = tVal 6 to 7
    313         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5, bytes 0 to 7 = tVal 6 to 7
    314         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6, bytes 0 to 7 = tVal 6 to 7
    315         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7, bytes 0 to 7 = tVal 6 to 7
    316 
    317         MOV      return, #OMX_Sts_NoErr
    318         M_EXIT                                       ;// Macro to exit midway-break frm case
    319 
    320 OMX_VC_CHROMA_HOR
                ;// Horizontal prediction: every byte of output row y is pSrcLeft[y].
                ;// Two pointer pairs are used: pSrcLeft/pDst walk the even rows and
                ;// pSrcLeft2/pDst2 the odd rows. Each row is written as two words:
                ;// the first store advances by 4, the second by 2*dstStep - 4, which
                ;// lands on the start of the row two below.
    321 
    322         ;// M_STALL ARM1136JS=2
    323 
    324         ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
    325         ADD      leftStepx2, leftStep, leftStep      ;// leftStepx2 = leftStep * 2
    326         ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep
    327         ADD      dstStepx2, dstStep, dstStep         ;// double dstStep
    328         SUB      dstStepx2, dstStepx2, #4            ;// double dstStep  minus 4
    329         LDR      r0x01010101, =MUL_CONST0            ;// Const to repeat the byte in reg 4 times
    330         M_LDRB   tVal6, [pSrcLeft], +leftStepx2      ;// tVal6 = pSrcLeft[0]
    331         M_LDRB   tVal7, [pSrcLeft2],+leftStepx2      ;// tVal7 = pSrcLeft[1]
    332         M_LDRB   tVal8, [pSrcLeft], +leftStepx2      ;// tVal8 = pSrcLeft[2]
    333         M_LDRB   tVal9, [pSrcLeft2],+leftStepx2      ;// tVal9 = pSrcLeft[3]
    334         MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
    335         MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
    336         MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
    337         MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
    338         STR      tVal6, [pDst],  #+4                 ;// row 0, bytes 0 to 3 = tVal6
    339         STR      tVal7, [pDst2], #+4                 ;// row 1, bytes 0 to 3 = tVal7
    340         M_STR    tVal6, [pDst],  dstStepx2           ;// row 0, bytes 4 to 7 = tVal6; pDst  -> row 2
    341         M_STR    tVal7, [pDst2], dstStepx2           ;// row 1, bytes 4 to 7 = tVal7; pDst2 -> row 3
    342         STR      tVal8, [pDst],  #+4                 ;// row 2, bytes 0 to 3 = tVal8
    343         STR      tVal9, [pDst2], #+4                 ;// row 3, bytes 0 to 3 = tVal9
    344         M_STR    tVal8, [pDst],  dstStepx2           ;// row 2, bytes 4 to 7 = tVal8; pDst  -> row 4
    345         M_STR    tVal9, [pDst2], dstStepx2           ;// row 3, bytes 4 to 7 = tVal9; pDst2 -> row 5
    346         M_LDRB   tVal6, [pSrcLeft], +leftStepx2      ;// tVal6 = pSrcLeft[4]
    347         M_LDRB   tVal7, [pSrcLeft2],+leftStepx2      ;// tVal7 = pSrcLeft[5]
    348         M_LDRB   tVal8, [pSrcLeft], +leftStepx2      ;// tVal8 = pSrcLeft[6]
    349         M_LDRB   tVal9, [pSrcLeft2],+leftStepx2      ;// tVal9 = pSrcLeft[7]
    350         MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
    351         MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
    352         MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
    353         MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
    354         STR      tVal6, [pDst],  #+4                 ;// row 4, bytes 0 to 3 = tVal6
    355         STR      tVal7, [pDst2], #+4                 ;// row 5, bytes 0 to 3 = tVal7
    356         M_STR    tVal6, [pDst],  dstStepx2           ;// row 4, bytes 4 to 7 = tVal6; pDst  -> row 6
    357         M_STR    tVal7, [pDst2], dstStepx2           ;// row 5, bytes 4 to 7 = tVal7; pDst2 -> row 7
    358         STR      tVal8, [pDst],  #+4                 ;// row 6, bytes 0 to 3 = tVal8
    359         STR      tVal9, [pDst2], #+4                 ;// row 7, bytes 0 to 3 = tVal9
    360         M_STR    tVal8, [pDst],  dstStepx2           ;// row 6, bytes 4 to 7 = tVal8
    361         M_STR    tVal9, [pDst2], dstStepx2           ;// row 7, bytes 4 to 7 = tVal9
    362         MOV      return, #OMX_Sts_NoErr
    363         M_EXIT
    364 
    365 OMX_VC_CHROMA_VERT
                ;// Vertical prediction: the 8 bytes at pSrcAbove are copied
                ;// unchanged into each of the 8 output rows.
    366 
    367         ;// M_STALL ARM1136JS=4
    368 
    369         LDMIA    pSrcAbove, {tVal6,tVal7}            ;// tVal 6 to 7 = pSrcAbove[0 to 7]
    370         MOV      return, #OMX_Sts_NoErr              ;// r0 (pSrcLeft) is not needed in this case
    371 
    372         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0, bytes 0 to 7 = tVal 6 to 7
    373         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1, bytes 0 to 7 = tVal 6 to 7
    374         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2, bytes 0 to 7 = tVal 6 to 7
    375         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3, bytes 0 to 7 = tVal 6 to 7
    376         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4, bytes 0 to 7 = tVal 6 to 7
    377         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5, bytes 0 to 7 = tVal 6 to 7
    378         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6, bytes 0 to 7 = tVal 6 to 7
    379         M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7, bytes 0 to 7 = tVal 6 to 7
    380 
    381         M_EXIT                                       ;// Macro to exit midway-break frm case
    382 
    383 OMX_VC_CHROMA_PLANE
                ;// Plane prediction. With
                ;//   H = 4*(above[7]-aboveLeft) + 3*(above[6]-above[0])
                ;//     + 2*(above[5]-above[1]) +   (above[4]-above[2])
                ;//   V = same expression on the left column
                ;//   a = 16*(above[7] + left[7]),  b = (17*H+16)>>5,  c = (17*V+16)>>5
                ;// output pixel (x,y) = clip8((a + 16 + b*(x-3) + c*(y-3)) >> 5).
                ;// Row 0 is built as four registers of two signed 16-bit values
                ;// (p2p0, p3p1, p6p4, p7p5); the loop clips/packs a row, stores it,
                ;// then adds {c,c} to each register to step to the next row.
                ;// The running values must stay unclipped, so clipping goes into the
                ;// separate ppXppY scratch registers.
    384 
    385         ;// M_STALL ARM1136JS=3
    386 
    387         RSB      tVal14, leftStep, leftStep, LSL #3  ;// 7*leftStep
    388         LDRB     tVal7, [pSrcAbove, #+7]             ;// pSrcAbove[7]
    389         LDRB     tVal6, [pSrcLeft, +tVal14]          ;// pSrcLeft[7*leftStep]
    390         LDRB     tVal8, [pSrcAboveLeft]              ;// pSrcAboveLeft[0]
    391         LDRB     tVal9, [pSrcAbove, #+6 ]            ;// pSrcAbove[6]
    392         LDRB     tVal10,[pSrcAbove]                  ;// pSrcAbove[0]
    393         ADD      tVal2, tVal7, tVal6                 ;// pSrcAbove[7] + pSrcLeft[7*leftStep]
    394         SUB      tVal6, tVal6, tVal8                 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0]
    395         SUB      tVal7, tVal7, tVal8                 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0]
    396         LSL      tVal2, tVal2, #4                    ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
    397         ADD      tVal2, tVal2, #16                   ;// a + 16
    398         SUB      tVal9, tVal9,tVal10                 ;// pSrcAbove[6] - pSrcAbove[0]
    399         LDRB     tVal8, [pSrcAbove,#+5]              ;// pSrcAbove[5]
    400         LDRB     tVal10,[pSrcAbove,#+1]              ;// pSrcAbove[1]
    401         ADD      tVal9, tVal9, tVal9, LSL #1         ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0])
    402         ADD      tVal7, tVal9, tVal7, LSL #2         ;// H = H1 + 4*H0
    403         SUB      tVal8, tVal8, tVal10                ;// pSrcAbove[5] - pSrcAbove[1]
    404         LDRB     tVal9, [pSrcAbove,#+4]              ;// pSrcAbove[4]
    405         LDRB     tVal10,[pSrcAbove,#+2]              ;// pSrcAbove[2]
    406         ADD      tVal7, tVal7, tVal8, LSL #1         ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1])
    407         SUB      tVal11, tVal14,leftStep             ;// 6*leftStep
    408         ADD      tVal11, pSrcLeft, tVal11            ;// pSrcLeft + 6*leftStep (walks upwards)
    409         MOV      tVal12, pSrcLeft                    ;// pSrcLeft (walks downwards)
    410         SUB      tVal9, tVal9, tVal10                ;// pSrcAbove[4] - pSrcAbove[2]
    411         ADD      tVal7, tVal7, tVal9                 ;// H = H + (pSrcAbove[4] - pSrcAbove[2])
    412         M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[6*leftStep]
    413         M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[0]
    414         ADD      tVal7, tVal7, tVal7, LSL #4         ;// 17 * H
    415         ADD      tVal7, tVal7, #16                   ;// 17 * H + 16
    416         SUB      tVal8, tVal8, tVal10                ;// pSrcLeft[6*leftStep] - pSrcLeft[0]
    417         ASR      b, tVal7, #5                        ;// b = (17 * H + 16) >> 5 (overwrites tVal14, no longer needed)
    418         ADD      tVal8, tVal8, tVal8, LSL #1         ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0])
    419         ADD      tVal6, tVal8, tVal6, LSL #2         ;// V = V1 + 4*V0
    420         M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[5*leftStep]
    421         M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[leftStep]
    422         ADD      tVal7, b, b, LSL #1                 ;// 3*b
    423         SUB      tVal2, tVal2, tVal7                 ;// a + 16 - 3*b
    424         SUB      tVal7, tVal8, tVal10                ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep]
    425         M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[4*leftStep]
    426         M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[2*leftStep]
    427         ADD      tVal6, tVal6, tVal7, LSL #1         ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep])
    428         LDR      r0x00FF00FF, =MASK_CONST            ;// r0x00FF00FF = 0x00FF00FF (overwrites tVal11, no longer needed)
    429         SUB      tVal7, tVal8, tVal10                ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]
    430         ADD      tVal6, tVal6, tVal7                 ;// V = V + (pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep])
    431         SUB      dstStep, dstStep, #4                ;// dstStep - 4 (first word store of a row advances pDst by 4)
    432         ADD      tVal6, tVal6, tVal6, LSL #4         ;// 17*V
    433         ADD      tVal6, tVal6, #16                   ;// 17*V + 16
    434 
    435         ;// M_STALL ARM1136JS=1
    436 
    437         ASR      c, tVal6, #5                        ;// c = (17*V + 16)>>5
    438 
    439         ;// M_STALL ARM1136JS=1
    440 
    441         ADD      tVal6, c, c, LSL #1                 ;// 3*c
    442         UXTH     c, c                                ;// only in half word
    443         SUB      tVal6, tVal2, tVal6                 ;// p0 = a - 3*b - 3*c + 16 (signed 32-bit)
    444         ORR      c, c, c, LSL #16                    ;// {c,c}
    445         ADD      tVal7, b, b                         ;// 2b
    446         ADD      tVal2, tVal6, tVal7                 ;// p2 = p0 + 2*b
    447         ADD      tVal7, tVal7, b                     ;// 3b
    448         PKHBT    p2p0,   tVal6,  tVal2,  LSL #16     ;// p2p0 = {p2[15:0], p0[15:0]}; PKHBT keeps p0's sign bits out of the p2 half
    449         UXTH     b, b
    450         UXTH     tVal7, tVal7
    451         ORR      b, b, b, LSL #16                    ;// {b,b}
    452         ORR      tVal7, tVal7, tVal7, LSL #16        ;// {3b,3b}
    453         SADD16   p3p1,   p2p0, b                     ;// p3p1   = p2p0 + {b,b}
    454         SADD16   p6p4,   p3p1, tVal7                 ;// p6p4   = p3p1 + {3b,3b}
    455         SADD16   p7p5,   p6p4, b                     ;// p7p5   = p6p4 + {b,b}
    456         MOV      outerCount, #BLK_SIZE               ;// Outer Loop Count (overwrites b, no longer needed)
    457 
    458 LOOP_PLANE
    459 
                ;// Clip into the ppXppY scratch registers so that the running
                ;// values p7p5..p2p0 keep their exact (unclipped) value for the
                ;// per-row "+ {c,c}" update below.
    460         USAT16   pp7pp5, #13, p7p5                    ;// clip13(p7) clip13(p5)
    461         USAT16   pp6pp4, #13, p6p4                    ;// clip13(p6) clip13(p4)
    462         USAT16   pp3pp1, #13, p3p1                    ;// clip13(p3) clip13(p1)
    463         USAT16   pp2pp0, #13, p2p0                    ;// clip13(p2) clip13(p0)
    464 
    465         AND      pp7pp5, r0x00FF00FF, pp7pp5, ASR #5  ;// clip8(p7) clip8(p5)
    466         AND      pp6pp4, r0x00FF00FF, pp6pp4, ASR #5  ;// clip8(p6) clip8(p4)
    467         AND      pp3pp1, r0x00FF00FF, pp3pp1, ASR #5  ;// clip8(p3) clip8(p1)
    468         AND      pp2pp0, r0x00FF00FF, pp2pp0, ASR #5  ;// clip8(p2) clip8(p0)
    469 
    470         SUBS     outerCount, outerCount, #1           ;// outerCount-- (flags survive until BNE: nothing below sets them)
    471 
    472         ORR      p3210, pp2pp0, pp3pp1, LSL #8        ;// pack {p3,p2, p1, p0}
    473         STR      p3210, [pDst], #4                    ;// store {pDst[0] to pDst[3]}
    474 
    475         ORR      p7654, pp6pp4, pp7pp5, LSL #8        ;// pack {p7,p6, p5, p4}
    476         M_STR    p7654, [pDst], dstStep               ;// store {pDst[4] to pDst[7]}, advance to next row
    477 
    478         SADD16   p7p5,   p7p5,   c                    ;// {p7 + c}, {p5 + c}
    479         SADD16   p6p4,   p6p4,   c                    ;// {p6 + c}, {p4 + c}
    480         SADD16   p3p1,   p3p1,   c                    ;// {p3 + c}, {p1 + c}
    481         SADD16   p2p0,   p2p0,   c                    ;// {p2 + c}, {p0 + c}
    482 
    483         BNE      LOOP_PLANE                           ;// Loop for 8 times
    484         MOV      return, #OMX_Sts_NoErr
    485         M_END
    486 
    487         ENDIF ;// ARM1136JS
    488 
    489 
    490 
    491         END
    492 ;//-----------------------------------------------------------------------------------------------
    493 ;// omxVCM4P10_PredictIntraChroma_8x8 ends
    494 ;//-----------------------------------------------------------------------------------------------
    495