Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 
     28         INCLUDE omxtypes_s.h
     29         INCLUDE armCOMM_s.h
     30 
     31 ;// Define the processor variants supported by this file
     32 
     33          M_VARIANTS ARM1136JS
     34 
     35 ;//-------------------------------------------------------
     36 ;// This table is used to implement a C-style switch statement in
     37 ;// assembly by the method of two levels of indexing.
     38 ;//-------------------------------------------------------
     39 
     40     M_TABLE armVCM4P10_pSwitchTable4x4
            ;// One word per case label. The function entry loads pc from
            ;// [table + predMode*4], so entry N is the handler that runs
            ;// when predMode == N. The order of the entries is therefore
            ;// significant and must not be changed.
     41     DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR        ;// entries 0, 1
     42     DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL    ;// entries 2, 3
     43     DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR         ;// entries 4, 5
     44     DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL         ;// entries 6, 7
     45     DCD  OMX_VC_4x4_HU                              ;// entry  8
     46 
     47     IF ARM1136JS
     48 
     49 ;//--------------------------------------------
     50 ;// Constants
     51 ;//--------------------------------------------
     52 BLK_SIZE              EQU 0x8           ;// NOTE(review): no use visible in this part of the file
     53 MUL_CONST0            EQU 0x01010101    ;// (byte value) * this = that byte replicated in all 4 bytes
     54 ADD_CONST1            EQU 0x80808080    ;// 128 in every byte; added per byte with UADD8
     55 
     56 ;//--------------------------------------------
     57 ;// Scratch variable
     58 ;//--------------------------------------------
     59 return          RN 0    ;// status value written just before M_EXIT
        ;// NOTE: the names below are aliases, not distinct registers. Many of
        ;// them share a physical register with each other and with the input
        ;// registers, so writing one name destroys every other name that is
        ;// mapped to the same register number.
     60 pTable          RN 9    ;// switch-table address; only live until the dispatch
     61 pc              RN 15
     62 r0x01010101     RN 1    ;// holds MUL_CONST0; same register as pSrcAbove and tVal1
     63 r0x80808080     RN 0    ;// holds ADD_CONST1; same register as return, pSrcLeft and tVal0
     64 
     65 tVal0           RN 0
     66 tVal1           RN 1
     67 tVal2           RN 2
     68 tVal4           RN 4
     69 tVal6           RN 6
     70 tVal7           RN 7
     71 tVal8           RN 8
     72 tVal9           RN 9
     73 tVal10          RN 10
     74 tVal11          RN 11
     75 tVal12          RN 12
     76 tVal14          RN 14   ;// r14 is the link register; NOTE(review): presumably saved and
                                ;// restored by M_START/M_EXIT - confirm in armCOMM_s.h
     77 
     78 Out0            RN 6    ;// Out0..Out3: the four 32-bit output rows (same registers as tVal6..tVal9)
     79 Out1            RN 7
     80 Out2            RN 8
     81 Out3            RN 9
     82 
     83 Left0           RN 6    ;// Left0..Left3: left-column bytes; same registers as Out0..Out3
     84 Left1           RN 7
     85 Left2           RN 8
     86 Left3           RN 9
     87 
     88 Above0123       RN 12   ;// word loaded from pSrcAbove[0..3]; same register as tVal12
     89 Above4567       RN 14   ;// word loaded from pSrcAbove[4..7]; same register as tVal14
     90 
     91 AboveLeft       RN 10   ;// byte loaded from pSrcAboveLeft; same register as tVal10
     92 
     93 ;//--------------------------------------------
     94 ;// Declare input registers
     95 ;//--------------------------------------------
     96 pSrcLeft        RN 0    ;// input pointer: left column, read one byte per row, stepped by leftStep
     97 pSrcAbove       RN 1    ;// input pointer: row above, read as one or two 32-bit words
     98 pSrcAboveLeft   RN 2    ;// input pointer: a single byte is read from it
     99 pDst            RN 3    ;// output pointer: four 32-bit rows, stepped by dstStep
    100 leftStep        RN 4    ;// input variable (loaded from the stack argument LeftStep)
    101 dstStep         RN 5    ;// input variable (loaded from the stack argument DstStep)
    102 predMode        RN 6    ;// input variable (stack argument PredMode); shares r6 with Left0/Out0/tVal6
    103 availability    RN 7    ;// input variable (stack argument Availability); shares r7 with Left1/Out1/tVal7
    104 
    105 ;//-----------------------------------------------------------------------------------------------
    106 ;// omxVCM4P10_PredictIntra_4x4 starts
    107 ;//-----------------------------------------------------------------------------------------------
    108 
    109         ;// Write function header
    110         M_START omxVCM4P10_PredictIntra_4x4, r11
                ;// Function entry: fetch the four stack-passed arguments and
                ;// jump to the handler for the requested prediction mode.
                ;// NOTE(review): the ", r11" operand presumably tells the macro
                ;// to preserve registers up to r11 - confirm in armCOMM_s.h.
    111 
    112         ;// Define stack arguments
    113         M_ARG    LeftStep,     4
    114         M_ARG    DstStep,      4
    115         M_ARG    PredMode,     4
    116         M_ARG    Availability, 4
    117 
    118         ;// M_STALL ARM1136JS=4
    119 
    120         LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case
    121 
    122         ;// Load argument from the stack
    123         M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
    124         M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
    125         M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
    126         M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
    127 
                ;// pc = table[predMode]. No range check on predMode is made
                ;// here; the table has 9 entries, so a value outside 0..8
                ;// would load pc from whatever follows the table.
    128         LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on predMode
    129 
    130 OMX_VC_4x4_VERT
                ;// Vertical mode: the word read from pSrcAbove is stored
                ;// unchanged into each of the four output rows.
                ;// NOTE(review): uses word LDR/STR, so this assumes pSrcAbove
                ;// and every row of pDst are word-accessible - confirm contract.
    131 
    132         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
    133         M_STR    Above0123,  [pDst],  dstStep        ;// pDst[0  to 3]  = Above0123
    134         M_STR    Above0123,  [pDst],  dstStep        ;// pDst[4  to 7]  = Above0123
    135         M_STR    Above0123,  [pDst],  dstStep        ;// pDst[8  to 11] = Above0123
    136         STR      Above0123,  [pDst]                  ;// pDst[12 to 15] = Above0123
    137         MOV      return, #OMX_Sts_NoErr
    138         M_EXIT                                      ;// Macro to exit midway-break frm case
    139 
    140 OMX_VC_4x4_HOR
                ;// Horizontal mode: row y of the output is the left-column
                ;// byte of row y repeated four times.
                ;// Out0..Out3 are the same registers as Left0..Left3, so each
                ;// MUL below replaces a left byte with its replicated word.
    141 
    142         ;// M_STALL ARM1136JS=6
    143 
    144         LDR      r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times
    145         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    146         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    147         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    148         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    149         MUL      Out0,   Left0,   r0x01010101        ;// replicate the val in all the bytes
    150         MUL      Out1,   Left1,   r0x01010101        ;// replicate the val in all the bytes
    151         MUL      Out2,   Left2,   r0x01010101        ;// replicate the val in all the bytes
    152         MUL      Out3,   Left3,   r0x01010101        ;// replicate the val in all the bytes
    153         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    154         M_STR    Out1,   [pDst],  dstStep            ;// store {Out1} at pDst [4  to 7 ]
    155         M_STR    Out2,   [pDst],  dstStep            ;// store {Out2} at pDst [8  to 11]
    156         STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
    157         MOV      return, #OMX_Sts_NoErr
    158         M_EXIT                                       ;// Macro to exit midway-break frm case
    159 
    160 OMX_VC_4x4_DC
                ;// DC mode. availability is first reduced to its UPPER and
                ;// LEFT bits. If both are set, every output byte is
                ;// (sum of 4 above bytes + sum of 4 left bytes + 4) >> 3.
                ;// Otherwise control goes to UpperOrLeftOrNoneAvailable.
    161 
    162         ;// M_STALL ARM1136JS=6
    163 
    164         AND      availability,  availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
    165         CMP      availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
    166         BNE      UpperOrLeftOrNoneAvailable          ;// Jump to Upper if not both
    167         LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
    168 
    169         ;// M_STALL ARM1136JS=1
    170 
                ;// tVal7 is the same register as availability (r7); the
                ;// availability value is no longer needed from here on.
    171         UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
    172         UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
    173         UADD16   tVal11, tVal6,   tVal7              ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
    174         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    175         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    176         ADD      tVal11, tVal11,  LSR #16            ;// sum(pSrcAbove[0] to pSrcAbove[3])
    177         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    178         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    179         UXTH     tVal11, tVal11                      ;// upsum1 (Clear the top junk bits)
    180         ADD      tVal6,  Left0,  Left1               ;// tVal6 = Left0 + Left1
    181         ADD      tVal7,  Left2,  Left3               ;// tVal7 = Left2 + Left3
    182         ADD      tVal6,  tVal6,  tVal7               ;// tVal6 = tVal6 + tVal7
    183         ADD      Out0,   tVal6,  tVal11              ;// Out0  = tVal6 + tVal11
    184         ADD      Out0,   Out0,   #4                  ;// Out0  = Out0 + 4
    185         LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
    186         MOV      Out0,   Out0,  LSR #3               ;// Out0 = (Out0 + 4)>>3
    187 
    188         ;// M_STALL ARM1136JS=1
    189 
    190         MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
    191 
    192         ;// M_STALL ARM1136JS=1
    193 
    194         MOV      return,  #OMX_Sts_NoErr
    195         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    196         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    197         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    198         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    199         M_EXIT                                       ;// Macro to exit midway-break frm case
    200 
    201 UpperOrLeftOrNoneAvailable
                ;// DC mode, reached from OMX_VC_4x4_DC when UPPER and LEFT are
                ;// not both set. availability has already been masked to those
                ;// two bits there, so the CMP below is true only when UPPER
                ;// alone is set. In that case every output byte is
                ;// (sum of the 4 above bytes + 2) >> 2.
    202         ;// M_STALL ARM1136JS=3
    203 
    204         CMP      availability,  #OMX_VC_UPPER        ;// if(availability & #OMX_VC_UPPER)
    205         BNE      LeftOrNoneAvailable                 ;// Jump to Left if not upper
    206         LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
    207 
    208         ;// M_STALL ARM1136JS=3
    209 
    210         UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
    211         UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
    212         UADD16   Out0,   tVal6,  tVal7               ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
    213         LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
    214         ADD      Out0,   Out0,   LSR #16             ;// sum(pSrcAbove[0] to pSrcAbove[3])
    215 
    216         ;// M_STALL ARM1136JS=1
    217 
    218         UXTH     Out0,   Out0                        ;// upsum1 (Clear the top junk bits)
    219         ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
    220 
    221         ;// M_STALL ARM1136JS=1
    222 
    223         MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
    224 
    225         ;// M_STALL ARM1136JS=1
    226 
    227         MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
    228 
    229         ;// M_STALL ARM1136JS=1
    230 
    231         MOV      return, #OMX_Sts_NoErr
    232         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    233         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    234         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    235         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    236 
    237         M_EXIT                                       ;// Macro to exit midway-break frm case
    238 
    239 LeftOrNoneAvailable
                ;// DC mode, reached when UPPER alone is not set. If the masked
                ;// availability equals LEFT, every output byte is
                ;// (sum of the 4 left bytes + 2) >> 2; otherwise branch to
                ;// NoneAvailable. r0x01010101 is loaded before the branch
                ;// because NoneAvailable uses it without reloading.
    240         ;// M_STALL ARM1136JS=3
    241 
    242         LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
    243         CMP      availability, #OMX_VC_LEFT
    244         BNE      NoneAvailable
    245         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    246         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    247         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    248         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    249         ADD      Out0,   Left0,  Left1               ;// Out0  = Left0 + Left1
    250 
    251         ;// M_STALL ARM1136JS=1
    252 
    253         ADD      Out1,   Left2,  Left3               ;// Out1  = Left2 + Left3
    254         ADD      Out0,   Out0,   Out1                ;// Out0  = Out0  + Out1
    255         ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
    256 
    257         ;// M_STALL ARM1136JS=1
    258 
    259         MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
    260 
    261         ;// M_STALL ARM1136JS=1
    262 
    263         MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
    264 
    265         ;// M_STALL ARM1136JS=1
    266 
    267         MOV      return, #OMX_Sts_NoErr
    268         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    269         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    270         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    271         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    272         M_EXIT                                       ;// Macro to exit midway-break frm case
    273 
    274 NoneAvailable
                ;// DC mode with neither UPPER nor LEFT set: every output byte
                ;// is 128. Entered by the BNE in LeftOrNoneAvailable, which has
                ;// already loaded r0x01010101 with MUL_CONST0; this block
                ;// relies on that and does not load the constant itself.
    275         MOV      Out0,   #128                        ;// Out0 = 128 if(count == 0)
    276 
    277         ;// M_STALL ARM1136JS=5
    278 
    279         MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
    280 
    281         ;// M_STALL ARM1136JS=1
    282 
    283         MOV      return, #OMX_Sts_NoErr
    284         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    285         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    286         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    287         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    288         M_EXIT                                       ;// Macro to exit midway-break frm case
    289 
    290 OMX_VC_4x4_DIAG_DL
    291 
    292         ;//------------------------------------------------------------------
    293         ;// f = (a+2*b+c+2)>>2
    294         ;// Calculate as:
    295         ;// d = (a + c )>>1
    296         ;// e = (d - b')>>1
    297         ;// f = e + 128
    298         ;//------------------------------------------------------------------
                ;// b' is the bitwise complement of b (MVN), i.e. 255 - b. The
                ;// three steps map to UHADD8, UHSUB8 and UADD8 with 0x80808080,
                ;// so four filter outputs are computed per instruction.
                ;//
                ;// Diagonal down-left mode. This path (fall-through) handles
                ;// the case where OMX_VC_UPPER_RIGHT is not set: only
                ;// Above0123 is used, and U3 stands in for the missing bytes.
                ;// Above4567 is still loaded by the LDMIA but not used here.
    299 
    300         ;// M_STALL ARM1136JS=3
    301 
    302         TST      availability, #OMX_VC_UPPER_RIGHT
    303         LDMIA    pSrcAbove,  {Above0123, Above4567}  ;// Above0123, Above4567 = pSrcAbove[0 to 7]
    304         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
    305         BNE      DLUpperRightAvailable
    306         LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
    307         MOV      tVal7,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
    308         MOV      tVal11, tVal7,  LSL #24             ;// {U3,  00,  00,  00 }
    309         MUL      Out3,   tVal7,  r0x01010101         ;// {U3,  U3,  U3,  U3 }
    310         MOV      tVal8,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
    311         MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
    312         MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
    313         UHADD8   tVal8,  tVal8,  Above0123           ;// {xx,  xx,  d1,  d0 }
                ;// tVal9 is the same register as Out3 (r9), which holds
                ;// {U3,U3,U3,U3} from the MUL above; byte 2 becomes (U2+U3)>>1.
    314         UHADD8   tVal6,  Above0123,  tVal9           ;// {xx,  d2,  xx,  xx }
    315         UHSUB8   tVal8,  tVal8,  tVal10              ;// {xx,  xx,  e1,  e0 }
    316         UHSUB8   tVal6,  tVal6,  tVal10              ;// {xx,  e2,  xx,  xx }
    317         UADD8    tVal8,  tVal8,  r0x80808080         ;// {xx,  xx,  f1,  f0 }
    318         UADD8    tVal6,  tVal6,  r0x80808080         ;// {xx,  f2,  xx,  xx }
    319 
    320         ;// M_STALL ARM1136JS=1
    321 
    322         PKHBT    tVal6,  tVal8,  tVal6               ;// {xx,  f2,  f1,  f0 }
    323         BIC      tVal6,  tVal6,  #0xFF000000         ;// {00,  f2,  f1,  f0 }
    324         ORR      Out0,   tVal6,  tVal11              ;// {U3,  f2,  f1,  f0 }
    325 
    326         ;// M_STALL ARM1136JS=1
    327 
    328         PKHTB    Out1,   Out3,   Out0,  ASR #8       ;// {U3,  U3,  f2,  f1 }
    329         MOV      return, #OMX_Sts_NoErr
    330         PKHTB    Out2,   Out3,   Out1,  ASR #8       ;// {U3,  U3,  U3,  f2 }
    331 
                ;// In the store comments below f3..f6 all equal U3 on this
                ;// path (the rows hold U3 in those byte positions).
    332         M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
    333         M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
    334         M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
    335         STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDSt[15 to 12]
    336         M_EXIT                                       ;// Macro to exit midway-break frm case
    337 
    338 DLUpperRightAvailable
                ;// Diagonal down-left mode with OMX_VC_UPPER_RIGHT set.
                ;// Entered from OMX_VC_4x4_DIAG_DL, which has already loaded
                ;// Above0123/Above4567 (U0..U7) and r0x80808080. Computes
                ;// f0..f6 with the d/e/f scheme (UHADD8, UHSUB8 against the
                ;// complemented centre tap, UADD8 with 0x80808080) and stores
                ;// rows {f3..f0}, {f4..f1}, {f5..f2}, {f6..f3}.
                ;// tVal1/tVal2 reuse r1/r2 (pSrcAbove/pSrcAboveLeft), which
                ;// are not read again in this block.
    339 
    340         MOV      tVal8,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
    341         MOV      tVal9,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
    342         MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
    343         ORR      tVal8,  tVal8,  Above4567, LSL #8   ;// {U6,  U5,  U4,  U3 }
    344         ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
    345         PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// {U5,  U4,  U3,  U2 }
    346         MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'}
    347         MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
    348         MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
    349         UHADD8   tVal6,  Above0123,  tVal9           ;// {d3,  d2,  d1,  d0 }
    350         UHADD8   tVal9,  tVal9,  Above4567           ;// {d5,  d4,  d3,  d2 }
    351         UHADD8   tVal8,  Above4567,  tVal8           ;// {d6,  xx,  xx,  xx }
    352         UHSUB8   tVal6,  tVal6,  tVal10              ;// {e3,  e2,  e1,  e0 }
    353         UHSUB8   tVal12, tVal9,  tVal1               ;// {e5,  e4,  e3,  e2 }
    354         UHSUB8   tVal8,  tVal8,  tVal2               ;// {e6,  xx,  xx,  xx }
    355         UADD8    Out0,   tVal6,  r0x80808080         ;// {f3,  f2,  f1,  f0 }
                ;// Order matters: tVal8 (r8) must be consumed by the next
                ;// instruction before Out2 (also r8) is written.
    356         UADD8    tVal9,  tVal8,  r0x80808080         ;// {f6,  xx,  xx,  xx }
    357         UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
    358         MOV      tVal7,  Out0,   LSR #8              ;// {00,  f3,  f2,  f1 }
    359         AND      tVal9,  tVal9,  #0xFF000000         ;// {f6,  00,  00,  00 }
    360         PKHBT    Out1,   tVal7,  Out2,  LSL #8       ;// {f4,  f3,  f2,  f1 }
    361         ORR      Out3,   tVal9,  Out2,  LSR #8       ;// {f6,  f5,  f4,  f3 }
    362         M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
    363         M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
    364         M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
    365         STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDSt[15 to 12]
    366         MOV      return, #OMX_Sts_NoErr
    367         M_EXIT                                       ;// Macro to exit midway-break frm case
    368 
    369 
    370 OMX_VC_4x4_DIAG_DR
                ;// Diagonal down-right mode. Reads 4 left bytes (L0..L3), the
                ;// above-left byte (UL) and 4 above bytes (U0..U3), packs
                ;// shifted copies of them into words, and filters four bytes
                ;// at a time with UHADD8 / UHSUB8 (against complemented centre
                ;// taps) / UADD8 0x80808080.
                ;// Byte-layout comments list the most significant byte first.
                ;// tVal7..tVal9 share registers with Left1..Left3 and tVal10
                ;// with AboveLeft, so the instruction order below is what
                ;// keeps each input alive until its last use.
    371 
    372         ;// M_STALL ARM1136JS=4
    373 
    374         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    375         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    376         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    377         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    378         LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
    379         ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
    380         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
    381         LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080
    382         ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
    383         PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
    384         MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
    385         MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
    386         ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL
    387         ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2
    388         MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00
    389         UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
    390         ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
    391         ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
    392         UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0
    393         UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
    394         MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
    395         MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
    396         UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
    397         UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
    398         UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3
    399         UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0
    400         UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
    401         MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
    402         ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
    403         PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
    404         M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} at pDst[3  to 0 ]
    405         M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} at pDst[7  to 4 ]
    406         M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} at pDst[11 to 8 ]
    407         STR      Out3,   [pDst]                      ;// store {i0 to i3} at pDst[15 to 12]
    408         MOV      return,  #OMX_Sts_NoErr
    409         M_EXIT                                       ;// Macro to exit midway-break frm case
    410 
    411 OMX_VC_4x4_VR
                ;// Vertical-right mode. Reads U0..U3, UL and only three left
                ;// bytes (L0..L2). Rows are stored as soon as they are ready,
                ;// interleaved with the computation of later rows.
                ;// Byte-layout comments list the most significant byte first.
                ;// Register reuse to be aware of: tVal0/tVal1/tVal2/tVal4 are
                ;// r0/r1/r2/r4 (the input pointers and leftStep, all already
                ;// consumed), tVal10 is AboveLeft and tVal12 is Above0123.
    412 
    413         ;// M_STALL ARM1136JS=4
    414 
    415         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
    416         LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
    417         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
    418         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
    419         LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
    420         MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00
                ;// Keep a copy of the above row: Above0123 (r12) is
                ;// overwritten further down when tVal12 is written.
    421         MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0
    422         ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
    423         MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
    424         MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
    425         UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
                ;// tVal10 is the same register as AboveLeft (r10); this is
                ;// the last read of AboveLeft.
    426         UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
    427         MVN      tVal4,  Left0                       ;// tVal4     = 00 00 00 L0'
    428         UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 00 00 00 k1
    429         ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0
    430         ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
    431         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
    432         UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
    433         UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL
    434         UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
    435         M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} at pDst[3  to 0 ]
    436         MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
    437         MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00
    438         ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
    439         UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1
    440         UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
    441         MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
    442         ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
    443         ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
    444         UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
    445         M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} at pDst[7  to 4 ]
    446         M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} at pDst[11 to 8 ]
    447         UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
    448         UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
    449         STR      Out3,   [pDst]                      ;// store {i2 to l2} at pDst[15 to 12]
    450         MOV      return,  #OMX_Sts_NoErr
    451         M_EXIT                                       ;// Macro to exit midway-break frm case
    452 
    453 OMX_VC_4x4_HD
                ;// Horizontal-down mode. Reads U0..U3, UL and L0..L3. Row 0 is
                ;// built and stored first; rows 1..3 are then derived from
                ;// pairs of left bytes held in alternate byte lanes
                ;// (00 x 00 y), so a 2-tap value (e) and a 3-tap value (l)
                ;// for the same position can be interleaved with one ORR.
                ;// Byte-layout comments list the most significant byte first.
    454 
    455         ;// M_STALL ARM1136JS=4
    456 
    457         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
    458         LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
    459         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
    460         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
    461         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
    462         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
    463         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
    464         ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
    465         MVN      tVal1,  Left0                       ;// tVal1 = 00 00 00 L0'
    466         ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
    467         MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
    468         UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
    469         UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = 00 00 00 dL
    470         UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
    471         UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL
    472         UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
    473         ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
    474         MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
    475         ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
    476         ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2
    477         ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3
    478         ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL
    479         M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
                ;// Shifting Out0 (i2 i1 i0 eL) left by 16 leaves its two low
                ;// bytes, i0 and eL, in the upper halfword for row 1.
    480         MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i0 eL 00 00
    481         UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
    482         UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3
    483         MVN      tVal12, tVal11                      ;// tVal12= 00 L0'00 L1'
    484         MVN      tVal14, tVal7                       ;// tVal14= 00 L1'00 L2'
    485         UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = 00 k1 00 k2
    486         UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = 00 d1 00 d2
    487         UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= 00 k2 00 k3
    488         UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = 00 d2 00 d3
    489         UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2
    490         UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
    491         UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
    492         UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
    493         ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
    494         ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
    495         PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i0 eL l1 e1
    496         M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
    497         M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
    498         STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
    499         MOV      return,  #OMX_Sts_NoErr
    500         M_EXIT                                       ;// Macro to exit midway-break frm case
    501 
    502 OMX_VC_4x4_VL
                ;// Vertical-left mode. Uses the eight above bytes U0..U7. When
                ;// OMX_VC_UPPER_RIGHT is not set, the word loaded into
                ;// Above4567 is replaced by U3 repeated four times.
                ;// Rows 0 and 2 are 2-tap averages (e); rows 1 and 3 are 3-tap
                ;// filtered values (i).
                ;// Byte-layout comments list the most significant byte first.
    503 
    504         ;// M_STALL ARM1136JS=3
    505 
    506         LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
    507         TST      availability, #OMX_VC_UPPER_RIGHT
    508         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
    509         LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
    510         MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
                ;// EQ still reflects the TST above (the LDR/MOV in between do
                ;// not set flags): executed only when UPPER_RIGHT is clear.
    511         MULEQ    Above4567, tVal11, r0x01010101      ;// Above4567 = U3 U3 U3 U3
    512         MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
    513         MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
    514         ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
    515         UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
    516         UADD8    Out0,   tVal8,  r0x80808080         ;// Out0 = e4 e3 e2 e1
    517         M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
    518         MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
    519         MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
    520         PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
    521         ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
    522         UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
    523         UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3
    524         MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
    525         MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2'
    526         UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2
    527         UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
    528         UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
    529         UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2
    530         UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
    531         UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
    532         M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
    533         M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
                ;// NOTE(review): unlike the other cases, the last row is
                ;// stored with a post-increment of pDst; the updated pDst is
                ;// not read again in this block.
    534         M_STR    Out3,   [pDst], dstStep             ;// store {Out3} at pDst [12 to 15]
    535         MOV      return, #OMX_Sts_NoErr
    536         M_EXIT                                       ;// Macro to exit midway-break frm case
    537 
    538 OMX_VC_4x4_HU
    539         ;// Case OMX_VC_4x4_HU: builds four 4-byte rows from the left bytes L0..L3 and stores them at pDst.
    540         ;// M_STALL ARM1136JS=2
    541         ;// Notation: bytes listed MSB first; x' = ~x; e = (a+b+1)>>1; i = (a+2*b+c+2)>>2; values live in the even byte lanes.
    542         LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
    543         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0] (M_LDRB presumably post-increments pSrcLeft by leftStep - confirm in armCOMM_s.h)
    544         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    545         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    546         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    547         MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x80808080 = 0x01010101 << 7
    548         ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
    549         ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
    550         ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
    551         MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
    552         MVN      tVal8,  tVal7                       ;// tVal8 = FF L2'FF L1' (odd lanes become FF)
    553         MVN      tVal10, tVal11                      ;// tVal10= FF L3'FF L2' (taken before tVal11 is overwritten below)
    554         UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2, g3 = (L1+L3)>>1, g2 = (L0+L2)>>1
    555         UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
    556         UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = 80 h3 80 h2 (odd lanes: (00 - FF)>>1 = 80)
    557         UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = 80 d2 80 d1
    558         UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= 80 d3 80 d2
    559         UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3, g4 = (L3+L2)>>1, g3 = (L3+L1)>>1
    560         UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2 (80 + 80 wraps the odd lanes to 00)
    561         UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= 80 h4 80 h3
    562         UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1, e1 = (L0+L1+1)>>1, e2 = (L1+L2+1)>>1
    563         UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2, e3 = (L2+L3+1)>>1
    564         UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3, i4 = (L2+3*L3+2)>>2
    565         ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1 (relies on the odd lanes being 00)
    566         ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
    567         M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
    568         PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
    569         M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
    570         M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
    571         STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
    572         MOV      return,  #OMX_Sts_NoErr             ;// return = OMX_Sts_NoErr
    573         M_END                                        ;// last case falls through to the function end (M_END is defined in armCOMM_s.h; presumably emits the epilogue - confirm)
    574 
    575         ENDIF ;// ARM1136JS
    576 
    577 
    578         END
    579 ;//-----------------------------------------------------------------------------------------------
    580 ;// omxVCM4P10_PredictIntra_4x4 ends
    581 ;//-----------------------------------------------------------------------------------------------
    582