Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14         INCLUDE omxtypes_s.h
     15         INCLUDE armCOMM_s.h
     16 
     17 ;// Define the processor variants supported by this file
     18 
     19          M_VARIANTS ARM1136JS
     20 
     21 ;//-------------------------------------------------------
     22 ;// This table implements the switch-case of C in asm by
     23 ;// the method of two levels of indexing.
     24 ;//-------------------------------------------------------
     25 
     26     M_TABLE armVCM4P10_pSwitchTable4x4
     27     DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR          ;// words 0, 1: word N is the case address fetched for predMode N
     28     DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL      ;// words 2, 3
     29     DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR           ;// words 4, 5
     30     DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL           ;// words 6, 7
     31     DCD  OMX_VC_4x4_HU                                ;// word 8 (last entry; order must not change, it is indexed by predMode)
     32 
     33     IF ARM1136JS
     34 
     35 ;//--------------------------------------------
     36 ;// Constants
     37 ;//--------------------------------------------
     38 BLK_SIZE              EQU 0x8                        ;// Not referenced anywhere in the visible source
     39 MUL_CONST0            EQU 0x01010101                 ;// (byte value) * MUL_CONST0 copies that byte into all four byte lanes
     40 ADD_CONST1            EQU 0x80808080                 ;// 128 in every byte lane; added with UADD8 as the "+128" step of the filters
     41 
     42 ;//--------------------------------------------
     43 ;// Scratch variable
     44 ;//--------------------------------------------
     45 return          RN 0    ;// return status; shares r0 with pSrcLeft, tVal0 and r0x80808080
     46 pTable          RN 9    ;// switch-table base; read only by the dispatch LDR, then r9 is reused (tVal9/Out3/Left3)
     47 pc              RN 15
     48 r0x01010101     RN 1    ;// holds MUL_CONST0; shares r1 with pSrcAbove and tVal1
     49 r0x80808080     RN 0    ;// holds ADD_CONST1; shares r0 with return, so return is written only after its last use
     50 ;// Generic temporaries: tValN is simply rN, so it also aliases any other name mapped to rN
     51 tVal0           RN 0
     52 tVal1           RN 1
     53 tVal2           RN 2
     54 tVal4           RN 4
     55 tVal6           RN 6
     56 tVal7           RN 7
     57 tVal8           RN 8
     58 tVal9           RN 9
     59 tVal10          RN 10
     60 tVal11          RN 11
     61 tVal12          RN 12
     62 tVal14          RN 14
     63 ;// Output rows 0..3 of the 4x4 block, one packed 32-bit word per row (r6-r9)
     64 Out0            RN 6
     65 Out1            RN 7
     66 Out2            RN 8
     67 Out3            RN 9
     68 ;// Left-neighbour bytes 0..3; same registers as Out0..Out3, so LeftN is lost once OutN is written
     69 Left0           RN 6
     70 Left1           RN 7
     71 Left2           RN 8
     72 Left3           RN 9
     73 ;// Packed above-neighbour words: bytes 0..3 and bytes 4..7 of pSrcAbove
     74 Above0123       RN 12
     75 Above4567       RN 14   ;// r14 is lr; NOTE(review): presumably saved by M_START and restored by M_EXIT/M_END - confirm in armCOMM_s.h
     76 ;// Above-left corner byte (shares r10 with tVal10)
     77 AboveLeft       RN 10
     78 
     79 ;//--------------------------------------------
     80 ;// Declare input registers
     81 ;//--------------------------------------------
     82 pSrcLeft        RN 0    ;// input pointer: left-neighbour pixels, one byte every leftStep bytes
     83 pSrcAbove       RN 1    ;// input pointer: above-neighbour row (4 bytes read, 8 in the DIAG_DL and VL cases)
     84 pSrcAboveLeft   RN 2    ;// input pointer: above-left corner byte
     85 pDst            RN 3    ;// output pointer: 4x4 destination, one 32-bit store per row
     86 leftStep        RN 4    ;// input variable: byte step between left pixels (loaded from the stack)
     87 dstStep         RN 5    ;// input variable: byte step between destination rows (loaded from the stack)
     88 predMode        RN 6    ;// input variable: switch-table index (loaded from the stack; r6 is reused after dispatch)
     89 availability    RN 7    ;// input variable: OMX_VC_* neighbour-availability flags (loaded from the stack)
     90 
     91 ;//-----------------------------------------------------------------------------------------------
     92 ;// omxVCM4P10_PredictIntra_4x4 starts
     93 ;//-----------------------------------------------------------------------------------------------
     94 
     95         ;// Function entry; M_START, M_ARG, M_LDR, M_EXIT and M_END are macros from armCOMM_s.h (not visible here)
     96         M_START omxVCM4P10_PredictIntra_4x4, r11
     97         ;// r0-r3 arrive as pSrcLeft, pSrcAbove, pSrcAboveLeft, pDst; the remaining four arguments are on the stack
     98         ;// Define stack arguments
     99         M_ARG    LeftStep,     4
    100         M_ARG    DstStep,      4
    101         M_ARG    PredMode,     4
    102         M_ARG    Availability, 4
    103 
    104         ;// M_STALL ARM1136JS=4
    105 
    106         LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// pTable = address of the jump table
    107 
    108         ;// Load argument from the stack
    109         M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
    110         M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
    111         M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
    112         M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
    113         ;// NOTE(review): predMode is not range-checked here; a value above 8 would load pc from past the table - confirm callers validate it
    114         LDR      pc, [pTable, predMode, LSL #2]      ;// pc = table[predMode]: jump to the case for predMode
    115 
    116 OMX_VC_4x4_VERT
    117         ;// Vertical case: the four bytes at pSrcAbove are copied unchanged into every row
    118         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
    119         M_STR    Above0123,  [pDst],  dstStep        ;// row 0 = Above0123 (post-indexed: pDst then steps by dstStep)
    120         M_STR    Above0123,  [pDst],  dstStep        ;// row 1 = Above0123
    121         M_STR    Above0123,  [pDst],  dstStep        ;// row 2 = Above0123
    122         STR      Above0123,  [pDst]                  ;// row 3 = Above0123 (last row, no pointer update)
    123         MOV      return, #OMX_Sts_NoErr
    124         M_EXIT                                      ;// Macro to exit midway - the "break" of this case
    125 
    126 OMX_VC_4x4_HOR
    127         ;// Horizontal case: row N is left pixel N repeated across all four bytes
    128         ;// M_STALL ARM1136JS=6
    129 
    130         LDR      r0x01010101,  =MUL_CONST0           ;// Byte-replication multiplier (overwrites pSrcAbove, unused in this case)
    131         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    132         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    133         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    134         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    135         MUL      Out0,   Left0,   r0x01010101        ;// Out0 = L0 L0 L0 L0 (OutN and LeftN are the same register)
    136         MUL      Out1,   Left1,   r0x01010101        ;// Out1 = L1 L1 L1 L1
    137         MUL      Out2,   Left2,   r0x01010101        ;// Out2 = L2 L2 L2 L2
    138         MUL      Out3,   Left3,   r0x01010101        ;// Out3 = L3 L3 L3 L3
    139         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    140         M_STR    Out1,   [pDst],  dstStep            ;// store {Out1} at pDst [4  to 7 ]
    141         M_STR    Out2,   [pDst],  dstStep            ;// store {Out2} at pDst [8  to 11]
    142         STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
    143         MOV      return, #OMX_Sts_NoErr
    144         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    145 
    146 OMX_VC_4x4_DC
    147         ;// DC case: the whole block is filled with one value chosen by which of UPPER / LEFT are available
    148         ;// M_STALL ARM1136JS=6
    149         ;// This fall-through path handles "both available": value = (sum of 4 above + sum of 4 left + 4) >> 3
    150         AND      availability,  availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
    151         CMP      availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)
    152         BNE      UpperOrLeftOrNoneAvailable          ;// Not both available: go test the individual flags
    153         LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
    154 
    155         ;// M_STALL ARM1136JS=1
    156 
    157         UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2] as two halfwords
    158         UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3] as two halfwords
    159         UADD16   tVal11, tVal6,   tVal7              ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
    160         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    161         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    162         ADD      tVal11, tVal11,  LSR #16            ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
    163         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    164         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    165         UXTH     tVal11, tVal11                      ;// keep only the low halfword (clear the top junk bits)
    166         ADD      tVal6,  Left0,  Left1               ;// tVal6 = Left0 + Left1
    167         ADD      tVal7,  Left2,  Left3               ;// tVal7 = Left2 + Left3
    168         ADD      tVal6,  tVal6,  tVal7               ;// tVal6 = sum of the four left pixels
    169         ADD      Out0,   tVal6,  tVal11              ;// Out0  = left sum + above sum
    170         ADD      Out0,   Out0,   #4                  ;// Out0  = Out0 + 4 (rounding term)
    171         LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (overwrites pSrcAbove, already consumed)
    172         MOV      Out0,   Out0,  LSR #3               ;// Out0 = (Out0 + 4)>>3
    173 
    174         ;// M_STALL ARM1136JS=1
    175 
    176         MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
    177 
    178         ;// M_STALL ARM1136JS=1
    179 
    180         MOV      return,  #OMX_Sts_NoErr
    181         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    182         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    183         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    184         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    185         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    186 
    187 UpperOrLeftOrNoneAvailable
    188         ;// M_STALL ARM1136JS=3
    189         ;// DC with only UPPER available: value = (sum of 4 above + 2) >> 2
    190         CMP      availability,  #OMX_VC_UPPER        ;// availability was masked to UPPER|LEFT in OMX_VC_4x4_DC, so EQ means UPPER only
    191         BNE      LeftOrNoneAvailable                 ;// Not "upper only": go test LEFT
    192         LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
    193 
    194         ;// M_STALL ARM1136JS=3
    195 
    196         UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2] as two halfwords
    197         UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3] as two halfwords
    198         UADD16   Out0,   tVal6,  tVal7               ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
    199         LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (overwrites pSrcAbove, already consumed)
    200         ADD      Out0,   Out0,   LSR #16             ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
    201 
    202         ;// M_STALL ARM1136JS=1
    203 
    204         UXTH     Out0,   Out0                        ;// keep only the low halfword (clear the top junk bits)
    205         ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2 (rounding term)
    206 
    207         ;// M_STALL ARM1136JS=1
    208 
    209         MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
    210 
    211         ;// M_STALL ARM1136JS=1
    212 
    213         MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
    214 
    215         ;// M_STALL ARM1136JS=1
    216 
    217         MOV      return, #OMX_Sts_NoErr
    218         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    219         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    220         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    221         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    222 
    223         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    224 
    225 LeftOrNoneAvailable
    226         ;// M_STALL ARM1136JS=3
    227         ;// DC with only LEFT available: value = (sum of 4 left + 2) >> 2
    228         LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101, loaded before the branch because NoneAvailable uses it too
    229         CMP      availability, #OMX_VC_LEFT
    230         BNE      NoneAvailable
    231         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    232         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    233         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    234         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    235         ADD      Out0,   Left0,  Left1               ;// Out0  = Left0 + Left1
    236 
    237         ;// M_STALL ARM1136JS=1
    238 
    239         ADD      Out1,   Left2,  Left3               ;// Out1  = Left2 + Left3
    240         ADD      Out0,   Out0,   Out1                ;// Out0  = sum of the four left pixels
    241         ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2 (rounding term)
    242 
    243         ;// M_STALL ARM1136JS=1
    244 
    245         MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
    246 
    247         ;// M_STALL ARM1136JS=1
    248 
    249         MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
    250 
    251         ;// M_STALL ARM1136JS=1
    252 
    253         MOV      return, #OMX_Sts_NoErr
    254         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    255         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    256         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    257         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    258         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    259 
    260 NoneAvailable
    261         MOV      Out0,   #128                        ;// DC with neither UPPER nor LEFT: every pixel is the constant 128
    262 
    263         ;// M_STALL ARM1136JS=5
    264 
    265         MUL      Out0,   Out0,  r0x01010101          ;// replicate 128 in all the bytes (r0x01010101 was loaded in LeftOrNoneAvailable)
    266 
    267         ;// M_STALL ARM1136JS=1
    268 
    269         MOV      return, #OMX_Sts_NoErr
    270         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
    271         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
    272         M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
    273         STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
    274         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    275 
    276 OMX_VC_4x4_DIAG_DL
    277         ;// Diagonal-down-left case. This path runs when UPPER_RIGHT is not set: U3 stands in for U4..U7.
    278         ;//------------------------------------------------------------------
    279         ;// f = (a+2*b+c+2)>>2
    280         ;// Calculate as:
    281         ;// d = (a + c )>>1
    282         ;// e = (d - b')>>1
    283         ;// f = e + 128
    284         ;//------------------------------------------------------------------
    285         ;// b' is the bitwise complement of b (MVN); d, e, f are done per byte lane with UHADD8, UHSUB8, UADD8
    286         ;// M_STALL ARM1136JS=3
    287 
    288         TST      availability, #OMX_VC_UPPER_RIGHT
    289         LDMIA    pSrcAbove,  {Above0123, Above4567}  ;// Above0123, Above4567 = pSrcAbove[0 to 7] (flags from TST are kept)
    290         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
    291         BNE      DLUpperRightAvailable               ;// UPPER_RIGHT set: use the real U4..U7
    292         LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
    293         MOV      tVal7,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
    294         MOV      tVal11, tVal7,  LSL #24             ;// {U3,  00,  00,  00 }
    295         MUL      Out3,   tVal7,  r0x01010101         ;// {U3,  U3,  U3,  U3 }
    296         MOV      tVal8,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
    297         MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
    298         MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
    299         UHADD8   tVal8,  tVal8,  Above0123           ;// {xx,  xx,  d1,  d0 }
    300         UHADD8   tVal6,  Above0123,  tVal9           ;// {xx,  d2,  xx,  xx } (tVal9 is Out3 = U3 in every lane)
    301         UHSUB8   tVal8,  tVal8,  tVal10              ;// {xx,  xx,  e1,  e0 }
    302         UHSUB8   tVal6,  tVal6,  tVal10              ;// {xx,  e2,  xx,  xx }
    303         UADD8    tVal8,  tVal8,  r0x80808080         ;// {xx,  xx,  f1,  f0 }
    304         UADD8    tVal6,  tVal6,  r0x80808080         ;// {xx,  f2,  xx,  xx }
    305 
    306         ;// M_STALL ARM1136JS=1
    307 
    308         PKHBT    tVal6,  tVal8,  tVal6               ;// {xx,  f2,  f1,  f0 }
    309         BIC      tVal6,  tVal6,  #0xFF000000         ;// {00,  f2,  f1,  f0 }
    310         ORR      Out0,   tVal6,  tVal11              ;// {U3,  f2,  f1,  f0 }
    311 
    312         ;// M_STALL ARM1136JS=1
    313 
    314         PKHTB    Out1,   Out3,   Out0,  ASR #8       ;// {U3,  U3,  f2,  f1 }
    315         MOV      return, #OMX_Sts_NoErr              ;// safe here: the last use of r0x80808080 (same r0) is above
    316         PKHTB    Out2,   Out3,   Out1,  ASR #8       ;// {U3,  U3,  U3,  f2 }
    317 
    318         M_STR    Out0,   [pDst], dstStep             ;// store {U3, f2, f1, f0} at pDst[3  to 0 ]
    319         M_STR    Out1,   [pDst], dstStep             ;// store {U3, U3, f2, f1} at pDst[7  to 4 ]
    320         M_STR    Out2,   [pDst], dstStep             ;// store {U3, U3, U3, f2} at pDst[11 to 8 ]
    321         STR      Out3,   [pDst]                      ;// store {U3, U3, U3, U3} at pDst[15 to 12]
    322         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    323 
    324 DLUpperRightAvailable
    325         ;// Diagonal-down-left with real U4..U7: f(n) = (U[n] + 2*U[n+1] + U[n+2] + 2)>>2, and f6 uses U7 twice
    326         MOV      tVal8,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
    327         MOV      tVal9,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
    328         MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
    329         ORR      tVal8,  tVal8,  Above4567, LSL #8   ;// {U6,  U5,  U4,  U3 }
    330         ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
    331         PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// {U5,  U4,  U3,  U2 }
    332         MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'} (r1 was pSrcAbove, already consumed)
    333         MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
    334         MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
    335         UHADD8   tVal6,  Above0123,  tVal9           ;// {d3,  d2,  d1,  d0 }
    336         UHADD8   tVal9,  tVal9,  Above4567           ;// {d5,  d4,  d3,  d2 }
    337         UHADD8   tVal8,  Above4567,  tVal8           ;// {d6,  xx,  xx,  xx } d6 = (U7 + U6)>>1
    338         UHSUB8   tVal6,  tVal6,  tVal10              ;// {e3,  e2,  e1,  e0 }
    339         UHSUB8   tVal12, tVal9,  tVal1               ;// {e5,  e4,  e3,  e2 }
    340         UHSUB8   tVal8,  tVal8,  tVal2               ;// {e6,  xx,  xx,  xx }
    341         UADD8    Out0,   tVal6,  r0x80808080         ;// {f3,  f2,  f1,  f0 }
    342         UADD8    tVal9,  tVal8,  r0x80808080         ;// {f6,  xx,  xx,  xx }
    343         UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
    344         MOV      tVal7,  Out0,   LSR #8              ;// {00,  f3,  f2,  f1 }
    345         AND      tVal9,  tVal9,  #0xFF000000         ;// {f6,  00,  00,  00 }
    346         PKHBT    Out1,   tVal7,  Out2,  LSL #8       ;// {f4,  f3,  f2,  f1 }
    347         ORR      Out3,   tVal9,  Out2,  LSR #8       ;// {f6,  f5,  f4,  f3 }
    348         M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
    349         M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
    350         M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
    351         STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDst[15 to 12]
    352         MOV      return, #OMX_Sts_NoErr
    353         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    354 
    355 
    356 OMX_VC_4x4_DIAG_DR
    357         ;// Diagonal-down-right case: 3-tap (a+2b+c+2)>>2 filter run along the sequence L3 L2 L1 L0 UL U0 U1 U2 U3
    358         ;// M_STALL ARM1136JS=4
    359         ;// Row 0 and row 3 are computed with the SIMD trick; rows 1 and 2 are assembled from their bytes
    360         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    361         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    362         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    363         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    364         LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
    365         ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
    366         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
    367         LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080 (r0 was pSrcLeft, already consumed)
    368         ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
    369         PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
    370         MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
    371         MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
    372         ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL
    373         ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2
    374         MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00 (r10 was AboveLeft, last read above)
    375         UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
    376         ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
    377         ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
    378         UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0
    379         UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
    380         MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
    381         MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
    382         UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
    383         UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
    384         UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3
    385         UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0
    386         UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
    387         MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
    388         ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
    389         PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
    390         M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} at pDst[3  to 0 ]
    391         M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} at pDst[7  to 4 ]
    392         M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} at pDst[11 to 8 ]
    393         STR      Out3,   [pDst]                      ;// store {i0 to i3} at pDst[15 to 12]
    394         MOV      return,  #OMX_Sts_NoErr
    395         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    396 
    397 OMX_VC_4x4_VR
    398         ;// Vertical-right case: rows mix 2-tap averages (a+b+1)>>1 and 3-tap filters (a+2b+c+2)>>2 of UL, U0..U3, L0..L2
    399         ;// M_STALL ARM1136JS=4
    400         ;// 2-tap average is formed as UHSUB8(a, b') + 0x80; only Left0..Left2 are read (no fourth left pixel)
    401         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
    402         LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
    403         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
    404         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
    405         LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
    406         MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00
    407         MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0 (copy: r12 is reused as tVal12 below)
    408         ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
    409         MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
    410         MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
    411         UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
    412         UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
    413         MVN      tVal4,  Left0                       ;// tVal4     = FF FF FF L0'
    414         UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 80 80 80 k1 (upper lanes wrap to 00 after +0x80)
    415         ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0 (overwrites Above0123)
    416         ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
    417         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (r0 = tVal0 is no longer needed)
    418         UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
    419         UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL
    420         UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
    421         M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} at pDst[3  to 0 ]
    422         MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
    423         MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00
    424         ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
    425         UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1
    426         UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
    427         MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
    428         ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
    429         ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
    430         UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
    431         M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} at pDst[7  to 4 ]
    432         M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} at pDst[11 to 8 ]
    433         UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
    434         UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
    435         STR      Out3,   [pDst]                      ;// store {i2 to l2} at pDst[15 to 12]
    436         MOV      return,  #OMX_Sts_NoErr
    437         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    438 
    439 OMX_VC_4x4_HD
    440         ;// Horizontal-down case: 2-tap averages (e) and 3-tap filters (i, l) of U0..U3, UL and L0..L3
    441         ;// M_STALL ARM1136JS=4
    442         ;// Lanes built from MVN of a single byte hold FF, become 80 after UHSUB8 and wrap to 00 after the +0x80
    443         LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
    444         LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
    445         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
    446         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
    447         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
    448         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
    449         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (r0 was pSrcLeft, already consumed)
    450         ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
    451         MVN      tVal1,  Left0                       ;// tVal1 = FF FF FF L0'
    452         ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
    453         MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
    454         UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
    455         UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = 80 80 80 dL
    456         UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
    457         UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL
    458         UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
    459         ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
    460         MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
    461         ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
    462         ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2
    463         ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3
    464         ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL
    465         M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
    466         MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i0 eL 00 00
    467         UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
    468         UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3
    469         MVN      tVal12, tVal11                      ;// tVal12= FF L0'FF L1'
    470         MVN      tVal14, tVal7                       ;// tVal14= FF L1'FF L2'
    471         UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = 80 k1 80 k2
    472         UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = 80 d1 80 d2
    473         UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= 80 k2 80 k3
    474         UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = 80 d2 80 d3
    475         UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2
    476         UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
    477         UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
    478         UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
    479         ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
    480         ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
    481         PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i0 eL l1 e1
    482         M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
    483         M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
    484         STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
    485         MOV      return,  #OMX_Sts_NoErr
    486         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    487 
    488 OMX_VC_4x4_VL
    489         ;// Vertical-left case: rows 0 and 2 are 2-tap averages, rows 1 and 3 are 3-tap (a+2b+c+2)>>2 filters of U0..U6
    490         ;// M_STALL ARM1136JS=3
    491         ;// When UPPER_RIGHT is not set, U4..U7 are replaced by U3 (the MULEQ below)
    492         LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7]
    493         TST      availability, #OMX_VC_UPPER_RIGHT
    494         LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
    495         LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
    496         MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
    497         MULEQ    Above4567, tVal11, r0x01010101      ;// EQ from the TST (UPPER_RIGHT clear): Above4567 = U3 U3 U3 U3
    498         MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
    499         MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
    500         ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
    501         UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
    502         UADD8    Out0,   tVal8,  r0x80808080         ;// Out0 = e4 e3 e2 e1
    503         M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
    504         MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
    505         MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
    506         PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
    507         ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
    508         UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
    509         UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3 (overwrites Above4567, last read above)
    510         MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
    511         MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2'
    512         UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2
    513         UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
    514         UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
    515         UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2
    516         UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
    517         UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
    518         M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
    519         M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
    520         STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]; last row, so no pDst write-back (as in every other case)
    521         MOV      return, #OMX_Sts_NoErr
    522         M_EXIT                                       ;// Macro to exit midway - the "break" of this case
    523 
    524 OMX_VC_4x4_HU
    525         ;// Horizontal-up case: interleaves 2-tap averages (e) and 3-tap filters (i) of L0..L3; the tail is filled with L3
    526         ;// M_STALL ARM1136JS=2
    527         ;// Last case in the function: it falls into M_END instead of using M_EXIT
    528         LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
    529         M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
    530         M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
    531         M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
    532         LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
    533         MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x01010101 << 7 = 0x80808080 (r0 was pSrcLeft, done with it)
    534         ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
    535         ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
    536         ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
    537         MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
    538         MVN      tVal8,  tVal7                       ;// tVal8 = FF L2'FF L1'
    539         MVN      tVal10, tVal11                      ;// tVal10= FF L3'FF L2'
    540         UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2
    541         UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
    542         UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = 80 h3 80 h2
    543         UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = 80 d2 80 d1
    544         UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= 80 d3 80 d2
    545         UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
    546         UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2
    547         UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= 80 h4 80 h3
    548         UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1
    549         UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
    550         UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3
    551         ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1
    552         ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
    553         M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
    554         PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
    555         M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
    556         M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
    557         STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
    558         MOV      return,  #OMX_Sts_NoErr
    559         M_END
    560 
    561         ENDIF ;// ARM1136JS
    562 
    563 
    564         END
    565 ;//-----------------------------------------------------------------------------------------------
    566 ;// omxVCM4P10_PredictIntra_4x4 ends
    567 ;//-----------------------------------------------------------------------------------------------
    568